ruby_speech 2.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.travis.yml +17 -0
- data/CHANGELOG.md +144 -0
- data/Gemfile +3 -0
- data/Guardfile +9 -0
- data/LICENSE.md +20 -0
- data/README.md +314 -0
- data/Rakefile +34 -0
- data/assets/grammar-core.xsd +317 -0
- data/assets/grammar.xsd +37 -0
- data/assets/synthesis-core.xsd +445 -0
- data/assets/synthesis.xsd +63 -0
- data/assets/xml.xsd +287 -0
- data/ext/ruby_speech/RubySpeechGRXMLMatcher.java +64 -0
- data/ext/ruby_speech/RubySpeechService.java +23 -0
- data/ext/ruby_speech/extconf.rb +7 -0
- data/ext/ruby_speech/ruby_speech.c +97 -0
- data/lib/ruby_speech/generic_element.rb +169 -0
- data/lib/ruby_speech/grxml/element.rb +29 -0
- data/lib/ruby_speech/grxml/grammar.rb +189 -0
- data/lib/ruby_speech/grxml/item.rb +144 -0
- data/lib/ruby_speech/grxml/match.rb +16 -0
- data/lib/ruby_speech/grxml/matcher.rb +126 -0
- data/lib/ruby_speech/grxml/max_match.rb +6 -0
- data/lib/ruby_speech/grxml/no_match.rb +10 -0
- data/lib/ruby_speech/grxml/one_of.rb +31 -0
- data/lib/ruby_speech/grxml/potential_match.rb +10 -0
- data/lib/ruby_speech/grxml/rule.rb +73 -0
- data/lib/ruby_speech/grxml/ruleref.rb +69 -0
- data/lib/ruby_speech/grxml/tag.rb +29 -0
- data/lib/ruby_speech/grxml/token.rb +31 -0
- data/lib/ruby_speech/grxml.rb +39 -0
- data/lib/ruby_speech/nlsml/builder.rb +34 -0
- data/lib/ruby_speech/nlsml/document.rb +120 -0
- data/lib/ruby_speech/nlsml.rb +18 -0
- data/lib/ruby_speech/ruby_speech.jar +0 -0
- data/lib/ruby_speech/ssml/audio.rb +47 -0
- data/lib/ruby_speech/ssml/break.rb +62 -0
- data/lib/ruby_speech/ssml/desc.rb +24 -0
- data/lib/ruby_speech/ssml/element.rb +23 -0
- data/lib/ruby_speech/ssml/emphasis.rb +44 -0
- data/lib/ruby_speech/ssml/mark.rb +43 -0
- data/lib/ruby_speech/ssml/p.rb +25 -0
- data/lib/ruby_speech/ssml/phoneme.rb +72 -0
- data/lib/ruby_speech/ssml/prosody.rb +172 -0
- data/lib/ruby_speech/ssml/s.rb +25 -0
- data/lib/ruby_speech/ssml/say_as.rb +100 -0
- data/lib/ruby_speech/ssml/speak.rb +27 -0
- data/lib/ruby_speech/ssml/sub.rb +42 -0
- data/lib/ruby_speech/ssml/voice.rb +108 -0
- data/lib/ruby_speech/ssml.rb +39 -0
- data/lib/ruby_speech/version.rb +3 -0
- data/lib/ruby_speech/xml/language.rb +13 -0
- data/lib/ruby_speech/xml.rb +11 -0
- data/lib/ruby_speech.rb +36 -0
- data/ruby_speech.gemspec +42 -0
- data/spec/ruby_speech/grxml/grammar_spec.rb +341 -0
- data/spec/ruby_speech/grxml/item_spec.rb +192 -0
- data/spec/ruby_speech/grxml/match_spec.rb +15 -0
- data/spec/ruby_speech/grxml/matcher_spec.rb +688 -0
- data/spec/ruby_speech/grxml/max_match_spec.rb +17 -0
- data/spec/ruby_speech/grxml/no_match_spec.rb +17 -0
- data/spec/ruby_speech/grxml/one_of_spec.rb +49 -0
- data/spec/ruby_speech/grxml/potential_match_spec.rb +17 -0
- data/spec/ruby_speech/grxml/rule_spec.rb +125 -0
- data/spec/ruby_speech/grxml/ruleref_spec.rb +55 -0
- data/spec/ruby_speech/grxml/tag_spec.rb +41 -0
- data/spec/ruby_speech/grxml/token_spec.rb +62 -0
- data/spec/ruby_speech/grxml_spec.rb +339 -0
- data/spec/ruby_speech/nlsml_spec.rb +353 -0
- data/spec/ruby_speech/ssml/audio_spec.rb +121 -0
- data/spec/ruby_speech/ssml/break_spec.rb +100 -0
- data/spec/ruby_speech/ssml/desc_spec.rb +57 -0
- data/spec/ruby_speech/ssml/emphasis_spec.rb +110 -0
- data/spec/ruby_speech/ssml/mark_spec.rb +53 -0
- data/spec/ruby_speech/ssml/p_spec.rb +96 -0
- data/spec/ruby_speech/ssml/phoneme_spec.rb +65 -0
- data/spec/ruby_speech/ssml/prosody_spec.rb +309 -0
- data/spec/ruby_speech/ssml/s_spec.rb +92 -0
- data/spec/ruby_speech/ssml/say_as_spec.rb +71 -0
- data/spec/ruby_speech/ssml/speak_spec.rb +166 -0
- data/spec/ruby_speech/ssml/sub_spec.rb +57 -0
- data/spec/ruby_speech/ssml/voice_spec.rb +200 -0
- data/spec/ruby_speech/ssml_spec.rb +285 -0
- data/spec/ruby_speech_spec.rb +124 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/support/match_examples.rb +43 -0
- data/spec/support/matchers.rb +46 -0
- metadata +405 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
module RubySpeech
|
|
2
|
+
module GRXML
|
|
3
|
+
##
|
|
4
|
+
# The Speech Recognition Grammar Language is an XML application. The root element is grammar.
|
|
5
|
+
#
|
|
6
|
+
# http://www.w3.org/TR/speech-grammar/#S4.3
|
|
7
|
+
#
|
|
8
|
+
# Attributes: uri, language, root, tag-format
|
|
9
|
+
#
|
|
10
|
+
# tag-format declaration is an optional declaration of a tag-format identifier that indicates the content type of all tags contained within a grammar.
|
|
11
|
+
#
|
|
12
|
+
# NOTE: A grammar without rules is allowed but cannot be used for processing input -- http://www.w3.org/Voice/2003/srgs-ir/
|
|
13
|
+
#
|
|
14
|
+
# TODO: Look into lexicon (probably a sub element)
|
|
15
|
+
#
|
|
16
|
+
class Grammar < Element
|
|
17
|
+
include XML::Language
|
|
18
|
+
|
|
19
|
+
register :grammar
|
|
20
|
+
|
|
21
|
+
self.defaults = { :version => '1.0', :language => "en-US" }
|
|
22
|
+
|
|
23
|
+
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, Rule, Tag].freeze
|
|
24
|
+
|
|
25
|
+
##
|
|
26
|
+
#
|
|
27
|
+
# The mode of a grammar indicates the type of input that the user agent should be detecting. The default mode is "voice" for speech recognition grammars. An alternative input mode is "dtmf".
|
|
28
|
+
#
|
|
29
|
+
# @return [String]
|
|
30
|
+
#
|
|
31
|
+
# Input mode declared on the grammar element, fetched through read_attr with the
# :to_sym converter.
# NOTE(review): read_attr is defined outside this file -- presumably it returns
# nil when the attribute is absent; confirm.
#
# @return [Symbol] e.g. :voice or :dtmf
def mode
  read_attr(:mode, :to_sym)
end
|
|
34
|
+
|
|
35
|
+
##
|
|
36
|
+
# @param [String] ia
|
|
37
|
+
#
|
|
38
|
+
# Stores the given input mode in the element's :mode attribute.
#
# @param [String] ia the mode to write
def mode=(ia)
  self[:mode] = ia
end
|
|
41
|
+
|
|
42
|
+
##
|
|
43
|
+
#
|
|
44
|
+
# The root ("rule") attribute declares a single rule to be the root rule of the grammar. This attribute is OPTIONAL. The rule declared must be defined within the scope of the grammar. The specified rule can be scoped "public" or "private".
|
|
45
|
+
#
|
|
46
|
+
# @return [String]
|
|
47
|
+
#
|
|
48
|
+
# Name of the rule declared as the grammar's root, read from the :root attribute.
#
# @return [String] the value read_attr yields for :root
def root
  read_attr(:root)
end
|
|
51
|
+
|
|
52
|
+
##
|
|
53
|
+
# @param [String] ia
|
|
54
|
+
#
|
|
55
|
+
# Stores the root rule name in the element's :root attribute.
#
# @param [String] ia id of the rule to use as root
def root=(ia)
  self[:root] = ia
end
|
|
58
|
+
|
|
59
|
+
##
|
|
60
|
+
#
|
|
61
|
+
# @return [String]
|
|
62
|
+
#
|
|
63
|
+
# Tag-format identifier, read from the hyphenated 'tag-format' attribute.
#
# @return [String] the value read_attr yields for :'tag-format'
def tag_format
  read_attr(:'tag-format')
end
|
|
66
|
+
|
|
67
|
+
##
|
|
68
|
+
# @param [String] s
|
|
69
|
+
#
|
|
70
|
+
# Stores the tag-format identifier under the hyphenated 'tag-format' attribute
# (written with a String key, unlike the Symbol keys used by the other setters).
#
# @param [String] s the tag-format identifier
def tag_format=(s)
  self['tag-format'] = s
end
|
|
73
|
+
|
|
74
|
+
##
|
|
75
|
+
# @return [Rule] The root rule node for the document
|
|
76
|
+
#
|
|
77
|
+
# Looks up the rule child whose id equals the grammar's root attribute.
#
# @return [Rule, nil] the first matching rule, or nil when the lookup is empty
def root_rule
  candidates = children(:rule, :id => root)
  candidates.first
end
|
|
80
|
+
|
|
81
|
+
##
|
|
82
|
+
# Checks for a root rule matching the value of the root tag
|
|
83
|
+
#
|
|
84
|
+
# @raises [InvalidChildError] if there is not a rule present in the document with the correct ID
|
|
85
|
+
#
|
|
86
|
+
# @return [Grammar] self
|
|
87
|
+
#
|
|
88
|
+
# Guards against a root attribute that names a rule which is not in the document.
#
# @raise [InvalidChildError] when has_matching_root_rule? is falsy
# @return [Grammar] self, so the call can be chained
def assert_has_matching_root_rule
  return self if has_matching_root_rule?

  raise InvalidChildError, "A GRXML document must have a rule matching the root rule name"
end
|
|
92
|
+
|
|
93
|
+
##
|
|
94
|
+
# @return [Grammar] an inlined copy of self
|
|
95
|
+
#
|
|
96
|
+
# Non-destructive counterpart of inline!: clones the receiver and inlines the clone.
#
# @return [Grammar] the result of inline! on the clone
def inline
  copy = clone
  copy.inline!
end
|
|
99
|
+
|
|
100
|
+
##
|
|
101
|
+
# Replaces rulerefs in the document with a copy of the original rule.
|
|
102
|
+
# Removes all top level rules except the root rule
|
|
103
|
+
#
|
|
104
|
+
# @return self
|
|
105
|
+
#
|
|
106
|
+
# Replaces every uri-based ruleref in the document with the children of the rule
# it points to, then removes all top-level rules except the root rule.
#
# Rulerefs without a uri attribute (the `special` form that Ruleref also accepts)
# reference no rule and are left in place instead of crashing on a nil uri.
#
# @raise [InvalidChildError] if a ruleref's uri does not resolve to a rule child
#   of this grammar (previously surfaced as NoMethodError on nil)
# @return [Grammar] self
def inline!
  xpath("//ns:ruleref", :ns => namespace_href).each do |ref|
    uri = ref[:uri]
    next unless uri # special ruleref: nothing to inline

    # Local references are written "#rulename"; strip the fragment marker.
    rule = children(:rule, :id => uri.sub(/^#/, '')).first
    raise InvalidChildError, "A GRXML ruleref must reference a rule defined in the grammar (no rule found for '#{uri}')" unless rule

    ref.swap rule.nokogiri_children
  end

  # The id attribute is only namespace-prefixed in the query on JRuby's Nokogiri.
  non_root_rules = xpath "./ns:rule[@#{namespace_href && Nokogiri.jruby? ? 'ns:' : ''}id!='#{root}']", :ns => namespace_href
  non_root_rules.remove

  self
end
|
|
117
|
+
|
|
118
|
+
##
|
|
119
|
+
# Replaces textual content of the document with token elements containing such content.
|
|
120
|
+
# This homogenises all tokens in the document to a consistent format for processing.
|
|
121
|
+
#
|
|
122
|
+
# Walks the document and replaces each text node with Token elements built from
# its content, so every token ends up in the same representation.
# Text whose parent imports as a Token or Tag is left untouched.
def tokenize!
  traverse do |node|
    if node.is_a?(Nokogiri::XML::Text)
      parent_class = self.class.import(node.parent).class

      unless [Token, Tag].include?(parent_class)
        tokens = split_tokens(node).map do |text|
          token = Token.new
          token << text
          token
        end

        # Swap the raw text for the token nodes, wrapped in a fresh NodeSet.
        node.swap Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new, tokens)
      end
    end
  end
end
|
|
136
|
+
|
|
137
|
+
##
|
|
138
|
+
# Normalizes whitespace within tokens in the document according to the rules in the SRGS spec (http://www.w3.org/TR/speech-grammar/#S2.1)
|
|
139
|
+
# Leading and trailing whitespace is removed, and multiple spaces within the string are collapsed down to single spaces.
|
|
140
|
+
#
|
|
141
|
+
# Visits every node except the grammar itself, imports it, and -- when the
# imported element knows how to normalize_whitespace -- normalizes it and swaps
# the normalized element in for the original node.
def normalize_whitespace
  traverse do |node|
    unless node === self
      imported = self.class.import(node)

      if imported.respond_to?(:normalize_whitespace)
        imported.normalize_whitespace
        node.swap imported
      end
    end
  end
end
|
|
152
|
+
|
|
153
|
+
# @return [Boolean] true when the grammar's mode is :dtmf
def dtmf?
  current_mode = mode
  current_mode == :dtmf
end
|
|
156
|
+
|
|
157
|
+
# @return [Boolean] true when the grammar's mode is :voice
def voice?
  current_mode = mode
  current_mode == :voice
end
|
|
160
|
+
|
|
161
|
+
# Appends a child after checking its exact class against VALID_CHILD_TYPES.
#
# @param arg the child to append
# @raise [InvalidChildError] if arg's class is not listed in VALID_CHILD_TYPES
def <<(arg)
  permitted = VALID_CHILD_TYPES.include?(arg.class)
  raise InvalidChildError, "A Grammar can only accept Rule and Tag as children" unless permitted

  super
end
|
|
165
|
+
|
|
166
|
+
# Delegates equality to the parent implementation, naming the attributes that
# take part in the comparison for a Grammar.
#
# @param o the object to compare with
def eql?(o)
  compared = [:language, :base_uri, :mode, :root]
  super(o, *compared)
end
|
|
169
|
+
|
|
170
|
+
# Embeds another element, refusing a grammar whose mode differs from this one.
#
# @param other the element to embed
# @raise [InvalidChildError] if other is of this class and its mode differs
def embed(other)
  mode_conflict = other.is_a?(self.class) && other.mode != mode
  raise InvalidChildError, "Embedded grammars must have the same mode" if mode_conflict

  super
end
|
|
174
|
+
|
|
175
|
+
private
|
|
176
|
+
|
|
177
|
+
# A grammar with no root attribute trivially passes; otherwise the answer is the
# root rule itself (truthy) or nil when no rule carries that id.
def has_matching_root_rule?
  return true unless root

  root_rule
end
|
|
180
|
+
|
|
181
|
+
# Splits the textual content of a node into token strings.
#
# A double-quoted run is one token (quotes stripped, inner whitespace kept);
# everything else is split on whitespace. The quoted-run pattern is non-greedy so
# that several quoted tokens in the same text stay separate -- a greedy match
# would swallow everything between the first and the last quote.
#
# @param element anything whose to_s yields the text to tokenize
# @return [Array<String>] the tokens, in document order
def split_tokens(element)
  element.to_s.split(/(".*?")/).reject(&:empty?).flat_map do |chunk|
    quoted = chunk.match(/\A"(.*)"\z/)
    quoted ? [quoted[1]] : chunk.split(' ')
  end
end
|
|
187
|
+
end # Grammar
|
|
188
|
+
end # GRXML
|
|
189
|
+
end # RubySpeech
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
module RubySpeech
|
|
2
|
+
module GRXML
|
|
3
|
+
##
|
|
4
|
+
#
|
|
5
|
+
# The item element is one of the valid expansion elements for the SGR rule element
|
|
6
|
+
#
|
|
7
|
+
# http://www.w3.org/TR/speech-grammar/#S2.4 --> XML Form
|
|
8
|
+
#
|
|
9
|
+
# The item element has four (optional) attributes: weight, repeat, repeat-prob, and xml:lang (language identifier)
|
|
10
|
+
#
|
|
11
|
+
# http://www.w3.org/TR/speech-grammar/#S2.4.1
|
|
12
|
+
# http://www.w3.org/TR/speech-grammar/#S2.3
|
|
13
|
+
#
|
|
14
|
+
# A weight may be optionally provided for any number of alternatives in an alternative expansion. Weights are simple positive floating point values without exponentials. Legal formats are "n", "n.", ".n" and "n.n" where "n" is a sequence of one or many digits.
|
|
15
|
+
#
|
|
16
|
+
# A weight is nominally a multiplying factor in the likelihood domain of a speech recognition search. A weight of 1.0 is equivalent to providing no weight at all. A weight greater than "1.0" positively biases the alternative and a weight less than "1.0" negatively biases the alternative.
|
|
17
|
+
#
|
|
18
|
+
# repeat has several valid values...
|
|
19
|
+
#
|
|
20
|
+
# Any repeated legal rule expansion is itself a legal rule expansion.
|
|
21
|
+
#
|
|
22
|
+
# Operators are provided that define a legal rule expansion as being another sub-expansion that is optional, that is repeated zero or more times, that is repeated one or more times, or that is repeated some range of times.
|
|
23
|
+
#
|
|
24
|
+
# repeat probability (repeat-prob) indicates the probability of successive repetition of the repeated expansion. It is ignored if repeat is not specified
|
|
25
|
+
#
|
|
26
|
+
# xml:lang declares the language of the grammar section for the item element, just as xml:lang in the <grammar> element declares it for the entire document
|
|
27
|
+
#
|
|
28
|
+
class Item < Element
|
|
29
|
+
Inf = 1.0 / 0.0
|
|
30
|
+
|
|
31
|
+
register :item
|
|
32
|
+
|
|
33
|
+
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, OneOf, Item, String, Ruleref, Tag, Token].freeze
|
|
34
|
+
|
|
35
|
+
##
|
|
36
|
+
#
|
|
37
|
+
# The optional weight attribute
|
|
38
|
+
#
|
|
39
|
+
# @return [Float]
|
|
40
|
+
#
|
|
41
|
+
# The item's weight attribute, read with the :to_f converter.
#
# @return [Float] the value read_attr yields for :weight
def weight
  read_attr(:weight, :to_f)
end
|
|
44
|
+
|
|
45
|
+
##
|
|
46
|
+
#
|
|
47
|
+
# The weight attribute takes a positive (floating point) number
|
|
48
|
+
# NOTE: the standard says a format of "n" is valid (eg. an Integer)
|
|
49
|
+
# TODO: possibly support string and check to see if its a valid digit with regex...
|
|
50
|
+
#
|
|
51
|
+
# @param [Numeric] w
|
|
52
|
+
#
|
|
53
|
+
# Sets the weight attribute after validating its textual form.
#
# Accepted forms are the ones named in the class comment: "n", "n.", ".n" and
# "n.n", where n is one or more digits. The whole string must match, so
# malformed values such as "1.2.3" or "." are rejected; the earlier check only
# looked for stray characters and let them through.
# nil is still passed straight to the attribute writer, as before.
#
# @param [Numeric, String, nil] w
# @raise [ArgumentError] if w is not a non-negative decimal number
def weight=(w)
  unless w.nil? || w.to_s =~ /\A(?:\d+\.?\d*|\.\d+)\z/
    raise ArgumentError, "A Item's weight attribute must be a positive floating point number"
  end
  self[:weight] = w
end
|
|
57
|
+
|
|
58
|
+
##
|
|
59
|
+
#
|
|
60
|
+
# The repeat attribute
|
|
61
|
+
#
|
|
62
|
+
# @return [String]
|
|
63
|
+
#
|
|
64
|
+
# Parses the repeat attribute.
#
# @return [nil] when the attribute is not set
# @return [Integer] for a plain count such as "4"
# @return [Range] for "m-n" (m..n) or the open-ended "m-" (m..Inf)
def repeat
  raw = read_attr :repeat
  return nil unless raw
  return raw.to_i unless raw.include?('-')

  lower, upper = raw.split('-').map(&:to_i)
  (lower || 0)..(upper || Inf)
end
|
|
74
|
+
|
|
75
|
+
##
|
|
76
|
+
#
|
|
77
|
+
# TODO: Raise ArgumentError after doing checking. See
|
|
78
|
+
# http://www.w3.org/TR/speech-grammar/#S2.5
|
|
79
|
+
#
|
|
80
|
+
# @param [String] r
|
|
81
|
+
#
|
|
82
|
+
# Validates and stores the repeat attribute.
#
# A Range is first rendered as "min-max", or "min-" when its upper bound is Inf.
# The resulting string may hold only digits and at most one dash; it must not
# start with a dash, and in "m-n" form n must not be smaller than m.
#
# @param [String, Integer, Range] r
# @raise [ArgumentError] if the value is not a valid count or range
def repeat=(r)
  if r.is_a?(Range)
    upper_bound = r.max == Inf ? nil : r.max
    r = "#{r.min}-#{upper_bound}"
  end
  r = r.to_s
  error = ArgumentError.new "A Item's repeat must be 0 or a positive integer"

  well_formed = r !~ /[^0-9-]/ && r.count('-') <= 1
  raise error unless well_formed

  dash_at = r.index('-')
  invalid =
    if dash_at.nil?
      r.to_i < 0 # must be 0 or a positive number
    elsif dash_at.zero?
      true # negative numbers are illegal
    elsif dash_at == r.length - 1
      # "m-": repeat m or more times, m must be 0 or a positive number
      r[0, r.length - 1].to_i < 0
    else
      # "m-n": verify the range is valid
      low, high = r.split('-').map(&:to_i)
      low < 0 || high < low
    end
  raise error if invalid

  self[:repeat] = r
end
|
|
104
|
+
|
|
105
|
+
##
|
|
106
|
+
#
|
|
107
|
+
# The optional repeat-prob attribute
|
|
108
|
+
#
|
|
109
|
+
# @return [Float]
|
|
110
|
+
#
|
|
111
|
+
# The item's repeat-prob attribute, read with the :to_f converter.
#
# @return [Float] the value read_attr yields for :'repeat-prob'
def repeat_prob
  read_attr(:'repeat-prob', :to_f)
end
|
|
114
|
+
|
|
115
|
+
##
|
|
116
|
+
# @param [Numeric] rp
|
|
117
|
+
#
|
|
118
|
+
# Sets the repeat-prob attribute after validating it.
#
# The value must be a well-formed decimal ("n", "n.", ".n" or "n.n") no greater
# than 1.0. The whole string is matched, so malformed values such as "0.1.2" are
# rejected instead of being accepted on the strength of their leading digits.
# nil is still passed straight to the attribute writer, as before.
#
# @param [Numeric, String, nil] rp
# @raise [ArgumentError] if rp is not a number between 0.0 and 1.0
def repeat_prob=(rp)
  valid = rp.nil? || (rp.to_s =~ /\A(?:\d+\.?\d*|\.\d+)\z/ && rp.to_f <= 1.0)
  raise ArgumentError, "A Item's repeat probablity attribute must be a floating point number between 0.0 and 1.0" unless valid

  self['repeat-prob'] = rp
end
|
|
122
|
+
|
|
123
|
+
# Appends a child after checking its exact class against VALID_CHILD_TYPES.
#
# @param arg the child to append
# @raise [InvalidChildError] if arg's class is not listed in VALID_CHILD_TYPES
def <<(arg)
  permitted = VALID_CHILD_TYPES.include?(arg.class)
  raise InvalidChildError, "A Item can only accept String, Ruleref, Tag or Token as children" unless permitted

  super
end
|
|
127
|
+
|
|
128
|
+
# Delegates equality to the parent implementation, naming the attributes that
# take part in the comparison for an Item.
#
# @param o the object to compare with
def eql?(o)
  compared = [:weight, :repeat]
  super(o, *compared)
end
|
|
131
|
+
|
|
132
|
+
# Regexp source for this item: the parent's content followed by a quantifier
# derived from repeat -- "{m,n}" or "{m,}" for a Range, "{n}" for an Integer,
# and no quantifier when repeat is unset.
def regexp_content # :nodoc:
  count = repeat
  body = super

  case count
  when Range
    upper_bound = count.max == Inf ? nil : count.max
    "#{body}{#{count.min},#{upper_bound}}"
  when Integer
    "#{body}{#{count}}"
  else
    body
  end
end
|
|
142
|
+
end # Item
|
|
143
|
+
end # GRXML
|
|
144
|
+
end # RubySpeech
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
module RubySpeech
|
|
2
|
+
module GRXML
|
|
3
|
+
# Result of a successful grammar match: a plain value object carrying the input
# mode, confidence, utterance and interpretation.
#
# Equality is by exact class plus all four fields. #hash is defined alongside
# #eql? so that equal matches also behave as the same Hash key and collapse
# under Array#uniq; overriding eql? alone leaves those inconsistent.
class Match
  # Fields that make up a match's identity.
  FIELDS = [:mode, :confidence, :utterance, :interpretation].freeze

  attr_accessor(*FIELDS)

  # @param [Hash] options attribute name => value; each pair is applied through
  #   the matching writer, so an unknown key raises NoMethodError
  def initialize(options = {})
    options.each_pair { |k, v| send :"#{k}=", v }
  end

  # @param o the object to compare with
  # @return [Boolean] true when o is exactly of this class and every field is ==
  def eql?(o)
    o.instance_of?(self.class) && FIELDS.all? { |f| __send__(f) == o.__send__(f) }
  end
  alias :== :eql?

  # @return [Integer] hash code consistent with #eql?
  def hash
    [self.class, *FIELDS.map { |f| __send__(f) }].hash
  end
end
|
|
15
|
+
end
|
|
16
|
+
end
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
require 'ruby_speech/ruby_speech'
|
|
2
|
+
|
|
3
|
+
if RUBY_PLATFORM =~ /java/
|
|
4
|
+
require 'jruby'
|
|
5
|
+
com.benlangfeld.ruby_speech.RubySpeechService.new.basicLoad(JRuby.runtime)
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
module RubySpeech
|
|
9
|
+
module GRXML
|
|
10
|
+
class Matcher
|
|
11
|
+
UTTERANCE_CONVERTER = Hash.new { |hash, key| hash[key] = key }
|
|
12
|
+
UTTERANCE_CONVERTER['*'] = 'star'
|
|
13
|
+
UTTERANCE_CONVERTER['#'] = 'pound'
|
|
14
|
+
|
|
15
|
+
attr_reader :grammar
|
|
16
|
+
|
|
17
|
+
# Stores the grammar, rewrites it in place via prepare_grammar, then hands the
# generated pattern to compile_regex.
# NOTE(review): compile_regex is not defined in this file -- presumably provided
# by the native extension required at the top; confirm.
#
# @param [Grammar] grammar the grammar to match against (it is mutated)
def initialize(grammar)
  @grammar = grammar
  prepare_grammar
  pattern = regexp_content
  compile_regex(pattern)
end
|
|
22
|
+
|
|
23
|
+
##
|
|
24
|
+
# Checks the grammar for a match against an input string
|
|
25
|
+
#
|
|
26
|
+
# @param [String] other the input string to check for a match with the grammar
|
|
27
|
+
#
|
|
28
|
+
# @return [NoMatch, PotentialMatch, Match, MaxMatch] depending on the result of a match attempt. A potential match indicates that the buffer is valid, but incomplete. A MaxMatch is differentiated from a Match in that it cannot accept further input. If a match can be found, it will be returned with appropriate mode/confidence/utterance and interpretation attributes
|
|
29
|
+
#
|
|
30
|
+
# @example A grammar that takes a 4 digit pin terminated by hash, or the *9 escape sequence
|
|
31
|
+
# ```ruby
|
|
32
|
+
# grammar = RubySpeech::GRXML.draw :mode => :dtmf, :root => 'pin' do
|
|
33
|
+
# rule :id => 'digit' do
|
|
34
|
+
# one_of do
|
|
35
|
+
# ('0'..'9').map { |d| item { d } }
|
|
36
|
+
# end
|
|
37
|
+
# end
|
|
38
|
+
#
|
|
39
|
+
# rule :id => 'pin', :scope => 'public' do
|
|
40
|
+
# one_of do
|
|
41
|
+
# item do
|
|
42
|
+
# item :repeat => '4' do
|
|
43
|
+
# ruleref :uri => '#digit'
|
|
44
|
+
# end
|
|
45
|
+
# "#"
|
|
46
|
+
# end
|
|
47
|
+
# item do
|
|
48
|
+
# "\* 9"
|
|
49
|
+
# end
|
|
50
|
+
# end
|
|
51
|
+
# end
|
|
52
|
+
# end
|
|
53
|
+
#
|
|
54
|
+
# matcher = RubySpeech::GRXML::Matcher.new grammar
|
|
55
|
+
#
|
|
56
|
+
# >> matcher.match '*9'
|
|
57
|
+
# => #<RubySpeech::GRXML::Match:0x00000100ae5d98
|
|
58
|
+
# @mode = :dtmf,
|
|
59
|
+
# @confidence = 1,
|
|
60
|
+
# @utterance = "*9",
|
|
61
|
+
# @interpretation = "*9"
|
|
62
|
+
# >
|
|
63
|
+
# >> matcher.match '1234#'
|
|
64
|
+
# => #<RubySpeech::GRXML::Match:0x00000100b7e020
|
|
65
|
+
# @mode = :dtmf,
|
|
66
|
+
# @confidence = 1,
|
|
67
|
+
# @utterance = "1234#",
|
|
68
|
+
# @interpretation = "1234#"
|
|
69
|
+
# >
|
|
70
|
+
# >> matcher.match '5678#'
|
|
71
|
+
# => #<RubySpeech::GRXML::Match:0x00000101218688
|
|
72
|
+
# @mode = :dtmf,
|
|
73
|
+
# @confidence = 1,
|
|
74
|
+
# @utterance = "5678#",
|
|
75
|
+
# @interpretation = "5678#"
|
|
76
|
+
# >
|
|
77
|
+
# >> matcher.match '1111#'
|
|
78
|
+
# => #<RubySpeech::GRXML::Match:0x000001012f69d8
|
|
79
|
+
# @mode = :dtmf,
|
|
80
|
+
# @confidence = 1,
|
|
81
|
+
# @utterance = "1111#",
|
|
82
|
+
# @interpretation = "1111#"
|
|
83
|
+
# >
|
|
84
|
+
# >> matcher.match '111'
|
|
85
|
+
# => #<RubySpeech::GRXML::NoMatch:0x00000101371660>
|
|
86
|
+
# ```
|
|
87
|
+
#
|
|
88
|
+
# Runs find_match on a copy of the buffer, so the caller's string is never the
# object handed to the matcher.
#
# @param [String] buffer the input to check against the grammar
# @return whatever find_match returns for the copy
def match(buffer)
  copy = buffer.dup
  find_match(copy)
end
|
|
91
|
+
|
|
92
|
+
private
|
|
93
|
+
|
|
94
|
+
# Builds the regexp source for the whole grammar: the concatenated
# regexp_content of the root rule's children, anchored with ^ and $.
#
# @return [String]
def regexp_content
  body = grammar.root_rule.children.map(&:regexp_content).join
  "^#{body}$"
end
|
|
97
|
+
|
|
98
|
+
# Rewrites the grammar in place into the form the matcher works on. The steps
# run in this order: inline rule references, tokenize text, normalize whitespace.
def prepare_grammar
  target = grammar
  target.inline!
  target.tokenize!
  target.normalize_whitespace
end
|
|
103
|
+
|
|
104
|
+
def match_for_buffer(buffer, maximal = false)
|
|
105
|
+
match_class = maximal ? MaxMatch : Match
|
|
106
|
+
match_class.new mode: grammar.mode,
|
|
107
|
+
confidence: grammar.dtmf? ? 1 : 0,
|
|
108
|
+
utterance: buffer,
|
|
109
|
+
interpretation: interpret_utterance(buffer)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Interpretation for an utterance: the tag found by find_tag when there is one,
# otherwise each character rendered as "dtmf-<name>" and joined with spaces
# (names come from UTTERANCE_CONVERTER, e.g. '*' => 'star', '#' => 'pound').
#
# @param [String] utterance
# @return [String]
def interpret_utterance(utterance)
  tag = find_tag(utterance)
  return tag if tag

  utterance.chars.map { |digit| "dtmf-#{UTTERANCE_CONVERTER[digit]}" }.join(' ')
end
|
|
117
|
+
|
|
118
|
+
# Finds the name of the last named capture group that took part in matching the
# utterance.
#
# @param [String] utterance
# @return [String, nil] the capture name, or nil when the utterance does not
#   match the pattern at all (previously a NoMethodError on nil) or when no
#   capture group participated
def find_tag(utterance)
  match = /#{regexp_content}/.match(utterance)
  return unless match

  # Index of the right-most capture that actually matched something.
  last_capture_index = match.captures.rindex { |capture| !capture.nil? }
  return unless last_capture_index

  match.names[last_capture_index]
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
module RubySpeech
|
|
2
|
+
module GRXML
|
|
3
|
+
##
|
|
4
|
+
#
|
|
5
|
+
# The one-of element is one of the valid expansion elements for the SGR rule element
|
|
6
|
+
#
|
|
7
|
+
# http://www.w3.org/TR/speech-grammar/#S2.4 --> XML Form
|
|
8
|
+
#
|
|
9
|
+
# The one-of element has no attributes
|
|
10
|
+
#
|
|
11
|
+
# The one-of element identifies a set of alternative elements. Each alternative expansion is contained in a item element. There must be at least one item element contained within a one-of element.
|
|
12
|
+
#
|
|
13
|
+
# FIXME: Ensure an 'item' element is in the oneof block... this may be at the final draw or when OneOf is called...
|
|
14
|
+
#
|
|
15
|
+
class OneOf < Element
|
|
16
|
+
|
|
17
|
+
register :'one-of'
|
|
18
|
+
|
|
19
|
+
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, Item].freeze
|
|
20
|
+
|
|
21
|
+
# Appends a child after checking its exact class against VALID_CHILD_TYPES.
#
# @param arg the child to append
# @raise [InvalidChildError] if arg's class is not listed in VALID_CHILD_TYPES
def <<(arg)
  permitted = VALID_CHILD_TYPES.include?(arg.class)
  raise InvalidChildError, "A OneOf can only accept Item as children" unless permitted

  super
end
|
|
25
|
+
|
|
26
|
+
# Regexp source for the alternatives: each child's regexp_content joined with
# '|' and wrapped in a group.
def regexp_content # :nodoc:
  alternatives = children.map(&:regexp_content)
  "(#{alternatives.join('|')})"
end
|
|
29
|
+
end # OneOf
|
|
30
|
+
end # GRXML
|
|
31
|
+
end # RubySpeech
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
module RubySpeech
|
|
2
|
+
module GRXML
|
|
3
|
+
##
|
|
4
|
+
#
|
|
5
|
+
# A rule definition associates a legal rule expansion with a rulename. The rule definition is also responsible for defining the scope of the rule definition: whether it is local to the grammar in which it is defined or whether it may be referenced within other grammars.
|
|
6
|
+
#
|
|
7
|
+
# http://www.w3.org/TR/speech-grammar/#S3
|
|
8
|
+
# http://www.w3.org/TR/speech-grammar/#S3.1
|
|
9
|
+
#
|
|
10
|
+
# The rule element has two attributes: id and scope. The id attribute is always required; the scope is optional.
|
|
11
|
+
#
|
|
12
|
+
# The id must be unique with-in the grammar document
|
|
13
|
+
#
|
|
14
|
+
# The scope is either "private" or "public". If it is not explicitly declared in a rule definition then the scope defaults to "private".
|
|
15
|
+
#
|
|
16
|
+
#
|
|
17
|
+
class Rule < Element
|
|
18
|
+
include XML::Language
|
|
19
|
+
|
|
20
|
+
register :rule
|
|
21
|
+
|
|
22
|
+
VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, OneOf, Item, Ruleref, Tag, Token].freeze
|
|
23
|
+
|
|
24
|
+
##
|
|
25
|
+
#
|
|
26
|
+
# The id attribute is the unique name to identify the rule
|
|
27
|
+
#
|
|
28
|
+
#
|
|
29
|
+
# @return [String]
|
|
30
|
+
#
|
|
31
|
+
# The rule's identifying name, read from the :id attribute with the :to_sym
# converter.
#
# @return [Symbol] the value read_attr yields for :id
def id
  read_attr(:id, :to_sym)
end
|
|
34
|
+
|
|
35
|
+
##
|
|
36
|
+
# @param [String] ia
|
|
37
|
+
#
|
|
38
|
+
# Stores the rule's identifying name in the :id attribute.
#
# @param [String] ia the rule id
def id=(ia)
  self[:id] = ia
end
|
|
41
|
+
|
|
42
|
+
##
|
|
43
|
+
#
|
|
44
|
+
# The scope attribute is optional...
|
|
45
|
+
#
|
|
46
|
+
# @return [String]
|
|
47
|
+
#
|
|
48
|
+
# The rule's scope attribute, read with the :to_sym converter.
#
# @return [Symbol] the value read_attr yields for :scope
def scope
  read_attr(:scope, :to_sym)
end
|
|
51
|
+
|
|
52
|
+
##
|
|
53
|
+
#
|
|
54
|
+
# The scope attribute should only be "private" or "public"
|
|
55
|
+
#
|
|
56
|
+
# @param [String] sc
|
|
57
|
+
#
|
|
58
|
+
# Sets the rule's scope; only 'public' and 'private' (compared via to_s, so
# symbols work too) are accepted.
#
# @param [String, Symbol] sc
# @raise [ArgumentError] for any other value
def scope=(sc)
  allowed = %w{public private}
  unless allowed.include?(sc.to_s)
    raise ArgumentError, "A Rule's scope can only be 'public' or 'private'"
  end

  self[:scope] = sc
end
|
|
62
|
+
|
|
63
|
+
# Appends a child after checking its exact class against VALID_CHILD_TYPES.
#
# @param arg the child to append
# @raise [InvalidChildError] if arg's class is not listed in VALID_CHILD_TYPES
def <<(arg)
  permitted = VALID_CHILD_TYPES.include?(arg.class)
  raise InvalidChildError, "A Rule can only accept OneOf, Item, Ruleref, Tag, or Token as children" unless permitted

  super
end
|
|
67
|
+
|
|
68
|
+
# Delegates equality to the parent implementation, naming the attributes that
# take part in the comparison for a Rule.
#
# @param o the object to compare with
def eql?(o)
  compared = [:id, :scope, :language]
  super(o, *compared)
end
|
|
71
|
+
end # Rule
|
|
72
|
+
end # GRXML
|
|
73
|
+
end # RubySpeech
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module RubySpeech
|
|
2
|
+
module GRXML
|
|
3
|
+
##
|
|
4
|
+
#
|
|
5
|
+
# The ruleref element is an empty element which points to another rule expansion in the grammar document.
|
|
6
|
+
#
|
|
7
|
+
# http://www.w3.org/TR/speech-grammar/#S2.2
|
|
8
|
+
#
|
|
9
|
+
# Every rule definition has a local name that must be unique within the scope of the grammar in which it is defined. A rulename must match the "Name" Production of XML 1.0 [XML §2.3] and be a legal XML ID. Section 3.1 documents the rule definition mechanism and the legal naming of rules.
|
|
10
|
+
#
|
|
11
|
+
# The ruleref has three attributes: uri, special and type. There can be one and only one of the uri or special attribute specified on any given ruleref element.
|
|
12
|
+
#
|
|
13
|
+
# The uri attribute identifies the named rule being referenced
|
|
14
|
+
#
|
|
15
|
+
# optional 'type' attribute specifies the media type for the uri
|
|
16
|
+
#
|
|
17
|
+
class Ruleref < Element
|
|
18
|
+
|
|
19
|
+
register :ruleref
|
|
20
|
+
|
|
21
|
+
##
|
|
22
|
+
# XML URI: in the XML Form of this specification any URI is provided as an attribute to an element; for example the ruleref and lexicon elements.
|
|
23
|
+
#
|
|
24
|
+
# @return [String]
|
|
25
|
+
#
|
|
26
|
+
# The reference's uri attribute.
#
# @return [String] the value read_attr yields for :uri
def uri
  read_attr(:uri)
end
|
|
29
|
+
|
|
30
|
+
##
|
|
31
|
+
# @param [String]
|
|
32
|
+
#
|
|
33
|
+
# @raises ArgumentError if the special attribute is already set
|
|
34
|
+
#
|
|
35
|
+
# Sets the uri attribute. uri and special are mutually exclusive, so this is
# refused while special is set.
#
# @param [String] u the uri of the referenced rule
# @raise [ArgumentError] if the special attribute is already present
def uri=(u)
  if special
    raise ArgumentError, "A Ruleref can only take uri or special"
  end

  self[:uri] = u
end
|
|
39
|
+
|
|
40
|
+
##
|
|
41
|
+
# special...
|
|
42
|
+
#
|
|
43
|
+
# @return [String]
|
|
44
|
+
#
|
|
45
|
+
# The reference's special attribute.
#
# @return [String] the value read_attr yields for :special
def special
  read_attr(:special)
end
|
|
48
|
+
|
|
49
|
+
##
|
|
50
|
+
# @param [String]
|
|
51
|
+
#
|
|
52
|
+
# TODO: raise ArgumentError if not a valid special...
|
|
53
|
+
#
|
|
54
|
+
# Sets the special attribute. uri and special are mutually exclusive, so this is
# refused while uri is set; the value must be NULL, VOID or GARBAGE (compared
# via to_s, so symbols work too).
#
# @param [String, Symbol] sp
# @raise [ArgumentError] if uri is already present or sp is not a known special
def special=(sp)
  raise ArgumentError, "A Ruleref can only take uri or special" if uri

  unless %w{NULL VOID GARBAGE}.include?(sp.to_s)
    raise ArgumentError, "The Ruleref#special method only takes :NULL, :VOID, and :GARBAGE"
  end

  self[:special] = sp
end
|
|
59
|
+
|
|
60
|
+
# A ruleref is an empty element, so every attempt to append a child is rejected.
#
# @raise [InvalidChildError] always
def <<(*args)
  message = "A Ruleref cannot contain children"
  raise InvalidChildError, message
end
|
|
63
|
+
|
|
64
|
+
# Delegates equality to the parent implementation, naming the attributes that
# take part in the comparison for a Ruleref.
#
# @param o the object to compare with
def eql?(o)
  compared = [:uri, :special]
  super(o, *compared)
end
|
|
67
|
+
end # Rule
|
|
68
|
+
end # GRXML
|
|
69
|
+
end # RubySpeech
|