ruby_speech 2.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/.gitignore +12 -0
  2. data/.rspec +3 -0
  3. data/.travis.yml +17 -0
  4. data/CHANGELOG.md +144 -0
  5. data/Gemfile +3 -0
  6. data/Guardfile +9 -0
  7. data/LICENSE.md +20 -0
  8. data/README.md +314 -0
  9. data/Rakefile +34 -0
  10. data/assets/grammar-core.xsd +317 -0
  11. data/assets/grammar.xsd +37 -0
  12. data/assets/synthesis-core.xsd +445 -0
  13. data/assets/synthesis.xsd +63 -0
  14. data/assets/xml.xsd +287 -0
  15. data/ext/ruby_speech/RubySpeechGRXMLMatcher.java +64 -0
  16. data/ext/ruby_speech/RubySpeechService.java +23 -0
  17. data/ext/ruby_speech/extconf.rb +7 -0
  18. data/ext/ruby_speech/ruby_speech.c +97 -0
  19. data/lib/ruby_speech/generic_element.rb +169 -0
  20. data/lib/ruby_speech/grxml/element.rb +29 -0
  21. data/lib/ruby_speech/grxml/grammar.rb +189 -0
  22. data/lib/ruby_speech/grxml/item.rb +144 -0
  23. data/lib/ruby_speech/grxml/match.rb +16 -0
  24. data/lib/ruby_speech/grxml/matcher.rb +126 -0
  25. data/lib/ruby_speech/grxml/max_match.rb +6 -0
  26. data/lib/ruby_speech/grxml/no_match.rb +10 -0
  27. data/lib/ruby_speech/grxml/one_of.rb +31 -0
  28. data/lib/ruby_speech/grxml/potential_match.rb +10 -0
  29. data/lib/ruby_speech/grxml/rule.rb +73 -0
  30. data/lib/ruby_speech/grxml/ruleref.rb +69 -0
  31. data/lib/ruby_speech/grxml/tag.rb +29 -0
  32. data/lib/ruby_speech/grxml/token.rb +31 -0
  33. data/lib/ruby_speech/grxml.rb +39 -0
  34. data/lib/ruby_speech/nlsml/builder.rb +34 -0
  35. data/lib/ruby_speech/nlsml/document.rb +120 -0
  36. data/lib/ruby_speech/nlsml.rb +18 -0
  37. data/lib/ruby_speech/ruby_speech.jar +0 -0
  38. data/lib/ruby_speech/ssml/audio.rb +47 -0
  39. data/lib/ruby_speech/ssml/break.rb +62 -0
  40. data/lib/ruby_speech/ssml/desc.rb +24 -0
  41. data/lib/ruby_speech/ssml/element.rb +23 -0
  42. data/lib/ruby_speech/ssml/emphasis.rb +44 -0
  43. data/lib/ruby_speech/ssml/mark.rb +43 -0
  44. data/lib/ruby_speech/ssml/p.rb +25 -0
  45. data/lib/ruby_speech/ssml/phoneme.rb +72 -0
  46. data/lib/ruby_speech/ssml/prosody.rb +172 -0
  47. data/lib/ruby_speech/ssml/s.rb +25 -0
  48. data/lib/ruby_speech/ssml/say_as.rb +100 -0
  49. data/lib/ruby_speech/ssml/speak.rb +27 -0
  50. data/lib/ruby_speech/ssml/sub.rb +42 -0
  51. data/lib/ruby_speech/ssml/voice.rb +108 -0
  52. data/lib/ruby_speech/ssml.rb +39 -0
  53. data/lib/ruby_speech/version.rb +3 -0
  54. data/lib/ruby_speech/xml/language.rb +13 -0
  55. data/lib/ruby_speech/xml.rb +11 -0
  56. data/lib/ruby_speech.rb +36 -0
  57. data/ruby_speech.gemspec +42 -0
  58. data/spec/ruby_speech/grxml/grammar_spec.rb +341 -0
  59. data/spec/ruby_speech/grxml/item_spec.rb +192 -0
  60. data/spec/ruby_speech/grxml/match_spec.rb +15 -0
  61. data/spec/ruby_speech/grxml/matcher_spec.rb +688 -0
  62. data/spec/ruby_speech/grxml/max_match_spec.rb +17 -0
  63. data/spec/ruby_speech/grxml/no_match_spec.rb +17 -0
  64. data/spec/ruby_speech/grxml/one_of_spec.rb +49 -0
  65. data/spec/ruby_speech/grxml/potential_match_spec.rb +17 -0
  66. data/spec/ruby_speech/grxml/rule_spec.rb +125 -0
  67. data/spec/ruby_speech/grxml/ruleref_spec.rb +55 -0
  68. data/spec/ruby_speech/grxml/tag_spec.rb +41 -0
  69. data/spec/ruby_speech/grxml/token_spec.rb +62 -0
  70. data/spec/ruby_speech/grxml_spec.rb +339 -0
  71. data/spec/ruby_speech/nlsml_spec.rb +353 -0
  72. data/spec/ruby_speech/ssml/audio_spec.rb +121 -0
  73. data/spec/ruby_speech/ssml/break_spec.rb +100 -0
  74. data/spec/ruby_speech/ssml/desc_spec.rb +57 -0
  75. data/spec/ruby_speech/ssml/emphasis_spec.rb +110 -0
  76. data/spec/ruby_speech/ssml/mark_spec.rb +53 -0
  77. data/spec/ruby_speech/ssml/p_spec.rb +96 -0
  78. data/spec/ruby_speech/ssml/phoneme_spec.rb +65 -0
  79. data/spec/ruby_speech/ssml/prosody_spec.rb +309 -0
  80. data/spec/ruby_speech/ssml/s_spec.rb +92 -0
  81. data/spec/ruby_speech/ssml/say_as_spec.rb +71 -0
  82. data/spec/ruby_speech/ssml/speak_spec.rb +166 -0
  83. data/spec/ruby_speech/ssml/sub_spec.rb +57 -0
  84. data/spec/ruby_speech/ssml/voice_spec.rb +200 -0
  85. data/spec/ruby_speech/ssml_spec.rb +285 -0
  86. data/spec/ruby_speech_spec.rb +124 -0
  87. data/spec/spec_helper.rb +21 -0
  88. data/spec/support/match_examples.rb +43 -0
  89. data/spec/support/matchers.rb +46 -0
  90. metadata +405 -0
@@ -0,0 +1,172 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # The prosody element permits control of the pitch, speaking rate and volume of the speech output.
5
+ #
6
+ # http://www.w3.org/TR/speech-synthesis/#S3.2.4
7
+ #
8
+ # Although each attribute individually is optional, it is an error if no attributes are specified when the prosody element is used. The "x-foo" attribute value names are intended to be mnemonics for "extra foo". Note also that customary pitch levels and standard pitch ranges may vary significantly by language, as may the meanings of the labelled values for pitch targets and ranges.
9
+ #
10
+ # The duration attribute takes precedence over the rate attribute. The contour attribute takes precedence over the pitch and range attributes.
11
+ #
12
+ # The default value of all prosodic attributes is no change. For example, omitting the rate attribute means that the rate is the same within the element as outside.
13
+ #
14
+ class Prosody < Element
15
+
16
+ register :prosody
17
+
18
+ VALID_PITCHES = [:'x-low', :low, :medium, :high, :'x-high', :default].freeze
19
+ VALID_VOLUMES = [:silent, :'x-soft', :soft, :medium, :loud, :'x-loud', :default].freeze
20
+ VALID_RATES = [:'x-slow', :slow, :medium, :fast, :'x-fast', :default].freeze
21
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
22
+
23
+ ##
24
+ # The baseline pitch for the contained text. Although the exact meaning of "baseline pitch" will vary across synthesis processors, increasing/decreasing this value will typically increase/decrease the approximate pitch of the output. Legal values are: a number followed by "Hz", a relative change or "x-low", "low", "medium", "high", "x-high", or "default". Labels "x-low" through "x-high" represent a sequence of monotonically non-decreasing pitch levels.
25
+ #
26
+ # @return [Symbol, String]
27
+ #
28
+ def pitch
29
+ value = read_attr :pitch
30
+ return unless value
31
+ if value.include?('Hz')
32
+ value
33
+ elsif VALID_PITCHES.include?(value.to_sym)
34
+ value.to_sym
35
+ end
36
+ end
37
+
38
+ ##
39
+ # @param [Symbol, String] p one of VALID_PITCHES, or a String containing 'Hz' that starts with a positive number
40
+ #
41
+ # @raise [ArgumentError] if p is neither such a String nor one of VALID_PITCHES
42
+ #
43
+ def pitch=(p)
44
+ hz = p.is_a?(String) && p.include?('Hz') && p.to_f > 0
45
+ raise ArgumentError, "You must specify a valid pitch (\"[positive-number]Hz\", #{VALID_PITCHES.map(&:inspect).join ', '})" unless hz || VALID_PITCHES.include?(p)
46
+ self[:pitch] = p
47
+ end
48
+
49
+ ##
50
+ # The actual pitch contour for the contained text.
51
+ #
52
+ # The pitch contour is defined as a set of white space-separated targets at specified time positions in the speech output. The algorithm for interpolating between the targets is processor-specific. In each pair of the form (time position,target), the first value is a percentage of the period of the contained text (a number followed by "%") and the second value is the value of the pitch attribute (a number followed by "Hz", a relative change, or a label value). Time position values outside 0% to 100% are ignored. If a pitch value is not defined for 0% or 100% then the nearest pitch target is copied. All relative values for the pitch are relative to the pitch value just before the contained text.
53
+ #
54
+ # @return [String] presumably the raw attribute text: read_attr is called without a conversion and the writer takes a String (confirm against read_attr)
55
+ #
56
+ def contour
57
+ read_attr :contour
58
+ end
59
+
60
+ ##
61
+ # @param [String] v
62
+ #
63
+ def contour=(v)
64
+ self[:contour] = v
65
+ end
66
+
67
+ ##
68
+ # The pitch range (variability) for the contained text. Although the exact meaning of "pitch range" will vary across synthesis processors, increasing/decreasing this value will typically increase/decrease the dynamic range of the output pitch. Legal values are: a number followed by "Hz", a relative change or "x-low", "low", "medium", "high", "x-high", or "default". Labels "x-low" through "x-high" represent a sequence of monotonically non-decreasing pitch ranges.
69
+ #
70
+ # @return [Symbol, String, nil] the unconverted value when it contains 'Hz', a Symbol when it names one of VALID_PITCHES, nil when the attribute is unset or matches neither
71
+ #
72
+ def range
73
+ value = read_attr :range
74
+ return unless value
75
+ if value.include?('Hz')
76
+ value
77
+ elsif VALID_PITCHES.include?(value.to_sym)
78
+ value.to_sym
79
+ end
80
+ end
81
+
82
+ ##
83
+ # @param [Symbol, String] p one of VALID_PITCHES, or a String containing 'Hz' that starts with a positive number
84
+ #
85
+ # @raise [ArgumentError] if p is neither such a String nor one of VALID_PITCHES
86
+ #
87
+ def range=(p)
88
+ hz = p.is_a?(String) && p.include?('Hz') && p.to_f > 0
89
+ raise ArgumentError, "You must specify a valid range (\"[positive-number]Hz\", #{VALID_PITCHES.map(&:inspect).join ', '})" unless hz || VALID_PITCHES.include?(p)
90
+ self[:range] = p
91
+ end
92
+
93
+ ##
94
+ # A change in the speaking rate for the contained text. Legal values are: a relative change or "x-slow", "slow", "medium", "fast", "x-fast", or "default". Labels "x-slow" through "x-fast" represent a sequence of monotonically non-decreasing speaking rates. When a number is used to specify a relative change it acts as a multiplier of the default rate. For example, a value of 1 means no change in speaking rate, a value of 2 means a speaking rate twice the default rate, and a value of 0.5 means a speaking rate of half the default rate. The default rate for a voice depends on the language and dialect and on the personality of the voice. The default rate for a voice should be such that it is experienced as a normal speaking rate for the voice when reading aloud text. Since voices are processor-specific, the default rate will be as well.
95
+ #
96
+ # @return [Symbol, Float]
97
+ #
98
+ def rate
99
+ value = read_attr :rate
100
+ return unless value
101
+ if VALID_RATES.include?(value.to_sym)
102
+ value.to_sym
103
+ else
104
+ value.to_f
105
+ end
106
+ end
107
+
108
+ ##
109
+ # @param [Symbol, Numeric] v one of VALID_RATES, or a Numeric multiplier of the default rate
110
+ #
111
+ # @raise [ArgumentError] if v is neither a non-negative Numeric (0 is accepted by the check) nor one of VALID_RATES
112
+ #
113
+ def rate=(v)
114
+ raise ArgumentError, "You must specify a valid rate ([positive-number](multiplier), #{VALID_RATES.map(&:inspect).join ', '})" unless (v.is_a?(Numeric) && v >= 0) || VALID_RATES.include?(v)
115
+ self[:rate] = v
116
+ end
117
+
118
+ ##
119
+ # A value in seconds for the desired time to take to read the element contents.
120
+ #
121
+ # @return [Integer] the attribute read with a :to_i conversion. NOTE(review): duration= stores "#{t}s", so a fractional value is presumably truncated on read — confirm against read_attr
122
+ #
123
+ def duration
124
+ read_attr :duration, :to_i
125
+ end
126
+
127
+ ##
128
+ # @param [Numeric] t the duration in seconds; it is stored with an "s" suffix appended
129
+ #
130
+ # @raise [ArgumentError] if t is not a non-negative Numeric (0 is accepted by the check)
131
+ #
132
+ def duration=(t)
133
+ raise ArgumentError, "You must specify a valid duration (positive float value in seconds)" unless t.is_a?(Numeric) && t >= 0
134
+ self[:duration] = "#{t}s"
135
+ end
136
+
137
+ ##
138
+ # The volume for the contained text in the range 0.0 to 100.0 (higher values are louder and specifying a value of zero is equivalent to specifying "silent"). Legal values are: number, a relative change or "silent", "x-soft", "soft", "medium", "loud", "x-loud", or "default". The volume scale is linear amplitude. The default is 100.0. Labels "silent" through "x-loud" represent a sequence of monotonically non-decreasing volume levels.
139
+ #
140
+ # @return [Symbol, Float]
141
+ #
142
+ def volume
143
+ value = read_attr :volume
144
+ return unless value
145
+ if VALID_VOLUMES.include?(value.to_sym)
146
+ value.to_sym
147
+ else
148
+ value.to_f
149
+ end
150
+ end
151
+
152
+ ##
153
+ # @param [Numeric, Symbol] v a Numeric from 0 to 100 inclusive, or one of VALID_VOLUMES
154
+ #
155
+ # @raise [ArgumentError] if v is neither one of VALID_VOLUMES nor a Numeric within 0..100 (both bounds included)
156
+ #
157
+ def volume=(v)
158
+ raise ArgumentError, "You must specify a valid volume ([positive-number](0.0 -> 100.0), #{VALID_VOLUMES.map(&:inspect).join ', '})" unless (v.is_a?(Numeric) && (0..100).include?(v)) || VALID_VOLUMES.include?(v)
159
+ self[:volume] = v
160
+ end
161
+
162
+ def <<(arg)
163
+ raise InvalidChildError, "A Prosody can only accept String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, SayAs, Sub, S, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
164
+ super
165
+ end
166
+
167
+ def eql?(o)
168
+ super o, :pitch, :contour, :range, :rate, :duration, :volume
169
+ end
170
+ end # Prosody
171
+ end # SSML
172
+ end # RubySpeech
@@ -0,0 +1,25 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # An s element represents a sentence.
5
+ # The use of s elements is optional. Where text occurs without an enclosing s element the synthesis processor should attempt to determine the structure using language-specific knowledge of the format of plain text.
6
+ #
7
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.7
8
+ #
9
+ class S < Element
10
+
11
+ register :s
12
+
13
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice].freeze
14
+
15
+ def <<(arg)
16
+ raise InvalidChildError, "An S can only accept String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
17
+ super
18
+ end
19
+
20
+ def eql?(o)
21
+ super o, :language
22
+ end
23
+ end # S
24
+ end # SSML
25
+ end # RubySpeech
@@ -0,0 +1,100 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # The say-as element allows the author to indicate information on the type of text construct contained within the element and to help specify the level of detail for rendering the contained text.
5
+ #
6
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.8
7
+ #
8
+ # Defining a comprehensive set of text format types is difficult because of the variety of languages that have to be considered and because of the innate flexibility of written languages. SSML only specifies the say-as element, its attributes, and their purpose. It does not enumerate the possible values for the attributes. The Working Group expects to produce a separate document that will define standard values and associated normative behavior for these values. Examples given here are only for illustrating the purpose of the element and the attributes.
9
+ #
10
+ # The say-as element has three attributes: interpret-as, format, and detail. The interpret-as attribute is always required; the other two attributes are optional. The legal values for the format attribute depend on the value of the interpret-as attribute.
11
+ #
12
+ # The say-as element can only contain text to be rendered.
13
+ #
14
+ # When specified, the interpret-as and format values are to be interpreted by the synthesis processor as hints provided by the markup document author to aid text normalization and pronunciation.
15
+ #
16
+ # In all cases, the text enclosed by any say-as element is intended to be a standard, orthographic form of the language currently in context. A synthesis processor should be able to support the common, orthographic forms of the specified language for every content type that it supports.
17
+ #
18
+ # When the content of the say-as element contains additional text next to the content that is in the indicated format and interpret-as type, then this additional text must be rendered. The processor may make the rendering of the additional text dependent on the interpret-as type of the element in which it appears.
19
+ # When the content of the say-as element contains no content in the indicated interpret-as type or format, the processor must render the content either as if the format attribute were not present, or as if the interpret-as attribute were not present, or as if neither the format nor interpret-as attributes were present. The processor should also notify the environment of the mismatch.
20
+ #
21
+ # Indicating the content type or format does not necessarily affect the way the information is pronounced. A synthesis processor should pronounce the contained text in a manner in which such content is normally produced for the language.
22
+ #
23
+ class SayAs < Element
24
+
25
+ register :'say-as'
26
+
27
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze
28
+
29
+ ##
30
+ #
31
+ # The interpret-as attribute indicates the content type of the contained text construct. Specifying the content type helps the synthesis processor to distinguish and interpret text constructs that may be rendered in different ways depending on what type of information is intended.
32
+ #
33
+ # When the value for the interpret-as attribute is unknown or unsupported by a processor, it must render the contained text as if no interpret-as value were specified.
34
+ #
35
+ # @return [String]
36
+ #
37
+ def interpret_as
38
+ read_attr :'interpret-as'
39
+ end
40
+
41
+ ##
42
+ # @param [String] ia
43
+ #
44
+ def interpret_as=(ia)
45
+ self['interpret-as'] = ia
46
+ end
47
+
48
+ ##
49
+ #
50
+ # Can give further hints on the precise formatting of the contained text for content types that may have ambiguous formats.
51
+ #
52
+ # When the value for the format attribute is unknown or unsupported by a processor, it must render the contained text as if no format value were specified, and should render it using the interpret-as value that is specified.
53
+ #
54
+ # @return [String]
55
+ #
56
+ def format
57
+ read_attr :format
58
+ end
59
+
60
+ ##
61
+ # @param [String] format
62
+ #
63
+ def format=(format)
64
+ self[:format] = format
65
+ end
66
+
67
+ ##
68
+ #
69
+ # The detail attribute is an optional attribute that indicates the level of detail to be read aloud or rendered. Every value of the detail attribute must render all of the informational content in the contained text; however, specific values for the detail attribute can be used to render content that is not usually informational in running text but may be important to render for specific purposes. For example, a synthesis processor will usually render punctuations through appropriate changes in prosody. Setting a higher level of detail may be used to speak punctuations explicitly, e.g. for reading out coded part numbers or pieces of software code.
70
+ #
71
+ # The detail attribute can be used for all interpret-as types.
72
+ #
73
+ # If the detail attribute is not specified, the level of detail that is produced by the synthesis processor depends on the text content and the language.
74
+ #
75
+ # When the value for the detail attribute is unknown or unsupported by a processor, it must render the contained text as if no value were specified for the detail attribute.
76
+ #
77
+ # @return [String]
78
+ #
79
+ def detail
80
+ read_attr :detail
81
+ end
82
+
83
+ ##
84
+ # @param [String] detail
85
+ #
86
+ def detail=(detail)
87
+ self[:detail] = detail
88
+ end
89
+
90
+ def <<(arg)
91
+ raise InvalidChildError, "A SayAs can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
92
+ super
93
+ end
94
+
95
+ def eql?(o)
96
+ super o, :interpret_as, :format, :detail
97
+ end
98
+ end # SayAs
99
+ end # SSML
100
+ end # RubySpeech
@@ -0,0 +1,27 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # The Speech Synthesis Markup Language is an XML application. The root element is speak.
5
+ #
6
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.1
7
+ #
8
+ class Speak < Element
9
+ include XML::Language
10
+
11
+ register :speak
12
+
13
+ self.defaults = { :version => '1.0', :language => "en-US" }
14
+
15
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
16
+
17
+ def <<(arg)
18
+ raise InvalidChildError, "A Speak can only accept String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, SayAs, Sub, S, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
19
+ super
20
+ end
21
+
22
+ def eql?(o)
23
+ super o, :language, :base_uri
24
+ end
25
+ end # Speak
26
+ end # SSML
27
+ end # RubySpeech
@@ -0,0 +1,42 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # The sub element is employed to indicate that the text in the alias attribute value replaces the contained text for pronunciation. This allows a document to contain both a spoken and written form. The required alias attribute specifies the string to be spoken instead of the enclosed string. The processor should apply text normalization to the alias value.
5
+ #
6
+ # The sub element can only contain text (no elements).
7
+ #
8
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.10
9
+ #
10
+ class Sub < Element
11
+
12
+ register :sub
13
+
14
+ VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze
15
+
16
+ ##
17
+ # Indicates the string to be spoken instead of the enclosed string
18
+ #
19
+ # @return [String]
20
+ #
21
+ def alias
22
+ read_attr :alias
23
+ end
24
+
25
+ ##
26
+ # @param [String] other the string to be spoken instead of the enclosed string
27
+ #
28
+ def alias=(other)
29
+ self[:alias] = other
30
+ end
31
+
32
+ def <<(arg)
33
+ raise InvalidChildError, "A Sub can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
34
+ super
35
+ end
36
+
37
+ def eql?(o)
38
+ super o, :alias
39
+ end
40
+ end # Sub
41
+ end # SSML
42
+ end # RubySpeech
@@ -0,0 +1,108 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # The voice element is a production element that requests a change in speaking voice.
5
+ #
6
+ # http://www.w3.org/TR/speech-synthesis/#S3.2.1
7
+ #
8
+ class Voice < Element
9
+ include XML::Language
10
+
11
+ register :voice
12
+
13
+ VALID_GENDERS = [:male, :female, :neutral].freeze
14
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
15
+
16
+ ##
17
+ # Indicates the preferred gender of the voice to speak the contained text. Enumerated values are: "male", "female", "neutral".
18
+ #
19
+ # @return [Symbol]
20
+ #
21
+ def gender
22
+ read_attr :gender, :to_sym
23
+ end
24
+
25
+ ##
26
+ # @param [Symbol] g the gender selected from VALID_GENDERS
27
+ #
28
+ # @raise [ArgumentError] if g is not one of VALID_GENDERS
29
+ #
30
+ def gender=(g)
31
+ raise ArgumentError, "You must specify a valid gender (#{VALID_GENDERS.map(&:inspect).join ', '})" unless VALID_GENDERS.include? g
32
+ self[:gender] = g
33
+ end
34
+
35
+ ##
36
+ # Indicates the preferred age in years (since birth) of the voice to speak the contained text.
37
+ #
38
+ # @return [Integer]
39
+ #
40
+ def age
41
+ read_attr :age, :to_i
42
+ end
43
+
44
+ ##
45
+ # @param [Integer] i the age of the voice
46
+ #
47
+ # @raise [ArgumentError] if i is not a non-negative Integer (0 is accepted)
48
+ #
49
+ def age=(i)
50
+ raise ArgumentError, "You must specify a valid age (non-negative integer)" unless i.is_a?(Integer) && i >= 0
51
+ self[:age] = i
52
+ end
53
+
54
+ ##
55
+ # Indicates a preferred variant of the other voice characteristics to speak the contained text. (e.g. the second male child voice).
56
+ #
57
+ # @return [Integer]
58
+ #
59
+ def variant
60
+ read_attr :variant, :to_i
61
+ end
62
+
63
+ ##
64
+ # @param [Integer] i the variant of the voice
65
+ #
66
+ # @raise [ArgumentError] if i is not a positive Integer (0 is rejected, unlike age=)
67
+ #
68
+ def variant=(i)
69
+ raise ArgumentError, "You must specify a valid variant (positive integer)" unless i.is_a?(Integer) && i > 0
70
+ self[:variant] = i
71
+ end
72
+
73
+ ##
74
+ # A processor-specific voice name to speak the contained text.
75
+ #
76
+ # @return [String, Array, nil] the name or names of the voice
77
+ #
78
+ def name
79
+ names = read_attr :name
80
+ return unless names
81
+ names = names.split ' '
82
+ case names.count
83
+ when 0 then nil
84
+ when 1 then names.first
85
+ else names
86
+ end
87
+ end
88
+
89
+ ##
90
+ # @param [String, Array] n the name or names of the voice. May be an array of names ordered from top preference down; an Array is joined with single spaces before being stored. The names must not contain any white space.
91
+ #
92
+ def name=(n)
93
+ # TODO: Raise ArgumentError if names contain whitespace
94
+ n = n.join(' ') if n.is_a? Array
95
+ self[:name] = n
96
+ end
97
+
98
+ def <<(arg)
99
+ raise InvalidChildError, "A Voice can only accept String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, SayAs, Sub, S, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
100
+ super
101
+ end
102
+
103
+ def eql?(o)
104
+ super o, :language, :gender, :age, :variant, :name
105
+ end
106
+ end # Voice
107
+ end # SSML
108
+ end # RubySpeech
@@ -0,0 +1,39 @@
1
+ module RubySpeech
2
+ module SSML
3
+ extend ActiveSupport::Autoload
4
+
5
+ eager_autoload do
6
+ autoload :Audio
7
+ autoload :Break
8
+ autoload :Desc
9
+ autoload :Element
10
+ autoload :Emphasis
11
+ autoload :Mark
12
+ autoload :P
13
+ autoload :Phoneme
14
+ autoload :Prosody
15
+ autoload :S
16
+ autoload :SayAs
17
+ autoload :Speak
18
+ autoload :Sub
19
+ autoload :Voice
20
+ end
21
+
22
+ InvalidChildError = Class.new StandardError
23
+
24
+ SSML_NAMESPACE = 'http://www.w3.org/2001/10/synthesis'
25
+
26
+ def self.draw(*args, &block)
27
+ Speak.new(*args).tap do |speak|
28
+ block_return = speak.eval_dsl_block &block
29
+ speak << block_return if block_return.is_a?(String)
30
+ end
31
+ end
32
+
33
+ def self.import(other)
34
+ Element.import other
35
+ end
36
+ end # SSML
37
+ end # RubySpeech
38
+
39
+ ActiveSupport::Autoload.eager_autoload!
@@ -0,0 +1,3 @@
1
+ module RubySpeech
2
+ VERSION = "2.1.0"
3
+ end
@@ -0,0 +1,13 @@
1
+ module RubySpeech
2
+ module XML
3
+ module Language
4
+ def language
5
+ self['xml:lang']
6
+ end
7
+
8
+ def language=(l)
9
+ self['xml:lang'] = l
10
+ end
11
+ end # Language
12
+ end # XML
13
+ end # RubySpeech
@@ -0,0 +1,11 @@
1
+ module RubySpeech
2
+ module XML
3
+ extend ActiveSupport::Autoload
4
+
5
+ eager_autoload do
6
+ autoload :Language
7
+ end
8
+ end # XML
9
+ end # RubySpeech
10
+
11
+ ActiveSupport::Autoload.eager_autoload!
@@ -0,0 +1,36 @@
1
+ %w{
2
+ active_support/dependencies/autoload
3
+ active_support/core_ext/object/blank
4
+ active_support/core_ext/numeric/time
5
+ active_support/core_ext/enumerable
6
+ niceogiri
7
+ }.each { |f| require f }
8
+
9
+ module RubySpeech
10
+ extend ActiveSupport::Autoload
11
+
12
+ autoload :Version
13
+
14
+ eager_autoload do
15
+ autoload :GenericElement
16
+ autoload :SSML
17
+ autoload :GRXML
18
+ autoload :NLSML
19
+ autoload :XML
20
+ end
21
+
22
+ def self.parse(string) # Builds the SSML, GRXML or NLSML object that matches the root namespace of the given XML string
23
+ document = Nokogiri::XML.parse string, nil, nil, Nokogiri::XML::ParseOptions::NOBLANKS
24
+ namespace = document.root.namespace # NOTE(review): raises NoMethodError if document.root is nil (presumably input with no root element) — confirm callers never pass that
25
+ case namespace && namespace.href # a root without a namespace yields nil here, which is handled by the NLSML branch
26
+ when SSML::SSML_NAMESPACE
27
+ SSML::Element.import string # the SSML and GRXML branches pass the original string, not the parsed document
28
+ when GRXML::GRXML_NAMESPACE
29
+ GRXML::Element.import string
30
+ when NLSML::NLSML_NAMESPACE, nil
31
+ NLSML::Document.new document
32
+ end # any other namespace matches no branch, so the method returns nil
33
+ end
34
+ end
35
+
36
+ ActiveSupport::Autoload.eager_autoload!
@@ -0,0 +1,42 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "ruby_speech/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "ruby_speech"
7
+ s.version = RubySpeech::VERSION
8
+ s.authors = ["Ben Langfeld"]
9
+ s.email = ["ben@langfeld.me"]
10
+ s.homepage = "https://github.com/benlangfeld/ruby_speech"
11
+ s.summary = %q{A Ruby library for TTS & ASR document preparation}
12
+ s.description = %q{Prepare SSML and GRXML documents with ease}
13
+
14
+ s.rubyforge_project = "ruby_speech"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+
21
+ if RUBY_PLATFORM =~ /java/
22
+ s.platform = "java"
23
+ s.files << "lib/ruby_speech/ruby_speech.jar"
24
+ else
25
+ s.extensions = ['ext/ruby_speech/extconf.rb']
26
+ end
27
+
28
+ s.add_runtime_dependency %q<niceogiri>, ["~> 1.1", ">= 1.1.2"]
29
+ s.add_runtime_dependency %q<nokogiri>, ["~> 1.5", ">= 1.5.6"]
30
+ s.add_runtime_dependency %q<activesupport>, [">= 3.0.7"]
31
+
32
+ s.add_development_dependency %q<bundler>, [">= 1.0.0"]
33
+ s.add_development_dependency %q<rspec>, [">= 2.7"]
34
+ s.add_development_dependency %q<ci_reporter>, [">= 1.6.3"]
35
+ s.add_development_dependency %q<yard>, [">= 0.7.0"]
36
+ s.add_development_dependency %q<rake>, [">= 0"]
37
+ s.add_development_dependency %q<guard>, [">= 0.9.0"]
38
+ s.add_development_dependency %q<guard-rspec>, [">= 0"]
39
+ s.add_development_dependency %q<ruby_gntp>, [">= 0"]
40
+ s.add_development_dependency %q<guard-rake>, [">= 0"]
41
+ s.add_development_dependency %q<rake-compiler>, [">= 0"]
42
+ end