ruby_speech 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/CHANGELOG.md +15 -0
  2. data/README.md +108 -14
  3. data/lib/ruby_speech/generic_element.rb +71 -10
  4. data/lib/ruby_speech/grxml.rb +4 -1
  5. data/lib/ruby_speech/grxml/element.rb +4 -0
  6. data/lib/ruby_speech/grxml/grammar.rb +177 -46
  7. data/lib/ruby_speech/grxml/item.rb +12 -11
  8. data/lib/ruby_speech/grxml/match.rb +16 -0
  9. data/lib/ruby_speech/grxml/no_match.rb +10 -0
  10. data/lib/ruby_speech/grxml/one_of.rb +4 -11
  11. data/lib/ruby_speech/grxml/rule.rb +0 -11
  12. data/lib/ruby_speech/grxml/ruleref.rb +0 -11
  13. data/lib/ruby_speech/grxml/tag.rb +0 -11
  14. data/lib/ruby_speech/grxml/token.rb +8 -11
  15. data/lib/ruby_speech/ssml.rb +6 -0
  16. data/lib/ruby_speech/ssml/audio.rb +1 -12
  17. data/lib/ruby_speech/ssml/break.rb +0 -11
  18. data/lib/ruby_speech/ssml/desc.rb +24 -0
  19. data/lib/ruby_speech/ssml/emphasis.rb +1 -12
  20. data/lib/ruby_speech/ssml/mark.rb +43 -0
  21. data/lib/ruby_speech/ssml/p.rb +25 -0
  22. data/lib/ruby_speech/ssml/phoneme.rb +72 -0
  23. data/lib/ruby_speech/ssml/prosody.rb +1 -12
  24. data/lib/ruby_speech/ssml/s.rb +25 -0
  25. data/lib/ruby_speech/ssml/say_as.rb +0 -11
  26. data/lib/ruby_speech/ssml/speak.rb +2 -44
  27. data/lib/ruby_speech/ssml/sub.rb +42 -0
  28. data/lib/ruby_speech/ssml/voice.rb +1 -12
  29. data/lib/ruby_speech/version.rb +1 -1
  30. data/spec/ruby_speech/grxml/grammar_spec.rb +478 -35
  31. data/spec/ruby_speech/grxml/item_spec.rb +5 -2
  32. data/spec/ruby_speech/grxml/match_spec.rb +49 -0
  33. data/spec/ruby_speech/grxml/no_match_spec.rb +17 -0
  34. data/spec/ruby_speech/grxml/one_of_spec.rb +1 -1
  35. data/spec/ruby_speech/grxml/rule_spec.rb +1 -1
  36. data/spec/ruby_speech/grxml/ruleref_spec.rb +1 -1
  37. data/spec/ruby_speech/grxml/tag_spec.rb +1 -1
  38. data/spec/ruby_speech/grxml/token_spec.rb +11 -1
  39. data/spec/ruby_speech/grxml_spec.rb +64 -5
  40. data/spec/ruby_speech/ssml/audio_spec.rb +5 -6
  41. data/spec/ruby_speech/ssml/break_spec.rb +1 -1
  42. data/spec/ruby_speech/ssml/desc_spec.rb +57 -0
  43. data/spec/ruby_speech/ssml/emphasis_spec.rb +1 -4
  44. data/spec/ruby_speech/ssml/mark_spec.rb +53 -0
  45. data/spec/ruby_speech/ssml/p_spec.rb +96 -0
  46. data/spec/ruby_speech/ssml/phoneme_spec.rb +65 -0
  47. data/spec/ruby_speech/ssml/prosody_spec.rb +9 -4
  48. data/spec/ruby_speech/ssml/s_spec.rb +92 -0
  49. data/spec/ruby_speech/ssml/say_as_spec.rb +1 -1
  50. data/spec/ruby_speech/ssml/speak_spec.rb +1 -6
  51. data/spec/ruby_speech/ssml/sub_spec.rb +57 -0
  52. data/spec/ruby_speech/ssml/voice_spec.rb +1 -6
  53. data/spec/spec_helper.rb +0 -4
  54. data/spec/support/matchers.rb +13 -53
  55. metadata +200 -113
@@ -31,17 +31,6 @@ module RubySpeech
31
31
 
32
32
  VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, OneOf, Item, String, Ruleref, Tag, Token].freeze
33
33
 
34
- ##
35
- # Create a new GRXML item element
36
- #
37
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
38
- #
39
- # @return [Item] an element for use in an GRXML document
40
- #
41
- def self.new(atts = {}, &block)
42
- super 'item', atts, &block
43
- end
44
-
45
34
  ##
46
35
  #
47
36
  # The optional weight attribute
@@ -83,6 +72,7 @@ module RubySpeech
83
72
  # @param [String] r
84
73
  #
85
74
  def repeat=(r)
75
+ r = "#{r.min}-#{r.max}" if r.is_a?(Range)
86
76
  r = r.to_s
87
77
  error = ArgumentError.new "A Item's repeat must be 0 or a positive integer"
88
78
 
@@ -130,6 +120,17 @@ module RubySpeech
130
120
  def eql?(o)
131
121
  super o, :weight, :repeat
132
122
  end
123
+
124
+ def regexp_content # :nodoc:
125
+ return super unless repeat
126
+
127
+ if repeat.include?('-')
128
+ min, max = repeat.split '-'
129
+ "#{super}{#{min},#{max}}"
130
+ else
131
+ "#{super}{#{repeat}}"
132
+ end
133
+ end
133
134
  end # Item
134
135
  end # GRXML
135
136
  end # RubySpeech
@@ -0,0 +1,16 @@
1
+ module RubySpeech
2
+ module GRXML
3
+ class Match
4
+ attr_accessor :mode, :confidence, :utterance, :interpretation
5
+
6
+ def initialize(options = {})
7
+ options.each_pair { |k, v| self.send :"#{k}=", v }
8
+ end
9
+
10
+ def eql?(o)
11
+ o.is_a?(self.class) && [:mode, :confidence, :utterance, :interpretation].all? { |f| self.__send__(f) == o.__send__(f) }
12
+ end
13
+ alias :== :eql?
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,10 @@
1
+ module RubySpeech
2
+ module GRXML
3
+ class NoMatch
4
+ def eql?(o)
5
+ o.is_a? self.class
6
+ end
7
+ alias :== :eql?
8
+ end
9
+ end
10
+ end
@@ -18,21 +18,14 @@ module RubySpeech
18
18
 
19
19
  VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, Item].freeze
20
20
 
21
- ##
22
- # Create a new GRXML one-of element
23
- #
24
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
25
- #
26
- # @return [OneOf] an element for use in an GRXML document
27
- #
28
- def self.new(atts = {}, &block)
29
- super 'one-of', atts, &block
30
- end
31
-
32
21
  def <<(arg)
33
22
  raise InvalidChildError, "A OneOf can only accept Item as children" unless VALID_CHILD_TYPES.include? arg.class
34
23
  super
35
24
  end
25
+
26
+ def regexp_content # :nodoc:
27
+ "(#{children.map(&:regexp_content).join '|'})"
28
+ end
36
29
  end # OneOf
37
30
  end # GRXML
38
31
  end # RubySpeech
@@ -21,17 +21,6 @@ module RubySpeech
21
21
 
22
22
  VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, OneOf, Item, Ruleref, Tag, Token].freeze
23
23
 
24
- ##
25
- # Create a new GRXML rule element
26
- #
27
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
28
- #
29
- # @return [Rule] an element for use in an GRXML document
30
- #
31
- def self.new(atts = {}, &block)
32
- super 'rule', atts, &block
33
- end
34
-
35
24
  ##
36
25
  #
37
26
  # The id attribute is the unique name to identify the rule
@@ -18,17 +18,6 @@ module RubySpeech
18
18
 
19
19
  register :ruleref
20
20
 
21
- ##
22
- # Create a new GRXML ruleref element
23
- #
24
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
25
- #
26
- # @return [Ruleref] an element for use in an GRXML document
27
- #
28
- def self.new(atts = {}, &block)
29
- super 'ruleref', atts, &block
30
- end
31
-
32
21
  ##
33
22
  # XML URI: in the XML Form of this specification any URI is provided as an attribute to an element; for example the ruleref and lexicon elements.
34
23
  #
@@ -16,17 +16,6 @@ module RubySpeech
16
16
 
17
17
  VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze
18
18
 
19
- ##
20
- # Create a new GRXML tag element
21
- #
22
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
23
- #
24
- # @return [Tag] an element for use in an GRXML document
25
- #
26
- def self.new(atts = {}, &block)
27
- super 'tag', atts, &block
28
- end
29
-
30
19
  def <<(arg)
31
20
  raise InvalidChildError, "A Tag can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
32
21
  super
@@ -14,21 +14,18 @@ module RubySpeech
14
14
 
15
15
  VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze
16
16
 
17
- ##
18
- # Create a new GRXML token element
19
- #
20
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
21
- #
22
- # @return [Token] an element for use in an GRXML document
23
- #
24
- def self.new(atts = {}, &block)
25
- super 'token', atts, &block
26
- end
27
-
28
17
  def <<(arg)
29
18
  raise InvalidChildError, "A Token can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
30
19
  super
31
20
  end
21
+
22
+ def normalize_whitespace
23
+ self.content = content.strip.squeeze ' '
24
+ end
25
+
26
+ def regexp_content # :nodoc:
27
+ Regexp.escape content
28
+ end
32
29
  end # Token
33
30
  end # GRXML
34
31
  end # RubySpeech
@@ -5,11 +5,17 @@ module RubySpeech
5
5
  eager_autoload do
6
6
  autoload :Audio
7
7
  autoload :Break
8
+ autoload :Desc
8
9
  autoload :Element
9
10
  autoload :Emphasis
11
+ autoload :Mark
12
+ autoload :P
13
+ autoload :Phoneme
10
14
  autoload :Prosody
15
+ autoload :S
11
16
  autoload :SayAs
12
17
  autoload :Speak
18
+ autoload :Sub
13
19
  autoload :Voice
14
20
  end
15
21
 
@@ -16,18 +16,7 @@ module RubySpeech
16
16
 
17
17
  register :audio
18
18
 
19
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text,String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
20
-
21
- ##
22
- # Create a new SSML audio element
23
- #
24
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
25
- #
26
- # @return [Break] an element for use in an SSML document
27
- #
28
- def self.new(atts = {}, &block)
29
- super 'audio', atts, &block
30
- end
19
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Desc, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
31
20
 
32
21
  ##
33
22
  # The URI of a document with an appropriate MIME type
@@ -11,17 +11,6 @@ module RubySpeech
11
11
 
12
12
  VALID_STRENGTHS = [:none, :'x-weak', :weak, :medium, :strong, :'x-strong'].freeze
13
13
 
14
- ##
15
- # Create a new SSML break element
16
- #
17
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
18
- #
19
- # @return [Break] an element for use in an SSML document
20
- #
21
- def self.new(atts = {}, &block)
22
- super 'break', atts, &block
23
- end
24
-
25
14
  ##
26
15
  # This attribute is used to indicate the strength of the prosodic break in the speech output. The value "none" indicates that no prosodic break boundary should be outputted, which can be used to prevent a prosodic break which the processor would otherwise produce. The other values indicate monotonically non-decreasing (conceptually increasing) break strength between words. The stronger boundaries are typically accompanied by pauses. "x-weak" and "x-strong" are mnemonics for "extra weak" and "extra strong", respectively.
27
16
  #
@@ -0,0 +1,24 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
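+ # The desc element can only occur within the content of the audio element. When the audio source referenced in audio is not speech, e.g. audio wallpaper or sonicon punctuation, it should contain a desc element whose textual content is a description of the audio source (e.g. "door slamming"). If text-only output is being produced by the synthesis processor, the content of the desc element(s) should be rendered instead of other alternative content in audio.
5
+ #
6
+ # http://www.w3.org/TR/speech-synthesis/#S3.3.3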
7
+ #
8
+ class Desc < Element
9
+
10
+ register :desc
11
+
12
+ VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze
13
+
14
+ def <<(arg)
15
+ raise InvalidChildError, "A Desc can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
16
+ super
17
+ end
18
+
19
+ def eql?(o)
20
+ super o, :language
21
+ end
22
+ end # Desc
23
+ end # SSML
24
+ end # RubySpeech
@@ -10,18 +10,7 @@ module RubySpeech
10
10
  register :emphasis
11
11
 
12
12
  VALID_LEVELS = [:strong, :moderate, :none, :reduced].freeze
13
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
14
-
15
- ##
16
- # Create a new SSML emphasis element
17
- #
18
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
19
- #
20
- # @return [Emphasis] an element for use in an SSML document
21
- #
22
- def self.new(atts = {}, &block)
23
- super 'emphasis', atts, &block
24
- end
13
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice].freeze
25
14
 
26
15
  ##
27
16
  # Indicates the strength of emphasis to be applied. Defined values are "strong", "moderate", "none" and "reduced". The default level is "moderate". The meaning of "strong" and "moderate" emphasis is interpreted according to the language being spoken (languages indicate emphasis using a possible combination of pitch change, timing changes, loudness and other acoustic differences). The "reduced" level is effectively the opposite of emphasizing a word. For example, when the phrase "going to" is reduced it may be spoken as "gonna". The "none" level is used to prevent the synthesis processor from emphasizing words that it might typically emphasize. The values "none", "moderate", and "strong" are monotonically non-decreasing in strength.
@@ -0,0 +1,43 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # A mark element is an empty element that places a marker into the text/tag sequence. It has one required attribute, name, which is of type xsd:token [SCHEMA2 §3.3.2]. The mark element can be used to reference a specific location in the text/tag sequence, and can additionally be used to insert a marker into an output stream for asynchronous notification. When processing a mark element, a synthesis processor must do one or both of the following:
5
+ #
6
+ # * inform the hosting environment with the value of the name attribute and with information allowing the platform to retrieve the corresponding position in the rendered output.
7
+ # * when audio output of the SSML document reaches the mark, issue an event that includes the required name attribute of the element. The hosting environment defines the destination of the event.
8
+ #
9
+ # The mark element does not affect the speech output process.
10
+ #
11
+ # http://www.w3.org/TR/speech-synthesis/#S3.3.2
12
+ #
13
+ class Mark < Element
14
+
15
+ register :mark
16
+
17
+ ##
18
+ # This attribute is a token by which to reference the mark
19
+ #
20
+ # @return [String]
21
+ #
22
+ def name
23
+ read_attr :name
24
+ end
25
+
26
+ ##
27
+ # @param [String] other the name token
28
+ #
29
+ def name=(other)
30
+ write_attr :name, other
31
+ end
32
+
33
+ def <<(*args)
34
+ raise InvalidChildError, "A Mark cannot contain children"
35
+ super
36
+ end
37
+
38
+ def eql?(o)
39
+ super o, :name
40
+ end
41
+ end # Mark
42
+ end # SSML
43
+ end # RubySpeech
@@ -0,0 +1,25 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # A p element represents a paragraph.
5
+ # The use of p elements is optional. Where text occurs without an enclosing p element the synthesis processor should attempt to determine the structure using language-specific knowledge of the format of plain text.
6
+ #
7
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.7
8
+ #
9
+ class P < Element
10
+
11
+ register :p
12
+
13
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
14
+
15
+ def <<(arg)
16
+ raise InvalidChildError, "A P can only accept String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, S, Voice as children" unless VALID_CHILD_TYPES.include? arg.class
17
+ super
18
+ end
19
+
20
+ def eql?(o)
21
+ super o, :language
22
+ end
23
+ end # P
24
+ end # SSML
25
+ end # RubySpeech
@@ -0,0 +1,72 @@
1
+ module RubySpeech
2
+ module SSML
3
+ ##
4
+ # The phoneme element provides a phonemic/phonetic pronunciation for the contained text. The phoneme element may be empty. However, it is recommended that the element contain human-readable text that can be used for non-spoken rendering of the document. For example, the content may be displayed visually for users with hearing impairments.
5
+ #
6
+ # The ph attribute is a required attribute that specifies the phoneme/phone string.
7
+ #
8
+ # This element is designed strictly for phonemic and phonetic notations and is intended to be used to provide pronunciations for words or very short phrases. The phonemic/phonetic string does not undergo text normalization and is not treated as a token for lookup in the lexicon (see Section 3.1.4), while values in say-as and sub may undergo both. Briefly, phonemic strings consist of phonemes, language-dependent speech units that characterize linguistically significant differences in the language; loosely, phonemes represent all the sounds needed to distinguish one word from another in a given language. On the other hand, phonetic strings consist of phones, speech units that characterize the manner (puff of air, click, vocalized, etc.) and place (front, middle, back, etc.) of articulation within the human vocal tract and are thus independent of language; phones represent realized distinctions in human speech production.
9
+ #
10
+ # The alphabet attribute is an optional attribute that specifies the phonemic/phonetic alphabet. An alphabet in this context refers to a collection of symbols to represent the sounds of one or more human languages. The only valid values for this attribute are "ipa" (see the next paragraph) and vendor-defined strings of the form "x-organization" or "x-organization-alphabet". For example, the Japan Electronics and Information Technology Industries Association [JEITA] might wish to encourage the use of an alphabet such as "x-JEITA" or "x-JEITA-2000" for their phoneme alphabet [JEIDAALPHABET].
11
+ #
12
+ # Synthesis processors should support a value for alphabet of "ipa", corresponding to Unicode representations of the phonetic characters developed by the International Phonetic Association [IPA]. In addition to an exhaustive set of vowel and consonant symbols, this character set supports a syllable delimiter, numerous diacritics, stress symbols, lexical tone symbols, intonational markers and more. For this alphabet, legal ph values are strings of the values specified in Appendix 2 of [IPAHNDBK]. Informative tables of the IPA-to-Unicode mappings can be found at [IPAUNICODE1] and [IPAUNICODE2]. Note that not all of the IPA characters are available in Unicode. For processors supporting this alphabet,
13
+ #
14
+ # * The processor must syntactically accept all legal ph values.
15
+ # * The processor should produce output when given Unicode IPA codes that can reasonably be considered to belong to the current language.
16
+ # * The production of output when given other codes is entirely at processor discretion.
17
+ #
18
+ # It is an error if a value for alphabet is specified that is not known or cannot be applied by a synthesis processor. The default behavior when the alphabet attribute is left unspecified is processor-specific.
19
+ #
20
+ # The phoneme element itself can only contain text (no elements).
21
+ #
22
+ # http://www.w3.org/TR/speech-synthesis/#S3.1.9
23
+ #
24
+ class Phoneme < Element
25
+
26
+ register :phoneme
27
+
28
+ VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze
29
+
30
+ ##
31
+ # Specifies the phonemic/phonetic alphabet
32
+ #
33
+ # @return [String]
34
+ #
35
+ def alphabet
36
+ read_attr :alphabet
37
+ end
38
+
39
+ ##
40
+ # @param [String] other the phonemic/phonetic alphabet
41
+ #
42
+ def alphabet=(other)
43
+ write_attr :alphabet, other
44
+ end
45
+
46
+ ##
47
+ # Specifies the phoneme/phone string.
48
+ #
49
+ # @return [String]
50
+ #
51
+ def ph
52
+ read_attr :ph
53
+ end
54
+
55
+ ##
56
+ # @param [String] other the phoneme/phone string.
57
+ #
58
+ def ph=(other)
59
+ write_attr :ph, other
60
+ end
61
+
62
+ def <<(arg)
63
+ raise InvalidChildError, "A Phoneme can only accept Strings as children" unless VALID_CHILD_TYPES.include? arg.class
64
+ super
65
+ end
66
+
67
+ def eql?(o)
68
+ super o, :alphabet, :ph
69
+ end
70
+ end # Phoneme
71
+ end # SSML
72
+ end # RubySpeech
@@ -18,18 +18,7 @@ module RubySpeech
18
18
  VALID_PITCHES = [:'x-low', :low, :medium, :high, :'x-high', :default].freeze
19
19
  VALID_VOLUMES = [:silent, :'x-soft', :soft, :medium, :loud, :'x-loud', :default].freeze
20
20
  VALID_RATES = [:'x-slow', :slow, :medium, :fast, :'x-fast', :default].freeze
21
- VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Prosody, SayAs, Voice].freeze
22
-
23
- ##
24
- # Create a new SSML prosody element
25
- #
26
- # @param [Hash] atts Key-value pairs of options mapping to setter methods
27
- #
28
- # @return [Prosody] an element for use in an SSML document
29
- #
30
- def self.new(atts = {}, &block)
31
- super 'prosody', atts, &block
32
- end
21
+ VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze
33
22
 
34
23
  ##
35
24
  # The baseline pitch for the contained text. Although the exact meaning of "baseline pitch" will vary across synthesis processors, increasing/decreasing this value will typically increase/decrease the approximate pitch of the output. Legal values are: a number followed by "Hz", a relative change or "x-low", "low", "medium", "high", "x-high", or "default". Labels "x-low" through "x-high" represent a sequence of monotonically non-decreasing pitch levels.