ruby_speech 2.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. data/.gitignore +12 -0
  2. data/.rspec +3 -0
  3. data/.travis.yml +17 -0
  4. data/CHANGELOG.md +144 -0
  5. data/Gemfile +3 -0
  6. data/Guardfile +9 -0
  7. data/LICENSE.md +20 -0
  8. data/README.md +314 -0
  9. data/Rakefile +34 -0
  10. data/assets/grammar-core.xsd +317 -0
  11. data/assets/grammar.xsd +37 -0
  12. data/assets/synthesis-core.xsd +445 -0
  13. data/assets/synthesis.xsd +63 -0
  14. data/assets/xml.xsd +287 -0
  15. data/ext/ruby_speech/RubySpeechGRXMLMatcher.java +64 -0
  16. data/ext/ruby_speech/RubySpeechService.java +23 -0
  17. data/ext/ruby_speech/extconf.rb +7 -0
  18. data/ext/ruby_speech/ruby_speech.c +97 -0
  19. data/lib/ruby_speech/generic_element.rb +169 -0
  20. data/lib/ruby_speech/grxml/element.rb +29 -0
  21. data/lib/ruby_speech/grxml/grammar.rb +189 -0
  22. data/lib/ruby_speech/grxml/item.rb +144 -0
  23. data/lib/ruby_speech/grxml/match.rb +16 -0
  24. data/lib/ruby_speech/grxml/matcher.rb +126 -0
  25. data/lib/ruby_speech/grxml/max_match.rb +6 -0
  26. data/lib/ruby_speech/grxml/no_match.rb +10 -0
  27. data/lib/ruby_speech/grxml/one_of.rb +31 -0
  28. data/lib/ruby_speech/grxml/potential_match.rb +10 -0
  29. data/lib/ruby_speech/grxml/rule.rb +73 -0
  30. data/lib/ruby_speech/grxml/ruleref.rb +69 -0
  31. data/lib/ruby_speech/grxml/tag.rb +29 -0
  32. data/lib/ruby_speech/grxml/token.rb +31 -0
  33. data/lib/ruby_speech/grxml.rb +39 -0
  34. data/lib/ruby_speech/nlsml/builder.rb +34 -0
  35. data/lib/ruby_speech/nlsml/document.rb +120 -0
  36. data/lib/ruby_speech/nlsml.rb +18 -0
  37. data/lib/ruby_speech/ruby_speech.jar +0 -0
  38. data/lib/ruby_speech/ssml/audio.rb +47 -0
  39. data/lib/ruby_speech/ssml/break.rb +62 -0
  40. data/lib/ruby_speech/ssml/desc.rb +24 -0
  41. data/lib/ruby_speech/ssml/element.rb +23 -0
  42. data/lib/ruby_speech/ssml/emphasis.rb +44 -0
  43. data/lib/ruby_speech/ssml/mark.rb +43 -0
  44. data/lib/ruby_speech/ssml/p.rb +25 -0
  45. data/lib/ruby_speech/ssml/phoneme.rb +72 -0
  46. data/lib/ruby_speech/ssml/prosody.rb +172 -0
  47. data/lib/ruby_speech/ssml/s.rb +25 -0
  48. data/lib/ruby_speech/ssml/say_as.rb +100 -0
  49. data/lib/ruby_speech/ssml/speak.rb +27 -0
  50. data/lib/ruby_speech/ssml/sub.rb +42 -0
  51. data/lib/ruby_speech/ssml/voice.rb +108 -0
  52. data/lib/ruby_speech/ssml.rb +39 -0
  53. data/lib/ruby_speech/version.rb +3 -0
  54. data/lib/ruby_speech/xml/language.rb +13 -0
  55. data/lib/ruby_speech/xml.rb +11 -0
  56. data/lib/ruby_speech.rb +36 -0
  57. data/ruby_speech.gemspec +42 -0
  58. data/spec/ruby_speech/grxml/grammar_spec.rb +341 -0
  59. data/spec/ruby_speech/grxml/item_spec.rb +192 -0
  60. data/spec/ruby_speech/grxml/match_spec.rb +15 -0
  61. data/spec/ruby_speech/grxml/matcher_spec.rb +688 -0
  62. data/spec/ruby_speech/grxml/max_match_spec.rb +17 -0
  63. data/spec/ruby_speech/grxml/no_match_spec.rb +17 -0
  64. data/spec/ruby_speech/grxml/one_of_spec.rb +49 -0
  65. data/spec/ruby_speech/grxml/potential_match_spec.rb +17 -0
  66. data/spec/ruby_speech/grxml/rule_spec.rb +125 -0
  67. data/spec/ruby_speech/grxml/ruleref_spec.rb +55 -0
  68. data/spec/ruby_speech/grxml/tag_spec.rb +41 -0
  69. data/spec/ruby_speech/grxml/token_spec.rb +62 -0
  70. data/spec/ruby_speech/grxml_spec.rb +339 -0
  71. data/spec/ruby_speech/nlsml_spec.rb +353 -0
  72. data/spec/ruby_speech/ssml/audio_spec.rb +121 -0
  73. data/spec/ruby_speech/ssml/break_spec.rb +100 -0
  74. data/spec/ruby_speech/ssml/desc_spec.rb +57 -0
  75. data/spec/ruby_speech/ssml/emphasis_spec.rb +110 -0
  76. data/spec/ruby_speech/ssml/mark_spec.rb +53 -0
  77. data/spec/ruby_speech/ssml/p_spec.rb +96 -0
  78. data/spec/ruby_speech/ssml/phoneme_spec.rb +65 -0
  79. data/spec/ruby_speech/ssml/prosody_spec.rb +309 -0
  80. data/spec/ruby_speech/ssml/s_spec.rb +92 -0
  81. data/spec/ruby_speech/ssml/say_as_spec.rb +71 -0
  82. data/spec/ruby_speech/ssml/speak_spec.rb +166 -0
  83. data/spec/ruby_speech/ssml/sub_spec.rb +57 -0
  84. data/spec/ruby_speech/ssml/voice_spec.rb +200 -0
  85. data/spec/ruby_speech/ssml_spec.rb +285 -0
  86. data/spec/ruby_speech_spec.rb +124 -0
  87. data/spec/spec_helper.rb +21 -0
  88. data/spec/support/match_examples.rb +43 -0
  89. data/spec/support/matchers.rb +46 -0
  90. metadata +405 -0
@@ -0,0 +1,29 @@
1
module RubySpeech
  module GRXML
    ##
    #
    # The tag element is one of the valid expansion elements for a grammar rule element.
    #
    # http://www.w3.org/TR/speech-grammar/#S2.6
    #
    #
    # TODO: Make sure this is complete...
    #
    #
    class Tag < Element
      register :tag

      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze

      ##
      # Appends a child after validating its class.
      #
      # @param arg the child to append
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "A Tag can only accept Strings as children"
        end

        super
      end

      def regexp_content # :nodoc:
        group_name = content
        "?<#{group_name}>"
      end
    end # Tag
  end # GRXML
end # RubySpeech
@@ -0,0 +1,31 @@
1
module RubySpeech
  module GRXML
    ##
    #
    # A token (a.k.a. a terminal symbol) is the part of a grammar that defines
    # words or other entities that may be spoken. Any legal token is a legal
    # expansion.
    #
    # http://www.w3.org/TR/speech-grammar/#S2.1
    #
    # The token element may include an optional xml:lang attribute to indicate
    # the language of the contained token.
    #
    class Token < Element
      register :token

      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String].freeze

      ##
      # Appends a child after validating its class.
      #
      # @param arg the child to append
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "A Token can only accept Strings as children"
        end

        super
      end

      ##
      # Strips leading/trailing whitespace from the content and collapses each
      # run of spaces into a single space, writing the result back to the node.
      #
      def normalize_whitespace
        collapsed = content.strip.squeeze(' ')
        self.content = collapsed
      end

      def regexp_content # :nodoc:
        Regexp.escape(content)
      end
    end # Token
  end # GRXML
end # RubySpeech
@@ -0,0 +1,39 @@
1
module RubySpeech
  ##
  # Namespace for the GRXML element classes and the grammar matching helpers.
  #
  module GRXML
    extend ActiveSupport::Autoload

    # Element classes, registered for eager loading.
    eager_autoload do
      autoload :Element
      autoload :Grammar
      autoload :Rule
      autoload :Item
      autoload :OneOf
      autoload :Ruleref
      autoload :Tag
      autoload :Token
    end

    # Matching support, loaded on first reference.
    autoload :Match
    autoload :Matcher
    autoload :MaxMatch
    autoload :NoMatch
    autoload :PotentialMatch

    # Raised by element classes when an unsupported child is appended.
    InvalidChildError = Class.new(StandardError)

    GRXML_NAMESPACE = 'http://www.w3.org/2001/06/grammar'

    ##
    # Builds a Grammar from the given attributes and DSL block.
    #
    # If the block evaluates to a String, that string is appended to the
    # grammar. The return value is whatever
    # Grammar#assert_has_matching_root_rule returns.
    #
    # @param [Hash] attributes passed to Grammar.new
    #
    def self.draw(attributes = {}, &block)
      grammar = Grammar.new(attributes)
      dsl_result = grammar.eval_dsl_block(&block)
      grammar << dsl_result if dsl_result.is_a?(String)
      grammar.assert_has_matching_root_rule
    end

    ##
    # Delegates to Element.import.
    #
    def self.import(other)
      Element.import(other)
    end
  end # GRXML
end # RubySpeech

ActiveSupport::Autoload.eager_autoload!
@@ -0,0 +1,34 @@
1
module RubySpeech
  module NLSML
    ##
    # Small DSL around Nokogiri::XML::Builder that produces a document whose
    # root is a +result+ element. The block given to #initialize is evaluated
    # in the context of the Builder instance; any method it does not define is
    # forwarded to the underlying +result+ builder node.
    #
    class Builder
      # The document produced by the Nokogiri builder (its +doc+).
      attr_reader :document

      ##
      # @param [Hash] options attributes for the root +result+ element; an
      #   'xmlns' entry defaults to NLSML_NAMESPACE and may be overridden
      # @yield the DSL block, evaluated via instance_eval; optional
      #
      def initialize(options = {}, &block)
        options = {'xmlns' => NLSML_NAMESPACE}.merge(options)
        @document = Nokogiri::XML::Builder.new do |builder|
          builder.result options do |r|
            apply_block r, &block
          end
        end.doc
      end

      ##
      # Forwards to the +result+ node's +interpretation+, coercing a
      # :confidence entry in a trailing attribute hash with +to_f+.
      #
      # The coercion is applied to a copy so the caller's hash is left
      # untouched.
      #
      def interpretation(*args, &block)
        attributes = args.last
        if attributes.respond_to?(:has_key?) && attributes.has_key?(:confidence)
          coerced = attributes.dup
          coerced[:confidence] = attributes[:confidence].to_f
          args[-1] = coerced
        end
        @result.send :interpretation, *args, &block
      end

      # Everything not defined here is forwarded to the +result+ builder node.
      def method_missing(method_name, *args, &block)
        @result.send method_name, *args, &block
      end

      private

      ##
      # Remembers the builder node and evaluates the DSL block against self.
      # A missing block is tolerated (instance_eval would otherwise raise
      # ArgumentError), which yields an empty +result+ element.
      #
      def apply_block(result, &block)
        @result = result
        instance_eval(&block) if block
      end
    end
  end
end
@@ -0,0 +1,120 @@
1
+ require 'delegate'
2
+
3
module RubySpeech
  module NLSML
    ##
    # Read-only view over a parsed NLSML XML document. All XML document
    # methods are delegated to the wrapped document; the methods defined here
    # extract interpretations as plain Ruby hashes.
    #
    class Document < SimpleDelegator
      ##
      # @param xml the parsed XML document to wrap. If its root has no
      #   namespace, the NLSML namespace is applied as default and the
      #   document is re-parsed (with NOBLANKS) so that it takes effect.
      #
      def initialize(xml)
        unless xml.root.namespace
          xml.root.default_namespace = NLSML_NAMESPACE
          xml = Nokogiri::XML.parse xml.to_xml, nil, nil, Nokogiri::XML::ParseOptions::NOBLANKS
        end
        super(xml)
      end

      # @return the value of the root element's 'grammar' attribute
      def grammar
        result['grammar']
      end

      ##
      # @return [Array<Hash>] one hash per interpretation, highest confidence
      #   first, each with :confidence, :input, :instance and :instances keys
      #
      def interpretations
        interpretation_nodes.map do |interpretation|
          interpretation_hash_for_interpretation interpretation
        end
      end

      ##
      # @return [Hash, nil] the hash for the highest-confidence
      #   interpretation, or nil when the document has no interpretations
      #
      def best_interpretation
        best = interpretation_nodes.first
        interpretation_hash_for_interpretation best if best
      end

      # @return [Boolean] true when there is at least one interpretation and
      #   the document is neither a nomatch nor a noinput
      def match?
        interpretation_nodes.count > 0 && !nomatch? && !noinput?
      end

      ##
      # Documents are equal when their serialised XML is equal. Objects that
      # cannot be serialised with +to_xml+ are never equal.
      #
      def ==(other)
        other.respond_to?(:to_xml) && to_xml == other.to_xml
      end

      # @return [Boolean] true when any interpretation's input contains a noinput element
      def noinput?
        noinput_elements.any?
      end

      private

      # True when there are at least as many nomatch elements as input elements.
      def nomatch?
        nomatch_elements.count >= input_elements.count
      end

      def nomatch_elements
        result.xpath 'ns:interpretation/ns:input/ns:nomatch', 'ns' => NLSML_NAMESPACE
      end

      def noinput_elements
        result.xpath 'ns:interpretation/ns:input/ns:noinput', 'ns' => NLSML_NAMESPACE
      end

      def input_elements
        result.xpath 'ns:interpretation/ns:input', 'ns' => NLSML_NAMESPACE
      end

      ##
      # Builds the :input hash (:content, plus :mode as a Symbol when the
      # attribute is present). Returns nil when the interpretation has no
      # input element instead of failing on the missing node.
      #
      def input_hash_for_interpretation(interpretation)
        input_element = interpretation.at_xpath 'ns:input', 'ns' => NLSML_NAMESPACE
        return unless input_element
        { content: input_element.content }.tap do |h|
          h[:mode] = input_element['mode'].to_sym if input_element['mode']
        end
      end

      # Key/value form of the first instance element, or nil when there is none.
      def instance_hash_for_interpretation(interpretation)
        instances = instance_elements interpretation
        return unless instances.any?
        element_children_key_value instances.first
      end

      # Key/value form of every instance element.
      def instances_collection_for_interpretation(interpretation)
        instance_elements(interpretation).map do |instance|
          element_children_key_value instance
        end
      end

      def instance_elements(interpretation)
        interpretation.xpath 'ns:instance', 'ns' => NLSML_NAMESPACE
      end

      ##
      # Converts an element into Ruby data: the text content when its first
      # child is a text node, otherwise a Hash keyed by child node name
      # (as a Symbol) whose values are converted recursively.
      #
      def element_children_key_value(element)
        first_child = element.children.first
        return first_child.content if first_child.is_a?(Nokogiri::XML::Text)
        element.children.each_with_object({}) do |child, acc|
          grandchildren = child.children
          acc[child.node_name.to_sym] =
            if grandchildren.count == 0
              child.content
            elsif grandchildren.count == 1 && grandchildren.first.is_a?(Nokogiri::XML::Text)
              grandchildren.first.content
            else
              element_children_key_value child
            end
        end
      end

      def interpretation_hash_for_interpretation(interpretation)
        {
          confidence: interpretation['confidence'].to_f,
          input: input_hash_for_interpretation(interpretation),
          instance: instance_hash_for_interpretation(interpretation),
          instances: instances_collection_for_interpretation(interpretation)
        }
      end

      # The root element of the wrapped document.
      def result
        root
      end

      # Interpretation nodes sorted by descending confidence.
      def interpretation_nodes
        nodes = result.xpath 'ns:interpretation', 'ns' => NLSML_NAMESPACE
        nodes.sort_by { |int| -int[:confidence].to_f }
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module RubySpeech
  ##
  # Namespace for building (Builder) and reading (Document) NLSML results.
  #
  module NLSML
    extend ActiveSupport::Autoload

    NLSML_NAMESPACE = 'http://www.ietf.org/xml/ns/mrcpv2'

    eager_autoload do
      autoload :Builder
      autoload :Document
    end

    ##
    # Runs the DSL block through a Builder and returns the resulting document.
    #
    # @param [Hash] options passed to Builder.new
    #
    def self.draw(options = {}, &block)
      builder = Builder.new(options, &block)
      builder.document
    end
  end
end

ActiveSupport::Autoload.eager_autoload!
Binary file
@@ -0,0 +1,47 @@
1
module RubySpeech
  module SSML
    ##
    # The audio element supports the insertion of recorded audio files (see
    # Appendix A for required formats) and the insertion of other audio
    # formats in conjunction with synthesized speech output. The audio element
    # may be empty. If the audio element is not empty then the contents should
    # be the marked-up text to be spoken if the audio document is not
    # available. The alternate content may include text, speech markup, desc
    # elements, or other audio elements. The alternate content may also be
    # used when rendering the document to non-audible output and for
    # accessibility (see the desc element). The required attribute is src,
    # which is the URI of a document with an appropriate MIME type.
    #
    # An audio element is successfully rendered:
    #   * If the referenced audio source is played, or
    #   * If the synthesis processor is unable to execute #1 but the
    #     alternative content is successfully rendered, or
    #   * If the processor can detect that text-only output is required and
    #     the alternative content is successfully rendered.
    #
    # Deciding which conditions result in the alternative content being
    # rendered is processor-dependent. If the audio element is not
    # successfully rendered, a synthesis processor should continue processing
    # and should notify the hosting environment. The processor may determine
    # after beginning playback of an audio source that the audio cannot be
    # played in its entirety. For example, encoding problems, network
    # disruptions, etc. may occur. The processor may designate this either as
    # successful or unsuccessful rendering, but it must document this
    # behavior.
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.3.1
    #
    class Audio < Element
      register :audio

      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Desc, Emphasis, Mark, P, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze

      ##
      # The URI of a document with an appropriate MIME type
      #
      # @return [String]
      #
      def src
        read_attr(:src)
      end

      ##
      # @param [String] s the source. Must be a valid URI
      #
      def src=(s)
        self[:src] = s
      end

      ##
      # Appends a child after validating its class.
      #
      # NOTE: Desc is accepted (it is in VALID_CHILD_TYPES) although the error
      # message does not list it.
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "An Audio can only accept String, Audio, Break, Emphasis, Mark, P, Phoneme, Prosody, SayAs, Sub, S, Voice as children"
        end

        super
      end

      # Equality additionally compares the src attribute.
      def eql?(o)
        super(o, :src)
      end
    end # Audio
  end # SSML
end # RubySpeech
@@ -0,0 +1,62 @@
1
module RubySpeech
  module SSML
    ##
    # The break element is an empty element that controls the pausing or other prosodic boundaries between words. The use of the break element between any pair of words is optional. If the element is not present between words, the synthesis processor is expected to automatically determine a break based on the linguistic context. In practice, the break element is most often used to override the typical automatic behavior of a synthesis processor.
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.2.3
    #
    class Break < Element

      register :break

      VALID_STRENGTHS = [:none, :'x-weak', :weak, :medium, :strong, :'x-strong'].freeze

      ##
      # This attribute is used to indicate the strength of the prosodic break in the speech output. The value "none" indicates that no prosodic break boundary should be outputted, which can be used to prevent a prosodic break which the processor would otherwise produce. The other values indicate monotonically non-decreasing (conceptually increasing) break strength between words. The stronger boundaries are typically accompanied by pauses. "x-weak" and "x-strong" are mnemonics for "extra weak" and "extra strong", respectively.
      #
      # @return [Symbol]
      #
      def strength
        read_attr :strength, :to_sym
      end

      ##
      # @param [Symbol] s the strength. Must be one of VALID_STRENGTHS
      #
      # @raise [ArgumentError] if s is not one of VALID_STRENGTHS
      #
      def strength=(s)
        raise ArgumentError, "You must specify a valid strength (#{VALID_STRENGTHS.map(&:inspect).join ', '})" unless VALID_STRENGTHS.include? s
        self[:strength] = s
      end

      ##
      # Indicates the duration of a pause to be inserted in the output in seconds or milliseconds. It follows the time value format from the Cascading Style Sheets Level 2 Recommendation [CSS2], e.g. "250ms", "3s".
      #
      # NOTE(review): the attribute is read with :to_f, so a document-supplied
      # value such as "250ms" presumably reads back as 250.0 rather than 0.25 —
      # confirm against read_attr. Values written through #time= are always in
      # seconds.
      #
      # @return [Float]
      #
      def time
        read_attr :time, :to_f
      end

      ##
      # @param [Numeric] t the time as a non-negative value in seconds
      #
      # @raise [ArgumentError] if t is not a non-negative numeric value
      #
      def time=(t)
        raise ArgumentError, "You must specify a valid time (positive float value in seconds)" unless t.is_a?(Numeric) && t >= 0
        self[:time] = "#{t}s"
      end

      ##
      # A break is an empty element, so appending a child is always rejected.
      #
      # @raise [InvalidChildError] always
      #
      def <<(*_args)
        raise InvalidChildError, "A Break cannot contain children"
      end

      # Equality additionally compares the strength and time attributes.
      def eql?(o)
        super o, :strength, :time
      end
    end # Break
  end # SSML
end # RubySpeech
@@ -0,0 +1,24 @@
1
module RubySpeech
  module SSML
    ##
    # The desc element can only occur within the content of the audio element.
    # When the audio source referenced in audio is not speech, it should
    # contain a desc element whose textual content is a description of the
    # audio source (e.g. "door slamming"). If text-only output is being
    # produced by the synthesis processor, the content of the desc element(s)
    # should be rendered instead of other alternative content in audio.
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.3.3
    #
    class Desc < Element
      register :desc

      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze

      ##
      # Appends a child after validating its class.
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "A Desc can only accept Strings as children"
        end

        super
      end

      # Equality additionally compares the language attribute.
      def eql?(o)
        super(o, :language)
      end
    end # Desc
  end # SSML
end # RubySpeech
@@ -0,0 +1,23 @@
1
+ require 'active_support/core_ext/class/attribute'
2
+
3
module RubySpeech
  module SSML
    ##
    # Base class for the SSML element classes. It supplies the class-level
    # hooks (namespace, root element class, owning module) and mixes in
    # GenericElement.
    #
    class Element < Niceogiri::XML::Node
      class << self
        # The XML namespace for SSML elements.
        def namespace
          SSML_NAMESPACE
        end

        # The class used as the document root.
        def root_element
          Speak
        end

        # The module that owns the SSML element classes.
        def module
          SSML
        end
      end

      include GenericElement

      alias_method :to_doc, :document
    end # Element
  end # SSML
end # RubySpeech
@@ -0,0 +1,44 @@
1
module RubySpeech
  module SSML
    ##
    # The emphasis element requests that the contained text be spoken with
    # emphasis (also referred to as prominence or stress). The synthesis
    # processor determines how to render emphasis since the nature of emphasis
    # differs between languages, dialects or even voices.
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.2.2
    #
    class Emphasis < Element
      register :emphasis

      VALID_LEVELS = [:strong, :moderate, :none, :reduced].freeze
      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice].freeze

      ##
      # Indicates the strength of emphasis to be applied. Defined values are
      # "strong", "moderate", "none" and "reduced". The default level is
      # "moderate". The meaning of "strong" and "moderate" emphasis is
      # interpreted according to the language being spoken (languages indicate
      # emphasis using a possible combination of pitch change, timing changes,
      # loudness and other acoustic differences). The "reduced" level is
      # effectively the opposite of emphasizing a word. For example, when the
      # phrase "going to" is reduced it may be spoken as "gonna". The "none"
      # level is used to prevent the synthesis processor from emphasizing words
      # that it might typically emphasize. The values "none", "moderate", and
      # "strong" are monotonically non-decreasing in strength.
      #
      # @return [Symbol]
      #
      def level
        read_attr(:level, :to_sym)
      end

      ##
      # @param [Symbol] l the level. Must be one of VALID_LEVELS
      #
      # @raise [ArgumentError] if l is not one of VALID_LEVELS
      #
      def level=(l)
        unless VALID_LEVELS.include?(l)
          allowed = VALID_LEVELS.map(&:inspect).join(', ')
          raise ArgumentError, "You must specify a valid level (#{allowed})"
        end

        self[:level] = l
      end

      ##
      # Appends a child after validating its class.
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "An Emphasis can only accept String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, Voice as children"
        end

        super
      end

      # Equality additionally compares the level attribute.
      def eql?(o)
        super(o, :level)
      end
    end # Emphasis
  end # SSML
end # RubySpeech
@@ -0,0 +1,43 @@
1
module RubySpeech
  module SSML
    ##
    # A mark element is an empty element that places a marker into the text/tag sequence. It has one required attribute, name, which is of type xsd:token [SCHEMA2 §3.3.2]. The mark element can be used to reference a specific location in the text/tag sequence, and can additionally be used to insert a marker into an output stream for asynchronous notification. When processing a mark element, a synthesis processor must do one or both of the following:
    #
    #   * inform the hosting environment with the value of the name attribute and with information allowing the platform to retrieve the corresponding position in the rendered output.
    #   * when audio output of the SSML document reaches the mark, issue an event that includes the required name attribute of the element. The hosting environment defines the destination of the event.
    #
    # The mark element does not affect the speech output process.
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.3.2
    #
    class Mark < Element

      register :mark

      ##
      # This attribute is a token by which to reference the mark
      #
      # @return [String]
      #
      def name
        read_attr :name
      end

      ##
      # @param [String] other the name token
      #
      def name=(other)
        self[:name] = other
      end

      ##
      # A mark is an empty element, so appending a child is always rejected.
      #
      # @raise [InvalidChildError] always
      #
      def <<(*_args)
        raise InvalidChildError, "A Mark cannot contain children"
      end

      # Equality additionally compares the name attribute.
      def eql?(o)
        super o, :name
      end
    end # Mark
  end # SSML
end # RubySpeech
@@ -0,0 +1,25 @@
1
module RubySpeech
  module SSML
    ##
    # A p element represents a paragraph.
    # The use of p elements is optional. Where text occurs without an
    # enclosing p element the synthesis processor should attempt to determine
    # the structure using language-specific knowledge of the format of plain
    # text.
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.1.7
    #
    class P < Element
      register :p

      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Element, Nokogiri::XML::Text, String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, S, SayAs, Sub, Voice].freeze

      ##
      # Appends a child after validating its class.
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "A P can only accept String, Audio, Break, Emphasis, Mark, Phoneme, Prosody, SayAs, Sub, S, Voice as children"
        end

        super
      end

      # Equality additionally compares the language attribute.
      def eql?(o)
        super(o, :language)
      end
    end # P
  end # SSML
end # RubySpeech
@@ -0,0 +1,72 @@
1
module RubySpeech
  module SSML
    ##
    # The phoneme element provides a phonemic/phonetic pronunciation for the
    # contained text. The phoneme element may be empty. However, it is
    # recommended that the element contain human-readable text that can be
    # used for non-spoken rendering of the document. For example, the content
    # may be displayed visually for users with hearing impairments.
    #
    # The ph attribute is a required attribute that specifies the
    # phoneme/phone string.
    #
    # This element is designed strictly for phonemic and phonetic notations
    # and is intended to be used to provide pronunciations for words or very
    # short phrases. The phonemic/phonetic string does not undergo text
    # normalization and is not treated as a token for lookup in the lexicon
    # (see Section 3.1.4), while values in say-as and sub may undergo both.
    # Briefly, phonemic strings consist of phonemes, language-dependent speech
    # units that characterize linguistically significant differences in the
    # language; loosely, phonemes represent all the sounds needed to
    # distinguish one word from another in a given language. On the other
    # hand, phonetic strings consist of phones, speech units that characterize
    # the manner (puff of air, click, vocalized, etc.) and place (front,
    # middle, back, etc.) of articulation within the human vocal tract and are
    # thus independent of language; phones represent realized distinctions in
    # human speech production.
    #
    # The alphabet attribute is an optional attribute that specifies the
    # phonemic/phonetic alphabet. An alphabet in this context refers to a
    # collection of symbols to represent the sounds of one or more human
    # languages. The only valid values for this attribute are "ipa" (see the
    # next paragraph) and vendor-defined strings of the form "x-organization"
    # or "x-organization-alphabet". For example, the Japan Electronics and
    # Information Technology Industries Association [JEITA] might wish to
    # encourage the use of an alphabet such as "x-JEITA" or "x-JEITA-2000" for
    # their phoneme alphabet [JEIDAALPHABET].
    #
    # Synthesis processors should support a value for alphabet of "ipa",
    # corresponding to Unicode representations of the phonetic characters
    # developed by the International Phonetic Association [IPA]. In addition
    # to an exhaustive set of vowel and consonant symbols, this character set
    # supports a syllable delimiter, numerous diacritics, stress symbols,
    # lexical tone symbols, intonational markers and more. For this alphabet,
    # legal ph values are strings of the values specified in Appendix 2 of
    # [IPAHNDBK]. Informative tables of the IPA-to-Unicode mappings can be
    # found at [IPAUNICODE1] and [IPAUNICODE2]. Note that not all of the IPA
    # characters are available in Unicode. For processors supporting this
    # alphabet,
    #
    #   * The processor must syntactically accept all legal ph values.
    #   * The processor should produce output when given Unicode IPA codes
    #     that can reasonably be considered to belong to the current language.
    #   * The production of output when given other codes is entirely at
    #     processor discretion.
    #
    # It is an error if a value for alphabet is specified that is not known or
    # cannot be applied by a synthesis processor. The default behavior when
    # the alphabet attribute is left unspecified is processor-specific.
    #
    # The phoneme element itself can only contain text (no elements).
    #
    # http://www.w3.org/TR/speech-synthesis/#S3.1.9
    #
    class Phoneme < Element
      register :phoneme

      # Classes accepted by #<<; the check compares the exact class of the argument.
      VALID_CHILD_TYPES = [Nokogiri::XML::Text, String].freeze

      ##
      # Specifies the phonemic/phonetic alphabet
      #
      # @return [String]
      #
      def alphabet
        read_attr(:alphabet)
      end

      ##
      # @param [String] other the phonemic/phonetic alphabet
      #
      def alphabet=(other)
        self[:alphabet] = other
      end

      ##
      # Specifies the phoneme/phone string.
      #
      # @return [String]
      #
      def ph
        read_attr(:ph)
      end

      ##
      # @param [String] other the phoneme/phone string.
      #
      def ph=(other)
        self[:ph] = other
      end

      ##
      # Appends a child after validating its class.
      #
      # @raise [InvalidChildError] if the class of arg is not listed in VALID_CHILD_TYPES
      #
      def <<(arg)
        unless VALID_CHILD_TYPES.include?(arg.class)
          raise InvalidChildError, "A Phoneme can only accept Strings as children"
        end

        super
      end

      # Equality additionally compares the alphabet and ph attributes.
      def eql?(o)
        super(o, :alphabet, :ph)
      end
    end # Phoneme
  end # SSML
end # RubySpeech