ruby_speech 1.1.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +5 -1
- data/CHANGELOG.md +20 -5
- data/Gemfile +1 -1
- data/Guardfile +4 -0
- data/README.md +47 -101
- data/Rakefile +14 -2
- data/ext/ruby_speech/RubySpeechGRXMLMatcher.java +42 -0
- data/ext/ruby_speech/RubySpeechService.java +23 -0
- data/ext/ruby_speech/extconf.rb +7 -0
- data/ext/ruby_speech/ruby_speech.c +41 -0
- data/lib/ruby_speech/grxml.rb +1 -0
- data/lib/ruby_speech/grxml/element.rb +0 -17
- data/lib/ruby_speech/grxml/grammar.rb +0 -103
- data/lib/ruby_speech/grxml/item.rb +0 -21
- data/lib/ruby_speech/grxml/matcher.rb +129 -0
- data/lib/ruby_speech/grxml/one_of.rb +0 -4
- data/lib/ruby_speech/grxml/token.rb +0 -4
- data/lib/ruby_speech/nlsml.rb +1 -2
- data/lib/ruby_speech/nlsml/builder.rb +2 -15
- data/lib/ruby_speech/nlsml/document.rb +13 -9
- data/lib/ruby_speech/version.rb +1 -1
- data/ruby_speech.gemspec +10 -3
- data/spec/ruby_speech/grxml/grammar_spec.rb +0 -528
- data/spec/ruby_speech/grxml/item_spec.rb +0 -385
- data/spec/ruby_speech/grxml/matcher_spec.rb +644 -0
- data/spec/ruby_speech/grxml/one_of_spec.rb +0 -238
- data/spec/ruby_speech/nlsml_spec.rb +106 -148
- data/spec/ruby_speech_spec.rb +11 -21
- data/spec/spec_helper.rb +0 -1
- metadata +52 -78
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a4f1b109d9aca99877fdea094ae7898d160c2bc5
|
4
|
+
data.tar.gz: 61f380357241df94f39f25db15e17433d6766bb5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 01c6b26ae73ae0627b8ce474ea6fdddc8d0b87779a07c3908c57b71e3101ec8a447a8980a88d17de53c216442137122b03c821026146fd650066ec44030ce949
|
7
|
+
data.tar.gz: 7075b29b0be7f4a7f1ca2b83fc3694be0e09566dbd0ef12be3c66933538cb885053d9971a933e094ac59c5090f56bdb82774ef0c3afcd69cd4eedc21d3a2bf82
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -3,9 +3,13 @@ language: ruby
|
|
3
3
|
rvm:
|
4
4
|
- 1.9.2
|
5
5
|
- 1.9.3
|
6
|
+
- 2.0.0
|
6
7
|
- jruby-19mode
|
7
8
|
- rbx-19mode
|
8
9
|
- ruby-head
|
9
10
|
|
11
|
+
before_install:
|
12
|
+
- sudo apt-get install libpcre3 libpcre3-dev
|
13
|
+
|
10
14
|
notifications:
|
11
|
-
irc: "irc.freenode.org#adhearsion
|
15
|
+
irc: "irc.freenode.org#adhearsion"
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
# [develop](https://github.com/benlangfeld/ruby_speech)
|
2
2
|
|
3
|
+
# [2.0.1](https://github.com/benlangfeld/ruby_speech/compare/v2.0.0...v2.0.1) - [2013-04-27](https://rubygems.org/gems/ruby_speech/versions/2.0.1)
|
4
|
+
* Bugfix: Build native C extension in the correct location
|
5
|
+
|
6
|
+
# [2.0.0](https://github.com/benlangfeld/ruby_speech/compare/v1.1.0...v2.0.0) - [2013-04-27](https://rubygems.org/gems/ruby_speech/versions/2.0.0)
|
7
|
+
* Change: Comply with MRCPv2 flavour of NLSML
|
8
|
+
* Confidence is now a float in the XML representation
|
9
|
+
* Models are no longer used
|
10
|
+
* XForms no longer used
|
11
|
+
* Now have a true namespace
|
12
|
+
* Instance is in the NLSML namespace
|
13
|
+
* Must support string instances
|
14
|
+
* Change: Grammar matching now uses a Matcher rather than directly on the Grammar element
|
15
|
+
* Feature: Grammar matching now uses native C/Java regexes with PCRE/java.util.regex for clean partial matching and SPEEEEEED
|
16
|
+
* Bugfix: Item repeats now work correctly
|
17
|
+
|
3
18
|
# [1.1.0](https://github.com/benlangfeld/ruby_speech/compare/v1.0.2...v1.1.0) - [2013-03-02](https://rubygems.org/gems/ruby_speech/versions/1.1.0)
|
4
19
|
* Feature: NLSML building & parsing
|
5
20
|
|
@@ -20,11 +35,11 @@
|
|
20
35
|
|
21
36
|
# 0.5.0 - 2012-01-03
|
22
37
|
* Feature: Add a whole bunch more SSML elements:
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
38
|
+
* p & s
|
39
|
+
* mark
|
40
|
+
* desc
|
41
|
+
* sub
|
42
|
+
* phoneme
|
28
43
|
* Feature: Added the ability to inline grammar rule references in both destructive and non-destructive modes
|
29
44
|
* Feature: Added the ability to tokenize a grammar, turning all tokens into unambiguous `<token/>` elements
|
30
45
|
* Feature: Added the ability to whitespace normalize a grammar
|
data/Gemfile
CHANGED
data/Guardfile
CHANGED
data/README.md
CHANGED
@@ -108,120 +108,82 @@ which becomes
|
|
108
108
|
|
109
109
|
#### Grammar matching
|
110
110
|
|
111
|
-
It is possible to match some arbitrary input against a GRXML grammar
|
111
|
+
It is possible to match some arbitrary input against a GRXML grammar, like so:
|
112
112
|
|
113
113
|
```ruby
|
114
|
-
|
115
|
-
grammy.tokenize!
|
116
|
-
grammy.normalize_whitespace
|
117
|
-
```
|
114
|
+
require 'ruby_speech'
|
118
115
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
<one-of>
|
126
|
-
<item>
|
127
|
-
<token>0</token>
|
128
|
-
</item>
|
129
|
-
<item>
|
130
|
-
<token>1</token>
|
131
|
-
</item>
|
132
|
-
<item>
|
133
|
-
<token>2</token>
|
134
|
-
</item>
|
135
|
-
<item>
|
136
|
-
<token>3</token>
|
137
|
-
</item>
|
138
|
-
<item>
|
139
|
-
<token>4</token>
|
140
|
-
</item>
|
141
|
-
<item>
|
142
|
-
<token>5</token>
|
143
|
-
</item>
|
144
|
-
<item>
|
145
|
-
<token>6</token>
|
146
|
-
</item>
|
147
|
-
<item>
|
148
|
-
<token>7</token>
|
149
|
-
</item>
|
150
|
-
<item>
|
151
|
-
<token>8</token>
|
152
|
-
</item>
|
153
|
-
<item>
|
154
|
-
<token>9</token>
|
155
|
-
</item>
|
156
|
-
</one-of>
|
157
|
-
</item>
|
158
|
-
<token>#</token>
|
159
|
-
</item>
|
160
|
-
<item>
|
161
|
-
<token>*</token>
|
162
|
-
<token>9</token>
|
163
|
-
</item>
|
164
|
-
</one-of>
|
165
|
-
</rule>
|
166
|
-
</grammar>
|
167
|
-
```
|
116
|
+
>> grammar = RubySpeech::GRXML.draw mode: :dtmf, root: 'pin' do
|
117
|
+
rule id: 'digit' do
|
118
|
+
one_of do
|
119
|
+
('0'..'9').map { |d| item { d } }
|
120
|
+
end
|
121
|
+
end
|
168
122
|
|
169
|
-
|
123
|
+
rule id: 'pin', scope: 'public' do
|
124
|
+
one_of do
|
125
|
+
item do
|
126
|
+
item repeat: '4' do
|
127
|
+
ruleref uri: '#digit'
|
128
|
+
end
|
129
|
+
"#"
|
130
|
+
end
|
131
|
+
item do
|
132
|
+
"* 9"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
170
137
|
|
171
|
-
|
172
|
-
|
138
|
+
matcher = RubySpeech::GRXML::Matcher.new grammar
|
139
|
+
|
140
|
+
>> matcher.match '*9'
|
173
141
|
=> #<RubySpeech::GRXML::Match:0x00000100ae5d98
|
174
142
|
@mode = :dtmf,
|
175
143
|
@confidence = 1,
|
176
144
|
@utterance = "*9",
|
177
145
|
@interpretation = "*9"
|
178
146
|
>
|
179
|
-
>>
|
147
|
+
>> matcher.match '1234#'
|
180
148
|
=> #<RubySpeech::GRXML::Match:0x00000100b7e020
|
181
149
|
@mode = :dtmf,
|
182
150
|
@confidence = 1,
|
183
151
|
@utterance = "1234#",
|
184
152
|
@interpretation = "1234#"
|
185
153
|
>
|
186
|
-
>>
|
154
|
+
>> matcher.match '5678#'
|
187
155
|
=> #<RubySpeech::GRXML::Match:0x00000101218688
|
188
156
|
@mode = :dtmf,
|
189
157
|
@confidence = 1,
|
190
158
|
@utterance = "5678#",
|
191
159
|
@interpretation = "5678#"
|
192
160
|
>
|
193
|
-
>>
|
161
|
+
>> matcher.match '1111#'
|
194
162
|
=> #<RubySpeech::GRXML::Match:0x000001012f69d8
|
195
163
|
@mode = :dtmf,
|
196
164
|
@confidence = 1,
|
197
165
|
@utterance = "1111#",
|
198
166
|
@interpretation = "1111#"
|
199
167
|
>
|
200
|
-
>>
|
168
|
+
>> matcher.match '111'
|
201
169
|
=> #<RubySpeech::GRXML::NoMatch:0x00000101371660>
|
202
170
|
```
|
203
171
|
|
204
172
|
### NLSML
|
205
173
|
|
206
|
-
[Natural Language Semantics Markup Language](http://
|
174
|
+
[Natural Language Semantics Markup Language](http://tools.ietf.org/html/draft-ietf-speechsc-mrcpv2-27#section-6.3.1) is the format used by many Speech Recognition engines and natural language processors to add semantic information to human language. RubySpeech is capable of generating and parsing such documents.
|
207
175
|
|
208
176
|
It is possible to generate an NLSML document like so:
|
209
177
|
|
210
178
|
```ruby
|
211
179
|
require 'ruby_speech'
|
212
180
|
|
213
|
-
nlsml = RubySpeech::NLSML.draw
|
181
|
+
nlsml = RubySpeech::NLSML.draw grammar: 'http://flight' do
|
214
182
|
interpretation confidence: 0.6 do
|
215
183
|
input "I want to go to Pittsburgh", mode: :speech
|
216
184
|
|
217
|
-
model do
|
218
|
-
group name: 'airline' do
|
219
|
-
string name: 'to_city'
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
185
|
instance do
|
224
|
-
|
186
|
+
airline do
|
225
187
|
to_city 'Pittsburgh'
|
226
188
|
end
|
227
189
|
end
|
@@ -230,14 +192,8 @@ nlsml = RubySpeech::NLSML.draw(grammar: 'http://flight', 'xmlns:myApp' => 'foo')
|
|
230
192
|
interpretation confidence: 0.4 do
|
231
193
|
input "I want to go to Stockholm"
|
232
194
|
|
233
|
-
model do
|
234
|
-
group name: 'airline' do
|
235
|
-
string name: 'to_city'
|
236
|
-
end
|
237
|
-
end
|
238
|
-
|
239
195
|
instance do
|
240
|
-
|
196
|
+
airline do
|
241
197
|
to_city "Stockholm"
|
242
198
|
end
|
243
199
|
end
|
@@ -251,32 +207,22 @@ becomes:
|
|
251
207
|
|
252
208
|
```xml
|
253
209
|
<?xml version="1.0"?>
|
254
|
-
<result xmlns
|
255
|
-
<interpretation confidence="
|
210
|
+
<result xmlns="http://www.ietf.org/xml/ns/mrcpv2" grammar="http://flight">
|
211
|
+
<interpretation confidence="0.6">
|
256
212
|
<input mode="speech">I want to go to Pittsburgh</input>
|
257
|
-
<
|
258
|
-
<
|
259
|
-
<
|
260
|
-
</
|
261
|
-
</
|
262
|
-
<xf:instance>
|
263
|
-
<myApp:airline>
|
264
|
-
<myApp:to_city>Pittsburgh</myApp:to_city>
|
265
|
-
</myApp:airline>
|
266
|
-
</xf:instance>
|
213
|
+
<instance>
|
214
|
+
<airline>
|
215
|
+
<to_city>Pittsburgh</to_city>
|
216
|
+
</airline>
|
217
|
+
</instance>
|
267
218
|
</interpretation>
|
268
|
-
<interpretation confidence="
|
219
|
+
<interpretation confidence="0.4">
|
269
220
|
<input>I want to go to Stockholm</input>
|
270
|
-
<
|
271
|
-
<
|
272
|
-
<
|
273
|
-
</
|
274
|
-
</
|
275
|
-
<xf:instance>
|
276
|
-
<myApp:airline>
|
277
|
-
<myApp:to_city>Stockholm</myApp:to_city>
|
278
|
-
</myApp:airline>
|
279
|
-
</xf:instance>
|
221
|
+
<instance>
|
222
|
+
<airline>
|
223
|
+
<to_city>Stockholm</to_city>
|
224
|
+
</airline>
|
225
|
+
</instance>
|
280
226
|
</interpretation>
|
281
227
|
</result>
|
282
228
|
```
|
@@ -365,4 +311,4 @@ Check out the [YARD documentation](http://rdoc.info/github/benlangfeld/ruby_spee
|
|
365
311
|
|
366
312
|
## Copyright
|
367
313
|
|
368
|
-
Copyright (c)
|
314
|
+
Copyright (c) 2013 Ben Langfeld. MIT licence (see LICENSE for details).
|
data/Rakefile
CHANGED
@@ -15,8 +15,20 @@ RSpec::Core::RakeTask.new(:rcov) do |spec|
|
|
15
15
|
spec.rspec_opts = '--color'
|
16
16
|
end
|
17
17
|
|
18
|
-
task :default => :spec
|
19
|
-
task :ci => ['ci:setup:rspec', :spec]
|
18
|
+
task :default => [:compile, :spec]
|
19
|
+
task :ci => ['ci:setup:rspec', :compile, :spec]
|
20
20
|
|
21
21
|
require 'yard'
|
22
22
|
YARD::Rake::YardocTask.new
|
23
|
+
|
24
|
+
# Register the `compile` task for the native part of the gem. JRuby builds the
# Java extension; every other platform builds the C extension. Both variants
# place the built artefact under lib/ruby_speech.
if RUBY_PLATFORM =~ /java/
  require 'rake/javaextensiontask'
  extension_task = Rake::JavaExtensionTask
else
  require 'rake/extensiontask'
  extension_task = Rake::ExtensionTask
end

extension_task.new('ruby_speech') { |ext| ext.lib_dir = 'lib/ruby_speech' }
|
@@ -0,0 +1,42 @@
|
|
1
|
+
package com.benlangfeld.ruby_speech;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.RubyClass;
|
5
|
+
import org.jruby.RubyModule;
|
6
|
+
import org.jruby.RubyObject;
|
7
|
+
import org.jruby.anno.JRubyClass;
|
8
|
+
import org.jruby.anno.JRubyMethod;
|
9
|
+
import org.jruby.runtime.ObjectAllocator;
|
10
|
+
import org.jruby.runtime.ThreadContext;
|
11
|
+
import org.jruby.runtime.Visibility;
|
12
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
13
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
14
|
+
|
15
|
+
import java.util.regex.*;
|
16
|
+
|
17
|
+
@JRubyClass(name="RubySpeech::GRXML::Matcher")
|
18
|
+
public class RubySpeechGRXMLMatcher extends RubyObject {
|
19
|
+
|
20
|
+
public RubySpeechGRXMLMatcher(final Ruby runtime, RubyClass rubyClass) {
|
21
|
+
super(runtime, rubyClass);
|
22
|
+
}
|
23
|
+
|
24
|
+
@JRubyMethod(visibility=Visibility.PRIVATE)
|
25
|
+
public IRubyObject check_potential_match(ThreadContext context, IRubyObject buffer)
|
26
|
+
{
|
27
|
+
Ruby runtime = context.getRuntime();
|
28
|
+
|
29
|
+
IRubyObject regex = getInstanceVariable("@regex");
|
30
|
+
|
31
|
+
Pattern p = Pattern.compile(regex.toString());
|
32
|
+
Matcher m = p.matcher(buffer.toString());
|
33
|
+
|
34
|
+
if (m.matches()) {
|
35
|
+
} else if (m.hitEnd()) {
|
36
|
+
RubyModule potential_match = runtime.getClassFromPath("RubySpeech::GRXML::PotentialMatch");
|
37
|
+
return RuntimeHelpers.invoke(context, potential_match, "new");
|
38
|
+
}
|
39
|
+
return runtime.getNil();
|
40
|
+
}
|
41
|
+
|
42
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package com.benlangfeld.ruby_speech;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.RubyClass;
|
5
|
+
import org.jruby.RubyModule;
|
6
|
+
import org.jruby.RubyObject;
|
7
|
+
import org.jruby.runtime.ObjectAllocator;
|
8
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
9
|
+
import org.jruby.runtime.load.BasicLibraryService;
|
10
|
+
|
11
|
+
public class RubySpeechService implements BasicLibraryService {
|
12
|
+
public boolean basicLoad(Ruby ruby) {
|
13
|
+
RubyModule ruby_speech = ruby.defineModule("RubySpeech");
|
14
|
+
RubyModule grxml = ruby_speech.defineModuleUnder("GRXML");
|
15
|
+
RubyClass matcher = grxml.defineClassUnder("Matcher", ruby.getObject(), new ObjectAllocator() {
|
16
|
+
public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) {
|
17
|
+
return new RubySpeechGRXMLMatcher(runtime, rubyClass);
|
18
|
+
}
|
19
|
+
});
|
20
|
+
matcher.defineAnnotatedMethods(RubySpeechGRXMLMatcher.class);
|
21
|
+
return true;
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "pcre.h"
|
3
|
+
#include <stdio.h>
|
4
|
+
|
5
|
+
/*
 * RubySpeech::GRXML::Matcher#check_potential_match(buffer)
 *
 * Compiles the string form of the receiver's @regex with PCRE and runs it
 * against `buffer` using the DFA matcher with PCRE_PARTIAL enabled.
 *
 * Returns a new RubySpeech::GRXML::PotentialMatch when PCRE reports
 * PCRE_ERROR_PARTIAL. Returns nil for every other outcome, including a
 * pattern that PCRE cannot compile and any other PCRE error code.
 *
 * Raises whatever StringValueCStr raises when `buffer` (or the regex string)
 * is not convertible to a String or contains a NUL byte.
 */
static VALUE method_check_potential_match(VALUE self, VALUE buffer)
{
  int erroffset = 0;
  const char *errptr = "";
  int options = 0;
  int result = 0;
  int ovector[30];
  int workspace[1024];
  pcre *compiled_regex = NULL;

  /*
   * Perform every Ruby-level conversion before allocating the compiled
   * pattern: StringValueCStr can raise, and an exception raised after
   * pcre_compile() would skip pcre_free() and leak the pattern.
   */
  VALUE regex_string = rb_funcall(rb_iv_get(self, "@regex"), rb_intern("to_s"), 0);
  const char *regex = StringValueCStr(regex_string);
  const char *input = StringValueCStr(buffer);

  compiled_regex = pcre_compile(regex, options, &errptr, &erroffset, NULL);
  if (compiled_regex == NULL) {
    /* Pattern not accepted by PCRE: there is nothing to match against. */
    return Qnil;
  }

  /*
   * StringValueCStr rejects strings with embedded NUL bytes, so the Ruby
   * string length equals the C string length and no rescan is needed.
   */
  result = pcre_dfa_exec(compiled_regex, NULL, input, (int)RSTRING_LEN(buffer), 0, PCRE_PARTIAL,
                         ovector, (int)(sizeof(ovector) / sizeof(ovector[0])),
                         workspace, (int)(sizeof(workspace) / sizeof(workspace[0])));
  pcre_free(compiled_regex);

  /* Keep the Ruby strings alive for as long as their raw pointers were used. */
  RB_GC_GUARD(regex_string);
  RB_GC_GUARD(buffer);

  if (result == PCRE_ERROR_PARTIAL) {
    VALUE RubySpeech = rb_const_get(rb_cObject, rb_intern("RubySpeech"));
    VALUE GRXML = rb_const_get(RubySpeech, rb_intern("GRXML"));
    VALUE PotentialMatch = rb_const_get(GRXML, rb_intern("PotentialMatch"));

    return rb_class_new_instance(0, NULL, PotentialMatch);
  }
  return Qnil;
}
|
33
|
+
|
34
|
+
void Init_ruby_speech()
|
35
|
+
{
|
36
|
+
VALUE RubySpeech = rb_define_module("RubySpeech");
|
37
|
+
VALUE GRXML = rb_define_module_under(RubySpeech, "GRXML");
|
38
|
+
VALUE Matcher = rb_define_class_under(GRXML, "Matcher", rb_cObject);
|
39
|
+
|
40
|
+
rb_define_method(Matcher, "check_potential_match", method_check_potential_match, 1);
|
41
|
+
}
|