ruby_speech 1.1.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/.travis.yml +5 -1
- data/CHANGELOG.md +20 -5
- data/Gemfile +1 -1
- data/Guardfile +4 -0
- data/README.md +47 -101
- data/Rakefile +14 -2
- data/ext/ruby_speech/RubySpeechGRXMLMatcher.java +42 -0
- data/ext/ruby_speech/RubySpeechService.java +23 -0
- data/ext/ruby_speech/extconf.rb +7 -0
- data/ext/ruby_speech/ruby_speech.c +41 -0
- data/lib/ruby_speech/grxml.rb +1 -0
- data/lib/ruby_speech/grxml/element.rb +0 -17
- data/lib/ruby_speech/grxml/grammar.rb +0 -103
- data/lib/ruby_speech/grxml/item.rb +0 -21
- data/lib/ruby_speech/grxml/matcher.rb +129 -0
- data/lib/ruby_speech/grxml/one_of.rb +0 -4
- data/lib/ruby_speech/grxml/token.rb +0 -4
- data/lib/ruby_speech/nlsml.rb +1 -2
- data/lib/ruby_speech/nlsml/builder.rb +2 -15
- data/lib/ruby_speech/nlsml/document.rb +13 -9
- data/lib/ruby_speech/version.rb +1 -1
- data/ruby_speech.gemspec +10 -3
- data/spec/ruby_speech/grxml/grammar_spec.rb +0 -528
- data/spec/ruby_speech/grxml/item_spec.rb +0 -385
- data/spec/ruby_speech/grxml/matcher_spec.rb +644 -0
- data/spec/ruby_speech/grxml/one_of_spec.rb +0 -238
- data/spec/ruby_speech/nlsml_spec.rb +106 -148
- data/spec/ruby_speech_spec.rb +11 -21
- data/spec/spec_helper.rb +0 -1
- metadata +52 -78
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a4f1b109d9aca99877fdea094ae7898d160c2bc5
|
4
|
+
data.tar.gz: 61f380357241df94f39f25db15e17433d6766bb5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 01c6b26ae73ae0627b8ce474ea6fdddc8d0b87779a07c3908c57b71e3101ec8a447a8980a88d17de53c216442137122b03c821026146fd650066ec44030ce949
|
7
|
+
data.tar.gz: 7075b29b0be7f4a7f1ca2b83fc3694be0e09566dbd0ef12be3c66933538cb885053d9971a933e094ac59c5090f56bdb82774ef0c3afcd69cd4eedc21d3a2bf82
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -3,9 +3,13 @@ language: ruby
|
|
3
3
|
rvm:
|
4
4
|
- 1.9.2
|
5
5
|
- 1.9.3
|
6
|
+
- 2.0.0
|
6
7
|
- jruby-19mode
|
7
8
|
- rbx-19mode
|
8
9
|
- ruby-head
|
9
10
|
|
11
|
+
before_install:
|
12
|
+
- sudo apt-get install libpcre3 libpcre3-dev
|
13
|
+
|
10
14
|
notifications:
|
11
|
-
irc: "irc.freenode.org#adhearsion
|
15
|
+
irc: "irc.freenode.org#adhearsion"
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
# [develop](https://github.com/benlangfeld/ruby_speech)
|
2
2
|
|
3
|
+
# [2.0.1](https://github.com/benlangfeld/ruby_speech/compare/v2.0.0...v2.0.1) - [2013-04-27](https://rubygems.org/gems/ruby_speech/versions/2.0.1)
|
4
|
+
* Bugfix: Build native C extension in the correct location
|
5
|
+
|
6
|
+
# [2.0.0](https://github.com/benlangfeld/ruby_speech/compare/v1.1.0...v2.0.0) - [2013-04-27](https://rubygems.org/gems/ruby_speech/versions/2.0.0)
|
7
|
+
* Change: Comply with MRCPv2 flavour of NLSML
|
8
|
+
* Confidence is now a float in the XML representation
|
9
|
+
* Models are no longer used
|
10
|
+
* XForms no longer used
|
11
|
+
* Now have a true namespace
|
12
|
+
* Instance is in the NLSML namespace
|
13
|
+
* Must support string instances
|
14
|
+
* Change: Grammar matching now uses a Matcher rather than directly on the Grammar element
|
15
|
+
* Feature: Grammar matching now uses native C/Java regexes with PCRE/java.util.regex for clean partial matching and SPEEEEEED
|
16
|
+
* Bugfix: Item repeats now work correctly
|
17
|
+
|
3
18
|
# [1.1.0](https://github.com/benlangfeld/ruby_speech/compare/v1.0.2...v1.1.0) - [2013-03-02](https://rubygems.org/gems/ruby_speech/versions/1.1.0)
|
4
19
|
* Feature: NLSML building & parsing
|
5
20
|
|
@@ -20,11 +35,11 @@
|
|
20
35
|
|
21
36
|
# 0.5.0 - 2012-01-03
|
22
37
|
* Feature: Add a whole bunch more SSML elements:
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
38
|
+
* p & s
|
39
|
+
* mark
|
40
|
+
* desc
|
41
|
+
* sub
|
42
|
+
* phoneme
|
28
43
|
* Feature: Added the ability to inline grammar rule references in both destructive and non-destructive modes
|
29
44
|
* Feature: Added the ability to tokenize a grammar, turning all tokens into unambiguous `<token/>` elements
|
30
45
|
* Feature: Added the ability to whitespace normalize a grammar
|
data/Gemfile
CHANGED
data/Guardfile
CHANGED
data/README.md
CHANGED
@@ -108,120 +108,82 @@ which becomes
|
|
108
108
|
|
109
109
|
#### Grammar matching
|
110
110
|
|
111
|
-
It is possible to match some arbitrary input against a GRXML grammar
|
111
|
+
It is possible to match some arbitrary input against a GRXML grammar, like so:
|
112
112
|
|
113
113
|
```ruby
|
114
|
-
|
115
|
-
grammy.tokenize!
|
116
|
-
grammy.normalize_whitespace
|
117
|
-
```
|
114
|
+
require 'ruby_speech'
|
118
115
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
<one-of>
|
126
|
-
<item>
|
127
|
-
<token>0</token>
|
128
|
-
</item>
|
129
|
-
<item>
|
130
|
-
<token>1</token>
|
131
|
-
</item>
|
132
|
-
<item>
|
133
|
-
<token>2</token>
|
134
|
-
</item>
|
135
|
-
<item>
|
136
|
-
<token>3</token>
|
137
|
-
</item>
|
138
|
-
<item>
|
139
|
-
<token>4</token>
|
140
|
-
</item>
|
141
|
-
<item>
|
142
|
-
<token>5</token>
|
143
|
-
</item>
|
144
|
-
<item>
|
145
|
-
<token>6</token>
|
146
|
-
</item>
|
147
|
-
<item>
|
148
|
-
<token>7</token>
|
149
|
-
</item>
|
150
|
-
<item>
|
151
|
-
<token>8</token>
|
152
|
-
</item>
|
153
|
-
<item>
|
154
|
-
<token>9</token>
|
155
|
-
</item>
|
156
|
-
</one-of>
|
157
|
-
</item>
|
158
|
-
<token>#</token>
|
159
|
-
</item>
|
160
|
-
<item>
|
161
|
-
<token>*</token>
|
162
|
-
<token>9</token>
|
163
|
-
</item>
|
164
|
-
</one-of>
|
165
|
-
</rule>
|
166
|
-
</grammar>
|
167
|
-
```
|
116
|
+
>> grammar = RubySpeech::GRXML.draw mode: :dtmf, root: 'pin' do
|
117
|
+
rule id: 'digit' do
|
118
|
+
one_of do
|
119
|
+
('0'..'9').map { |d| item { d } }
|
120
|
+
end
|
121
|
+
end
|
168
122
|
|
169
|
-
|
123
|
+
rule id: 'pin', scope: 'public' do
|
124
|
+
one_of do
|
125
|
+
item do
|
126
|
+
item repeat: '4' do
|
127
|
+
ruleref uri: '#digit'
|
128
|
+
end
|
129
|
+
"#"
|
130
|
+
end
|
131
|
+
item do
|
132
|
+
"* 9"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
170
137
|
|
171
|
-
|
172
|
-
|
138
|
+
matcher = RubySpeech::GRXML::Matcher.new grammar
|
139
|
+
|
140
|
+
>> matcher.match '*9'
|
173
141
|
=> #<RubySpeech::GRXML::Match:0x00000100ae5d98
|
174
142
|
@mode = :dtmf,
|
175
143
|
@confidence = 1,
|
176
144
|
@utterance = "*9",
|
177
145
|
@interpretation = "*9"
|
178
146
|
>
|
179
|
-
>>
|
147
|
+
>> matcher.match '1234#'
|
180
148
|
=> #<RubySpeech::GRXML::Match:0x00000100b7e020
|
181
149
|
@mode = :dtmf,
|
182
150
|
@confidence = 1,
|
183
151
|
@utterance = "1234#",
|
184
152
|
@interpretation = "1234#"
|
185
153
|
>
|
186
|
-
>>
|
154
|
+
>> matcher.match '5678#'
|
187
155
|
=> #<RubySpeech::GRXML::Match:0x00000101218688
|
188
156
|
@mode = :dtmf,
|
189
157
|
@confidence = 1,
|
190
158
|
@utterance = "5678#",
|
191
159
|
@interpretation = "5678#"
|
192
160
|
>
|
193
|
-
>>
|
161
|
+
>> matcher.match '1111#'
|
194
162
|
=> #<RubySpeech::GRXML::Match:0x000001012f69d8
|
195
163
|
@mode = :dtmf,
|
196
164
|
@confidence = 1,
|
197
165
|
@utterance = "1111#",
|
198
166
|
@interpretation = "1111#"
|
199
167
|
>
|
200
|
-
>>
|
168
|
+
>> matcher.match '111'
|
201
169
|
=> #<RubySpeech::GRXML::NoMatch:0x00000101371660>
|
202
170
|
```
|
203
171
|
|
204
172
|
### NLSML
|
205
173
|
|
206
|
-
[Natural Language Semantics Markup Language](http://
|
174
|
+
[Natural Language Semantics Markup Language](http://tools.ietf.org/html/draft-ietf-speechsc-mrcpv2-27#section-6.3.1) is the format used by many Speech Recognition engines and natural language processors to add semantic information to human language. RubySpeech is capable of generating and parsing such documents.
|
207
175
|
|
208
176
|
It is possible to generate an NLSML document like so:
|
209
177
|
|
210
178
|
```ruby
|
211
179
|
require 'ruby_speech'
|
212
180
|
|
213
|
-
nlsml = RubySpeech::NLSML.draw
|
181
|
+
nlsml = RubySpeech::NLSML.draw grammar: 'http://flight' do
|
214
182
|
interpretation confidence: 0.6 do
|
215
183
|
input "I want to go to Pittsburgh", mode: :speech
|
216
184
|
|
217
|
-
model do
|
218
|
-
group name: 'airline' do
|
219
|
-
string name: 'to_city'
|
220
|
-
end
|
221
|
-
end
|
222
|
-
|
223
185
|
instance do
|
224
|
-
|
186
|
+
airline do
|
225
187
|
to_city 'Pittsburgh'
|
226
188
|
end
|
227
189
|
end
|
@@ -230,14 +192,8 @@ nlsml = RubySpeech::NLSML.draw(grammar: 'http://flight', 'xmlns:myApp' => 'foo')
|
|
230
192
|
interpretation confidence: 0.4 do
|
231
193
|
input "I want to go to Stockholm"
|
232
194
|
|
233
|
-
model do
|
234
|
-
group name: 'airline' do
|
235
|
-
string name: 'to_city'
|
236
|
-
end
|
237
|
-
end
|
238
|
-
|
239
195
|
instance do
|
240
|
-
|
196
|
+
airline do
|
241
197
|
to_city "Stockholm"
|
242
198
|
end
|
243
199
|
end
|
@@ -251,32 +207,22 @@ becomes:
|
|
251
207
|
|
252
208
|
```xml
|
253
209
|
<?xml version="1.0"?>
|
254
|
-
<result xmlns
|
255
|
-
<interpretation confidence="
|
210
|
+
<result xmlns="http://www.ietf.org/xml/ns/mrcpv2" grammar="http://flight">
|
211
|
+
<interpretation confidence="0.6">
|
256
212
|
<input mode="speech">I want to go to Pittsburgh</input>
|
257
|
-
<
|
258
|
-
<
|
259
|
-
<
|
260
|
-
</
|
261
|
-
</
|
262
|
-
<xf:instance>
|
263
|
-
<myApp:airline>
|
264
|
-
<myApp:to_city>Pittsburgh</myApp:to_city>
|
265
|
-
</myApp:airline>
|
266
|
-
</xf:instance>
|
213
|
+
<instance>
|
214
|
+
<airline>
|
215
|
+
<to_city>Pittsburgh</to_city>
|
216
|
+
</airline>
|
217
|
+
</instance>
|
267
218
|
</interpretation>
|
268
|
-
<interpretation confidence="
|
219
|
+
<interpretation confidence="0.4">
|
269
220
|
<input>I want to go to Stockholm</input>
|
270
|
-
<
|
271
|
-
<
|
272
|
-
<
|
273
|
-
</
|
274
|
-
</
|
275
|
-
<xf:instance>
|
276
|
-
<myApp:airline>
|
277
|
-
<myApp:to_city>Stockholm</myApp:to_city>
|
278
|
-
</myApp:airline>
|
279
|
-
</xf:instance>
|
221
|
+
<instance>
|
222
|
+
<airline>
|
223
|
+
<to_city>Stockholm</to_city>
|
224
|
+
</airline>
|
225
|
+
</instance>
|
280
226
|
</interpretation>
|
281
227
|
</result>
|
282
228
|
```
|
@@ -365,4 +311,4 @@ Check out the [YARD documentation](http://rdoc.info/github/benlangfeld/ruby_spee
|
|
365
311
|
|
366
312
|
## Copyright
|
367
313
|
|
368
|
-
Copyright (c)
|
314
|
+
Copyright (c) 2013 Ben Langfeld. MIT licence (see LICENSE for details).
|
data/Rakefile
CHANGED
@@ -15,8 +15,20 @@ RSpec::Core::RakeTask.new(:rcov) do |spec|
|
|
15
15
|
spec.rspec_opts = '--color'
|
16
16
|
end
|
17
17
|
|
18
|
-
task :default => :spec
|
19
|
-
task :ci => ['ci:setup:rspec', :spec]
|
18
|
+
task :default => [:compile, :spec]
|
19
|
+
task :ci => ['ci:setup:rspec', :compile, :spec]
|
20
20
|
|
21
21
|
require 'yard'
|
22
22
|
YARD::Rake::YardocTask.new
|
23
|
+
|
24
|
+
if RUBY_PLATFORM =~ /java/
|
25
|
+
require 'rake/javaextensiontask'
|
26
|
+
Rake::JavaExtensionTask.new 'ruby_speech' do |ext|
|
27
|
+
ext.lib_dir = 'lib/ruby_speech'
|
28
|
+
end
|
29
|
+
else
|
30
|
+
require 'rake/extensiontask'
|
31
|
+
Rake::ExtensionTask.new 'ruby_speech' do |ext|
|
32
|
+
ext.lib_dir = 'lib/ruby_speech'
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
package com.benlangfeld.ruby_speech;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.RubyClass;
|
5
|
+
import org.jruby.RubyModule;
|
6
|
+
import org.jruby.RubyObject;
|
7
|
+
import org.jruby.anno.JRubyClass;
|
8
|
+
import org.jruby.anno.JRubyMethod;
|
9
|
+
import org.jruby.runtime.ObjectAllocator;
|
10
|
+
import org.jruby.runtime.ThreadContext;
|
11
|
+
import org.jruby.runtime.Visibility;
|
12
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
13
|
+
import org.jruby.javasupport.util.RuntimeHelpers;
|
14
|
+
|
15
|
+
import java.util.regex.*;
|
16
|
+
|
17
|
+
@JRubyClass(name="RubySpeech::GRXML::Matcher")
|
18
|
+
public class RubySpeechGRXMLMatcher extends RubyObject {
|
19
|
+
|
20
|
+
public RubySpeechGRXMLMatcher(final Ruby runtime, RubyClass rubyClass) {
|
21
|
+
super(runtime, rubyClass);
|
22
|
+
}
|
23
|
+
|
24
|
+
@JRubyMethod(visibility=Visibility.PRIVATE)
|
25
|
+
public IRubyObject check_potential_match(ThreadContext context, IRubyObject buffer)
|
26
|
+
{
|
27
|
+
Ruby runtime = context.getRuntime();
|
28
|
+
|
29
|
+
IRubyObject regex = getInstanceVariable("@regex");
|
30
|
+
|
31
|
+
Pattern p = Pattern.compile(regex.toString());
|
32
|
+
Matcher m = p.matcher(buffer.toString());
|
33
|
+
|
34
|
+
if (m.matches()) {
|
35
|
+
} else if (m.hitEnd()) {
|
36
|
+
RubyModule potential_match = runtime.getClassFromPath("RubySpeech::GRXML::PotentialMatch");
|
37
|
+
return RuntimeHelpers.invoke(context, potential_match, "new");
|
38
|
+
}
|
39
|
+
return runtime.getNil();
|
40
|
+
}
|
41
|
+
|
42
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package com.benlangfeld.ruby_speech;
|
2
|
+
|
3
|
+
import org.jruby.Ruby;
|
4
|
+
import org.jruby.RubyClass;
|
5
|
+
import org.jruby.RubyModule;
|
6
|
+
import org.jruby.RubyObject;
|
7
|
+
import org.jruby.runtime.ObjectAllocator;
|
8
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
9
|
+
import org.jruby.runtime.load.BasicLibraryService;
|
10
|
+
|
11
|
+
public class RubySpeechService implements BasicLibraryService {
|
12
|
+
public boolean basicLoad(Ruby ruby) {
|
13
|
+
RubyModule ruby_speech = ruby.defineModule("RubySpeech");
|
14
|
+
RubyModule grxml = ruby_speech.defineModuleUnder("GRXML");
|
15
|
+
RubyClass matcher = grxml.defineClassUnder("Matcher", ruby.getObject(), new ObjectAllocator() {
|
16
|
+
public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) {
|
17
|
+
return new RubySpeechGRXMLMatcher(runtime, rubyClass);
|
18
|
+
}
|
19
|
+
});
|
20
|
+
matcher.defineAnnotatedMethods(RubySpeechGRXMLMatcher.class);
|
21
|
+
return true;
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "pcre.h"
|
3
|
+
#include <stdio.h>
|
4
|
+
|
5
|
+
/*
 * RubySpeech::GRXML::Matcher#check_potential_match(buffer)
 *
 * Compiles the #to_s form of the receiver's @regex with PCRE and runs it
 * against +buffer+ through the DFA matcher with PCRE_PARTIAL set.
 *
 * Returns a new RubySpeech::GRXML::PotentialMatch when PCRE reports
 * PCRE_ERROR_PARTIAL. Every other outcome yields nil, including a pattern
 * that fails to compile and any other PCRE result code.
 *
 * StringValueCStr raises if @regex.to_s or +buffer+ cannot be used as a C
 * string; both conversions happen before any PCRE resource is allocated.
 */
static VALUE method_check_potential_match(VALUE self, VALUE buffer)
{
  int erroffset = 0;
  const char *errptr = "";
  int options = 0;
  int result = 0;
  int ovector[30];
  int workspace[1024];
  pcre *compiled_regex = NULL;

  VALUE regex_string = rb_funcall(rb_iv_get(self, "@regex"), rb_intern("to_s"), 0);
  const char *regex = StringValueCStr(regex_string);

  /* Convert the subject BEFORE compiling: StringValueCStr can raise, and a
   * raise after pcre_compile() would jump past pcre_free() and leak the
   * compiled pattern. */
  const char *input = StringValueCStr(buffer);

  compiled_regex = pcre_compile(regex, options, &errptr, &erroffset, NULL);
  if (compiled_regex == NULL) {
    /* The pattern did not compile. Report "no potential match" explicitly
     * instead of relying on pcre_dfa_exec() rejecting a NULL pattern. */
    return Qnil;
  }

  /* StringValueCStr has already rejected embedded NULs, so the Ruby string
   * length is the C string length. */
  result = pcre_dfa_exec(compiled_regex, NULL, input, (int)RSTRING_LEN(buffer), 0, PCRE_PARTIAL,
                         ovector, sizeof(ovector) / sizeof(ovector[0]),
                         workspace, sizeof(workspace) / sizeof(workspace[0]));
  pcre_free(compiled_regex);

  /* Keep both Ruby strings alive while their C buffers are in use above. */
  RB_GC_GUARD(regex_string);
  RB_GC_GUARD(buffer);

  if (result == PCRE_ERROR_PARTIAL) {
    VALUE RubySpeech = rb_const_get(rb_cObject, rb_intern("RubySpeech"));
    VALUE GRXML = rb_const_get(RubySpeech, rb_intern("GRXML"));
    VALUE PotentialMatch = rb_const_get(GRXML, rb_intern("PotentialMatch"));

    return rb_class_new_instance(0, NULL, PotentialMatch);
  }
  return Qnil;
}
|
33
|
+
|
34
|
+
void Init_ruby_speech()
|
35
|
+
{
|
36
|
+
VALUE RubySpeech = rb_define_module("RubySpeech");
|
37
|
+
VALUE GRXML = rb_define_module_under(RubySpeech, "GRXML");
|
38
|
+
VALUE Matcher = rb_define_class_under(GRXML, "Matcher", rb_cObject);
|
39
|
+
|
40
|
+
rb_define_method(Matcher, "check_potential_match", method_check_potential_match, 1);
|
41
|
+
}
|