RubyGems - ruby_speech - Versions diffs - 1.1.0 → 2.0.1 - Mend

ruby_speech 1.1.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

checksums.yaml +7 -0
data/.gitignore +2 -1
data/.travis.yml +5 -1
data/CHANGELOG.md +20 -5
data/Gemfile +1 -1
data/Guardfile +4 -0
data/README.md +47 -101
data/Rakefile +14 -2
data/ext/ruby_speech/RubySpeechGRXMLMatcher.java +42 -0
data/ext/ruby_speech/RubySpeechService.java +23 -0
data/ext/ruby_speech/extconf.rb +7 -0
data/ext/ruby_speech/ruby_speech.c +41 -0
data/lib/ruby_speech/grxml.rb +1 -0
data/lib/ruby_speech/grxml/element.rb +0 -17
data/lib/ruby_speech/grxml/grammar.rb +0 -103
data/lib/ruby_speech/grxml/item.rb +0 -21
data/lib/ruby_speech/grxml/matcher.rb +129 -0
data/lib/ruby_speech/grxml/one_of.rb +0 -4
data/lib/ruby_speech/grxml/token.rb +0 -4
data/lib/ruby_speech/nlsml.rb +1 -2
data/lib/ruby_speech/nlsml/builder.rb +2 -15
data/lib/ruby_speech/nlsml/document.rb +13 -9
data/lib/ruby_speech/version.rb +1 -1
data/ruby_speech.gemspec +10 -3
data/spec/ruby_speech/grxml/grammar_spec.rb +0 -528
data/spec/ruby_speech/grxml/item_spec.rb +0 -385
data/spec/ruby_speech/grxml/matcher_spec.rb +644 -0
data/spec/ruby_speech/grxml/one_of_spec.rb +0 -238
data/spec/ruby_speech/nlsml_spec.rb +106 -148
data/spec/ruby_speech_spec.rb +11 -21
data/spec/spec_helper.rb +0 -1
metadata +52 -78

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: a4f1b109d9aca99877fdea094ae7898d160c2bc5
+  data.tar.gz: 61f380357241df94f39f25db15e17433d6766bb5
+SHA512:
+  metadata.gz: 01c6b26ae73ae0627b8ce474ea6fdddc8d0b87779a07c3908c57b71e3101ec8a447a8980a88d17de53c216442137122b03c821026146fd650066ec44030ce949
+  data.tar.gz: 7075b29b0be7f4a7f1ca2b83fc3694be0e09566dbd0ef12be3c66933538cb885053d9971a933e094ac59c5090f56bdb82774ef0c3afcd69cd4eedc21d3a2bf82

data/.gitignore CHANGED

@@ -1,6 +1,7 @@
 .DS_Store
 *.gem
-.bundle
+*.bundle
+*.jar
 Gemfile.lock
 pkg/*
 spec/reports

data/.travis.yml CHANGED

@@ -3,9 +3,13 @@ language: ruby
 rvm:
   - 1.9.2
   - 1.9.3
+  - 2.0.0
   - jruby-19mode
   - rbx-19mode
   - ruby-head
+before_install:
+  - sudo apt-get install libpcre3 libpcre3-dev
 notifications:
-  irc: "irc.freenode.org#adhearsion-dev"
+  irc: "irc.freenode.org#adhearsion"

data/CHANGELOG.md CHANGED

@@ -1,5 +1,20 @@
 # [develop](https://github.com/benlangfeld/ruby_speech)
+# [2.0.1](https://github.com/benlangfeld/ruby_speech/compare/v2.0.0...v2.0.1) - [2013-04-27](https://rubygems.org/gems/ruby_speech/versions/2.0.1)
+  * Bugfix: Build native C extension in the correct location
+# [2.0.0](https://github.com/benlangfeld/ruby_speech/compare/v1.1.0...v2.0.0) - [2013-04-27](https://rubygems.org/gems/ruby_speech/versions/2.0.0)
+  * Change: Comply with MRCPv2 flavour of NLSML
+    * Confidence is now a float in the XML representation
+    * Models are no longer used
+    * XForms no longer used
+    * Now have a true namespace
+    * Instance is in the NLSML namespace
+    * Must support string instances
+  * Change: Grammar matching now uses a Matcher rather than directly on the Grammar element
+  * Feature: Grammar matching now uses native C/Java regexes with PCRE/java.util.regex for clean partial matching and SPEEEEEED
+  * Bugfix: Item repeats now work correctly
 # [1.1.0](https://github.com/benlangfeld/ruby_speech/compare/v1.0.2...v1.1.0) - [2013-03-02](https://rubygems.org/gems/ruby_speech/versions/1.1.0)
   * Feature: NLSML building & parsing
@@ -20,11 +35,11 @@
 # 0.5.0 - 2012-01-03
   * Feature: Add a whole bunch more SSML elements:
-  ** p & s
-  ** mark
-  ** desc
-  ** sub
-  ** phoneme
+    * p & s
+    * mark
+    * desc
+    * sub
+    * phoneme
   * Feature: Added the ability to inline grammar rule references in both destructive and non-destructive modes
   * Feature: Added the ability to tokenize a grammar, turning all tokens into unambiguous `<token/>` elements
   * Feature: Added the ability to whitespace normalize a grammar

data/Gemfile CHANGED

@@ -1,3 +1,3 @@
-source :rubygems
+source 'https://rubygems.org'
 gemspec

data/Guardfile CHANGED

@@ -1,3 +1,7 @@
+# Re-run the `compile` rake task whenever a C source file under ext/ changes.
+guard('rake', :task => 'compile') { watch(%r{^ext/(.+)\.c$}) }
 guard 'rspec', :cli => '--format documentation' do
   watch(%r{^spec/.+_spec\.rb$})
   watch(%r{^lib/(.+)\.rb$})     { |m| "spec/#{m[1]}_spec.rb" }

data/README.md CHANGED

@@ -108,120 +108,82 @@ which becomes
 #### Grammar matching
-It is possible to match some arbitrary input against a GRXML grammar. In order to do so, certain normalization routines should first be run on the grammar in order to prepare it for matching. These are reference inlining, tokenization and whitespace normalization, and are described [in the SRGS spec](http://www.w3.org/TR/speech-grammar/#S2.1). This process will transform the above grammar like so:
+It is possible to match some arbitrary input against a GRXML grammar, like so:
 ```ruby
-grammy.inline!
-grammy.tokenize!
-grammy.normalize_whitespace
-```
+require 'ruby_speech'
-```xml
-<grammar xmlns="http://www.w3.org/2001/06/grammar" version="1.0" xml:lang="en-US" mode="dtmf" root="pin">
-  <rule id="pin" scope="public">
-    <one-of>
-      <item>
-        <item repeat="4">
-          <one-of>
-            <item>
-              <token>0</token>
-            </item>
-            <item>
-              <token>1</token>
-            </item>
-            <item>
-              <token>2</token>
-            </item>
-            <item>
-              <token>3</token>
-            </item>
-            <item>
-              <token>4</token>
-            </item>
-            <item>
-              <token>5</token>
-            </item>
-            <item>
-              <token>6</token>
-            </item>
-            <item>
-              <token>7</token>
-            </item>
-            <item>
-              <token>8</token>
-            </item>
-            <item>
-              <token>9</token>
-            </item>
-          </one-of>
-        </item>
-        <token>#</token>
-      </item>
-      <item>
-        <token>*</token>
-        <token>9</token>
-      </item>
-    </one-of>
-  </rule>
-</grammar>
-```
+>> grammar = RubySpeech::GRXML.draw mode: :dtmf, root: 'pin' do
+  rule id: 'digit' do
+    one_of do
+      ('0'..'9').map { |d| item { d } }
+    end
+  end
-Matching against some sample input strings then returns the following results:
+  rule id: 'pin', scope: 'public' do
+    one_of do
+      item do
+        item repeat: '4' do
+          ruleref uri: '#digit'
+        end
+        "#"
+      end
+      item do
+        "* 9"
+      end
+    end
+  end
+end
-```ruby
->> subject.match '*9'
+matcher = RubySpeech::GRXML::Matcher.new grammar
+>> matcher.match '*9'
 => #<RubySpeech::GRXML::Match:0x00000100ae5d98
       @mode = :dtmf,
       @confidence = 1,
       @utterance = "*9",
       @interpretation = "*9"
     >
->> subject.match '1234#'
+>> matcher.match '1234#'
 => #<RubySpeech::GRXML::Match:0x00000100b7e020
       @mode = :dtmf,
       @confidence = 1,
       @utterance = "1234#",
       @interpretation = "1234#"
     >
->> subject.match '5678#'
+>> matcher.match '5678#'
 => #<RubySpeech::GRXML::Match:0x00000101218688
       @mode = :dtmf,
       @confidence = 1,
       @utterance = "5678#",
       @interpretation = "5678#"
     >
->> subject.match '1111#'
+>> matcher.match '1111#'
 => #<RubySpeech::GRXML::Match:0x000001012f69d8
       @mode = :dtmf,
       @confidence = 1,
       @utterance = "1111#",
       @interpretation = "1111#"
     >
->> subject.match '111'
+>> matcher.match '111'
 => #<RubySpeech::GRXML::NoMatch:0x00000101371660>
 ```
 ### NLSML
-[Natural Language Semantics Markup Language](http://www.w3.org/TR/nl-spec/) is the format used by many Speech Recognition engines and natural language processors to add semantic information to human language. RubySpeech is capable of generating and parsing such documents.
+[Natural Language Semantics Markup Language](http://tools.ietf.org/html/draft-ietf-speechsc-mrcpv2-27#section-6.3.1) is the format used by many Speech Recognition engines and natural language processors to add semantic information to human language. RubySpeech is capable of generating and parsing such documents.
 It is possible to generate an NLSML document like so:
 ```ruby
 require 'ruby_speech'
-nlsml = RubySpeech::NLSML.draw(grammar: 'http://flight', 'xmlns:myApp' => 'foo') do
+nlsml = RubySpeech::NLSML.draw grammar: 'http://flight' do
   interpretation confidence: 0.6 do
     input "I want to go to Pittsburgh", mode: :speech
-    model do
-      group name: 'airline' do
-        string name: 'to_city'
-      end
-    end
     instance do
-      self['myApp'].airline do
+      airline do
         to_city 'Pittsburgh'
       end
     end
@@ -230,14 +192,8 @@ nlsml = RubySpeech::NLSML.draw(grammar: 'http://flight', 'xmlns:myApp' => 'foo')
   interpretation confidence: 0.4 do
     input "I want to go to Stockholm"
-    model do
-      group name: 'airline' do
-        string name: 'to_city'
-      end
-    end
     instance do
-      self['myApp'].airline do
+      airline do
         to_city "Stockholm"
       end
     end
@@ -251,32 +207,22 @@ becomes:
 ```xml
 <?xml version="1.0"?>
-<result xmlns:myApp="foo" xmlns:xf="http://www.w3.org/2000/xforms" grammar="http://flight">
-  <interpretation confidence="60">
+<result xmlns="http://www.ietf.org/xml/ns/mrcpv2" grammar="http://flight">
+  <interpretation confidence="0.6">
     <input mode="speech">I want to go to Pittsburgh</input>
-    <xf:model>
-      <xf:group name="airline">
-        <xf:string name="to_city"/>
-      </xf:group>
-    </xf:model>
-    <xf:instance>
-      <myApp:airline>
-        <myApp:to_city>Pittsburgh</myApp:to_city>
-      </myApp:airline>
-    </xf:instance>
+    <instance>
+      <airline>
+        <to_city>Pittsburgh</to_city>
+      </airline>
+    </instance>
   </interpretation>
-  <interpretation confidence="40">
+  <interpretation confidence="0.4">
     <input>I want to go to Stockholm</input>
-    <xf:model>
-      <xf:group name="airline">
-        <xf:string name="to_city"/>
-      </xf:group>
-    </xf:model>
-    <xf:instance>
-      <myApp:airline>
-        <myApp:to_city>Stockholm</myApp:to_city>
-      </myApp:airline>
-    </xf:instance>
+    <instance>
+      <airline>
+        <to_city>Stockholm</to_city>
+      </airline>
+    </instance>
   </interpretation>
 </result>
 ```
@@ -365,4 +311,4 @@ Check out the [YARD documentation](http://rdoc.info/github/benlangfeld/ruby_spee
 ## Copyright
-Copyright (c) 2011 Ben Langfeld. MIT licence (see LICENSE for details).
+Copyright (c) 2013 Ben Langfeld. MIT licence (see LICENSE for details).

data/Rakefile CHANGED

@@ -15,8 +15,20 @@ RSpec::Core::RakeTask.new(:rcov) do |spec|
   spec.rspec_opts = '--color'
 end
-task :default => :spec
-task :ci => ['ci:setup:rspec', :spec]
+task :default => [:compile, :spec]
+task :ci => ['ci:setup:rspec', :compile, :spec]
 require 'yard'
 YARD::Rake::YardocTask.new
+if RUBY_PLATFORM =~ /java/
+  require 'rake/javaextensiontask'
+  Rake::JavaExtensionTask.new 'ruby_speech' do |ext|
+    ext.lib_dir = 'lib/ruby_speech'
+  end
+else
+  require 'rake/extensiontask'
+  Rake::ExtensionTask.new 'ruby_speech' do |ext|
+    ext.lib_dir = 'lib/ruby_speech'
+  end
+end

data/ext/ruby_speech/RubySpeechGRXMLMatcher.java ADDED

@@ -0,0 +1,42 @@
+package com.benlangfeld.ruby_speech;
+import org.jruby.Ruby;
+import org.jruby.RubyClass;
+import org.jruby.RubyModule;
+import org.jruby.RubyObject;
+import org.jruby.anno.JRubyClass;
+import org.jruby.anno.JRubyMethod;
+import org.jruby.runtime.ObjectAllocator;
+import org.jruby.runtime.ThreadContext;
+import org.jruby.runtime.Visibility;
+import org.jruby.runtime.builtin.IRubyObject;
+import org.jruby.javasupport.util.RuntimeHelpers;
+import java.util.regex.*;
+/**
+ * Java-backed portion of the Ruby class RubySpeech::GRXML::Matcher, providing
+ * the partial-match check on top of java.util.regex.
+ */
+@JRubyClass(name="RubySpeech::GRXML::Matcher")
+public class RubySpeechGRXMLMatcher extends RubyObject {
+  public RubySpeechGRXMLMatcher(final Ruby runtime, RubyClass rubyClass) {
+    super(runtime, rubyClass);
+  }
+  /**
+   * Checks whether {@code buffer} could still grow into a match of the
+   * pattern stored in the {@code @regex} instance variable.
+   *
+   * @param context the current JRuby thread context
+   * @param buffer  the input collected so far (converted with toString())
+   * @return a new RubySpeech::GRXML::PotentialMatch when the input does not
+   *         match in full but the matcher hit the end of the input; nil in
+   *         every other case (including a full match)
+   */
+  @JRubyMethod(visibility=Visibility.PRIVATE)
+  public IRubyObject check_potential_match(ThreadContext context, IRubyObject buffer)
+  {
+    final Ruby runtime = context.getRuntime();
+    final String patternSource = getInstanceVariable("@regex").toString();
+    final Matcher attempt = Pattern.compile(patternSource).matcher(buffer.toString());
+    // A full match is not a *potential* match, and neither is input that
+    // failed before the end was reached; both yield nil.
+    if (attempt.matches() || !attempt.hitEnd()) {
+      return runtime.getNil();
+    }
+    RubyModule potentialMatchClass = runtime.getClassFromPath("RubySpeech::GRXML::PotentialMatch");
+    return RuntimeHelpers.invoke(context, potentialMatchClass, "new");
+  }
+}

data/ext/ruby_speech/RubySpeechService.java ADDED

@@ -0,0 +1,23 @@
+package com.benlangfeld.ruby_speech;
+import org.jruby.Ruby;
+import org.jruby.RubyClass;
+import org.jruby.RubyModule;
+import org.jruby.RubyObject;
+import org.jruby.runtime.ObjectAllocator;
+import org.jruby.runtime.builtin.IRubyObject;
+import org.jruby.runtime.load.BasicLibraryService;
+/**
+ * Library service whose basicLoad defines RubySpeech::GRXML::Matcher and
+ * binds the annotated methods of RubySpeechGRXMLMatcher to it.
+ */
+public class RubySpeechService implements BasicLibraryService {
+  // Allocator that makes every Matcher instance a RubySpeechGRXMLMatcher.
+  private static final ObjectAllocator MATCHER_ALLOCATOR = new ObjectAllocator() {
+    public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) {
+      return new RubySpeechGRXMLMatcher(runtime, rubyClass);
+    }
+  };
+  /**
+   * Defines the RubySpeech and RubySpeech::GRXML modules and the Matcher
+   * class (superclass Object), then registers the annotated Java methods.
+   *
+   * @param ruby the runtime to define the constants in
+   * @return always true
+   */
+  public boolean basicLoad(Ruby ruby) {
+    RubyModule grxmlModule = ruby.defineModule("RubySpeech").defineModuleUnder("GRXML");
+    RubyClass matcherClass = grxmlModule.defineClassUnder("Matcher", ruby.getObject(), MATCHER_ALLOCATOR);
+    matcherClass.defineAnnotatedMethods(RubySpeechGRXMLMatcher.class);
+    return true;
+  }
+}

data/ext/ruby_speech/extconf.rb ADDED

@@ -0,0 +1,7 @@
+require 'mkmf'
+$LIBS << " -lpcre"
+abort "-----\n#{lib} is missing.\n-----" unless find_header('pcre.h')
+create_makefile 'ruby_speech/ruby_speech'

data/ext/ruby_speech/ruby_speech.c ADDED

@@ -0,0 +1,41 @@
+#include "ruby.h"
+#include "pcre.h"
+#include <stdio.h>
+/*
+ * Matcher#check_potential_match(buffer)
+ *
+ * Compiles the pattern obtained from @regex (through its #to_s) with PCRE and
+ * runs a partial DFA match of it against buffer.
+ *
+ * Returns a new RubySpeech::GRXML::PotentialMatch when PCRE reports
+ * PCRE_ERROR_PARTIAL. Returns nil for every other result, including a
+ * pattern that fails to compile.
+ *
+ * Raises whatever StringValueCStr raises when buffer or the pattern string
+ * cannot be used as a C string.
+ */
+static VALUE method_check_potential_match(VALUE self, VALUE buffer)
+{
+  int erroffset = 0;
+  const char *errptr = "";
+  int options = 0;
+  int result = 0;
+  int ovector[30];
+  int workspace[1024];
+  pcre *compiled_regex = NULL;
+  const char *input = NULL;
+  const char *regex = NULL;
+  VALUE regex_string = rb_funcall(rb_iv_get(self, "@regex"), rb_intern("to_s"), 0);
+  /* Convert both Ruby strings BEFORE compiling: StringValueCStr may raise,
+   * and a raise after pcre_compile would skip pcre_free and leak the
+   * compiled pattern. */
+  input = StringValueCStr(buffer);
+  regex = StringValueCStr(regex_string);
+  compiled_regex = pcre_compile(regex, options, &errptr, &erroffset, NULL);
+  /* An uncompilable pattern cannot produce a partial match. */
+  if (compiled_regex == NULL) {
+    return Qnil;
+  }
+  result = pcre_dfa_exec(compiled_regex, NULL, input, (int)strlen(input), 0, PCRE_PARTIAL,
+    ovector, sizeof(ovector) / sizeof(ovector[0]),
+    workspace, sizeof(workspace) / sizeof(workspace[0]));
+  /* Release the pattern before calling back into Ruby, which may raise. */
+  pcre_free(compiled_regex);
+  if (result == PCRE_ERROR_PARTIAL) {
+    VALUE RubySpeech      = rb_const_get(rb_cObject, rb_intern("RubySpeech"));
+    VALUE GRXML           = rb_const_get(RubySpeech, rb_intern("GRXML"));
+    VALUE PotentialMatch  = rb_const_get(GRXML, rb_intern("PotentialMatch"));
+    return rb_class_new_instance(0, NULL, PotentialMatch);
+  }
+  return Qnil;
+}
+/*
+ * Defines (or reopens) RubySpeech::GRXML::Matcher as a subclass of Object and
+ * attaches the native check_potential_match method, taking one argument.
+ *
+ * NOTE(review): the method is registered as public here, while the JRuby
+ * implementation in RubySpeechGRXMLMatcher.java declares it private --
+ * confirm which visibility is intended.
+ */
+void Init_ruby_speech(void)
+{
+  VALUE grxml_module  = rb_define_module_under(rb_define_module("RubySpeech"), "GRXML");
+  VALUE matcher_class = rb_define_class_under(grxml_module, "Matcher", rb_cObject);
+  rb_define_method(matcher_class, "check_potential_match", method_check_potential_match, 1);
+}