RubyGems - attentive - Versions diffs - 0.2.0 → 0.3.0 - Mend

attentive 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

checksums.yaml +4 -4
data/Rakefile +12 -29
data/bin/console +1 -0
data/lib/attentive/composite_entity.rb +3 -0
data/lib/attentive/config.rb +13 -0
data/lib/attentive/cursor.rb +8 -5
data/lib/attentive/entities/core/date/explicit.rb +25 -0
data/lib/attentive/entities/core/date/future.rb +10 -0
data/lib/attentive/entities/core/date/partial/future.rb +21 -0
data/lib/attentive/entities/core/date/partial/past.rb +21 -0
data/lib/attentive/entities/core/date/partial.rb +7 -0
data/lib/attentive/entities/core/date/past.rb +10 -0
data/lib/attentive/entities/core/date/relative/future.rb +2 -1
data/lib/attentive/entities/core/date/relative/past.rb +2 -1
data/lib/attentive/entities/core/date/relative.rb +2 -1
data/lib/attentive/entities/core/date.rb +8 -1
data/lib/attentive/entities/core/number/float/negative.rb +3 -3
data/lib/attentive/entities/core/number/float/positive.rb +3 -3
data/lib/attentive/entities/core/number/float.rb +7 -4
data/lib/attentive/entities/core/number/integer/negative.rb +3 -2
data/lib/attentive/entities/core/number/integer/positive.rb +3 -2
data/lib/attentive/entities/core/number/integer.rb +4 -4
data/lib/attentive/entities/core/number/negative.rb +5 -5
data/lib/attentive/entities/core/number/positive.rb +5 -5
data/lib/attentive/entities/core/number.rb +3 -2
data/lib/attentive/entity.rb +27 -6
data/lib/attentive/listener.rb +0 -1
data/lib/attentive/listener_collection.rb +1 -1
data/lib/attentive/match.rb +19 -2
data/lib/attentive/matcher.rb +11 -4
data/lib/attentive/message.rb +12 -0
data/lib/attentive/phrase.rb +5 -1
data/lib/attentive/substitutions.rb +10 -0
data/lib/attentive/token.rb +21 -4
data/lib/attentive/tokenizer.rb +69 -49
data/lib/attentive/tokens/regexp.rb +2 -2
data/lib/attentive/tokens.rb +3 -2
data/lib/attentive/trie.rb +45 -0
data/lib/attentive/version.rb +1 -1
data/lib/attentive.rb +26 -0
metadata +10 -5
data/lib/attentive/abbreviations.rb +0 -3
data/lib/attentive/contractions.rb +0 -3
data/lib/attentive/text.rb +0 -18

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b5952398cd8d68e82b27a4e9be335b748b315845
-  data.tar.gz: 77f1e15ef3e6952861d110f3c5d7605664e2979f
+  metadata.gz: 880167573144b1749ba6f538a4922210ff5e087b
+  data.tar.gz: 4b50dad29e7eacdf650c58741230d90c44c75c9b
 SHA512:
-  metadata.gz: 49de7e3eff7a8964082ffd35ac964497c5a7c706194f160fcb29bad5710c6977fc1d58ff86189d788cbe8290145575b07147cb35cafcbe16167b10607cd29063
-  data.tar.gz: b7692af23cbaa6b44467ab64c989883477ade8c1a9e26133c6c27e69168dbd132ba4e6dc7b4abbd8a8b5efeda09714a3138181e3e49290d16f449320f7e4d7d5
+  metadata.gz: b89a7ed8760e5e5acc8104c27f2d059fe82a05983fc29d077efabaf25b8cdc4ded15be16e601b1b9e9f94d902bebe5c30908d4d8d1e6311a8e92b24bf7e6cf16
+  data.tar.gz: d440c72708774dedbecf2117e4838ad97e2aaa13ebebdada905ccee018bebebc47cc1f3e3f7d6c5d15834a28279a709adec3164facb703779be0392c0c7aab0e

data/Rakefile CHANGED Viewed

@@ -9,14 +9,14 @@ end
 namespace :compile do
-  desc "Compile contractions.rb and abbreviations.rb"
+  desc "Compile substitutions.rb"
   task :data do
     data_path = File.expand_path(File.dirname(__FILE__) + "/data")
     output_path = File.expand_path(File.dirname(__FILE__) + "/lib/attentive")
-    contractions = {}
-    File.open(data_path + "/contractions.tsv") do |file|
+    substitutions = {}
+    File.open(data_path + "/substitutions.tsv") do |file|
       file.each do |line|
         next if line.start_with?("#") # skip comments
         next if line == "\n" # skip blank lines
@@ -27,37 +27,20 @@ namespace :compile do
         phrases = line.downcase.chomp.split("\t")
         raise "#{line.inspect} must have exactly two values" unless phrases.length >= 2
-        contractions[phrases.shift] = phrases
+        substitutions[phrases.shift] = phrases
       end
     end
-    File.open(output_path + "/contractions.rb", "w") do |file|
+    File.open(output_path + "/substitutions.rb", "w") do |file|
       file.write <<-RUBY
-module Attentive
-  CONTRACTIONS = #{contractions.inspect}.freeze
-end
-      RUBY
-    end
+require "attentive/trie"
-    abbreviations = {}
-    File.open(data_path + "/abbreviations.tsv") do |file|
-      file.each do |line|
-        next if line.start_with?("#") # skip comments
-        next if line == "\n" # skip blank lines
-        # the file contains tab-separated values.
-        # every line should have exactly two values:
-        #  + the first is the slang word
-        #  + the second is the normal word
-        words = line.downcase.chomp.split("\t")
-        raise "#{line.inspect} must have exactly two values" unless words.length == 2
-        abbreviations[words[0]] = words[1]
-      end
-    end
-    File.open(output_path + "/abbreviations.rb", "w") do |file|
-      file.write <<-RUBY
 module Attentive
-  ABBREVIATIONS = #{abbreviations.inspect}.freeze
+  SUBSTITUTIONS = #{substitutions.inspect}.each_with_object({}) do |(key, values), new_hash|
+    tokens = Attentive.tokenize(key, substitutions: false)
+    possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
+    value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
+    new_hash[tokens] = value
+  end.freeze
 end
       RUBY
     end

data/bin/console CHANGED Viewed

@@ -11,4 +11,5 @@ require "attentive"
 # Pry.start
 require "irb"
+include Attentive
 IRB.start

data/lib/attentive/composite_entity.rb CHANGED Viewed

@@ -9,8 +9,11 @@ module Attentive
       attr_accessor :entities
       def define(entity_name, *entities)
+        options = entities.last.is_a?(::Hash) ? entities.pop : {}
         create! entity_name do |entity_klass|
           entity_klass.entities = entities.map { |entity| Entity[entity] }
+          entity_klass.published = options.fetch(:published, true)
         end
       end
     end

data/lib/attentive/config.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+require "attentive/trie"
 module Attentive
   module Config
@@ -6,8 +8,19 @@ module Attentive
     attr_accessor :default_prohibited_contexts
     def invocations=(*values)
+      remove_instance_variable :@substitutions if defined?(@substitutions)
       @invocations = values.flatten
     end
+    def substitutions
+      return @substitutions if defined?(@substitutions)
+      @substitutions = Attentive::Trie.of_substitutions(
+        Attentive::SUBSTITUTIONS.merge(
+          invocations.each_with_object({}) { |invocation, hash|
+            tokens = Attentive.tokenize(invocation, substitutions: false)
+            hash[tokens] = [Attentive::Tokens::Invocation.new(invocation, 0)]
+          } ) )
+    end
   end
 end

data/lib/attentive/cursor.rb CHANGED Viewed

@@ -1,9 +1,10 @@
 module Attentive
   class Cursor
-    attr_reader :tokens, :pos
+    attr_reader :message, :tokens, :pos
-    def initialize(tokens, pos=0)
-      @tokens = tokens
+    def initialize(message, pos=0)
+      @message = message
+      @tokens = message.respond_to?(:tokens) ? message.tokens : message
       @pos = pos
     end
@@ -26,16 +27,18 @@ module Attentive
     end
     def inspect
-      "|#{(tokens[0...pos] || []).join}\e[7m#{tokens[pos]}\e[0m#{(tokens[(pos + 1)..-1] || []).join}|"
+      "<Cursor \"#{(tokens[0...pos] || []).join.inspect[1...-1]}\e[7m#{tokens[pos].to_s.inspect[1...-1]}\e[0m#{(tokens[(pos + 1)..-1] || []).join.inspect[1...-1]}\">"
     end
     def offset
-      peek.pos
+      peek.begin
     end
     def advance(n=1)
       @pos += n
+      self
     end
+    alias :adv :advance
     def eof?
       @pos == @tokens.length

data/lib/attentive/entities/core/date/explicit.rb ADDED Viewed

@@ -0,0 +1,25 @@
+require "attentive/entity"
+require "date"
+Attentive::Entity.define "core.date.explicit",
+  "{{month:core.date.month}} {{day:core.number.integer.positive}} {{year:core.number.integer.positive}}",
+  "{{day:core.number.integer.positive}} {{month:core.date.month}} {{year:core.number.integer.positive}}",
+  %q{(?:(?<month>\d\d?)/(?<day>\d\d?)/(?<year>\d\d(?:\d\d)?))},
+  %q{(?:(?<year>\d\d(?:\d\d)?)-(?<month>\d\d?)-(?<day>\d\d?))},
+  published: false do |match|
+  month = match["month"].to_i
+  day = match["day"].to_i
+  year = match["year"].to_i
+  # Interpret 2-digit years in the 2000s
+  year += 2000 if year < 100
+  nomatch! if day > 31 || month > 12
+  begin
+    Date.new(year, month, day)
+  rescue ArgumentError
+    nomatch!
+  end
+end

data/lib/attentive/entities/core/date/future.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require "attentive/entities/core/date/month"
+require "attentive/entities/core/date/wday"
+require "attentive/entities/core/date/relative"
+require "attentive/entities/core/date/partial"
+require "attentive/entities/core/date/explicit"
+Attentive::CompositeEntity.define "core.date.future",
+  "core.date.explicit",
+  "core.date.relative.future",
+  "core.date.partial.future"

data/lib/attentive/entities/core/date/partial/future.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require "attentive/entity"
+require "date"
+Attentive::Entity.define "core.date.partial.future",
+    "{{month:core.date.month}} {{day:core.number.integer.positive}}",
+    published: false do |match|
+  month = match["month"]
+  day = match["day"]
+  nomatch! if day > 31
+  today = Date.today
+  year = today.year
+  year += 1 if month < today.month || (month == today.month && day < today.day)
+  begin
+    Date.new(year, month, day)
+  rescue ArgumentError
+    nomatch!
+  end
+end

data/lib/attentive/entities/core/date/partial/past.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require "attentive/entity"
+require "date"
+Attentive::Entity.define "core.date.partial.past",
+    "{{month:core.date.month}} {{day:core.number.integer.positive}}",
+    published: false do |match|
+  month = match["month"]
+  day = match["day"]
+  nomatch! if day > 31
+  today = Date.today
+  year = today.year
+  year -= 1 if month > today.month || (month == today.month && day > today.day)
+  begin
+    Date.new(year, month, day)
+  rescue ArgumentError
+    nomatch!
+  end
+end

data/lib/attentive/entities/core/date/partial.rb ADDED Viewed

@@ -0,0 +1,7 @@
+require "attentive/entities/core/date/partial/past"
+require "attentive/entities/core/date/partial/future"
+Attentive::CompositeEntity.define "core.date.partial",
+  "core.date.partial.future",
+  "core.date.partial.past",
+  published: false

data/lib/attentive/entities/core/date/past.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require "attentive/entities/core/date/month"
+require "attentive/entities/core/date/wday"
+require "attentive/entities/core/date/relative"
+require "attentive/entities/core/date/partial"
+require "attentive/entities/core/date/explicit"
+Attentive::CompositeEntity.define "core.date.past",
+  "core.date.explicit",
+  "core.date.relative.past",
+  "core.date.partial.past"

data/lib/attentive/entities/core/date/relative/future.rb CHANGED Viewed

@@ -5,7 +5,8 @@ Attentive::Entity.define "core.date.relative.future",
     "today",
     "tomorrow",
     "{{core.date.wday}}",
-    "next {{core.date.wday}}" do |match|
+    "next {{core.date.wday}}",
+    published: false do |match|
   today = Date.today

data/lib/attentive/entities/core/date/relative/past.rb CHANGED Viewed

@@ -5,7 +5,8 @@ Attentive::Entity.define "core.date.relative.past",
     "today",
     "yesterday",
     "{{core.date.wday}}",
-    "last {{core.date.wday}}" do |match|
+    "last {{core.date.wday}}",
+    published: false do |match|
   today = Date.today

data/lib/attentive/entities/core/date/relative.rb CHANGED Viewed

@@ -3,4 +3,5 @@ require "attentive/entities/core/date/relative/future"
 Attentive::CompositeEntity.define "core.date.relative",
   "core.date.relative.future",
-  "core.date.relative.past"
+  "core.date.relative.past",
+  published: false

data/lib/attentive/entities/core/date.rb CHANGED Viewed

@@ -1,6 +1,13 @@
 require "attentive/entities/core/date/month"
 require "attentive/entities/core/date/wday"
 require "attentive/entities/core/date/relative"
+require "attentive/entities/core/date/partial"
+require "attentive/entities/core/date/explicit"
+require "attentive/entities/core/date/future"
+require "attentive/entities/core/date/past"
 Attentive::CompositeEntity.define "core.date",
-  "core.date.relative"
+  "core.date.explicit",
+  "core.date.relative",
+  "core.date.partial"

data/lib/attentive/entities/core/number/float/negative.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require "attentive/entity"
-require "bigdecimal"
-Attentive::Entity.define "core.number.float.negative", %q{(?<float>\-[\d,]+\.\d+)} do |match|
-  BigDecimal.new(match["float"].gsub(",", ""))
+Attentive::Entity.define "core.number.float.negative", "{{float:core.number.float}}", published: false do |match|
+  nomatch! if match["float"] >= 0
+  match["float"]
 end

data/lib/attentive/entities/core/number/float/positive.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 require "attentive/entity"
-require "bigdecimal"
-Attentive::Entity.define "core.number.float.positive", %q{(?<float>[\d,]+\.\d+)} do |match|
-  BigDecimal.new(match["float"].gsub(",", ""))
+Attentive::Entity.define "core.number.float.positive", "{{float:core.number.float}}", published: false do |match|
+  nomatch! if match["float"] <= 0
+  match["float"]
 end

data/lib/attentive/entities/core/number/float.rb CHANGED Viewed

@@ -1,6 +1,9 @@
+require "attentive/entity"
+require "bigdecimal"
+Attentive::Entity.define "core.number.float", %q{(?<float>\-?[\d,]+\.\d+)}, published: false do |match|
+  BigDecimal.new(match["float"].gsub(",", ""))
+end
 require "attentive/entities/core/number/float/positive"
 require "attentive/entities/core/number/float/negative"
-Attentive::CompositeEntity.define "core.number.float",
-  "core.number.float.positive",
-  "core.number.float.negative"

data/lib/attentive/entities/core/number/integer/negative.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 require "attentive/entity"
-Attentive::Entity.define "core.number.integer.negative", %q{(?<integer>\-\d+)} do |match|
-  match["integer"].gsub(",", "").to_i
+Attentive::Entity.define "core.number.integer.negative", "{{integer:core.number.integer}}", published: false do |match|
+  nomatch! if match["integer"] >= 0
+  match["integer"]
 end

data/lib/attentive/entities/core/number/integer/positive.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 require "attentive/entity"
-Attentive::Entity.define "core.number.integer.positive", %q{(?<integer>[\d,]+)} do |match|
-  match["integer"].gsub(",", "").to_i
+Attentive::Entity.define "core.number.integer.positive", "{{integer:core.number.integer}}", published: false do |match|
+  nomatch! if match["integer"] <= 0
+  match["integer"]
 end

data/lib/attentive/entities/core/number/integer.rb CHANGED Viewed

@@ -1,6 +1,6 @@
+Attentive::Entity.define "core.number.integer", %q{(?<integer>\-?[\d,]+)}, published: false do |match|
+  match["integer"].gsub(",", "").to_i
+end
 require "attentive/entities/core/number/integer/positive"
 require "attentive/entities/core/number/integer/negative"
-Attentive::CompositeEntity.define "core.number.integer",
-  "core.number.integer.positive",
-  "core.number.integer.negative"

data/lib/attentive/entities/core/number/negative.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-require "attentive/entities/core/number/integer/negative"
-require "attentive/entities/core/number/float/negative"
+require "attentive/entity"
-Attentive::CompositeEntity.define "core.number.negative",
-  "core.number.float.negative",
-  "core.number.integer.negative"
+Attentive::Entity.define "core.number.negative", "{{number:core.number}}", published: false do |match|
+  nomatch! if match["number"] >= 0
+  match["number"]
+end

data/lib/attentive/entities/core/number/positive.rb CHANGED Viewed

@@ -1,6 +1,6 @@
-require "attentive/entities/core/number/integer/positive"
-require "attentive/entities/core/number/float/positive"
+require "attentive/entity"
-Attentive::CompositeEntity.define "core.number.positive",
-  "core.number.float.positive",
-  "core.number.integer.positive"
+Attentive::Entity.define "core.number.positive", "{{number:core.number}}", published: false do |match|
+  nomatch! if match["number"] <= 0
+  match["number"]
+end

data/lib/attentive/entities/core/number.rb CHANGED Viewed

@@ -1,8 +1,9 @@
 require "attentive/entities/core/number/integer"
 require "attentive/entities/core/number/float"
-require "attentive/entities/core/number/positive"
-require "attentive/entities/core/number/negative"
 Attentive::CompositeEntity.define "core.number",
   "core.number.float",
   "core.number.integer"
+require "attentive/entities/core/number/positive"
+require "attentive/entities/core/number/negative"

data/lib/attentive/entity.rb CHANGED Viewed

@@ -10,6 +10,15 @@ module Attentive
     class << self
       attr_accessor :phrases
       attr_accessor :token_name
+      attr_writer :published
+      def published?
+        @published
+      end
+      def entities
+        @entities.values.select(&:published?)
+      end
       def [](entity_name)
         entity_name = entity_name.to_sym
@@ -19,10 +28,13 @@ module Attentive
       end
       def define(entity_name, *phrases, &block)
+        options = phrases.last.is_a?(::Hash) ? phrases.pop : {}
         create! entity_name do |entity_klass|
           entity_klass.phrases = phrases.map do |phrase|
             Attentive::Tokenizer.tokenize(phrase, entities: true, regexps: true, ambiguous: false)
           end
+          entity_klass.published = options.fetch(:published, true)
           entity_klass.send :define_method, :_value_from_match, &block if block_given?
         end
       end
@@ -55,7 +67,7 @@ module Attentive
-    def initialize(variable_name, pos=0)
+    def initialize(variable_name=self.class.token_name, pos=0)
       @variable_name = variable_name.to_s
       super pos
     end
@@ -78,11 +90,14 @@ module Attentive
     def matches?(cursor)
       self.class.phrases.each do |phrase|
-        cursor_copy = cursor.new_from_here
-        match = Attentive::Matcher.new(phrase, cursor_copy).match!
-        if match
-          cursor.advance cursor_copy.pos
-          return { variable_name => _value_from_match(match) }
+        catch NOMATCH do
+          cursor_copy = cursor.new_from_here
+          match = Attentive::Matcher.new(phrase, cursor_copy).match!
+          if match
+            value = _value_from_match(match) # <-- might throw
+            cursor.advance cursor_copy.pos
+            return { variable_name => value }
+          end
         end
       end
       false
@@ -92,5 +107,11 @@ module Attentive
       match.to_s
     end
+    def nomatch!
+      throw NOMATCH
+    end
+    NOMATCH = :nomatch.freeze
   end
 end

data/lib/attentive/listener.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-require "attentive/text"
 require "attentive/tokenizer"
 require "set"

data/lib/attentive/listener_collection.rb CHANGED Viewed

@@ -28,7 +28,7 @@ module Attentive
       message.tokens.each_with_index do |token, i|
         listeners.each do |listener|
           listener.phrases.each do |phrase|
-            match = Attentive::Matcher.new(phrase, Cursor.new(message.tokens, i), listener: listener, message: message).match!
+            match = Attentive::Matcher.new(phrase, Cursor.new(message, i), listener: listener).match!
             next unless match
             # Don't match more than one phrase per listener

data/lib/attentive/match.rb CHANGED Viewed

@@ -1,12 +1,14 @@
 module Attentive
   class Match
-    attr_reader :listener, :phrase, :message
+    attr_reader :listener, :phrase, :message, :match_start, :match_end
     def initialize(phrase, attributes={})
       @phrase = phrase.to_s
       @match_data = attributes.fetch(:match_data, {})
+      @match_start = attributes.fetch(:match_start)
+      @match_end = attributes.fetch(:match_end)
+      @message = attributes.fetch(:message)
       @listener = attributes[:listener]
-      @message = attributes[:message]
     end
     def matched?(variable_name)
@@ -15,11 +17,26 @@ module Attentive
     def [](variable_name)
       @match_data.fetch variable_name.to_s
+    rescue KeyError
+      raise KeyError, "#{$!.message} in #{inspect}"
     end
     def to_s
       @phrase
     end
+    def to_h
+      @match_data
+    end
+    def replace_with(tokens)
+      message[match_start...match_end] = tokens
+      match_start + tokens.length
+    end
+    def inspect
+      "#<#{self.class.name} #{@match_data.inspect} #{phrase.inspect}>"
+    end
   end
 end

data/lib/attentive/matcher.rb CHANGED Viewed

@@ -6,9 +6,11 @@ module Attentive
     def initialize(phrase, message, params={})
       @phrase = phrase
+      @match_start = message.pos
       @cursor = Cursor.new(phrase, params.fetch(:pos, 0))
       @message = message
-      @match_params = params.each_with_object({}) { |(key, value), new_hash| new_hash[key] = value if %i{listener message}.member?(key) }
+      self.message.pop while self.message.peek.whitespace?
+      @match_params = params.merge(message: message.message, match_start: message.pos)
       @match_data = {}
       @state = :matching
@@ -34,6 +36,7 @@ module Attentive
             @state = :mismatch
             break
           end
+          message.pop
           cursor.pop while cursor.peek.whitespace?
         elsif match_data = cursor.peek.matches?(message)
@@ -43,14 +46,18 @@ module Attentive
           @state = :found
           # -> This is the one spot where we instantiate a Match
-          return Attentive::Match.new(phrase, @match_params.merge(match_data: @match_data)) if cursor.eof?
+          return Attentive::Match.new(phrase, @match_params.merge(
+            match_end: message.pos,
+            match_data: @match_data)) if cursor.eof?
-        elsif !token.skippable?
+        elsif token.skippable?
+          message.pop
+        else
           @state = :mismatch
           break
         end
-        message.pop
         message.pop while message.peek.whitespace?
       end

data/lib/attentive/message.rb CHANGED Viewed

@@ -15,6 +15,18 @@ module Attentive
       @tokens ||= Attentive::Tokenizer.tokenize(text)
     end
+    def [](key)
+      tokens[key]
+    end
+    def []=(key, value)
+      tokens[key] = value
+    end
+    def length
+      tokens.length
+    end
     alias :to_s :text
     def inspect

data/lib/attentive/phrase.rb CHANGED Viewed

@@ -12,7 +12,11 @@ module Attentive
     end
     def inspect
-      "\"#{to_s}\""
+      map(&:inspect).join("\n")
+    end
+    def dup
+      self.class.new map(&:dup)
     end
   end

data/lib/attentive/substitutions.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require "attentive/trie"
+module Attentive
+  SUBSTITUTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "somebody'd"=>["somebody had", "somebody would"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], "something'd"=>["something had", "something would"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "would've"=>["would have"], "wouldn't"=>["would not"], "y'all"=>["you all"], "you'd"=>["you had", "you would"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"], "bye"=>["goodbye"], "gonna"=>["going to"], "hi"=>["hello"], "ol'"=>["old"], "'sup"=>["what is up"], "thanks"=>["thank you"], "wanna"=>["want to"], "w/o"=>["without"], "mon"=>["monday"], "tue"=>["tuesday"], "tues"=>["tuesday"], "wed"=>["wednesday"], "thu"=>["thursday"], "thur"=>["thursday"], "thurs"=>["thursday"], "fri"=>["friday"], "sat"=>["saturday"], "sun"=>["sunday"], "jan"=>["january"], "feb"=>["february"], "mar"=>["march"], "apr"=>["april"], "jun"=>["june"], "jul"=>["july"], "aug"=>["august"], "sep"=>["september"], "sept"=>["september"], "oct"=>["october"], "nov"=>["november"], "dec"=>["december"]}.each_with_object({}) do |(key, values), new_hash|
+    tokens = Attentive.tokenize(key, substitutions: false)
+    possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
+    value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
+    new_hash[tokens] = value
+  end.freeze
+end

data/lib/attentive/token.rb CHANGED Viewed

@@ -1,9 +1,13 @@
 module Attentive
   class Token
-    attr_reader :pos
+    attr_accessor :begin
     def initialize(pos=nil)
-      @pos = pos
+      @begin = pos
+    end
+    def end
+      self.begin + to_s.length
     end
     def ==(other)
@@ -31,11 +35,16 @@ module Attentive
     end
     def matches?(cursor)
-      self == cursor.peek
+      if self == cursor.peek
+        cursor.pop
+        return true
+      end
+      false
     end
     def inspect
-      "<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}>"
+      "<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}#{" #{self.begin}" if self.begin}>"
     end
   end
@@ -66,5 +75,13 @@ module Attentive
       self.class == other.class && self.string == other.string
     end
+    def eql?(other)
+      self == other
+    end
+    def hash
+      [ self.class, string ].hash
+    end
   end
 end

data/lib/attentive/tokenizer.rb CHANGED Viewed

@@ -1,6 +1,3 @@
-require "attentive/abbreviations"
-require "attentive/contractions"
-require "attentive/text"
 require "attentive/tokens"
 require "attentive/phrase"
 require "attentive/errors"
@@ -19,75 +16,80 @@ module Attentive
     def initialize(message, options={})
-      @message = Attentive::Text.normalize(message)
+      @message = message.downcase
       @chars = self.message.each_char.to_a
       @options = options
     end
+    def match_entities?
+      options.fetch(:entities, false)
+    end
+    def match_regexps?
+      options.fetch(:regexps, false)
+    end
+    def perform_substitutions?
+      options.fetch(:substitutions, true)
+    end
+    def fail_if_ambiguous?
+      !options.fetch(:ambiguous, true)
+    end
     def tokenize
       i = 0
-      tokens = []
+      @tokens = []
+      @leaves = []
       while i < chars.length
         char = chars[i]
+        char = CHARACTER_SUBSTITIONS.fetch(char, char)
+        pos = tokens.any? ? tokens.last.end : 0
-        if EMOJI_START === char && string = match_emoji_at(i)
-          tokens << emoji(string, pos: i)
-          i += string.length + 2
+        if WHITESPACE === char && string = match_whitespace_at(i)
+          add_token whitespace(string, pos: pos)
+          i += string.length
         elsif ENTITY_START === char && string = match_entity_at(i)
-          tokens << entity(*string.split(":").reverse, pos: i)
+          add_token entity(string, pos: pos)
           i += string.length + 4
-        elsif REGEXP_START === char && string = match_regexp_at(i)
-          tokens << regexp(string, pos: i)
+        elsif NUMBER_START === char && string = match_number_at(i)
+          add_token word(string, pos: pos)
           i += string.length
-        elsif WHITESPACE === char && string = match_whitespace_at(i)
-          tokens << whitespace(string, pos: i)
-          i += string.length
+        elsif EMOJI_START === char && string = match_emoji_at(i)
+          add_token emoji(string, pos: pos)
+          i += string.length + 2
-        elsif NUMBER_START === char && string = match_number_at(i)
-          tokens << word(string, pos: i)
+        elsif REGEXP_START === char && string = match_regexp_at(i)
+          add_token regexp(string, pos: pos)
           i += string.length
-        elsif PUNCTUATION === char # =~ /\W/
-          tokens << punctuation(char, pos: i)
+        elsif PUNCTUATION === char
+          add_token punctuation(char, pos: pos)
           i += 1
-        else
-          string = match_word_at(i)
-          if Attentive.invocations.member?(string)
-            tokens << invocation(string, pos: i)
-          elsif replace_with = Attentive::ABBREVIATIONS[string]
-            tokens.concat self.class.tokenize(replace_with, options)
-          elsif expands_to = Attentive::CONTRACTIONS[string]
-            possibilities = expands_to.map do |possibility|
-              self.class.tokenize(possibility, options)
-            end
-            if possibilities.length == 1
-              tokens.concat possibilities[0]
-            else
-              tokens << any_of(string, possibilities, pos: i)
-            end
-          else
-            tokens << word(string, pos: i)
-          end
+        else string = match_word_at(i)
+          add_token word(string, pos: pos)
           i += string.length
         end
       end
       fail_if_ambiguous!(message, tokens) if fail_if_ambiguous?
       Attentive::Phrase.new(tokens)
     end
+  private
+    attr_reader :tokens
     def match_emoji_at(i)
       emoji = ""
       while (i += 1) < chars.length
@@ -163,20 +165,32 @@ module Attentive
-    def match_entities?
-      options.fetch(:entities, false)
+    def add_token(token) # Appends token to @tokens and, when substitutions are enabled, advances the pending substitution matches
+      @tokens << token
+      return unless perform_substitutions? # predicate defined outside this hunk
+      @leaves = add_token_to_leaves token, @leaves # @leaves becomes whatever add_token_to_leaves returns for this token
     end
-    def match_regexps?
-      options.fetch(:regexps, false)
+    def add_token_to_leaves(token, leaves) # Steps every node in leaves by token; returns the nodes reached that are not yet terminal
+      (leaves + [Attentive.substitutions]).each_with_object([]) do |leaf, new_leaves| # Attentive.substitutions is tried as well (presumably the root Trie, so a match can start at this token; confirm)
+        if new_leaf = leaf[token] # assignment is intentional: falsy when leaf has no entry for token
+          if new_leaf.fin? # new_leaf holds a finished substitution
+            i = -1 - leaf.depth # negative index of the first matched token: leaf.depth tokens precede the one just appended
+            offset = tokens[i].begin # begin of the first matched token
+            replacement = new_leaf.fin.dup.each { |token| token.begin += offset } # NOTE(review): dup is shallow; if fin is an Array of tokens this appears to mutate the tokens stored in the trie; confirm
+            tokens[i..-1] = replacement # swap the matched tail of tokens for the replacement
+            return add_token_to_leaves replacement.last, [] # early exit: restart from the replacement's last token with no partial matches carried over
+          else
+            new_leaves.push new_leaf # partial match: keep this node for the next token
+          end
+        end
+      end
     end
-    def fail_if_ambiguous?
-      !options.fetch(:ambiguous, true)
-    end
     WHITESPACE = /\s/.freeze
-    PUNCTUATION = /[^\s\w'@-]/.freeze
+    PUNCTUATION = /[^\sa-z0-9_]/.freeze
     EMOJI_START = ":".freeze
     EMOJI_END = ":".freeze
     ENTITY_START = "{".freeze
@@ -186,7 +200,12 @@ module Attentive
     CONDITIONAL_NUMBER_START = /[\.\-]/.freeze
     NUMBER = /\d/.freeze
     CONDITIONAL_NUMBER = /[\.,]/.freeze
-    WORD = /[\w'\-@]/.freeze
+    WORD = /[a-z0-9_]/.freeze
+    CHARACTER_SUBSTITIONS = {
+      "“" => "\"",
+      "”" => "\"",
+      "‘" => "'",
+      "’" => "'" }.freeze
     def fail_if_ambiguous!(phrase, tokens)
       ambiguous_token = tokens.find(&:ambiguous?)
@@ -204,5 +223,6 @@ end
 # Attentive::Tokenizer needs to be defined first...
 require "attentive/entity"
 require "attentive/composite_entity"
+require "attentive/substitutions"
 require "attentive/entities/core"

data/lib/attentive/tokens/regexp.rb CHANGED Viewed

@@ -21,12 +21,12 @@ module Attentive
         # Find the first token following the match
         new_character_index = cursor.offset + match_data.to_s.length
-        cursor_pos = cursor.tokens.index { |token| token.pos >= new_character_index }
+        cursor_pos = cursor.tokens.index { |token| token.begin >= new_character_index }
         cursor_pos = cursor.tokens.length unless cursor_pos
         # If the match ends in the middle of a token, treat it as a mismatch
         match_end_token = cursor.tokens[cursor_pos - 1]
-        return false if match_end_token.pos + match_end_token.length > new_character_index
+        return false if match_end_token.begin + match_end_token.length > new_character_index
         # Advance the cursor to the first token after the regexp match
         cursor.advance cursor_pos - cursor.pos

data/lib/attentive/tokens.rb CHANGED Viewed

@@ -9,8 +9,9 @@ module Attentive
       Attentive::Tokens::Emoji.new string, pos
     end
-    def entity(entity_name, variable_name=entity_name, pos: nil)
-      Attentive::Entity[entity_name.to_sym].new(variable_name)
+    def entity(string, pos: nil) # Builds an entity from "entity_name" or "variable_name:entity_name"; pos is accepted but not used in this body
+      entity_name, variable_name = *string.split(":").reverse # reversed so the last segment is the entity name; variable_name is nil when string has no ":"
+      Attentive::Entity[entity_name.to_sym].new(variable_name || entity_name) # variable name falls back to the entity name
     end
     def invocation(string, pos: nil)

data/lib/attentive/trie.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module Attentive
+  class Trie # Prefix-tree node: children are keyed by token, and a terminal node stores its payload under the :fin key
+    attr_reader :depth # 0 by default; each node created by #add is one deeper than its parent
+    def initialize(depth: 0)
+      @depth = depth
+      @children = {}
+    end
+    def [](token) # child stored under token, or nil when there is none
+      @children[token]
+    end
+    def add(token) # returns the child for token, creating it one level deeper if it does not exist yet
+      raise "Can't add #{token.inspect} to trie because this leaf is a terminus" if fin?
+      @children[token] ||= self.class.new(depth: depth + 1)
+    end
+    def fin? # true once a payload has been stored under :fin
+      @children.key?(:fin)
+    end
+    def fin # the payload stored by fin!, or nil when none was stored
+      @children[:fin]
+    end
+    def fin!(finish) # marks this node terminal by storing finish under :fin
+      @children[:fin] = finish
+    end
+    def self.of_substitutions(substitutions) # Builds and returns a root trie from (token sequence, substitution) pairs
+      substitutions.each_with_object(self.new) do |(tokens, substitution), trie|
+        leaf = trie
+        tokens.each_with_index do |token, i|
+          raise "#{tokens.join} contains #{tokens[0...i].join}" if leaf.fin?
+          leaf = leaf.add token # descend, creating the node when needed
+        end
+        leaf.fin! substitution # NOTE(review): no check that this node already has children (shorter sequence registered after a longer one); confirm intended
+      end
+    end
+  end
+end

data/lib/attentive/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Attentive
-  VERSION = "0.2.0"
+  VERSION = "0.3.0"
 end

data/lib/attentive.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 require "attentive/version"
 require "attentive/config"
 module Attentive
   extend Attentive::Config
@@ -16,6 +17,31 @@ module Attentive
+  # Recognizes entities in a phrase: walks the message's tokens, trying each entity phrase at every index, and returns message.tokens.to_s
+  def self.abstract(message)
+    message = Attentive::Message.new(message)
+    entities = Attentive::Entity.entities.map { |entity| Attentive::Phrase.new([entity.new]) } # one single-token Phrase per entry of Attentive::Entity.entities
+    i = 0
+    while i < message.tokens.length # length is re-read on every pass (replace_with presumably edits message.tokens; confirm)
+      entities.each do |entity|
+        match = Attentive::Matcher.new(entity, Cursor.new(message, i)).match! # presumably attempts a match starting at token index i; confirm against Cursor
+        next unless match
+        i = match.replace_with(entity) # the return value becomes the new index; replace_with is defined outside this hunk
+        break # the first entity that matches wins for this position
+      end
+      i += 1 # also runs after a match, so scanning resumes one past the index returned by replace_with
+    end
+    message.tokens.to_s
+  end
+  # Shorthand for Attentive::Tokenizer.tokenize; message and options are forwarded unchanged
+  def self.tokenize(message, options={})
+    Attentive::Tokenizer.tokenize(message, options)
+  end
   # Attentive DSL
   def listeners

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: attentive
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Bob Lail
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-05-15 00:00:00.000000000 Z
+date: 2016-05-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: thread_safe
@@ -153,14 +153,18 @@ files:
 - bin/console
 - bin/setup
 - lib/attentive.rb
-- lib/attentive/abbreviations.rb
 - lib/attentive/composite_entity.rb
 - lib/attentive/config.rb
-- lib/attentive/contractions.rb
 - lib/attentive/cursor.rb
 - lib/attentive/entities/core.rb
 - lib/attentive/entities/core/date.rb
+- lib/attentive/entities/core/date/explicit.rb
+- lib/attentive/entities/core/date/future.rb
 - lib/attentive/entities/core/date/month.rb
+- lib/attentive/entities/core/date/partial.rb
+- lib/attentive/entities/core/date/partial/future.rb
+- lib/attentive/entities/core/date/partial/past.rb
+- lib/attentive/entities/core/date/past.rb
 - lib/attentive/entities/core/date/relative.rb
 - lib/attentive/entities/core/date/relative/future.rb
 - lib/attentive/entities/core/date/relative/past.rb
@@ -183,7 +187,7 @@ files:
 - lib/attentive/matcher.rb
 - lib/attentive/message.rb
 - lib/attentive/phrase.rb
-- lib/attentive/text.rb
+- lib/attentive/substitutions.rb
 - lib/attentive/token.rb
 - lib/attentive/tokenizer.rb
 - lib/attentive/tokens.rb
@@ -194,6 +198,7 @@ files:
 - lib/attentive/tokens/regexp.rb
 - lib/attentive/tokens/whitespace.rb
 - lib/attentive/tokens/word.rb
+- lib/attentive/trie.rb
 - lib/attentive/version.rb
 homepage: https://github.com/houston/attentive
 licenses:

data/lib/attentive/abbreviations.rb DELETED Viewed

@@ -1,3 +0,0 @@
-module Attentive
-  ABBREVIATIONS = {"bye"=>"goodbye", "gonna"=>"going to", "hi"=>"hello", "ol'"=>"old", "'sup"=>"what is up", "thanks"=>"thank you", "wanna"=>"want to", "mon"=>"monday", "tue"=>"tuesday", "tues"=>"tuesday", "wed"=>"wednesday", "thu"=>"thursday", "thur"=>"thursday", "thurs"=>"thursday", "fri"=>"friday", "sat"=>"saturday", "sun"=>"sunday", "jan"=>"january", "feb"=>"february", "mar"=>"march", "apr"=>"april", "jun"=>"june", "jul"=>"july", "aug"=>"august", "sep"=>"september", "sept"=>"september", "oct"=>"october", "nov"=>"november", "dec"=>"december"}.freeze
-end

data/lib/attentive/contractions.rb DELETED Viewed

@@ -1,3 +0,0 @@
-module Attentive
-  CONTRACTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "couldn't've"=>["could not have"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hadn't've"=>["had not have"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'd've"=>["he would have"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'd've"=>["i would have"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'd've"=>["it would have"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "mightn't've"=>["might not have"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'd've"=>["she would have"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "shouldn't've"=>["should not have"], "somebody'd"=>["somebody had", "somebody would"], "somebody'd've"=>["somebody would have"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'd've"=>["someone would have"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], 
"something'd"=>["something had", "something would"], "something'd've"=>["something would have"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there'd've"=>["there would have"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'dn't've"=>["they would not have"], "they'd've"=>["they would have"], "they'd'ven't"=>["they would have not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they'll'ven't"=>["they will have not"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'd've"=>["we would have"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'd've"=>["who would have"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "won't've"=>["will not have"], "would've"=>["would have"], "wouldn't"=>["would not"], "wouldn't've"=>["would not have"], "y'all"=>["you all"], "y'all'd've"=>["you all would have"], "y'all'dn't've"=>["you all would not have"], "y'all'll"=>["you all will"], "y'all'lln't"=>["you all will not"], "y'all'll've"=>["you all will have"], 
"y'all'll'ven't"=>["you all will have not"], "you'd"=>["you had", "you would"], "you'd've"=>["you would have"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"]}.freeze
-end

data/lib/attentive/text.rb DELETED Viewed

@@ -1,18 +0,0 @@
-module Attentive
-  module Text
-    extend self
-    def normalize(text)
-      straighten_quotes downcase text
-    end
-    def downcase(text)
-      text.downcase
-    end
-    def straighten_quotes(text)
-      text.gsub(/[“”]/, "\"").gsub(/[‘’]/, "'")
-    end
-  end
-end