attentive 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +12 -29
- data/bin/console +1 -0
- data/lib/attentive/composite_entity.rb +3 -0
- data/lib/attentive/config.rb +13 -0
- data/lib/attentive/cursor.rb +8 -5
- data/lib/attentive/entities/core/date/explicit.rb +25 -0
- data/lib/attentive/entities/core/date/future.rb +10 -0
- data/lib/attentive/entities/core/date/partial/future.rb +21 -0
- data/lib/attentive/entities/core/date/partial/past.rb +21 -0
- data/lib/attentive/entities/core/date/partial.rb +7 -0
- data/lib/attentive/entities/core/date/past.rb +10 -0
- data/lib/attentive/entities/core/date/relative/future.rb +2 -1
- data/lib/attentive/entities/core/date/relative/past.rb +2 -1
- data/lib/attentive/entities/core/date/relative.rb +2 -1
- data/lib/attentive/entities/core/date.rb +8 -1
- data/lib/attentive/entities/core/number/float/negative.rb +3 -3
- data/lib/attentive/entities/core/number/float/positive.rb +3 -3
- data/lib/attentive/entities/core/number/float.rb +7 -4
- data/lib/attentive/entities/core/number/integer/negative.rb +3 -2
- data/lib/attentive/entities/core/number/integer/positive.rb +3 -2
- data/lib/attentive/entities/core/number/integer.rb +4 -4
- data/lib/attentive/entities/core/number/negative.rb +5 -5
- data/lib/attentive/entities/core/number/positive.rb +5 -5
- data/lib/attentive/entities/core/number.rb +3 -2
- data/lib/attentive/entity.rb +27 -6
- data/lib/attentive/listener.rb +0 -1
- data/lib/attentive/listener_collection.rb +1 -1
- data/lib/attentive/match.rb +19 -2
- data/lib/attentive/matcher.rb +11 -4
- data/lib/attentive/message.rb +12 -0
- data/lib/attentive/phrase.rb +5 -1
- data/lib/attentive/substitutions.rb +10 -0
- data/lib/attentive/token.rb +21 -4
- data/lib/attentive/tokenizer.rb +69 -49
- data/lib/attentive/tokens/regexp.rb +2 -2
- data/lib/attentive/tokens.rb +3 -2
- data/lib/attentive/trie.rb +45 -0
- data/lib/attentive/version.rb +1 -1
- data/lib/attentive.rb +26 -0
- metadata +10 -5
- data/lib/attentive/abbreviations.rb +0 -3
- data/lib/attentive/contractions.rb +0 -3
- data/lib/attentive/text.rb +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 880167573144b1749ba6f538a4922210ff5e087b
|
4
|
+
data.tar.gz: 4b50dad29e7eacdf650c58741230d90c44c75c9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b89a7ed8760e5e5acc8104c27f2d059fe82a05983fc29d077efabaf25b8cdc4ded15be16e601b1b9e9f94d902bebe5c30908d4d8d1e6311a8e92b24bf7e6cf16
|
7
|
+
data.tar.gz: d440c72708774dedbecf2117e4838ad97e2aaa13ebebdada905ccee018bebebc47cc1f3e3f7d6c5d15834a28279a709adec3164facb703779be0392c0c7aab0e
|
data/Rakefile
CHANGED
@@ -9,14 +9,14 @@ end
|
|
9
9
|
|
10
10
|
namespace :compile do
|
11
11
|
|
12
|
-
desc "Compile
|
12
|
+
desc "Compile substitutions.rb"
|
13
13
|
task :data do
|
14
14
|
|
15
15
|
data_path = File.expand_path(File.dirname(__FILE__) + "/data")
|
16
16
|
output_path = File.expand_path(File.dirname(__FILE__) + "/lib/attentive")
|
17
17
|
|
18
|
-
|
19
|
-
File.open(data_path + "/
|
18
|
+
substitutions = {}
|
19
|
+
File.open(data_path + "/substitutions.tsv") do |file|
|
20
20
|
file.each do |line|
|
21
21
|
next if line.start_with?("#") # skip comments
|
22
22
|
next if line == "\n" # skip blank lines
|
@@ -27,37 +27,20 @@ namespace :compile do
|
|
27
27
|
phrases = line.downcase.chomp.split("\t")
|
28
28
|
raise "#{line.inspect} must have exactly two values" unless phrases.length >= 2
|
29
29
|
|
30
|
-
|
30
|
+
substitutions[phrases.shift] = phrases
|
31
31
|
end
|
32
32
|
end
|
33
|
-
File.open(output_path + "/
|
33
|
+
File.open(output_path + "/substitutions.rb", "w") do |file|
|
34
34
|
file.write <<-RUBY
|
35
|
-
|
36
|
-
CONTRACTIONS = #{contractions.inspect}.freeze
|
37
|
-
end
|
38
|
-
RUBY
|
39
|
-
end
|
35
|
+
require "attentive/trie"
|
40
36
|
|
41
|
-
abbreviations = {}
|
42
|
-
File.open(data_path + "/abbreviations.tsv") do |file|
|
43
|
-
file.each do |line|
|
44
|
-
next if line.start_with?("#") # skip comments
|
45
|
-
next if line == "\n" # skip blank lines
|
46
|
-
|
47
|
-
# the file contains tab-separated values.
|
48
|
-
# every line should have exactly two values:
|
49
|
-
# + the first is the slang word
|
50
|
-
# + the second is the normal word
|
51
|
-
words = line.downcase.chomp.split("\t")
|
52
|
-
raise "#{line.inspect} must have exactly two values" unless words.length == 2
|
53
|
-
|
54
|
-
abbreviations[words[0]] = words[1]
|
55
|
-
end
|
56
|
-
end
|
57
|
-
File.open(output_path + "/abbreviations.rb", "w") do |file|
|
58
|
-
file.write <<-RUBY
|
59
37
|
module Attentive
|
60
|
-
|
38
|
+
SUBSTITUTIONS = #{substitutions.inspect}.each_with_object({}) do |(key, values), new_hash|
|
39
|
+
tokens = Attentive.tokenize(key, substitutions: false)
|
40
|
+
possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
|
41
|
+
value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
|
42
|
+
new_hash[tokens] = value
|
43
|
+
end.freeze
|
61
44
|
end
|
62
45
|
RUBY
|
63
46
|
end
|
data/bin/console
CHANGED
@@ -9,8 +9,11 @@ module Attentive
|
|
9
9
|
attr_accessor :entities
|
10
10
|
|
11
11
|
def define(entity_name, *entities)
|
12
|
+
options = entities.last.is_a?(::Hash) ? entities.pop : {}
|
13
|
+
|
12
14
|
create! entity_name do |entity_klass|
|
13
15
|
entity_klass.entities = entities.map { |entity| Entity[entity] }
|
16
|
+
entity_klass.published = options.fetch(:published, true)
|
14
17
|
end
|
15
18
|
end
|
16
19
|
end
|
data/lib/attentive/config.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require "attentive/trie"
|
2
|
+
|
1
3
|
module Attentive
|
2
4
|
module Config
|
3
5
|
|
@@ -6,8 +8,19 @@ module Attentive
|
|
6
8
|
attr_accessor :default_prohibited_contexts
|
7
9
|
|
8
10
|
def invocations=(*values)
|
11
|
+
remove_instance_variable :@substitutions if defined?(@substitutions)
|
9
12
|
@invocations = values.flatten
|
10
13
|
end
|
11
14
|
|
15
|
+
def substitutions
|
16
|
+
return @substitutions if defined?(@substitutions)
|
17
|
+
@substitutions = Attentive::Trie.of_substitutions(
|
18
|
+
Attentive::SUBSTITUTIONS.merge(
|
19
|
+
invocations.each_with_object({}) { |invocation, hash|
|
20
|
+
tokens = Attentive.tokenize(invocation, substitutions: false)
|
21
|
+
hash[tokens] = [Attentive::Tokens::Invocation.new(invocation, 0)]
|
22
|
+
} ) )
|
23
|
+
end
|
24
|
+
|
12
25
|
end
|
13
26
|
end
|
data/lib/attentive/cursor.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module Attentive
|
2
2
|
class Cursor
|
3
|
-
attr_reader :tokens, :pos
|
3
|
+
attr_reader :message, :tokens, :pos
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
5
|
+
def initialize(message, pos=0)
|
6
|
+
@message = message
|
7
|
+
@tokens = message.respond_to?(:tokens) ? message.tokens : message
|
7
8
|
@pos = pos
|
8
9
|
end
|
9
10
|
|
@@ -26,16 +27,18 @@ module Attentive
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def inspect
|
29
|
-
"
|
30
|
+
"<Cursor \"#{(tokens[0...pos] || []).join.inspect[1...-1]}\e[7m#{tokens[pos].to_s.inspect[1...-1]}\e[0m#{(tokens[(pos + 1)..-1] || []).join.inspect[1...-1]}\">"
|
30
31
|
end
|
31
32
|
|
32
33
|
def offset
|
33
|
-
peek.
|
34
|
+
peek.begin
|
34
35
|
end
|
35
36
|
|
36
37
|
def advance(n=1)
|
37
38
|
@pos += n
|
39
|
+
self
|
38
40
|
end
|
41
|
+
alias :adv :advance
|
39
42
|
|
40
43
|
def eof?
|
41
44
|
@pos == @tokens.length
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.date.explicit",
|
5
|
+
"{{month:core.date.month}} {{day:core.number.integer.positive}} {{year:core.number.integer.positive}}",
|
6
|
+
"{{day:core.number.integer.positive}} {{month:core.date.month}} {{year:core.number.integer.positive}}",
|
7
|
+
%q{(?:(?<month>\d\d?)/(?<day>\d\d?)/(?<year>\d\d(?:\d\d)?))},
|
8
|
+
%q{(?:(?<year>\d\d(?:\d\d)?)-(?<month>\d\d?)-(?<day>\d\d?))},
|
9
|
+
published: false do |match|
|
10
|
+
|
11
|
+
month = match["month"].to_i
|
12
|
+
day = match["day"].to_i
|
13
|
+
year = match["year"].to_i
|
14
|
+
|
15
|
+
# Interpret 2-digit years in the 2000s
|
16
|
+
year += 2000 if year < 100
|
17
|
+
|
18
|
+
nomatch! if day > 31 || month > 12
|
19
|
+
|
20
|
+
begin
|
21
|
+
Date.new(year, month, day)
|
22
|
+
rescue ArgumentError
|
23
|
+
nomatch!
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require "attentive/entities/core/date/month"
|
2
|
+
require "attentive/entities/core/date/wday"
|
3
|
+
require "attentive/entities/core/date/relative"
|
4
|
+
require "attentive/entities/core/date/partial"
|
5
|
+
require "attentive/entities/core/date/explicit"
|
6
|
+
|
7
|
+
Attentive::CompositeEntity.define "core.date.future",
|
8
|
+
"core.date.explicit",
|
9
|
+
"core.date.relative.future",
|
10
|
+
"core.date.partial.future"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.date.partial.future",
|
5
|
+
"{{month:core.date.month}} {{day:core.number.integer.positive}}",
|
6
|
+
published: false do |match|
|
7
|
+
|
8
|
+
month = match["month"]
|
9
|
+
day = match["day"]
|
10
|
+
nomatch! if day > 31
|
11
|
+
|
12
|
+
today = Date.today
|
13
|
+
year = today.year
|
14
|
+
year += 1 if month < today.month || (month == today.month && day < today.day)
|
15
|
+
|
16
|
+
begin
|
17
|
+
Date.new(year, month, day)
|
18
|
+
rescue ArgumentError
|
19
|
+
nomatch!
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.date.partial.past",
|
5
|
+
"{{month:core.date.month}} {{day:core.number.integer.positive}}",
|
6
|
+
published: false do |match|
|
7
|
+
|
8
|
+
month = match["month"]
|
9
|
+
day = match["day"]
|
10
|
+
nomatch! if day > 31
|
11
|
+
|
12
|
+
today = Date.today
|
13
|
+
year = today.year
|
14
|
+
year -= 1 if month > today.month || (month == today.month && day > today.day)
|
15
|
+
|
16
|
+
begin
|
17
|
+
Date.new(year, month, day)
|
18
|
+
rescue ArgumentError
|
19
|
+
nomatch!
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require "attentive/entities/core/date/month"
|
2
|
+
require "attentive/entities/core/date/wday"
|
3
|
+
require "attentive/entities/core/date/relative"
|
4
|
+
require "attentive/entities/core/date/partial"
|
5
|
+
require "attentive/entities/core/date/explicit"
|
6
|
+
|
7
|
+
Attentive::CompositeEntity.define "core.date.past",
|
8
|
+
"core.date.explicit",
|
9
|
+
"core.date.relative.past",
|
10
|
+
"core.date.partial.past"
|
@@ -1,6 +1,13 @@
|
|
1
1
|
require "attentive/entities/core/date/month"
|
2
2
|
require "attentive/entities/core/date/wday"
|
3
3
|
require "attentive/entities/core/date/relative"
|
4
|
+
require "attentive/entities/core/date/partial"
|
5
|
+
require "attentive/entities/core/date/explicit"
|
6
|
+
|
7
|
+
require "attentive/entities/core/date/future"
|
8
|
+
require "attentive/entities/core/date/past"
|
4
9
|
|
5
10
|
Attentive::CompositeEntity.define "core.date",
|
6
|
-
"core.date.
|
11
|
+
"core.date.explicit",
|
12
|
+
"core.date.relative",
|
13
|
+
"core.date.partial"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
|
-
require "bigdecimal"
|
3
2
|
|
4
|
-
Attentive::Entity.define "core.number.float.negative",
|
5
|
-
|
3
|
+
Attentive::Entity.define "core.number.float.negative", "{{float:core.number.float}}", published: false do |match|
|
4
|
+
nomatch! if match["float"] >= 0
|
5
|
+
match["float"]
|
6
6
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
|
-
require "bigdecimal"
|
3
2
|
|
4
|
-
Attentive::Entity.define "core.number.float.positive",
|
5
|
-
|
3
|
+
Attentive::Entity.define "core.number.float.positive", "{{float:core.number.float}}", published: false do |match|
|
4
|
+
nomatch! if match["float"] <= 0
|
5
|
+
match["float"]
|
6
6
|
end
|
@@ -1,6 +1,9 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "bigdecimal"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.number.float", %q{(?<float>\-?[\d,]+\.\d+)}, published: false do |match|
|
5
|
+
BigDecimal.new(match["float"].gsub(",", ""))
|
6
|
+
end
|
7
|
+
|
1
8
|
require "attentive/entities/core/number/float/positive"
|
2
9
|
require "attentive/entities/core/number/float/negative"
|
3
|
-
|
4
|
-
Attentive::CompositeEntity.define "core.number.float",
|
5
|
-
"core.number.float.positive",
|
6
|
-
"core.number.float.negative"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
2
|
|
3
|
-
Attentive::Entity.define "core.number.integer.negative",
|
4
|
-
match["integer"]
|
3
|
+
Attentive::Entity.define "core.number.integer.negative", "{{integer:core.number.integer}}", published: false do |match|
|
4
|
+
nomatch! if match["integer"] >= 0
|
5
|
+
match["integer"]
|
5
6
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
2
|
|
3
|
-
Attentive::Entity.define "core.number.integer.positive",
|
4
|
-
match["integer"]
|
3
|
+
Attentive::Entity.define "core.number.integer.positive", "{{integer:core.number.integer}}", published: false do |match|
|
4
|
+
nomatch! if match["integer"] <= 0
|
5
|
+
match["integer"]
|
5
6
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
+
Attentive::Entity.define "core.number.integer", %q{(?<integer>\-?[\d,]+)}, published: false do |match|
|
2
|
+
match["integer"].gsub(",", "").to_i
|
3
|
+
end
|
4
|
+
|
1
5
|
require "attentive/entities/core/number/integer/positive"
|
2
6
|
require "attentive/entities/core/number/integer/negative"
|
3
|
-
|
4
|
-
Attentive::CompositeEntity.define "core.number.integer",
|
5
|
-
"core.number.integer.positive",
|
6
|
-
"core.number.integer.negative"
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require "attentive/
|
2
|
-
require "attentive/entities/core/number/float/negative"
|
1
|
+
require "attentive/entity"
|
3
2
|
|
4
|
-
Attentive::
|
5
|
-
"
|
6
|
-
"
|
3
|
+
Attentive::Entity.define "core.number.negative", "{{number:core.number}}", published: false do |match|
|
4
|
+
nomatch! if match["number"] >= 0
|
5
|
+
match["number"]
|
6
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require "attentive/
|
2
|
-
require "attentive/entities/core/number/float/positive"
|
1
|
+
require "attentive/entity"
|
3
2
|
|
4
|
-
Attentive::
|
5
|
-
"
|
6
|
-
"
|
3
|
+
Attentive::Entity.define "core.number.positive", "{{number:core.number}}", published: false do |match|
|
4
|
+
nomatch! if match["number"] <= 0
|
5
|
+
match["number"]
|
6
|
+
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
require "attentive/entities/core/number/integer"
|
2
2
|
require "attentive/entities/core/number/float"
|
3
|
-
require "attentive/entities/core/number/positive"
|
4
|
-
require "attentive/entities/core/number/negative"
|
5
3
|
|
6
4
|
Attentive::CompositeEntity.define "core.number",
|
7
5
|
"core.number.float",
|
8
6
|
"core.number.integer"
|
7
|
+
|
8
|
+
require "attentive/entities/core/number/positive"
|
9
|
+
require "attentive/entities/core/number/negative"
|
data/lib/attentive/entity.rb
CHANGED
@@ -10,6 +10,15 @@ module Attentive
|
|
10
10
|
class << self
|
11
11
|
attr_accessor :phrases
|
12
12
|
attr_accessor :token_name
|
13
|
+
attr_writer :published
|
14
|
+
|
15
|
+
def published?
|
16
|
+
@published
|
17
|
+
end
|
18
|
+
|
19
|
+
def entities
|
20
|
+
@entities.values.select(&:published?)
|
21
|
+
end
|
13
22
|
|
14
23
|
def [](entity_name)
|
15
24
|
entity_name = entity_name.to_sym
|
@@ -19,10 +28,13 @@ module Attentive
|
|
19
28
|
end
|
20
29
|
|
21
30
|
def define(entity_name, *phrases, &block)
|
31
|
+
options = phrases.last.is_a?(::Hash) ? phrases.pop : {}
|
32
|
+
|
22
33
|
create! entity_name do |entity_klass|
|
23
34
|
entity_klass.phrases = phrases.map do |phrase|
|
24
35
|
Attentive::Tokenizer.tokenize(phrase, entities: true, regexps: true, ambiguous: false)
|
25
36
|
end
|
37
|
+
entity_klass.published = options.fetch(:published, true)
|
26
38
|
entity_klass.send :define_method, :_value_from_match, &block if block_given?
|
27
39
|
end
|
28
40
|
end
|
@@ -55,7 +67,7 @@ module Attentive
|
|
55
67
|
|
56
68
|
|
57
69
|
|
58
|
-
def initialize(variable_name, pos=0)
|
70
|
+
def initialize(variable_name=self.class.token_name, pos=0)
|
59
71
|
@variable_name = variable_name.to_s
|
60
72
|
super pos
|
61
73
|
end
|
@@ -78,11 +90,14 @@ module Attentive
|
|
78
90
|
|
79
91
|
def matches?(cursor)
|
80
92
|
self.class.phrases.each do |phrase|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
93
|
+
catch NOMATCH do
|
94
|
+
cursor_copy = cursor.new_from_here
|
95
|
+
match = Attentive::Matcher.new(phrase, cursor_copy).match!
|
96
|
+
if match
|
97
|
+
value = _value_from_match(match) # <-- might throw
|
98
|
+
cursor.advance cursor_copy.pos
|
99
|
+
return { variable_name => value }
|
100
|
+
end
|
86
101
|
end
|
87
102
|
end
|
88
103
|
false
|
@@ -92,5 +107,11 @@ module Attentive
|
|
92
107
|
match.to_s
|
93
108
|
end
|
94
109
|
|
110
|
+
def nomatch!
|
111
|
+
throw NOMATCH
|
112
|
+
end
|
113
|
+
|
114
|
+
NOMATCH = :nomatch.freeze
|
115
|
+
|
95
116
|
end
|
96
117
|
end
|
data/lib/attentive/listener.rb
CHANGED
@@ -28,7 +28,7 @@ module Attentive
|
|
28
28
|
message.tokens.each_with_index do |token, i|
|
29
29
|
listeners.each do |listener|
|
30
30
|
listener.phrases.each do |phrase|
|
31
|
-
match = Attentive::Matcher.new(phrase, Cursor.new(message
|
31
|
+
match = Attentive::Matcher.new(phrase, Cursor.new(message, i), listener: listener).match!
|
32
32
|
next unless match
|
33
33
|
|
34
34
|
# Don't match more than one phrase per listener
|
data/lib/attentive/match.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
module Attentive
|
2
2
|
class Match
|
3
|
-
attr_reader :listener, :phrase, :message
|
3
|
+
attr_reader :listener, :phrase, :message, :match_start, :match_end
|
4
4
|
|
5
5
|
def initialize(phrase, attributes={})
|
6
6
|
@phrase = phrase.to_s
|
7
7
|
@match_data = attributes.fetch(:match_data, {})
|
8
|
+
@match_start = attributes.fetch(:match_start)
|
9
|
+
@match_end = attributes.fetch(:match_end)
|
10
|
+
@message = attributes.fetch(:message)
|
8
11
|
@listener = attributes[:listener]
|
9
|
-
@message = attributes[:message]
|
10
12
|
end
|
11
13
|
|
12
14
|
def matched?(variable_name)
|
@@ -15,11 +17,26 @@ module Attentive
|
|
15
17
|
|
16
18
|
def [](variable_name)
|
17
19
|
@match_data.fetch variable_name.to_s
|
20
|
+
rescue KeyError
|
21
|
+
raise KeyError, "#{$!.message} in #{inspect}"
|
18
22
|
end
|
19
23
|
|
20
24
|
def to_s
|
21
25
|
@phrase
|
22
26
|
end
|
23
27
|
|
28
|
+
def to_h
|
29
|
+
@match_data
|
30
|
+
end
|
31
|
+
|
32
|
+
def replace_with(tokens)
|
33
|
+
message[match_start...match_end] = tokens
|
34
|
+
match_start + tokens.length
|
35
|
+
end
|
36
|
+
|
37
|
+
def inspect
|
38
|
+
"#<#{self.class.name} #{@match_data.inspect} #{phrase.inspect}>"
|
39
|
+
end
|
40
|
+
|
24
41
|
end
|
25
42
|
end
|
data/lib/attentive/matcher.rb
CHANGED
@@ -6,9 +6,11 @@ module Attentive
|
|
6
6
|
|
7
7
|
def initialize(phrase, message, params={})
|
8
8
|
@phrase = phrase
|
9
|
+
@match_start = message.pos
|
9
10
|
@cursor = Cursor.new(phrase, params.fetch(:pos, 0))
|
10
11
|
@message = message
|
11
|
-
|
12
|
+
self.message.pop while self.message.peek.whitespace?
|
13
|
+
@match_params = params.merge(message: message.message, match_start: message.pos)
|
12
14
|
@match_data = {}
|
13
15
|
@state = :matching
|
14
16
|
|
@@ -34,6 +36,7 @@ module Attentive
|
|
34
36
|
@state = :mismatch
|
35
37
|
break
|
36
38
|
end
|
39
|
+
message.pop
|
37
40
|
cursor.pop while cursor.peek.whitespace?
|
38
41
|
|
39
42
|
elsif match_data = cursor.peek.matches?(message)
|
@@ -43,14 +46,18 @@ module Attentive
|
|
43
46
|
@state = :found
|
44
47
|
|
45
48
|
# -> This is the one spot where we instantiate a Match
|
46
|
-
return Attentive::Match.new(phrase, @match_params.merge(
|
49
|
+
return Attentive::Match.new(phrase, @match_params.merge(
|
50
|
+
match_end: message.pos,
|
51
|
+
match_data: @match_data)) if cursor.eof?
|
47
52
|
|
48
|
-
elsif
|
53
|
+
elsif token.skippable?
|
54
|
+
message.pop
|
55
|
+
|
56
|
+
else
|
49
57
|
@state = :mismatch
|
50
58
|
break
|
51
59
|
end
|
52
60
|
|
53
|
-
message.pop
|
54
61
|
message.pop while message.peek.whitespace?
|
55
62
|
end
|
56
63
|
|
data/lib/attentive/message.rb
CHANGED
@@ -15,6 +15,18 @@ module Attentive
|
|
15
15
|
@tokens ||= Attentive::Tokenizer.tokenize(text)
|
16
16
|
end
|
17
17
|
|
18
|
+
def [](key)
|
19
|
+
tokens[key]
|
20
|
+
end
|
21
|
+
|
22
|
+
def []=(key, value)
|
23
|
+
tokens[key] = value
|
24
|
+
end
|
25
|
+
|
26
|
+
def length
|
27
|
+
tokens.length
|
28
|
+
end
|
29
|
+
|
18
30
|
alias :to_s :text
|
19
31
|
|
20
32
|
def inspect
|
data/lib/attentive/phrase.rb
CHANGED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "attentive/trie"
|
2
|
+
|
3
|
+
module Attentive
|
4
|
+
SUBSTITUTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "somebody'd"=>["somebody had", "somebody would"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], "something'd"=>["something had", "something would"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "would've"=>["would have"], "wouldn't"=>["would not"], "y'all"=>["you all"], "you'd"=>["you had", "you would"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"], "bye"=>["goodbye"], "gonna"=>["going to"], "hi"=>["hello"], "ol'"=>["old"], "'sup"=>["what is up"], "thanks"=>["thank you"], "wanna"=>["want to"], "w/o"=>["without"], "mon"=>["monday"], "tue"=>["tuesday"], "tues"=>["tuesday"], "wed"=>["wednesday"], "thu"=>["thursday"], "thur"=>["thursday"], "thurs"=>["thursday"], "fri"=>["friday"], "sat"=>["saturday"], "sun"=>["sunday"], "jan"=>["january"], "feb"=>["february"], "mar"=>["march"], "apr"=>["april"], "jun"=>["june"], "jul"=>["july"], "aug"=>["august"], "sep"=>["september"], "sept"=>["september"], "oct"=>["october"], "nov"=>["november"], "dec"=>["december"]}.each_with_object({}) do |(key, values), new_hash|
|
5
|
+
tokens = Attentive.tokenize(key, substitutions: false)
|
6
|
+
possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
|
7
|
+
value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
|
8
|
+
new_hash[tokens] = value
|
9
|
+
end.freeze
|
10
|
+
end
|
data/lib/attentive/token.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
module Attentive
|
2
2
|
class Token
|
3
|
-
|
3
|
+
attr_accessor :begin
|
4
4
|
|
5
5
|
def initialize(pos=nil)
|
6
|
-
@
|
6
|
+
@begin = pos
|
7
|
+
end
|
8
|
+
|
9
|
+
def end
|
10
|
+
self.begin + to_s.length
|
7
11
|
end
|
8
12
|
|
9
13
|
def ==(other)
|
@@ -31,11 +35,16 @@ module Attentive
|
|
31
35
|
end
|
32
36
|
|
33
37
|
def matches?(cursor)
|
34
|
-
self == cursor.peek
|
38
|
+
if self == cursor.peek
|
39
|
+
cursor.pop
|
40
|
+
return true
|
41
|
+
end
|
42
|
+
|
43
|
+
false
|
35
44
|
end
|
36
45
|
|
37
46
|
def inspect
|
38
|
-
"<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}>"
|
47
|
+
"<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}#{" #{self.begin}" if self.begin}>"
|
39
48
|
end
|
40
49
|
|
41
50
|
end
|
@@ -66,5 +75,13 @@ module Attentive
|
|
66
75
|
self.class == other.class && self.string == other.string
|
67
76
|
end
|
68
77
|
|
78
|
+
def eql?(other)
|
79
|
+
self == other
|
80
|
+
end
|
81
|
+
|
82
|
+
def hash
|
83
|
+
[ self.class, string ].hash
|
84
|
+
end
|
85
|
+
|
69
86
|
end
|
70
87
|
end
|
data/lib/attentive/tokenizer.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
require "attentive/abbreviations"
|
2
|
-
require "attentive/contractions"
|
3
|
-
require "attentive/text"
|
4
1
|
require "attentive/tokens"
|
5
2
|
require "attentive/phrase"
|
6
3
|
require "attentive/errors"
|
@@ -19,75 +16,80 @@ module Attentive
|
|
19
16
|
|
20
17
|
|
21
18
|
def initialize(message, options={})
|
22
|
-
@message =
|
19
|
+
@message = message.downcase
|
23
20
|
@chars = self.message.each_char.to_a
|
24
21
|
@options = options
|
25
22
|
end
|
26
23
|
|
24
|
+
def match_entities?
|
25
|
+
options.fetch(:entities, false)
|
26
|
+
end
|
27
|
+
|
28
|
+
def match_regexps?
|
29
|
+
options.fetch(:regexps, false)
|
30
|
+
end
|
31
|
+
|
32
|
+
def perform_substitutions?
|
33
|
+
options.fetch(:substitutions, true)
|
34
|
+
end
|
35
|
+
|
36
|
+
def fail_if_ambiguous?
|
37
|
+
!options.fetch(:ambiguous, true)
|
38
|
+
end
|
39
|
+
|
27
40
|
|
28
41
|
|
29
42
|
def tokenize
|
30
43
|
i = 0
|
31
|
-
tokens = []
|
44
|
+
@tokens = []
|
45
|
+
@leaves = []
|
46
|
+
|
32
47
|
while i < chars.length
|
33
48
|
char = chars[i]
|
49
|
+
char = CHARACTER_SUBSTITIONS.fetch(char, char)
|
50
|
+
pos = tokens.any? ? tokens.last.end : 0
|
34
51
|
|
35
|
-
if
|
36
|
-
|
37
|
-
i += string.length
|
52
|
+
if WHITESPACE === char && string = match_whitespace_at(i)
|
53
|
+
add_token whitespace(string, pos: pos)
|
54
|
+
i += string.length
|
38
55
|
|
39
56
|
elsif ENTITY_START === char && string = match_entity_at(i)
|
40
|
-
|
57
|
+
add_token entity(string, pos: pos)
|
41
58
|
i += string.length + 4
|
42
59
|
|
43
|
-
elsif
|
44
|
-
|
60
|
+
elsif NUMBER_START === char && string = match_number_at(i)
|
61
|
+
add_token word(string, pos: pos)
|
45
62
|
i += string.length
|
46
63
|
|
47
|
-
elsif
|
48
|
-
|
49
|
-
i += string.length
|
64
|
+
elsif EMOJI_START === char && string = match_emoji_at(i)
|
65
|
+
add_token emoji(string, pos: pos)
|
66
|
+
i += string.length + 2
|
50
67
|
|
51
|
-
elsif
|
52
|
-
|
68
|
+
elsif REGEXP_START === char && string = match_regexp_at(i)
|
69
|
+
add_token regexp(string, pos: pos)
|
53
70
|
i += string.length
|
54
71
|
|
55
|
-
elsif PUNCTUATION === char
|
56
|
-
|
72
|
+
elsif PUNCTUATION === char
|
73
|
+
add_token punctuation(char, pos: pos)
|
57
74
|
i += 1
|
58
75
|
|
59
|
-
else
|
60
|
-
string
|
61
|
-
if Attentive.invocations.member?(string)
|
62
|
-
tokens << invocation(string, pos: i)
|
63
|
-
|
64
|
-
elsif replace_with = Attentive::ABBREVIATIONS[string]
|
65
|
-
tokens.concat self.class.tokenize(replace_with, options)
|
66
|
-
|
67
|
-
elsif expands_to = Attentive::CONTRACTIONS[string]
|
68
|
-
possibilities = expands_to.map do |possibility|
|
69
|
-
self.class.tokenize(possibility, options)
|
70
|
-
end
|
71
|
-
|
72
|
-
if possibilities.length == 1
|
73
|
-
tokens.concat possibilities[0]
|
74
|
-
else
|
75
|
-
tokens << any_of(string, possibilities, pos: i)
|
76
|
-
end
|
77
|
-
|
78
|
-
else
|
79
|
-
tokens << word(string, pos: i)
|
80
|
-
end
|
76
|
+
else string = match_word_at(i)
|
77
|
+
add_token word(string, pos: pos)
|
81
78
|
i += string.length
|
79
|
+
|
82
80
|
end
|
83
81
|
end
|
84
82
|
|
85
83
|
fail_if_ambiguous!(message, tokens) if fail_if_ambiguous?
|
84
|
+
|
86
85
|
Attentive::Phrase.new(tokens)
|
87
86
|
end
|
88
87
|
|
89
88
|
|
90
89
|
|
90
|
+
private
|
91
|
+
attr_reader :tokens
|
92
|
+
|
91
93
|
def match_emoji_at(i)
|
92
94
|
emoji = ""
|
93
95
|
while (i += 1) < chars.length
|
@@ -163,20 +165,32 @@ module Attentive
|
|
163
165
|
|
164
166
|
|
165
167
|
|
166
|
-
def
|
167
|
-
|
168
|
+
def add_token(token)
|
169
|
+
@tokens << token
|
170
|
+
return unless perform_substitutions?
|
171
|
+
@leaves = add_token_to_leaves token, @leaves
|
168
172
|
end
|
169
173
|
|
170
|
-
def
|
171
|
-
|
174
|
+
def add_token_to_leaves(token, leaves)
|
175
|
+
(leaves + [Attentive.substitutions]).each_with_object([]) do |leaf, new_leaves|
|
176
|
+
if new_leaf = leaf[token]
|
177
|
+
if new_leaf.fin?
|
178
|
+
i = -1 - leaf.depth
|
179
|
+
offset = tokens[i].begin
|
180
|
+
replacement = new_leaf.fin.dup.each { |token| token.begin += offset }
|
181
|
+
tokens[i..-1] = replacement
|
182
|
+
return add_token_to_leaves replacement.last, []
|
183
|
+
else
|
184
|
+
new_leaves.push new_leaf
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
172
188
|
end
|
173
189
|
|
174
|
-
|
175
|
-
!options.fetch(:ambiguous, true)
|
176
|
-
end
|
190
|
+
|
177
191
|
|
178
192
|
WHITESPACE = /\s/.freeze
|
179
|
-
PUNCTUATION = /[^\
|
193
|
+
PUNCTUATION = /[^\sa-z0-9_]/.freeze
|
180
194
|
EMOJI_START = ":".freeze
|
181
195
|
EMOJI_END = ":".freeze
|
182
196
|
ENTITY_START = "{".freeze
|
@@ -186,7 +200,12 @@ module Attentive
|
|
186
200
|
CONDITIONAL_NUMBER_START = /[\.\-]/.freeze
|
187
201
|
NUMBER = /\d/.freeze
|
188
202
|
CONDITIONAL_NUMBER = /[\.,]/.freeze
|
189
|
-
WORD = /[
|
203
|
+
WORD = /[a-z0-9_]/.freeze
|
204
|
+
CHARACTER_SUBSTITIONS = {
|
205
|
+
"“" => "\"",
|
206
|
+
"”" => "\"",
|
207
|
+
"‘" => "'",
|
208
|
+
"’" => "'" }.freeze
|
190
209
|
|
191
210
|
def fail_if_ambiguous!(phrase, tokens)
|
192
211
|
ambiguous_token = tokens.find(&:ambiguous?)
|
@@ -204,5 +223,6 @@ end
|
|
204
223
|
# Attentive::Tokenizer needs to be defined first...
|
205
224
|
require "attentive/entity"
|
206
225
|
require "attentive/composite_entity"
|
226
|
+
require "attentive/substitutions"
|
207
227
|
|
208
228
|
require "attentive/entities/core"
|
@@ -21,12 +21,12 @@ module Attentive
|
|
21
21
|
|
22
22
|
# Find the first token following the match
|
23
23
|
new_character_index = cursor.offset + match_data.to_s.length
|
24
|
-
cursor_pos = cursor.tokens.index { |token| token.
|
24
|
+
cursor_pos = cursor.tokens.index { |token| token.begin >= new_character_index }
|
25
25
|
cursor_pos = cursor.tokens.length unless cursor_pos
|
26
26
|
|
27
27
|
# If the match ends in the middle of a token, treat it as a mismatch
|
28
28
|
match_end_token = cursor.tokens[cursor_pos - 1]
|
29
|
-
return false if match_end_token.
|
29
|
+
return false if match_end_token.begin + match_end_token.length > new_character_index
|
30
30
|
|
31
31
|
# Advance the cursor to the first token after the regexp match
|
32
32
|
cursor.advance cursor_pos - cursor.pos
|
data/lib/attentive/tokens.rb
CHANGED
@@ -9,8 +9,9 @@ module Attentive
|
|
9
9
|
Attentive::Tokens::Emoji.new string, pos
|
10
10
|
end
|
11
11
|
|
12
|
-
def entity(
|
13
|
-
|
12
|
+
def entity(string, pos: nil)
|
13
|
+
entity_name, variable_name = *string.split(":").reverse
|
14
|
+
Attentive::Entity[entity_name.to_sym].new(variable_name || entity_name)
|
14
15
|
end
|
15
16
|
|
16
17
|
def invocation(string, pos: nil)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Attentive
|
2
|
+
class Trie
|
3
|
+
attr_reader :depth
|
4
|
+
|
5
|
+
def initialize(depth: 0)
|
6
|
+
@depth = depth
|
7
|
+
@children = {}
|
8
|
+
end
|
9
|
+
|
10
|
+
def [](token)
|
11
|
+
@children[token]
|
12
|
+
end
|
13
|
+
|
14
|
+
def add(token)
|
15
|
+
raise "Can't add #{token.inspect} to trie because this leaf is a terminus" if fin?
|
16
|
+
@children[token] ||= self.class.new(depth: depth + 1)
|
17
|
+
end
|
18
|
+
|
19
|
+
def fin?
|
20
|
+
@children.key?(:fin)
|
21
|
+
end
|
22
|
+
|
23
|
+
def fin
|
24
|
+
@children[:fin]
|
25
|
+
end
|
26
|
+
|
27
|
+
def fin!(finish)
|
28
|
+
@children[:fin] = finish
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
def self.of_substitutions(substitutions)
|
34
|
+
substitutions.each_with_object(self.new) do |(tokens, substitution), trie|
|
35
|
+
leaf = trie
|
36
|
+
tokens.each_with_index do |token, i|
|
37
|
+
raise "#{tokens.join} contains #{tokens[0...i].join}" if leaf.fin?
|
38
|
+
leaf = leaf.add token
|
39
|
+
end
|
40
|
+
leaf.fin! substitution
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
data/lib/attentive/version.rb
CHANGED
data/lib/attentive.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "attentive/version"
|
2
2
|
require "attentive/config"
|
3
3
|
|
4
|
+
|
4
5
|
module Attentive
|
5
6
|
extend Attentive::Config
|
6
7
|
|
@@ -16,6 +17,31 @@ module Attentive
|
|
16
17
|
|
17
18
|
|
18
19
|
|
20
|
+
# Recognizes entities in a phrase
|
21
|
+
def self.abstract(message)
|
22
|
+
message = Attentive::Message.new(message)
|
23
|
+
entities = Attentive::Entity.entities.map { |entity| Attentive::Phrase.new([entity.new]) }
|
24
|
+
i = 0
|
25
|
+
while i < message.tokens.length
|
26
|
+
entities.each do |entity|
|
27
|
+
match = Attentive::Matcher.new(entity, Cursor.new(message, i)).match!
|
28
|
+
next unless match
|
29
|
+
|
30
|
+
i = match.replace_with(entity)
|
31
|
+
break
|
32
|
+
end
|
33
|
+
i += 1
|
34
|
+
end
|
35
|
+
message.tokens.to_s
|
36
|
+
end
|
37
|
+
|
38
|
+
# Shorthand for tokenizer
|
39
|
+
def self.tokenize(message, options={})
|
40
|
+
Attentive::Tokenizer.tokenize(message, options)
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
19
45
|
# Attentive DSL
|
20
46
|
|
21
47
|
def listeners
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: attentive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bob Lail
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|
@@ -153,14 +153,18 @@ files:
|
|
153
153
|
- bin/console
|
154
154
|
- bin/setup
|
155
155
|
- lib/attentive.rb
|
156
|
-
- lib/attentive/abbreviations.rb
|
157
156
|
- lib/attentive/composite_entity.rb
|
158
157
|
- lib/attentive/config.rb
|
159
|
-
- lib/attentive/contractions.rb
|
160
158
|
- lib/attentive/cursor.rb
|
161
159
|
- lib/attentive/entities/core.rb
|
162
160
|
- lib/attentive/entities/core/date.rb
|
161
|
+
- lib/attentive/entities/core/date/explicit.rb
|
162
|
+
- lib/attentive/entities/core/date/future.rb
|
163
163
|
- lib/attentive/entities/core/date/month.rb
|
164
|
+
- lib/attentive/entities/core/date/partial.rb
|
165
|
+
- lib/attentive/entities/core/date/partial/future.rb
|
166
|
+
- lib/attentive/entities/core/date/partial/past.rb
|
167
|
+
- lib/attentive/entities/core/date/past.rb
|
164
168
|
- lib/attentive/entities/core/date/relative.rb
|
165
169
|
- lib/attentive/entities/core/date/relative/future.rb
|
166
170
|
- lib/attentive/entities/core/date/relative/past.rb
|
@@ -183,7 +187,7 @@ files:
|
|
183
187
|
- lib/attentive/matcher.rb
|
184
188
|
- lib/attentive/message.rb
|
185
189
|
- lib/attentive/phrase.rb
|
186
|
-
- lib/attentive/
|
190
|
+
- lib/attentive/substitutions.rb
|
187
191
|
- lib/attentive/token.rb
|
188
192
|
- lib/attentive/tokenizer.rb
|
189
193
|
- lib/attentive/tokens.rb
|
@@ -194,6 +198,7 @@ files:
|
|
194
198
|
- lib/attentive/tokens/regexp.rb
|
195
199
|
- lib/attentive/tokens/whitespace.rb
|
196
200
|
- lib/attentive/tokens/word.rb
|
201
|
+
- lib/attentive/trie.rb
|
197
202
|
- lib/attentive/version.rb
|
198
203
|
homepage: https://github.com/houston/attentive
|
199
204
|
licenses:
|
@@ -1,3 +0,0 @@
|
|
1
|
-
module Attentive
|
2
|
-
ABBREVIATIONS = {"bye"=>"goodbye", "gonna"=>"going to", "hi"=>"hello", "ol'"=>"old", "'sup"=>"what is up", "thanks"=>"thank you", "wanna"=>"want to", "mon"=>"monday", "tue"=>"tuesday", "tues"=>"tuesday", "wed"=>"wednesday", "thu"=>"thursday", "thur"=>"thursday", "thurs"=>"thursday", "fri"=>"friday", "sat"=>"saturday", "sun"=>"sunday", "jan"=>"january", "feb"=>"february", "mar"=>"march", "apr"=>"april", "jun"=>"june", "jul"=>"july", "aug"=>"august", "sep"=>"september", "sept"=>"september", "oct"=>"october", "nov"=>"november", "dec"=>"december"}.freeze
|
3
|
-
end
|
@@ -1,3 +0,0 @@
|
|
1
|
-
module Attentive
|
2
|
-
CONTRACTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "couldn't've"=>["could not have"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hadn't've"=>["had not have"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'd've"=>["he would have"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'd've"=>["i would have"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'd've"=>["it would have"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "mightn't've"=>["might not have"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'd've"=>["she would have"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "shouldn't've"=>["should not have"], "somebody'd"=>["somebody had", "somebody would"], "somebody'd've"=>["somebody would have"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'd've"=>["someone would have"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], "something'd"=>["something had", "something would"], "something'd've"=>["something would have"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there'd've"=>["there would have"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'dn't've"=>["they would not have"], "they'd've"=>["they would have"], "they'd'ven't"=>["they would have not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they'll'ven't"=>["they will have not"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'd've"=>["we would have"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'd've"=>["who would have"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "won't've"=>["will not have"], "would've"=>["would have"], "wouldn't"=>["would not"], "wouldn't've"=>["would not have"], "y'all"=>["you all"], "y'all'd've"=>["you all would have"], "y'all'dn't've"=>["you all would not have"], "y'all'll"=>["you all will"], "y'all'lln't"=>["you all will not"], "y'all'll've"=>["you all will have"], "y'all'll'ven't"=>["you all will have not"], "you'd"=>["you had", "you would"], "you'd've"=>["you would have"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"]}.freeze
|
3
|
-
end
|
data/lib/attentive/text.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module Attentive
|
2
|
-
module Text
|
3
|
-
extend self
|
4
|
-
|
5
|
-
def normalize(text)
|
6
|
-
straighten_quotes downcase text
|
7
|
-
end
|
8
|
-
|
9
|
-
def downcase(text)
|
10
|
-
text.downcase
|
11
|
-
end
|
12
|
-
|
13
|
-
def straighten_quotes(text)
|
14
|
-
text.gsub(/[“”]/, "\"").gsub(/[‘’]/, "'")
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
end
|