attentive 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +12 -29
- data/bin/console +1 -0
- data/lib/attentive/composite_entity.rb +3 -0
- data/lib/attentive/config.rb +13 -0
- data/lib/attentive/cursor.rb +8 -5
- data/lib/attentive/entities/core/date/explicit.rb +25 -0
- data/lib/attentive/entities/core/date/future.rb +10 -0
- data/lib/attentive/entities/core/date/partial/future.rb +21 -0
- data/lib/attentive/entities/core/date/partial/past.rb +21 -0
- data/lib/attentive/entities/core/date/partial.rb +7 -0
- data/lib/attentive/entities/core/date/past.rb +10 -0
- data/lib/attentive/entities/core/date/relative/future.rb +2 -1
- data/lib/attentive/entities/core/date/relative/past.rb +2 -1
- data/lib/attentive/entities/core/date/relative.rb +2 -1
- data/lib/attentive/entities/core/date.rb +8 -1
- data/lib/attentive/entities/core/number/float/negative.rb +3 -3
- data/lib/attentive/entities/core/number/float/positive.rb +3 -3
- data/lib/attentive/entities/core/number/float.rb +7 -4
- data/lib/attentive/entities/core/number/integer/negative.rb +3 -2
- data/lib/attentive/entities/core/number/integer/positive.rb +3 -2
- data/lib/attentive/entities/core/number/integer.rb +4 -4
- data/lib/attentive/entities/core/number/negative.rb +5 -5
- data/lib/attentive/entities/core/number/positive.rb +5 -5
- data/lib/attentive/entities/core/number.rb +3 -2
- data/lib/attentive/entity.rb +27 -6
- data/lib/attentive/listener.rb +0 -1
- data/lib/attentive/listener_collection.rb +1 -1
- data/lib/attentive/match.rb +19 -2
- data/lib/attentive/matcher.rb +11 -4
- data/lib/attentive/message.rb +12 -0
- data/lib/attentive/phrase.rb +5 -1
- data/lib/attentive/substitutions.rb +10 -0
- data/lib/attentive/token.rb +21 -4
- data/lib/attentive/tokenizer.rb +69 -49
- data/lib/attentive/tokens/regexp.rb +2 -2
- data/lib/attentive/tokens.rb +3 -2
- data/lib/attentive/trie.rb +45 -0
- data/lib/attentive/version.rb +1 -1
- data/lib/attentive.rb +26 -0
- metadata +10 -5
- data/lib/attentive/abbreviations.rb +0 -3
- data/lib/attentive/contractions.rb +0 -3
- data/lib/attentive/text.rb +0 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 880167573144b1749ba6f538a4922210ff5e087b
|
4
|
+
data.tar.gz: 4b50dad29e7eacdf650c58741230d90c44c75c9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b89a7ed8760e5e5acc8104c27f2d059fe82a05983fc29d077efabaf25b8cdc4ded15be16e601b1b9e9f94d902bebe5c30908d4d8d1e6311a8e92b24bf7e6cf16
|
7
|
+
data.tar.gz: d440c72708774dedbecf2117e4838ad97e2aaa13ebebdada905ccee018bebebc47cc1f3e3f7d6c5d15834a28279a709adec3164facb703779be0392c0c7aab0e
|
data/Rakefile
CHANGED
@@ -9,14 +9,14 @@ end
|
|
9
9
|
|
10
10
|
namespace :compile do
|
11
11
|
|
12
|
-
desc "Compile
|
12
|
+
desc "Compile substitutions.rb"
|
13
13
|
task :data do
|
14
14
|
|
15
15
|
data_path = File.expand_path(File.dirname(__FILE__) + "/data")
|
16
16
|
output_path = File.expand_path(File.dirname(__FILE__) + "/lib/attentive")
|
17
17
|
|
18
|
-
|
19
|
-
File.open(data_path + "/
|
18
|
+
substitutions = {}
|
19
|
+
File.open(data_path + "/substitutions.tsv") do |file|
|
20
20
|
file.each do |line|
|
21
21
|
next if line.start_with?("#") # skip comments
|
22
22
|
next if line == "\n" # skip blank lines
|
@@ -27,37 +27,20 @@ namespace :compile do
|
|
27
27
|
phrases = line.downcase.chomp.split("\t")
|
28
28
|
# a line needs its key plus one or more replacement phrases, so two values is a minimum, not an exact count
raise "#{line.inspect} must have at least two values" unless phrases.length >= 2
|
29
29
|
|
30
|
-
|
30
|
+
substitutions[phrases.shift] = phrases
|
31
31
|
end
|
32
32
|
end
|
33
|
-
File.open(output_path + "/
|
33
|
+
File.open(output_path + "/substitutions.rb", "w") do |file|
|
34
34
|
file.write <<-RUBY
|
35
|
-
|
36
|
-
CONTRACTIONS = #{contractions.inspect}.freeze
|
37
|
-
end
|
38
|
-
RUBY
|
39
|
-
end
|
35
|
+
require "attentive/trie"
|
40
36
|
|
41
|
-
abbreviations = {}
|
42
|
-
File.open(data_path + "/abbreviations.tsv") do |file|
|
43
|
-
file.each do |line|
|
44
|
-
next if line.start_with?("#") # skip comments
|
45
|
-
next if line == "\n" # skip blank lines
|
46
|
-
|
47
|
-
# the file contains tab-separated values.
|
48
|
-
# every line should have exactly two values:
|
49
|
-
# + the first is the slang word
|
50
|
-
# + the second is the normal word
|
51
|
-
words = line.downcase.chomp.split("\t")
|
52
|
-
raise "#{line.inspect} must have exactly two values" unless words.length == 2
|
53
|
-
|
54
|
-
abbreviations[words[0]] = words[1]
|
55
|
-
end
|
56
|
-
end
|
57
|
-
File.open(output_path + "/abbreviations.rb", "w") do |file|
|
58
|
-
file.write <<-RUBY
|
59
37
|
module Attentive
|
60
|
-
|
38
|
+
SUBSTITUTIONS = #{substitutions.inspect}.each_with_object({}) do |(key, values), new_hash|
|
39
|
+
tokens = Attentive.tokenize(key, substitutions: false)
|
40
|
+
possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
|
41
|
+
value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
|
42
|
+
new_hash[tokens] = value
|
43
|
+
end.freeze
|
61
44
|
end
|
62
45
|
RUBY
|
63
46
|
end
|
data/bin/console
CHANGED
@@ -9,8 +9,11 @@ module Attentive
|
|
9
9
|
attr_accessor :entities
|
10
10
|
|
11
11
|
def define(entity_name, *entities)
|
12
|
+
options = entities.last.is_a?(::Hash) ? entities.pop : {}
|
13
|
+
|
12
14
|
create! entity_name do |entity_klass|
|
13
15
|
entity_klass.entities = entities.map { |entity| Entity[entity] }
|
16
|
+
entity_klass.published = options.fetch(:published, true)
|
14
17
|
end
|
15
18
|
end
|
16
19
|
end
|
data/lib/attentive/config.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require "attentive/trie"
|
2
|
+
|
1
3
|
module Attentive
|
2
4
|
module Config
|
3
5
|
|
@@ -6,8 +8,19 @@ module Attentive
|
|
6
8
|
attr_accessor :default_prohibited_contexts
|
7
9
|
|
8
10
|
def invocations=(*values)
|
11
|
+
remove_instance_variable :@substitutions if defined?(@substitutions)
|
9
12
|
@invocations = values.flatten
|
10
13
|
end
|
11
14
|
|
15
|
+
# Returns the trie of substitutions, building it on first use and caching it in @substitutions.
# The trie is built from Attentive::SUBSTITUTIONS merged with one entry per configured invocation.
# `invocations=` removes @substitutions, so the trie is rebuilt after the invocations change.
def substitutions
|
16
|
+
# defined? (rather than ||=) is used as the cache check
return @substitutions if defined?(@substitutions)
|
17
|
+
@substitutions = Attentive::Trie.of_substitutions(
|
18
|
+
Attentive::SUBSTITUTIONS.merge(
|
19
|
+
invocations.each_with_object({}) { |invocation, hash|
|
20
|
+
# tokenize with substitutions disabled: this method is what builds the substitutions
tokens = Attentive.tokenize(invocation, substitutions: false)
|
21
|
+
# each invocation's tokens are replaced by a single Invocation token created at position 0
hash[tokens] = [Attentive::Tokens::Invocation.new(invocation, 0)]
|
22
|
+
} ) )
|
23
|
+
end
|
24
|
+
|
12
25
|
end
|
13
26
|
end
|
data/lib/attentive/cursor.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module Attentive
|
2
2
|
class Cursor
|
3
|
-
attr_reader :tokens, :pos
|
3
|
+
attr_reader :message, :tokens, :pos
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@
|
5
|
+
def initialize(message, pos=0)
|
6
|
+
@message = message
|
7
|
+
@tokens = message.respond_to?(:tokens) ? message.tokens : message
|
7
8
|
@pos = pos
|
8
9
|
end
|
9
10
|
|
@@ -26,16 +27,18 @@ module Attentive
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def inspect
|
29
|
-
"
|
30
|
+
"<Cursor \"#{(tokens[0...pos] || []).join.inspect[1...-1]}\e[7m#{tokens[pos].to_s.inspect[1...-1]}\e[0m#{(tokens[(pos + 1)..-1] || []).join.inspect[1...-1]}\">"
|
30
31
|
end
|
31
32
|
|
32
33
|
def offset
|
33
|
-
peek.
|
34
|
+
peek.begin
|
34
35
|
end
|
35
36
|
|
36
37
|
def advance(n=1)
|
37
38
|
@pos += n
|
39
|
+
self
|
38
40
|
end
|
41
|
+
alias :adv :advance
|
39
42
|
|
40
43
|
def eof?
|
41
44
|
@pos == @tokens.length
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.date.explicit",
|
5
|
+
"{{month:core.date.month}} {{day:core.number.integer.positive}} {{year:core.number.integer.positive}}",
|
6
|
+
"{{day:core.number.integer.positive}} {{month:core.date.month}} {{year:core.number.integer.positive}}",
|
7
|
+
%q{(?:(?<month>\d\d?)/(?<day>\d\d?)/(?<year>\d\d(?:\d\d)?))},
|
8
|
+
%q{(?:(?<year>\d\d(?:\d\d)?)-(?<month>\d\d?)-(?<day>\d\d?))},
|
9
|
+
published: false do |match|
|
10
|
+
|
11
|
+
month = match["month"].to_i
|
12
|
+
day = match["day"].to_i
|
13
|
+
year = match["year"].to_i
|
14
|
+
|
15
|
+
# Interpret 2-digit years in the 2000s
|
16
|
+
year += 2000 if year < 100
|
17
|
+
|
18
|
+
nomatch! if day > 31 || month > 12
|
19
|
+
|
20
|
+
begin
|
21
|
+
Date.new(year, month, day)
|
22
|
+
rescue ArgumentError
|
23
|
+
nomatch!
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require "attentive/entities/core/date/month"
|
2
|
+
require "attentive/entities/core/date/wday"
|
3
|
+
require "attentive/entities/core/date/relative"
|
4
|
+
require "attentive/entities/core/date/partial"
|
5
|
+
require "attentive/entities/core/date/explicit"
|
6
|
+
|
7
|
+
Attentive::CompositeEntity.define "core.date.future",
|
8
|
+
"core.date.explicit",
|
9
|
+
"core.date.relative.future",
|
10
|
+
"core.date.partial.future"
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.date.partial.future",
|
5
|
+
"{{month:core.date.month}} {{day:core.number.integer.positive}}",
|
6
|
+
published: false do |match|
|
7
|
+
|
8
|
+
month = match["month"]
|
9
|
+
day = match["day"]
|
10
|
+
nomatch! if day > 31
|
11
|
+
|
12
|
+
today = Date.today
|
13
|
+
year = today.year
|
14
|
+
year += 1 if month < today.month || (month == today.month && day < today.day)
|
15
|
+
|
16
|
+
begin
|
17
|
+
Date.new(year, month, day)
|
18
|
+
rescue ArgumentError
|
19
|
+
nomatch!
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "date"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.date.partial.past",
|
5
|
+
"{{month:core.date.month}} {{day:core.number.integer.positive}}",
|
6
|
+
published: false do |match|
|
7
|
+
|
8
|
+
month = match["month"]
|
9
|
+
day = match["day"]
|
10
|
+
nomatch! if day > 31
|
11
|
+
|
12
|
+
today = Date.today
|
13
|
+
year = today.year
|
14
|
+
year -= 1 if month > today.month || (month == today.month && day > today.day)
|
15
|
+
|
16
|
+
begin
|
17
|
+
Date.new(year, month, day)
|
18
|
+
rescue ArgumentError
|
19
|
+
nomatch!
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require "attentive/entities/core/date/month"
|
2
|
+
require "attentive/entities/core/date/wday"
|
3
|
+
require "attentive/entities/core/date/relative"
|
4
|
+
require "attentive/entities/core/date/partial"
|
5
|
+
require "attentive/entities/core/date/explicit"
|
6
|
+
|
7
|
+
Attentive::CompositeEntity.define "core.date.past",
|
8
|
+
"core.date.explicit",
|
9
|
+
"core.date.relative.past",
|
10
|
+
"core.date.partial.past"
|
@@ -1,6 +1,13 @@
|
|
1
1
|
require "attentive/entities/core/date/month"
|
2
2
|
require "attentive/entities/core/date/wday"
|
3
3
|
require "attentive/entities/core/date/relative"
|
4
|
+
require "attentive/entities/core/date/partial"
|
5
|
+
require "attentive/entities/core/date/explicit"
|
6
|
+
|
7
|
+
require "attentive/entities/core/date/future"
|
8
|
+
require "attentive/entities/core/date/past"
|
4
9
|
|
5
10
|
Attentive::CompositeEntity.define "core.date",
|
6
|
-
"core.date.
|
11
|
+
"core.date.explicit",
|
12
|
+
"core.date.relative",
|
13
|
+
"core.date.partial"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
|
-
require "bigdecimal"
|
3
2
|
|
4
|
-
Attentive::Entity.define "core.number.float.negative",
|
5
|
-
|
3
|
+
Attentive::Entity.define "core.number.float.negative", "{{float:core.number.float}}", published: false do |match|
|
4
|
+
nomatch! if match["float"] >= 0
|
5
|
+
match["float"]
|
6
6
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
|
-
require "bigdecimal"
|
3
2
|
|
4
|
-
Attentive::Entity.define "core.number.float.positive",
|
5
|
-
|
3
|
+
Attentive::Entity.define "core.number.float.positive", "{{float:core.number.float}}", published: false do |match|
|
4
|
+
nomatch! if match["float"] <= 0
|
5
|
+
match["float"]
|
6
6
|
end
|
@@ -1,6 +1,9 @@
|
|
1
|
+
require "attentive/entity"
|
2
|
+
require "bigdecimal"
|
3
|
+
|
4
|
+
Attentive::Entity.define "core.number.float", %q{(?<float>\-?[\d,]+\.\d+)}, published: false do |match|
|
5
|
+
# Kernel#BigDecimal is the supported constructor (BigDecimal.new was removed in bigdecimal 2.0);
# thousands separators are stripped before parsing
BigDecimal(match["float"].gsub(",", ""))
|
6
|
+
end
|
7
|
+
|
1
8
|
require "attentive/entities/core/number/float/positive"
|
2
9
|
require "attentive/entities/core/number/float/negative"
|
3
|
-
|
4
|
-
Attentive::CompositeEntity.define "core.number.float",
|
5
|
-
"core.number.float.positive",
|
6
|
-
"core.number.float.negative"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
2
|
|
3
|
-
Attentive::Entity.define "core.number.integer.negative",
|
4
|
-
match["integer"]
|
3
|
+
Attentive::Entity.define "core.number.integer.negative", "{{integer:core.number.integer}}", published: false do |match|
|
4
|
+
nomatch! if match["integer"] >= 0
|
5
|
+
match["integer"]
|
5
6
|
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "attentive/entity"
|
2
2
|
|
3
|
-
Attentive::Entity.define "core.number.integer.positive",
|
4
|
-
match["integer"]
|
3
|
+
Attentive::Entity.define "core.number.integer.positive", "{{integer:core.number.integer}}", published: false do |match|
|
4
|
+
nomatch! if match["integer"] <= 0
|
5
|
+
match["integer"]
|
5
6
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
+
Attentive::Entity.define "core.number.integer", %q{(?<integer>\-?[\d,]+)}, published: false do |match|
|
2
|
+
match["integer"].gsub(",", "").to_i
|
3
|
+
end
|
4
|
+
|
1
5
|
require "attentive/entities/core/number/integer/positive"
|
2
6
|
require "attentive/entities/core/number/integer/negative"
|
3
|
-
|
4
|
-
Attentive::CompositeEntity.define "core.number.integer",
|
5
|
-
"core.number.integer.positive",
|
6
|
-
"core.number.integer.negative"
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require "attentive/
|
2
|
-
require "attentive/entities/core/number/float/negative"
|
1
|
+
require "attentive/entity"
|
3
2
|
|
4
|
-
Attentive::
|
5
|
-
"
|
6
|
-
"
|
3
|
+
Attentive::Entity.define "core.number.negative", "{{number:core.number}}", published: false do |match|
|
4
|
+
nomatch! if match["number"] >= 0
|
5
|
+
match["number"]
|
6
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require "attentive/
|
2
|
-
require "attentive/entities/core/number/float/positive"
|
1
|
+
require "attentive/entity"
|
3
2
|
|
4
|
-
Attentive::
|
5
|
-
"
|
6
|
-
"
|
3
|
+
Attentive::Entity.define "core.number.positive", "{{number:core.number}}", published: false do |match|
|
4
|
+
nomatch! if match["number"] <= 0
|
5
|
+
match["number"]
|
6
|
+
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
require "attentive/entities/core/number/integer"
|
2
2
|
require "attentive/entities/core/number/float"
|
3
|
-
require "attentive/entities/core/number/positive"
|
4
|
-
require "attentive/entities/core/number/negative"
|
5
3
|
|
6
4
|
Attentive::CompositeEntity.define "core.number",
|
7
5
|
"core.number.float",
|
8
6
|
"core.number.integer"
|
7
|
+
|
8
|
+
require "attentive/entities/core/number/positive"
|
9
|
+
require "attentive/entities/core/number/negative"
|
data/lib/attentive/entity.rb
CHANGED
@@ -10,6 +10,15 @@ module Attentive
|
|
10
10
|
class << self
|
11
11
|
attr_accessor :phrases
|
12
12
|
attr_accessor :token_name
|
13
|
+
attr_writer :published
|
14
|
+
|
15
|
+
def published?
|
16
|
+
@published
|
17
|
+
end
|
18
|
+
|
19
|
+
# Returns the values of @entities for which `published?` is truthy.
# Entities defined with `published: false` are therefore left out of this list.
# NOTE(review): @entities is presumably the name => entity-class registry filled by create! - confirm.
def entities
|
20
|
+
@entities.values.select(&:published?)
|
21
|
+
end
|
13
22
|
|
14
23
|
def [](entity_name)
|
15
24
|
entity_name = entity_name.to_sym
|
@@ -19,10 +28,13 @@ module Attentive
|
|
19
28
|
end
|
20
29
|
|
21
30
|
def define(entity_name, *phrases, &block)
|
31
|
+
options = phrases.last.is_a?(::Hash) ? phrases.pop : {}
|
32
|
+
|
22
33
|
create! entity_name do |entity_klass|
|
23
34
|
entity_klass.phrases = phrases.map do |phrase|
|
24
35
|
Attentive::Tokenizer.tokenize(phrase, entities: true, regexps: true, ambiguous: false)
|
25
36
|
end
|
37
|
+
entity_klass.published = options.fetch(:published, true)
|
26
38
|
entity_klass.send :define_method, :_value_from_match, &block if block_given?
|
27
39
|
end
|
28
40
|
end
|
@@ -55,7 +67,7 @@ module Attentive
|
|
55
67
|
|
56
68
|
|
57
69
|
|
58
|
-
def initialize(variable_name, pos=0)
|
70
|
+
def initialize(variable_name=self.class.token_name, pos=0)
|
59
71
|
@variable_name = variable_name.to_s
|
60
72
|
super pos
|
61
73
|
end
|
@@ -78,11 +90,14 @@ module Attentive
|
|
78
90
|
|
79
91
|
def matches?(cursor)
|
80
92
|
self.class.phrases.each do |phrase|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
93
|
+
catch NOMATCH do
|
94
|
+
cursor_copy = cursor.new_from_here
|
95
|
+
match = Attentive::Matcher.new(phrase, cursor_copy).match!
|
96
|
+
if match
|
97
|
+
value = _value_from_match(match) # <-- might throw
|
98
|
+
cursor.advance cursor_copy.pos
|
99
|
+
return { variable_name => value }
|
100
|
+
end
|
86
101
|
end
|
87
102
|
end
|
88
103
|
false
|
@@ -92,5 +107,11 @@ module Attentive
|
|
92
107
|
match.to_s
|
93
108
|
end
|
94
109
|
|
110
|
+
# Rejects the current candidate match by throwing NOMATCH.
# `matches?` wraps each phrase attempt in `catch NOMATCH`, so calling this from a value block
# abandons that phrase and lets matching continue with the next one.
def nomatch!
|
111
|
+
throw NOMATCH
|
112
|
+
end
|
113
|
+
|
114
|
+
NOMATCH = :nomatch.freeze
|
115
|
+
|
95
116
|
end
|
96
117
|
end
|
data/lib/attentive/listener.rb
CHANGED
@@ -28,7 +28,7 @@ module Attentive
|
|
28
28
|
message.tokens.each_with_index do |token, i|
|
29
29
|
listeners.each do |listener|
|
30
30
|
listener.phrases.each do |phrase|
|
31
|
-
match = Attentive::Matcher.new(phrase, Cursor.new(message
|
31
|
+
match = Attentive::Matcher.new(phrase, Cursor.new(message, i), listener: listener).match!
|
32
32
|
next unless match
|
33
33
|
|
34
34
|
# Don't match more than one phrase per listener
|
data/lib/attentive/match.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
module Attentive
|
2
2
|
class Match
|
3
|
-
attr_reader :listener, :phrase, :message
|
3
|
+
attr_reader :listener, :phrase, :message, :match_start, :match_end
|
4
4
|
|
5
5
|
def initialize(phrase, attributes={})
|
6
6
|
@phrase = phrase.to_s
|
7
7
|
@match_data = attributes.fetch(:match_data, {})
|
8
|
+
@match_start = attributes.fetch(:match_start)
|
9
|
+
@match_end = attributes.fetch(:match_end)
|
10
|
+
@message = attributes.fetch(:message)
|
8
11
|
@listener = attributes[:listener]
|
9
|
-
@message = attributes[:message]
|
10
12
|
end
|
11
13
|
|
12
14
|
def matched?(variable_name)
|
@@ -15,11 +17,26 @@ module Attentive
|
|
15
17
|
|
16
18
|
def [](variable_name)
|
17
19
|
@match_data.fetch variable_name.to_s
|
20
|
+
rescue KeyError
|
21
|
+
raise KeyError, "#{$!.message} in #{inspect}"
|
18
22
|
end
|
19
23
|
|
20
24
|
def to_s
|
21
25
|
@phrase
|
22
26
|
end
|
23
27
|
|
28
|
+
def to_h
|
29
|
+
@match_data
|
30
|
+
end
|
31
|
+
|
32
|
+
# Replaces the matched span of the message with `tokens`.
# The span is match_start...match_end (end-exclusive) and is assigned through the message's `[]=`,
# so the message is modified in place.
# Returns match_start + tokens.length, i.e. the index just past the inserted tokens.
def replace_with(tokens)
|
33
|
+
message[match_start...match_end] = tokens
|
34
|
+
match_start + tokens.length
|
35
|
+
end
|
36
|
+
|
37
|
+
def inspect
|
38
|
+
"#<#{self.class.name} #{@match_data.inspect} #{phrase.inspect}>"
|
39
|
+
end
|
40
|
+
|
24
41
|
end
|
25
42
|
end
|
data/lib/attentive/matcher.rb
CHANGED
@@ -6,9 +6,11 @@ module Attentive
|
|
6
6
|
|
7
7
|
def initialize(phrase, message, params={})
|
8
8
|
@phrase = phrase
|
9
|
+
@match_start = message.pos
|
9
10
|
@cursor = Cursor.new(phrase, params.fetch(:pos, 0))
|
10
11
|
@message = message
|
11
|
-
|
12
|
+
self.message.pop while self.message.peek.whitespace?
|
13
|
+
@match_params = params.merge(message: message.message, match_start: message.pos)
|
12
14
|
@match_data = {}
|
13
15
|
@state = :matching
|
14
16
|
|
@@ -34,6 +36,7 @@ module Attentive
|
|
34
36
|
@state = :mismatch
|
35
37
|
break
|
36
38
|
end
|
39
|
+
message.pop
|
37
40
|
cursor.pop while cursor.peek.whitespace?
|
38
41
|
|
39
42
|
elsif match_data = cursor.peek.matches?(message)
|
@@ -43,14 +46,18 @@ module Attentive
|
|
43
46
|
@state = :found
|
44
47
|
|
45
48
|
# -> This is the one spot where we instantiate a Match
|
46
|
-
return Attentive::Match.new(phrase, @match_params.merge(
|
49
|
+
return Attentive::Match.new(phrase, @match_params.merge(
|
50
|
+
match_end: message.pos,
|
51
|
+
match_data: @match_data)) if cursor.eof?
|
47
52
|
|
48
|
-
elsif
|
53
|
+
elsif token.skippable?
|
54
|
+
message.pop
|
55
|
+
|
56
|
+
else
|
49
57
|
@state = :mismatch
|
50
58
|
break
|
51
59
|
end
|
52
60
|
|
53
|
-
message.pop
|
54
61
|
message.pop while message.peek.whitespace?
|
55
62
|
end
|
56
63
|
|
data/lib/attentive/message.rb
CHANGED
@@ -15,6 +15,18 @@ module Attentive
|
|
15
15
|
@tokens ||= Attentive::Tokenizer.tokenize(text)
|
16
16
|
end
|
17
17
|
|
18
|
+
def [](key)
|
19
|
+
tokens[key]
|
20
|
+
end
|
21
|
+
|
22
|
+
def []=(key, value)
|
23
|
+
tokens[key] = value
|
24
|
+
end
|
25
|
+
|
26
|
+
def length
|
27
|
+
tokens.length
|
28
|
+
end
|
29
|
+
|
18
30
|
alias :to_s :text
|
19
31
|
|
20
32
|
def inspect
|
data/lib/attentive/phrase.rb
CHANGED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "attentive/trie"
|
2
|
+
|
3
|
+
module Attentive
|
4
|
+
SUBSTITUTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "somebody'd"=>["somebody had", "somebody would"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], "something'd"=>["something had", "something would"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they 
had"], "they'dn't"=>["they would not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "would've"=>["would have"], "wouldn't"=>["would not"], "y'all"=>["you all"], "you'd"=>["you had", "you would"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"], "bye"=>["goodbye"], "gonna"=>["going to"], "hi"=>["hello"], "ol'"=>["old"], "'sup"=>["what is up"], "thanks"=>["thank you"], "wanna"=>["want to"], "w/o"=>["without"], "mon"=>["monday"], "tue"=>["tuesday"], "tues"=>["tuesday"], "wed"=>["wednesday"], "thu"=>["thursday"], "thur"=>["thursday"], "thurs"=>["thursday"], "fri"=>["friday"], "sat"=>["saturday"], "sun"=>["sunday"], "jan"=>["january"], "feb"=>["february"], "mar"=>["march"], "apr"=>["april"], "jun"=>["june"], "jul"=>["july"], "aug"=>["august"], "sep"=>["september"], "sept"=>["september"], "oct"=>["october"], "nov"=>["november"], "dec"=>["december"]}.each_with_object({}) do |(key, values), new_hash|
|
5
|
+
tokens = Attentive.tokenize(key, substitutions: false)
|
6
|
+
possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
|
7
|
+
value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
|
8
|
+
new_hash[tokens] = value
|
9
|
+
end.freeze
|
10
|
+
end
|
data/lib/attentive/token.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
module Attentive
|
2
2
|
class Token
|
3
|
-
|
3
|
+
attr_accessor :begin
|
4
4
|
|
5
5
|
def initialize(pos=nil)
|
6
|
-
@
|
6
|
+
@begin = pos
|
7
|
+
end
|
8
|
+
|
9
|
+
# Returns `begin` plus the length of the token's string form (`to_s`).
# `begin` is set from the constructor's `pos`, which defaults to nil, so this requires
# a token whose `begin` has been set.
def end
|
10
|
+
self.begin + to_s.length
|
7
11
|
end
|
8
12
|
|
9
13
|
def ==(other)
|
@@ -31,11 +35,16 @@ module Attentive
|
|
31
35
|
end
|
32
36
|
|
33
37
|
def matches?(cursor)
|
34
|
-
self == cursor.peek
|
38
|
+
if self == cursor.peek
|
39
|
+
cursor.pop
|
40
|
+
return true
|
41
|
+
end
|
42
|
+
|
43
|
+
false
|
35
44
|
end
|
36
45
|
|
37
46
|
def inspect
|
38
|
-
"<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}>"
|
47
|
+
"<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}#{" #{self.begin}" if self.begin}>"
|
39
48
|
end
|
40
49
|
|
41
50
|
end
|
@@ -66,5 +75,13 @@ module Attentive
|
|
66
75
|
self.class == other.class && self.string == other.string
|
67
76
|
end
|
68
77
|
|
78
|
+
def eql?(other)
|
79
|
+
self == other
|
80
|
+
end
|
81
|
+
|
82
|
+
def hash
|
83
|
+
[ self.class, string ].hash
|
84
|
+
end
|
85
|
+
|
69
86
|
end
|
70
87
|
end
|
data/lib/attentive/tokenizer.rb
CHANGED
@@ -1,6 +1,3 @@
|
|
1
|
-
require "attentive/abbreviations"
|
2
|
-
require "attentive/contractions"
|
3
|
-
require "attentive/text"
|
4
1
|
require "attentive/tokens"
|
5
2
|
require "attentive/phrase"
|
6
3
|
require "attentive/errors"
|
@@ -19,75 +16,80 @@ module Attentive
|
|
19
16
|
|
20
17
|
|
21
18
|
def initialize(message, options={})
|
22
|
-
@message =
|
19
|
+
@message = message.downcase
|
23
20
|
@chars = self.message.each_char.to_a
|
24
21
|
@options = options
|
25
22
|
end
|
26
23
|
|
24
|
+
def match_entities?
|
25
|
+
options.fetch(:entities, false)
|
26
|
+
end
|
27
|
+
|
28
|
+
def match_regexps?
|
29
|
+
options.fetch(:regexps, false)
|
30
|
+
end
|
31
|
+
|
32
|
+
def perform_substitutions?
|
33
|
+
options.fetch(:substitutions, true)
|
34
|
+
end
|
35
|
+
|
36
|
+
def fail_if_ambiguous?
|
37
|
+
!options.fetch(:ambiguous, true)
|
38
|
+
end
|
39
|
+
|
27
40
|
|
28
41
|
|
29
42
|
def tokenize
|
30
43
|
i = 0
|
31
|
-
tokens = []
|
44
|
+
@tokens = []
|
45
|
+
@leaves = []
|
46
|
+
|
32
47
|
while i < chars.length
|
33
48
|
char = chars[i]
|
49
|
+
char = CHARACTER_SUBSTITIONS.fetch(char, char)
|
50
|
+
pos = tokens.any? ? tokens.last.end : 0
|
34
51
|
|
35
|
-
if
|
36
|
-
|
37
|
-
i += string.length
|
52
|
+
if WHITESPACE === char && string = match_whitespace_at(i)
|
53
|
+
add_token whitespace(string, pos: pos)
|
54
|
+
i += string.length
|
38
55
|
|
39
56
|
elsif ENTITY_START === char && string = match_entity_at(i)
|
40
|
-
|
57
|
+
add_token entity(string, pos: pos)
|
41
58
|
i += string.length + 4
|
42
59
|
|
43
|
-
elsif
|
44
|
-
|
60
|
+
elsif NUMBER_START === char && string = match_number_at(i)
|
61
|
+
add_token word(string, pos: pos)
|
45
62
|
i += string.length
|
46
63
|
|
47
|
-
elsif
|
48
|
-
|
49
|
-
i += string.length
|
64
|
+
elsif EMOJI_START === char && string = match_emoji_at(i)
|
65
|
+
add_token emoji(string, pos: pos)
|
66
|
+
i += string.length + 2
|
50
67
|
|
51
|
-
elsif
|
52
|
-
|
68
|
+
elsif REGEXP_START === char && string = match_regexp_at(i)
|
69
|
+
add_token regexp(string, pos: pos)
|
53
70
|
i += string.length
|
54
71
|
|
55
|
-
elsif PUNCTUATION === char
|
56
|
-
|
72
|
+
elsif PUNCTUATION === char
|
73
|
+
add_token punctuation(char, pos: pos)
|
57
74
|
i += 1
|
58
75
|
|
59
|
-
else
|
60
|
-
string
|
61
|
-
if Attentive.invocations.member?(string)
|
62
|
-
tokens << invocation(string, pos: i)
|
63
|
-
|
64
|
-
elsif replace_with = Attentive::ABBREVIATIONS[string]
|
65
|
-
tokens.concat self.class.tokenize(replace_with, options)
|
66
|
-
|
67
|
-
elsif expands_to = Attentive::CONTRACTIONS[string]
|
68
|
-
possibilities = expands_to.map do |possibility|
|
69
|
-
self.class.tokenize(possibility, options)
|
70
|
-
end
|
71
|
-
|
72
|
-
if possibilities.length == 1
|
73
|
-
tokens.concat possibilities[0]
|
74
|
-
else
|
75
|
-
tokens << any_of(string, possibilities, pos: i)
|
76
|
-
end
|
77
|
-
|
78
|
-
else
|
79
|
-
tokens << word(string, pos: i)
|
80
|
-
end
|
76
|
+
else string = match_word_at(i)
|
77
|
+
add_token word(string, pos: pos)
|
81
78
|
i += string.length
|
79
|
+
|
82
80
|
end
|
83
81
|
end
|
84
82
|
|
85
83
|
fail_if_ambiguous!(message, tokens) if fail_if_ambiguous?
|
84
|
+
|
86
85
|
Attentive::Phrase.new(tokens)
|
87
86
|
end
|
88
87
|
|
89
88
|
|
90
89
|
|
90
|
+
private
|
91
|
+
attr_reader :tokens
|
92
|
+
|
91
93
|
def match_emoji_at(i)
|
92
94
|
emoji = ""
|
93
95
|
while (i += 1) < chars.length
|
@@ -163,20 +165,32 @@ module Attentive
|
|
163
165
|
|
164
166
|
|
165
167
|
|
166
|
-
def
|
167
|
-
|
168
|
+
def add_token(token)
|
169
|
+
@tokens << token
|
170
|
+
return unless perform_substitutions?
|
171
|
+
@leaves = add_token_to_leaves token, @leaves
|
168
172
|
end
|
169
173
|
|
170
|
-
def
|
171
|
-
|
174
|
+
# Looks `token` up in every trie node in `leaves` and in the root trie (Attentive.substitutions).
# Children that are not terminal are collected and returned as the new list of leaves.
# When a child is terminal (`fin?`), the tokens that spelled out the key are replaced in `tokens`
# by the stored substitution, and tracking restarts from the last replacement token.
def add_token_to_leaves(token, leaves)
|
175
|
+
# the root trie is included on every call, so a substitution can begin at any token
(leaves + [Attentive.substitutions]).each_with_object([]) do |leaf, new_leaves|
|
176
|
+
if new_leaf = leaf[token]
|
177
|
+
if new_leaf.fin?
|
178
|
+
# leaf.depth tokens were consumed before this one, so the key starts leaf.depth + 1 tokens from the end
i = -1 - leaf.depth
|
179
|
+
offset = tokens[i].begin
|
180
|
+
# shift the replacement tokens by the position of the first token being replaced
# NOTE(review): if `dup` copies only the container, `begin` is mutated on tokens shared with the trie - confirm Phrase#dup copies its tokens
replacement = new_leaf.fin.dup.each { |token| token.begin += offset }
|
181
|
+
tokens[i..-1] = replacement
|
182
|
+
# start over with no leaves, feeding in the last replacement token
return add_token_to_leaves replacement.last, []
|
183
|
+
else
|
184
|
+
new_leaves.push new_leaf
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
172
188
|
end
|
173
189
|
|
174
|
-
|
175
|
-
!options.fetch(:ambiguous, true)
|
176
|
-
end
|
190
|
+
|
177
191
|
|
178
192
|
WHITESPACE = /\s/.freeze
|
179
|
-
PUNCTUATION = /[^\
|
193
|
+
PUNCTUATION = /[^\sa-z0-9_]/.freeze
|
180
194
|
EMOJI_START = ":".freeze
|
181
195
|
EMOJI_END = ":".freeze
|
182
196
|
ENTITY_START = "{".freeze
|
@@ -186,7 +200,12 @@ module Attentive
|
|
186
200
|
CONDITIONAL_NUMBER_START = /[\.\-]/.freeze
|
187
201
|
NUMBER = /\d/.freeze
|
188
202
|
CONDITIONAL_NUMBER = /[\.,]/.freeze
|
189
|
-
WORD = /[
|
203
|
+
WORD = /[a-z0-9_]/.freeze
|
204
|
+
CHARACTER_SUBSTITIONS = {
|
205
|
+
"“" => "\"",
|
206
|
+
"”" => "\"",
|
207
|
+
"‘" => "'",
|
208
|
+
"’" => "'" }.freeze
|
190
209
|
|
191
210
|
def fail_if_ambiguous!(phrase, tokens)
|
192
211
|
ambiguous_token = tokens.find(&:ambiguous?)
|
@@ -204,5 +223,6 @@ end
|
|
204
223
|
# Attentive::Tokenizer needs to be defined first...
|
205
224
|
require "attentive/entity"
|
206
225
|
require "attentive/composite_entity"
|
226
|
+
require "attentive/substitutions"
|
207
227
|
|
208
228
|
require "attentive/entities/core"
|
@@ -21,12 +21,12 @@ module Attentive
|
|
21
21
|
|
22
22
|
# Find the first token following the match
|
23
23
|
new_character_index = cursor.offset + match_data.to_s.length
|
24
|
-
cursor_pos = cursor.tokens.index { |token| token.
|
24
|
+
cursor_pos = cursor.tokens.index { |token| token.begin >= new_character_index }
|
25
25
|
cursor_pos = cursor.tokens.length unless cursor_pos
|
26
26
|
|
27
27
|
# If the match ends in the middle of a token, treat it as a mismatch
|
28
28
|
match_end_token = cursor.tokens[cursor_pos - 1]
|
29
|
-
return false if match_end_token.
|
29
|
+
return false if match_end_token.begin + match_end_token.length > new_character_index
|
30
30
|
|
31
31
|
# Advance the cursor to the first token after the regexp match
|
32
32
|
cursor.advance cursor_pos - cursor.pos
|
data/lib/attentive/tokens.rb
CHANGED
@@ -9,8 +9,9 @@ module Attentive
|
|
9
9
|
Attentive::Tokens::Emoji.new string, pos
|
10
10
|
end
|
11
11
|
|
12
|
-
def entity(
|
13
|
-
|
12
|
+
def entity(string, pos: nil)
|
13
|
+
entity_name, variable_name = *string.split(":").reverse
|
14
|
+
Attentive::Entity[entity_name.to_sym].new(variable_name || entity_name)
|
14
15
|
end
|
15
16
|
|
16
17
|
def invocation(string, pos: nil)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Attentive
|
2
|
+
class Trie
|
3
|
+
attr_reader :depth
|
4
|
+
|
5
|
+
def initialize(depth: 0)
|
6
|
+
@depth = depth
|
7
|
+
@children = {}
|
8
|
+
end
|
9
|
+
|
10
|
+
def [](token)
|
11
|
+
@children[token]
|
12
|
+
end
|
13
|
+
|
14
|
+
def add(token)
|
15
|
+
raise "Can't add #{token.inspect} to trie because this leaf is a terminus" if fin?
|
16
|
+
@children[token] ||= self.class.new(depth: depth + 1)
|
17
|
+
end
|
18
|
+
|
19
|
+
def fin?
|
20
|
+
@children.key?(:fin)
|
21
|
+
end
|
22
|
+
|
23
|
+
def fin
|
24
|
+
@children[:fin]
|
25
|
+
end
|
26
|
+
|
27
|
+
def fin!(finish)
|
28
|
+
@children[:fin] = finish
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
|
33
|
+
def self.of_substitutions(substitutions)
|
34
|
+
substitutions.each_with_object(self.new) do |(tokens, substitution), trie|
|
35
|
+
leaf = trie
|
36
|
+
tokens.each_with_index do |token, i|
|
37
|
+
raise "#{tokens.join} contains #{tokens[0...i].join}" if leaf.fin?
|
38
|
+
leaf = leaf.add token
|
39
|
+
end
|
40
|
+
leaf.fin! substitution
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
data/lib/attentive/version.rb
CHANGED
data/lib/attentive.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "attentive/version"
|
2
2
|
require "attentive/config"
|
3
3
|
|
4
|
+
|
4
5
|
module Attentive
|
5
6
|
extend Attentive::Config
|
6
7
|
|
@@ -16,6 +17,31 @@ module Attentive
|
|
16
17
|
|
17
18
|
|
18
19
|
|
20
|
+
# Recognizes entities in a phrase
|
21
|
+
def self.abstract(message)
|
22
|
+
message = Attentive::Message.new(message)
|
23
|
+
entities = Attentive::Entity.entities.map { |entity| Attentive::Phrase.new([entity.new]) }
|
24
|
+
i = 0
|
25
|
+
while i < message.tokens.length
|
26
|
+
entities.each do |entity|
|
27
|
+
match = Attentive::Matcher.new(entity, Cursor.new(message, i)).match!
|
28
|
+
next unless match
|
29
|
+
|
30
|
+
i = match.replace_with(entity)
|
31
|
+
break
|
32
|
+
end
|
33
|
+
i += 1
|
34
|
+
end
|
35
|
+
message.tokens.to_s
|
36
|
+
end
|
37
|
+
|
38
|
+
# Shorthand for tokenizer
|
39
|
+
def self.tokenize(message, options={})
|
40
|
+
Attentive::Tokenizer.tokenize(message, options)
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
|
19
45
|
# Attentive DSL
|
20
46
|
|
21
47
|
def listeners
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: attentive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bob Lail
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|
@@ -153,14 +153,18 @@ files:
|
|
153
153
|
- bin/console
|
154
154
|
- bin/setup
|
155
155
|
- lib/attentive.rb
|
156
|
-
- lib/attentive/abbreviations.rb
|
157
156
|
- lib/attentive/composite_entity.rb
|
158
157
|
- lib/attentive/config.rb
|
159
|
-
- lib/attentive/contractions.rb
|
160
158
|
- lib/attentive/cursor.rb
|
161
159
|
- lib/attentive/entities/core.rb
|
162
160
|
- lib/attentive/entities/core/date.rb
|
161
|
+
- lib/attentive/entities/core/date/explicit.rb
|
162
|
+
- lib/attentive/entities/core/date/future.rb
|
163
163
|
- lib/attentive/entities/core/date/month.rb
|
164
|
+
- lib/attentive/entities/core/date/partial.rb
|
165
|
+
- lib/attentive/entities/core/date/partial/future.rb
|
166
|
+
- lib/attentive/entities/core/date/partial/past.rb
|
167
|
+
- lib/attentive/entities/core/date/past.rb
|
164
168
|
- lib/attentive/entities/core/date/relative.rb
|
165
169
|
- lib/attentive/entities/core/date/relative/future.rb
|
166
170
|
- lib/attentive/entities/core/date/relative/past.rb
|
@@ -183,7 +187,7 @@ files:
|
|
183
187
|
- lib/attentive/matcher.rb
|
184
188
|
- lib/attentive/message.rb
|
185
189
|
- lib/attentive/phrase.rb
|
186
|
-
- lib/attentive/text.rb
|
190
|
+
- lib/attentive/substitutions.rb
|
187
191
|
- lib/attentive/token.rb
|
188
192
|
- lib/attentive/tokenizer.rb
|
189
193
|
- lib/attentive/tokens.rb
|
@@ -194,6 +198,7 @@ files:
|
|
194
198
|
- lib/attentive/tokens/regexp.rb
|
195
199
|
- lib/attentive/tokens/whitespace.rb
|
196
200
|
- lib/attentive/tokens/word.rb
|
201
|
+
- lib/attentive/trie.rb
|
197
202
|
- lib/attentive/version.rb
|
198
203
|
homepage: https://github.com/houston/attentive
|
199
204
|
licenses:
|
@@ -1,3 +0,0 @@
|
|
1
|
-
module Attentive
|
2
|
-
ABBREVIATIONS = {"bye"=>"goodbye", "gonna"=>"going to", "hi"=>"hello", "ol'"=>"old", "'sup"=>"what is up", "thanks"=>"thank you", "wanna"=>"want to", "mon"=>"monday", "tue"=>"tuesday", "tues"=>"tuesday", "wed"=>"wednesday", "thu"=>"thursday", "thur"=>"thursday", "thurs"=>"thursday", "fri"=>"friday", "sat"=>"saturday", "sun"=>"sunday", "jan"=>"january", "feb"=>"february", "mar"=>"march", "apr"=>"april", "jun"=>"june", "jul"=>"july", "aug"=>"august", "sep"=>"september", "sept"=>"september", "oct"=>"october", "nov"=>"november", "dec"=>"december"}.freeze
|
3
|
-
end
|
@@ -1,3 +0,0 @@
|
|
1
|
-
module Attentive
|
2
|
-
CONTRACTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "couldn't've"=>["could not have"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hadn't've"=>["had not have"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'd've"=>["he would have"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'd've"=>["i would have"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'd've"=>["it would have"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "mightn't've"=>["might not have"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'd've"=>["she would have"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "shouldn't've"=>["should not have"], "somebody'd"=>["somebody had", "somebody would"], "somebody'd've"=>["somebody would have"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'd've"=>["someone would have"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], 
"something'd"=>["something had", "something would"], "something'd've"=>["something would have"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there'd've"=>["there would have"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'dn't've"=>["they would not have"], "they'd've"=>["they would have"], "they'd'ven't"=>["they would have not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they'll'ven't"=>["they will have not"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'd've"=>["we would have"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'd've"=>["who would have"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "won't've"=>["will not have"], "would've"=>["would have"], "wouldn't"=>["would not"], "wouldn't've"=>["would not have"], "y'all"=>["you all"], "y'all'd've"=>["you all would have"], "y'all'dn't've"=>["you all would not have"], "y'all'll"=>["you all will"], "y'all'lln't"=>["you all will not"], "y'all'll've"=>["you all will have"], 
"y'all'll'ven't"=>["you all will have not"], "you'd"=>["you had", "you would"], "you'd've"=>["you would have"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"]}.freeze
|
3
|
-
end
|
data/lib/attentive/text.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
module Attentive
|
2
|
-
module Text
|
3
|
-
extend self
|
4
|
-
|
5
|
-
def normalize(text)
|
6
|
-
straighten_quotes downcase text
|
7
|
-
end
|
8
|
-
|
9
|
-
def downcase(text)
|
10
|
-
text.downcase
|
11
|
-
end
|
12
|
-
|
13
|
-
def straighten_quotes(text)
|
14
|
-
text.gsub(/[“”]/, "\"").gsub(/[‘’]/, "'")
|
15
|
-
end
|
16
|
-
|
17
|
-
end
|
18
|
-
end
|