attentive 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/Rakefile +12 -29
  3. data/bin/console +1 -0
  4. data/lib/attentive/composite_entity.rb +3 -0
  5. data/lib/attentive/config.rb +13 -0
  6. data/lib/attentive/cursor.rb +8 -5
  7. data/lib/attentive/entities/core/date/explicit.rb +25 -0
  8. data/lib/attentive/entities/core/date/future.rb +10 -0
  9. data/lib/attentive/entities/core/date/partial/future.rb +21 -0
  10. data/lib/attentive/entities/core/date/partial/past.rb +21 -0
  11. data/lib/attentive/entities/core/date/partial.rb +7 -0
  12. data/lib/attentive/entities/core/date/past.rb +10 -0
  13. data/lib/attentive/entities/core/date/relative/future.rb +2 -1
  14. data/lib/attentive/entities/core/date/relative/past.rb +2 -1
  15. data/lib/attentive/entities/core/date/relative.rb +2 -1
  16. data/lib/attentive/entities/core/date.rb +8 -1
  17. data/lib/attentive/entities/core/number/float/negative.rb +3 -3
  18. data/lib/attentive/entities/core/number/float/positive.rb +3 -3
  19. data/lib/attentive/entities/core/number/float.rb +7 -4
  20. data/lib/attentive/entities/core/number/integer/negative.rb +3 -2
  21. data/lib/attentive/entities/core/number/integer/positive.rb +3 -2
  22. data/lib/attentive/entities/core/number/integer.rb +4 -4
  23. data/lib/attentive/entities/core/number/negative.rb +5 -5
  24. data/lib/attentive/entities/core/number/positive.rb +5 -5
  25. data/lib/attentive/entities/core/number.rb +3 -2
  26. data/lib/attentive/entity.rb +27 -6
  27. data/lib/attentive/listener.rb +0 -1
  28. data/lib/attentive/listener_collection.rb +1 -1
  29. data/lib/attentive/match.rb +19 -2
  30. data/lib/attentive/matcher.rb +11 -4
  31. data/lib/attentive/message.rb +12 -0
  32. data/lib/attentive/phrase.rb +5 -1
  33. data/lib/attentive/substitutions.rb +10 -0
  34. data/lib/attentive/token.rb +21 -4
  35. data/lib/attentive/tokenizer.rb +69 -49
  36. data/lib/attentive/tokens/regexp.rb +2 -2
  37. data/lib/attentive/tokens.rb +3 -2
  38. data/lib/attentive/trie.rb +45 -0
  39. data/lib/attentive/version.rb +1 -1
  40. data/lib/attentive.rb +26 -0
  41. metadata +10 -5
  42. data/lib/attentive/abbreviations.rb +0 -3
  43. data/lib/attentive/contractions.rb +0 -3
  44. data/lib/attentive/text.rb +0 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b5952398cd8d68e82b27a4e9be335b748b315845
4
- data.tar.gz: 77f1e15ef3e6952861d110f3c5d7605664e2979f
3
+ metadata.gz: 880167573144b1749ba6f538a4922210ff5e087b
4
+ data.tar.gz: 4b50dad29e7eacdf650c58741230d90c44c75c9b
5
5
  SHA512:
6
- metadata.gz: 49de7e3eff7a8964082ffd35ac964497c5a7c706194f160fcb29bad5710c6977fc1d58ff86189d788cbe8290145575b07147cb35cafcbe16167b10607cd29063
7
- data.tar.gz: b7692af23cbaa6b44467ab64c989883477ade8c1a9e26133c6c27e69168dbd132ba4e6dc7b4abbd8a8b5efeda09714a3138181e3e49290d16f449320f7e4d7d5
6
+ metadata.gz: b89a7ed8760e5e5acc8104c27f2d059fe82a05983fc29d077efabaf25b8cdc4ded15be16e601b1b9e9f94d902bebe5c30908d4d8d1e6311a8e92b24bf7e6cf16
7
+ data.tar.gz: d440c72708774dedbecf2117e4838ad97e2aaa13ebebdada905ccee018bebebc47cc1f3e3f7d6c5d15834a28279a709adec3164facb703779be0392c0c7aab0e
data/Rakefile CHANGED
@@ -9,14 +9,14 @@ end
9
9
 
10
10
  namespace :compile do
11
11
 
12
- desc "Compile contractions.rb and abbreviations.rb"
12
+ desc "Compile substitutions.rb"
13
13
  task :data do
14
14
 
15
15
  data_path = File.expand_path(File.dirname(__FILE__) + "/data")
16
16
  output_path = File.expand_path(File.dirname(__FILE__) + "/lib/attentive")
17
17
 
18
- contractions = {}
19
- File.open(data_path + "/contractions.tsv") do |file|
18
+ substitutions = {}
19
+ File.open(data_path + "/substitutions.tsv") do |file|
20
20
  file.each do |line|
21
21
  next if line.start_with?("#") # skip comments
22
22
  next if line == "\n" # skip blank lines
@@ -27,37 +27,20 @@ namespace :compile do
27
27
  phrases = line.downcase.chomp.split("\t")
28
28
  raise "#{line.inspect} must have exactly two values" unless phrases.length >= 2
29
29
 
30
- contractions[phrases.shift] = phrases
30
+ substitutions[phrases.shift] = phrases
31
31
  end
32
32
  end
33
- File.open(output_path + "/contractions.rb", "w") do |file|
33
+ File.open(output_path + "/substitutions.rb", "w") do |file|
34
34
  file.write <<-RUBY
35
- module Attentive
36
- CONTRACTIONS = #{contractions.inspect}.freeze
37
- end
38
- RUBY
39
- end
35
+ require "attentive/trie"
40
36
 
41
- abbreviations = {}
42
- File.open(data_path + "/abbreviations.tsv") do |file|
43
- file.each do |line|
44
- next if line.start_with?("#") # skip comments
45
- next if line == "\n" # skip blank lines
46
-
47
- # the file contains tab-separated values.
48
- # every line should have exactly two values:
49
- # + the first is the slang word
50
- # + the second is the normal word
51
- words = line.downcase.chomp.split("\t")
52
- raise "#{line.inspect} must have exactly two values" unless words.length == 2
53
-
54
- abbreviations[words[0]] = words[1]
55
- end
56
- end
57
- File.open(output_path + "/abbreviations.rb", "w") do |file|
58
- file.write <<-RUBY
59
37
  module Attentive
60
- ABBREVIATIONS = #{abbreviations.inspect}.freeze
38
+ SUBSTITUTIONS = #{substitutions.inspect}.each_with_object({}) do |(key, values), new_hash|
39
+ tokens = Attentive.tokenize(key, substitutions: false)
40
+ possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
41
+ value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
42
+ new_hash[tokens] = value
43
+ end.freeze
61
44
  end
62
45
  RUBY
63
46
  end
data/bin/console CHANGED
@@ -11,4 +11,5 @@ require "attentive"
11
11
  # Pry.start
12
12
 
13
13
  require "irb"
14
+ include Attentive
14
15
  IRB.start
@@ -9,8 +9,11 @@ module Attentive
9
9
  attr_accessor :entities
10
10
 
11
11
  def define(entity_name, *entities)
12
+ options = entities.last.is_a?(::Hash) ? entities.pop : {}
13
+
12
14
  create! entity_name do |entity_klass|
13
15
  entity_klass.entities = entities.map { |entity| Entity[entity] }
16
+ entity_klass.published = options.fetch(:published, true)
14
17
  end
15
18
  end
16
19
  end
@@ -1,3 +1,5 @@
1
+ require "attentive/trie"
2
+
1
3
  module Attentive
2
4
  module Config
3
5
 
@@ -6,8 +8,19 @@ module Attentive
6
8
  attr_accessor :default_prohibited_contexts
7
9
 
8
10
  def invocations=(*values)
11
+ remove_instance_variable :@substitutions if defined?(@substitutions)
9
12
  @invocations = values.flatten
10
13
  end
11
14
 
15
+ def substitutions
16
+ return @substitutions if defined?(@substitutions)
17
+ @substitutions = Attentive::Trie.of_substitutions(
18
+ Attentive::SUBSTITUTIONS.merge(
19
+ invocations.each_with_object({}) { |invocation, hash|
20
+ tokens = Attentive.tokenize(invocation, substitutions: false)
21
+ hash[tokens] = [Attentive::Tokens::Invocation.new(invocation, 0)]
22
+ } ) )
23
+ end
24
+
12
25
  end
13
26
  end
@@ -1,9 +1,10 @@
1
1
  module Attentive
2
2
  class Cursor
3
- attr_reader :tokens, :pos
3
+ attr_reader :message, :tokens, :pos
4
4
 
5
- def initialize(tokens, pos=0)
6
- @tokens = tokens
5
+ def initialize(message, pos=0)
6
+ @message = message
7
+ @tokens = message.respond_to?(:tokens) ? message.tokens : message
7
8
  @pos = pos
8
9
  end
9
10
 
@@ -26,16 +27,18 @@ module Attentive
26
27
  end
27
28
 
28
29
  def inspect
29
- "|#{(tokens[0...pos] || []).join}\e[7m#{tokens[pos]}\e[0m#{(tokens[(pos + 1)..-1] || []).join}|"
30
+ "<Cursor \"#{(tokens[0...pos] || []).join.inspect[1...-1]}\e[7m#{tokens[pos].to_s.inspect[1...-1]}\e[0m#{(tokens[(pos + 1)..-1] || []).join.inspect[1...-1]}\">"
30
31
  end
31
32
 
32
33
  def offset
33
- peek.pos
34
+ peek.begin
34
35
  end
35
36
 
36
37
  def advance(n=1)
37
38
  @pos += n
39
+ self
38
40
  end
41
+ alias :adv :advance
39
42
 
40
43
  def eof?
41
44
  @pos == @tokens.length
@@ -0,0 +1,25 @@
1
+ require "attentive/entity"
2
+ require "date"
3
+
4
+ Attentive::Entity.define "core.date.explicit",
5
+ "{{month:core.date.month}} {{day:core.number.integer.positive}} {{year:core.number.integer.positive}}",
6
+ "{{day:core.number.integer.positive}} {{month:core.date.month}} {{year:core.number.integer.positive}}",
7
+ %q{(?:(?<month>\d\d?)/(?<day>\d\d?)/(?<year>\d\d(?:\d\d)?))},
8
+ %q{(?:(?<year>\d\d(?:\d\d)?)-(?<month>\d\d?)-(?<day>\d\d?))},
9
+ published: false do |match|
10
+
11
+ month = match["month"].to_i
12
+ day = match["day"].to_i
13
+ year = match["year"].to_i
14
+
15
+ # Interpret 2-digit years in the 2000s
16
+ year += 2000 if year < 100
17
+
18
+ nomatch! if day > 31 || month > 12
19
+
20
+ begin
21
+ Date.new(year, month, day)
22
+ rescue ArgumentError
23
+ nomatch!
24
+ end
25
+ end
@@ -0,0 +1,10 @@
1
+ require "attentive/entities/core/date/month"
2
+ require "attentive/entities/core/date/wday"
3
+ require "attentive/entities/core/date/relative"
4
+ require "attentive/entities/core/date/partial"
5
+ require "attentive/entities/core/date/explicit"
6
+
7
+ Attentive::CompositeEntity.define "core.date.future",
8
+ "core.date.explicit",
9
+ "core.date.relative.future",
10
+ "core.date.partial.future"
@@ -0,0 +1,21 @@
1
+ require "attentive/entity"
2
+ require "date"
3
+
4
+ Attentive::Entity.define "core.date.partial.future",
5
+ "{{month:core.date.month}} {{day:core.number.integer.positive}}",
6
+ published: false do |match|
7
+
8
+ month = match["month"]
9
+ day = match["day"]
10
+ nomatch! if day > 31
11
+
12
+ today = Date.today
13
+ year = today.year
14
+ year += 1 if month < today.month || (month == today.month && day < today.day)
15
+
16
+ begin
17
+ Date.new(year, month, day)
18
+ rescue ArgumentError
19
+ nomatch!
20
+ end
21
+ end
@@ -0,0 +1,21 @@
1
+ require "attentive/entity"
2
+ require "date"
3
+
4
+ Attentive::Entity.define "core.date.partial.past",
5
+ "{{month:core.date.month}} {{day:core.number.integer.positive}}",
6
+ published: false do |match|
7
+
8
+ month = match["month"]
9
+ day = match["day"]
10
+ nomatch! if day > 31
11
+
12
+ today = Date.today
13
+ year = today.year
14
+ year -= 1 if month > today.month || (month == today.month && day > today.day)
15
+
16
+ begin
17
+ Date.new(year, month, day)
18
+ rescue ArgumentError
19
+ nomatch!
20
+ end
21
+ end
@@ -0,0 +1,7 @@
1
+ require "attentive/entities/core/date/partial/past"
2
+ require "attentive/entities/core/date/partial/future"
3
+
4
+ Attentive::CompositeEntity.define "core.date.partial",
5
+ "core.date.partial.future",
6
+ "core.date.partial.past",
7
+ published: false
@@ -0,0 +1,10 @@
1
+ require "attentive/entities/core/date/month"
2
+ require "attentive/entities/core/date/wday"
3
+ require "attentive/entities/core/date/relative"
4
+ require "attentive/entities/core/date/partial"
5
+ require "attentive/entities/core/date/explicit"
6
+
7
+ Attentive::CompositeEntity.define "core.date.past",
8
+ "core.date.explicit",
9
+ "core.date.relative.past",
10
+ "core.date.partial.past"
@@ -5,7 +5,8 @@ Attentive::Entity.define "core.date.relative.future",
5
5
  "today",
6
6
  "tomorrow",
7
7
  "{{core.date.wday}}",
8
- "next {{core.date.wday}}" do |match|
8
+ "next {{core.date.wday}}",
9
+ published: false do |match|
9
10
 
10
11
  today = Date.today
11
12
 
@@ -5,7 +5,8 @@ Attentive::Entity.define "core.date.relative.past",
5
5
  "today",
6
6
  "yesterday",
7
7
  "{{core.date.wday}}",
8
- "last {{core.date.wday}}" do |match|
8
+ "last {{core.date.wday}}",
9
+ published: false do |match|
9
10
 
10
11
  today = Date.today
11
12
 
@@ -3,4 +3,5 @@ require "attentive/entities/core/date/relative/future"
3
3
 
4
4
  Attentive::CompositeEntity.define "core.date.relative",
5
5
  "core.date.relative.future",
6
- "core.date.relative.past"
6
+ "core.date.relative.past",
7
+ published: false
@@ -1,6 +1,13 @@
1
1
  require "attentive/entities/core/date/month"
2
2
  require "attentive/entities/core/date/wday"
3
3
  require "attentive/entities/core/date/relative"
4
+ require "attentive/entities/core/date/partial"
5
+ require "attentive/entities/core/date/explicit"
6
+
7
+ require "attentive/entities/core/date/future"
8
+ require "attentive/entities/core/date/past"
4
9
 
5
10
  Attentive::CompositeEntity.define "core.date",
6
- "core.date.relative"
11
+ "core.date.explicit",
12
+ "core.date.relative",
13
+ "core.date.partial"
@@ -1,6 +1,6 @@
1
1
  require "attentive/entity"
2
- require "bigdecimal"
3
2
 
4
- Attentive::Entity.define "core.number.float.negative", %q{(?<float>\-[\d,]+\.\d+)} do |match|
5
- BigDecimal.new(match["float"].gsub(",", ""))
3
+ Attentive::Entity.define "core.number.float.negative", "{{float:core.number.float}}", published: false do |match|
4
+ nomatch! if match["float"] >= 0
5
+ match["float"]
6
6
  end
@@ -1,6 +1,6 @@
1
1
  require "attentive/entity"
2
- require "bigdecimal"
3
2
 
4
- Attentive::Entity.define "core.number.float.positive", %q{(?<float>[\d,]+\.\d+)} do |match|
5
- BigDecimal.new(match["float"].gsub(",", ""))
3
+ Attentive::Entity.define "core.number.float.positive", "{{float:core.number.float}}", published: false do |match|
4
+ nomatch! if match["float"] <= 0
5
+ match["float"]
6
6
  end
@@ -1,6 +1,9 @@
1
+ require "attentive/entity"
2
+ require "bigdecimal"
3
+
4
+ Attentive::Entity.define "core.number.float", %q{(?<float>\-?[\d,]+\.\d+)}, published: false do |match|
5
+ BigDecimal.new(match["float"].gsub(",", ""))
6
+ end
7
+
1
8
  require "attentive/entities/core/number/float/positive"
2
9
  require "attentive/entities/core/number/float/negative"
3
-
4
- Attentive::CompositeEntity.define "core.number.float",
5
- "core.number.float.positive",
6
- "core.number.float.negative"
@@ -1,5 +1,6 @@
1
1
  require "attentive/entity"
2
2
 
3
- Attentive::Entity.define "core.number.integer.negative", %q{(?<integer>\-\d+)} do |match|
4
- match["integer"].gsub(",", "").to_i
3
+ Attentive::Entity.define "core.number.integer.negative", "{{integer:core.number.integer}}", published: false do |match|
4
+ nomatch! if match["integer"] >= 0
5
+ match["integer"]
5
6
  end
@@ -1,5 +1,6 @@
1
1
  require "attentive/entity"
2
2
 
3
- Attentive::Entity.define "core.number.integer.positive", %q{(?<integer>[\d,]+)} do |match|
4
- match["integer"].gsub(",", "").to_i
3
+ Attentive::Entity.define "core.number.integer.positive", "{{integer:core.number.integer}}", published: false do |match|
4
+ nomatch! if match["integer"] <= 0
5
+ match["integer"]
5
6
  end
@@ -1,6 +1,6 @@
1
+ Attentive::Entity.define "core.number.integer", %q{(?<integer>\-?[\d,]+)}, published: false do |match|
2
+ match["integer"].gsub(",", "").to_i
3
+ end
4
+
1
5
  require "attentive/entities/core/number/integer/positive"
2
6
  require "attentive/entities/core/number/integer/negative"
3
-
4
- Attentive::CompositeEntity.define "core.number.integer",
5
- "core.number.integer.positive",
6
- "core.number.integer.negative"
@@ -1,6 +1,6 @@
1
- require "attentive/entities/core/number/integer/negative"
2
- require "attentive/entities/core/number/float/negative"
1
+ require "attentive/entity"
3
2
 
4
- Attentive::CompositeEntity.define "core.number.negative",
5
- "core.number.float.negative",
6
- "core.number.integer.negative"
3
+ Attentive::Entity.define "core.number.negative", "{{number:core.number}}", published: false do |match|
4
+ nomatch! if match["number"] >= 0
5
+ match["number"]
6
+ end
@@ -1,6 +1,6 @@
1
- require "attentive/entities/core/number/integer/positive"
2
- require "attentive/entities/core/number/float/positive"
1
+ require "attentive/entity"
3
2
 
4
- Attentive::CompositeEntity.define "core.number.positive",
5
- "core.number.float.positive",
6
- "core.number.integer.positive"
3
+ Attentive::Entity.define "core.number.positive", "{{number:core.number}}", published: false do |match|
4
+ nomatch! if match["number"] <= 0
5
+ match["number"]
6
+ end
@@ -1,8 +1,9 @@
1
1
  require "attentive/entities/core/number/integer"
2
2
  require "attentive/entities/core/number/float"
3
- require "attentive/entities/core/number/positive"
4
- require "attentive/entities/core/number/negative"
5
3
 
6
4
  Attentive::CompositeEntity.define "core.number",
7
5
  "core.number.float",
8
6
  "core.number.integer"
7
+
8
+ require "attentive/entities/core/number/positive"
9
+ require "attentive/entities/core/number/negative"
@@ -10,6 +10,15 @@ module Attentive
10
10
  class << self
11
11
  attr_accessor :phrases
12
12
  attr_accessor :token_name
13
+ attr_writer :published
14
+
15
+ def published?
16
+ @published
17
+ end
18
+
19
+ def entities
20
+ @entities.values.select(&:published?)
21
+ end
13
22
 
14
23
  def [](entity_name)
15
24
  entity_name = entity_name.to_sym
@@ -19,10 +28,13 @@ module Attentive
19
28
  end
20
29
 
21
30
  def define(entity_name, *phrases, &block)
31
+ options = phrases.last.is_a?(::Hash) ? phrases.pop : {}
32
+
22
33
  create! entity_name do |entity_klass|
23
34
  entity_klass.phrases = phrases.map do |phrase|
24
35
  Attentive::Tokenizer.tokenize(phrase, entities: true, regexps: true, ambiguous: false)
25
36
  end
37
+ entity_klass.published = options.fetch(:published, true)
26
38
  entity_klass.send :define_method, :_value_from_match, &block if block_given?
27
39
  end
28
40
  end
@@ -55,7 +67,7 @@ module Attentive
55
67
 
56
68
 
57
69
 
58
- def initialize(variable_name, pos=0)
70
+ def initialize(variable_name=self.class.token_name, pos=0)
59
71
  @variable_name = variable_name.to_s
60
72
  super pos
61
73
  end
@@ -78,11 +90,14 @@ module Attentive
78
90
 
79
91
  def matches?(cursor)
80
92
  self.class.phrases.each do |phrase|
81
- cursor_copy = cursor.new_from_here
82
- match = Attentive::Matcher.new(phrase, cursor_copy).match!
83
- if match
84
- cursor.advance cursor_copy.pos
85
- return { variable_name => _value_from_match(match) }
93
+ catch NOMATCH do
94
+ cursor_copy = cursor.new_from_here
95
+ match = Attentive::Matcher.new(phrase, cursor_copy).match!
96
+ if match
97
+ value = _value_from_match(match) # <-- might throw
98
+ cursor.advance cursor_copy.pos
99
+ return { variable_name => value }
100
+ end
86
101
  end
87
102
  end
88
103
  false
@@ -92,5 +107,11 @@ module Attentive
92
107
  match.to_s
93
108
  end
94
109
 
110
+ def nomatch!
111
+ throw NOMATCH
112
+ end
113
+
114
+ NOMATCH = :nomatch.freeze
115
+
95
116
  end
96
117
  end
@@ -1,4 +1,3 @@
1
- require "attentive/text"
2
1
  require "attentive/tokenizer"
3
2
  require "set"
4
3
 
@@ -28,7 +28,7 @@ module Attentive
28
28
  message.tokens.each_with_index do |token, i|
29
29
  listeners.each do |listener|
30
30
  listener.phrases.each do |phrase|
31
- match = Attentive::Matcher.new(phrase, Cursor.new(message.tokens, i), listener: listener, message: message).match!
31
+ match = Attentive::Matcher.new(phrase, Cursor.new(message, i), listener: listener).match!
32
32
  next unless match
33
33
 
34
34
  # Don't match more than one phrase per listener
@@ -1,12 +1,14 @@
1
1
  module Attentive
2
2
  class Match
3
- attr_reader :listener, :phrase, :message
3
+ attr_reader :listener, :phrase, :message, :match_start, :match_end
4
4
 
5
5
  def initialize(phrase, attributes={})
6
6
  @phrase = phrase.to_s
7
7
  @match_data = attributes.fetch(:match_data, {})
8
+ @match_start = attributes.fetch(:match_start)
9
+ @match_end = attributes.fetch(:match_end)
10
+ @message = attributes.fetch(:message)
8
11
  @listener = attributes[:listener]
9
- @message = attributes[:message]
10
12
  end
11
13
 
12
14
  def matched?(variable_name)
@@ -15,11 +17,26 @@ module Attentive
15
17
 
16
18
  def [](variable_name)
17
19
  @match_data.fetch variable_name.to_s
20
+ rescue KeyError
21
+ raise KeyError, "#{$!.message} in #{inspect}"
18
22
  end
19
23
 
20
24
  def to_s
21
25
  @phrase
22
26
  end
23
27
 
28
+ def to_h
29
+ @match_data
30
+ end
31
+
32
+ def replace_with(tokens)
33
+ message[match_start...match_end] = tokens
34
+ match_start + tokens.length
35
+ end
36
+
37
+ def inspect
38
+ "#<#{self.class.name} #{@match_data.inspect} #{phrase.inspect}>"
39
+ end
40
+
24
41
  end
25
42
  end
@@ -6,9 +6,11 @@ module Attentive
6
6
 
7
7
  def initialize(phrase, message, params={})
8
8
  @phrase = phrase
9
+ @match_start = message.pos
9
10
  @cursor = Cursor.new(phrase, params.fetch(:pos, 0))
10
11
  @message = message
11
- @match_params = params.each_with_object({}) { |(key, value), new_hash| new_hash[key] = value if %i{listener message}.member?(key) }
12
+ self.message.pop while self.message.peek.whitespace?
13
+ @match_params = params.merge(message: message.message, match_start: message.pos)
12
14
  @match_data = {}
13
15
  @state = :matching
14
16
 
@@ -34,6 +36,7 @@ module Attentive
34
36
  @state = :mismatch
35
37
  break
36
38
  end
39
+ message.pop
37
40
  cursor.pop while cursor.peek.whitespace?
38
41
 
39
42
  elsif match_data = cursor.peek.matches?(message)
@@ -43,14 +46,18 @@ module Attentive
43
46
  @state = :found
44
47
 
45
48
  # -> This is the one spot where we instantiate a Match
46
- return Attentive::Match.new(phrase, @match_params.merge(match_data: @match_data)) if cursor.eof?
49
+ return Attentive::Match.new(phrase, @match_params.merge(
50
+ match_end: message.pos,
51
+ match_data: @match_data)) if cursor.eof?
47
52
 
48
- elsif !token.skippable?
53
+ elsif token.skippable?
54
+ message.pop
55
+
56
+ else
49
57
  @state = :mismatch
50
58
  break
51
59
  end
52
60
 
53
- message.pop
54
61
  message.pop while message.peek.whitespace?
55
62
  end
56
63
 
@@ -15,6 +15,18 @@ module Attentive
15
15
  @tokens ||= Attentive::Tokenizer.tokenize(text)
16
16
  end
17
17
 
18
+ def [](key)
19
+ tokens[key]
20
+ end
21
+
22
+ def []=(key, value)
23
+ tokens[key] = value
24
+ end
25
+
26
+ def length
27
+ tokens.length
28
+ end
29
+
18
30
  alias :to_s :text
19
31
 
20
32
  def inspect
@@ -12,7 +12,11 @@ module Attentive
12
12
  end
13
13
 
14
14
  def inspect
15
- "\"#{to_s}\""
15
+ map(&:inspect).join("\n")
16
+ end
17
+
18
+ def dup
19
+ self.class.new map(&:dup)
16
20
  end
17
21
 
18
22
  end
@@ -0,0 +1,10 @@
1
+ require "attentive/trie"
2
+
3
+ module Attentive
4
+ SUBSTITUTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "somebody'd"=>["somebody had", "somebody would"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], "something'd"=>["something had", "something would"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "would've"=>["would have"], "wouldn't"=>["would not"], "y'all"=>["you all"], "you'd"=>["you had", "you would"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"], "bye"=>["goodbye"], "gonna"=>["going to"], "hi"=>["hello"], "ol'"=>["old"], "'sup"=>["what is up"], "thanks"=>["thank you"], "wanna"=>["want to"], "w/o"=>["without"], "mon"=>["monday"], "tue"=>["tuesday"], "tues"=>["tuesday"], "wed"=>["wednesday"], "thu"=>["thursday"], "thur"=>["thursday"], "thurs"=>["thursday"], "fri"=>["friday"], "sat"=>["saturday"], "sun"=>["sunday"], "jan"=>["january"], "feb"=>["february"], "mar"=>["march"], "apr"=>["april"], "jun"=>["june"], "jul"=>["july"], "aug"=>["august"], "sep"=>["september"], "sept"=>["september"], "oct"=>["october"], "nov"=>["november"], "dec"=>["december"]}.each_with_object({}) do |(key, values), new_hash|
5
+ tokens = Attentive.tokenize(key, substitutions: false)
6
+ possibilities = values.map { |value| Attentive.tokenize(value, substitutions: false) }
7
+ value = possibilities.length == 1 ? possibilities[0] : Attentive::Phrase.new([Attentive::Tokens::AnyOf.new(key, possibilities, 0)])
8
+ new_hash[tokens] = value
9
+ end.freeze
10
+ end
@@ -1,9 +1,13 @@
1
1
  module Attentive
2
2
  class Token
3
- attr_reader :pos
3
+ attr_accessor :begin
4
4
 
5
5
  def initialize(pos=nil)
6
- @pos = pos
6
+ @begin = pos
7
+ end
8
+
9
+ def end
10
+ self.begin + to_s.length
7
11
  end
8
12
 
9
13
  def ==(other)
@@ -31,11 +35,16 @@ module Attentive
31
35
  end
32
36
 
33
37
  def matches?(cursor)
34
- self == cursor.peek
38
+ if self == cursor.peek
39
+ cursor.pop
40
+ return true
41
+ end
42
+
43
+ false
35
44
  end
36
45
 
37
46
  def inspect
38
- "<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}>"
47
+ "<#{self.class.name ? self.class.name.split("::").last : "Entity"} #{to_s.inspect}#{" #{self.begin}" if self.begin}>"
39
48
  end
40
49
 
41
50
  end
@@ -66,5 +75,13 @@ module Attentive
66
75
  self.class == other.class && self.string == other.string
67
76
  end
68
77
 
78
+ def eql?(other)
79
+ self == other
80
+ end
81
+
82
+ def hash
83
+ [ self.class, string ].hash
84
+ end
85
+
69
86
  end
70
87
  end
@@ -1,6 +1,3 @@
1
- require "attentive/abbreviations"
2
- require "attentive/contractions"
3
- require "attentive/text"
4
1
  require "attentive/tokens"
5
2
  require "attentive/phrase"
6
3
  require "attentive/errors"
@@ -19,75 +16,80 @@ module Attentive
19
16
 
20
17
 
21
18
  def initialize(message, options={})
22
- @message = Attentive::Text.normalize(message)
19
+ @message = message.downcase
23
20
  @chars = self.message.each_char.to_a
24
21
  @options = options
25
22
  end
26
23
 
24
+ def match_entities?
25
+ options.fetch(:entities, false)
26
+ end
27
+
28
+ def match_regexps?
29
+ options.fetch(:regexps, false)
30
+ end
31
+
32
+ def perform_substitutions?
33
+ options.fetch(:substitutions, true)
34
+ end
35
+
36
+ def fail_if_ambiguous?
37
+ !options.fetch(:ambiguous, true)
38
+ end
39
+
27
40
 
28
41
 
29
42
  def tokenize
30
43
  i = 0
31
- tokens = []
44
+ @tokens = []
45
+ @leaves = []
46
+
32
47
  while i < chars.length
33
48
  char = chars[i]
49
+ char = CHARACTER_SUBSTITIONS.fetch(char, char)
50
+ pos = tokens.any? ? tokens.last.end : 0
34
51
 
35
- if EMOJI_START === char && string = match_emoji_at(i)
36
- tokens << emoji(string, pos: i)
37
- i += string.length + 2
52
+ if WHITESPACE === char && string = match_whitespace_at(i)
53
+ add_token whitespace(string, pos: pos)
54
+ i += string.length
38
55
 
39
56
  elsif ENTITY_START === char && string = match_entity_at(i)
40
- tokens << entity(*string.split(":").reverse, pos: i)
57
+ add_token entity(string, pos: pos)
41
58
  i += string.length + 4
42
59
 
43
- elsif REGEXP_START === char && string = match_regexp_at(i)
44
- tokens << regexp(string, pos: i)
60
+ elsif NUMBER_START === char && string = match_number_at(i)
61
+ add_token word(string, pos: pos)
45
62
  i += string.length
46
63
 
47
- elsif WHITESPACE === char && string = match_whitespace_at(i)
48
- tokens << whitespace(string, pos: i)
49
- i += string.length
64
+ elsif EMOJI_START === char && string = match_emoji_at(i)
65
+ add_token emoji(string, pos: pos)
66
+ i += string.length + 2
50
67
 
51
- elsif NUMBER_START === char && string = match_number_at(i)
52
- tokens << word(string, pos: i)
68
+ elsif REGEXP_START === char && string = match_regexp_at(i)
69
+ add_token regexp(string, pos: pos)
53
70
  i += string.length
54
71
 
55
- elsif PUNCTUATION === char # =~ /\W/
56
- tokens << punctuation(char, pos: i)
72
+ elsif PUNCTUATION === char
73
+ add_token punctuation(char, pos: pos)
57
74
  i += 1
58
75
 
59
- else
60
- string = match_word_at(i)
61
- if Attentive.invocations.member?(string)
62
- tokens << invocation(string, pos: i)
63
-
64
- elsif replace_with = Attentive::ABBREVIATIONS[string]
65
- tokens.concat self.class.tokenize(replace_with, options)
66
-
67
- elsif expands_to = Attentive::CONTRACTIONS[string]
68
- possibilities = expands_to.map do |possibility|
69
- self.class.tokenize(possibility, options)
70
- end
71
-
72
- if possibilities.length == 1
73
- tokens.concat possibilities[0]
74
- else
75
- tokens << any_of(string, possibilities, pos: i)
76
- end
77
-
78
- else
79
- tokens << word(string, pos: i)
80
- end
76
+ else string = match_word_at(i)
77
+ add_token word(string, pos: pos)
81
78
  i += string.length
79
+
82
80
  end
83
81
  end
84
82
 
85
83
  fail_if_ambiguous!(message, tokens) if fail_if_ambiguous?
84
+
86
85
  Attentive::Phrase.new(tokens)
87
86
  end
88
87
 
89
88
 
90
89
 
90
+ private
91
+ attr_reader :tokens
92
+
91
93
  def match_emoji_at(i)
92
94
  emoji = ""
93
95
  while (i += 1) < chars.length
@@ -163,20 +165,32 @@ module Attentive
163
165
 
164
166
 
165
167
 
166
- def match_entities?
167
- options.fetch(:entities, false)
168
+ def add_token(token)
169
+ @tokens << token
170
+ return unless perform_substitutions?
171
+ @leaves = add_token_to_leaves token, @leaves
168
172
  end
169
173
 
170
- def match_regexps?
171
- options.fetch(:regexps, false)
174
+ def add_token_to_leaves(token, leaves)
175
+ (leaves + [Attentive.substitutions]).each_with_object([]) do |leaf, new_leaves|
176
+ if new_leaf = leaf[token]
177
+ if new_leaf.fin?
178
+ i = -1 - leaf.depth
179
+ offset = tokens[i].begin
180
+ replacement = new_leaf.fin.dup.each { |token| token.begin += offset }
181
+ tokens[i..-1] = replacement
182
+ return add_token_to_leaves replacement.last, []
183
+ else
184
+ new_leaves.push new_leaf
185
+ end
186
+ end
187
+ end
172
188
  end
173
189
 
174
- def fail_if_ambiguous?
175
- !options.fetch(:ambiguous, true)
176
- end
190
+
177
191
 
178
192
  WHITESPACE = /\s/.freeze
179
- PUNCTUATION = /[^\s\w'@-]/.freeze
193
+ PUNCTUATION = /[^\sa-z0-9_]/.freeze
180
194
  EMOJI_START = ":".freeze
181
195
  EMOJI_END = ":".freeze
182
196
  ENTITY_START = "{".freeze
@@ -186,7 +200,12 @@ module Attentive
186
200
  CONDITIONAL_NUMBER_START = /[\.\-]/.freeze
187
201
  NUMBER = /\d/.freeze
188
202
  CONDITIONAL_NUMBER = /[\.,]/.freeze
189
- WORD = /[\w'\-@]/.freeze
203
+ WORD = /[a-z0-9_]/.freeze
204
+ CHARACTER_SUBSTITIONS = {
205
+ "“" => "\"",
206
+ "”" => "\"",
207
+ "‘" => "'",
208
+ "’" => "'" }.freeze
190
209
 
191
210
  def fail_if_ambiguous!(phrase, tokens)
192
211
  ambiguous_token = tokens.find(&:ambiguous?)
@@ -204,5 +223,6 @@ end
204
223
  # Attentive::Tokenizer needs to be defined first...
205
224
  require "attentive/entity"
206
225
  require "attentive/composite_entity"
226
+ require "attentive/substitutions"
207
227
 
208
228
  require "attentive/entities/core"
@@ -21,12 +21,12 @@ module Attentive
21
21
 
22
22
  # Find the first token following the match
23
23
  new_character_index = cursor.offset + match_data.to_s.length
24
- cursor_pos = cursor.tokens.index { |token| token.pos >= new_character_index }
24
+ cursor_pos = cursor.tokens.index { |token| token.begin >= new_character_index }
25
25
  cursor_pos = cursor.tokens.length unless cursor_pos
26
26
 
27
27
  # If the match ends in the middle of a token, treat it as a mismatch
28
28
  match_end_token = cursor.tokens[cursor_pos - 1]
29
- return false if match_end_token.pos + match_end_token.length > new_character_index
29
+ return false if match_end_token.begin + match_end_token.length > new_character_index
30
30
 
31
31
  # Advance the cursor to the first token after the regexp match
32
32
  cursor.advance cursor_pos - cursor.pos
@@ -9,8 +9,9 @@ module Attentive
9
9
  Attentive::Tokens::Emoji.new string, pos
10
10
  end
11
11
 
12
- def entity(entity_name, variable_name=entity_name, pos: nil)
13
- Attentive::Entity[entity_name.to_sym].new(variable_name)
12
# Builds an entity from a "name" or "variable:name" string.
#
# The text after the last colon is the entity name used for the
# Attentive::Entity lookup; the segment just before it, when present, is the
# variable name. Without a colon the entity name doubles as the variable name.
# +pos+ is accepted but not used here.
def entity(string, pos: nil)
  segments = string.split(":")
  entity_name = segments[-1]
  variable_name = segments[-2] || entity_name
  entity_class = Attentive::Entity[entity_name.to_sym]
  entity_class.new(variable_name)
end
15
16
 
16
17
  def invocation(string, pos: nil)
@@ -0,0 +1,45 @@
1
module Attentive
  # A prefix tree whose edges are tokens. A node that holds a value under the
  # reserved :fin key is a terminus: it marks the end of a complete token
  # sequence and carries the value registered for that sequence.
  class Trie
    # Number of edges between the root (depth 0) and this node.
    attr_reader :depth

    def initialize(depth: 0)
      @children = {}
      @depth = depth
    end

    # Returns the child reached by following +token+, or nil when there is none.
    def [](token)
      @children[token]
    end

    # Returns the child for +token+, creating it one level deeper when it does
    # not exist yet. Raises a RuntimeError if this node is already a terminus.
    def add(token)
      if fin?
        raise "Can't add #{token.inspect} to trie because this leaf is a terminus"
      end

      @children.fetch(token) { @children[token] = self.class.new(depth: depth + 1) }
    end

    # True once a value has been stored on this node with #fin!.
    def fin?
      @children.include?(:fin)
    end

    # The value stored with #fin!, or nil when this node is not a terminus.
    def fin
      @children[:fin]
    end

    # Marks this node as a terminus carrying +finish+ and returns +finish+.
    def fin!(finish)
      @children.store(:fin, finish)
    end

    # Builds a trie from (token sequence, substitution) pairs and returns its
    # root. Raises a RuntimeError when a sequence runs through a node that an
    # earlier pair already marked as a terminus.
    def self.of_substitutions(substitutions)
      root = new
      substitutions.each_with_object(root) do |(tokens, substitution), trie|
        terminus = tokens.each_with_index.inject(trie) do |node, (token, index)|
          raise "#{tokens.join} contains #{tokens[0...index].join}" if node.fin?
          node.add(token)
        end
        terminus.fin!(substitution)
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Attentive
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/attentive.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require "attentive/version"
2
2
  require "attentive/config"
3
3
 
4
+
4
5
  module Attentive
5
6
  extend Attentive::Config
6
7
 
@@ -16,6 +17,31 @@ module Attentive
16
17
 
17
18
 
18
19
 
20
# Recognizes entities in a phrase.
#
# Wraps +message+ in an Attentive::Message, then walks its tokens from left to
# right. At each position the registered entities are tried in order; the
# first one that matches is handed to Match#replace_with, whose return value
# becomes the current position before scanning moves on to the next token.
# Returns the message's tokens rendered with #to_s.
def self.abstract(message)
  message = Attentive::Message.new(message)
  phrases = Attentive::Entity.entities.map { |klass| Attentive::Phrase.new([klass.new]) }

  index = 0
  until index >= message.tokens.length
    phrases.each do |phrase|
      found = Attentive::Matcher.new(phrase, Cursor.new(message, index)).match!
      if found
        index = found.replace_with(phrase)
        break
      end
    end
    index += 1
  end

  message.tokens.to_s
end
37
+
38
# Shorthand for Attentive::Tokenizer.tokenize: forwards +message+ and
# +options+ unchanged and returns whatever the tokenizer returns.
def self.tokenize(message, options={})
  tokenizer = Attentive::Tokenizer
  tokenizer.tokenize(message, options)
end
42
+
43
+
44
+
19
45
  # Attentive DSL
20
46
 
21
47
  def listeners
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: attentive
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bob Lail
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-15 00:00:00.000000000 Z
11
+ date: 2016-05-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thread_safe
@@ -153,14 +153,18 @@ files:
153
153
  - bin/console
154
154
  - bin/setup
155
155
  - lib/attentive.rb
156
- - lib/attentive/abbreviations.rb
157
156
  - lib/attentive/composite_entity.rb
158
157
  - lib/attentive/config.rb
159
- - lib/attentive/contractions.rb
160
158
  - lib/attentive/cursor.rb
161
159
  - lib/attentive/entities/core.rb
162
160
  - lib/attentive/entities/core/date.rb
161
+ - lib/attentive/entities/core/date/explicit.rb
162
+ - lib/attentive/entities/core/date/future.rb
163
163
  - lib/attentive/entities/core/date/month.rb
164
+ - lib/attentive/entities/core/date/partial.rb
165
+ - lib/attentive/entities/core/date/partial/future.rb
166
+ - lib/attentive/entities/core/date/partial/past.rb
167
+ - lib/attentive/entities/core/date/past.rb
164
168
  - lib/attentive/entities/core/date/relative.rb
165
169
  - lib/attentive/entities/core/date/relative/future.rb
166
170
  - lib/attentive/entities/core/date/relative/past.rb
@@ -183,7 +187,7 @@ files:
183
187
  - lib/attentive/matcher.rb
184
188
  - lib/attentive/message.rb
185
189
  - lib/attentive/phrase.rb
186
- - lib/attentive/text.rb
190
+ - lib/attentive/substitutions.rb
187
191
  - lib/attentive/token.rb
188
192
  - lib/attentive/tokenizer.rb
189
193
  - lib/attentive/tokens.rb
@@ -194,6 +198,7 @@ files:
194
198
  - lib/attentive/tokens/regexp.rb
195
199
  - lib/attentive/tokens/whitespace.rb
196
200
  - lib/attentive/tokens/word.rb
201
+ - lib/attentive/trie.rb
197
202
  - lib/attentive/version.rb
198
203
  homepage: https://github.com/houston/attentive
199
204
  licenses:
@@ -1,3 +0,0 @@
1
- module Attentive
2
- ABBREVIATIONS = {"bye"=>"goodbye", "gonna"=>"going to", "hi"=>"hello", "ol'"=>"old", "'sup"=>"what is up", "thanks"=>"thank you", "wanna"=>"want to", "mon"=>"monday", "tue"=>"tuesday", "tues"=>"tuesday", "wed"=>"wednesday", "thu"=>"thursday", "thur"=>"thursday", "thurs"=>"thursday", "fri"=>"friday", "sat"=>"saturday", "sun"=>"sunday", "jan"=>"january", "feb"=>"february", "mar"=>"march", "apr"=>"april", "jun"=>"june", "jul"=>"july", "aug"=>"august", "sep"=>"september", "sept"=>"september", "oct"=>"october", "nov"=>"november", "dec"=>"december"}.freeze
3
- end
@@ -1,3 +0,0 @@
1
- module Attentive
2
- CONTRACTIONS = {"ain't"=>["am not"], "aren't"=>["are not"], "can't"=>["can not"], "cannot"=>["can not"], "could've"=>["could have"], "couldn't"=>["could not"], "couldn't've"=>["could not have"], "didn't"=>["did not"], "doesn't"=>["does not"], "don't"=>["do not"], "hadn't"=>["had not"], "hadn't've"=>["had not have"], "hasn't"=>["has not"], "haven't"=>["have not"], "he'd"=>["he had", "he would"], "he'd've"=>["he would have"], "he'll"=>["he will", "he shall"], "he's"=>["he is", "he has"], "he'sn't"=>["he is not", "he has not"], "how'd"=>["how did", "how would"], "how'll"=>["how will"], "how's"=>["how is", "how has", "how does"], "i'd"=>["i would", "i had"], "i'd've"=>["i would have"], "i'll"=>["i shall", "i will"], "i'm"=>["i am"], "i've"=>["i have"], "i'ven't"=>["i have not"], "isn't"=>["is not"], "it'd"=>["it would", "it had"], "it'd've"=>["it would have"], "it'll"=>["it will", "it shall"], "it's"=>["it is", "it has"], "it'sn't"=>["it is not", "it has not"], "let's"=>["let us"], "ma'am"=>["madam"], "mightn't"=>["might not"], "mightn't've"=>["might not have"], "might've"=>["might have"], "mustn't"=>["must not"], "must've"=>["must have"], "needn't"=>["need not"], "not've"=>["not have"], "o'clock"=>["of the clock"], "oughtn't"=>["ought not"], "shan't"=>["shall not"], "she'd"=>["she had", "she would"], "she'd've"=>["she would have"], "she'll"=>["she shall", "she will"], "she's"=>["she is", "she has"], "she'sn't"=>["she is not", "she has not"], "should've"=>["should have"], "shouldn't"=>["should not"], "shouldn't've"=>["should not have"], "somebody'd"=>["somebody had", "somebody would"], "somebody'd've"=>["somebody would have"], "somebody'dn't've"=>["somebody would not have"], "somebody'll"=>["somebody shall", "somebody will"], "somebody's"=>["somebody is", "somebody has"], "someone'd"=>["someone had", "someone would"], "someone'd've"=>["someone would have"], "someone'll"=>["someone shall", "someone will"], "someone's"=>["someone is", "someone has"], 
"something'd"=>["something had", "something would"], "something'd've"=>["something would have"], "something'll"=>["something shall", "something will"], "something's"=>["something is", "something has"], "that'll"=>["that will"], "that's"=>["that is", "that has"], "there'd"=>["there had", "there would"], "there'd've"=>["there would have"], "there're"=>["there are"], "there's"=>["there is", "there has"], "they'd"=>["they would", "they had"], "they'dn't"=>["they would not"], "they'dn't've"=>["they would not have"], "they'd've"=>["they would have"], "they'd'ven't"=>["they would have not"], "they'll"=>["they shall", "they will"], "they'lln't've"=>["they will not have"], "they'll'ven't"=>["they will have not"], "they're"=>["they are"], "they've"=>["they have"], "they'ven't"=>["they have not"], "'tis"=>["it is"], "'twas"=>["it was"], "wasn't"=>["was not"], "we'd"=>["we had", "we would"], "we'd've"=>["we would have"], "we'dn't've"=>["we would not have"], "we'll"=>["we will"], "we'lln't've"=>["we will not have"], "we're"=>["we are"], "we've"=>["we have"], "weren't"=>["were not"], "what'll"=>["what shall", "what will"], "what're"=>["what are"], "what's"=>["what is", "what does", "what has"], "what've"=>["what have"], "when's"=>["when is", "when has"], "where'd"=>["where did"], "where's"=>["where is", "where does", "where has"], "where've"=>["where have"], "who'd"=>["who would", "who had"], "who'd've"=>["who would have"], "who'll"=>["who shall", "who will"], "who're"=>["who are"], "who's"=>["who is", "who has"], "who've"=>["who have"], "why'll"=>["why will"], "why're"=>["why are"], "why's"=>["why is", "why has"], "won't"=>["will not"], "won't've"=>["will not have"], "would've"=>["would have"], "wouldn't"=>["would not"], "wouldn't've"=>["would not have"], "y'all"=>["you all"], "y'all'd've"=>["you all would have"], "y'all'dn't've"=>["you all would not have"], "y'all'll"=>["you all will"], "y'all'lln't"=>["you all will not"], "y'all'll've"=>["you all will have"], 
"y'all'll'ven't"=>["you all will have not"], "you'd"=>["you had", "you would"], "you'd've"=>["you would have"], "you'll"=>["you shall", "you will"], "you're"=>["you are"], "you'ren't"=>["you are not"], "you've"=>["you have"], "you'ven't"=>["you have not"]}.freeze
3
- end
@@ -1,18 +0,0 @@
1
- module Attentive
2
- module Text
3
- extend self
4
-
5
- def normalize(text)
6
- straighten_quotes downcase text
7
- end
8
-
9
- def downcase(text)
10
- text.downcase
11
- end
12
-
13
- def straighten_quotes(text)
14
- text.gsub(/[“”]/, "\"").gsub(/[‘’]/, "'")
15
- end
16
-
17
- end
18
- end