attentive 0.1.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ require "attentive/match"
2
+
3
module Attentive
  # Tries to match the tokens read from a cursor against the tokens of a phrase.
  #
  # A matcher starts in the :matching state. It moves to :found as soon as a
  # token of the phrase has been matched, and to :mismatch when a token that
  # cannot be skipped fails to match.
  class Matcher
    attr_reader :phrase, :cursor, :pos

    # phrase - the indexable list of tokens to look for
    # cursor - the cursor over the tokens being examined
    # params - optional settings; :pos is the index in +phrase+ to start from
    #          (defaults to 0). Leading whitespace in the phrase is skipped.
    def initialize(phrase, cursor, params={})
      @phrase = phrase
      @cursor = cursor
      @pos = params.fetch(:pos, 0)
      skip_phrase_whitespace
      @match_data = {}
      @state = :matching
    end

    # True until a token has matched or mismatched.
    def matching?
      @state == :matching
    end

    # True once a non-skippable token failed to match.
    def mismatch?
      @state == :mismatch
    end

    # Consumes tokens from the cursor until the whole phrase has been matched,
    # a mismatch is found, or the cursor runs out of tokens.
    #
    # Returns an Attentive::Match when every token of the phrase was matched,
    # nil otherwise.
    def match!
      while token = cursor.peek
        if token.ambiguous?
          unless match_subphrase!(token.possibilities)
            @state = :mismatch
            break
          end
          skip_phrase_whitespace

          # One of the possibilities may have consumed the rest of the phrase;
          # report the match now instead of indexing past the end of the
          # phrase on the next token.
          if pos == phrase.length
            @state = :found
            return Attentive::Match.new(phrase, match_data: @match_data)
          end

        elsif (expected = phrase[pos]) && (result = expected.matches?(cursor))
          if result.is_a?(MatchData)
            new_character_index = cursor.offset + result.to_s.length
            @match_data.merge! Hash[result.names.zip(result.captures)]

            # Advance the cursor to the first token after the regexp match
            cursor_pos = cursor.tokens.index { |candidate| candidate.pos >= new_character_index }
            cursor_pos = cursor.tokens.length unless cursor_pos
            cursor.instance_variable_set :@pos, cursor_pos
            @pos += 1
          else
            # A plain `true` carries no captures to record
            @match_data.merge!(result) unless result == true
            @pos += 1
          end
          skip_phrase_whitespace
          @state = :found
          return Attentive::Match.new(phrase, match_data: @match_data) if pos == phrase.length

        elsif !token.skippable?
          @state = :mismatch
          break
        end

        # Move past the token just handled and any whitespace that follows it
        cursor.pop
        break unless cursor.peek
        while cursor.peek.whitespace?
          cursor.pop
          break unless cursor.peek
        end
      end

      nil
    end

    # Tries each alternative token sequence in turn against the phrase,
    # starting at the current phrase position. The first alternative that does
    # not mismatch wins: its phrase position and captured data are adopted.
    #
    # Returns true when an alternative was accepted, false otherwise.
    def match_subphrase!(subphrases)
      subphrases.each do |subphrase|
        matcher = Matcher.new(phrase, Cursor.new(subphrase), pos: pos)
        matcher.match!
        next if matcher.mismatch?

        @pos = matcher.pos
        @match_data.merge!(matcher.captured_match_data)
        return true
      end

      false
    end

    protected

    # Data captured so far; exposed to other matchers so that a parent can
    # adopt what a sub-matcher captured.
    def captured_match_data
      @match_data
    end

    private

    # Advances the phrase position past any whitespace tokens.
    def skip_phrase_whitespace
      @pos += 1 while phrase[pos] && phrase[pos].whitespace?
    end

  end
end
@@ -0,0 +1,24 @@
1
+ require "set"
2
+ require "attentive/tokenizer"
3
+
4
module Attentive
  # A piece of text that listeners can hear, together with the set of
  # contexts it was said in.
  class Message
    attr_reader :contexts, :text

    # text   - the raw message text
    # params - optional settings; :contexts is an enumerable of contexts
    #          (defaults to none) and is stored as a Set
    def initialize(text, params={})
      @text = text
      @contexts = Set.new(params.fetch(:contexts, []))
    end

    # The tokenized text; computed on first use and then reused.
    def tokens
      @tokens ||= Attentive::Tokenizer.tokenize(text)
    end

    alias :to_s :text

    # Shows the tokens rather than the raw text.
    def inspect
      tokens.inspect
    end

  end
end
@@ -0,0 +1,19 @@
1
+ require "delegate"
2
+
3
module Attentive
  # A list of tokens that behaves like the wrapped list but prints as the
  # text the tokens spell out.
  class Phrase < SimpleDelegator

    # tokens - the list of tokens to wrap
    def initialize(tokens)
      super(tokens)
    end

    # The tokens joined into a single string.
    def to_s
      __getobj__.join
    end

    # The joined text wrapped in double quotes.
    def inspect
      %("#{to_s}")
    end

  end
end
@@ -0,0 +1,57 @@
1
module Attentive
  # Text normalization helpers and the lookup tables (contractions and slang)
  # that are loaded from the gem's data directory when this file is loaded.
  module Text
    extend self

    # Lowercases the text and replaces curly quotes with straight ones.
    def normalize(text)
      straighten_quotes downcase text
    end

    def downcase(text)
      text.downcase
    end

    # Replaces curly double and single quotes with their straight equivalents.
    def straighten_quotes(text)
      text.gsub(/[“”]/, "\"").gsub(/[‘’]/, "'")
    end



    DATA_PATH = File.expand_path(File.dirname(__FILE__) + "/../../data").freeze

    # Yields every meaningful line of a tab-separated data file as
    # (raw line, array of values). Comment lines (starting with "#") and
    # blank lines are skipped; a line counts as blank when it holds nothing
    # but whitespace, whatever its line ending.
    def self.each_data_row(filename)
      File.foreach(File.join(DATA_PATH, filename)) do |line|
        next if line.start_with?("#") # skip comments
        next if line.strip.empty? # skip blank lines

        yield line, line.chomp.split("\t")
      end
    end
    private_class_method :each_data_row

    # Maps a contraction to the list of phrases it may stand for.
    CONTRACTIONS = {}.tap do |contractions|
      each_data_row("contractions.tsv") do |line, phrases|
        # the first value is the contraction.
        # the remaining values are possible phrases that match it
        raise "#{line.inspect} must have at least two values" unless phrases.length >= 2

        contractions[phrases.shift] = phrases
      end
    end.freeze

    # Maps a slang word to the normal word that replaces it.
    SLANG = {}.tap do |slang|
      each_data_row("slang.tsv") do |line, words|
        # every line should have exactly two values:
        #  + the first is the slang word
        #  + the second is the normal word
        raise "#{line.inspect} must have exactly two values" unless words.length == 2

        slang[words[0]] = words[1]
      end
    end.freeze

  end
end
@@ -0,0 +1,58 @@
1
module Attentive
  # Base class for tokens. A token remembers the position it was given and
  # answers a handful of predicates, all false by default.
  class Token
    attr_reader :pos

    def initialize(pos)
      @pos = pos
    end

    # Two plain tokens are equal when they are of exactly the same class;
    # the position plays no part.
    def ==(other)
      other.class == self.class
    end

    def ambiguous?; false; end

    def entity?; false; end

    def whitespace?; false; end

    def skippable?; false; end

    # True when this token equals the token the cursor is currently on.
    def matches?(cursor)
      upcoming = cursor.peek
      self == upcoming
    end

  end



  # A token that carries the string it was built from.
  class StringToken < Token
    attr_reader :string

    def initialize(string, pos)
      super(pos)
      @string = string
    end

    # Lets the token be used where a String is expected.
    def to_str
      to_s
    end

    def to_s
      string
    end

    # String tokens are equal when both their class and their string agree.
    def ==(other)
      return false unless self.class == other.class

      string == other.string
    end

  end
end
@@ -0,0 +1,161 @@
1
+ require "attentive/text"
2
+ require "attentive/tokens"
3
+ require "attentive/phrase"
4
+ require "attentive/errors"
5
+
6
module Attentive
  # Turns text into an Attentive::Phrase of tokens.
  class Tokenizer
    extend Attentive::Tokens

    # Splits text on (and keeps) newlines, entity delimiters, runs of
    # whitespace, runs of two or more dots, and any other single character
    # that is not whitespace, a word character, an apostrophe, "@" or a dash.
    # Apostrophes, "@" and dashes are therefore treated as word characters.
    SPLITTER = /(\n|{{|}}|\s+|\.{2,}|[^\s\w'@-])/.freeze
    PUNCTUATION = /^\W+$/.freeze
    WHITESPACE = /^\s+$/.freeze
    ME = "@me".freeze
    ENTITY_START = "{{".freeze
    ENTITY_END = "}}".freeze
    REGEXP_START = "(".freeze
    REGEXP_END = ")".freeze
    REGEXP_ESCAPE = "\\".freeze


    # Normalizes the message and breaks it into non-empty string fragments.
    def self.split(message)
      Attentive::Text.normalize(message).split(SPLITTER).reject(&:empty?)
    end


    # Converts a message into a phrase of tokens.
    #
    # message - the text to tokenize
    # options - :entities  - recognize "{{name:type}}" entities (default false)
    #           :regexps   - recognize "(?...)" regular expressions (default false)
    #           :ambiguous - when false, raise if the result contains an
    #                        ambiguous token (default true)
    #
    # Returns an Attentive::Phrase.
    # Raises Attentive::AmbiguousPhraseError when :ambiguous is false and a
    # fragment has more than one possible expansion.
    def self.tokenize(message, options={})
      match_entities = options.fetch(:entities, false)
      match_regexps = options.fetch(:regexps, false)
      fail_if_ambiguous = !options.fetch(:ambiguous, true)
      strings = split(message)
      tokens = []
      i = 0   # index of the fragment being examined
      pos = 0 # character offset of that fragment within the normalized text
      while i < strings.length
        string = strings[i]
        case string
        when WHITESPACE
          tokens << whitespace(string, pos: pos)

        when ":"
          if strings[i + 2] == ":"
            # ":name:" -- account for the name and the closing colon here;
            # the opening colon is counted by the bookkeeping at the bottom
            # of the loop.
            tokens << emoji(strings[i + 1], pos: pos)
            pos += strings[i + 1].length + 1
            i += 2
          else
            tokens << punctuation(":", pos: pos)
          end

        when ENTITY_START
          if match_entities
            j = i + 1
            found_entity = false
            while j < strings.length
              if strings[j] == ENTITY_END
                entity = strings[(i + 1)...j] # e.g. ["variable-name", ":" "entity-type"]
                tokens << entity(*entity.join.split(":").reverse, pos: pos)
                i = j + 1
                pos += entity.join.length + 4 # contents plus "{{" and "}}"
                found_entity = true
                break
              end
              j += 1
            end
            next if found_entity
          end
          # Not an entity after all: keep the delimiter as punctuation
          tokens << punctuation(ENTITY_START, pos: pos)

        when REGEXP_START
          if match_regexps && strings[i + 1] == "?"
            j = i + 2
            found_regexp = false
            parens = 1
            inside_square_bracket = false
            while j < strings.length
              if strings[j] == "[" && strings[j - 1] != REGEXP_ESCAPE
                inside_square_bracket = true
              elsif strings[j] == "]" && strings[j - 1] != REGEXP_ESCAPE
                inside_square_bracket = false
              end

              # Parentheses inside a character class do not open or close groups
              unless inside_square_bracket
                if strings[j] == REGEXP_START && strings[j - 1] != REGEXP_ESCAPE
                  parens += 1
                elsif strings[j] == REGEXP_END && strings[j - 1] != REGEXP_ESCAPE
                  parens -= 1
                end

                if parens == 0
                  # The slice already includes the opening and closing
                  # parentheses, so its length is the full width consumed.
                  source = strings[i..j].join
                  tokens << regexp(source, pos: pos)
                  pos += source.length
                  i = j + 1
                  found_regexp = true
                  break
                end
              end
              j += 1
            end
            next if found_regexp
          end
          # Not a regexp after all: keep the parenthesis as punctuation
          tokens << punctuation(REGEXP_START, pos: pos)

        when PUNCTUATION
          tokens << punctuation(string, pos: pos)

        when ME
          tokens << me(pos: pos)

        else
          # NOTE(review): tokens produced by the recursive calls below carry
          # positions relative to the replacement text, not to `message`.
          if replace_with = Attentive::Text::SLANG[string]
            tokens.concat tokenize(replace_with, options)

          elsif expands_to = Attentive::Text::CONTRACTIONS[string]
            possibilities = expands_to.map do |possibility|
              tokenize(possibility, options)
            end

            if possibilities.length == 1
              tokens.concat possibilities[0]
            else
              tokens << any_of(possibilities, pos: pos)
            end
          else
            tokens << word(string, pos: pos)
          end
        end

        i += 1
        pos += string.length
      end

      fail_if_ambiguous!(message, tokens) if fail_if_ambiguous

      Attentive::Phrase.new(tokens)
    end

    # Raises Attentive::AmbiguousPhraseError, listing the alternatives, when
    # any of the tokens is ambiguous. Does nothing otherwise.
    def self.fail_if_ambiguous!(phrase, tokens)
      ambiguous_token = tokens.find(&:ambiguous?)
      return unless ambiguous_token

      alternatives = ambiguous_token.possibilities.map(&:inspect).join(" or ")
      raise Attentive::AmbiguousPhraseError,
        "The phrase #{phrase.inspect} is ambiguous. Please use #{alternatives}"
    end

  end
end
154
+
155
+ # Not the perfect place for these...
156
+ # Attentive::Tokenizer needs to be defined first...
157
+ require "attentive/entity"
158
+ require "attentive/composite_entity"
159
+
160
+ require "attentive/entities/integer"
161
+ require "attentive/entities/relative_date"
@@ -0,0 +1,23 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A token that stands for several alternative possibilities.
    # It is the only kind of token that reports itself as ambiguous.
    class AnyOf < Token
      attr_reader :possibilities

      def initialize(possibilities, pos)
        super(pos)
        @possibilities = possibilities
      end

      # Equal when the other token is also an AnyOf offering equal possibilities.
      def ==(other)
        return false unless self.class == other.class

        possibilities == other.possibilities
      end

      def ambiguous?; true; end

    end
  end
end
@@ -0,0 +1,17 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # An emoji written as ":name:". The token stores the bare name and may be
    # skipped while matching.
    class Emoji < StringToken

      # The name wrapped in colons again.
      def to_s
        ":#{string}:"
      end

      def skippable?; true; end

    end
  end
end
@@ -0,0 +1,17 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # The "@me" mention. It may be skipped while matching.
    class Me < Token

      # Always the tokenizer's ME marker.
      def to_s
        Attentive::Tokenizer::ME
      end

      def skippable?; true; end

    end
  end
end
@@ -0,0 +1,13 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A punctuation fragment. It may be skipped while matching.
    class Punctuation < StringToken

      def skippable?; true; end

    end
  end
end
@@ -0,0 +1,27 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A token holding a regular expression that is matched against the text
    # a cursor reports through #to_s.
    class Regexp < Token
      attr_reader :regexp

      # string - the source of the regular expression
      # pos    - the position of the token
      #
      # The pattern is anchored with \A so that it can only match at the very
      # start of the text. ("^" would also match at the start of any later
      # line of a multi-line text.)
      def initialize(string, pos)
        @regexp = ::Regexp.compile("\\A#{string}")
        super pos
      end

      # Equal when the other token is also a Regexp token with an equal pattern.
      def ==(other)
        self.class == other.class && self.regexp == other.regexp
      end

      # Returns the MatchData for a match at the start of the cursor's text,
      # or nil when there is none.
      def matches?(cursor)
        regexp.match(cursor.to_s)
      end

      # The compiled pattern (anchor included) as Regexp#inspect renders it,
      # minus the surrounding slashes.
      def to_s
        regexp.inspect[1...-1]
      end

    end
  end
end
@@ -0,0 +1,22 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A run of whitespace. It may be skipped while matching.
    class Whitespace < StringToken

      # Any whitespace token equals any other, whatever characters they hold.
      def ==(other)
        other.class == self.class
      end

      def skippable?; true; end

      def whitespace?; true; end

    end
  end
end
@@ -0,0 +1,8 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A word. It adds nothing to StringToken: it is compared by class and
    # string, and it cannot be skipped.
    class Word < StringToken; end
  end
end
@@ -0,0 +1,45 @@
1
module Attentive
  # Factory methods for building tokens. Every method takes the position of
  # the token as the optional keyword +pos+ (nil when omitted).
  module Tokens

    def any_of(possibilities, pos: nil)
      Attentive::Tokens::AnyOf.new(possibilities, pos)
    end

    def emoji(string, pos: nil)
      Attentive::Tokens::Emoji.new(string, pos)
    end

    # Looks up the entity class registered under +entity_name+ and
    # instantiates it for +variable_name+ (which defaults to the entity name).
    # NOTE(review): +pos+ is accepted but not handed to the entity -- confirm
    # whether entities are meant to carry a position.
    def entity(entity_name, variable_name=entity_name, pos: nil)
      entity_class = Attentive::Entity[entity_name.to_sym]
      entity_class.new(variable_name)
    end

    def me(pos: nil)
      Attentive::Tokens::Me.new(pos)
    end

    def punctuation(string, pos: nil)
      Attentive::Tokens::Punctuation.new(string, pos)
    end

    def regexp(string, pos: nil)
      Attentive::Tokens::Regexp.new(string, pos)
    end

    def whitespace(string, pos: nil)
      Attentive::Tokens::Whitespace.new(string, pos)
    end

    def word(string, pos: nil)
      Attentive::Tokens::Word.new(string, pos)
    end

  end
end
38
+
39
+ require "attentive/tokens/any_of"
40
+ require "attentive/tokens/emoji"
41
+ require "attentive/tokens/me"
42
+ require "attentive/tokens/punctuation"
43
+ require "attentive/tokens/regexp"
44
+ require "attentive/tokens/whitespace"
45
+ require "attentive/tokens/word"
@@ -0,0 +1,3 @@
1
module Attentive
  # The version of the gem. Frozen so that it cannot be modified in place.
  VERSION = "0.1.0.beta1".freeze
end
data/lib/attentive.rb ADDED
@@ -0,0 +1,20 @@
1
+ require "attentive/version"
2
+ require "attentive/listener_collection"
3
+ require "attentive/message"
4
+
5
module Attentive

  # The collection of listeners belonging to the object this module is mixed
  # into; created on first use.
  def listeners
    return @listeners if @listeners

    @listeners = Attentive::ListenerCollection.new
  end

  # Registers a listener; all arguments and the block are passed straight on
  # to the listener collection.
  def listen_for(*args, &block)
    listeners.listen_for(*args, &block)
  end

  # Lets the listeners hear a message. Anything that is not already an
  # Attentive::Message is wrapped in one, built with +params+.
  def hear(message, params={})
    unless message.is_a?(Attentive::Message)
      message = Attentive::Message.new(message, params)
    end

    listeners.hear(message)
  end

end