attentive 0.1.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
+ require "attentive/match"
2
+
3
module Attentive
  # Walks the tokens of a message (through +cursor+) and the tokens of a
  # phrase side by side, looking for the phrase in the message.
  #
  # A matcher starts in the :matching state, moves to :found once a phrase
  # token has matched, and to :mismatch when a message token neither matches
  # nor can be skipped.
  class Matcher
    attr_reader :phrase, :cursor, :pos

    # phrase - the phrase to look for; it is indexed with [] and asked for
    #          its length
    # cursor - a cursor over the tokens being searched; must respond to
    #          peek and pop (and to offset, tokens and to_s when the phrase
    #          contains a token whose matches? returns a MatchData)
    # params - optional Hash; :pos is the index in phrase at which to start
    #          matching (default 0)
    def initialize(phrase, cursor, params={})
      @phrase = phrase
      @cursor = cursor
      @pos = params.fetch(:pos, 0)
      skip_phrase_whitespace!
      @match_data = {}
      @state = :matching
    end

    def matching?
      @state == :matching
    end

    def mismatch?
      @state == :mismatch
    end

    # Consumes tokens from the cursor until the phrase has been matched in
    # full, a mismatch is found, or the cursor runs out of tokens.
    #
    # Returns an Attentive::Match when every token of the phrase was
    # matched; returns nil otherwise (check mismatch? to tell a mismatch
    # from running out of tokens).
    def match!
      while token = cursor.peek
        if token.ambiguous?
          unless match_subphrase!(token.possibilities)
            @state = :mismatch
            break
          end
          skip_phrase_whitespace!

          # One of the possibilities may have matched the remainder of the
          # phrase. Without this check the loop would carry on and either
          # fall off the end of the cursor (returning nil for a complete
          # match) or call matches? on phrase[pos], which is nil here.
          # NOTE(review): data captured by the sub-matcher is not merged
          # into @match_data -- confirm whether it should be.
          if pos == phrase.length
            @state = :found
            return Attentive::Match.new(phrase, match_data: @match_data)
          end

        elsif match_data = phrase[pos].matches?(cursor)
          if match_data.is_a?(MatchData)
            new_character_index = cursor.offset + match_data.to_s.length
            @match_data.merge! Hash[match_data.names.zip(match_data.captures)]

            # Advance the cursor to the first token after the regexp match
            # NOTE(review): this reaches into the cursor's @pos directly;
            # Cursor is not visible here, so confirm it has no public way
            # to do this.
            cursor_pos = cursor.tokens.index { |candidate| candidate.pos >= new_character_index }
            cursor_pos = cursor.tokens.length unless cursor_pos
            cursor.instance_variable_set :@pos, cursor_pos
          else
            # matches? may answer with plain `true` or with a Hash of data
            @match_data.merge!(match_data) unless match_data == true
          end
          @pos += 1
          skip_phrase_whitespace!
          @state = :found
          return Attentive::Match.new(phrase, match_data: @match_data) if pos == phrase.length

        elsif !token.skippable?
          @state = :mismatch
          break
        end

        # Move past the token just handled and any whitespace after it
        cursor.pop
        break unless cursor.peek
        while cursor.peek.whitespace?
          cursor.pop
          break unless cursor.peek
        end
      end

      nil
    end

    # Tries each of the given token lists in order, continuing the phrase
    # from the current position. On the first one that does not mismatch,
    # adopts the position it reached and returns true; returns false when
    # every one of them mismatches.
    def match_subphrase!(subphrases)
      subphrases.each do |subphrase|
        matcher = Matcher.new(phrase, Cursor.new(subphrase), pos: pos)
        matcher.match!
        unless matcher.mismatch?
          @pos = matcher.pos
          return true
        end
      end

      false
    end

    private

    # Moves the phrase position past any whitespace tokens
    def skip_phrase_whitespace!
      @pos += 1 while phrase[pos] && phrase[pos].whitespace?
    end

  end
end
@@ -0,0 +1,24 @@
1
+ require "set"
2
+ require "attentive/tokenizer"
3
+
4
module Attentive
  # A piece of text that was heard, together with the contexts it was
  # heard in.
  class Message
    attr_reader :contexts, :text

    # text   - the text of the message
    # params - optional Hash; :contexts is a list of contexts, stored as a
    #          Set (default: none)
    #
    # params may be omitted, as it may be for Matcher.new and Attentive#hear.
    def initialize(text, params={})
      @text = text
      @contexts = Set.new(params.fetch(:contexts, []))
    end

    # The tokenized text; computed on first use and then remembered
    def tokens
      @tokens ||= Attentive::Tokenizer.tokenize(text)
    end

    alias :to_s :text

    # Shows the message as its tokens rather than as its raw text
    def inspect
      tokens.inspect
    end

  end
end
@@ -0,0 +1,19 @@
1
+ require "delegate"
2
+
3
module Attentive
  # Wraps a list of tokens so that it can present itself as text. Anything
  # not defined here is forwarded to the wrapped list by SimpleDelegator.
  class Phrase < SimpleDelegator

    def initialize(tokens)
      super(tokens)
    end

    # The wrapped tokens joined together with no separator
    def to_s
      __getobj__.join
    end

    # The text of the phrase wrapped in double quotes
    # (the text itself is not escaped)
    def inspect
      text = to_s
      "\"#{text}\""
    end

  end
end
@@ -0,0 +1,57 @@
1
module Attentive
  # Helpers for normalizing text, plus the CONTRACTIONS and SLANG tables,
  # which are read from tab-separated files when this module is loaded.
  module Text
    extend self

    # Downcases the text and replaces curly quotes with straight ones
    def normalize(text)
      straighten_quotes downcase text
    end

    def downcase(text)
      text.downcase
    end

    # Replaces curly double quotes with " and curly single quotes with '
    def straighten_quotes(text)
      text.gsub(/[“”]/, "\"").gsub(/[‘’]/, "'")
    end



    DATA_PATH = File.expand_path(File.dirname(__FILE__) + "/../../data").freeze

    # Maps a contraction to the list of phrases it may stand for
    CONTRACTIONS = {}.tap do |contractions|
      File.open(DATA_PATH + "/contractions.tsv") do |file|
        file.each do |line|
          next if line.start_with?("#") # skip comments
          next if line.strip.empty?     # skip blank lines (including "\r\n" and whitespace-only ones)

          # the file contains tab-separated values.
          # the first value is the contraction.
          # the remaining values are possible phrases that match it
          phrases = line.chomp.split("\t")
          raise "#{line.inspect} must have at least two values" unless phrases.length >= 2

          contractions[phrases.shift] = phrases
        end
      end
    end.freeze

    # Maps a slang word to the normal word that replaces it
    SLANG = {}.tap do |slang|
      File.open(DATA_PATH + "/slang.tsv") do |file|
        file.each do |line|
          next if line.start_with?("#") # skip comments
          next if line.strip.empty?     # skip blank lines (including "\r\n" and whitespace-only ones)

          # the file contains tab-separated values.
          # every line should have exactly two values:
          #  + the first is the slang word
          #  + the second is the normal word
          words = line.chomp.split("\t")
          raise "#{line.inspect} must have exactly two values" unless words.length == 2

          slang[words[0]] = words[1]
        end
      end
    end.freeze

  end
end
@@ -0,0 +1,58 @@
1
module Attentive
  # Base class for tokens. A token remembers the position it was given and
  # answers "no" to every predicate; subclasses override the ones that
  # apply to them.
  class Token
    attr_reader :pos

    def initialize(pos)
      @pos = pos
    end

    # Two plain tokens are equal when they are of exactly the same class;
    # the position is not compared
    def ==(other)
      other.class.equal?(self.class)
    end

    def ambiguous?
      false
    end

    def entity?
      false
    end

    def whitespace?
      false
    end

    # Matcher treats a token that fails to match as a mismatch unless the
    # token is skippable
    def skippable?
      false
    end

    # True when the token the cursor is currently on is equal to this one
    def matches?(cursor)
      upcoming = cursor.peek
      self == upcoming
    end

  end



  # A token that carries the text it was made from
  class StringToken < Token
    attr_reader :string

    def initialize(string, pos)
      super(pos)
      @string = string
    end

    # Lets the token be used where a String is expected
    def to_str
      to_s
    end

    def to_s
      string
    end

    # Equal when the classes are the same and the strings are the same
    def ==(other)
      super(other) && string == other.string
    end

  end
end
@@ -0,0 +1,161 @@
1
+ require "attentive/text"
2
+ require "attentive/tokens"
3
+ require "attentive/phrase"
4
+ require "attentive/errors"
5
+
6
module Attentive
  # Turns text into an Attentive::Phrase of tokens.
  class Tokenizer
    extend Attentive::Tokens

    # The separators that text is split on: a newline, "{{", "}}", a run of
    # whitespace, two or more dots, or any single character that is not
    # whitespace, a word-character, an apostrophe, "@" or a dash (so
    # apostrophes and dashes stay inside words). The pattern is a single
    # capture group, so String#split keeps the separators in its result.
    SPLITTER = /(\n|{{|}}|\s+|\.{2,}|[^\s\w'@-])/.freeze
    PUNCTUATION = /^\W+$/.freeze
    WHITESPACE = /^\s+$/.freeze
    ME = "@me".freeze
    ENTITY_START = "{{".freeze
    ENTITY_END = "}}".freeze
    REGEXP_START = "(".freeze
    REGEXP_END = ")".freeze
    REGEXP_ESCAPE = "\\".freeze


    # Normalizes the message and splits it into fragments (words,
    # whitespace and punctuation), dropping empty fragments.
    def self.split(message)
      Attentive::Text.normalize(message).split(SPLITTER).reject(&:empty?)
    end


    # Converts a message into a Phrase of tokens.
    #
    # options:
    #   :entities  - when true, "{{...}}" becomes an entity token
    #                (default false)
    #   :regexps   - when true, a parenthesized group that starts with "(?"
    #                becomes a regexp token (default false)
    #   :ambiguous - when false, raises Attentive::AmbiguousPhraseError if
    #                any resulting token is ambiguous (default true)
    #
    # Each token is given the character offset (+pos+) of the fragment it
    # came from.
    # NOTE(review): tokens produced by expanding slang or a contraction come
    # from a nested call whose offsets start again at 0, so their +pos+ is
    # relative to the replacement text rather than to +message+.
    def self.tokenize(message, options={})
      match_entities = options.fetch(:entities, false)
      match_regexps = options.fetch(:regexps, false)
      fail_if_ambiguous = !options.fetch(:ambiguous, true)
      strings = split(message)
      tokens = []
      i = 0
      pos = 0
      while i < strings.length
        string = strings[i]
        case string
        when ""
          # do nothing

        when WHITESPACE
          tokens << whitespace(string, pos: pos)

        when ":"
          # ":name:" is an emoji; any other colon is punctuation
          if strings[i + 2] == ":"
            tokens << emoji(strings[i + 1], pos: pos)
            # account for the name and the closing colon here; the opening
            # colon is accounted for at the bottom of the loop
            pos += strings[i + 1].length + 1
            i += 2
          else
            tokens << punctuation(":", pos: pos)
          end

        when ENTITY_START
          if match_entities
            j = i + 1
            found_entity = false
            while j < strings.length
              if strings[j] == ENTITY_END
                entity = strings[(i + 1)...j] # e.g. ["variable-name", ":" "entity-type"]
                tokens << entity(*entity.join.split(":").reverse, pos: pos)
                i = j + 1
                # the fragments between the braces, plus "{{" and "}}"
                pos += entity.join.length + 4
                found_entity = true
                break
              end
              j += 1
            end
            next if found_entity
          end
          # no closing "}}" (or entities are disabled): plain punctuation
          tokens << punctuation(ENTITY_START, pos: pos)

        when REGEXP_START
          if match_regexps && strings[i + 1] == "?"
            j = i + 2
            found_regexp = false
            parens = 1
            inside_square_bracket = false
            while j < strings.length
              if strings[j] == "[" && strings[j - 1] != REGEXP_ESCAPE
                inside_square_bracket = true
              elsif strings[j] == "]" && strings[j - 1] != REGEXP_ESCAPE
                inside_square_bracket = false
              end

              # parentheses inside a character class do not nest
              unless inside_square_bracket
                if strings[j] == REGEXP_START && strings[j - 1] != REGEXP_ESCAPE
                  parens += 1
                elsif strings[j] == REGEXP_END && strings[j - 1] != REGEXP_ESCAPE
                  parens -= 1
                end

                if parens == 0
                  source = strings[i..j].join
                  tokens << regexp(source, pos: pos)
                  # strings[i..j] already contains the opening and the
                  # closing parenthesis, so its length is the whole distance
                  # to the next fragment
                  pos += source.length
                  i = j + 1
                  found_regexp = true
                  break
                end
              end
              j += 1
            end
            next if found_regexp
          end
          # unbalanced (or regexps are disabled): plain punctuation
          tokens << punctuation(REGEXP_START, pos: pos)

        when PUNCTUATION
          tokens << punctuation(string, pos: pos)

        when ME
          tokens << me(pos: pos)

        else
          if replace_with = Attentive::Text::SLANG[string]
            tokens.concat tokenize(replace_with, options)

          elsif expands_to = Attentive::Text::CONTRACTIONS[string]
            possibilities = expands_to.map do |possibility|
              tokenize(possibility, options)
            end

            # a contraction with a single expansion is not ambiguous
            if possibilities.length == 1
              tokens.concat possibilities[0]
            else
              tokens << any_of(possibilities, pos: pos)
            end
          else
            tokens << word(string, pos: pos)
          end
        end

        i += 1
        pos += string.length
      end

      fail_if_ambiguous!(message, tokens) if fail_if_ambiguous

      Attentive::Phrase.new(tokens)
    end

    # Raises Attentive::AmbiguousPhraseError, naming the alternatives of the
    # first ambiguous token, if any of the tokens is ambiguous.
    def self.fail_if_ambiguous!(phrase, tokens)
      ambiguous_token = tokens.find(&:ambiguous?)
      return unless ambiguous_token

      raise Attentive::AmbiguousPhraseError.new(
        "The phrase #{phrase.inspect} is ambiguous. " <<
        "Please use #{ambiguous_token.possibilities.map(&:inspect).join(" or ")}")
    end

  end
end
154
+
155
+ # Not the perfect place for these...
156
+ # Attentive::Tokenizer needs to be defined first...
157
+ require "attentive/entity"
158
+ require "attentive/composite_entity"
159
+
160
+ require "attentive/entities/integer"
161
+ require "attentive/entities/relative_date"
@@ -0,0 +1,23 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A token that stands for several alternatives. The tokenizer creates
    # one for a contraction that has more than one expansion.
    class AnyOf < Token
      attr_reader :possibilities

      def initialize(possibilities, pos)
        super(pos)
        @possibilities = possibilities
      end

      # Equal when the classes are the same and the alternatives are the same
      def ==(other)
        return false unless self.class == other.class
        possibilities == other.possibilities
      end

      def ambiguous?
        true
      end

    end
  end
end
@@ -0,0 +1,17 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # An emoji written as ":name:". The token's string is the name without
    # the colons.
    class Emoji < StringToken

      # The name with its surrounding colons restored
      def to_s
        name = string
        ":#{name}:"
      end

      def skippable?
        true
      end

    end
  end
end
@@ -0,0 +1,17 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # The token the tokenizer produces for the text "@me"
    class Me < Token

      # Always the tokenizer's ME constant
      def to_s
        Attentive::Tokenizer::ME
      end

      def skippable?
        true
      end

    end
  end
end
@@ -0,0 +1,13 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A punctuation token; it may be skipped over when matching
    class Punctuation < StringToken

      def skippable?
        true
      end

    end
  end
end
@@ -0,0 +1,27 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A token that matches text with a regular expression
    class Regexp < Token
      attr_reader :regexp

      # string - the source of the regular expression
      # pos    - the position of the token
      #
      # The expression is anchored with \A so that it can only match at the
      # very start of the text it is given. (With ^ it could also match at
      # the start of a later line, but Matcher works out how far to advance
      # by adding the length of the match to the cursor's offset, which
      # assumes the match starts where the cursor is.)
      def initialize(string, pos)
        @regexp = ::Regexp.compile("\\A#{string}")
        super pos
      end

      def ==(other)
        self.class == other.class && self.regexp == other.regexp
      end

      # Matches the expression against the cursor's text (cursor.to_s).
      # Returns a MatchData, or nil when there is no match.
      def matches?(cursor)
        regexp.match(cursor.to_s)
      end

      # The expression without its surrounding slashes. The anchor is shown
      # as "^", which is how this token has always been rendered.
      def to_s
        regexp.inspect[1...-1].sub(/\A\\A/, "^")
      end

    end
  end
end
@@ -0,0 +1,22 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A run of whitespace
    class Whitespace < StringToken

      # All whitespace is equal: only the class is compared, not the string
      def ==(other)
        other.class == self.class
      end

      def skippable?
        true
      end

      def whitespace?
        true
      end

    end
  end
end
@@ -0,0 +1,8 @@
1
+ require "attentive/token"
2
+
3
module Attentive
  module Tokens
    # A word; it adds nothing to StringToken
    class Word < StringToken
    end
  end
end
@@ -0,0 +1,45 @@
1
module Attentive
  # Factory methods, one per kind of token. Each takes the token's position
  # as the optional keyword +pos+.
  module Tokens

    def any_of(possibilities, pos: nil)
      Attentive::Tokens::AnyOf.new(possibilities, pos)
    end

    def emoji(string, pos: nil)
      Attentive::Tokens::Emoji.new(string, pos)
    end

    # Looks the entity class up by name and instantiates it with the
    # variable name, which defaults to the entity name.
    # NOTE(review): +pos+ is accepted but not passed on to the entity --
    # confirm whether entities are meant to know their position.
    def entity(entity_name, variable_name=entity_name, pos: nil)
      entity_class = Attentive::Entity[entity_name.to_sym]
      entity_class.new(variable_name)
    end

    def me(pos: nil)
      Attentive::Tokens::Me.new(pos)
    end

    def punctuation(string, pos: nil)
      Attentive::Tokens::Punctuation.new(string, pos)
    end

    def regexp(string, pos: nil)
      Attentive::Tokens::Regexp.new(string, pos)
    end

    def whitespace(string, pos: nil)
      Attentive::Tokens::Whitespace.new(string, pos)
    end

    def word(string, pos: nil)
      Attentive::Tokens::Word.new(string, pos)
    end

  end
end
38
+
39
+ require "attentive/tokens/any_of"
40
+ require "attentive/tokens/emoji"
41
+ require "attentive/tokens/me"
42
+ require "attentive/tokens/punctuation"
43
+ require "attentive/tokens/regexp"
44
+ require "attentive/tokens/whitespace"
45
+ require "attentive/tokens/word"
@@ -0,0 +1,3 @@
1
module Attentive
  # The version of the gem
  VERSION = "0.1.0.beta1"
end
data/lib/attentive.rb ADDED
@@ -0,0 +1,20 @@
1
+ require "attentive/version"
2
+ require "attentive/listener_collection"
3
+ require "attentive/message"
4
+
5
module Attentive

  # The collection of listeners; created on first use and then reused
  def listeners
    @listeners ||= Attentive::ListenerCollection.new
  end

  # Forwards its arguments and block to the listener collection
  def listen_for(*args, &block)
    collection = listeners
    collection.listen_for(*args, &block)
  end

  # Passes a message to the listener collection. Anything that is not
  # already an Attentive::Message is wrapped in one first, using +params+.
  def hear(message, params={})
    unless message.is_a?(Attentive::Message)
      message = Attentive::Message.new(message, params)
    end
    listeners.hear(message)
  end

end