answerific 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a6b5905e26045c85b62eadee1a917f7a6f33edb5
4
- data.tar.gz: b1721baa87c03b00cf36e614dc50561dc8473a35
3
+ metadata.gz: e81fef1ef9b77e8823ea1eea1f157e52c80af930
4
+ data.tar.gz: 9e806579b3bc22837c58daf63a4200eef9a698dd
5
5
  SHA512:
6
- metadata.gz: b30eed839c2149e237033351b458427bb56274d970db991977a689232a5cecd55e7ece89d78a8f1cd40c5ce72b8bb60622fb9f7cd159b87e45b8ac3f14359927
7
- data.tar.gz: 8b347d026f099cbb41b02b626bd524724edc135b7c61f1fd038b7af1efd93a66c302d1d4107e62b79b2009b90dc86b6f3fa70be826185e9d863de8aeae2053e3
6
+ metadata.gz: a5d1fd241089b7f2bd3feea69a193361684734bdd5012cbc8aee7992ac9d6fa2fb1081a2b7e3bd6d0a3b9d3a5d0bb10990262b2cf44b33dbade31e64c8635b88
7
+ data.tar.gz: a84357bd7aa62b49affcb427c7d5e1256bbb7fe1529b498cccb7a50f9694705aa2e5ac5168b0202f8ebde3d2dc9b22252fe19ad45f52546585ab633fd1a647cc
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Answerific
2
2
 
3
- AI Bot that can answer questions posed in natural language.
3
+ Mining bot that can answer natural language questions by mining the web.
4
4
 
5
5
  ## Installation
6
6
 
@@ -20,7 +20,7 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- bot = Answerific::Bot.new()
23
+ bot = Answerific::Miner.new()
24
24
  bot.answer('what is the composition of Pluto?')
25
25
 
26
26
  ## How it works
@@ -35,11 +35,9 @@ Given an input, answerific will
35
35
 
36
36
  ## Roadmap
37
37
 
38
- * Add options at initialization
39
- * Sentence split on dot: handle abbreviations
40
- * Return special message when no result found? Or just nil?
41
- * Better support for wh-words (atm, the bot just gets rid of them)
42
- * Better support for yes-no questions: answer with definite yes-no instead of statement
38
+ * [ ] Add options at initialization
39
+ * [ ] Better support for wh-words (atm, the bot just gets rid of them)
40
+ * [ ] Better support for yes-no questions: answer with definite yes-no instead of statement
43
41
 
44
42
  ## Development
45
43
 
data/Rakefile CHANGED
@@ -12,3 +12,5 @@ task :default => :spec
12
12
  task :console do
13
13
  exec "irb -r answerific -I ./lib"
14
14
  end
15
+
16
+ task :c => :console
@@ -0,0 +1,174 @@
1
+ require 'cgi'
2
+
3
+ # Miner bot that answers questions by extracting information from the web
4
+ # Currently only supports Google Search
5
+ class Answerific::Miner
6
+
7
+ # Answers `question` by querying Google
8
+ # `question` may be raw input: it is run through `preprocess` here
9
+ # (downcased, non alpha-numeric characters removed) before being parsed
10
+ # Returns a string containing the response or nil if none is found
11
+ def answer(question)
12
+ p 'Answering ' + question
13
+ return nil if !question || question.empty?
14
+ mine(parse(preprocess(question)))
15
+ end
16
+
17
+ # === SELECT RESPONSE ===
18
+
19
+ def process_google_results(results, query)
20
+ candidates = select_responses(results, query)
21
+ select_best_response(candidates)
22
+ end
23
+
24
+ # Returns a single response from the list of responses
25
+ # TODO how to select the best? right now, returns a random one (Array#sample)
26
+ def select_best_response(responses)
27
+ responses.sample
28
+ end
29
+
30
+ # Returns the sentences from `results` that contain all the words in `query`
31
+ def select_responses(results, query)
32
+ sentences = results.map { |r| split_at_dot(r) }.flatten
33
+ query_words = query.split ' '
34
+
35
+ # Select the responses, only keeping the sentence that contain the search query
36
+ selected = sentences.select do |sentence|
37
+ query_words.all? { |w| sentence.include? w } # contains all query words
38
+ end
39
+
40
+ return selected
41
+ end
42
+
43
+ # === EXTRACT INFO ===
44
+
45
+ def mine(query)
46
+ results = []
47
+
48
+ Google::Search::Web.new(query: query).each do |r|
49
+ results << clean_google_result(r.content)
50
+ end
51
+
52
+ process_google_results(results, query)
53
+ end
54
+
55
+ # === PARSE AND REARRANGE === (prepare for search engines)
56
+
57
+ def parse(question)
58
+ type = broad_question_type question
59
+ parsed = ''
60
+
61
+ case type
62
+ when 'wh'
63
+ parsed = parse_wh_question question
64
+ when 'yes-no'
65
+ parsed = parse_yes_no_question question
66
+ when 'declarative'
67
+ parsed = parse_declarative_question question
68
+ end
69
+
70
+ return parsed
71
+ end
72
+
73
+ # TODO consider verb permutations
74
+ # TODO consider wh-word: where is the sun => the sun is [located]
75
+ # Parses the wh-question `question` by removing the wh-word and moving the main verb to the end
76
+ # Assumptions:
77
+ # * wh-word is at the beginning
78
+ # * main verb follows the wh-word
79
+ # (TODO not accurate for which/whose but should be ok for the others)
80
+ # Example:
81
+ # question: 'where is the Kuiper belt'
82
+ # returns : 'the Kuiper belt is'
83
+ def parse_wh_question(question)
84
+ words = question.split ' '
85
+ parsed = words[2..-1] << words[1]
86
+ parsed.join " "
87
+ end
88
+
89
+ # Returns an array of permutations of the main verb in the question without the wh-word
90
+ # Parses the wh-question `question` by removing the wh-word
91
+ # Assumptions:
92
+ # * wh-word is at the beginning
93
+ # * main verb follows the wh-word
94
+ # (TODO not accurate for which/whose but should be ok for the others)
95
+ # Example:
96
+ # question: 'where is the Kuiper belt'
97
+ # returns : ['is the Kuiper belt',
98
+ # 'the is Kuiper belt',
99
+ # 'the Kuiper is belt',
100
+ # 'the Kuiper belt is']
101
+ # def parse_wh_question(question)
102
+
103
+ # end
104
+
105
+ # Returns `question` without the yes-no verb
106
+ # Example:
107
+ # question: 'is pluto closer to the sun than saturn'
108
+ # returns : 'pluto closer to the sun than saturn'
109
+ def parse_yes_no_question(question)
110
+ words = question.split ' '
111
+ return words[1..-1].join ' '
112
+ end
113
+
114
+ # Returns `question` without the declarative statement
115
+ # Example:
116
+ # question: 'tell me what is Pluto'
117
+ # returns : 'what is Pluto'
118
+ def parse_declarative_question(question)
119
+ declarative_expressions = [ 'tell me', 'I want to know' ]
120
+ return question.gsub(/^#{Regexp.union(*declarative_expressions)}/, '').strip
121
+ end
122
+
123
+ # === DETECT TYPE OF QUESTION ===
124
+
125
+ def broad_question_type(question)
126
+ return 'wh' if is_wh_question question
127
+ return 'yes-no' if is_yes_no_question question
128
+ return 'declarative'
129
+ end
130
+
131
+ # Returns true if question starts with a wh-question word
132
+ def is_wh_question(question)
133
+ wh_words = %w(who where when why what which how)
134
+ return /^#{Regexp.union(*wh_words)}/ === question
135
+ end
136
+
137
+ # Returns true if question starts with a yes-no question expression
138
+ def is_yes_no_question(question)
139
+ yes_no_words = %w(am are is was were have has do does did can could should may)
140
+ return /^#{Regexp.union(*yes_no_words)}/ === question
141
+ end
142
+
143
+ # === PREPROCESSING ===
144
+
145
+ # Returns cleaned `input`
146
+ def preprocess(input)
147
+ clean(input)
148
+ end
149
+
150
+ # Cleans the string `input` by removing non alpha-numeric characters
151
+ def clean(input)
152
+ ret = input.downcase
153
+ ret.gsub(/[^0-9a-z ]/i, '').strip
154
+ end
155
+
156
+ # === OTHER FORMATTING ===
157
+
158
+ def clean_google_result(string)
159
+ string = CGI.unescapeHTML(string)
160
+ string
161
+ .downcase
162
+ .gsub(/[^\.]+\.{3,}/, '') # remove incomplete sentences
163
+ .gsub(/<("[^"]*"|'[^']*'|[^'">])*>/, '') # html tags
164
+ .gsub(/\w{3} \d{1,2}, \d{4} \.{3} /, '') # leading dates followed by an ellipsis (e.g. "jan 27, 2015 ... ")
165
+ .gsub("\n",'') # new lines
166
+ .strip
167
+ end
168
+
169
+ def split_at_dot(string)
170
+ # matches NUM. or ALPHAALPHA.
171
+ re = /([0-9]|[a-z]{2})[\.\?!] ?/i
172
+ string.split(re).each_slice(2).map(&:join)
173
+ end
174
+ end
@@ -1,3 +1,3 @@
1
1
  module Answerific
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/answerific.rb CHANGED
@@ -1,166 +1,6 @@
1
1
  require "answerific/version"
2
+ require "answerific/miner.rb"
2
3
  require "google-search"
3
4
 
4
5
  module Answerific
5
- class Bot
6
-
7
- def answer(question)
8
- mine(parse(preprocess(question)))
9
- end
10
-
11
- # === SELECT RESPONSE ===
12
-
13
- def process_google_results(results, query)
14
- candidates = select_responses(results, query)
15
- select_best_response(candidates)
16
- end
17
-
18
- # Returns a single response from the list of responses
19
- # TODO how to select the best? right now, return the first one
20
- def select_best_response(responses)
21
- responses.sample
22
- end
23
-
24
- # Returns the responses from `results` that have a the words in `query`
25
- def select_responses(results, query)
26
- sentences = results.map { |r| split_at_dot r }.flatten
27
- query_words = query.split ' '
28
-
29
- # Select the responses, only keeping the sentence that contain the search query
30
- selected = sentences.select do |sentence|
31
- query_words.all? { |w| sentence.include? w } # contains all query words
32
- end
33
-
34
- return selected
35
- end
36
-
37
- # === EXTRACT INFO ===
38
-
39
- def mine(query)
40
- results = []
41
-
42
- Google::Search::Web.new(query: query).each do |r|
43
- results << clean_google_result(r.content)
44
- end
45
-
46
- process_google_results(results, query)
47
- end
48
-
49
- # === PARSE AND REARRANGE === (prepare for search engines)
50
-
51
- def parse(question)
52
- type = broad_question_type question
53
- parsed = ''
54
-
55
- case type
56
- when 'wh'
57
- parsed = parse_wh_question question
58
- when 'yes-no'
59
- parsed = parse_yes_no_question question
60
- when 'declarative'
61
- parsed = parse_declarative_question question
62
- end
63
-
64
- return parsed
65
- end
66
-
67
- # TODO consider verb permutations
68
- # TODO consider wh-word: where is the sun => the sun is [located]
69
- # Parses the wh-question `question` by removing the wh-word and moving the main verb at the end
70
- # Assumptions:
71
- # * wh-word is at the beginning
72
- # * main verb follows the wh-word
73
- # (TODO not accurate for which/whose but should be ok for the others)
74
- # Example:
75
- # question: 'where is the Kuiper belt'
76
- # returns : 'the Kuiper belt is'
77
- def parse_wh_question(question)
78
- words = question.split ' '
79
- parsed = words[2..-1] << words[1]
80
- parsed.join " "
81
- end
82
-
83
- # Returns an array of permutations of the main verb in the question without the wh-word
84
- # Parses the wh-question `question` by removing the wh-word
85
- # Assumptions:
86
- # * wh-word is at the beginning
87
- # * main verb follows the wh-word
88
- # (TODO not accurate for which/whose but should be ok for the others)
89
- # Example:
90
- # question: 'where is the Kuiper belt'
91
- # returns : ['is the Kuiper belt',
92
- # 'the is Kuiper belt',
93
- # 'the Kuiper is belt',
94
- # 'the Kuiper belt is']
95
- # def parse_wh_question(question)
96
-
97
- # end
98
-
99
- # Returns `question` without the yes-no verb
100
- # Example:
101
- # question: 'is pluto closer to the sun than saturn'
102
- # returns : 'pluto closer to the sun than saturn'
103
- def parse_yes_no_question(question)
104
- words = question.split ' '
105
- return words[1..-1].join ' '
106
- end
107
-
108
- # Returns `question` without the declarative statement
109
- # Example:
110
- # question: 'tell me what is Pluto'
111
- # returns : 'what is Pluto'
112
- def parse_declarative_question(question)
113
- declarative_expressions = [ 'tell me', 'I want to know' ]
114
- return question.gsub(/^#{Regexp.union(*declarative_expressions)}/, '').strip
115
- end
116
-
117
- # === DETECT TYPE OF QUESTION ===
118
-
119
- def broad_question_type(question)
120
- return 'wh' if is_wh_question question
121
- return 'yes-no' if is_yes_no_question question
122
- return 'declarative'
123
- end
124
-
125
- # Returns true if question starts with a wh-question word
126
- def is_wh_question(question)
127
- wh_words = %w(who where when why what which how)
128
- return /^#{Regexp.union(*wh_words)}/ === question
129
- end
130
-
131
- # Returns true if question starts with a yes-no question expression
132
- def is_yes_no_question(question)
133
- yes_no_words = %w(am are is was were have has do does did can could should may)
134
- return /^#{Regexp.union(*yes_no_words)}/ === question
135
- end
136
-
137
- # === PREPROCESSING ===
138
-
139
- # Returns cleaned `input`
140
- def preprocess(input)
141
- clean(input)
142
- end
143
-
144
- # Cleans the string `input` by removing non alpha-numeric characters
145
- def clean(input)
146
- ret = input.downcase
147
- ret.gsub(/[^0-9a-z ]/i, '').strip
148
- end
149
-
150
- # === OTHER FORMATTING ===
151
-
152
- def clean_google_result(string)
153
- string
154
- .downcase
155
- .gsub(/[^\.]+\.{3,}/, '') # remove incomplete sentences
156
- .gsub(/<("[^"]*"|'[^']*'|[^'">])*>/, '') # html tags
157
- .gsub(/\w{3} \d{1,2}, \d{4} \.{3} /, '') # dates (27 Jan, 2015)
158
- .gsub("\n",'') # new lines
159
- end
160
-
161
- def split_at_dot(string)
162
- re = /([a-z]{2})[\.\?!] ?/i # regex to match *aa. where a is any letter
163
- string.split(re).each_slice(2).map(&:join)
164
- end
165
- end
166
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answerific
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Domingue
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-04-29 00:00:00.000000000 Z
11
+ date: 2015-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -125,6 +125,7 @@ files:
125
125
  - bin/console
126
126
  - bin/setup
127
127
  - lib/answerific.rb
128
+ - lib/answerific/miner.rb
128
129
  - lib/answerific/version.rb
129
130
  homepage: https://github.com/justindomingue/answerific
130
131
  licenses: []