answerific 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a6b5905e26045c85b62eadee1a917f7a6f33edb5
4
- data.tar.gz: b1721baa87c03b00cf36e614dc50561dc8473a35
3
+ metadata.gz: e81fef1ef9b77e8823ea1eea1f157e52c80af930
4
+ data.tar.gz: 9e806579b3bc22837c58daf63a4200eef9a698dd
5
5
  SHA512:
6
- metadata.gz: b30eed839c2149e237033351b458427bb56274d970db991977a689232a5cecd55e7ece89d78a8f1cd40c5ce72b8bb60622fb9f7cd159b87e45b8ac3f14359927
7
- data.tar.gz: 8b347d026f099cbb41b02b626bd524724edc135b7c61f1fd038b7af1efd93a66c302d1d4107e62b79b2009b90dc86b6f3fa70be826185e9d863de8aeae2053e3
6
+ metadata.gz: a5d1fd241089b7f2bd3feea69a193361684734bdd5012cbc8aee7992ac9d6fa2fb1081a2b7e3bd6d0a3b9d3a5d0bb10990262b2cf44b33dbade31e64c8635b88
7
+ data.tar.gz: a84357bd7aa62b49affcb427c7d5e1256bbb7fe1529b498cccb7a50f9694705aa2e5ac5168b0202f8ebde3d2dc9b22252fe19ad45f52546585ab633fd1a647cc
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Answerific
2
2
 
3
- AI Bot that can answer questions posed in natural language.
3
+ Miner bot that answers natural-language questions by mining the web.
4
4
 
5
5
  ## Installation
6
6
 
@@ -20,7 +20,7 @@ Or install it yourself as:
20
20
 
21
21
  ## Usage
22
22
 
23
- bot = Answerific::Bot.new()
23
+ bot = Answerific::Miner.new()
24
24
  bot.answer('what is the composition of Pluto?')
25
25
 
26
26
  ## How it works
@@ -35,11 +35,9 @@ Given an input, answerific will
35
35
 
36
36
  ## Roadmap
37
37
 
38
- * Add options at initialization
39
- * Sentence split on dot: handle abbreviations
40
- * Return special message when no result found? Or just nil?
41
- * Better support for wh-words (atm, the bot just gets rid of them)
42
- * Better support for yes-no questions: answer with definite yes-no instead of statement
38
+ * [ ] Add options at initialization
39
+ * [ ] Better support for wh-words (atm, the bot just gets rid of them)
40
+ * [ ] Better support for yes-no questions: answer with definite yes-no instead of statement
43
41
 
44
42
  ## Development
45
43
 
data/Rakefile CHANGED
@@ -12,3 +12,5 @@ task :default => :spec
12
12
  task :console do
13
13
  exec "irb -r answerific -I ./lib"
14
14
  end
15
+
16
+ task :c => :console
@@ -0,0 +1,174 @@
1
+ require 'cgi'
2
+
3
+ # Miner bot that answers questions by extracting information from the web
4
+ # Currently only supports Google Search
5
+ class Answerific::Miner
6
+
7
+ # Answers `question` by querying Google
8
+ # `question` is downcased and stripped of non-alphanumeric characters by `preprocess`
9
+ # before it is parsed, so callers do not need to clean it first
10
+ # Returns a string containing the response or nil if none is found
11
+ def answer(question)
12
+ p 'Answering ' + question
13
+ return nil if !question || question.empty?
14
+ mine(parse(preprocess(question)))
15
+ end
16
+
17
+ # === SELECT RESPONSE ===
18
+
19
+ def process_google_results(results, query)
20
+ candidates = select_responses(results, query)
21
+ select_best_response(candidates)
22
+ end
23
+
24
+ # Returns a single response from the list of responses
25
+ # TODO how to select the best? right now, returns a random one (Array#sample)
26
+ def select_best_response(responses)
27
+ responses.sample
28
+ end
29
+
30
+ # Returns the sentences from `results` that contain all the words in `query`
31
+ def select_responses(results, query)
32
+ sentences = results.map { |r| split_at_dot(r) }.flatten
33
+ query_words = query.split ' '
34
+
35
+ # Select the responses, only keeping the sentences that contain every word of the search query
36
+ selected = sentences.select do |sentence|
37
+ query_words.all? { |w| sentence.include? w } # contains all query words
38
+ end
39
+
40
+ return selected
41
+ end
42
+
43
+ # === EXTRACT INFO ===
44
+
45
+ def mine(query)
46
+ results = []
47
+
48
+ Google::Search::Web.new(query: query).each do |r|
49
+ results << clean_google_result(r.content)
50
+ end
51
+
52
+ process_google_results(results, query)
53
+ end
54
+
55
+ # === PARSE AND REARRANGE === (prepare for search engines)
56
+
57
+ def parse(question)
58
+ type = broad_question_type question
59
+ parsed = ''
60
+
61
+ case type
62
+ when 'wh'
63
+ parsed = parse_wh_question question
64
+ when 'yes-no'
65
+ parsed = parse_yes_no_question question
66
+ when 'declarative'
67
+ parsed = parse_declarative_question question
68
+ end
69
+
70
+ return parsed
71
+ end
72
+
73
+ # TODO consider verb permutations
74
+ # TODO consider wh-word: where is the sun => the sun is [located]
75
+ # Parses the wh-question `question` by removing the wh-word and moving the main verb to the end
76
+ # Assumptions:
77
+ # * wh-word is at the beginning
78
+ # * main verb follows the wh-word
79
+ # (TODO not accurate for which/whose but should be ok for the others)
80
+ # Example:
81
+ # question: 'where is the Kuiper belt'
82
+ # returns : 'the Kuiper belt is'
83
+ def parse_wh_question(question)
84
+ words = question.split ' '
85
+ parsed = words[2..-1] << words[1]
86
+ parsed.join " "
87
+ end
88
+
89
+ # Returns an array of permutations of the main verb in the question without the wh-word
90
+ # Parses the wh-question `question` by removing the wh-word
91
+ # Assumptions:
92
+ # * wh-word is at the beginning
93
+ # * main verb follows the wh-word
94
+ # (TODO not accurate for which/whose but should be ok for the others)
95
+ # Example:
96
+ # question: 'where is the Kuiper belt'
97
+ # returns : ['is the Kuiper belt',
98
+ # 'the is Kuiper belt',
99
+ # 'the Kuiper is belt',
100
+ # 'the Kuiper belt is']
101
+ # def parse_wh_question(question)
102
+
103
+ # end
104
+
105
+ # Returns `question` without the yes-no verb
106
+ # Example:
107
+ # question: 'is pluto closer to the sun than saturn'
108
+ # returns : 'pluto closer to the sun than saturn'
109
+ def parse_yes_no_question(question)
110
+ words = question.split ' '
111
+ return words[1..-1].join ' '
112
+ end
113
+
114
+ # Returns `question` without the declarative statement
115
+ # Example:
116
+ # question: 'tell me what is Pluto'
117
+ # returns : 'what is Pluto'
118
+ def parse_declarative_question(question)
119
+ declarative_expressions = [ 'tell me', 'I want to know' ]
120
+ return question.gsub(/^#{Regexp.union(*declarative_expressions)}/, '').strip
121
+ end
122
+
123
+ # === DETECT TYPE OF QUESTION ===
124
+
125
+ def broad_question_type(question)
126
+ return 'wh' if is_wh_question question
127
+ return 'yes-no' if is_yes_no_question question
128
+ return 'declarative'
129
+ end
130
+
131
+ # Returns true if question starts with a wh-question word
132
+ def is_wh_question(question)
133
+ wh_words = %w(who where when why what which how)
134
+ return /^#{Regexp.union(*wh_words)}/ === question
135
+ end
136
+
137
+ # Returns true if question starts with a yes-no question expression
138
+ def is_yes_no_question(question)
139
+ yes_no_words = %w(am are is was were have has do does did can could should may)
140
+ return /^#{Regexp.union(*yes_no_words)}/ === question
141
+ end
142
+
143
+ # === PREPROCESSING ===
144
+
145
+ # Returns cleaned `input`
146
+ def preprocess(input)
147
+ clean(input)
148
+ end
149
+
150
+ # Cleans the string `input` by downcasing it, removing every character that is not a letter, digit or space, and stripping surrounding whitespace
151
+ def clean(input)
152
+ ret = input.downcase
153
+ ret.gsub(/[^0-9a-z ]/i, '').strip
154
+ end
155
+
156
+ # === OTHER FORMATTING ===
157
+
158
+ def clean_google_result(string)
159
+ string = CGI.unescapeHTML(string)
160
+ string
161
+ .downcase
162
+ .gsub(/[^\.]+\.{3,}/, '') # remove incomplete sentences
163
+ .gsub(/<("[^"]*"|'[^']*'|[^'">])*>/, '') # html tags
164
+ .gsub(/\w{3} \d{1,2}, \d{4} \.{3} /, '') # dates (27 Jan, 2015)
165
+ .gsub("\n",'') # new lines
166
+ .strip
167
+ end
168
+
169
+ def split_at_dot(string)
170
+ # matches a digit or two letters followed by '.', '?' or '!' and an optional space
171
+ re = /([0-9]|[a-z]{2})[\.\?!] ?/i
172
+ string.split(re).each_slice(2).map(&:join)
173
+ end
174
+ end
@@ -1,3 +1,3 @@
1
1
  module Answerific
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/answerific.rb CHANGED
@@ -1,166 +1,6 @@
1
1
  require "answerific/version"
2
+ require "answerific/miner.rb"
2
3
  require "google-search"
3
4
 
4
5
  module Answerific
5
- class Bot
6
-
7
- def answer(question)
8
- mine(parse(preprocess(question)))
9
- end
10
-
11
- # === SELECT RESPONSE ===
12
-
13
- def process_google_results(results, query)
14
- candidates = select_responses(results, query)
15
- select_best_response(candidates)
16
- end
17
-
18
- # Returns a single response from the list of responses
19
- # TODO how to select the best? right now, return the first one
20
- def select_best_response(responses)
21
- responses.sample
22
- end
23
-
24
- # Returns the responses from `results` that have a the words in `query`
25
- def select_responses(results, query)
26
- sentences = results.map { |r| split_at_dot r }.flatten
27
- query_words = query.split ' '
28
-
29
- # Select the responses, only keeping the sentence that contain the search query
30
- selected = sentences.select do |sentence|
31
- query_words.all? { |w| sentence.include? w } # contains all query words
32
- end
33
-
34
- return selected
35
- end
36
-
37
- # === EXTRACT INFO ===
38
-
39
- def mine(query)
40
- results = []
41
-
42
- Google::Search::Web.new(query: query).each do |r|
43
- results << clean_google_result(r.content)
44
- end
45
-
46
- process_google_results(results, query)
47
- end
48
-
49
- # === PARSE AND REARRANGE === (prepare for search engines)
50
-
51
- def parse(question)
52
- type = broad_question_type question
53
- parsed = ''
54
-
55
- case type
56
- when 'wh'
57
- parsed = parse_wh_question question
58
- when 'yes-no'
59
- parsed = parse_yes_no_question question
60
- when 'declarative'
61
- parsed = parse_declarative_question question
62
- end
63
-
64
- return parsed
65
- end
66
-
67
- # TODO consider verb permutations
68
- # TODO consider wh-word: where is the sun => the sun is [located]
69
- # Parses the wh-question `question` by removing the wh-word and moving the main verb at the end
70
- # Assumptions:
71
- # * wh-word is at the beginning
72
- # * main verb follows the wh-word
73
- # (TODO not accurate for which/whose but should be ok for the others)
74
- # Example:
75
- # question: 'where is the Kuiper belt'
76
- # returns : 'the Kuiper belt is'
77
- def parse_wh_question(question)
78
- words = question.split ' '
79
- parsed = words[2..-1] << words[1]
80
- parsed.join " "
81
- end
82
-
83
- # Returns an array of permutations of the main verb in the question without the wh-word
84
- # Parses the wh-question `question` by removing the wh-word
85
- # Assumptions:
86
- # * wh-word is at the beginning
87
- # * main verb follows the wh-word
88
- # (TODO not accurate for which/whose but should be ok for the others)
89
- # Example:
90
- # question: 'where is the Kuiper belt'
91
- # returns : ['is the Kuiper belt',
92
- # 'the is Kuiper belt',
93
- # 'the Kuiper is belt',
94
- # 'the Kuiper belt is']
95
- # def parse_wh_question(question)
96
-
97
- # end
98
-
99
- # Returns `question` without the yes-no verb
100
- # Example:
101
- # question: 'is pluto closer to the sun than saturn'
102
- # returns : 'pluto closer to the sun than saturn'
103
- def parse_yes_no_question(question)
104
- words = question.split ' '
105
- return words[1..-1].join ' '
106
- end
107
-
108
- # Returns `question` without the declarative statement
109
- # Example:
110
- # question: 'tell me what is Pluto'
111
- # returns : 'what is Pluto'
112
- def parse_declarative_question(question)
113
- declarative_expressions = [ 'tell me', 'I want to know' ]
114
- return question.gsub(/^#{Regexp.union(*declarative_expressions)}/, '').strip
115
- end
116
-
117
- # === DETECT TYPE OF QUESTION ===
118
-
119
- def broad_question_type(question)
120
- return 'wh' if is_wh_question question
121
- return 'yes-no' if is_yes_no_question question
122
- return 'declarative'
123
- end
124
-
125
- # Returns true if question starts with a wh-question word
126
- def is_wh_question(question)
127
- wh_words = %w(who where when why what which how)
128
- return /^#{Regexp.union(*wh_words)}/ === question
129
- end
130
-
131
- # Returns true if question starts with a yes-no question expression
132
- def is_yes_no_question(question)
133
- yes_no_words = %w(am are is was were have has do does did can could should may)
134
- return /^#{Regexp.union(*yes_no_words)}/ === question
135
- end
136
-
137
- # === PREPROCESSING ===
138
-
139
- # Returns cleaned `input`
140
- def preprocess(input)
141
- clean(input)
142
- end
143
-
144
- # Cleans the string `input` by removing non alpha-numeric characters
145
- def clean(input)
146
- ret = input.downcase
147
- ret.gsub(/[^0-9a-z ]/i, '').strip
148
- end
149
-
150
- # === OTHER FORMATTING ===
151
-
152
- def clean_google_result(string)
153
- string
154
- .downcase
155
- .gsub(/[^\.]+\.{3,}/, '') # remove incomplete sentences
156
- .gsub(/<("[^"]*"|'[^']*'|[^'">])*>/, '') # html tags
157
- .gsub(/\w{3} \d{1,2}, \d{4} \.{3} /, '') # dates (27 Jan, 2015)
158
- .gsub("\n",'') # new lines
159
- end
160
-
161
- def split_at_dot(string)
162
- re = /([a-z]{2})[\.\?!] ?/i # regex to match *aa. where a is any letter
163
- string.split(re).each_slice(2).map(&:join)
164
- end
165
- end
166
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: answerific
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Domingue
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2015-04-29 00:00:00.000000000 Z
11
+ date: 2015-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -125,6 +125,7 @@ files:
125
125
  - bin/console
126
126
  - bin/setup
127
127
  - lib/answerific.rb
128
+ - lib/answerific/miner.rb
128
129
  - lib/answerific/version.rb
129
130
  homepage: https://github.com/justindomingue/answerific
130
131
  licenses: []