RubyGems - answerific - Versions diffs - 0.1.1 → 0.2.0 - Mend

answerific 0.1.1 → 0.2.0

Files changed (7) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a6b5905e26045c85b62eadee1a917f7a6f33edb5
-  data.tar.gz: b1721baa87c03b00cf36e614dc50561dc8473a35
+  metadata.gz: e81fef1ef9b77e8823ea1eea1f157e52c80af930
+  data.tar.gz: 9e806579b3bc22837c58daf63a4200eef9a698dd
 SHA512:
-  metadata.gz: b30eed839c2149e237033351b458427bb56274d970db991977a689232a5cecd55e7ece89d78a8f1cd40c5ce72b8bb60622fb9f7cd159b87e45b8ac3f14359927
-  data.tar.gz: 8b347d026f099cbb41b02b626bd524724edc135b7c61f1fd038b7af1efd93a66c302d1d4107e62b79b2009b90dc86b6f3fa70be826185e9d863de8aeae2053e3
+  metadata.gz: a5d1fd241089b7f2bd3feea69a193361684734bdd5012cbc8aee7992ac9d6fa2fb1081a2b7e3bd6d0a3b9d3a5d0bb10990262b2cf44b33dbade31e64c8635b88
+  data.tar.gz: a84357bd7aa62b49affcb427c7d5e1256bbb7fe1529b498cccb7a50f9694705aa2e5ac5168b0202f8ebde3d2dc9b22252fe19ad45f52546585ab633fd1a647cc

data/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # Answerific
-AI Bot that can answer questions posed in natural language.
+Mining bot that can answer natural language questions by mining the web.
 ## Installation
@@ -20,7 +20,7 @@ Or install it yourself as:
 ## Usage
-    bot = Answerific::Bot.new()
+    bot = Answerific::Miner.new()
     bot.answer('what is the composition of Pluto?')
 ## How it works
@@ -35,11 +35,9 @@ Given an input, answerific will
 ## Roadmap
-* Add options at initialization
-* Sentence split on dot: handle abbreviations
-* Return special message when no result found? Or just nil?
-* Better support for wh-words (atm, the bot just gets rid of them)
-* Better support for yes-no questions: answer with definite yes-no instead of statement
+* [ ] Add options at initialization
+* [ ] Better support for wh-words (atm, the bot just gets rid of them)
+* [ ] Better support for yes-no questions: answer with definite yes-no instead of statement
 ## Development

data/Rakefile CHANGED Viewed

@@ -12,3 +12,5 @@ task :default => :spec
 task :console do
   exec "irb -r answerific -I ./lib"
 end
+task :c => :console

data/lib/answerific/miner.rb ADDED Viewed

@@ -0,0 +1,174 @@
+require 'cgi'
+# Miner bot that answers questions by extracting information from the web
+# Currently only supports Google Search
+class Answerific::Miner
+  # Answers `question` by querying Google
+  # `question` is first cleaned by `preprocess` (downcased, non alpha-numeric
+  #   characters removed), then parsed and mined
+  # Returns a string containing the response or nil if none is found
+  def answer(question)
+    p 'Answering ' + question
+    return nil if !question || question.empty?
+    mine(parse(preprocess(question)))
+  end
+  # === SELECT RESPONSE ===
+  def process_google_results(results, query)
+    candidates = select_responses(results, query)
+    select_best_response(candidates)
+  end
+  # Returns a single response from the list of responses
+  # TODO how to select the best? right now, return a random one
+  def select_best_response(responses)
+    responses.sample
+  end
+  # Returns the sentences from `results` that contain all the words in `query`
+  def select_responses(results, query)
+    sentences = results.map { |r| split_at_dot(r) }.flatten
+    query_words = query.split ' '
+    # Select the responses, only keeping the sentences that contain all the query words
+    selected = sentences.select do |sentence|
+      query_words.all? { |w| sentence.include? w }  # contains all query words
+    end
+    return selected
+  end
+  # === EXTRACT INFO ===
+  def mine(query)
+    results = []
+    Google::Search::Web.new(query: query).each do |r|
+      results << clean_google_result(r.content)
+    end
+    process_google_results(results, query)
+  end
+  # === PARSE AND REARRANGE === (prepare for search engines)
+  def parse(question)
+    type = broad_question_type question
+    parsed = ''
+    case type
+    when 'wh'
+      parsed = parse_wh_question question
+    when 'yes-no'
+      parsed = parse_yes_no_question question
+    when 'declarative'
+      parsed = parse_declarative_question question
+    end
+    return parsed
+  end
+  # TODO consider verb permutations
+  # TODO consider wh-word: where is the sun => the sun is [located]
+  # Parses the wh-question `question` by removing the wh-word and moving the main verb to the end
+  # Assumptions:
+  #   * wh-word is at the beginning
+  #   * main verb follows the wh-word
+  #       (TODO not accurate for which/whose but should be ok for the others)
+  # Example:
+  #   question: 'where is the Kuiper belt'
+  #   returns : 'the Kuiper belt is'
+  def parse_wh_question(question)
+    words = question.split ' '
+    parsed = words[2..-1] << words[1]
+    parsed.join " "
+  end
+  # Returns an array of permutations of the main verb in the question without the wh-word
+  # Parses the wh-question `question` by removing the wh-word
+  # Assumptions:
+  #   * wh-word is at the beginning
+  #   * main verb follows the wh-word
+  #       (TODO not accurate for which/whose but should be ok for the others)
+  # Example:
+  #   question: 'where is the Kuiper belt'
+  #   returns : ['is the Kuiper belt',
+  #               'the is Kuiper belt',
+  #               'the Kuiper is belt',
+  #               'the Kuiper belt is']
+  # def parse_wh_question(question)
+  # end
+  # Returns `question` without the yes-no verb
+  # Example:
+  #   question: 'is pluto closer to the sun than saturn'
+  #   returns : 'pluto closer to the sun than saturn'
+  def parse_yes_no_question(question)
+    words = question.split ' '
+    return words[1..-1].join ' '
+  end
+  # Returns `question` without the declarative statement
+  # Example:
+  #   question: 'tell me what is Pluto'
+  #   returns : 'what is Pluto'
+  def parse_declarative_question(question)
+    declarative_expressions = [ 'tell me', 'I want to know' ]
+    return question.gsub(/^#{Regexp.union(*declarative_expressions)}/, '').strip
+  end
+  # === DETECT TYPE OF QUESTION ===
+  def broad_question_type(question)
+    return 'wh' if is_wh_question question
+    return 'yes-no' if is_yes_no_question question
+    return 'declarative'
+  end
+  # Returns true if question starts with a wh-question word
+  def is_wh_question(question)
+    wh_words = %w(who where when why what which how)
+    return /^#{Regexp.union(*wh_words)}/ === question
+  end
+  # Returns true if question starts with a yes-no question expression
+  def is_yes_no_question(question)
+    yes_no_words = %w(am are is was were have has do does did can could should may)
+    return /^#{Regexp.union(*yes_no_words)}/ === question
+  end
+  # === PREPROCESSING ===
+  # Returns cleaned `input`
+  def preprocess(input)
+    clean(input)
+  end
+  # Cleans the string `input` by downcasing it and removing non alpha-numeric characters (spaces are kept)
+  def clean(input)
+    ret = input.downcase
+    ret.gsub(/[^0-9a-z ]/i, '').strip
+  end
+  # === OTHER FORMATTING ===
+  def clean_google_result(string)
+    string = CGI.unescapeHTML(string)
+    string
+    .downcase
+    .gsub(/[^\.]+\.{3,}/, '')                 # remove incomplete sentences
+    .gsub(/<("[^"]*"|'[^']*'|[^'">])*>/, '')  # html tags
+    .gsub(/\w{3} \d{1,2}, \d{4} \.{3} /, '')  # dates (27 Jan, 2015)
+    .gsub("\n",'')                            # new lines
+    .strip
+  end
+  def split_at_dot(string)
+    # matches NUM. or ALPHAALPHA.
+    re = /([0-9]|[a-z]{2})[\.\?!] ?/i
+    string.split(re).each_slice(2).map(&:join)
+  end
+end

data/lib/answerific/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Answerific
-  VERSION = "0.1.1"
+  VERSION = "0.2.0"
 end

data/lib/answerific.rb CHANGED Viewed

@@ -1,166 +1,6 @@
 require "answerific/version"
+require "answerific/miner.rb"
 require "google-search"
 module Answerific
-  class Bot
-    def answer(question)
-      mine(parse(preprocess(question)))
-    end
-    # === SELECT RESPONSE ===
-    def process_google_results(results, query)
-      candidates = select_responses(results, query)
-      select_best_response(candidates)
-    end
-    # Returns a single response from the list of responses
-    # TODO how to select the best? right now, return the first one
-    def select_best_response(responses)
-      responses.sample
-    end
-    # Returns the responses from `results` that have a the words in `query`
-    def select_responses(results, query)
-      sentences = results.map { |r| split_at_dot r }.flatten
-      query_words = query.split ' '
-      # Select the responses, only keeping the sentence that contain the search query
-      selected = sentences.select do |sentence|
-        query_words.all? { |w| sentence.include? w }  # contains all query words
-      end
-      return selected
-    end
-    # === EXTRACT INFO ===
-    def mine(query)
-      results = []
-      Google::Search::Web.new(query: query).each do |r|
-        results << clean_google_result(r.content)
-      end
-      process_google_results(results, query)
-    end
-    # === PARSE AND REARRANGE === (prepare for search engines)
-    def parse(question)
-      type = broad_question_type question
-      parsed = ''
-      case type
-      when 'wh'
-        parsed = parse_wh_question question
-      when 'yes-no'
-        parsed = parse_yes_no_question question
-      when 'declarative'
-        parsed = parse_declarative_question question
-      end
-      return parsed
-    end
-    # TODO consider verb permutations
-    # TODO consider wh-word: where is the sun => the sun is [located]
-    # Parses the wh-question `question` by removing the wh-word and moving the main verb at the end
-    # Assumptions:
-    #   * wh-word is at the beginning
-    #   * main verb follows the wh-word
-    #       (TODO not accurate for which/whose but should be ok for the others)
-    # Example:
-    #   question: 'where is the Kuiper belt'
-    #   returns : 'the Kuiper belt is'
-    def parse_wh_question(question)
-      words = question.split ' '
-      parsed = words[2..-1] << words[1]
-      parsed.join " "
-    end
-    # Returns an array of permutations of the main verb in the question without the wh-word
-    # Parses the wh-question `question` by removing the wh-word
-    # Assumptions:
-    #   * wh-word is at the beginning
-    #   * main verb follows the wh-word
-    #       (TODO not accurate for which/whose but should be ok for the others)
-    # Example:
-    #   question: 'where is the Kuiper belt'
-    #   returns : ['is the Kuiper belt',
-    #               'the is Kuiper belt',
-    #               'the Kuiper is belt',
-    #               'the Kuiper belt is']
-    # def parse_wh_question(question)
-    # end
-    # Returns `question` without the yes-no verb
-    # Example:
-    #   question: 'is pluto closer to the sun than saturn'
-    #   returns : 'pluto closer to the sun than saturn'
-    def parse_yes_no_question(question)
-      words = question.split ' '
-      return words[1..-1].join ' '
-    end
-    # Returns `question` without the declarative statement
-    # Example:
-    #   question: 'tell me what is Pluto'
-    #   returns : 'what is Pluto'
-    def parse_declarative_question(question)
-      declarative_expressions = [ 'tell me', 'I want to know' ]
-      return question.gsub(/^#{Regexp.union(*declarative_expressions)}/, '').strip
-    end
-    # === DETECT TYPE OF QUESTION ===
-    def broad_question_type(question)
-      return 'wh' if is_wh_question question
-      return 'yes-no' if is_yes_no_question question
-      return 'declarative'
-    end
-    # Returns true if question starts with a wh-question word
-    def is_wh_question(question)
-      wh_words = %w(who where when why what which how)
-      return /^#{Regexp.union(*wh_words)}/ === question
-    end
-    # Returns true if question starts with a yes-no question expression
-    def is_yes_no_question(question)
-      yes_no_words = %w(am are is was were have has do does did can could should may)
-      return /^#{Regexp.union(*yes_no_words)}/ === question
-    end
-    # === PREPROCESSING ===
-    # Returns cleaned `input`
-    def preprocess(input)
-      clean(input)
-    end
-    # Cleans the string `input` by removing non alpha-numeric characters
-    def clean(input)
-      ret = input.downcase
-      ret.gsub(/[^0-9a-z ]/i, '').strip
-    end
-    # === OTHER FORMATTING ===
-    def clean_google_result(string)
-      string
-      .downcase
-      .gsub(/[^\.]+\.{3,}/, '')                 # remove incomplete sentences
-      .gsub(/<("[^"]*"|'[^']*'|[^'">])*>/, '')  # html tags
-      .gsub(/\w{3} \d{1,2}, \d{4} \.{3} /, '')  # dates (27 Jan, 2015)
-      .gsub("\n",'')                            # new lines
-    end
-    def split_at_dot(string)
-      re = /([a-z]{2})[\.\?!] ?/i  # regex to match *aa. where a is any letter
-      string.split(re).each_slice(2).map(&:join)
-    end
-  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: answerific
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Justin Domingue
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2015-04-29 00:00:00.000000000 Z
+date: 2015-04-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -125,6 +125,7 @@ files:
 - bin/console
 - bin/setup
 - lib/answerific.rb
+- lib/answerific/miner.rb
 - lib/answerific/version.rb
 homepage: https://github.com/justindomingue/answerific
 licenses: []