RubyGems - clause_extractor - Versions diffs - 0.1.2 → 0.1.3 - Mend

clause_extractor 0.1.2 → 0.1.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

data/lib/clause_extractor.rb +25 -100
metadata +2 -2

data/lib/clause_extractor.rb CHANGED Viewed

@@ -1,89 +1,8 @@
 class ClauseExtractor
   require "conjugations"
-  pronouns        = "(i|you|he|she|it|they|we|there)"
-  present_perfect = "(already|ever|for|just|never|since|yet)"
-  have_has        = "(have|has|haven't|hasn't)"
-  was_were        = "(were|was|wasn't|weren't)"
-  had             = "([a-z]{1,4}'d|had)(n't)*"
-  have_has        = "(have|has|haven't|hasn't|havent|hasnt|has not|have not)"
-  contractions    = "(it'*s|he'*s|she'*s|[a-z]{1,4}'*ve)"
-  to_be           = "(am|are|'m|'re|'s|is|[a-z]{1,4}'re)"
-  will            = "(will|[a-z]{1,4}'ll)"
-  would           = "(would|[a-z]{1,4}'d)"
-  @tense_regexes = {
-    'third'      => {
-      "simple present"                => [
-                                            /\b(he|she|it)\s+search(s)?\b/i,                              #he arrives
-                                            /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i               #adapts it
-                                         ]
-                    },
-    'infinitive' => {
-        "simple present"              => [/\b((I|you|they|we|to)\s+)*+search\b/i],#to arrive
-        "subjunctive future"          => [
-                                            /\bif\s+#{pronouns}\s+#{was_were}\s+(not\s+)*to\s+(not\s+)*search/i,   #if I were to arise
-                                            /\bif\s+#{pronouns}\s+should(n't)*\s+(not\s+)*search/i                #If I should arise
-                                         ],
-        "subjunctive present"         => [  /\bthat\s+#{pronouns}\s+(not\s+)*search/i],                           #that we arrive
-        "conditional simple"          => [  /\b(#{pronouns}\s+)*(would(n't)*|[a-z]{1,4}'d)(\s+not)*\s+search/i],  #I would arise, I wouldn't arise
-        "will-future"                 => [  /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i],        #I'll arise
-        "going to-future"             => [  /\b(#{pronouns}\s+)*#{to_be}\s+(not\s+)*going\s+to\s+search/i],       #they are going to cry
-                      },
-    'gerund' => {
-     "conditional perfect progressive" => [ /\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
-      "present perfect progressive"     => [
-                                            /\b(#{have_has}\s+)(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #have they not been searching
-                                            /\b(#{pronouns}\s+)*#{have_has}*\s+(not\s+)*(#{present_perfect}\s+)*been\s+search/i   #I have been searching
-                                            ],
-     "past perfect progressive"        => [
-                                            /\b(#{pronouns}\s+)*#{had}\s(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #I had been searching,
-                                            /\bhad(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #had he not been searching
-                                          ],
-      "conditional progressive"         => [/\b(#{pronouns}\s+)*#{would}\s+(not\s+)*be\s+search/i],   #I would be searching (I'd)
-      "future progressive"              => [
-                                            /\b((#{pronouns})\s+)*#{will}\s+(not\s+)*be\s+search/i,
-                                            /\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
-                                          ],                                                          #I will be searching
-      "past progressive"                => [/\b(#{pronouns}\s+)*#{was_were}*\s+(not\s+)*search/i],    #I was searching
-      "present progressive"             => [/\b(#{pronouns}\s*)*(#{to_be}\s+)*(not\s+)*search/i],      #I'm rising
-                },
-    "past-participle" => {
-      "conditional perfect"             => [/\b(#{pronouns}\s+)*#{would}\s+(not\s+)*have\s+(not\s+)*search/i],                                 #I would not search
-      "future perfect"                  => [/\b(#{pronouns}\s+)*#{will}\s+have\s+search/i],            #I'll have arisen
-      "past perfect"                    => [
-                                              /\b(#{pronouns}\s+)*#{had}\s+(not\s+)*((#{present_perfect})\s+)*search/i,  #I had arisen
-                                              /\b#{had}\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i
-                                           ],
-      "present perfect"                 => [
-                                            /\b(#{pronouns}\s+)*#{have_has}\s+((#{present_perfect})\s+)*search/,             #They have already seen
-                                            /\b#{have_has}\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*search/       #Have they already seen
-                                           ],
-      "subjunctive past"                => [/\bif\s+#{pronouns}\s+search/i],                          #if I arose
-      "simple past"                     => [/\b#{pronouns}\s+search/i]                                #you chose
-    },
-    #"present perfect"             => [/^\s*search\b/i],                                        #arisen
-    #"simple past"                 => [/^\s*search\b/i]                                          #arose
-  }
-  def self.get_match_start_index(verb, match, index)
-    #get start position of last occurence of verb in match
-    verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i
-    #count spaces between match start and verb_index_in_match and subtract that from index
-    lo = index - match[0,verb_index_in_match].split(/\s+/).size
-    hi = lo + match[0,verb_index_in_match].split(/\s+/).size
-    return lo, hi
-  end
+  require "matchers"
-  def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
+  def self.get_clauses(phrase, format = String.new)
     @format        = format
     phrase         = phrase.downcase
     #list           = format.match("audioverb") ? Hash.new : Array.new
@@ -94,32 +13,38 @@ class ClauseExtractor
     @tense_id     ||= get_tenses
     @con_id       ||= get_con_id
     ranges       = []
-    a=Array.new
-    a = phrase.split(/\s+/)
-    a.length.times do |i|
-      a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
-        if @con_id[a[i]] then  #if word matches a conjugation
-        @tense_regexes.each do |k,v|
-          if k.match(/#{@id_tiempo[@tiempos[a[i]]]}/)
+    phrase.gsub!(/[!.?\(\)]/,"") if phrase
+    phrase_a = phrase.split(/\s+/)
+    phrase_a.length.times do |i|
+    #  phrase_a[i].gsub!(/[!.?\(\)]/,"") if phrase_a[i] #remove any punctuation from the word
+        if @con_id[phrase_a[i]] then  #if word matches a conjugation
+        $tense_regexes.each do |k,v|
+          if k.match(/#{@id_tiempo[@tiempos[phrase_a[i]]]}/)
             v.each do |tense, regex_array|
               regex_array.each do |regex|
-                regex = regex.to_s.gsub("search", "#{a[i]}")
-                phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], tense, i, ranges)
+                regex = regex.to_s.gsub("search", "#{phrase_a[i]}")
+                phrase, list, ranges = scan_phrase(phrase, list, regex, phrase_a[i], tense, i, ranges)
               end
             end
           end
         end
       end
-    end
-    list.each do |k, v|
-     list.delete(k) unless ranges.include?(v)
-    end
-    list.each do |k,v|
-      print "#{k}\n"
-    end
+    end
+    list.each { |k, v| list.delete(k) unless ranges.include?(v) }
+    list.each { |k, v| print "#{k}\n" }
     list
   end
+  def self.get_match_start_index(verb, match, index)
+    #get start position of last occurence of verb in match
+    verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i
+    #count spaces between match start and verb_index_in_match and subtract that from index
+    lo = index - match[0,verb_index_in_match].split(/\s+/).size
+    hi = lo + match[0,verb_index_in_match].split(/\s+/).size
+    return lo, hi
+  end
    def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges)
      if match = phrase.match(/#{regex}/i)
        match = match.to_s
@@ -128,7 +53,7 @@ class ClauseExtractor
        if @format.match(/audioverb/)
          list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi)
        else
-         list["#{tense_label}:" + match.to_s + ":" + (lo..hi).to_s] = (lo..hi) unless @format.match(/audioverb/)
+         list["#{tense_label}:" + match.to_s + ":" + (lo..hi).to_s] = (lo..hi)
        end
      end
      return phrase, list, ranges

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: clause_extractor
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-09-08 00:00:00.000000000 Z
+date: 2012-10-06 00:00:00.000000000 Z
 dependencies: []
 description: English verbal clause extractor
 email: mikefabrikant@gmail.com