RubyGems - clause_extractor - Versions diffs - 0.0.5 → 0.0.6 - Mend

clause_extractor 0.0.5 → 0.0.6

Files changed (3) hide show

data/lib/clause_extractor.rb CHANGED Viewed

@@ -1,88 +1,158 @@
-module ClauseExtractor
-  class Clause
-    require "./conjugations2"
-    @tense_regexes = {
-      "present perfect"             => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
-      "future progressive"          => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
-      "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i],            #I have been searching
-      "subjunctive future"          => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i],   #if I were to arise
-      "going to-future"             => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i],                     #they are going to cry
-      "present progressive"         => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i],                         #I'm rising
-      "subjunctive present"         => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i],    #if I should arise
-      "conditional perfect"         => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i],                                 #I would not search
-      "past perfect"                => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i],                                             #I had arisen
-      "subjunctive present"         => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i],                                            #that we arrive
-"conditional perfect progressive"   => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i],               #I would have been searching
-      "conditional progressive"     => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i],                #I would be searching (I'd)
-      "subjunctive past"            => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i],                                                    #if I arose
-      "conditional simple"          => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i],                                                   #I would arise
-      "will-future"                 => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i],                                                   #I'll arise
-      "past progressive"            => [/\b(was|were)(n't)*\s+(not\s+)*search/i],                                                     #I was searching
-      "future perfect"              => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i],                                                     #I'll have arisen
-      "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i],                              #I have been searching
-      "simple past"                 => [/\b(i|you|he|she|it|they)\s+search/i],                #you chose
-      "simple present"              => [
-                                      /\b(I|you|they|we|to)\s+search\b/i,                   #arrive
-                                      /\b(he|she|it)\s+search(s)?\b/i,                         #he arrives
-                                      /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i        #adapts it
-                                     ],
-      "present progressive"         => [/^search\b/i],                                        #searching
-      "present perfect"             => [/^search\b/i],                                        #arisen
-      "simple past"                 => [/^search\b/i]                                          #arose
-    }
-    def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
-      if match = phrase.match(/#{regex}/i)
-        if ranges.each.select{|r| r.include?(index) || r.include?(index+match.to_s.split(/\s/).length)}.size == 0
-          ranges << (index .. (index + (match.to_s.split(/\s/).length-1)))
-          print "#{ranges} RRR #{match} #{tense_label} ... #{index.class} \n"
-          list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
-          list << "#{tense_label}:#{match.to_s}"                                    unless @format.match(/audioverb/)
-        end
-      end
-      return phrase, list, ranges
-    end
+class ClauseExtractor
+  require "conjugations"
+  pronouns        = "(i|you|he|she|it|they|we|there)"
+  present_perfect = "(already|ever|for|just|never|since|yet)"
+  have_has        = "(have|has|haven't|hasn't)"
+  contractions    = "it's|he's|she's|[a-z]{1,4}'ve"
+  @tense_regexes = {
+    'third'      => {
+      "simple present"
+                                      => [
+                                            /\b(he|she|it)\s+search(s)?\b/i,                         #he arrives
+                                            /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i        #adapts it
+                                         ]
+                    },
+    'infinitive' => {
+        "simple present"              => [
+                                            /\b((I|you|they|we|to)\s+)*+search\b/i,                   #arrive
+                                       ],
+        "subjunctive future"          => [
+                                            /\bif\s+#{pronouns}\s+were\s+(not\s+)*to\s+(not\s+)*search/i,   #if I were to arise
+                                            /\bif\s+#{pronouns}\s+should\s+(not\s+)*search/i                #If I should arise
+                                         ],
+        "subjunctive present"         => [  /\bthat\s+#{pronouns}\s+(not\s+)*search/i],                       #that we arrive
+        "conditional simple"          => [  /\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)(\s+not)*\s+search/i],    #I would arise
+        "will-future"                 => [  /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i],    #I'll arise
+        "going to-future"             => [  /\b(#{pronouns}\s+)*(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i],   #they are going to cry
+                      },
+    'gerund' => {
+      "conditional perfect progressive" => [/\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i],               #I would have been searching
+      "present perfect progressive"     => [/\b(#{pronouns}\s+)*([a-z]{1,4}'ve|have|has)(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i],   #I have been searching
+      "conditional progressive"         => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i],   #I would be searching (I'd)
+      "future progressive"              => [
+                                          /\b((#{pronouns})\s+)*(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i,
+                                          /\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
+                                          ], #I will be searching
+      "past progressive"                => [/\b(#{pronouns}\s+)*(was|were)(n't)*\s+(not\s+)*search/i],            #I was searching
+      "present progressive"             => [/\b(#{pronouns}\s+)*((am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+)*(not\s+)*search/i],      #I'm rising
+                },
+    "past-participle" => {
+      "conditional perfect"             => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i],                                 #I would not search
+      "future perfect"                  => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)\s+have\s+search/i],                      #I'll have arisen
+      "past perfect"                    => [/\b(#{pronouns}\s+)*(had|[a-z]{1,4}'d)\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i],  #I had arisen
+      "present perfect"                 => [/\b(#{pronouns}\s+)*#{have_has}\s+(#{pronouns}\s+)*(not\s+)*((just|already|ever)\s+)*search/],             #Have you seen
+      "subjunctive past"                => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i],                    #if I arose
+      "simple past"                     => [/\b#{pronouns}\s+search/i]               #you chose
+    },
-    def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
-      @format         = format
-      phrase         = phrase.downcase
-      list           = format.match("audioverb") ? Hash.new : Array.new
-      @verbs        ||= get_verbs
-      @tiempos      ||= get_tiempos
-      @id_tiempo    ||= get_id_tiempos
-      @tense_id     ||= get_tenses
-      @con_id       ||= get_con_id
-      ranges       = []
-      # ####For generating conjugations.rb content
-      # @conjugations = get_conjugations
-      #  @conjugations.each do |k,v|
-      #    @con = v['con']
-      #    @con_id[@con]   = k  #id
-      #    #print "'#{@con}' => #{k},\n"
-      #    #print "'#{@con}' => #{v['verb_id']},\n"
-      #    #print "'#{@con}' => #{v['tiempo_id']},\n"
-      #    #@tiempos[@con]  = v['tiempo_id']  #tiempo_id
-      #    #@verbs[@con]    = v['verb_id']  #verb_id
-      #  end
-      a=Array.new
-      a = phrase.split(/\s+/)
-      a.length.times do |i|
-        a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
-          if @con_id[a[i]] then  #if word matches a conjugation
-          @tense_regexes.each do |k,v|
-            v.each do |regex|
-             regex = regex.to_s.gsub("search", "#{a[i]}")
-             phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], k, i, ranges)
+# #    "present perfect"             => [/^\s*search\b/i],                                        #arisen
+#   #  "simple past"                 => [/^\s*search\b/i]                                          #arose
+  }
+  def self.get_match_start_index(verb, match, index) # returns [lo, hi]: word-position bounds for match, where index is the caller's word position of verb (get_clauses passes the word's index in the split phrase)
+    # Character offset of the last occurrence of verb in match; the negative lookahead rejects an occurrence that is followed by another one.
+    verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i # NOTE(review): verb is interpolated unescaped, and a nil result is not handled before it is used as a slice length below
+    # Count the whitespace-separated words that precede the verb inside match and subtract that count from index.
+    lo = index - match[0,verb_index_in_match].split(/\s+/).size
+    hi = lo + match[0,verb_index_in_match].split(/\s+/).size # the same count is added back, so hi always equals index
+    return lo, hi
+  end
+  def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil) # Scans phrase for tense clauses around each known conjugation and returns a Hash of label => word range; verbs, tiempo, id_tiempo, tense_id and con_id are never read in this body
+    @format        = format # read by scan_phrase to choose the key layout
+    phrase         = phrase.downcase
+    #list           = format.match("audioverb") ? Hash.new : Array.new
+    list           = Hash.new # key built in scan_phrase => (lo..hi) word range
+    @verbs        ||= get_verbs # lookup tables come from helper methods and are memoized in class-level instance variables
+    @tiempos      ||= get_tiempos
+    @id_tiempo    ||= get_id_tiempos
+    @tense_id     ||= get_tenses
+    @con_id       ||= get_con_id
+    ranges       = [] # word ranges accepted so far; updated through scan_phrase / prioritize_ranges
+    a=Array.new
+    a = phrase.split(/\s+/)
+    a.length.times do |i|
+      a[i].gsub!(/[!.?\(\)]/,"") if a[i] # strip the characters ! . ? ( ) from the word in place
+        if @con_id[a[i]] then  # only words found in @con_id (known conjugations) are analysed
+        @tense_regexes.each do |k,v| # k is the verb-form key, v maps tense label => list of regexes
+          if k.match(/#{@id_tiempo[@tiempos[a[i]]]}/) # keep the form groups whose key contains this word's form name; NOTE(review): with the names in get_id_tiempos, the "past-participle" key matches both "past" and "participle"
+            v.each do |tense, regex_array|
+              regex_array.each do |regex|
+                regex = regex.to_s.gsub("search", "#{a[i]}") # string form of the pattern with the "search" placeholder replaced by the actual word
+                phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], tense, i, ranges)
+              end
             end
           end
-        end #end if is conjugation
-      end#end of looping through each cap
-      @list
+        end
+      end #end if is conjugation
+    end#end of looping through each cap
+    list.each do |k, v| # drop entries whose range is not among the ranges that survived prioritization
+     list.delete(k) unless ranges.include?(v)
     end
+    print "#{list}\n" # NOTE(review): writes the result to stdout on every call; looks like leftover debug output
+    list
   end
+   def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges) # Tries one tense regex against phrase; on a match records label => word range in list and updates ranges. Returns [phrase, list, ranges].
+     if match = phrase.match(/#{regex}/i) # NOTE(review): String#match returns the first match in the whole phrase, which is not tied to the word at index
+       match = match.to_s
+       lo, hi = get_match_start_index(verb, match, index) # word-position bounds of the matched clause
+       ranges = prioritize_ranges(ranges, lo, hi,match)
+       list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi) if @format.match(/audioverb/) # audioverb format: key is "<@tense_id entry>:<match>:<@verbs entry>"
+       list["#{tense_label} :" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi) unless @format.match(/audioverb/) # any other format: key starts with the tense label followed by " :"
+     end
+     return phrase, list, ranges
+   end
+  def self.prioritize_ranges(ranges, lo, hi,match) # Merges the candidate range (lo..hi) into ranges, preferring longer ranges; mutates and returns ranges. The match parameter is not used in this body.
+   range = (lo..hi)
+   ranges.size.times.each do |r| # the iteration count is fixed from the size before the loop starts
+     # Replace the stored range with the new one when both start at the same point and the new one is longer.
+     if ranges[r].begin == lo and ranges[r].count < range.count
+       ranges[r] = range
+     elsif (range.include?(ranges[r].begin) || range.include?(ranges[r].end)) && range.count > ranges[r].count # stored range overlaps the new one at either end and is shorter
+       ranges.delete_at(r) # NOTE(review): deleting shifts later elements down while r keeps counting to the original size, so one element is skipped and a later ranges[r] can be nil (nil.begin would raise)
+     end
+   end
+   # Append the new range only when no stored range contains lo or hi.
+   if ranges.each.select{|r| r.include?(lo) || r.include?(hi)}.size == 0
+     ranges << range
+   end
+   ranges
+ end
 end
+# ####For generating conjugations.rb content
+# @conjugations = get_conjugations
+#  @conjugations.each do |k,v|
+#    @con = v['con']
+#    @con_id[@con]   = k  #id
+#    #print "'#{@con}' => #{k},\n"
+#    #print "'#{@con}' => #{v['verb_id']},\n"
+#    #print "'#{@con}' => #{v['tiempo_id']},\n"
+#    #@tiempos[@con]  = v['tiempo_id']  #tiempo_id
+#    #@verbs[@con]    = v['verb_id']  #verb_id
+#  end

data/lib/conjugations.rb CHANGED Viewed

@@ -2,10 +2,11 @@
 def get_id_tiempos # returns the hash below (the value of the assignment), mapping an integer id to a verb-form name
   id_tiempo = { # local is only assigned; nothing else in this method reads it
-    5 => "infinitive",
-    6 => "gerund",
-    7 => "participle",
-    8 => "past"
+    6 => "infinitive",
+    7 => "gerund",
+    8 => "participle",
+    9 => "past",
+    10 => "third"
   }
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: clause_extractor
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-25 00:00:00.000000000 Z
+date: 2012-09-08 00:00:00.000000000 Z
 dependencies: []
 description: A simple hello world gem
 email: mikefabrikant@gmail.com