RubyGems - clause_extractor - Versions diffs - 0.0.5 → 0.0.6 - Mend

clause_extractor 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

data/lib/clause_extractor.rb CHANGED Viewed

@@ -1,88 +1,158 @@
-module ClauseExtractor
-  class Clause
-    require "./conjugations2"
-    @tense_regexes = {
-      "present perfect"             => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
-      "future progressive"          => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
-      "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i],            #I have been searching
-      "subjunctive future"          => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i],   #if I were to arise
-      "going to-future"             => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i],                     #they are going to cry
-      "present progressive"         => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i],                         #I'm rising
-      "subjunctive present"         => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i],    #if I should arise
-      "conditional perfect"         => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i],                                 #I would not search
-      "past perfect"                => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i],                                             #I had arisen
-      "subjunctive present"         => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i],                                            #that we arrive
-"conditional perfect progressive"   => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i],               #I would have been searching
-      "conditional progressive"     => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i],                #I would be searching (I'd)
-      "subjunctive past"            => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i],                                                    #if I arose
-      "conditional simple"          => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i],                                                   #I would arise
-      "will-future"                 => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i],                                                   #I'll arise
-      "past progressive"            => [/\b(was|were)(n't)*\s+(not\s+)*search/i],                                                     #I was searching
-      "future perfect"              => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i],                                                     #I'll have arisen
-      "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i],                              #I have been searching
-      "simple past"                 => [/\b(i|you|he|she|it|they)\s+search/i],                #you chose
-      "simple present"              => [
-                                      /\b(I|you|they|we|to)\s+search\b/i,                   #arrive
-                                      /\b(he|she|it)\s+search(s)?\b/i,                         #he arrives
-                                      /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i        #adapts it
-                                     ],
-      "present progressive"         => [/^search\b/i],                                        #searching
-      "present perfect"             => [/^search\b/i],                                        #arisen
-      "simple past"                 => [/^search\b/i]                                          #arose
-    }
-    def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
-      if match = phrase.match(/#{regex}/i)
-        if ranges.each.select{|r| r.include?(index) || r.include?(index+match.to_s.split(/\s/).length)}.size == 0
-          ranges << (index .. (index + (match.to_s.split(/\s/).length-1)))
-          print "#{ranges} RRR #{match} #{tense_label} ... #{index.class} \n"
-          list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
-          list << "#{tense_label}:#{match.to_s}"                                    unless @format.match(/audioverb/)
-        end
-      end
-      return phrase, list, ranges
-    end
+# Finds verb clauses in a phrase and labels each one with a tense.
+# Every method is defined at class level (def self.*); matching is driven by
+# the @tense_regexes table built below.
+class ClauseExtractor
+  require "conjugations"
+  # Regex source fragments interpolated into the patterns below.
+  # NOTE(review): `contractions` is not interpolated by any pattern visible in
+  # this diff -- confirm whether it is still needed.
+  pronouns        = "(i|you|he|she|it|they|we|there)"
+  present_perfect = "(already|ever|for|just|never|since|yet)"
+  have_has        = "(have|has|haven't|hasn't)"
+  contractions    = "it's|he's|she's|[a-z]{1,4}'ve"
+  # Two-level table: verb-form group => { tense label => [patterns] }.
+  # Each pattern uses the literal word "search" as a placeholder; get_clauses
+  # substitutes the conjugated word found in the phrase before matching, and
+  # only consults the group(s) whose key matches the form name looked up for
+  # that word.
+  @tense_regexes = {
+    'third'      => {
+      # NOTE(review): the key and its `=>` sit on separate lines here; confirm
+      # this parses in the shipped file (may be an artifact of the diff rendering).
+      "simple present"
+                                      => [
+                                            /\b(he|she|it)\s+search(s)?\b/i,                         #he arrives
+                                            /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i        #adapts it
+                                         ]
+                    },
+    'infinitive' => {
+        "simple present"              => [
+                                            /\b((I|you|they|we|to)\s+)*+search\b/i,                   #arrive
+                                       ],
+        "subjunctive future"          => [
+                                            /\bif\s+#{pronouns}\s+were\s+(not\s+)*to\s+(not\s+)*search/i,   #if I were to arise
+                                            /\bif\s+#{pronouns}\s+should\s+(not\s+)*search/i                #If I should arise
+                                         ],
+        "subjunctive present"         => [  /\bthat\s+#{pronouns}\s+(not\s+)*search/i],                       #that we arrive
+        "conditional simple"          => [  /\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)(\s+not)*\s+search/i],    #I would arise
+        "will-future"                 => [  /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i],    #I'll arise
+        "going to-future"             => [  /\b(#{pronouns}\s+)*(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i],   #they are going to cry
+                      },
+    'gerund' => {
+      "conditional perfect progressive" => [/\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i],               #I would have been searching
+      "present perfect progressive"     => [/\b(#{pronouns}\s+)*([a-z]{1,4}'ve|have|has)(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i],   #I have been searching
+      "conditional progressive"         => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i],   #I would be searching (I'd)
+      "future progressive"              => [
+                                          /\b((#{pronouns})\s+)*(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i,
+                                          /\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
+                                          ], #I will be searching
+      "past progressive"                => [/\b(#{pronouns}\s+)*(was|were)(n't)*\s+(not\s+)*search/i],            #I was searching
+      "present progressive"             => [/\b(#{pronouns}\s+)*((am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+)*(not\s+)*search/i],      #I'm rising
+                },
+    "past-participle" => {
+      "conditional perfect"             => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i],                                 #I would not search
+      "future perfect"                  => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)\s+have\s+search/i],                      #I'll have arisen
+      "past perfect"                    => [/\b(#{pronouns}\s+)*(had|[a-z]{1,4}'d)\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i],  #I had arisen
+      # NOTE(review): "present perfect" is the only pattern in this table written
+      # without the /i flag; get_clauses downcases the phrase before matching.
+      "present perfect"                 => [/\b(#{pronouns}\s+)*#{have_has}\s+(#{pronouns}\s+)*(not\s+)*((just|already|ever)\s+)*search/],             #Have you seen
+      "subjunctive past"                => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i],                    #if I arose
+      "simple past"                     => [/\b#{pronouns}\s+search/i]               #you chose
+    },
-    def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
-      @format         = format
-      phrase         = phrase.downcase
-      list           = format.match("audioverb") ? Hash.new : Array.new
-      @verbs        ||= get_verbs
-      @tiempos      ||= get_tiempos
-      @id_tiempo    ||= get_id_tiempos
-      @tense_id     ||= get_tenses
-      @con_id       ||= get_con_id
-      ranges       = []
-      # ####For generating conjugations.rb content
-      # @conjugations = get_conjugations
-      #  @conjugations.each do |k,v|
-      #    @con = v['con']
-      #    @con_id[@con]   = k  #id
-      #    #print "'#{@con}' => #{k},\n"
-      #    #print "'#{@con}' => #{v['verb_id']},\n"
-      #    #print "'#{@con}' => #{v['tiempo_id']},\n"
-      #    #@tiempos[@con]  = v['tiempo_id']  #tiempo_id
-      #    #@verbs[@con]    = v['verb_id']  #verb_id
-      #  end
-      a=Array.new
-      a = phrase.split(/\s+/)
-      a.length.times do |i|
-        a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
-          if @con_id[a[i]] then  #if word matches a conjugation
-          @tense_regexes.each do |k,v|
-            v.each do |regex|
-             regex = regex.to_s.gsub("search", "#{a[i]}")
-             phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], k, i, ranges)
+# #    "present perfect"             => [/^\s*search\b/i],                                        #arisen
+#   #  "simple past"                 => [/^\s*search\b/i]                                          #arose
+  }
+  def self.get_match_start_index(verb, match, index)
+    #get start position of last occurence of verb in match
+    verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i
+    #count spaces between match start and verb_index_in_match and subtract that from index
+    lo = index - match[0,verb_index_in_match].split(/\s+/).size
+    hi = lo + match[0,verb_index_in_match].split(/\s+/).size
+    return lo, hi
+  end
+  # Scans a phrase for conjugated verbs and returns the tense clauses found.
+  #
+  # phrase - text to scan; it is downcased and split on whitespace
+  # format - stored in @format; scan_phrase checks it against /audioverb/ to
+  #          choose the key layout of the result
+  # verbs, tiempo, id_tiempo, tense_id, con_id - not referenced in the body
+  #          shown here; the lookup tables are memoized from get_verbs,
+  #          get_tiempos, get_id_tiempos, get_tenses and get_con_id instead
+  #
+  # Returns a Hash whose values are word-index ranges (lo..hi) as filled in by
+  # scan_phrase; entries whose range is not present in `ranges` after the scan
+  # are removed. The hash is also printed to stdout before being returned.
+  def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
+    @format        = format
+    phrase         = phrase.downcase
+    #list           = format.match("audioverb") ? Hash.new : Array.new
+    list           = Hash.new
+    @verbs        ||= get_verbs
+    @tiempos      ||= get_tiempos
+    @id_tiempo    ||= get_id_tiempos
+    @tense_id     ||= get_tenses
+    @con_id       ||= get_con_id
+    ranges       = []
+    a=Array.new
+    a = phrase.split(/\s+/)
+    a.length.times do |i|
+      a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
+        if @con_id[a[i]] then  #if word matches a conjugation
+        @tense_regexes.each do |k,v|
+          # Group selection is a regex (substring) match of the form name against
+          # the group key, so the "past-participle" group is selected for both
+          # the "participle" and the "past" form names.
+          if k.match(/#{@id_tiempo[@tiempos[a[i]]]}/)
+            v.each do |tense, regex_array|
+              regex_array.each do |regex|
+                # Swap the "search" placeholder for the actual word before matching.
+                regex = regex.to_s.gsub("search", "#{a[i]}")
+                phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], tense, i, ranges)
+              end
             end
           end
-        end #end if is conjugation
-      end#end of looping through each cap
-      @list
+        end
+      end #end if is conjugation
+    end#end of looping through each cap
+    # Keep only the entries whose range survived prioritize_ranges.
+    list.each do |k, v|
+     list.delete(k) unless ranges.include?(v)
     end
+    # NOTE(review): writes the result to stdout on every call -- looks like leftover debugging output.
+    print "#{list}\n"
+    list
   end
+   def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges)
+     if match = phrase.match(/#{regex}/i)
+       match = match.to_s
+       lo, hi = get_match_start_index(verb, match, index)
+       ranges = prioritize_ranges(ranges, lo, hi,match)
+       list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi) if @format.match(/audioverb/)
+       list["#{tense_label} :" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi) unless @format.match(/audioverb/)
+     end
+     return phrase, list, ranges
+   end
+  def self.prioritize_ranges(ranges, lo, hi,match)
+   range = (lo..hi)
+   ranges.size.times.each do |r|
+     #replace old range with new one if start is same point and new range is longer
+     if ranges[r].begin == lo and ranges[r].count < range.count
+       ranges[r] = range
+     elsif (range.include?(ranges[r].begin) || range.include?(ranges[r].end)) && range.count > ranges[r].count
+       ranges.delete_at(r)
+     end
+   end
+   #add range to ranges if it is not already included in an existing range
+   if ranges.each.select{|r| r.include?(lo) || r.include?(hi)}.size == 0
+     ranges << range
+   end
+   ranges
+ end
 end
+# ####For generating conjugations.rb content
+# @conjugations = get_conjugations
+#  @conjugations.each do |k,v|
+#    @con = v['con']
+#    @con_id[@con]   = k  #id
+#    #print "'#{@con}' => #{k},\n"
+#    #print "'#{@con}' => #{v['verb_id']},\n"
+#    #print "'#{@con}' => #{v['tiempo_id']},\n"
+#    #@tiempos[@con]  = v['tiempo_id']  #tiempo_id
+#    #@verbs[@con]    = v['verb_id']  #verb_id
+#  end

data/lib/conjugations.rb CHANGED Viewed

@@ -2,10 +2,11 @@
+# Returns a Hash mapping numeric verb-form ids to form names.
+# In this version every id moves up by one (6..9) and a new form, 10 => "third",
+# is added. ClauseExtractor.get_clauses memoizes this table in @id_tiempo and
+# uses the form name to choose which @tense_regexes group to scan.
 def get_id_tiempos
   id_tiempo = {
-    5 => "infinitive",
-    6 => "gerund",
-    7 => "participle",
-    8 => "past"
+    6 => "infinitive",
+    7 => "gerund",
+    8 => "participle",
+    9 => "past",
+    10 => "third"
   }
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: clause_extractor
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-25 00:00:00.000000000 Z
+date: 2012-09-08 00:00:00.000000000 Z
 dependencies: []
 description: A simple hello world gem
 email: mikefabrikant@gmail.com