clause_extractor 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/lib/clause_extractor.rb +55 -50
  2. data/lib/conjugations.rb +53415 -40651
  3. metadata +2 -2
@@ -1,67 +1,83 @@
1
1
  module ClauseExtractor
2
2
  class Clause
3
- require "conjugations"
3
+ require "./conjugations2"
4
4
 
5
5
  @tense_regexes = {
6
- "subjunctive future" => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i], #if I were to arise
7
- "subjunctive present" => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i], #if I should arise
8
- "conditional perfect progressive" => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
9
- "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i], #I have been searching
10
- "conditional progressive" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
11
- "future progressive" => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
12
- "past progressive" => [/\b(was|were)(n't)*\s+(not\s+)*search/i], #I was searching
13
- "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i], #I have been searching
14
- "conditional perfect" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
15
- "future perfect" => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
16
- "conditional simple" => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise
17
- "will-future" => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
18
- "going to-future" => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
19
- "present progressive" => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i], #I'm rising
20
- "present perfect" => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
21
- "past perfect" => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i], #I had arisen
22
- "subjunctive present" => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i], #that we arrive
23
- "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
24
- "simple past" => [/\b(i|you|he|she|it|they)\s+search/i], #you chose
25
- "simple present" => [
6
+ "present perfect" => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
7
+ "future progressive" => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
8
+ "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i], #I have been searching
9
+ "subjunctive future" => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i], #if I were to arise
10
+ "going to-future" => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
11
+ "present progressive" => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i], #I'm rising
12
+ "subjunctive present" => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i], #if I should arise
13
+ "conditional perfect" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
14
+ "past perfect" => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i], #I had arisen
15
+ "subjunctive present" => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i], #that we arrive
16
+ "conditional perfect progressive" => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
17
+ "conditional progressive" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
18
+ "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
19
+ "conditional simple" => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise
20
+ "will-future" => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
21
+ "past progressive" => [/\b(was|were)(n't)*\s+(not\s+)*search/i], #I was searching
22
+ "future perfect" => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
23
+ "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i], #I have been searching
24
+ "simple past" => [/\b(i|you|he|she|it|they)\s+search/i], #you chose
25
+ "simple present" => [
26
26
  /\b(I|you|they|we|to)\s+search\b/i, #arrive
27
- /\b(he|she|it)\s+searchs\b/i, #he arrives
28
- /\bsearchs?\s+(it|them|him|her|me|you|us)\b/i, #adapts it
29
- /\bsearch\s+(it|them|him|her|me|you|us)\b/i], #adopt it
30
- "present progressive" => [/^search\b/i], #searching
31
- "present perfect" => [/^search\b/i], #arisen
32
- "simple past" =>[/^search\b/i] #arose
27
+ /\b(he|she|it)\s+search(s)?\b/i, #he arrives
28
+ /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i #adapts it
29
+ ],
30
+ "present progressive" => [/^search\b/i], #searching
31
+ "present perfect" => [/^search\b/i], #arisen
32
+ "simple past" => [/^search\b/i] #arose
33
33
  }
34
34
 
35
- def self.scan_phrase(regex, a_i, tense_label)
36
- # print "#{@phrase} #{tense_label} .. #{regex}\n"
37
- if match = @phrase.match(/#{regex}/i)
38
- @list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
39
- @list << "#{tense_label}:#{match.to_s}" unless @format.match(/audioverb/)
35
+ def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
36
+ if match = phrase.match(/#{regex}/i)
37
+ if ranges.each.select{|r| r.include?(index) || r.include?(index+match.to_s.split(/\s/).length)}.size == 0
38
+ ranges << (index .. (index + (match.to_s.split(/\s/).length-1)))
39
+ print "#{ranges} RRR #{match} #{tense_label} ... #{index.class} \n"
40
+ list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
41
+ list << "#{tense_label}:#{match.to_s}" unless @format.match(/audioverb/)
42
+ end
40
43
  end
41
- return @phrase, @list
44
+ return phrase, list, ranges
42
45
  end
43
46
 
44
47
  def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
45
- @format = format
46
- @phrase = phrase.downcase
47
- @list = format.match("audioverb") ? Hash.new : Array.new
48
-
48
+ @format = format
49
+ phrase = phrase.downcase
50
+ list = format.match("audioverb") ? Hash.new : Array.new
49
51
  @verbs ||= get_verbs
50
52
  @tiempos ||= get_tiempos
51
53
  @id_tiempo ||= get_id_tiempos
52
54
  @tense_id ||= get_tenses
53
55
  @con_id ||= get_con_id
56
+ ranges = []
57
+
54
58
 
59
+ # ####For generating conjugations.rb content
60
+ # @conjugations = get_conjugations
61
+ # @conjugations.each do |k,v|
62
+ # @con = v['con']
63
+ # @con_id[@con] = k #id
64
+ # #print "'#{@con}' => #{k},\n"
65
+ # #print "'#{@con}' => #{v['verb_id']},\n"
66
+ # #print "'#{@con}' => #{v['tiempo_id']},\n"
67
+ # #@tiempos[@con] = v['tiempo_id'] #tiempo_id
68
+ # #@verbs[@con] = v['verb_id'] #verb_id
69
+ # end
55
70
 
56
71
  a=Array.new
57
72
  a = phrase.split(/\s+/)
58
- for i in a.length.downto 0 do
73
+ a.length.times do |i|
59
74
  a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
60
75
  if @con_id[a[i]] then #if word matches a conjugation
61
76
  @tense_regexes.each do |k,v|
62
77
  v.each do |regex|
63
78
  regex = regex.to_s.gsub("search", "#{a[i]}")
64
- scan_phrase(regex, a[i], k)
79
+
80
+ phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], k, i, ranges)
65
81
  end
66
82
  end
67
83
  end #end if is conjugation
@@ -70,14 +86,3 @@ module ClauseExtractor
70
86
  end
71
87
  end
72
88
  end
73
- ####For generating conjugations.rb content
74
- #@conjugations = get_conjugations
75
- # @conjugations.each do |k,v|
76
- # @con = v['con']
77
- # @con_id[@con] = k #id
78
- # # print "'#{@con}' => #{k},\n"
79
- # # #print "'#{@con}' => #{v['verb_id']},\n"
80
- # # #print "'#{@con}' => #{v['tiempo_id']},\n"
81
- # # @tiempos[@con] = v['tiempo_id'] #tiempo_id
82
- # # @verbs[@con] = v['verb_id'] #verb_id
83
- # end