clause_extractor 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. data/lib/clause_extractor.rb +55 -50
  2. data/lib/conjugations.rb +53415 -40651
  3. metadata +2 -2
@@ -1,67 +1,83 @@
1
1
  module ClauseExtractor
2
2
  class Clause
3
- require "conjugations"
3
+ require "./conjugations2"
4
4
 
5
5
  @tense_regexes = {
6
- "subjunctive future" => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i], #if I were to arise
7
- "subjunctive present" => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i], #if I should arise
8
- "conditional perfect progressive" => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
9
- "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i], #I have been searching
10
- "conditional progressive" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
11
- "future progressive" => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
12
- "past progressive" => [/\b(was|were)(n't)*\s+(not\s+)*search/i], #I was searching
13
- "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i], #I have been searching
14
- "conditional perfect" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
15
- "future perfect" => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
16
- "conditional simple" => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise
17
- "will-future" => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
18
- "going to-future" => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
19
- "present progressive" => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i], #I'm rising
20
- "present perfect" => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
21
- "past perfect" => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i], #I had arisen
22
- "subjunctive present" => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i], #that we arrive
23
- "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
24
- "simple past" => [/\b(i|you|he|she|it|they)\s+search/i], #you chose
25
- "simple present" => [
6
+ "present perfect" => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
7
+ "future progressive" => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
8
+ "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i], #I have been searching
9
+ "subjunctive future" => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i], #if I were to arise
10
+ "going to-future" => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
11
+ "present progressive" => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i], #I'm rising
12
+ "subjunctive present" => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i], #if I should arise
13
+ "conditional perfect" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
14
+ "past perfect" => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i], #I had arisen
15
+ "subjunctive present" => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i], #that we arrive
16
+ "conditional perfect progressive" => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
17
+ "conditional progressive" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
18
+ "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
19
+ "conditional simple" => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise
20
+ "will-future" => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
21
+ "past progressive" => [/\b(was|were)(n't)*\s+(not\s+)*search/i], #I was searching
22
+ "future perfect" => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
23
+ "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i], #I have been searching
24
+ "simple past" => [/\b(i|you|he|she|it|they)\s+search/i], #you chose
25
+ "simple present" => [
26
26
  /\b(I|you|they|we|to)\s+search\b/i, #arrive
27
- /\b(he|she|it)\s+searchs\b/i, #he arrives
28
- /\bsearchs?\s+(it|them|him|her|me|you|us)\b/i, #adapts it
29
- /\bsearch\s+(it|them|him|her|me|you|us)\b/i], #adopt it
30
- "present progressive" => [/^search\b/i], #searching
31
- "present perfect" => [/^search\b/i], #arisen
32
- "simple past" =>[/^search\b/i] #arose
27
+ /\b(he|she|it)\s+search(s)?\b/i, #he arrives
28
+ /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i #adapts it
29
+ ],
30
+ "present progressive" => [/^search\b/i], #searching
31
+ "present perfect" => [/^search\b/i], #arisen
32
+ "simple past" => [/^search\b/i] #arose
33
33
  }
34
34
 
35
- def self.scan_phrase(regex, a_i, tense_label)
36
- # print "#{@phrase} #{tense_label} .. #{regex}\n"
37
- if match = @phrase.match(/#{regex}/i)
38
- @list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
39
- @list << "#{tense_label}:#{match.to_s}" unless @format.match(/audioverb/)
35
+ def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
36
+ if match = phrase.match(/#{regex}/i)
37
+ if ranges.each.select{|r| r.include?(index) || r.include?(index+match.to_s.split(/\s/).length)}.size == 0
38
+ ranges << (index .. (index + (match.to_s.split(/\s/).length-1)))
39
+ print "#{ranges} RRR #{match} #{tense_label} ... #{index.class} \n"
40
+ list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
41
+ list << "#{tense_label}:#{match.to_s}" unless @format.match(/audioverb/)
42
+ end
40
43
  end
41
- return @phrase, @list
44
+ return phrase, list, ranges
42
45
  end
43
46
 
44
47
  def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
45
- @format = format
46
- @phrase = phrase.downcase
47
- @list = format.match("audioverb") ? Hash.new : Array.new
48
-
48
+ @format = format
49
+ phrase = phrase.downcase
50
+ list = format.match("audioverb") ? Hash.new : Array.new
49
51
  @verbs ||= get_verbs
50
52
  @tiempos ||= get_tiempos
51
53
  @id_tiempo ||= get_id_tiempos
52
54
  @tense_id ||= get_tenses
53
55
  @con_id ||= get_con_id
56
+ ranges = []
57
+
54
58
 
59
+ # ####For generating conjugations.rb content
60
+ # @conjugations = get_conjugations
61
+ # @conjugations.each do |k,v|
62
+ # @con = v['con']
63
+ # @con_id[@con] = k #id
64
+ # #print "'#{@con}' => #{k},\n"
65
+ # #print "'#{@con}' => #{v['verb_id']},\n"
66
+ # #print "'#{@con}' => #{v['tiempo_id']},\n"
67
+ # #@tiempos[@con] = v['tiempo_id'] #tiempo_id
68
+ # #@verbs[@con] = v['verb_id'] #verb_id
69
+ # end
55
70
 
56
71
  a=Array.new
57
72
  a = phrase.split(/\s+/)
58
- for i in a.length.downto 0 do
73
+ a.length.times do |i|
59
74
  a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
60
75
  if @con_id[a[i]] then #if word matches a conjugation
61
76
  @tense_regexes.each do |k,v|
62
77
  v.each do |regex|
63
78
  regex = regex.to_s.gsub("search", "#{a[i]}")
64
- scan_phrase(regex, a[i], k)
79
+
80
+ phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], k, i, ranges)
65
81
  end
66
82
  end
67
83
  end #end if is conjugation
@@ -70,14 +86,3 @@ module ClauseExtractor
70
86
  end
71
87
  end
72
88
  end
73
- ####For generating conjugations.rb content
74
- #@conjugations = get_conjugations
75
- # @conjugations.each do |k,v|
76
- # @con = v['con']
77
- # @con_id[@con] = k #id
78
- # # print "'#{@con}' => #{k},\n"
79
- # # #print "'#{@con}' => #{v['verb_id']},\n"
80
- # # #print "'#{@con}' => #{v['tiempo_id']},\n"
81
- # # @tiempos[@con] = v['tiempo_id'] #tiempo_id
82
- # # @verbs[@con] = v['verb_id'] #verb_id
83
- # end