clause_extractor 0.0.5 → 0.0.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,88 +1,158 @@
1
- module ClauseExtractor
2
- class Clause
3
- require "./conjugations2"
4
-
5
- @tense_regexes = {
6
- "present perfect" => [/\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i], #I have arisen/Have I not arisen
7
- "future progressive" => [/\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i], #I will be searching
8
- "present perfect progressive" => [/\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i], #I have been searching
9
- "subjunctive future" => [/\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i], #if I were to arise
10
- "going to-future" => [/\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
11
- "present progressive" => [/\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i], #I'm rising
12
- "subjunctive present" => [/if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i], #if I should arise
13
- "conditional perfect" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
14
- "past perfect" => [/\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i], #I had arisen
15
- "subjunctive present" => [/\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i], #that we arrive
16
- "conditional perfect progressive" => [/would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
17
- "conditional progressive" => [/\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
18
- "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
19
- "conditional simple" => [/\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise
20
- "will-future" => [/\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
21
- "past progressive" => [/\b(was|were)(n't)*\s+(not\s+)*search/i], #I was searching
22
- "future perfect" => [/\b(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
23
- "present perfect progressive" => [/\bhave\s+(not\s+)*been\s+search/i], #I have been searching
24
- "simple past" => [/\b(i|you|he|she|it|they)\s+search/i], #you chose
25
- "simple present" => [
26
- /\b(I|you|they|we|to)\s+search\b/i, #arrive
27
- /\b(he|she|it)\s+search(s)?\b/i, #he arrives
28
- /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i #adapts it
29
- ],
30
- "present progressive" => [/^search\b/i], #searching
31
- "present perfect" => [/^search\b/i], #arisen
32
- "simple past" => [/^search\b/i] #arose
33
- }
34
-
35
- def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
36
- if match = phrase.match(/#{regex}/i)
37
- if ranges.each.select{|r| r.include?(index) || r.include?(index+match.to_s.split(/\s/).length)}.size == 0
38
- ranges << (index .. (index + (match.to_s.split(/\s/).length-1)))
39
- print "#{ranges} RRR #{match} #{tense_label} ... #{index.class} \n"
40
- list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[a_i].to_s]=1 if @format.match(/audioverb/)
41
- list << "#{tense_label}:#{match.to_s}" unless @format.match(/audioverb/)
42
- end
43
- end
44
- return phrase, list, ranges
45
- end
1
+ class ClauseExtractor
2
+ require "conjugations"
3
+
4
# Building blocks interpolated into the tense patterns below.
pronouns = "(i|you|he|she|it|they|we|there)"
present_perfect = "(already|ever|for|just|never|since|yet)"
have_has = "(have|has|haven't|hasn't)"
# NOTE(review): `contractions` is not referenced by any pattern below.
# Presumably it was meant for the "present perfect" pattern (e.g. "I've ...") - confirm.
contractions = "it's|he's|she's|[a-z]{1,4}'ve"

# Tense patterns, grouped by the verb form they apply to:
#
#   verb-form group => { tense label => [Regexp, ...] }
#
# Every pattern is written around the placeholder word "search"; the caller
# substitutes the conjugated verb found in the phrase before matching.
@tense_regexes = {
  'third' => {
    # The key and its "=>" must share a line; a line break between them is a
    # syntax error in Ruby.
    "simple present" => [
      /\b(he|she|it)\s+search(s)?\b/i, # he arrives
      /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i # adapts it
    ]
  },

  'infinitive' => {
    "simple present" => [
      /\b((I|you|they|we|to)\s+)*+search\b/i, # arrive
    ],
    "subjunctive future" => [
      /\bif\s+#{pronouns}\s+were\s+(not\s+)*to\s+(not\s+)*search/i, # if I were to arise
      /\bif\s+#{pronouns}\s+should\s+(not\s+)*search/i # if I should arise
    ],
    "subjunctive present" => [/\bthat\s+#{pronouns}\s+(not\s+)*search/i], # that we arrive
    "conditional simple" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], # I would arise
    "will-future" => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], # I'll arise
    "going to-future" => [/\b(#{pronouns}\s+)*(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], # they are going to cry
  },

  'gerund' => {
    "conditional perfect progressive" => [/\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], # I would have been searching
    "present perfect progressive" => [/\b(#{pronouns}\s+)*([a-z]{1,4}'ve|have|has)(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i], # I have been searching
    "conditional progressive" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], # I would be searching (I'd)
    "future progressive" => [
      /\b((#{pronouns})\s+)*(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, # I will be searching
      /\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i, # will you be searching
    ],
    "past progressive" => [/\b(#{pronouns}\s+)*(was|were)(n't)*\s+(not\s+)*search/i], # I was searching
    "present progressive" => [/\b(#{pronouns}\s+)*((am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+)*(not\s+)*search/i], # I'm rising
  },

  "past-participle" => {
    "conditional perfect" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], # I would have arisen
    "future perfect" => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)\s+have\s+search/i], # I'll have arisen
    "past perfect" => [/\b(#{pronouns}\s+)*(had|[a-z]{1,4}'d)\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i], # I had arisen
    # Case-insensitive like every other pattern in the table.
    "present perfect" => [/\b(#{pronouns}\s+)*#{have_has}\s+(#{pronouns}\s+)*(not\s+)*((just|already|ever)\s+)*search/i], # Have you seen
    "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], # if I arose
    "simple past" => [/\b#{pronouns}\s+search/i] # you chose
  },

  # Disabled bare-verb patterns, kept for reference:
  # "present perfect" => [/^\s*search\b/i], #arisen
  # "simple past" => [/^\s*search\b/i] #arose
}
68
+
69
+
70
+
71
# Converts a matched clause into the word-index span it covers in the phrase.
#
# verb  - the verb (String) as it appears in the phrase.
# match - the matched clause text (String); expected to contain +verb+.
# index - the word position (Integer) of +verb+ within the phrase.
#
# Returns [lo, hi]: +lo+ is +index+ minus the number of words that precede
# the last occurrence of +verb+ in +match+, and +hi+ is +index+ itself.
def self.get_match_start_index(verb, match, index)
  # Last occurrence of the verb; escaped so characters such as "(" or "."
  # in the verb are matched literally instead of being read as regex syntax.
  verb_offset = match.rindex(/#{Regexp.escape(verb)}/i)

  # If the verb cannot be located, treat the clause as starting at the verb
  # rather than failing on a nil offset.
  words_before_verb = verb_offset ? match[0, verb_offset].split(/\s+/).size : 0

  lo = index - words_before_verb
  hi = lo + words_before_verb
  [lo, hi]
end
79
+
80
# Finds the verb clauses in +phrase+ and labels each with a tense.
#
# phrase - the text (String) to scan; it is downcased before matching.
# format - output format (String); stored in @format for scan_phrase to read.
# verbs, tiempo, id_tiempo, tense_id, con_id - accepted for backward
#   compatibility; this method does not read them. The lookup tables come
#   from the get_* helpers and are memoized in instance variables.
#
# Returns a Hash whose entries are produced by scan_phrase (key => word-index
# Range), keeping only the entries whose range is still present in the
# prioritized range list. Nothing is written to stdout.
def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
  @format = format
  phrase = phrase.downcase
  list = {}
  @verbs ||= get_verbs
  @tiempos ||= get_tiempos
  @id_tiempo ||= get_id_tiempos
  @tense_id ||= get_tenses
  @con_id ||= get_con_id
  ranges = []

  phrase.split(/\s+/).each_with_index do |raw_word, i|
    word = raw_word.gsub(/[!.?\(\)]/, "") # remove any punctuation from the word
    next unless @con_id[word] # only words that are known conjugations

    # Name of this word's verb form; it selects which pattern groups apply.
    # NOTE(review): if the form lookup yields nil this regex is empty and
    # matches every group - confirm that is intended.
    form = /#{@id_tiempo[@tiempos[word]]}/

    @tense_regexes.each do |group, tenses|
      next unless group.match(form)

      tenses.each do |tense, regex_array|
        regex_array.each do |regex|
          # Patterns are written around the placeholder "search".
          pattern = regex.to_s.gsub("search", word)
          phrase, list, ranges = scan_phrase(phrase, list, pattern, word, tense, i, ranges)
        end
      end
    end
  end

  # Drop clauses whose range lost out to a longer overlapping one.
  list.keep_if { |_key, range| ranges.include?(range) }
  list
end
115
+
116
# Tries one tense pattern against the phrase and records the clause it finds.
#
# phrase      - the text (String) being scanned.
# list        - Hash of results; a key => word-index Range entry is added on a match.
# regex       - pattern source (String) with the verb already substituted in.
# verb        - the verb (String) as it appears in the phrase.
# tense_label - name of the tense the pattern stands for.
# index       - word position (Integer) of +verb+ in the phrase.
# ranges      - Array of Ranges already claimed by earlier matches.
#
# The key is "<tense id>:<match>:<verb id>" when @format mentions
# "audioverb", otherwise "<tense label> :<match>:<verb id>".
#
# Returns [phrase, list, ranges].
def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges)
  found = phrase.match(/#{regex}/i)
  return phrase, list, ranges unless found

  clause = found.to_s
  lo, hi = get_match_start_index(verb, clause, index)
  ranges = prioritize_ranges(ranges, lo, hi, clause)

  if @format.match(/audioverb/)
    list[@tense_id["#{tense_label}"].to_s + ":" + clause + ":" + @verbs[verb].to_s] = (lo..hi)
  else
    list["#{tense_label} :" + clause + ":" + @verbs[verb].to_s] = (lo..hi)
  end

  [phrase, list, ranges]
end
126
+
127
# Merges the word-index range lo..hi into +ranges+, preferring longer ranges.
#
# ranges - Array of Ranges already recorded; updated in place.
# lo, hi - bounds (Integer) of the new range.
# match  - the matched text; not used here, kept so callers need not change.
#
# For each existing range that is shorter than the new one:
#   * if it starts at +lo+, it is replaced by the new range;
#   * if its first or last index falls inside the new range, it is removed.
# The new range is then appended unless a remaining range already contains
# +lo+ or +hi+.
#
# Returns the same +ranges+ array.
def self.prioritize_ranges(ranges, lo, hi, match)
  range = (lo..hi)

  # Build the surviving list in one pass. Deleting by index while walking the
  # indices would shift later elements, skipping one and then reading past
  # the end of the array.
  survivors = ranges.each_with_object([]) do |existing, kept|
    shorter = existing.count < range.count
    overlaps = range.include?(existing.begin) || range.include?(existing.end)

    if existing.begin == lo && shorter
      kept << range
    elsif !(overlaps && shorter)
      kept << existing
    end
  end
  ranges.replace(survivors)

  # Add the range only if it is not already covered by an existing range.
  ranges << range if ranges.none? { |r| r.include?(lo) || r.include?(hi) }
  ranges
end
88
145
  end
146
+
147
+ # ####For generating conjugations.rb content
148
+ # @conjugations = get_conjugations
149
+ # @conjugations.each do |k,v|
150
+ # @con = v['con']
151
+ # @con_id[@con] = k #id
152
+ # #print "'#{@con}' => #{k},\n"
153
+ # #print "'#{@con}' => #{v['verb_id']},\n"
154
+ # #print "'#{@con}' => #{v['tiempo_id']},\n"
155
+ # #@tiempos[@con] = v['tiempo_id'] #tiempo_id
156
+ # #@verbs[@con] = v['verb_id'] #verb_id
157
+ # end
158
+
data/lib/conjugations.rb CHANGED
@@ -2,10 +2,11 @@
2
2
 
3
3
  def get_id_tiempos
4
4
  id_tiempo = {
5
- 5 => "infinitive",
6
- 6 => "gerund",
7
- 7 => "participle",
8
- 8 => "past"
5
+ 6 => "infinitive",
6
+ 7 => "gerund",
7
+ 8 => "participle",
8
+ 9 => "past",
9
+ 10 => "third"
9
10
  }
10
11
  end
11
12
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clause_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-25 00:00:00.000000000 Z
12
+ date: 2012-09-08 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: A simple hello world gem
15
15
  email: mikefabrikant@gmail.com