clause_extractor 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/clause_extractor.rb +152 -82
- data/lib/conjugations.rb +5 -4
- metadata +2 -2
data/lib/clause_extractor.rb
CHANGED
@@ -1,88 +1,158 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
"
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
1
|
+
class ClauseExtractor
|
2
|
+
require "conjugations"
|
3
|
+
|
4
|
+
# Fragments interpolated into the tense patterns below.
pronouns = "(i|you|he|she|it|they|we|there)"
present_perfect = "(already|ever|for|just|never|since|yet)"
have_has = "(have|has|haven't|hasn't)"

# Tense patterns grouped by verb form. The outer keys are matched against the
# verb-form name looked up for a word in get_clauses; the inner keys are tense
# labels, each mapped to the list of patterns that identify that tense.
# The literal word "search" in every pattern is a placeholder: get_clauses
# replaces it with the conjugated verb found in the phrase before matching.
# The trailing comments give an example phrase for each pattern.
@tense_regexes = {

  'third' => {
    "simple present" => [
      /\b(he|she|it)\s+search(s)?\b/i, #he arrives
      /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i #adapts it
    ]
  },
  'infinitive' => {
    "simple present" => [
      /\b((I|you|they|we|to)\s+)*+search\b/i, #arrive
    ],

    "subjunctive future" => [
      /\bif\s+#{pronouns}\s+were\s+(not\s+)*to\s+(not\s+)*search/i, #if I were to arise
      /\bif\s+#{pronouns}\s+should\s+(not\s+)*search/i #If I should arise
    ],
    "subjunctive present" => [ /\bthat\s+#{pronouns}\s+(not\s+)*search/i], #that we arrive

    "conditional simple" => [ /\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise

    "will-future" => [ /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise

    "going to-future" => [ /\b(#{pronouns}\s+)*(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
  },
  'gerund' => {
    "conditional perfect progressive" => [/\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
    "present perfect progressive" => [/\b(#{pronouns}\s+)*([a-z]{1,4}'ve|have|has)(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i], #I have been searching
    "conditional progressive" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
    "future progressive" => [
      /\b((#{pronouns})\s+)*(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i,
      /\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
    ], #I will be searching
    "past progressive" => [/\b(#{pronouns}\s+)*(was|were)(n't)*\s+(not\s+)*search/i], #I was searching

    "present progressive" => [/\b(#{pronouns}\s+)*((am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+)*(not\s+)*search/i], #I'm rising

  },
  "past-participle" => {
    "conditional perfect" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would have arisen
    "future perfect" => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
    "past perfect" => [/\b(#{pronouns}\s+)*(had|[a-z]{1,4}'d)\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i], #I had arisen
    # Case-insensitive like every other pattern in this table.
    "present perfect" => [/\b(#{pronouns}\s+)*#{have_has}\s+(#{pronouns}\s+)*(not\s+)*((just|already|ever)\s+)*search/i], #Have you seen
    "subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
    "simple past" => [/\b#{pronouns}\s+search/i] #you chose
  },

  # Disabled bare-verb patterns, kept for reference:
  # # "present perfect" => [/^\s*search\b/i], #arisen
  # # "simple past" => [/^\s*search\b/i] #arose
}
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
# Converts a tense match into a span of word positions ending at the verb.
#
# verb  - the conjugated verb the match was built around
# match - the text matched by the tense pattern
# index - word position of the verb
#
# Returns two values: index minus the number of whitespace-separated words
# that precede the last occurrence of the verb inside match, and index itself.
def self.get_match_start_index(verb, match, index)
  # Escape the verb so it is always searched for as literal text.
  literal_verb = Regexp.escape(verb)
  # Offset of the last occurrence of the verb within the match.
  verb_offset = match.index(/#{literal_verb}(?!.*#{literal_verb})/i)
  # No verb inside the match means no words precede it.
  return index, index if verb_offset.nil?

  words_before_verb = match[0, verb_offset].split(/\s+/).size
  return index - words_before_verb, index
end
|
79
|
+
|
80
|
+
# Finds the tense clauses contained in a phrase.
#
# phrase - text to scan; it is downcased before matching
# format - stored in @format; scan_phrase checks it for "audioverb" to decide
#          how result keys are built
# verbs, tiempo, id_tiempo, tense_id, con_id - accepted but not read here
#
# The lookup tables (@verbs, @tiempos, @id_tiempo, @tense_id, @con_id) are
# loaded once and reused on later calls.
#
# Returns a Hash of result keys to word-index ranges, keeping only the entries
# whose range is still present in the prioritized range list.
def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
  @format = format
  phrase = phrase.downcase
  list = {}
  @verbs ||= get_verbs
  @tiempos ||= get_tiempos
  @id_tiempo ||= get_id_tiempos
  @tense_id ||= get_tenses
  @con_id ||= get_con_id
  ranges = []

  phrase.split(/\s+/).each_with_index do |raw_word, i|
    word = raw_word.gsub(/[!.?\(\)]/, "") # remove any punctuation from the word
    next unless @con_id[word] # only words that are known conjugations

    # The verb form depends only on the word, so build its pattern once.
    form_pattern = /#{@id_tiempo[@tiempos[word]]}/
    @tense_regexes.each do |form, tenses|
      next unless form.match(form_pattern)

      tenses.each do |tense, regex_array|
        regex_array.each do |regex|
          # Swap the "search" placeholder for the actual conjugation.
          pattern = regex.to_s.gsub("search", word)
          phrase, list, ranges = scan_phrase(phrase, list, pattern, word, tense, i, ranges)
        end
      end
    end
  end

  # Drop every entry whose range lost out during prioritization.
  list.delete_if { |_key, range| !ranges.include?(range) }
  # NOTE(review): looks like leftover debug output; kept so callers see no change.
  print "#{list}\n"
  list
end
|
115
|
+
|
116
|
+
# Tries one tense pattern against the phrase and records a hit.
#
# On a match, the word span is computed with get_match_start_index, merged
# into ranges via prioritize_ranges, and stored in list under a key of the
# form "<tense>:<matched text>:<verb id>". The tense part is the id from
# @tense_id when @format mentions "audioverb", otherwise the label followed
# by a space.
#
# Returns phrase, list and ranges, in that order.
def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges)
  found = phrase.match(/#{regex}/i)
  return phrase, list, ranges unless found

  match = found.to_s
  lo, hi = get_match_start_index(verb, match, index)
  ranges = prioritize_ranges(ranges, lo, hi, match)
  span = (lo..hi)

  if @format.match(/audioverb/)
    list["#{@tense_id[tense_label.to_s]}:#{match}:#{@verbs[verb]}"] = span
  else
    list["#{tense_label} :#{match}:#{@verbs[verb]}"] = span
  end

  return phrase, list, ranges
end
|
126
|
+
|
127
|
+
# Merges the span lo..hi into the list of accepted ranges, preferring longer
# spans over shorter ones they cover.
#
# ranges - Array of Range objects accepted so far; updated in place
# lo, hi - bounds of the new span
# match  - matched text; not used by this method
#
# Returns the same ranges array.
def self.prioritize_ranges(ranges, lo, hi,match)
  range = (lo..hi)

  # Build the surviving list in one pass instead of deleting by index while
  # looping, which skipped elements and ran past the end of the array.
  reconciled = []
  ranges.each do |existing|
    shorter = existing.count < range.count
    if existing.begin == lo && shorter
      # Same start point and the new range is longer: take the new one.
      reconciled << range
    elsif shorter && (range.include?(existing.begin) || range.include?(existing.end))
      # A shorter range with an endpoint inside the new one is dropped.
      next
    else
      reconciled << existing
    end
  end
  ranges.replace(reconciled)

  # Add the new range unless an existing range already covers one of its ends.
  ranges << range unless ranges.any? { |r| r.include?(lo) || r.include?(hi) }

  ranges
end
|
88
145
|
end
|
146
|
+
|
147
|
+
# ####For generating conjugations.rb content
|
148
|
+
# @conjugations = get_conjugations
|
149
|
+
# @conjugations.each do |k,v|
|
150
|
+
# @con = v['con']
|
151
|
+
# @con_id[@con] = k #id
|
152
|
+
# #print "'#{@con}' => #{k},\n"
|
153
|
+
# #print "'#{@con}' => #{v['verb_id']},\n"
|
154
|
+
# #print "'#{@con}' => #{v['tiempo_id']},\n"
|
155
|
+
# #@tiempos[@con] = v['tiempo_id'] #tiempo_id
|
156
|
+
# #@verbs[@con] = v['verb_id'] #verb_id
|
157
|
+
# end
|
158
|
+
|
data/lib/conjugations.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: clause_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08
|
12
|
+
date: 2012-09-08 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: A simple hello world gem
|
15
15
|
email: mikefabrikant@gmail.com
|