clause_extractor 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/clause_extractor.rb +50 -30
- metadata +1 -1
data/lib/clause_extractor.rb
CHANGED
@@ -4,8 +4,14 @@ class ClauseExtractor
|
|
4
4
|
pronouns = "(i|you|he|she|it|they|we|there)"
|
5
5
|
present_perfect = "(already|ever|for|just|never|since|yet)"
|
6
6
|
have_has = "(have|has|haven't|hasn't)"
|
7
|
-
|
8
|
-
|
7
|
+
was_were = "(were|was|wasn't|weren't)"
|
8
|
+
had = "([a-z]{1,4}'d|had)(n't)*"
|
9
|
+
have_has = "(have|has|haven't|hasn't|havent|hasnt|has not|have not)"
|
10
|
+
contractions = "(it'*s|he'*s|she'*s|[a-z]{1,4}'*ve)"
|
11
|
+
to_be = "(am|are|'m|'re|'s|is|[a-z]{1,4}'re)"
|
12
|
+
will = "(will|[a-z]{1,4}'ll)"
|
13
|
+
would = "(would|[a-z]{1,4}'d)"
|
14
|
+
|
9
15
|
@tense_regexes = {
|
10
16
|
|
11
17
|
'third' => {
|
@@ -15,43 +21,55 @@ class ClauseExtractor
|
|
15
21
|
]
|
16
22
|
},
|
17
23
|
'infinitive' => {
|
18
|
-
"simple present" => [
|
19
|
-
|
20
|
-
],
|
24
|
+
"simple present" => [/\b((I|you|they|we|to)\s+)*+search\b/i],#to arrive
|
25
|
+
|
21
26
|
|
22
27
|
"subjunctive future" => [
|
23
|
-
/\bif\s+#{pronouns}\s
|
24
|
-
/\bif\s+#{pronouns}\s+should
|
28
|
+
/\bif\s+#{pronouns}\s+#{was_were}\s+(not\s+)*to\s+(not\s+)*search/i, #if I were to arise
|
29
|
+
/\bif\s+#{pronouns}\s+should(n't)*\s+(not\s+)*search/i #If I should arise
|
25
30
|
],
|
26
|
-
"subjunctive present" => [ /\bthat\s+#{pronouns}\s+(not\s+)*search/i],
|
31
|
+
"subjunctive present" => [ /\bthat\s+#{pronouns}\s+(not\s+)*search/i], #that we arrive
|
27
32
|
|
28
|
-
"conditional simple" => [ /\b(#{pronouns}\s+)*(would
|
33
|
+
"conditional simple" => [ /\b(#{pronouns}\s+)*(would(n't)*|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise, I wouldn't arise
|
29
34
|
|
30
|
-
"will-future" => [ /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i],
|
35
|
+
"will-future" => [ /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
|
31
36
|
|
32
|
-
"going to-future" => [ /\b(#{pronouns}\s+)
|
37
|
+
"going to-future" => [ /\b(#{pronouns}\s+)*#{to_be}\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
|
33
38
|
},
|
34
39
|
'gerund' => {
|
35
|
-
|
36
|
-
"present perfect progressive" => [
|
37
|
-
|
40
|
+
"conditional perfect progressive" => [ /\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
|
41
|
+
"present perfect progressive" => [
|
42
|
+
/\b(#{have_has}\s+)(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #have they not been searching
|
43
|
+
/\b(#{pronouns}\s+)*#{have_has}*\s+(not\s+)*(#{present_perfect}\s+)*been\s+search/i #I have been searching
|
44
|
+
],
|
45
|
+
"past perfect progressive" => [
|
46
|
+
/\b(#{pronouns}\s+)*#{had}\s(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #I had been searching,
|
47
|
+
/\bhad(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #had he not been searching
|
48
|
+
|
49
|
+
],
|
38
50
|
|
39
|
-
"conditional progressive" => [/\b(#{pronouns}\s+)
|
51
|
+
"conditional progressive" => [/\b(#{pronouns}\s+)*#{would}\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
|
40
52
|
"future progressive" => [
|
41
|
-
/\b((#{pronouns})\s+)
|
53
|
+
/\b((#{pronouns})\s+)*#{will}\s+(not\s+)*be\s+search/i,
|
42
54
|
/\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
|
43
|
-
],
|
44
|
-
"past progressive" => [/\b(#{pronouns}\s+)
|
55
|
+
], #I will be searching
|
56
|
+
"past progressive" => [/\b(#{pronouns}\s+)*#{was_were}*\s+(not\s+)*search/i], #I was searching
|
45
57
|
|
46
|
-
"present progressive" => [/\b(#{pronouns}\s
|
58
|
+
"present progressive" => [/\b(#{pronouns}\s*)*(#{to_be}\s+)*(not\s+)*search/i], #I'm rising
|
47
59
|
},
|
48
60
|
"past-participle" => {
|
49
|
-
"conditional perfect" => [/\b(#{pronouns}\s+)
|
50
|
-
"future perfect" => [/\b(#{pronouns}\s+)
|
51
|
-
"past perfect" => [
|
52
|
-
|
53
|
-
|
54
|
-
|
61
|
+
"conditional perfect" => [/\b(#{pronouns}\s+)*#{would}\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
|
62
|
+
"future perfect" => [/\b(#{pronouns}\s+)*#{will}\s+have\s+search/i], #I'll have arisen
|
63
|
+
"past perfect" => [
|
64
|
+
/\b(#{pronouns}\s+)*#{had}\s+(not\s+)*((#{present_perfect})\s+)*search/i, #I had arisen
|
65
|
+
/\b#{had}\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i
|
66
|
+
],
|
67
|
+
"present perfect" => [
|
68
|
+
/\b(#{pronouns}\s+)*#{have_has}\s+((#{present_perfect})\s+)*search/, #They have already seen
|
69
|
+
/\b#{have_has}\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*search/ #Have they already seen
|
70
|
+
],
|
71
|
+
"subjunctive past" => [/\bif\s+#{pronouns}\s+search/i], #if I arose
|
72
|
+
"simple past" => [/\b#{pronouns}\s+search/i] #you chose
|
55
73
|
},
|
56
74
|
#"present perfect" => [/^\s*search\b/i], #arisen
|
57
75
|
#"simple past" => [/^\s*search\b/i] #arose
|
@@ -91,8 +109,8 @@ class ClauseExtractor
|
|
91
109
|
end
|
92
110
|
end
|
93
111
|
end
|
94
|
-
end
|
95
|
-
end
|
112
|
+
end
|
113
|
+
end
|
96
114
|
list.each do |k, v|
|
97
115
|
list.delete(k) unless ranges.include?(v)
|
98
116
|
end
|
@@ -107,9 +125,11 @@ class ClauseExtractor
|
|
107
125
|
match = match.to_s
|
108
126
|
lo, hi = get_match_start_index(verb, match, index)
|
109
127
|
ranges = prioritize_ranges(ranges, lo, hi,match)
|
110
|
-
|
111
|
-
|
112
|
-
|
128
|
+
if @format.match(/audioverb/)
|
129
|
+
list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi)
|
130
|
+
else
|
131
|
+
list["#{tense_label}:" + match.to_s + ":" + (lo..hi).to_s] = (lo..hi) unless @format.match(/audioverb/)
|
132
|
+
end
|
113
133
|
end
|
114
134
|
return phrase, list, ranges
|
115
135
|
end
|