clause_extractor 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/clause_extractor.rb +12 -26
- metadata +1 -1
data/lib/clause_extractor.rb
CHANGED
|
@@ -6,20 +6,18 @@ class ClauseExtractor
|
|
|
6
6
|
have_has = "(have|has|haven't|hasn't)"
|
|
7
7
|
contractions = "it's|he's|she's|[a-z]{1,4}'ve"
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
9
|
@tense_regexes = {
|
|
12
10
|
|
|
13
11
|
'third' => {
|
|
14
12
|
"simple present"
|
|
15
13
|
=> [
|
|
16
|
-
/\b(he|she|it)\s+search(s)?\b/i,
|
|
17
|
-
/\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i
|
|
14
|
+
/\b(he|she|it)\s+search(s)?\b/i, #he arrives
|
|
15
|
+
/\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i #adapts it
|
|
18
16
|
]
|
|
19
17
|
},
|
|
20
18
|
'infinitive' => {
|
|
21
19
|
"simple present" => [
|
|
22
|
-
/\b((I|you|they|we|to)\s+)*+search\b/i,
|
|
20
|
+
/\b((I|you|they|we|to)\s+)*+search\b/i, #arrive
|
|
23
21
|
],
|
|
24
22
|
|
|
25
23
|
"subjunctive future" => [
|
|
@@ -35,39 +33,28 @@ class ClauseExtractor
|
|
|
35
33
|
"going to-future" => [ /\b(#{pronouns}\s+)*(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
|
|
36
34
|
},
|
|
37
35
|
'gerund' => {
|
|
38
|
-
"conditional perfect progressive" => [/\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i],
|
|
36
|
+
"conditional perfect progressive" => [/\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
|
|
39
37
|
"present perfect progressive" => [/\b(#{pronouns}\s+)*([a-z]{1,4}'ve|have|has)(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i], #I have been searching
|
|
40
38
|
"conditional progressive" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
|
|
41
39
|
"future progressive" => [
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
],
|
|
40
|
+
/\b((#{pronouns})\s+)*(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i,
|
|
41
|
+
/\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
|
|
42
|
+
], #I will be searching
|
|
45
43
|
"past progressive" => [/\b(#{pronouns}\s+)*(was|were)(n't)*\s+(not\s+)*search/i], #I was searching
|
|
46
44
|
|
|
47
45
|
"present progressive" => [/\b(#{pronouns}\s+)*((am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+)*(not\s+)*search/i], #I'm rising
|
|
48
|
-
|
|
49
46
|
},
|
|
50
47
|
"past-participle" => {
|
|
51
48
|
"conditional perfect" => [/\b(#{pronouns}\s+)*(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
|
|
52
|
-
"future perfect" => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)\s+have\s+search/i],
|
|
49
|
+
"future perfect" => [/\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)\s+have\s+search/i], #I'll have arisen
|
|
53
50
|
"past perfect" => [/\b(#{pronouns}\s+)*(had|[a-z]{1,4}'d)\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i], #I had arisen
|
|
54
51
|
"present perfect" => [/\b(#{pronouns}\s+)*#{have_has}\s+(#{pronouns}\s+)*(not\s+)*((just|already|ever)\s+)*search/], #Have you seen
|
|
55
|
-
"subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i],
|
|
56
|
-
"simple past" => [/\b#{pronouns}\s+search/i]
|
|
52
|
+
"subjunctive past" => [/\bif\s+(i|you|he|she|it|they|we)\s+search/i], #if I arose
|
|
53
|
+
"simple past" => [/\b#{pronouns}\s+search/i] #you chose
|
|
57
54
|
},
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
# # "present perfect" => [/^\s*search\b/i], #arisen
|
|
66
|
-
# # "simple past" => [/^\s*search\b/i] #arose
|
|
55
|
+
#"present perfect" => [/^\s*search\b/i], #arisen
|
|
56
|
+
#"simple past" => [/^\s*search\b/i] #arose
|
|
67
57
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
58
|
def self.get_match_start_index(verb, match, index)
|
|
72
59
|
#get start position of last occurrence of verb in match
|
|
73
60
|
verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i
|
|
@@ -139,7 +126,6 @@ class ClauseExtractor
|
|
|
139
126
|
if ranges.each.select{|r| r.include?(lo) || r.include?(hi)}.size == 0
|
|
140
127
|
ranges << range
|
|
141
128
|
end
|
|
142
|
-
|
|
143
129
|
ranges
|
|
144
130
|
end
|
|
145
131
|
end
|