clause_extractor 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/clause_extractor.rb +55 -50
- data/lib/conjugations.rb +53415 -40651
- metadata +2 -2
data/lib/clause_extractor.rb
CHANGED
@@ -1,67 +1,83 @@
|
|
1
1
|
module ClauseExtractor
|
2
2
|
class Clause
|
3
|
-
require "
|
3
|
+
require "./conjugations2"
|
4
4
|
|
5
5
|
# Maps a tense label to the list of regex templates that recognise it.
#
# The literal text `search` inside every pattern is a placeholder:
# get_clauses converts each regex to a string and substitutes the candidate
# word for `search` before matching against the phrase.
#
# Entries are tried in insertion order, so earlier labels win when several
# patterns could claim the same word.
#
# Each label appears exactly once. A Ruby hash literal keeps only the LAST
# value for a duplicated key, so listing a label twice silently discards the
# first set of patterns. Labels that used to be repeated are merged into a
# single array here, kept at the position of their first occurrence.
@tense_regexes = {
  "present perfect" => [
    /\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i, # I have arisen / Have I not arisen
    /^search\b/i # arisen (phrase starts with the verb form)
  ],
  "future progressive" => [
    /\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, # I will be searching
    /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i # I'll be searching
  ],
  "present perfect progressive" => [
    /\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i, # I have been searching
    /\bhave\s+(not\s+)*been\s+search/i # I have not been searching
  ],
  "subjunctive future" => [
    /\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i # if I were to arise
  ],
  "going to-future" => [
    /\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i # they are going to cry
  ],
  "present progressive" => [
    /\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i, # I'm rising
    /^search\b/i # searching (phrase starts with the verb form)
  ],
  "subjunctive present" => [
    /if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i, # if I should arise
    /\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i # that we arrive
  ],
  "conditional perfect" => [
    /\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i # I would not have searched
  ],
  "past perfect" => [
    /\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i # I had arisen
  ],
  "conditional perfect progressive" => [
    /would\s+(not\s+)*have\s+(not\s+)*been\s+search/i # I would have been searching
  ],
  "conditional progressive" => [
    /\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i # I would be searching (I'd)
  ],
  "subjunctive past" => [
    /\bif\s+(i|you|he|she|it|they|we)\s+search/i # if I arose
  ],
  "conditional simple" => [
    /\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i # I would arise
  ],
  "will-future" => [
    /\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i # I'll arise
  ],
  "past progressive" => [
    /\b(was|were)(n't)*\s+(not\s+)*search/i # I was searching
  ],
  "future perfect" => [
    /\b(will|[a-z]{1,4}'ll)\s+have\s+search/i # I'll have arisen
  ],
  "simple past" => [
    /\b(i|you|he|she|it|they)\s+search/i, # you chose
    /^search\b/i # arose (phrase starts with the verb form)
  ],
  "simple present" => [
    /\b(I|you|they|we|to)\s+search\b/i, # arrive
    /\b(he|she|it)\s+search(s)?\b/i, # he arrives
    /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i # adapts it
  ]
}
|
34
34
|
|
35
|
-
def self.scan_phrase(regex, a_i, tense_label)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
35
|
+
# Tries one tense pattern against the phrase and records the hit.
#
# phrase      - String being analysed.
# list        - result collector: a Hash when @format contains "audioverb",
#               otherwise an Array.
# regex       - pattern source (a String or Regexp) interpolated into a
#               case-insensitive regex.
# a_i         - the word currently being examined; used as the key into
#               @verbs in the "audioverb" format.
# tense_label - label stored with the match.
# index       - word position of a_i within the phrase.
# ranges      - Array of Ranges of word positions already claimed by
#               earlier matches; a new match is ignored if it collides.
#
# Returns [phrase, list, ranges]; list and ranges are mutated in place when
# a new match is accepted.
def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
  match = phrase.match(/#{regex}/i)
  return [phrase, list, ranges] unless match

  text = match.to_s
  word_count = text.split(/\s/).length

  # NOTE(review): the collision test probes index + word_count, while the
  # stored range ends at index + word_count - 1. Kept as-is to preserve
  # existing results; confirm which bound is intended.
  taken = ranges.any? { |r| r.include?(index) || r.include?(index + word_count) }
  return [phrase, list, ranges] if taken

  ranges << (index..(index + word_count - 1))

  # No diagnostic output here: a library call must not write to stdout.
  if @format.to_s =~ /audioverb/
    list["#{@tense_id[tense_label.to_s]}:#{text}:#{@verbs[a_i]}"] = 1
  else
    list << "#{tense_label}:#{text}"
  end

  [phrase, list, ranges]
end
|
43
46
|
|
44
47
|
def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
|
45
|
-
@format
|
46
|
-
|
47
|
-
|
48
|
-
|
48
|
+
@format = format
|
49
|
+
phrase = phrase.downcase
|
50
|
+
list = format.match("audioverb") ? Hash.new : Array.new
|
49
51
|
@verbs ||= get_verbs
|
50
52
|
@tiempos ||= get_tiempos
|
51
53
|
@id_tiempo ||= get_id_tiempos
|
52
54
|
@tense_id ||= get_tenses
|
53
55
|
@con_id ||= get_con_id
|
56
|
+
ranges = []
|
57
|
+
|
54
58
|
|
59
|
+
# ####For generating conjugations.rb content
|
60
|
+
# @conjugations = get_conjugations
|
61
|
+
# @conjugations.each do |k,v|
|
62
|
+
# @con = v['con']
|
63
|
+
# @con_id[@con] = k #id
|
64
|
+
# #print "'#{@con}' => #{k},\n"
|
65
|
+
# #print "'#{@con}' => #{v['verb_id']},\n"
|
66
|
+
# #print "'#{@con}' => #{v['tiempo_id']},\n"
|
67
|
+
# #@tiempos[@con] = v['tiempo_id'] #tiempo_id
|
68
|
+
# #@verbs[@con] = v['verb_id'] #verb_id
|
69
|
+
# end
|
55
70
|
|
56
71
|
a=Array.new
|
57
72
|
a = phrase.split(/\s+/)
|
58
|
-
|
73
|
+
a.length.times do |i|
|
59
74
|
a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
|
60
75
|
if @con_id[a[i]] then #if word matches a conjugation
|
61
76
|
@tense_regexes.each do |k,v|
|
62
77
|
v.each do |regex|
|
63
78
|
regex = regex.to_s.gsub("search", "#{a[i]}")
|
64
|
-
|
79
|
+
|
80
|
+
phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], k, i, ranges)
|
65
81
|
end
|
66
82
|
end
|
67
83
|
end #end if is conjugation
|
@@ -70,14 +86,3 @@ module ClauseExtractor
|
|
70
86
|
end
|
71
87
|
end
|
72
88
|
end
|
73
|
-
####For generating conjugations.rb content
|
74
|
-
#@conjugations = get_conjugations
|
75
|
-
# @conjugations.each do |k,v|
|
76
|
-
# @con = v['con']
|
77
|
-
# @con_id[@con] = k #id
|
78
|
-
# # print "'#{@con}' => #{k},\n"
|
79
|
-
# # #print "'#{@con}' => #{v['verb_id']},\n"
|
80
|
-
# # #print "'#{@con}' => #{v['tiempo_id']},\n"
|
81
|
-
# # @tiempos[@con] = v['tiempo_id'] #tiempo_id
|
82
|
-
# # @verbs[@con] = v['verb_id'] #verb_id
|
83
|
-
# end
|