clause_extractor 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/clause_extractor.rb +55 -50
- data/lib/conjugations.rb +53415 -40651
- metadata +2 -2
data/lib/clause_extractor.rb
CHANGED
@@ -1,67 +1,83 @@
|
|
1
1
|
module ClauseExtractor
|
2
2
|
class Clause
|
3
|
-
require "
|
3
|
+
require "./conjugations2"
|
4
4
|
|
5
5
|
# Maps a tense label to the list of patterns that recognise it.
#
# Every pattern contains the placeholder word "search"; the caller substitutes
# the concrete conjugated word for it before matching.
#
# Each label appears exactly ONCE. A Ruby hash literal keeps only the last
# value given for a repeated key, so listing a label twice silently throws the
# earlier patterns away. Labels that need both a multi-word pattern and a
# bare-word pattern (/^search\b/) therefore carry both in a single list, with
# the more specific pattern first.
#
# Entry order is significant: the table is walked top to bottom, so more
# specific constructions are listed before more general ones.
@tense_regexes = {
  "present perfect" => [
    /\b(have|has|it's|he's|she's|[a-z]{1,4}'ve)\s+((i|you|he|she|it|they|we)\s+)*(not\s+)*((just|already)\s+)*search/i, # I have arisen / Have I not arisen
    /^search\b/i # arisen
  ],
  "future progressive" => [
    /\b(will|[a-z]{1,4}'ll)\s+(not\s+)*be\s+search/i, # I will be searching
    /\b[a-z]{1,4}'ll\s+(not\s+)*be\s+search/i
  ],
  "present perfect progressive" => [
    /\b([a-z]{1,4}'ve|have|has)(n't)*\s+(not\s+)*((just|already)\s+)*been\s+search/i, # I have been searching
    /\bhave\s+(not\s+)*been\s+search/i
  ],
  "subjunctive future" => [
    /\bif\s+(i|you|he|she|it|they|we)\s+were\s+(not\s+)*to\s+(not\s+)*search/i # if I were to arise
  ],
  "going to-future" => [
    /\b(am|are|i'm|[a-z]{1,4}'re|[a-z]{1,4}'s)\s+(not\s+)*going\s+to\s+search/i # they are going to cry
  ],
  "present progressive" => [
    /\b(am|are|is|i'm|\b[a-z]{1,4}'re|\b[a-z]{1,4}'s)\s+(not\s+)*search/i, # I'm rising
    /^search\b/i # searching
  ],
  "subjunctive present" => [
    /if\s+(i|you|he|she|it|they|we)\s+should\s+(not\s+)*search/i, # if I should arise
    /\bthat\s+(i|you|he|she|they|we)\s+(not\s+)*search/i # that we arrive
  ],
  "conditional perfect" => [
    /\b(would|[a-z]{1,4}'d)\s+(not\s+)*have\s+(not\s+)*search/i # I would have arisen
  ],
  "past perfect" => [
    /\b(had|[a-z]{1,4}'d)\s+(not\s+)*(just\s+)*search/i # I had arisen
  ],
  "conditional perfect progressive" => [
    /would\s+(not\s+)*have\s+(not\s+)*been\s+search/i # I would have been searching
  ],
  "conditional progressive" => [
    /\b(would|[a-z]{1,4}'d)\s+(not\s+)*be\s+search/i # I would be searching (I'd)
  ],
  "subjunctive past" => [
    /\bif\s+(i|you|he|she|it|they|we)\s+search/i # if I arose
  ],
  "conditional simple" => [
    /\b(would|[a-z]{1,4}'d)(\s+not)*\s+search/i # I would arise
  ],
  "will-future" => [
    /\b(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i # I'll arise
  ],
  "past progressive" => [
    /\b(was|were)(n't)*\s+(not\s+)*search/i # I was searching
  ],
  "future perfect" => [
    /\b(will|[a-z]{1,4}'ll)\s+have\s+search/i # I'll have arisen
  ],
  "simple past" => [
    /\b(i|you|he|she|it|they)\s+search/i, # you chose
    /^search\b/i # arose
  ],
  "simple present" => [
    /\b(I|you|they|we|to)\s+search\b/i, # arrive
    /\b(he|she|it)\s+search(s)?\b/i, # he arrives
    /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i # adapts it
  ]
}
|
34
34
|
|
35
|
-
def self.scan_phrase(regex, a_i, tense_label)
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
35
|
+
# Tries a single tense pattern against +phrase+ and records a hit in +list+.
#
# phrase      - String to search.
# list        - collector for hits: a Hash when @format contains "audioverb"
#               (a key is set to 1), otherwise an Array (a String is appended).
# regex       - pattern source (String or Regexp); it is matched
#               case-insensitively.
# a_i         - the conjugated word; used as the lookup key into @verbs.
# tense_label - label recorded with the hit; also the lookup key into @tense_id.
# index       - word position the caller associates with this hit.
# ranges      - Array of Ranges of word positions already claimed by earlier
#               hits; the new span is appended to it when the hit is recorded.
#
# The span claimed by a hit starts at +index+ and is as many words long as the
# matched text. A hit whose span overlaps any range already in +ranges+ is
# ignored, so the same words are not reported twice.
#
# Returns [phrase, list, ranges]. +list+ and +ranges+ are modified in place.
def self.scan_phrase(phrase, list, regex, a_i, tense_label, index, ranges)
  result = [phrase, list, ranges]

  match = phrase.match(/#{regex}/i)
  return result unless match

  matched_text = match.to_s
  # Count words on runs of whitespace so that repeated spaces inside the match
  # do not inflate the span; never claim fewer than one word.
  word_count = [matched_text.split(/\s+/).length, 1].max
  first_word = index
  last_word = index + word_count - 1

  # Two inclusive spans overlap exactly when each one starts no later than the
  # other one ends. This covers touching ends and full containment alike.
  overlapping = ranges.any? { |r| r.first <= last_word && first_word <= r.last }
  return result if overlapping

  ranges << (first_word..last_word)

  # A missing @format is treated like the plain (non-audioverb) format.
  if @format.to_s =~ /audioverb/
    list["#{@tense_id[tense_label.to_s]}:#{matched_text}:#{@verbs[a_i]}"] = 1
  else
    list << "#{tense_label}:#{matched_text}"
  end

  result
end
|
43
46
|
|
44
47
|
def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
|
45
|
-
@format
|
46
|
-
|
47
|
-
|
48
|
-
|
48
|
+
@format = format
|
49
|
+
phrase = phrase.downcase
|
50
|
+
list = format.match("audioverb") ? Hash.new : Array.new
|
49
51
|
@verbs ||= get_verbs
|
50
52
|
@tiempos ||= get_tiempos
|
51
53
|
@id_tiempo ||= get_id_tiempos
|
52
54
|
@tense_id ||= get_tenses
|
53
55
|
@con_id ||= get_con_id
|
56
|
+
ranges = []
|
57
|
+
|
54
58
|
|
59
|
+
# ####For generating conjugations.rb content
|
60
|
+
# @conjugations = get_conjugations
|
61
|
+
# @conjugations.each do |k,v|
|
62
|
+
# @con = v['con']
|
63
|
+
# @con_id[@con] = k #id
|
64
|
+
# #print "'#{@con}' => #{k},\n"
|
65
|
+
# #print "'#{@con}' => #{v['verb_id']},\n"
|
66
|
+
# #print "'#{@con}' => #{v['tiempo_id']},\n"
|
67
|
+
# #@tiempos[@con] = v['tiempo_id'] #tiempo_id
|
68
|
+
# #@verbs[@con] = v['verb_id'] #verb_id
|
69
|
+
# end
|
55
70
|
|
56
71
|
a=Array.new
|
57
72
|
a = phrase.split(/\s+/)
|
58
|
-
|
73
|
+
a.length.times do |i|
|
59
74
|
a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
|
60
75
|
if @con_id[a[i]] then #if word matches a conjugation
|
61
76
|
@tense_regexes.each do |k,v|
|
62
77
|
v.each do |regex|
|
63
78
|
regex = regex.to_s.gsub("search", "#{a[i]}")
|
64
|
-
|
79
|
+
|
80
|
+
phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], k, i, ranges)
|
65
81
|
end
|
66
82
|
end
|
67
83
|
end #end if is conjugation
|
@@ -70,14 +86,3 @@ module ClauseExtractor
|
|
70
86
|
end
|
71
87
|
end
|
72
88
|
end
|
73
|
-
####For generating conjugations.rb content
|
74
|
-
#@conjugations = get_conjugations
|
75
|
-
# @conjugations.each do |k,v|
|
76
|
-
# @con = v['con']
|
77
|
-
# @con_id[@con] = k #id
|
78
|
-
# # print "'#{@con}' => #{k},\n"
|
79
|
-
# # #print "'#{@con}' => #{v['verb_id']},\n"
|
80
|
-
# # #print "'#{@con}' => #{v['tiempo_id']},\n"
|
81
|
-
# # @tiempos[@con] = v['tiempo_id'] #tiempo_id
|
82
|
-
# # @verbs[@con] = v['verb_id'] #verb_id
|
83
|
-
# end
|