clause_extractor 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/clause_extractor.rb +25 -100
  2. metadata +2 -2
@@ -1,89 +1,8 @@
1
1
  class ClauseExtractor
2
2
  require "conjugations"
3
-
4
- pronouns = "(i|you|he|she|it|they|we|there)"
5
- present_perfect = "(already|ever|for|just|never|since|yet)"
6
- have_has = "(have|has|haven't|hasn't)"
7
- was_were = "(were|was|wasn't|weren't)"
8
- had = "([a-z]{1,4}'d|had)(n't)*"
9
- have_has = "(have|has|haven't|hasn't|havent|hasnt|has not|have not)"
10
- contractions = "(it'*s|he'*s|she'*s|[a-z]{1,4}'*ve)"
11
- to_be = "(am|are|'m|'re|'s|is|[a-z]{1,4}'re)"
12
- will = "(will|[a-z]{1,4}'ll)"
13
- would = "(would|[a-z]{1,4}'d)"
14
-
15
- @tense_regexes = {
16
-
17
- 'third' => {
18
- "simple present" => [
19
- /\b(he|she|it)\s+search(s)?\b/i, #he arrives
20
- /\bsearch(s)?\s+(it|them|him|her|me|you|us)\b/i #adapts it
21
- ]
22
- },
23
- 'infinitive' => {
24
- "simple present" => [/\b((I|you|they|we|to)\s+)*+search\b/i],#to arrive
25
-
26
-
27
- "subjunctive future" => [
28
- /\bif\s+#{pronouns}\s+#{was_were}\s+(not\s+)*to\s+(not\s+)*search/i, #if I were to arise
29
- /\bif\s+#{pronouns}\s+should(n't)*\s+(not\s+)*search/i #If I should arise
30
- ],
31
- "subjunctive present" => [ /\bthat\s+#{pronouns}\s+(not\s+)*search/i], #that we arrive
32
-
33
- "conditional simple" => [ /\b(#{pronouns}\s+)*(would(n't)*|[a-z]{1,4}'d)(\s+not)*\s+search/i], #I would arise, I wouldn't arise
34
-
35
- "will-future" => [ /\b(#{pronouns}\s+)*(will|[a-z]{1,4}'ll)(\s+not)*\s+search/i], #I'll arise
36
-
37
- "going to-future" => [ /\b(#{pronouns}\s+)*#{to_be}\s+(not\s+)*going\s+to\s+search/i], #they are going to cry
38
- },
39
- 'gerund' => {
40
- "conditional perfect progressive" => [ /\b(#{pronouns}\s+)*would\s+(not\s+)*have\s+(not\s+)*been\s+search/i], #I would have been searching
41
- "present perfect progressive" => [
42
- /\b(#{have_has}\s+)(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #have they not been searching
43
- /\b(#{pronouns}\s+)*#{have_has}*\s+(not\s+)*(#{present_perfect}\s+)*been\s+search/i #I have been searching
44
- ],
45
- "past perfect progressive" => [
46
- /\b(#{pronouns}\s+)*#{had}\s(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #I had been searching,
47
- /\bhad(n't)*\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*been\s+search/i, #had he not been searching
48
-
49
- ],
50
-
51
- "conditional progressive" => [/\b(#{pronouns}\s+)*#{would}\s+(not\s+)*be\s+search/i], #I would be searching (I'd)
52
- "future progressive" => [
53
- /\b((#{pronouns})\s+)*#{will}\s+(not\s+)*be\s+search/i,
54
- /\bwill\s+(#{pronouns}\s+)(not\s+)*be\s+search/i,
55
- ], #I will be searching
56
- "past progressive" => [/\b(#{pronouns}\s+)*#{was_were}*\s+(not\s+)*search/i], #I was searching
57
-
58
- "present progressive" => [/\b(#{pronouns}\s*)*(#{to_be}\s+)*(not\s+)*search/i], #I'm rising
59
- },
60
- "past-participle" => {
61
- "conditional perfect" => [/\b(#{pronouns}\s+)*#{would}\s+(not\s+)*have\s+(not\s+)*search/i], #I would not search
62
- "future perfect" => [/\b(#{pronouns}\s+)*#{will}\s+have\s+search/i], #I'll have arisen
63
- "past perfect" => [
64
- /\b(#{pronouns}\s+)*#{had}\s+(not\s+)*((#{present_perfect})\s+)*search/i, #I had arisen
65
- /\b#{had}\s+(#{pronouns}\s+)*(not\s+)*((#{present_perfect})\s+)*search/i
66
- ],
67
- "present perfect" => [
68
- /\b(#{pronouns}\s+)*#{have_has}\s+((#{present_perfect})\s+)*search/, #They have already seen
69
- /\b#{have_has}\s+(#{pronouns}\s+)*(not\s+)*(#{present_perfect}\s+)*search/ #Have they already seen
70
- ],
71
- "subjunctive past" => [/\bif\s+#{pronouns}\s+search/i], #if I arose
72
- "simple past" => [/\b#{pronouns}\s+search/i] #you chose
73
- },
74
- #"present perfect" => [/^\s*search\b/i], #arisen
75
- #"simple past" => [/^\s*search\b/i] #arose
76
- }
77
- def self.get_match_start_index(verb, match, index)
78
- #get start position of last occurence of verb in match
79
- verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i
80
- #count spaces between match start and verb_index_in_match and subtract that from index
81
- lo = index - match[0,verb_index_in_match].split(/\s+/).size
82
- hi = lo + match[0,verb_index_in_match].split(/\s+/).size
83
- return lo, hi
84
- end
3
+ require "matchers"
85
4
 
86
- def self.get_clauses(phrase, format = String.new, verbs=nil, tiempo=nil, id_tiempo=nil, tense_id=nil, con_id=nil)
5
+ def self.get_clauses(phrase, format = String.new)
87
6
  @format = format
88
7
  phrase = phrase.downcase
89
8
  #list = format.match("audioverb") ? Hash.new : Array.new
@@ -94,32 +13,38 @@ class ClauseExtractor
94
13
  @tense_id ||= get_tenses
95
14
  @con_id ||= get_con_id
96
15
  ranges = []
97
- a=Array.new
98
- a = phrase.split(/\s+/)
99
- a.length.times do |i|
100
- a[i].gsub!(/[!.?\(\)]/,"") if a[i] #remove any punctuation from the word
101
- if @con_id[a[i]] then #if word matches a conjugation
102
- @tense_regexes.each do |k,v|
103
- if k.match(/#{@id_tiempo[@tiempos[a[i]]]}/)
16
+
17
+ phrase.gsub!(/[!.?\(\)]/,"") if phrase
18
+ phrase_a = phrase.split(/\s+/)
19
+ phrase_a.length.times do |i|
20
+ # phrase_a[i].gsub!(/[!.?\(\)]/,"") if phrase_a[i] #remove any punctuation from the word
21
+ if @con_id[phrase_a[i]] then #if word matches a conjugation
22
+ $tense_regexes.each do |k,v|
23
+ if k.match(/#{@id_tiempo[@tiempos[phrase_a[i]]]}/)
104
24
  v.each do |tense, regex_array|
105
25
  regex_array.each do |regex|
106
- regex = regex.to_s.gsub("search", "#{a[i]}")
107
- phrase, list, ranges = scan_phrase(phrase, list, regex, a[i], tense, i, ranges)
26
+ regex = regex.to_s.gsub("search", "#{phrase_a[i]}")
27
+ phrase, list, ranges = scan_phrase(phrase, list, regex, phrase_a[i], tense, i, ranges)
108
28
  end
109
29
  end
110
30
  end
111
31
  end
112
32
  end
113
- end
114
- list.each do |k, v|
115
- list.delete(k) unless ranges.include?(v)
116
- end
117
- list.each do |k,v|
118
- print "#{k}\n"
119
- end
33
+ end
34
+ list.each { |k, v| list.delete(k) unless ranges.include?(v) }
35
+ list.each { |k, v| print "#{k}\n" }
120
36
  list
121
37
  end
122
38
 
39
+ def self.get_match_start_index(verb, match, index)
40
+ # Find the character offset of the last occurrence of verb in match (case-insensitive); the negative lookahead rejects any occurrence that is followed by another one. NOTE(review): verb is interpolated into the regex unescaped, and if it is not found match.index returns nil, which makes the slices below raise - confirm callers always pass a verb contained in match.
41
+ verb_index_in_match = match.index /#{verb}(?!.*#{verb})/i
42
+ # Count the whitespace-separated tokens of match that precede the verb and subtract that count from index to get lo; hi adds the same count back, so hi always equals index. Returns the pair [lo, hi].
43
+ lo = index - match[0,verb_index_in_match].split(/\s+/).size
44
+ hi = lo + match[0,verb_index_in_match].split(/\s+/).size
45
+ return lo, hi
46
+ end
47
+
123
48
  def self.scan_phrase(phrase, list, regex, verb, tense_label, index, ranges)
124
49
  if match = phrase.match(/#{regex}/i)
125
50
  match = match.to_s
@@ -128,7 +53,7 @@ class ClauseExtractor
128
53
  if @format.match(/audioverb/)
129
54
  list[@tense_id["#{tense_label}"].to_s+":" + match.to_s + ":" + @verbs[verb].to_s] = (lo..hi)
130
55
  else
131
- list["#{tense_label}:" + match.to_s + ":" + (lo..hi).to_s] = (lo..hi) unless @format.match(/audioverb/)
56
+ list["#{tense_label}:" + match.to_s + ":" + (lo..hi).to_s] = (lo..hi)
132
57
  end
133
58
  end
134
59
  return phrase, list, ranges
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clause_extractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-08 00:00:00.000000000 Z
12
+ date: 2012-10-06 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: English verbal clause extractor
15
15
  email: mikefabrikant@gmail.com