keyword_finder 0.1.2 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/keyword_finder/keywords.rb +5 -43
- data/lib/keyword_finder/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aae2604a62d298eec0d41d58308531c4ee0a69c0
|
4
|
+
data.tar.gz: cf00f39dbe6ceaa3967407c2eb63b1757aba963e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b404c8bef1c440f70629a7cb1a61865cac24a4662c3bf3bbb4c28b01bc263c4ea68aad6c490d6e96d6ab3d882d9299cee818af75bfcdabb17850edb92eae8ea
|
7
|
+
data.tar.gz: 1c3445137b678fe7ba44156b5a699b92d9bb6d7bdf075c0c864970b1fc04c0243a2d3f67b8dbb984f5f4f73db47fac168184fed848ea7750d0b50f7214bda4ab
|
@@ -4,7 +4,7 @@ module KeywordFinder
|
|
4
4
|
self.sort{|a,b| b.length <=> a.length }
|
5
5
|
end
|
6
6
|
def escape_regex_chars string
|
7
|
-
Regexp.escape(string)
|
7
|
+
Regexp.escape(string).downcase
|
8
8
|
end
|
9
9
|
def to_regex
|
10
10
|
@to_regex ||= Regexp.new("(#{
|
@@ -21,9 +21,7 @@ module KeywordFinder
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def combine_more_specifics sentence
|
24
|
-
sentence.
|
25
|
-
gsub(/([A-Za-z]*\([A-Za-z]*\)[A-Za-z]+)/) { |s| s.gsub(/(\(|\))/,'') }.
|
26
|
-
gsub(/([A-Za-z]+\([A-Za-z]*\)[A-Za-z]*)/) { |s| s.gsub(/(\(|\))/,'') }
|
24
|
+
sentence.gsub(/([A-Za-z]*\([A-Za-z]*\)[A-Za-z]*)/) { |s| s.gsub(/(\(|\))/,'') }
|
27
25
|
end
|
28
26
|
|
29
27
|
def scan_part sentence
|
@@ -38,22 +36,14 @@ module KeywordFinder
|
|
38
36
|
results
|
39
37
|
end
|
40
38
|
|
41
|
-
# find in a sentence
|
42
|
-
#
|
43
|
-
#
|
44
|
-
# @param [String] sentence that might contain the keywords this instance was initialized with
|
45
|
-
# @param [Hash] options; notably the +:subsentences_strategy+, which can be one of +:none+, +:ignore_if_found_in_main+, +:always_ignore+
|
46
|
-
|
47
39
|
def find_in sentence, options={}
|
48
40
|
options = {
|
49
41
|
subsentences_strategy: :none # :none, :ignore_if_found_in_main, :always_ignore
|
50
42
|
}.merge(options)
|
51
|
-
|
52
|
-
full_sentence_results = self.scan_part(sentence)
|
43
|
+
sentence = sentence.downcase
|
53
44
|
sentence = self.combine_more_specifics(sentence)
|
54
45
|
main_and_subs = self.separate_main_and_sub_sentences(sentence)
|
55
46
|
main_results = self.scan_part(main_and_subs[:main])
|
56
|
-
|
57
47
|
sub_results = []
|
58
48
|
unless (
|
59
49
|
options[:subsentences_strategy] == :always_ignore or
|
@@ -61,35 +51,7 @@ module KeywordFinder
|
|
61
51
|
)
|
62
52
|
sub_results = main_and_subs[:subs].collect{|subsentence| self.scan_part(subsentence)}.flatten
|
63
53
|
end
|
64
|
-
|
65
|
-
clean_sentence_results = main_results + sub_results
|
66
|
-
|
67
|
-
return select_the_best_results(clean_sentence_results, full_sentence_results)
|
68
|
-
end
|
69
|
-
|
70
|
-
def select_the_best_results result_set_a, result_set_b
|
71
|
-
## check whether there are better matches in the full sentence approach (or the other way around)
|
72
|
-
result_set_a_to_delete = []
|
73
|
-
result_set_b_to_delete = []
|
74
|
-
|
75
|
-
result_set_a.each do |result_a|
|
76
|
-
result_set_b.each do |result_b|
|
77
|
-
if result_a.match(escape_regex_chars(result_b))
|
78
|
-
result_set_b_to_delete << result_b
|
79
|
-
elsif result_b.match(escape_regex_chars(result_a))
|
80
|
-
result_set_a_to_delete << result_a
|
81
|
-
end
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
result_set_a_to_delete.each do |a|
|
86
|
-
result_set_a.delete(a)
|
87
|
-
end
|
88
|
-
result_set_b_to_delete.each do |a|
|
89
|
-
result_set_b.delete(a)
|
90
|
-
end
|
91
|
-
|
92
|
-
return result_set_a + result_set_b
|
54
|
+
return main_results + sub_results
|
93
55
|
end
|
94
56
|
|
95
57
|
def separate_main_and_sub_sentences sentence
|
@@ -97,7 +59,7 @@ module KeywordFinder
|
|
97
59
|
subs.each do |subsentence|
|
98
60
|
sentence = sentence.gsub(subsentence,"")
|
99
61
|
end
|
100
|
-
{main:sentence
|
62
|
+
{main:sentence,subs:subs.collect{|a| a[1..(a.length-2)]}}
|
101
63
|
end
|
102
64
|
|
103
65
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyword_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: '0.2'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- murb
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
94
|
version: '0'
|
95
95
|
requirements: []
|
96
96
|
rubyforge_project:
|
97
|
-
rubygems_version: 2.5
|
97
|
+
rubygems_version: 2.4.5
|
98
98
|
signing_key:
|
99
99
|
specification_version: 4
|
100
100
|
summary: Find given set of keywords in a sentence.
|