keyword_finder 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/keyword_finder/keywords.rb +41 -3
- data/lib/keyword_finder/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0dd010b498bebdedbb2a27b95760645bfe2c5f45
|
4
|
+
data.tar.gz: 12a32eee997aa77ef87247b747bb10d680ff6ce8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c37be69c1fca9f4978123b82ff0d8223de110130b06dc981ae0f157a4f1c488f56ca224a74400e61f78eb160983969ecc502d6d431f932ca497df5df2e8224b5
|
7
|
+
data.tar.gz: ae85cf9f939e53caa71a5ebec19e57ca4edeeef9ec3acad743f5d8434c85b93e7407663701bbb7a40c413223b79715fe92ed08bbf5e6e18453cbb26b58804acb
|
@@ -21,7 +21,9 @@ module KeywordFinder
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def combine_more_specifics sentence
|
24
|
-
sentence.
|
24
|
+
sentence.
|
25
|
+
gsub(/([A-Za-z]*\([A-Za-z]*\)[A-Za-z]+)/) { |s| s.gsub(/(\(|\))/,'') }.
|
26
|
+
gsub(/([A-Za-z]+\([A-Za-z]*\)[A-Za-z]*)/) { |s| s.gsub(/(\(|\))/,'') }
|
25
27
|
end
|
26
28
|
|
27
29
|
def scan_part sentence
|
@@ -36,14 +38,22 @@ module KeywordFinder
|
|
36
38
|
results
|
37
39
|
end
|
38
40
|
|
41
|
+
# find in a sentence
|
42
|
+
#
|
43
|
+
#
|
44
|
+
# @param [String] sentence that might contain the keywords this instance was initialized with
|
45
|
+
# @param [Hash] options; notably the +:subsentences_strategy+, which can be one of +:none+, +:ignore_if_found_in_main+, +:always_ignore+
|
46
|
+
|
39
47
|
def find_in sentence, options={}
|
40
48
|
options = {
|
41
49
|
subsentences_strategy: :none # :none, :ignore_if_found_in_main, :always_ignore
|
42
50
|
}.merge(options)
|
43
51
|
|
52
|
+
full_sentence_results = self.scan_part(sentence)
|
44
53
|
sentence = self.combine_more_specifics(sentence)
|
45
54
|
main_and_subs = self.separate_main_and_sub_sentences(sentence)
|
46
55
|
main_results = self.scan_part(main_and_subs[:main])
|
56
|
+
|
47
57
|
sub_results = []
|
48
58
|
unless (
|
49
59
|
options[:subsentences_strategy] == :always_ignore or
|
@@ -51,7 +61,35 @@ module KeywordFinder
|
|
51
61
|
)
|
52
62
|
sub_results = main_and_subs[:subs].collect{|subsentence| self.scan_part(subsentence)}.flatten
|
53
63
|
end
|
54
|
-
|
64
|
+
|
65
|
+
clean_sentence_results = main_results + sub_results
|
66
|
+
|
67
|
+
return select_the_best_results(clean_sentence_results, full_sentence_results)
|
68
|
+
end
|
69
|
+
|
70
|
+
def select_the_best_results result_set_a, result_set_b
|
71
|
+
## check whether there are better matches in the full sentence approach (or the other way around)
|
72
|
+
result_set_a_to_delete = []
|
73
|
+
result_set_b_to_delete = []
|
74
|
+
|
75
|
+
result_set_a.each do |result_a|
|
76
|
+
result_set_b.each do |result_b|
|
77
|
+
if result_a.match(escape_regex_chars(result_b))
|
78
|
+
result_set_b_to_delete << result_b
|
79
|
+
elsif result_b.match(escape_regex_chars(result_a))
|
80
|
+
result_set_a_to_delete << result_a
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
result_set_a_to_delete.each do |a|
|
86
|
+
result_set_a.delete(a)
|
87
|
+
end
|
88
|
+
result_set_b_to_delete.each do |a|
|
89
|
+
result_set_b.delete(a)
|
90
|
+
end
|
91
|
+
|
92
|
+
return result_set_a + result_set_b
|
55
93
|
end
|
56
94
|
|
57
95
|
def separate_main_and_sub_sentences sentence
|
@@ -59,7 +97,7 @@ module KeywordFinder
|
|
59
97
|
subs.each do |subsentence|
|
60
98
|
sentence = sentence.gsub(subsentence,"")
|
61
99
|
end
|
62
|
-
{main:sentence,subs:subs.collect{|a| a[1..(a.length-2)]}}
|
100
|
+
{main:sentence.strip,subs:subs.collect{|a| a[1..(a.length-2)].strip}}
|
63
101
|
end
|
64
102
|
|
65
103
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyword_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- murb
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -94,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
94
|
version: '0'
|
95
95
|
requirements: []
|
96
96
|
rubyforge_project:
|
97
|
-
rubygems_version: 2.
|
97
|
+
rubygems_version: 2.5.1
|
98
98
|
signing_key:
|
99
99
|
specification_version: 4
|
100
100
|
summary: Find given set of keywords in a sentence.
|