keyword_finder 0.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/keyword_finder/keywords.rb +44 -3
- data/lib/keyword_finder/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3959babed302bb4c954ccfc78922caa407abb00
|
4
|
+
data.tar.gz: 1939597c3c374b9925ec03f1a77b18c3b688f8d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63434cc4ad29c3e6aab06fab33065008de54837c1a8fce1be87e97907a0193477cbab6be853f4c1a13ed89289412ee9f1b7448e38056f71a88199533944aba8a
|
7
|
+
data.tar.gz: 67bf8f3c227fd453b0325c36d6896b3469973ad7e91bdc2e22fd077f56a5b4928b76c6fa3118959ba8f76bc46144eeffb4f8b82ecc487d2efe49d8653cad3a9b
|
@@ -21,7 +21,9 @@ module KeywordFinder
|
|
21
21
|
end
|
22
22
|
|
23
23
|
def combine_more_specifics sentence
|
24
|
-
sentence.
|
24
|
+
sentence.
|
25
|
+
gsub(/([A-Za-z]*\([A-Za-z]*\)[A-Za-z]+)/) { |s| s.gsub(/(\(|\))/,'') }.
|
26
|
+
gsub(/([A-Za-z]+\([A-Za-z]*\)[A-Za-z]*)/) { |s| s.gsub(/(\(|\))/,'') }
|
25
27
|
end
|
26
28
|
|
27
29
|
def scan_part sentence
|
@@ -36,14 +38,25 @@ module KeywordFinder
|
|
36
38
|
results
|
37
39
|
end
|
38
40
|
|
41
|
+
# find in a sentence
|
42
|
+
#
|
43
|
+
#
|
44
|
+
# @param [String] sentence that might contain the keywords this instance was initialized with
|
45
|
+
# @param [Hash] options; notably the +:subsentences_strategy+, which can be one of +:none+, +:ignore_if_found_in_main+, +:always_ignore+
|
46
|
+
|
39
47
|
def find_in sentence, options={}
|
40
48
|
options = {
|
41
49
|
subsentences_strategy: :none # :none, :ignore_if_found_in_main, :always_ignore
|
42
50
|
}.merge(options)
|
51
|
+
|
43
52
|
sentence = sentence.downcase
|
53
|
+
|
54
|
+
full_sentence_results = self.scan_part(sentence)
|
55
|
+
|
44
56
|
sentence = self.combine_more_specifics(sentence)
|
45
57
|
main_and_subs = self.separate_main_and_sub_sentences(sentence)
|
46
58
|
main_results = self.scan_part(main_and_subs[:main])
|
59
|
+
|
47
60
|
sub_results = []
|
48
61
|
unless (
|
49
62
|
options[:subsentences_strategy] == :always_ignore or
|
@@ -51,7 +64,35 @@ module KeywordFinder
|
|
51
64
|
)
|
52
65
|
sub_results = main_and_subs[:subs].collect{|subsentence| self.scan_part(subsentence)}.flatten
|
53
66
|
end
|
54
|
-
|
67
|
+
|
68
|
+
clean_sentence_results = main_results + sub_results
|
69
|
+
|
70
|
+
return select_the_best_results(clean_sentence_results, full_sentence_results)
|
71
|
+
end
|
72
|
+
|
73
|
+
def select_the_best_results result_set_a, result_set_b
|
74
|
+
## check whether there are better matches in the full sentence approach (or the other way around)
|
75
|
+
result_set_a_to_delete = []
|
76
|
+
result_set_b_to_delete = []
|
77
|
+
|
78
|
+
result_set_a.each do |result_a|
|
79
|
+
result_set_b.each do |result_b|
|
80
|
+
if result_a.match(escape_regex_chars(result_b))
|
81
|
+
result_set_b_to_delete << result_b
|
82
|
+
elsif result_b.match(escape_regex_chars(result_a))
|
83
|
+
result_set_a_to_delete << result_a
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
result_set_a_to_delete.each do |a|
|
89
|
+
result_set_a.delete(a)
|
90
|
+
end
|
91
|
+
result_set_b_to_delete.each do |a|
|
92
|
+
result_set_b.delete(a)
|
93
|
+
end
|
94
|
+
|
95
|
+
return result_set_a + result_set_b
|
55
96
|
end
|
56
97
|
|
57
98
|
def separate_main_and_sub_sentences sentence
|
@@ -59,7 +100,7 @@ module KeywordFinder
|
|
59
100
|
subs.each do |subsentence|
|
60
101
|
sentence = sentence.gsub(subsentence,"")
|
61
102
|
end
|
62
|
-
{main:sentence,subs:subs.collect{|a| a[1..(a.length-2)]}}
|
103
|
+
{main:sentence.strip,subs:subs.collect{|a| a[1..(a.length-2)].strip}}
|
63
104
|
end
|
64
105
|
|
65
106
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyword_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- murb
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|