keyword_matcher 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +0 -2
- data/keyword_matcher.gemspec +3 -4
- data/lib/keyword_matcher/group.rb +1 -0
- data/lib/keyword_matcher/process.rb +9 -7
- data/lib/keyword_matcher/prophet.rb +6 -7
- data/lib/keyword_matcher/version.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9193a0b98145b71457b73c624d518432ce50da959b6725d10bdc714aabd537ce
|
4
|
+
data.tar.gz: f2dfa8a1e7c48f711ee97369feec3633134283bae94b4d67f8a64b7e16799080
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b966edcfd13292288ddd392b403420685a4ec8f7bc82ede00cf8ad8acda8fe394848f8a73d4085ec85e557253821f83605a2dd347a5160c75f1fd732e531adc3
|
7
|
+
data.tar.gz: 5a5b3cb2c9e991a0677a34fab264171c3fe1aa297b513f265ecaf892db13a1475afe938baea1202d137b5059656c05bde54b1b6cdef52b708ce69aec622a576f
|
data/.rubocop.yml
CHANGED
data/keyword_matcher.gemspec
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require 'keyword_matcher/version'
|
5
4
|
|
@@ -19,12 +18,12 @@ Gem::Specification.new do |spec|
|
|
19
18
|
spec.bindir = 'exe'
|
20
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
21
20
|
spec.require_paths = ['lib']
|
22
|
-
spec.required_ruby_version = '>= 2.4.0'
|
21
|
+
spec.required_ruby_version = ['>= 2.4.0', '< 2.6.0']
|
23
22
|
|
24
23
|
spec.add_runtime_dependency 'activesupport', '~> 5.0'
|
25
24
|
spec.add_runtime_dependency 'damerau-levenshtein', '~> 1.3'
|
26
25
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
27
26
|
spec.add_development_dependency 'rake', '~> 10.0'
|
28
27
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
29
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
28
|
+
spec.add_development_dependency 'rubocop', '~> 0.61.1'
|
30
29
|
end
|
data/lib/keyword_matcher/group.rb
CHANGED
@@ -26,6 +26,7 @@ module KeywordMatcher
|
|
26
26
|
|
27
27
|
def not_groups
|
28
28
|
return [] unless title.downcase.match?(/\r?\n#{OPERATOR_NOT}\r?\n/)
|
29
|
+
|
29
30
|
title.downcase.match(/\r?\n#{OPERATOR_NOT}.*/m).to_s.split(/\r?\n#{OPERATOR_NOT}\r?\n/).map do |v|
|
30
31
|
v.split("\n").reject(&:blank?).map(&:split)
|
31
32
|
end.reject(&:blank?)
|
data/lib/keyword_matcher/process.rb
CHANGED
@@ -18,6 +18,7 @@ module KeywordMatcher
|
|
18
18
|
|
19
19
|
def negation_found?
|
20
20
|
return if group.not.blank?
|
21
|
+
|
21
22
|
in_any?(group.not)
|
22
23
|
end
|
23
24
|
|
@@ -26,8 +27,8 @@ module KeywordMatcher
|
|
26
27
|
values.map do |terms|
|
27
28
|
match = false
|
28
29
|
terms.each do |term|
|
29
|
-
words.each do |
|
30
|
-
match = true if condition(term,
|
30
|
+
words.each do |word|
|
31
|
+
match = true if condition(term, word)
|
31
32
|
end
|
32
33
|
end
|
33
34
|
match
|
@@ -35,14 +36,15 @@ module KeywordMatcher
|
|
35
36
|
end.include?(true)
|
36
37
|
end
|
37
38
|
|
38
|
-
def matched?(term,
|
39
|
-
return
|
40
|
-
|
39
|
+
def matched?(term, word)
|
40
|
+
return word == (quoted?(term) ? term[1..-2] : term) if precise?(term)
|
41
|
+
|
42
|
+
::DamerauLevenshtein.distance(term, word) <= FUZZINESS
|
41
43
|
end
|
42
44
|
|
43
|
-
def condition(term,
|
45
|
+
def condition(term, word)
|
44
46
|
synonym = find_synonym(term)
|
45
|
-
synonym.present? ? (matched?(term,
|
47
|
+
synonym.present? ? (matched?(term, word) || matched?(synonym, word)) : matched?(term, word)
|
46
48
|
end
|
47
49
|
|
48
50
|
def find_synonym(term)
|
data/lib/keyword_matcher/prophet.rb
CHANGED
@@ -2,9 +2,7 @@ module KeywordMatcher
|
|
2
2
|
class Prophet
|
3
3
|
attr_reader :phrase
|
4
4
|
|
5
|
-
|
6
|
-
SPLIT = 0.2
|
7
|
-
SEPARATOR = %r{[\s\(\)\/*:"'\\\/\$\.,=]+}
|
5
|
+
SEPARATOR = %r{[\s\(\)\/*:"#'\[\];<>\\\/\$\.,=]+}.freeze
|
8
6
|
MEASURES = 'кг|г|л|мл|уп|ед|шт|мг|пак'.freeze
|
9
7
|
|
10
8
|
def initialize(phrase)
|
@@ -15,19 +13,20 @@ module KeywordMatcher
|
|
15
13
|
prepare
|
16
14
|
.split(SEPARATOR)
|
17
15
|
.map(&:strip)
|
18
|
-
.map(&:downcase)
|
19
|
-
.reject { |w| w =~ /\d{5,}/ }
|
20
16
|
.reject(&:blank?)
|
21
17
|
end
|
22
18
|
|
23
19
|
def prepare
|
24
|
-
phrase.gsub(
|
20
|
+
phrase.gsub(/\d{5,}/, '') # remove gteq five-digit
|
21
|
+
.gsub(/(\p{Ll}{2,})(\d+\S)/, '\1 \2') # split text contains > 1 character from digits
|
25
22
|
.gsub(/%([\p{L}\d])/, '% \1') # add space after percents
|
26
23
|
.gsub(/(\d)[\.,](\d)/, '\1-\2') # replace separator between digits from , or . to -
|
27
24
|
.gsub(/(\d)[\.,\s]+(#{MEASURES})\.?/i, '\1\2') # replace gaps between numbers and measures
|
28
25
|
.gsub(/(\p{Ll})(\p{Lu})/, '\1 \2') # split camelcase string
|
29
26
|
.gsub(/(\d)-0+(#{MEASURES})/i, '\1\2') # remove trailing zeroes after measures
|
30
|
-
.gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3')
|
27
|
+
.gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3') # add space between word and measure
|
28
|
+
.gsub(/(\d+)(#{MEASURES})([x|х])/i, '\1\2 \3') # add space before amount
|
29
|
+
.gsub(/([а-я])([a-z]{2,})/i, '\1 \2') # add space between alternating Russian English
|
31
30
|
.downcase
|
32
31
|
end
|
33
32
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyword_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ivan Novikov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-12-
|
11
|
+
date: 2018-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
89
|
+
version: 0.61.1
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
96
|
+
version: 0.61.1
|
97
97
|
description:
|
98
98
|
email:
|
99
99
|
- ivan.novikov@saltpepper.ru
|
@@ -132,6 +132,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
132
132
|
- - ">="
|
133
133
|
- !ruby/object:Gem::Version
|
134
134
|
version: 2.4.0
|
135
|
+
- - "<"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: 2.6.0
|
135
138
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
139
|
requirements:
|
137
140
|
- - ">="
|