keyword_matcher 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +0 -2
- data/keyword_matcher.gemspec +3 -4
- data/lib/keyword_matcher/group.rb +1 -0
- data/lib/keyword_matcher/process.rb +9 -7
- data/lib/keyword_matcher/prophet.rb +6 -7
- data/lib/keyword_matcher/version.rb +1 -1
- metadata +7 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9193a0b98145b71457b73c624d518432ce50da959b6725d10bdc714aabd537ce
|
|
4
|
+
data.tar.gz: f2dfa8a1e7c48f711ee97369feec3633134283bae94b4d67f8a64b7e16799080
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b966edcfd13292288ddd392b403420685a4ec8f7bc82ede00cf8ad8acda8fe394848f8a73d4085ec85e557253821f83605a2dd347a5160c75f1fd732e531adc3
|
|
7
|
+
data.tar.gz: 5a5b3cb2c9e991a0677a34fab264171c3fe1aa297b513f265ecaf892db13a1475afe938baea1202d137b5059656c05bde54b1b6cdef52b708ce69aec622a576f
|
data/.rubocop.yml
CHANGED
data/keyword_matcher.gemspec
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
3
|
require 'keyword_matcher/version'
|
|
5
4
|
|
|
@@ -19,12 +18,12 @@ Gem::Specification.new do |spec|
|
|
|
19
18
|
spec.bindir = 'exe'
|
|
20
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
21
20
|
spec.require_paths = ['lib']
|
|
22
|
-
spec.required_ruby_version = '>= 2.4.0'
|
|
21
|
+
spec.required_ruby_version = ['>= 2.4.0', '< 2.6.0']
|
|
23
22
|
|
|
24
23
|
spec.add_runtime_dependency 'activesupport', '~> 5.0'
|
|
25
24
|
spec.add_runtime_dependency 'damerau-levenshtein', '~> 1.3'
|
|
26
25
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
|
27
26
|
spec.add_development_dependency 'rake', '~> 10.0'
|
|
28
27
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
|
29
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
|
28
|
+
spec.add_development_dependency 'rubocop', '~> 0.61.1'
|
|
30
29
|
end
|
|
@@ -26,6 +26,7 @@ module KeywordMatcher
|
|
|
26
26
|
|
|
27
27
|
def not_groups
|
|
28
28
|
return [] unless title.downcase.match?(/\r?\n#{OPERATOR_NOT}\r?\n/)
|
|
29
|
+
|
|
29
30
|
title.downcase.match(/\r?\n#{OPERATOR_NOT}.*/m).to_s.split(/\r?\n#{OPERATOR_NOT}\r?\n/).map do |v|
|
|
30
31
|
v.split("\n").reject(&:blank?).map(&:split)
|
|
31
32
|
end.reject(&:blank?)
|
|
@@ -18,6 +18,7 @@ module KeywordMatcher
|
|
|
18
18
|
|
|
19
19
|
def negation_found?
|
|
20
20
|
return if group.not.blank?
|
|
21
|
+
|
|
21
22
|
in_any?(group.not)
|
|
22
23
|
end
|
|
23
24
|
|
|
@@ -26,8 +27,8 @@ module KeywordMatcher
|
|
|
26
27
|
values.map do |terms|
|
|
27
28
|
match = false
|
|
28
29
|
terms.each do |term|
|
|
29
|
-
words.each do |
|
|
30
|
-
match = true if condition(term,
|
|
30
|
+
words.each do |word|
|
|
31
|
+
match = true if condition(term, word)
|
|
31
32
|
end
|
|
32
33
|
end
|
|
33
34
|
match
|
|
@@ -35,14 +36,15 @@ module KeywordMatcher
|
|
|
35
36
|
end.include?(true)
|
|
36
37
|
end
|
|
37
38
|
|
|
38
|
-
def matched?(term,
|
|
39
|
-
return
|
|
40
|
-
|
|
39
|
+
def matched?(term, word)
|
|
40
|
+
return word == (quoted?(term) ? term[1..-2] : term) if precise?(term)
|
|
41
|
+
|
|
42
|
+
::DamerauLevenshtein.distance(term, word) <= FUZZINESS
|
|
41
43
|
end
|
|
42
44
|
|
|
43
|
-
def condition(term,
|
|
45
|
+
def condition(term, word)
|
|
44
46
|
synonym = find_synonym(term)
|
|
45
|
-
synonym.present? ? (matched?(term,
|
|
47
|
+
synonym.present? ? (matched?(term, word) || matched?(synonym, word)) : matched?(term, word)
|
|
46
48
|
end
|
|
47
49
|
|
|
48
50
|
def find_synonym(term)
|
|
@@ -2,9 +2,7 @@ module KeywordMatcher
|
|
|
2
2
|
class Prophet
|
|
3
3
|
attr_reader :phrase
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
SPLIT = 0.2
|
|
7
|
-
SEPARATOR = %r{[\s\(\)\/*:"'\\\/\$\.,=]+}
|
|
5
|
+
SEPARATOR = %r{[\s\(\)\/*:"#'\[\];<>\\\/\$\.,=]+}.freeze
|
|
8
6
|
MEASURES = 'кг|г|л|мл|уп|ед|шт|мг|пак'.freeze
|
|
9
7
|
|
|
10
8
|
def initialize(phrase)
|
|
@@ -15,19 +13,20 @@ module KeywordMatcher
|
|
|
15
13
|
prepare
|
|
16
14
|
.split(SEPARATOR)
|
|
17
15
|
.map(&:strip)
|
|
18
|
-
.map(&:downcase)
|
|
19
|
-
.reject { |w| w =~ /\d{5,}/ }
|
|
20
16
|
.reject(&:blank?)
|
|
21
17
|
end
|
|
22
18
|
|
|
23
19
|
def prepare
|
|
24
|
-
phrase.gsub(
|
|
20
|
+
phrase.gsub(/\d{5,}/, '') # remove gteq five-digit
|
|
21
|
+
.gsub(/(\p{Ll}{2,})(\d+\S)/, '\1 \2') # split text contains > 1 character from digits
|
|
25
22
|
.gsub(/%([\p{L}\d])/, '% \1') # add space after percents
|
|
26
23
|
.gsub(/(\d)[\.,](\d)/, '\1-\2') # replace separator between digits from , or . to -
|
|
27
24
|
.gsub(/(\d)[\.,\s]+(#{MEASURES})\.?/i, '\1\2') # replace gaps between numbers and measures
|
|
28
25
|
.gsub(/(\p{Ll})(\p{Lu})/, '\1 \2') # split camelcase string
|
|
29
26
|
.gsub(/(\d)-0+(#{MEASURES})/i, '\1\2') # remove trailing zeroes after measures
|
|
30
|
-
.gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3')
|
|
27
|
+
.gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3') # add space between word and measure
|
|
28
|
+
.gsub(/(\d+)(#{MEASURES})([x|х])/i, '\1\2 \3') # add space before amount
|
|
29
|
+
.gsub(/([а-я])([a-z]{2,})/i, '\1 \2') # add space between alternating Russian English
|
|
31
30
|
.downcase
|
|
32
31
|
end
|
|
33
32
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: keyword_matcher
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.5.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ivan Novikov
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-12-
|
|
11
|
+
date: 2018-12-28 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: activesupport
|
|
@@ -86,14 +86,14 @@ dependencies:
|
|
|
86
86
|
requirements:
|
|
87
87
|
- - "~>"
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: 0.
|
|
89
|
+
version: 0.61.1
|
|
90
90
|
type: :development
|
|
91
91
|
prerelease: false
|
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
|
93
93
|
requirements:
|
|
94
94
|
- - "~>"
|
|
95
95
|
- !ruby/object:Gem::Version
|
|
96
|
-
version: 0.
|
|
96
|
+
version: 0.61.1
|
|
97
97
|
description:
|
|
98
98
|
email:
|
|
99
99
|
- ivan.novikov@saltpepper.ru
|
|
@@ -132,6 +132,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
132
132
|
- - ">="
|
|
133
133
|
- !ruby/object:Gem::Version
|
|
134
134
|
version: 2.4.0
|
|
135
|
+
- - "<"
|
|
136
|
+
- !ruby/object:Gem::Version
|
|
137
|
+
version: 2.6.0
|
|
135
138
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
136
139
|
requirements:
|
|
137
140
|
- - ">="
|