keyword_matcher 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +0 -2
- data/keyword_matcher.gemspec +2 -3
- data/lib/keyword_matcher.rb +0 -1
- data/lib/keyword_matcher/group.rb +2 -1
- data/lib/keyword_matcher/process.rb +5 -16
- data/lib/keyword_matcher/prophet.rb +3 -3
- data/lib/keyword_matcher/version.rb +1 -1
- metadata +8 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c7609c8851ec36b76d1220692d5c6087b121f309251fb37a8dd22f2d9dd5ed8
|
4
|
+
data.tar.gz: df8423e771b29bca9074d63e178ec5cc60f3251b215a882dcbb86cf6fb69cc62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3dd4f4b380564c9660eb0631497918268b091bf65f2bcfea0a160c25d57d5adce801da740853477cd9376a3289ac8f28f6cfe41a0c356e6bc6f8153d0daa2c55
|
7
|
+
data.tar.gz: 9bdae5184e4e35be59df2ebfb109d1140e17bb80b724c05c8d576fc6d431bc2d207cafff8d03f07b9d77fe97e6e6652f0c1ed24f3f1827068ec44315468facb2
|
data/.rubocop.yml
CHANGED
data/keyword_matcher.gemspec
CHANGED
@@ -20,10 +20,9 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
spec.required_ruby_version = ['>= 2.4.0', '< 2.6.0']
|
22
22
|
|
23
|
-
spec.add_runtime_dependency 'activesupport', '~> 5.0'
|
24
|
-
spec.add_runtime_dependency 'damerau-levenshtein', '~> 1.3'
|
23
|
+
spec.add_runtime_dependency 'activesupport', '>= 4.0'
|
25
24
|
spec.add_development_dependency 'bundler', '~> 1.17'
|
26
25
|
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
|
27
26
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
28
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
27
|
+
spec.add_development_dependency 'rubocop', '~> 0.76.0'
|
29
28
|
end
|
data/lib/keyword_matcher.rb
CHANGED
data/lib/keyword_matcher/group.rb
CHANGED
@@ -6,7 +6,8 @@ module KeywordMatcher
|
|
6
6
|
OPERATOR_NOT = 'не'.freeze
|
7
7
|
|
8
8
|
def initialize(title)
|
9
|
-
@title = title
|
9
|
+
@title = title.gsub(/(["'])/, '') # remove quotes
|
10
|
+
.gsub(/(\d)[\.,](\d)/, '\1-\2') # replace separator between digits from , or . to -
|
10
11
|
@all = values
|
11
12
|
@or = or_groups
|
12
13
|
@not = not_groups
|
data/lib/keyword_matcher/process.rb
CHANGED
@@ -2,9 +2,6 @@ module KeywordMatcher
|
|
2
2
|
class Process
|
3
3
|
attr_reader :group, :words
|
4
4
|
|
5
|
-
FUZZINESS = 1
|
6
|
-
MIN_WORD_LENGTH_FOR_FUZZY = 4
|
7
|
-
|
8
5
|
def initialize(group, words)
|
9
6
|
@group = group
|
10
7
|
@words = words
|
@@ -28,7 +25,10 @@ module KeywordMatcher
|
|
28
25
|
match = false
|
29
26
|
terms.each do |term|
|
30
27
|
words.each do |word|
|
31
|
-
|
28
|
+
if condition(term, word)
|
29
|
+
match = true
|
30
|
+
break
|
31
|
+
end
|
32
32
|
end
|
33
33
|
end
|
34
34
|
match
|
@@ -37,9 +37,7 @@ module KeywordMatcher
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def matched?(term, word)
|
40
|
-
|
41
|
-
|
42
|
-
::DamerauLevenshtein.distance(term, word) <= FUZZINESS
|
40
|
+
word == term
|
43
41
|
end
|
44
42
|
|
45
43
|
def condition(term, word)
|
@@ -58,14 +56,5 @@ module KeywordMatcher
|
|
58
56
|
%r{([0-9])([,|.])(.*)} => '\1-\3'
|
59
57
|
}
|
60
58
|
end
|
61
|
-
|
62
|
-
def precise?(term)
|
63
|
-
quoted?(term) || (quoted?(term).blank? && term.length < MIN_WORD_LENGTH_FOR_FUZZY)
|
64
|
-
end
|
65
|
-
|
66
|
-
def quoted?(term)
|
67
|
-
regex = /(["'])(?:(?=(\\?))\2.)*?\1/
|
68
|
-
term.match?(regex)
|
69
|
-
end
|
70
59
|
end
|
71
60
|
end
|
data/lib/keyword_matcher/prophet.rb
CHANGED
@@ -3,7 +3,7 @@ module KeywordMatcher
|
|
3
3
|
attr_reader :phrase
|
4
4
|
|
5
5
|
SEPARATOR = %r{[\s\(\)\/*:"#'\[\];<>\\\/\$\.,=“”«»]+}.freeze
|
6
|
-
MEASURES = '
|
6
|
+
MEASURES = 'кг|г|л|мл|уп|ед|шт|мг|пак|гр'.freeze
|
7
7
|
|
8
8
|
def initialize(phrase)
|
9
9
|
@phrase = phrase
|
@@ -17,11 +17,11 @@ module KeywordMatcher
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def prepare
|
20
|
-
phrase.gsub(
|
20
|
+
phrase.gsub(/[-]/, ' ') # split text contains - character by space
|
21
21
|
.gsub(/(\p{Ll}{2,})(\d+\S)/, '\1 \2') # split text contains > 1 character from digits
|
22
22
|
.gsub(/%([\p{L}\d])/, '% \1') # add space after percents
|
23
23
|
.gsub(/(\d)[\.,](\d)/, '\1-\2') # replace separator between digits from , or . to -
|
24
|
-
.gsub(/(\d)[\.,\s]+(#{MEASURES})
|
24
|
+
.gsub(/(\d)[\.,\s]+(#{MEASURES})\.?(?!\p{L})/i, '\1\2') # replace gaps between numbers and measures
|
25
25
|
.gsub(/(\p{Ll})(\p{Lu})/, '\1 \2') # split camelcase string
|
26
26
|
.gsub(/(\d)-0+(#{MEASURES})/i, '\1\2') # remove trailing zeroes after measures
|
27
27
|
.gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3') # add space between word and measure
|
metadata
CHANGED
@@ -1,43 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyword_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ivan Novikov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '5.0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '5.0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: damerau-levenshtein
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
17
|
+
- - ">="
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.3'
|
19
|
+
version: '4.0'
|
34
20
|
type: :runtime
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
|
-
- - "~>"
|
24
|
+
- - ">="
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.3'
|
26
|
+
version: '4.0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: bundler
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -92,14 +78,14 @@ dependencies:
|
|
92
78
|
requirements:
|
93
79
|
- - "~>"
|
94
80
|
- !ruby/object:Gem::Version
|
95
|
-
version: 0.
|
81
|
+
version: 0.76.0
|
96
82
|
type: :development
|
97
83
|
prerelease: false
|
98
84
|
version_requirements: !ruby/object:Gem::Requirement
|
99
85
|
requirements:
|
100
86
|
- - "~>"
|
101
87
|
- !ruby/object:Gem::Version
|
102
|
-
version: 0.
|
88
|
+
version: 0.76.0
|
103
89
|
description:
|
104
90
|
email:
|
105
91
|
- ivan.novikov@saltpepper.ru
|