keyword_matcher 0.6.2 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +0 -2
- data/keyword_matcher.gemspec +2 -3
- data/lib/keyword_matcher.rb +0 -1
- data/lib/keyword_matcher/group.rb +2 -1
- data/lib/keyword_matcher/process.rb +5 -16
- data/lib/keyword_matcher/prophet.rb +3 -3
- data/lib/keyword_matcher/version.rb +1 -1
- metadata +8 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c7609c8851ec36b76d1220692d5c6087b121f309251fb37a8dd22f2d9dd5ed8
|
4
|
+
data.tar.gz: df8423e771b29bca9074d63e178ec5cc60f3251b215a882dcbb86cf6fb69cc62
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3dd4f4b380564c9660eb0631497918268b091bf65f2bcfea0a160c25d57d5adce801da740853477cd9376a3289ac8f28f6cfe41a0c356e6bc6f8153d0daa2c55
|
7
|
+
data.tar.gz: 9bdae5184e4e35be59df2ebfb109d1140e17bb80b724c05c8d576fc6d431bc2d207cafff8d03f07b9d77fe97e6e6652f0c1ed24f3f1827068ec44315468facb2
|
data/.rubocop.yml
CHANGED
data/keyword_matcher.gemspec
CHANGED
@@ -20,10 +20,9 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.require_paths = ['lib']
|
21
21
|
spec.required_ruby_version = ['>= 2.4.0', '< 2.6.0']
|
22
22
|
|
23
|
-
spec.add_runtime_dependency 'activesupport', '~> 5.0'
|
24
|
-
spec.add_runtime_dependency 'damerau-levenshtein', '~> 1.3'
|
23
|
+
spec.add_runtime_dependency 'activesupport', '>= 4.0'
|
25
24
|
spec.add_development_dependency 'bundler', '~> 1.17'
|
26
25
|
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
|
27
26
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
28
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
27
|
+
spec.add_development_dependency 'rubocop', '~> 0.76.0'
|
29
28
|
end
|
data/lib/keyword_matcher.rb
CHANGED
data/lib/keyword_matcher/group.rb
CHANGED
@@ -6,7 +6,8 @@ module KeywordMatcher
|
|
6
6
|
OPERATOR_NOT = 'не'.freeze
|
7
7
|
|
8
8
|
def initialize(title)
|
9
|
-
@title = title
|
9
|
+
@title = title.gsub(/(["'])/, '') # remove quotes
|
10
|
+
.gsub(/(\d)[\.,](\d)/, '\1-\2') # replace separator between digits from , or . to -
|
10
11
|
@all = values
|
11
12
|
@or = or_groups
|
12
13
|
@not = not_groups
|
data/lib/keyword_matcher/process.rb
CHANGED
@@ -2,9 +2,6 @@ module KeywordMatcher
|
|
2
2
|
class Process
|
3
3
|
attr_reader :group, :words
|
4
4
|
|
5
|
-
FUZZINESS = 1
|
6
|
-
MIN_WORD_LENGTH_FOR_FUZZY = 4
|
7
|
-
|
8
5
|
def initialize(group, words)
|
9
6
|
@group = group
|
10
7
|
@words = words
|
@@ -28,7 +25,10 @@ module KeywordMatcher
|
|
28
25
|
match = false
|
29
26
|
terms.each do |term|
|
30
27
|
words.each do |word|
|
31
|
-
|
28
|
+
if condition(term, word)
|
29
|
+
match = true
|
30
|
+
break
|
31
|
+
end
|
32
32
|
end
|
33
33
|
end
|
34
34
|
match
|
@@ -37,9 +37,7 @@ module KeywordMatcher
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def matched?(term, word)
|
40
|
-
|
41
|
-
|
42
|
-
::DamerauLevenshtein.distance(term, word) <= FUZZINESS
|
40
|
+
word == term
|
43
41
|
end
|
44
42
|
|
45
43
|
def condition(term, word)
|
@@ -58,14 +56,5 @@ module KeywordMatcher
|
|
58
56
|
%r{([0-9])([,|.])(.*)} => '\1-\3'
|
59
57
|
}
|
60
58
|
end
|
61
|
-
|
62
|
-
def precise?(term)
|
63
|
-
quoted?(term) || (quoted?(term).blank? && term.length < MIN_WORD_LENGTH_FOR_FUZZY)
|
64
|
-
end
|
65
|
-
|
66
|
-
def quoted?(term)
|
67
|
-
regex = /(["'])(?:(?=(\\?))\2.)*?\1/
|
68
|
-
term.match?(regex)
|
69
|
-
end
|
70
59
|
end
|
71
60
|
end
|
data/lib/keyword_matcher/prophet.rb
CHANGED
@@ -3,7 +3,7 @@ module KeywordMatcher
|
|
3
3
|
attr_reader :phrase
|
4
4
|
|
5
5
|
SEPARATOR = %r{[\s\(\)\/*:"#'\[\];<>\\\/\$\.,=“”«»]+}.freeze
|
6
|
-
MEASURES = '
|
6
|
+
MEASURES = 'кг|г|л|мл|уп|ед|шт|мг|пак|гр'.freeze
|
7
7
|
|
8
8
|
def initialize(phrase)
|
9
9
|
@phrase = phrase
|
@@ -17,11 +17,11 @@ module KeywordMatcher
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def prepare
|
20
|
-
phrase.gsub(
|
20
|
+
phrase.gsub(/[-]/, ' ') # split text contains - character by space
|
21
21
|
.gsub(/(\p{Ll}{2,})(\d+\S)/, '\1 \2') # split text contains > 1 character from digits
|
22
22
|
.gsub(/%([\p{L}\d])/, '% \1') # add space after percents
|
23
23
|
.gsub(/(\d)[\.,](\d)/, '\1-\2') # replace separator between digits from , or . to -
|
24
|
-
.gsub(/(\d)[\.,\s]+(#{MEASURES})
|
24
|
+
.gsub(/(\d)[\.,\s]+(#{MEASURES})\.?(?!\p{L})/i, '\1\2') # replace gaps between numbers and measures
|
25
25
|
.gsub(/(\p{Ll})(\p{Lu})/, '\1 \2') # split camelcase string
|
26
26
|
.gsub(/(\d)-0+(#{MEASURES})/i, '\1\2') # remove trailing zeroes after measures
|
27
27
|
.gsub(/([а-яa-z])(\d+)(#{MEASURES})/i, '\1 \2\3') # add space between word and measure
|
metadata
CHANGED
@@ -1,43 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: keyword_matcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.2
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ivan Novikov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '5.0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '5.0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: damerau-levenshtein
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
17
|
+
- - ">="
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.3'
|
19
|
+
version: '4.0'
|
34
20
|
type: :runtime
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
|
-
- - "~>"
|
24
|
+
- - ">="
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.3'
|
26
|
+
version: '4.0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: bundler
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -92,14 +78,14 @@ dependencies:
|
|
92
78
|
requirements:
|
93
79
|
- - "~>"
|
94
80
|
- !ruby/object:Gem::Version
|
95
|
-
version: 0.
|
81
|
+
version: 0.76.0
|
96
82
|
type: :development
|
97
83
|
prerelease: false
|
98
84
|
version_requirements: !ruby/object:Gem::Requirement
|
99
85
|
requirements:
|
100
86
|
- - "~>"
|
101
87
|
- !ruby/object:Gem::Version
|
102
|
-
version: 0.
|
88
|
+
version: 0.76.0
|
103
89
|
description:
|
104
90
|
email:
|
105
91
|
- ivan.novikov@saltpepper.ru
|