inci_score 3.1.3 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +21 -0
- data/README.md +14 -50
- data/bin/console +5 -4
- data/bin/inci_score +2 -2
- data/lib/inci_score/cli.rb +18 -19
- data/lib/inci_score/computer.rb +28 -28
- data/lib/inci_score/config.rb +10 -0
- data/lib/inci_score/ingredient.rb +18 -19
- data/lib/inci_score/levenshtein.rb +12 -9
- data/lib/inci_score/normalizer.rb +8 -6
- data/lib/inci_score/normalizer_rules.rb +13 -28
- data/lib/inci_score/recognizer.rb +15 -10
- data/lib/inci_score/recognizer_rules.rb +11 -11
- data/lib/inci_score/refinements.rb +3 -1
- data/lib/inci_score/response.rb +14 -7
- data/lib/inci_score/score.rb +8 -3
- data/lib/inci_score/scorer.rb +20 -13
- data/lib/inci_score/version.rb +3 -1
- data/lib/inci_score.rb +10 -4
- metadata +15 -52
- data/.gitignore +0 -13
- data/.travis.yml +0 -7
- data/Gemfile +0 -4
- data/Rakefile +0 -24
- data/config/catalog.yml +0 -5018
- data/config.ru +0 -3
- data/ext/levenshtein.c +0 -43
- data/inci_score.gemspec +0 -28
- data/lib/inci_score/api.rb +0 -19
- data/lib/inci_score/catalog.rb +0 -13
- data/lib/inci_score/server.rb +0 -51
- data/log/.gitignore +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65b6b9212d839ed17a978457cc654b835e34e6a7d68dad7c61cbd3f54eaac8cf
|
4
|
+
data.tar.gz: ab6f3cebda2cbf8875aa203edc747864f0953559bbf8472f4f757e3a1b505b8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b51ecb14b357c1926f34ee19d558b5aed024bbda9e5070149abe046f9ef713e40d03749d3ce3d78ee6c1fd48358e9530fb84bed301e7024de18af5466bab5865
|
7
|
+
data.tar.gz: 5974307d97068c706bcc483854fd27c6a12e8761d86f3328cfbb1e561cfb06432ddeefcd18aa9d15fee0f3ad8a90260de4919f5a99884119462d93982d361f86
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2022 Commerce Layer
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
CHANGED
@@ -11,9 +11,6 @@
|
|
11
11
|
* [CLI](#cli)
|
12
12
|
* [Benchmark](#benchmark)
|
13
13
|
* [Levenshtein in C](#levenshtein-in-c)
|
14
|
-
* [Platform](#platform)
|
15
|
-
* [Wrk](#wrk)
|
16
|
-
* [Results](#results)
|
17
14
|
|
18
15
|
## Scope
|
19
16
|
This gem computes the score of cosmetic components based on the information provided by the [Biodizionario site](http://www.biodizionario.it/) by Fabrizio Zago.
|
@@ -70,8 +67,8 @@ In such case the score is computed anyway by considering only recognized compone
|
|
70
67
|
It is still possible to query the object for its state:
|
71
68
|
|
72
69
|
```ruby
|
73
|
-
inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2')
|
74
|
-
inci.valid # false
|
70
|
+
inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2')
|
71
|
+
inci.valid? # false
|
75
72
|
inci.unrecognized # ["noent1", "noent2"]
|
76
73
|
```
|
77
74
|
|
@@ -82,32 +79,15 @@ You can collect INCI data by using the available CLI interface:
|
|
82
79
|
inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
|
83
80
|
|
84
81
|
TOTAL SCORE:
|
85
|
-
|
82
|
+
47.18
|
86
83
|
VALID STATE:
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
84
|
+
true
|
85
|
+
PRECISION:
|
86
|
+
75.0
|
87
|
+
COMPONENTS:
|
88
|
+
aqua\n dimethicone\n peg-10
|
92
89
|
UNRECOGNIZED:
|
93
|
-
|
94
|
-
```
|
95
|
-
|
96
|
-
#### HTTP server
|
97
|
-
The CLI interface exposes a Web layer based on the [Puma](http://puma.io/) application server.
|
98
|
-
The HTTP server is started on the specified port by spawning as many workers as your current workstation supports:
|
99
|
-
```shell
|
100
|
-
inci_score --http=9292
|
101
|
-
```
|
102
|
-
Consider all other options are discarded when running HTTP server.
|
103
|
-
|
104
|
-
##### Triggering a request
|
105
|
-
The HTTP server responds with a JSON representation of the original *InciScore::Response* object.
|
106
|
-
You can pass the source string directly as a HTTP parameter (URI escaped):
|
107
|
-
|
108
|
-
```shell
|
109
|
-
curl http://127.0.0.1:9292?src=aqua,dimethicone
|
110
|
-
=> {"components":{"aqua":0,"dimethicone":4},"unrecognized":[],"score":53.7629,"valid":true}
|
90
|
+
noent
|
111
91
|
```
|
112
92
|
|
113
93
|
#### Getting help
|
@@ -115,7 +95,6 @@ You can get CLI interface help by:
|
|
115
95
|
```shell
|
116
96
|
Usage: inci_score --src="aqua, parfum, etc"
|
117
97
|
-s, --src=SRC The INCI list: "aqua, parfum, etc"
|
118
|
-
--http=PORT Start HTTP server on the specified port
|
119
98
|
-h, --help Prints this help
|
120
99
|
```
|
121
100
|
|
@@ -124,25 +103,10 @@ Usage: inci_score --src="aqua, parfum, etc"
|
|
124
103
|
### Levenshtein in C
|
125
104
|
I noticed the API slows down dramatically when it has to fuzzy match unrecognized components.
|
126
105
|
I profiled the code by using the [benchmark-ips](https://github.com/evanphx/benchmark-ips) gem, finding the bottleneck was the pure Ruby implementation of the Levenshtein distance algorithm.
|
127
|
-
After some pointless optimization, i replaced this routine with a C implementation: i opted for the straightforward [Ruby Inline](https://github.com/seattlerb/rubyinline) library to call the C code straight from Ruby.
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
* OSX Sierra
|
132
|
-
* 2,2 GHz Intel Core i7 (4 cores)
|
133
|
-
* 16 GB 1600 MHz DDR3
|
134
|
-
* Ruby 2.4
|
135
|
-
|
136
|
-
### Wrk
|
137
|
-
As always i used [wrk](https://github.com/wg/wrk) as the loading tool.
|
138
|
-
I measured the library three times, picking the best lap.
|
106
|
+
After some pointless optimization, i replaced this routine with a C implementation: i opted for the straightforward [Ruby Inline](https://github.com/seattlerb/rubyinline) library to call the C code straight from Ruby.
|
107
|
+
|
108
|
+
Once you have downloaded the source code, run the bench specs with:
|
109
|
+
|
139
110
|
```shell
|
140
|
-
|
111
|
+
bundle exec rake spec:bench
|
141
112
|
```
|
142
|
-
|
143
|
-
### Results
|
144
|
-
| Source | Throughput (req/s) |
|
145
|
-
| --------------------------: | -----------------: |
|
146
|
-
| aqua,parfum,zeolite | 20296.75 |
|
147
|
-
| agua,porfum,zeolithe | 1098.45 |
|
148
|
-
| agua/water,porfum/fragrance | 1599.47 |
|
data/bin/console
CHANGED
data/bin/inci_score
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
lib = File.expand_path(
|
2
|
+
lib = File.expand_path('../../lib', __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
4
|
|
5
|
-
require
|
5
|
+
require 'inci_score'
|
6
6
|
|
7
7
|
InciScore::CLI.new(args: ARGV.clone).call
|
data/lib/inci_score/cli.rb
CHANGED
@@ -1,38 +1,37 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'inci_score/computer'
|
4
5
|
|
5
6
|
module InciScore
|
6
7
|
class CLI
|
7
|
-
|
8
|
+
attr_reader :args, :io, :catalog
|
9
|
+
attr_accessor :src
|
10
|
+
|
11
|
+
def initialize(args:, io: STDOUT, catalog: Config::CATALOG)
|
8
12
|
@args = args
|
9
13
|
@io = io
|
10
14
|
@catalog = catalog
|
11
15
|
@src = nil
|
12
|
-
@port = nil
|
13
16
|
end
|
14
17
|
|
15
|
-
def call
|
16
|
-
parser.parse!(
|
17
|
-
return
|
18
|
-
|
19
|
-
|
18
|
+
def call
|
19
|
+
parser.parse!(args)
|
20
|
+
return io.puts(%q{Specify inci list as: --src='aqua, parfum, etc'}) unless src
|
21
|
+
computer = Computer.new(src: src, catalog: catalog)
|
22
|
+
io.puts computer.call
|
20
23
|
end
|
21
24
|
|
22
25
|
private def parser
|
23
26
|
OptionParser.new do |opts|
|
24
|
-
opts.banner = %q{Usage: inci_score --src=
|
25
|
-
|
26
|
-
opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
|
27
|
-
@src = src
|
28
|
-
end
|
27
|
+
opts.banner = %q{Usage: inci_score --src='aqua, parfum, etc'}
|
29
28
|
|
30
|
-
opts.on(
|
31
|
-
|
29
|
+
opts.on('-sSRC', '--src=SRC', %q{The INCI list: 'aqua, parfum, etc'}) do |src|
|
30
|
+
self.src = src
|
32
31
|
end
|
33
32
|
|
34
|
-
opts.on(
|
35
|
-
|
33
|
+
opts.on('-h', '--help', 'Prints this help') do
|
34
|
+
io.puts opts
|
36
35
|
exit
|
37
36
|
end
|
38
37
|
end
|
data/lib/inci_score/computer.rb
CHANGED
@@ -1,50 +1,50 @@
|
|
1
|
-
|
2
|
-
require "inci_score/normalizer"
|
3
|
-
require "inci_score/recognizer"
|
4
|
-
require "inci_score/response"
|
5
|
-
require "inci_score/scorer"
|
1
|
+
# frozen_string_literal: true
|
6
2
|
|
7
3
|
module InciScore
|
8
4
|
class Computer
|
9
5
|
TOLERANCE = 30.0
|
10
|
-
|
6
|
+
DECIMALS = 2
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
8
|
+
attr_reader :src, :catalog, :rules, :ingredients, :components, :unrecognized
|
9
|
+
|
10
|
+
def initialize(src:, catalog: Config::CATALOG, rules: Normalizer::DEFAULT_RULES)
|
11
|
+
@unrecognized = []
|
16
12
|
@src = src
|
17
13
|
@catalog = catalog
|
18
|
-
@tolerance = Float(tolerance)
|
19
14
|
@rules = rules
|
20
|
-
@
|
15
|
+
@ingredients = Normalizer.new(src: src, rules: rules).call
|
16
|
+
@components = fetch_components
|
17
|
+
freeze
|
21
18
|
end
|
22
19
|
|
23
20
|
def call
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
Response.new(components: components.map(&:name),
|
22
|
+
unrecognized: unrecognized,
|
23
|
+
score: score,
|
24
|
+
valid: valid?,
|
25
|
+
precision: precision)
|
28
26
|
end
|
29
27
|
|
30
|
-
|
31
|
-
Scorer.new(components.map(&:hazard)).call
|
28
|
+
def score
|
29
|
+
Scorer.new(components.map(&:hazard)).call.round(DECIMALS)
|
32
30
|
end
|
33
31
|
|
34
|
-
|
35
|
-
|
32
|
+
def precision
|
33
|
+
(100 - ((unrecognized.size / Float(ingredients.size)) * 100)).round(DECIMALS)
|
36
34
|
end
|
37
35
|
|
38
|
-
|
39
|
-
|
40
|
-
Recognizer.new(ingredient, @catalog).call.tap do |component|
|
41
|
-
@unrecognized << ingredient unless component
|
42
|
-
end
|
43
|
-
end.compact
|
36
|
+
def valid?
|
37
|
+
precision >= TOLERANCE
|
44
38
|
end
|
45
39
|
|
46
|
-
private
|
47
|
-
|
40
|
+
private
|
41
|
+
|
42
|
+
def fetch_components
|
43
|
+
ingredients.map do |ingredient|
|
44
|
+
Recognizer.new(ingredient, catalog).call.tap do |component|
|
45
|
+
unrecognized << ingredient unless component
|
46
|
+
end
|
47
|
+
end.compact
|
48
48
|
end
|
49
49
|
end
|
50
50
|
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
|
5
|
+
module InciScore
|
6
|
+
module Config
|
7
|
+
CATALOG = YAML::load_file(File::expand_path('../../../config/catalog.yml', __FILE__)).freeze
|
8
|
+
HAZARDS = YAML::load_file(File::expand_path('../../../config/hazards.yml', __FILE__)).freeze
|
9
|
+
end
|
10
|
+
end
|
@@ -1,40 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module InciScore
|
2
4
|
class Ingredient
|
3
|
-
SLASH =
|
4
|
-
SLASH_RULE = /(?<!ate)
|
5
|
-
PARENTHESIS = %w[( ) [ ]]
|
6
|
-
DETAILS_RULE = /(\(.+\)|\[.+\])
|
5
|
+
SLASH = '/'
|
6
|
+
SLASH_RULE = /(?<!ate)\//.freeze
|
7
|
+
PARENTHESIS = %w[( ) [ ]].freeze
|
8
|
+
DETAILS_RULE = /(\(.+\)|\[.+\])/.freeze
|
9
|
+
|
10
|
+
attr_reader :raw, :tokens, :values
|
7
11
|
|
8
12
|
def initialize(raw)
|
9
13
|
@raw = raw.to_s
|
10
14
|
@tokens = @raw.split(SLASH_RULE).map(&:strip)
|
15
|
+
@values ||= synonims.unshift(name).compact
|
16
|
+
freeze
|
11
17
|
end
|
12
18
|
|
13
19
|
def to_s
|
14
20
|
values.join(SLASH)
|
15
21
|
end
|
16
22
|
|
17
|
-
|
18
|
-
@values ||= synonims.unshift(name).compact
|
19
|
-
end
|
20
|
-
|
21
|
-
private def name
|
22
|
-
return @tokens.first unless parenthesis?
|
23
|
-
@raw.sub(DETAILS_RULE, "").strip
|
24
|
-
end
|
23
|
+
private
|
25
24
|
|
26
|
-
|
27
|
-
|
25
|
+
def name
|
26
|
+
return tokens.first unless parenthesis?
|
27
|
+
raw.sub(DETAILS_RULE, '').strip
|
28
28
|
end
|
29
29
|
|
30
|
-
|
31
|
-
|
32
|
-
@raw.match(DETAILS_RULE)[1].delete(PARENTHESIS.join("|"))
|
30
|
+
def synonims
|
31
|
+
tokens[1, tokens.size].to_a
|
33
32
|
end
|
34
33
|
|
35
|
-
|
34
|
+
def parenthesis?
|
36
35
|
PARENTHESIS.each_slice(2).any? do |pair|
|
37
|
-
pair.all? { |p|
|
36
|
+
pair.all? { |p| raw.index(p) }
|
38
37
|
end
|
39
38
|
end
|
40
39
|
end
|
@@ -1,24 +1,28 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'inline'
|
2
4
|
|
3
5
|
module InciScore
|
4
6
|
class LevenshteinC
|
5
|
-
C_PROGRAM = File::expand_path(
|
7
|
+
C_PROGRAM = File::expand_path('../../../ext/levenshtein.c', __FILE__)
|
6
8
|
|
7
9
|
inline(:C) do |builder|
|
8
|
-
builder.c File::read(C_PROGRAM)
|
10
|
+
builder.c File::read(C_PROGRAM)
|
9
11
|
end
|
10
12
|
end
|
11
13
|
|
12
14
|
class Levenshtein
|
15
|
+
attr_reader :s, :t
|
16
|
+
|
13
17
|
def initialize(s, t)
|
14
|
-
@s = s.downcase.unpack(
|
15
|
-
@t = t.downcase.unpack(
|
18
|
+
@s = s.downcase.unpack('U*')
|
19
|
+
@t = t.downcase.unpack('U*')
|
16
20
|
end
|
17
21
|
|
18
22
|
def call
|
19
|
-
n, m =
|
23
|
+
n, m = s.length, t.length
|
20
24
|
|
21
|
-
return 0 if
|
25
|
+
return 0 if s == t
|
22
26
|
return m if n.zero?
|
23
27
|
return n if m.zero?
|
24
28
|
|
@@ -28,7 +32,7 @@ module InciScore
|
|
28
32
|
n.times do |i|
|
29
33
|
e = i + 1
|
30
34
|
m.times do |j|
|
31
|
-
c =
|
35
|
+
c = s[i] == t[j] ? 0 : 1
|
32
36
|
ins = d[j + 1] + 1
|
33
37
|
del = e + 1
|
34
38
|
sub = d[j] + c
|
@@ -43,4 +47,3 @@ module InciScore
|
|
43
47
|
end
|
44
48
|
end
|
45
49
|
end
|
46
|
-
|
@@ -1,20 +1,22 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'inci_score/normalizer_rules'
|
2
4
|
|
3
5
|
module InciScore
|
4
6
|
class Normalizer
|
5
|
-
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Uniquifier]
|
7
|
+
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Uniquifier].freeze
|
6
8
|
|
7
|
-
attr_reader :src
|
9
|
+
attr_reader :src, :rules
|
8
10
|
|
9
11
|
def initialize(src:, rules: DEFAULT_RULES)
|
10
12
|
@src = src
|
11
13
|
@rules = rules
|
14
|
+
freeze
|
12
15
|
end
|
13
16
|
|
14
17
|
def call
|
15
|
-
|
16
|
-
|
17
|
-
@src = rule.call(src)
|
18
|
+
rules.reduce(src) do |_src, rule|
|
19
|
+
_src = rule.call(_src)
|
18
20
|
end
|
19
21
|
end
|
20
22
|
end
|
@@ -1,8 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module InciScore
|
2
4
|
class Normalizer
|
3
5
|
module Rules
|
4
6
|
SEPARATOR = ','
|
5
7
|
|
8
|
+
# Normalizer rule: returns the downcased form of the given source.
Downcaser = lambda { |text| text.downcase }.freeze
|
9
|
+
|
10
|
+
# Normalizer rule: splits the source on SEPARATOR and strips each resulting token.
Tokenizer = lambda { |text| text.split(SEPARATOR).map { |piece| piece.strip } }.freeze
|
11
|
+
|
12
|
+
# Normalizer rule: coerces the source to an array and drops duplicate entries.
Uniquifier = lambda { |tokens| Array(tokens).uniq }.freeze
|
13
|
+
|
6
14
|
module Replacer
|
7
15
|
extend self
|
8
16
|
|
@@ -14,7 +22,7 @@ module InciScore
|
|
14
22
|
['~', '-'],
|
15
23
|
['|', 'l'],
|
16
24
|
[' I ', '/']
|
17
|
-
]
|
25
|
+
].freeze
|
18
26
|
|
19
27
|
def call(src)
|
20
28
|
REPLACEMENTS.reduce(src) do |_src, replacement|
|
@@ -24,14 +32,6 @@ module InciScore
|
|
24
32
|
end
|
25
33
|
end
|
26
34
|
|
27
|
-
module Downcaser
|
28
|
-
extend self
|
29
|
-
|
30
|
-
def call(src)
|
31
|
-
src.downcase
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
35
|
module Beheader
|
36
36
|
extend self
|
37
37
|
|
@@ -40,7 +40,8 @@ module InciScore
|
|
40
40
|
|
41
41
|
def call(src)
|
42
42
|
sep_index = src.index(TITLE_SEP)
|
43
|
-
return src
|
43
|
+
return src unless sep_index
|
44
|
+
return src if sep_index > MAX_INDEX
|
44
45
|
src[sep_index+1, src.size]
|
45
46
|
end
|
46
47
|
end
|
@@ -48,27 +49,19 @@ module InciScore
|
|
48
49
|
module Separator
|
49
50
|
extend self
|
50
51
|
|
51
|
-
SEPARATORS = [
|
52
|
+
SEPARATORS = ['; ', '. ', " ' ", ' - ', ' : '].freeze
|
52
53
|
|
53
54
|
def call(src)
|
54
55
|
SEPARATORS.reduce(src) do |_src, separator|
|
55
56
|
_src = _src.gsub(separator, SEPARATOR)
|
56
57
|
end
|
57
58
|
end
|
58
|
-
end
|
59
|
-
|
60
|
-
module Tokenizer
|
61
|
-
extend self
|
62
|
-
|
63
|
-
def call(src)
|
64
|
-
src.split(SEPARATOR).map(&:strip)
|
65
|
-
end
|
66
59
|
end
|
67
60
|
|
68
61
|
module Sanitizer
|
69
62
|
extend self
|
70
63
|
|
71
|
-
INVALID_CHARS = /[^\/\[\]\(\)\w\s-]
|
64
|
+
INVALID_CHARS = /[^\/\[\]\(\)\w\s-]/.freeze
|
72
65
|
|
73
66
|
def call(src)
|
74
67
|
Array(src).map do |token|
|
@@ -76,14 +69,6 @@ module InciScore
|
|
76
69
|
end.reject(&:empty?)
|
77
70
|
end
|
78
71
|
end
|
79
|
-
|
80
|
-
module Uniquifier
|
81
|
-
extend self
|
82
|
-
|
83
|
-
def call(src)
|
84
|
-
Array(src).uniq
|
85
|
-
end
|
86
|
-
end
|
87
72
|
end
|
88
73
|
end
|
89
74
|
end
|
@@ -1,38 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "inci_score/recognizer_rules"
|
2
4
|
|
3
5
|
module InciScore
|
4
6
|
class Recognizer
|
5
|
-
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Digits, Rules::Tokens]
|
7
|
+
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Digits, Rules::Hazard, Rules::Tokens].freeze
|
6
8
|
|
7
9
|
Component = Struct.new(:name, :hazard)
|
8
10
|
|
9
|
-
attr_reader :applied
|
11
|
+
attr_reader :ingredient, :catalog, :rules, :applied
|
10
12
|
|
11
13
|
def initialize(ingredient, catalog, rules = DEFAULT_RULES, wrapper = Ingredient)
|
12
14
|
@ingredient = wrapper.new(ingredient)
|
13
15
|
@catalog = catalog
|
14
16
|
@rules = rules
|
15
17
|
@applied = []
|
18
|
+
freeze
|
16
19
|
end
|
17
20
|
|
18
21
|
def call
|
19
|
-
return if
|
22
|
+
return if ingredient.to_s.empty?
|
20
23
|
component = find_component
|
21
24
|
return unless component
|
22
|
-
Component.new(component,
|
23
|
-
end
|
25
|
+
Component.new(component, catalog[component])
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
24
29
|
|
25
|
-
|
26
|
-
|
30
|
+
def find_component
|
31
|
+
rules.reduce(nil) do |component, rule|
|
27
32
|
break(component) if component
|
28
33
|
applied << rule
|
29
34
|
apply(rule)
|
30
35
|
end
|
31
36
|
end
|
32
37
|
|
33
|
-
|
34
|
-
|
35
|
-
rule.call(value,
|
38
|
+
def apply(rule)
|
39
|
+
ingredient.values.map do |value|
|
40
|
+
rule.call(value, catalog)
|
36
41
|
end.find(&:itself)
|
37
42
|
end
|
38
43
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'inci_score/refinements'
|
2
4
|
|
3
5
|
module InciScore
|
4
6
|
using Refinements
|
@@ -6,13 +8,9 @@ module InciScore
|
|
6
8
|
module Rules
|
7
9
|
TOLERANCE = 3
|
8
10
|
|
9
|
-
|
10
|
-
extend self
|
11
|
+
# Recognizer rule: returns src when it is a key of catalog, nil otherwise.
# Uses Hash#key? (preferred over the has_key? alias) and is frozen like the other rule lambdas.
Key = ->(src, catalog) { src if catalog.key?(src) }.freeze
|
11
12
|
|
12
|
-
|
13
|
-
src if catalog.has_key?(src)
|
14
|
-
end
|
15
|
-
end
|
13
|
+
# Recognizer rule: returns 'generic-hazard' when src contains any entry of Config::HAZARDS,
# nil otherwise. The second argument is ignored. Frozen for consistency with the other rule lambdas.
Hazard = ->(src, _) { 'generic-hazard' if Config::HAZARDS.any? { |h| src.include?(h) } }.freeze
|
16
14
|
|
17
15
|
module Levenshtein
|
18
16
|
extend self
|
@@ -54,19 +52,21 @@ module InciScore
|
|
54
52
|
module Tokens
|
55
53
|
extend self
|
56
54
|
|
57
|
-
UNMATCHABLE = %w[extract oil sodium acid sulfate]
|
58
|
-
|
55
|
+
UNMATCHABLE = %w[extract oil sodium acid sulfate].freeze
|
56
|
+
|
59
57
|
def call(src, catalog)
|
60
58
|
tokens(src).each do |token|
|
61
|
-
catalog.each do |component, _|
|
59
|
+
catalog.each do |component, _|
|
62
60
|
return component if component.include?(token)
|
63
61
|
end
|
64
62
|
end
|
65
63
|
nil
|
66
64
|
end
|
67
65
|
|
66
|
+
private
|
67
|
+
|
68
68
|
def tokens(src)
|
69
|
-
(src.split(
|
69
|
+
(src.split(' ') - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort! { |a, b| b.size <=> a.size }
|
70
70
|
end
|
71
71
|
end
|
72
72
|
end
|