inci_score 4.3.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -9
- data/config/catalog.yml +0 -1
- data/config/hazards.yml +29 -31
- data/lib/inci_score/recognizer.rb +1 -5
- data/lib/inci_score/recognizer_rules.rb +22 -12
- data/lib/inci_score/response.rb +24 -5
- data/lib/inci_score/scorer.rb +4 -6
- data/lib/inci_score/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 462ec33d1c493272235feaef061ac62822c4dfd6ad6c339e858da8fdfa491894
|
|
4
|
+
data.tar.gz: 42f1e47b971185e92d4af19f2fbdee0f363dc9f241041b8ed800230ae9bd0e22
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cc4f049d56ea9fc60ce92943d7da50c15b697f8712733b409e3327ab73b3c4c0e60d0687f55fcbbefb9f4fd2fdc4c05fe5aa86435f5b75fb558bf456744c9cc4
|
|
7
|
+
data.tar.gz: a706360921a1cc36b1b5f1fef53932b859de69a27b02fd8b662b76dc8fe9808e1409779297bfa159c0d57bc6bb1dfb6e5287d4455def84500ce194fa0102706a
|
data/README.md
CHANGED
|
@@ -9,8 +9,9 @@
|
|
|
9
9
|
* [Usage](#usage)
|
|
10
10
|
* [Library](#library)
|
|
11
11
|
* [CLI](#cli)
|
|
12
|
-
* [
|
|
12
|
+
* [Benchmarks](#benchmarks)
|
|
13
13
|
* [Levenshtein in C](#levenshtein-in-c)
|
|
14
|
+
* [Run benchmarks](#run-benchmarks)
|
|
14
15
|
|
|
15
16
|
## Scope
|
|
16
17
|
This gem computes the score of cosmetic components based on the information provided by the [Biodizionario site](http://www.biodizionario.it/) by Fabrizio Zago.
|
|
@@ -56,7 +57,7 @@ You can include this gem into your own library and start computing the INCI scor
|
|
|
56
57
|
require "inci_score"
|
|
57
58
|
|
|
58
59
|
inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
|
|
59
|
-
inci.score # 53.
|
|
60
|
+
inci.score # 53.76
|
|
60
61
|
```
|
|
61
62
|
|
|
62
63
|
As you can see, the results are wrapped in an *InciScore::Response* object; this is useful when dealing with the CLI and HTTP interfaces (read below).
|
|
@@ -80,12 +81,10 @@ inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
|
|
|
80
81
|
|
|
81
82
|
TOTAL SCORE:
|
|
82
83
|
47.18
|
|
83
|
-
VALID STATE:
|
|
84
|
-
true
|
|
85
84
|
PRECISION:
|
|
86
85
|
75.0
|
|
87
86
|
COMPONENTS:
|
|
88
|
-
aqua
|
|
87
|
+
aqua (0), dimethicone (4), peg-10 (3)
|
|
89
88
|
UNRECOGNIZED:
|
|
90
89
|
noent
|
|
91
90
|
```
|
|
@@ -98,15 +97,17 @@ Usage: inci_score --src="aqua, parfum, etc"
|
|
|
98
97
|
-h, --help Prints this help
|
|
99
98
|
```
|
|
100
99
|
|
|
101
|
-
##
|
|
100
|
+
## Benchmarks
|
|
102
101
|
|
|
103
102
|
### Levenshtein in C
|
|
104
103
|
I noticed the API slows down dramatically when it has to fuzzy match unrecognized components.
|
|
105
104
|
I profiled the code by using the [benchmark-ips](https://github.com/evanphx/benchmark-ips) gem, finding the bottleneck was the pure Ruby implementation of the Levenshtein distance algorithm.
|
|
106
|
-
After some pointless optimization, i replaced this routine with a C implementation: i opted for the straightforward [Ruby Inline](https://github.com/seattlerb/rubyinline) library to call the C code straight from Ruby.
|
|
107
105
|
|
|
108
|
-
|
|
106
|
+
After some pointless optimization, I replaced this routine with a C implementation: I opted for the straightforward [Ruby Inline](https://github.com/seattlerb/rubyinline) library to call the C code straight from Ruby, gaining an order of magnitude in speed (x30).
|
|
107
|
+
|
|
108
|
+
### Run benchmarks
|
|
109
|
+
Once you have downloaded the source code, run the benchmarks with:
|
|
109
110
|
|
|
110
111
|
```shell
|
|
111
|
-
bundle exec rake
|
|
112
|
+
bundle exec rake bench
|
|
112
113
|
```
|
data/config/catalog.yml
CHANGED
data/config/hazards.yml
CHANGED
|
@@ -1,31 +1,29 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
- glycol
|
|
31
|
-
- glicol
|
|
1
|
+
---
|
|
2
|
+
peg-: 3
|
|
3
|
+
ppg-: 3
|
|
4
|
+
dea-: 3
|
|
5
|
+
mipa-: 3
|
|
6
|
+
edta-: 4
|
|
7
|
+
thicone: 4
|
|
8
|
+
siloxane: 4
|
|
9
|
+
chlorexidine: 4
|
|
10
|
+
petrolatum: 3
|
|
11
|
+
paraffinum: 3
|
|
12
|
+
carbomer: 3
|
|
13
|
+
crosspolymer: 3
|
|
14
|
+
acrylate: 3
|
|
15
|
+
styrene: 3
|
|
16
|
+
copolymer: 3
|
|
17
|
+
triethanolamine: 3
|
|
18
|
+
triclosan: 4
|
|
19
|
+
dmdm: 3
|
|
20
|
+
hydantoin: 3
|
|
21
|
+
imidazolidinyl: 4
|
|
22
|
+
diazolidinyl: 3
|
|
23
|
+
methylchloroisothiazolinone: 3
|
|
24
|
+
methylisothiazolinone: 3
|
|
25
|
+
nonoxynol: 4
|
|
26
|
+
poloxamer: 3
|
|
27
|
+
trimonium: 3
|
|
28
|
+
dimonium: 3
|
|
29
|
+
glycol: 3
|
|
data/lib/inci_score/recognizer.rb
CHANGED
|
@@ -4,8 +4,6 @@ module InciScore
|
|
|
4
4
|
class Recognizer
|
|
5
5
|
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
|
|
6
6
|
|
|
7
|
-
Component = Struct.new(:name, :hazard)
|
|
8
|
-
|
|
9
7
|
attr_reader :ingredient, :rules, :applied
|
|
10
8
|
|
|
11
9
|
def initialize(ingredient, rules = DEFAULT_RULES)
|
|
@@ -17,9 +15,7 @@ module InciScore
|
|
|
17
15
|
|
|
18
16
|
def call
|
|
19
17
|
return if ingredient.to_s.empty?
|
|
20
|
-
|
|
21
|
-
return unless component
|
|
22
|
-
Component.new(component, Config::CATALOG[component])
|
|
18
|
+
find_component
|
|
23
19
|
end
|
|
24
20
|
|
|
25
21
|
private
|
|
data/lib/inci_score/recognizer_rules.rb
CHANGED
|
@@ -7,14 +7,23 @@ module InciScore
|
|
|
7
7
|
module Rules
|
|
8
8
|
TOLERANCE = 3
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
Component = Struct.new(:name, :hazard)
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
Key = ->(src) do
|
|
13
|
+
score = Config::CATALOG[src]
|
|
14
|
+
Component.new(src, score) if score
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
Hazard = ->(src) do
|
|
18
|
+
if hazard = Config::HAZARDS.detect { |name, _| src.include?(name) }
|
|
19
|
+
Component.new(src, hazard.last)
|
|
20
|
+
end
|
|
21
|
+
end
|
|
13
22
|
|
|
14
23
|
module Levenshtein
|
|
15
24
|
extend self
|
|
16
25
|
|
|
17
|
-
Result = Struct.new(:name, :distance) do
|
|
26
|
+
Result = Struct.new(:name, :distance, :score) do
|
|
18
27
|
def tolerable?(size)
|
|
19
28
|
distance < TOLERANCE && distance <= (size-1)
|
|
20
29
|
end
|
|
@@ -25,14 +34,14 @@ module InciScore
|
|
|
25
34
|
size = src.size
|
|
26
35
|
farthest = Result.new(nil, size)
|
|
27
36
|
initial = src[0]
|
|
28
|
-
result = Config::CATALOG.reduce(farthest) do |nearest, (
|
|
29
|
-
next nearest unless
|
|
30
|
-
next nearest if
|
|
31
|
-
d = src.distance(
|
|
32
|
-
nearest = Result.new(
|
|
37
|
+
result = Config::CATALOG.reduce(farthest) do |nearest, (name, score)|
|
|
38
|
+
next nearest unless name.start_with?(initial)
|
|
39
|
+
next nearest if name.size > (size + TOLERANCE)
|
|
40
|
+
d = src.distance(name)
|
|
41
|
+
nearest = Result.new(name, d, score) if d < nearest.distance
|
|
33
42
|
nearest
|
|
34
43
|
end
|
|
35
|
-
result.name if result.tolerable?(size)
|
|
44
|
+
Component.new(result.name, result.score) if result.tolerable?(size)
|
|
36
45
|
end
|
|
37
46
|
end
|
|
38
47
|
|
|
@@ -44,7 +53,8 @@ module InciScore
|
|
|
44
53
|
def call(src)
|
|
45
54
|
return if src.size < TOLERANCE
|
|
46
55
|
digits = src[0, MIN_MEANINGFUL]
|
|
47
|
-
Config::CATALOG.detect { |
|
|
56
|
+
pairs = Config::CATALOG.detect { |name, _| name.start_with?(digits) }.to_a.first
|
|
57
|
+
Component.new(*pairs) if pairs
|
|
48
58
|
end
|
|
49
59
|
end
|
|
50
60
|
|
|
@@ -56,8 +66,8 @@ module InciScore
|
|
|
56
66
|
def call(src)
|
|
57
67
|
return if src.size <= TOLERANCE
|
|
58
68
|
tokens(src).each do |token|
|
|
59
|
-
Config::CATALOG.each do |
|
|
60
|
-
return
|
|
69
|
+
Config::CATALOG.each do |name, score|
|
|
70
|
+
return Component.new(name, score) if name.include?(token)
|
|
61
71
|
end
|
|
62
72
|
end
|
|
63
73
|
nil
|
data/lib/inci_score/response.rb
CHANGED
|
@@ -20,16 +20,35 @@ module InciScore
|
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def to_s
|
|
23
|
+
[score_str, precision_str, components_str, unrecognized_str].join
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
private
|
|
27
|
+
|
|
28
|
+
def score_str
|
|
23
29
|
%Q{
|
|
24
30
|
TOTAL SCORE:
|
|
25
|
-
\t#{score}
|
|
31
|
+
\t#{score}}
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def precision_str
|
|
35
|
+
%Q{
|
|
26
36
|
PRECISION:
|
|
27
|
-
\t#{precision}
|
|
37
|
+
\t#{precision}}
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def components_str
|
|
41
|
+
return '' if components.empty?
|
|
42
|
+
%Q{
|
|
28
43
|
COMPONENTS:
|
|
29
|
-
\t#{components.map { |c| "#{c.name} (#{c.hazard})" }.join(', ')}
|
|
44
|
+
\t#{components.map { |c| "#{c.name} (#{c.hazard})" }.join(', ')}}
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def unrecognized_str
|
|
48
|
+
return '' if unrecognized.empty?
|
|
49
|
+
%Q{
|
|
30
50
|
UNRECOGNIZED:
|
|
31
|
-
\t#{unrecognized.join(', ')}
|
|
32
|
-
}
|
|
51
|
+
\t#{unrecognized.join(', ')}}
|
|
33
52
|
end
|
|
34
53
|
end
|
|
35
54
|
end
|
data/lib/inci_score/scorer.rb
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module InciScore
|
|
4
4
|
class Scorer
|
|
5
|
-
|
|
5
|
+
HAZARD_RATIO = 25
|
|
6
6
|
WEIGHT_FACTOR = 5
|
|
7
7
|
|
|
8
8
|
attr_reader :hazards, :size
|
|
@@ -15,7 +15,7 @@ module InciScore
|
|
|
15
15
|
|
|
16
16
|
def call
|
|
17
17
|
return 0 if hazards.empty?
|
|
18
|
-
(100 - avg *
|
|
18
|
+
(100 - avg * HAZARD_RATIO).round(4)
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
private
|
|
@@ -25,10 +25,8 @@ module InciScore
|
|
|
25
25
|
end
|
|
26
26
|
|
|
27
27
|
def avg_weighted
|
|
28
|
-
return hazards.
|
|
29
|
-
weighted.
|
|
30
|
-
acc += score.value
|
|
31
|
-
end
|
|
28
|
+
return hazards.sum if same_hazard?
|
|
29
|
+
weighted.sum(&:value)
|
|
32
30
|
end
|
|
33
31
|
|
|
34
32
|
def same_hazard?
|
data/lib/inci_score/version.rb
CHANGED