inci_score 4.3.0 → 4.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -9
- data/config/catalog.yml +0 -1
- data/config/hazards.yml +29 -31
- data/lib/inci_score/recognizer.rb +1 -5
- data/lib/inci_score/recognizer_rules.rb +22 -12
- data/lib/inci_score/response.rb +24 -5
- data/lib/inci_score/scorer.rb +9 -7
- data/lib/inci_score/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c8d0453d792d9a6f5e628d13720a927c15ee14ba8441faf588736b22ab8bf618
|
4
|
+
data.tar.gz: d354ebb76fa432acc592da8e152e6a9712022c394cce7e8f951e6ebc9f94b93a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcfc4fcafe3b51ed9917937c36055c4043591b499250eae36be97cf4b58d101d0440bc12a57a2fe8e22c5fb4189ed20969cd6f0ab7f58cfa9028fa2ea681fda6
|
7
|
+
data.tar.gz: a34557698ffc1449c4ae4d4bb031465566506e92186c7a6e482ff655453566581d3645f8e78894d5afec6a3feb6066d887d30063356b6d3a91b408204a1a6ed4
|
data/README.md
CHANGED
@@ -9,8 +9,9 @@
|
|
9
9
|
* [Usage](#usage)
|
10
10
|
* [Library](#library)
|
11
11
|
* [CLI](#cli)
|
12
|
-
* [
|
12
|
+
* [Benchmarks](#benchmarks)
|
13
13
|
* [Levenshtein in C](#levenshtein-in-c)
|
14
|
+
* [Run benchmarks](#run-benchmarks)
|
14
15
|
|
15
16
|
## Scope
|
16
17
|
This gem computes the score of cosmetic components based on the information provided by the [Biodizionario site](http://www.biodizionario.it/) by Fabrizio Zago.
|
@@ -56,7 +57,7 @@ You can include this gem into your own library and start computing the INCI scor
|
|
56
57
|
require "inci_score"
|
57
58
|
|
58
59
|
inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
|
59
|
-
inci.score #
|
60
|
+
inci.score # 56.25
|
60
61
|
```
|
61
62
|
|
62
63
|
As you can see, the results are wrapped in an *InciScore::Response* object; this is useful when dealing with the CLI and HTTP interfaces (read below).
|
@@ -80,12 +81,10 @@ inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
|
|
80
81
|
|
81
82
|
TOTAL SCORE:
|
82
83
|
47.18
|
83
|
-
VALID STATE:
|
84
|
-
true
|
85
84
|
PRECISION:
|
86
85
|
75.0
|
87
86
|
COMPONENTS:
|
88
|
-
aqua
|
87
|
+
aqua (0), dimethicone (4), peg-10 (3)
|
89
88
|
UNRECOGNIZED:
|
90
89
|
noent
|
91
90
|
```
|
@@ -98,15 +97,17 @@ Usage: inci_score --src="aqua, parfum, etc"
|
|
98
97
|
-h, --help Prints this help
|
99
98
|
```
|
100
99
|
|
101
|
-
##
|
100
|
+
## Benchmarks
|
102
101
|
|
103
102
|
### Levenshtein in C
|
104
103
|
I noticed the API slows down dramatically when dealing with unrecognized components that need to be fuzzy matched.
|
105
104
|
I profiled the code by using the [benchmark-ips](https://github.com/evanphx/benchmark-ips) gem, finding the bottleneck was the pure Ruby implementation of the Levenshtein distance algorithm.
|
106
|
-
After some pointless optimization, i replaced this routine with a C implementation: i opted for the straightforward [Ruby Inline](https://github.com/seattlerb/rubyinline) library to call the C code straight from Ruby.
|
107
105
|
|
108
|
-
|
106
|
+
After some pointless optimization, I replaced this routine with a C implementation: I opted for the straightforward [Ruby Inline](https://github.com/seattlerb/rubyinline) library to call the C code straight from Ruby, gaining an order of magnitude in speed (x30).
|
107
|
+
|
108
|
+
### Run benchmarks
|
109
|
+
Once you have downloaded the source code, run the benchmarks with:
|
109
110
|
|
110
111
|
```shell
|
111
|
-
bundle exec rake
|
112
|
+
bundle exec rake bench
|
112
113
|
```
|
data/config/catalog.yml
CHANGED
data/config/hazards.yml
CHANGED
@@ -1,31 +1,29 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
- glycol
|
31
|
-
- glicol
|
1
|
+
---
|
2
|
+
peg-: 3
|
3
|
+
ppg-: 3
|
4
|
+
dea-: 3
|
5
|
+
mipa-: 3
|
6
|
+
edta-: 4
|
7
|
+
thicone: 4
|
8
|
+
siloxane: 4
|
9
|
+
chlorexidine: 4
|
10
|
+
petrolatum: 3
|
11
|
+
paraffinum: 3
|
12
|
+
carbomer: 3
|
13
|
+
crosspolymer: 3
|
14
|
+
acrylate: 3
|
15
|
+
styrene: 3
|
16
|
+
copolymer: 3
|
17
|
+
triethanolamine: 3
|
18
|
+
triclosan: 4
|
19
|
+
dmdm: 3
|
20
|
+
hydantoin: 3
|
21
|
+
imidazolidinyl: 4
|
22
|
+
diazolidinyl: 3
|
23
|
+
methylchloroisothiazolinone: 3
|
24
|
+
methylisothiazolinone: 3
|
25
|
+
nonoxynol: 4
|
26
|
+
poloxamer: 3
|
27
|
+
trimonium: 3
|
28
|
+
dimonium: 3
|
29
|
+
glycol: 3
|
@@ -4,8 +4,6 @@ module InciScore
|
|
4
4
|
class Recognizer
|
5
5
|
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
|
6
6
|
|
7
|
-
Component = Struct.new(:name, :hazard)
|
8
|
-
|
9
7
|
attr_reader :ingredient, :rules, :applied
|
10
8
|
|
11
9
|
def initialize(ingredient, rules = DEFAULT_RULES)
|
@@ -17,9 +15,7 @@ module InciScore
|
|
17
15
|
|
18
16
|
def call
|
19
17
|
return if ingredient.to_s.empty?
|
20
|
-
|
21
|
-
return unless component
|
22
|
-
Component.new(component, Config::CATALOG[component])
|
18
|
+
find_component
|
23
19
|
end
|
24
20
|
|
25
21
|
private
|
@@ -7,14 +7,23 @@ module InciScore
|
|
7
7
|
module Rules
|
8
8
|
TOLERANCE = 3
|
9
9
|
|
10
|
-
|
10
|
+
Component = Struct.new(:name, :hazard)
|
11
11
|
|
12
|
-
|
12
|
+
Key = ->(src) do
|
13
|
+
score = Config::CATALOG[src]
|
14
|
+
Component.new(src, score) if score
|
15
|
+
end
|
16
|
+
|
17
|
+
Hazard = ->(src) do
|
18
|
+
if hazard = Config::HAZARDS.detect { |name, _| src.include?(name) }
|
19
|
+
Component.new(src, hazard.last)
|
20
|
+
end
|
21
|
+
end
|
13
22
|
|
14
23
|
module Levenshtein
|
15
24
|
extend self
|
16
25
|
|
17
|
-
Result = Struct.new(:name, :distance) do
|
26
|
+
Result = Struct.new(:name, :distance, :score) do
|
18
27
|
def tolerable?(size)
|
19
28
|
distance < TOLERANCE && distance <= (size-1)
|
20
29
|
end
|
@@ -25,14 +34,14 @@ module InciScore
|
|
25
34
|
size = src.size
|
26
35
|
farthest = Result.new(nil, size)
|
27
36
|
initial = src[0]
|
28
|
-
result = Config::CATALOG.reduce(farthest) do |nearest, (
|
29
|
-
next nearest unless
|
30
|
-
next nearest if
|
31
|
-
d = src.distance(
|
32
|
-
nearest = Result.new(
|
37
|
+
result = Config::CATALOG.reduce(farthest) do |nearest, (name, score)|
|
38
|
+
next nearest unless name.start_with?(initial)
|
39
|
+
next nearest if name.size > (size + TOLERANCE)
|
40
|
+
d = src.distance(name)
|
41
|
+
nearest = Result.new(name, d, score) if d < nearest.distance
|
33
42
|
nearest
|
34
43
|
end
|
35
|
-
result.name if result.tolerable?(size)
|
44
|
+
Component.new(result.name, result.score) if result.tolerable?(size)
|
36
45
|
end
|
37
46
|
end
|
38
47
|
|
@@ -44,7 +53,8 @@ module InciScore
|
|
44
53
|
def call(src)
|
45
54
|
return if src.size < TOLERANCE
|
46
55
|
digits = src[0, MIN_MEANINGFUL]
|
47
|
-
Config::CATALOG.detect { |
|
56
|
+
pairs = Config::CATALOG.detect { |name, _| name.start_with?(digits) }.to_a.first
|
57
|
+
Component.new(*pairs) if pairs
|
48
58
|
end
|
49
59
|
end
|
50
60
|
|
@@ -56,8 +66,8 @@ module InciScore
|
|
56
66
|
def call(src)
|
57
67
|
return if src.size <= TOLERANCE
|
58
68
|
tokens(src).each do |token|
|
59
|
-
Config::CATALOG.each do |
|
60
|
-
return
|
69
|
+
Config::CATALOG.each do |name, score|
|
70
|
+
return Component.new(name, score) if name.include?(token)
|
61
71
|
end
|
62
72
|
end
|
63
73
|
nil
|
data/lib/inci_score/response.rb
CHANGED
@@ -20,16 +20,35 @@ module InciScore
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def to_s
|
23
|
+
[score_str, precision_str, components_str, unrecognized_str].join
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def score_str
|
23
29
|
%Q{
|
24
30
|
TOTAL SCORE:
|
25
|
-
\t#{score}
|
31
|
+
\t#{score}}
|
32
|
+
end
|
33
|
+
|
34
|
+
def precision_str
|
35
|
+
%Q{
|
26
36
|
PRECISION:
|
27
|
-
\t#{precision}
|
37
|
+
\t#{precision}}
|
38
|
+
end
|
39
|
+
|
40
|
+
def components_str
|
41
|
+
return '' if components.empty?
|
42
|
+
%Q{
|
28
43
|
COMPONENTS:
|
29
|
-
\t#{components.map { |c| "#{c.name} (#{c.hazard})" }.join(', ')}
|
44
|
+
\t#{components.map { |c| "#{c.name} (#{c.hazard})" }.join(', ')}}
|
45
|
+
end
|
46
|
+
|
47
|
+
def unrecognized_str
|
48
|
+
return '' if unrecognized.empty?
|
49
|
+
%Q{
|
30
50
|
UNRECOGNIZED:
|
31
|
-
\t#{unrecognized.join(', ')}
|
32
|
-
}
|
51
|
+
\t#{unrecognized.join(', ')}}
|
33
52
|
end
|
34
53
|
end
|
35
54
|
end
|
data/lib/inci_score/scorer.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Scorer
|
5
|
-
|
5
|
+
HAZARD_RATIO = 25
|
6
6
|
WEIGHT_FACTOR = 5
|
7
7
|
|
8
8
|
attr_reader :hazards, :size
|
@@ -15,7 +15,7 @@ module InciScore
|
|
15
15
|
|
16
16
|
def call
|
17
17
|
return 0 if hazards.empty?
|
18
|
-
(100 - avg *
|
18
|
+
(100 - avg * HAZARD_RATIO).round(4)
|
19
19
|
end
|
20
20
|
|
21
21
|
private
|
@@ -25,10 +25,8 @@ module InciScore
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def avg_weighted
|
28
|
-
return hazards.
|
29
|
-
weighted.
|
30
|
-
acc += score.value
|
31
|
-
end
|
28
|
+
return hazards.sum if same_hazard?
|
29
|
+
weighted.sum(&:value)
|
32
30
|
end
|
33
31
|
|
34
32
|
def same_hazard?
|
@@ -42,7 +40,11 @@ module InciScore
|
|
42
40
|
end
|
43
41
|
|
44
42
|
def weight(index)
|
45
|
-
Math.log(index+1, size *
|
43
|
+
Math.log(index + 1, size * ratio(index))
|
44
|
+
end
|
45
|
+
|
46
|
+
def ratio(index)
|
47
|
+
WEIGHT_FACTOR / (index + 1)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
end
|
data/lib/inci_score/version.rb
CHANGED