inci_score 4.2.1 → 4.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -9
- data/config/catalog.yml +24 -2
- data/config/cir +6172 -0
- data/config/hazards.yml +29 -31
- data/lib/inci_score/config.rb +1 -0
- data/lib/inci_score/ingredient.rb +12 -11
- data/lib/inci_score/recognizer.rb +3 -7
- data/lib/inci_score/recognizer_rules.rb +22 -12
- data/lib/inci_score/response.rb +24 -5
- data/lib/inci_score/scorer.rb +4 -6
- data/lib/inci_score/version.rb +1 -1
- metadata +3 -2
data/config/hazards.yml
CHANGED
@@ -1,31 +1,29 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
- glycol
|
31
|
-
- glicol
|
1
|
+
---
|
2
|
+
peg-: 3
|
3
|
+
ppg-: 3
|
4
|
+
dea-: 3
|
5
|
+
mipa-: 3
|
6
|
+
edta-: 4
|
7
|
+
thicone: 4
|
8
|
+
siloxane: 4
|
9
|
+
chlorexidine: 4
|
10
|
+
petrolatum: 3
|
11
|
+
paraffinum: 3
|
12
|
+
carbomer: 3
|
13
|
+
crosspolymer: 3
|
14
|
+
acrylate: 3
|
15
|
+
styrene: 3
|
16
|
+
copolymer: 3
|
17
|
+
triethanolamine: 3
|
18
|
+
triclosan: 4
|
19
|
+
dmdm: 3
|
20
|
+
hydantoin: 3
|
21
|
+
imidazolidinyl: 4
|
22
|
+
diazolidinyl: 3
|
23
|
+
methylchloroisothiazolinone: 3
|
24
|
+
methylisothiazolinone: 3
|
25
|
+
nonoxynol: 4
|
26
|
+
poloxamer: 3
|
27
|
+
trimonium: 3
|
28
|
+
dimonium: 3
|
29
|
+
glycol: 3
|
data/lib/inci_score/config.rb
CHANGED
@@ -5,6 +5,7 @@ require 'yaml'
|
|
5
5
|
module InciScore
|
6
6
|
module Config
|
7
7
|
CATALOG = YAML::load_file(File::expand_path('../../../config/catalog.yml', __FILE__)).freeze
|
8
|
+
CIR = File.readlines(File::expand_path('../../../config/cir', __FILE__)).freeze
|
8
9
|
HAZARDS = YAML::load_file(File::expand_path('../../../config/hazards.yml', __FILE__)).freeze
|
9
10
|
end
|
10
11
|
end
|
data/lib/inci_score/ingredient.rb
CHANGED
@@ -5,14 +5,13 @@ module InciScore
|
|
5
5
|
SLASH = '/'
|
6
6
|
SLASH_RULE = /(?<!ate)\//.freeze
|
7
7
|
PARENTHESIS = %w[( ) [ ]].freeze
|
8
|
-
|
8
|
+
PARENTHESIS_RULE = /(\(.+\)|\[.+\])/.freeze
|
9
9
|
|
10
|
-
attr_reader :raw, :
|
10
|
+
attr_reader :raw, :values
|
11
11
|
|
12
12
|
def initialize(raw)
|
13
13
|
@raw = raw.to_s
|
14
|
-
@
|
15
|
-
@values ||= synonims.unshift(name).compact
|
14
|
+
@values = fetch_values
|
16
15
|
freeze
|
17
16
|
end
|
18
17
|
|
@@ -22,13 +21,15 @@ module InciScore
|
|
22
21
|
|
23
22
|
private
|
24
23
|
|
25
|
-
def
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
def fetch_values
|
25
|
+
if parenthesis?
|
26
|
+
parenthesis = PARENTHESIS.join
|
27
|
+
parenthesis_values = raw.match(PARENTHESIS_RULE).captures.map { |c| c.delete(parenthesis) }
|
28
|
+
deparenthesized = raw.sub(PARENTHESIS_RULE, '').sub(/\s+/, ' ').strip
|
29
|
+
[deparenthesized].concat(parenthesis_values)
|
30
|
+
else
|
31
|
+
raw.split(SLASH_RULE).map(&:strip)
|
32
|
+
end
|
32
33
|
end
|
33
34
|
|
34
35
|
def parenthesis?
|
data/lib/inci_score/recognizer.rb
CHANGED
@@ -4,12 +4,10 @@ module InciScore
|
|
4
4
|
class Recognizer
|
5
5
|
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
|
6
6
|
|
7
|
-
Component = Struct.new(:name, :hazard)
|
8
|
-
|
9
7
|
attr_reader :ingredient, :rules, :applied
|
10
8
|
|
11
|
-
def initialize(ingredient, rules = DEFAULT_RULES
|
12
|
-
@ingredient =
|
9
|
+
def initialize(ingredient, rules = DEFAULT_RULES)
|
10
|
+
@ingredient = Ingredient.new(ingredient)
|
13
11
|
@rules = rules
|
14
12
|
@applied = []
|
15
13
|
freeze
|
@@ -17,9 +15,7 @@ module InciScore
|
|
17
15
|
|
18
16
|
def call
|
19
17
|
return if ingredient.to_s.empty?
|
20
|
-
|
21
|
-
return unless component
|
22
|
-
Component.new(component, Config::CATALOG[component])
|
18
|
+
find_component
|
23
19
|
end
|
24
20
|
|
25
21
|
private
|
data/lib/inci_score/recognizer_rules.rb
CHANGED
@@ -7,14 +7,23 @@ module InciScore
|
|
7
7
|
module Rules
|
8
8
|
TOLERANCE = 3
|
9
9
|
|
10
|
-
|
10
|
+
Component = Struct.new(:name, :hazard)
|
11
11
|
|
12
|
-
|
12
|
+
Key = ->(src) do
|
13
|
+
score = Config::CATALOG[src]
|
14
|
+
Component.new(src, score) if score
|
15
|
+
end
|
16
|
+
|
17
|
+
Hazard = ->(src) do
|
18
|
+
if hazard = Config::HAZARDS.detect { |name, _| src.include?(name) }
|
19
|
+
Component.new(src, hazard.last)
|
20
|
+
end
|
21
|
+
end
|
13
22
|
|
14
23
|
module Levenshtein
|
15
24
|
extend self
|
16
25
|
|
17
|
-
Result = Struct.new(:name, :distance) do
|
26
|
+
Result = Struct.new(:name, :distance, :score) do
|
18
27
|
def tolerable?(size)
|
19
28
|
distance < TOLERANCE && distance <= (size-1)
|
20
29
|
end
|
@@ -25,14 +34,14 @@ module InciScore
|
|
25
34
|
size = src.size
|
26
35
|
farthest = Result.new(nil, size)
|
27
36
|
initial = src[0]
|
28
|
-
result = Config::CATALOG.reduce(farthest) do |nearest, (
|
29
|
-
next nearest unless
|
30
|
-
next nearest if
|
31
|
-
d = src.distance(
|
32
|
-
nearest = Result.new(
|
37
|
+
result = Config::CATALOG.reduce(farthest) do |nearest, (name, score)|
|
38
|
+
next nearest unless name.start_with?(initial)
|
39
|
+
next nearest if name.size > (size + TOLERANCE)
|
40
|
+
d = src.distance(name)
|
41
|
+
nearest = Result.new(name, d, score) if d < nearest.distance
|
33
42
|
nearest
|
34
43
|
end
|
35
|
-
result.name if result.tolerable?(size)
|
44
|
+
Component.new(result.name, result.score) if result.tolerable?(size)
|
36
45
|
end
|
37
46
|
end
|
38
47
|
|
@@ -44,7 +53,8 @@ module InciScore
|
|
44
53
|
def call(src)
|
45
54
|
return if src.size < TOLERANCE
|
46
55
|
digits = src[0, MIN_MEANINGFUL]
|
47
|
-
Config::CATALOG.detect { |
|
56
|
+
pairs = Config::CATALOG.detect { |name, _| name.start_with?(digits) }.to_a.first
|
57
|
+
Component.new(*pairs) if pairs
|
48
58
|
end
|
49
59
|
end
|
50
60
|
|
@@ -56,8 +66,8 @@ module InciScore
|
|
56
66
|
def call(src)
|
57
67
|
return if src.size <= TOLERANCE
|
58
68
|
tokens(src).each do |token|
|
59
|
-
Config::CATALOG.each do |
|
60
|
-
return
|
69
|
+
Config::CATALOG.each do |name, score|
|
70
|
+
return Component.new(name, score) if name.include?(token)
|
61
71
|
end
|
62
72
|
end
|
63
73
|
nil
|
data/lib/inci_score/response.rb
CHANGED
@@ -20,16 +20,35 @@ module InciScore
|
|
20
20
|
end
|
21
21
|
|
22
22
|
def to_s
|
23
|
+
[score_str, precision_str, components_str, unrecognized_str].join
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def score_str
|
23
29
|
%Q{
|
24
30
|
TOTAL SCORE:
|
25
|
-
\t#{score}
|
31
|
+
\t#{score}}
|
32
|
+
end
|
33
|
+
|
34
|
+
def precision_str
|
35
|
+
%Q{
|
26
36
|
PRECISION:
|
27
|
-
\t#{precision}
|
37
|
+
\t#{precision}}
|
38
|
+
end
|
39
|
+
|
40
|
+
def components_str
|
41
|
+
return '' if components.empty?
|
42
|
+
%Q{
|
28
43
|
COMPONENTS:
|
29
|
-
\t#{components.map { |c| "#{c.name} (#{c.hazard})" }.join(', ')}
|
44
|
+
\t#{components.map { |c| "#{c.name} (#{c.hazard})" }.join(', ')}}
|
45
|
+
end
|
46
|
+
|
47
|
+
def unrecognized_str
|
48
|
+
return '' if unrecognized.empty?
|
49
|
+
%Q{
|
30
50
|
UNRECOGNIZED:
|
31
|
-
\t#{unrecognized.join(', ')}
|
32
|
-
}
|
51
|
+
\t#{unrecognized.join(', ')}}
|
33
52
|
end
|
34
53
|
end
|
35
54
|
end
|
data/lib/inci_score/scorer.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Scorer
|
5
|
-
|
5
|
+
HAZARD_RATIO = 25
|
6
6
|
WEIGHT_FACTOR = 5
|
7
7
|
|
8
8
|
attr_reader :hazards, :size
|
@@ -15,7 +15,7 @@ module InciScore
|
|
15
15
|
|
16
16
|
def call
|
17
17
|
return 0 if hazards.empty?
|
18
|
-
(100 - avg *
|
18
|
+
(100 - avg * HAZARD_RATIO).round(4)
|
19
19
|
end
|
20
20
|
|
21
21
|
private
|
@@ -25,10 +25,8 @@ module InciScore
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def avg_weighted
|
28
|
-
return hazards.
|
29
|
-
weighted.
|
30
|
-
acc += score.value
|
31
|
-
end
|
28
|
+
return hazards.sum if same_hazard?
|
29
|
+
weighted.sum(&:value)
|
32
30
|
end
|
33
31
|
|
34
32
|
def same_hazard?
|
data/lib/inci_score/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inci_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.2.1
|
4
|
+
version: 4.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- costajob
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- README.md
|
107
107
|
- bin/inci_score
|
108
108
|
- config/catalog.yml
|
109
|
+
- config/cir
|
109
110
|
- config/hazards.yml
|
110
111
|
- ext/levenshtein.c
|
111
112
|
- lib/inci_score.rb
|