inci_score 4.1.5 → 4.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/catalog.yml +25 -1
- data/config/cir +6172 -0
- data/lib/inci_score/cli.rb +3 -4
- data/lib/inci_score/computer.rb +3 -4
- data/lib/inci_score/config.rb +1 -0
- data/lib/inci_score/ingredient.rb +12 -11
- data/lib/inci_score/recognizer.rb +6 -7
- data/lib/inci_score/recognizer_rules.rb +12 -10
- data/lib/inci_score/version.rb +1 -1
- metadata +3 -2
data/lib/inci_score/cli.rb
CHANGED
@@ -4,20 +4,19 @@ require 'optparse'
|
|
4
4
|
|
5
5
|
module InciScore
|
6
6
|
class CLI
|
7
|
-
attr_reader :args, :io
|
7
|
+
attr_reader :args, :io
|
8
8
|
attr_accessor :src
|
9
9
|
|
10
|
-
def initialize(args:, io: STDOUT
|
10
|
+
def initialize(args:, io: STDOUT)
|
11
11
|
@args = args
|
12
12
|
@io = io
|
13
|
-
@catalog = catalog
|
14
13
|
@src = nil
|
15
14
|
end
|
16
15
|
|
17
16
|
def call
|
18
17
|
parser.parse!(args)
|
19
18
|
return io.puts(%q{Specify inci list as: --src='aqua, parfum, etc'}) unless src
|
20
|
-
computer = Computer.new(src: src
|
19
|
+
computer = Computer.new(src: src)
|
21
20
|
io.puts computer.call
|
22
21
|
end
|
23
22
|
|
data/lib/inci_score/computer.rb
CHANGED
@@ -5,12 +5,11 @@ module InciScore
|
|
5
5
|
TOLERANCE = 30.0
|
6
6
|
DECIMALS = 2
|
7
7
|
|
8
|
-
attr_reader :src, :
|
8
|
+
attr_reader :src, :rules, :ingredients, :components, :unrecognized
|
9
9
|
|
10
|
-
def initialize(src:,
|
10
|
+
def initialize(src:, rules: Normalizer::DEFAULT_RULES)
|
11
11
|
@unrecognized = []
|
12
12
|
@src = src
|
13
|
-
@catalog = catalog
|
14
13
|
@rules = rules
|
15
14
|
@ingredients = Normalizer.new(src: src, rules: rules).call
|
16
15
|
@components = fetch_components
|
@@ -40,7 +39,7 @@ module InciScore
|
|
40
39
|
|
41
40
|
def fetch_components
|
42
41
|
ingredients.map do |ingredient|
|
43
|
-
Recognizer.new(ingredient
|
42
|
+
Recognizer.new(ingredient).call.tap do |component|
|
44
43
|
unrecognized << ingredient unless component
|
45
44
|
end
|
46
45
|
end.compact
|
data/lib/inci_score/config.rb
CHANGED
@@ -5,6 +5,7 @@ require 'yaml'
|
|
5
5
|
module InciScore
|
6
6
|
module Config
|
7
7
|
CATALOG = YAML::load_file(File::expand_path('../../../config/catalog.yml', __FILE__)).freeze
|
8
|
+
CIR = File.readlines(File::expand_path('../../../config/cir', __FILE__)).freeze
|
8
9
|
HAZARDS = YAML::load_file(File::expand_path('../../../config/hazards.yml', __FILE__)).freeze
|
9
10
|
end
|
10
11
|
end
|
data/lib/inci_score/ingredient.rb
CHANGED
@@ -5,14 +5,13 @@ module InciScore
|
|
5
5
|
SLASH = '/'
|
6
6
|
SLASH_RULE = /(?<!ate)\//.freeze
|
7
7
|
PARENTHESIS = %w[( ) [ ]].freeze
|
8
|
-
|
8
|
+
PARENTHESIS_RULE = /(\(.+\)|\[.+\])/.freeze
|
9
9
|
|
10
|
-
attr_reader :raw, :
|
10
|
+
attr_reader :raw, :values
|
11
11
|
|
12
12
|
def initialize(raw)
|
13
13
|
@raw = raw.to_s
|
14
|
-
@
|
15
|
-
@values ||= synonims.unshift(name).compact
|
14
|
+
@values = fetch_values
|
16
15
|
freeze
|
17
16
|
end
|
18
17
|
|
@@ -22,13 +21,15 @@ module InciScore
|
|
22
21
|
|
23
22
|
private
|
24
23
|
|
25
|
-
def
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
24
|
+
def fetch_values
|
25
|
+
if parenthesis?
|
26
|
+
parenthesis = PARENTHESIS.join
|
27
|
+
parenthesis_values = raw.match(PARENTHESIS_RULE).captures.map { |c| c.delete(parenthesis) }
|
28
|
+
deparenthesized = raw.sub(PARENTHESIS_RULE, '').sub(/\s+/, ' ').strip
|
29
|
+
[deparenthesized].concat(parenthesis_values)
|
30
|
+
else
|
31
|
+
raw.split(SLASH_RULE).map(&:strip)
|
32
|
+
end
|
32
33
|
end
|
33
34
|
|
34
35
|
def parenthesis?
|
data/lib/inci_score/recognizer.rb
CHANGED
@@ -2,15 +2,14 @@
|
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Recognizer
|
5
|
-
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::
|
5
|
+
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
|
6
6
|
|
7
7
|
Component = Struct.new(:name, :hazard)
|
8
8
|
|
9
|
-
attr_reader :ingredient, :
|
9
|
+
attr_reader :ingredient, :rules, :applied
|
10
10
|
|
11
|
-
def initialize(ingredient,
|
12
|
-
@ingredient =
|
13
|
-
@catalog = catalog
|
11
|
+
def initialize(ingredient, rules = DEFAULT_RULES)
|
12
|
+
@ingredient = Ingredient.new(ingredient)
|
14
13
|
@rules = rules
|
15
14
|
@applied = []
|
16
15
|
freeze
|
@@ -20,7 +19,7 @@ module InciScore
|
|
20
19
|
return if ingredient.to_s.empty?
|
21
20
|
component = find_component
|
22
21
|
return unless component
|
23
|
-
Component.new(component,
|
22
|
+
Component.new(component, Config::CATALOG[component])
|
24
23
|
end
|
25
24
|
|
26
25
|
private
|
@@ -35,7 +34,7 @@ module InciScore
|
|
35
34
|
|
36
35
|
def apply(rule)
|
37
36
|
ingredient.values.map do |value|
|
38
|
-
rule.call(value
|
37
|
+
rule.call(value)
|
39
38
|
end.find(&:itself)
|
40
39
|
end
|
41
40
|
end
|
data/lib/inci_score/recognizer_rules.rb
CHANGED
@@ -7,9 +7,9 @@ module InciScore
|
|
7
7
|
module Rules
|
8
8
|
TOLERANCE = 3
|
9
9
|
|
10
|
-
Key = ->(src
|
10
|
+
Key = ->(src) { src if Config::CATALOG.has_key?(src) }
|
11
11
|
|
12
|
-
Hazard = ->(src
|
12
|
+
Hazard = ->(src) { 'generic-hazard' if Config::HAZARDS.any? { |h| src.include?(h) } }
|
13
13
|
|
14
14
|
module Levenshtein
|
15
15
|
extend self
|
@@ -20,12 +20,12 @@ module InciScore
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def call(src
|
23
|
+
def call(src)
|
24
24
|
return if src.empty?
|
25
25
|
size = src.size
|
26
26
|
farthest = Result.new(nil, size)
|
27
27
|
initial = src[0]
|
28
|
-
result =
|
28
|
+
result = Config::CATALOG.reduce(farthest) do |nearest, (component, _)|
|
29
29
|
next nearest unless component.start_with?(initial)
|
30
30
|
next nearest if component.size > (size + TOLERANCE)
|
31
31
|
d = src.distance(component)
|
@@ -36,15 +36,15 @@ module InciScore
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
module
|
39
|
+
module Prefix
|
40
40
|
extend self
|
41
41
|
|
42
42
|
MIN_MEANINGFUL = 7
|
43
43
|
|
44
|
-
def call(src
|
44
|
+
def call(src)
|
45
45
|
return if src.size < TOLERANCE
|
46
46
|
digits = src[0, MIN_MEANINGFUL]
|
47
|
-
|
47
|
+
Config::CATALOG.detect { |component, _| component.start_with?(digits) }.to_a.first
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
@@ -53,9 +53,10 @@ module InciScore
|
|
53
53
|
|
54
54
|
UNMATCHABLE = %w[extract oil sodium acid sulfate].freeze
|
55
55
|
|
56
|
-
def call(src
|
56
|
+
def call(src)
|
57
|
+
return if src.size <= TOLERANCE
|
57
58
|
tokens(src).each do |token|
|
58
|
-
|
59
|
+
Config::CATALOG.each do |component, _|
|
59
60
|
return component if component.include?(token)
|
60
61
|
end
|
61
62
|
end
|
@@ -65,7 +66,8 @@ module InciScore
|
|
65
66
|
private
|
66
67
|
|
67
68
|
def tokens(src)
|
68
|
-
|
69
|
+
words = src.split(' ').map { |w| w.split('-') }.flatten
|
70
|
+
(words - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort! { |a, b| b.size <=> a.size }
|
69
71
|
end
|
70
72
|
end
|
71
73
|
end
|
data/lib/inci_score/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inci_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.1.5
|
4
|
+
version: 4.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- costajob
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -106,6 +106,7 @@ files:
|
|
106
106
|
- README.md
|
107
107
|
- bin/inci_score
|
108
108
|
- config/catalog.yml
|
109
|
+
- config/cir
|
109
110
|
- config/hazards.yml
|
110
111
|
- ext/levenshtein.c
|
111
112
|
- lib/inci_score.rb
|