inci_score 4.6.0 → 4.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -5
- data/config/catalog.yml +1 -1
- data/lib/inci_score/cli.rb +1 -1
- data/lib/inci_score/computer.rb +7 -4
- data/lib/inci_score/recognizer.rb +14 -2
- data/lib/inci_score/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fbbf88644bed7f8afcd802d796c2b49a2428a2eac2bbf68761079b83bc485866
|
4
|
+
data.tar.gz: 014d402eacfdb5512b350fdb7131dee8ebc793df1091153b30c04e000e47d161
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44d96ce1e40671e04de3f9120465a129abbbc6bbd104c0e639d78be6d03be52645dedad5f5c22f7ff0c2d02c115fbf32d6941e38d9aa86867f9e4e38763719d3
|
7
|
+
data.tar.gz: 4dcfd975146932d346b4ba693621fd00762b83071b6cf91db73757bec4271d530ab253b42a2e647c661d8e37712682e4cef51c9befa0a5b6717232ddc785b77d
|
data/README.md
CHANGED
@@ -31,11 +31,13 @@ The total score is then calculated on a percent basis.
|
|
31
31
|
Since the ingredients list could come from an unreliable source (e.g. data scanned from a captured image), the gem tries to fuzzy match the ingredients by using different algorithms:
|
32
32
|
* exact matching
|
33
33
|
* [edit distance](https://en.wikipedia.org/wiki/Levenshtein_distance) within a specified tolerance
|
34
|
+
* known hazards (i.e. ending in `ethicone`)
|
34
35
|
* first relevant matching digits
|
35
36
|
* matching split tokens
|
36
37
|
|
37
38
|
### Sources
|
38
|
-
The library accepts the list of ingredients as a single string of text.
|
39
|
+
The library accepts the list of ingredients as a single string of text.
|
40
|
+
Since this source could come from an OCR program, the library performs a normalization by stripping invalid characters and removing the unimportant parts.
|
39
41
|
The ingredients are typically separated by commas, although the normalizer will detect the most appropriate separator:
|
40
42
|
|
41
43
|
```
|
@@ -44,6 +46,7 @@ The ingredients are typically separated by comma, although normalizer will detec
|
|
44
46
|
|
45
47
|
## Installation
|
46
48
|
Install the gem from your shell:
|
49
|
+
|
47
50
|
```shell
|
48
51
|
gem install inci_score
|
49
52
|
```
|
@@ -58,18 +61,21 @@ require "inci_score"
|
|
58
61
|
|
59
62
|
inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
|
60
63
|
inci.score # 56.25
|
64
|
+
inci.precision # 100.0
|
61
65
|
```
|
62
66
|
|
63
|
-
As you see the results are wrapped by an *InciScore::Response* object, this is useful when dealing with the CLI
|
67
|
+
As you see the results are wrapped by an *InciScore::Response* object, this is useful when dealing with the CLI (read below).
|
64
68
|
|
65
69
|
#### Unrecognized components
|
66
|
-
The API treats unrecognized components as a common case by just marking the object as non valid
|
70
|
+
The API treats unrecognized components as a common case by just marking the object as invalid.
|
67
71
|
In such a case the score is computed anyway by considering only recognized components.
|
68
|
-
|
72
|
+
You can check the `precision` value, which is zero for unrecognized components, and changes based on the applied recognizer rule (100% when exact matching).
|
69
73
|
|
70
74
|
```ruby
|
71
75
|
inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2')
|
72
76
|
inci.valid? # false
|
77
|
+
inci.score # 100.0
|
78
|
+
inci.precision # 33.33
|
73
79
|
inci.unrecognized # ["noent1", "noent2"]
|
74
80
|
```
|
75
81
|
|
@@ -82,7 +88,7 @@ inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
|
|
82
88
|
TOTAL SCORE:
|
83
89
|
53.22
|
84
90
|
PRECISION:
|
85
|
-
|
91
|
+
71.54
|
86
92
|
COMPONENTS:
|
87
93
|
aqua (0), dimethicone (4), peg-10 (3)
|
88
94
|
UNRECOGNIZED:
|
@@ -91,6 +97,7 @@ UNRECOGNIZED:
|
|
91
97
|
|
92
98
|
#### Getting help
|
93
99
|
You can get CLI interface help by:
|
100
|
+
|
94
101
|
```shell
|
95
102
|
Usage: inci_score --src="aqua, parfum, etc"
|
96
103
|
-s, --src=SRC The INCI list: "aqua, parfum, etc"
|
data/config/catalog.yml
CHANGED
@@ -2818,7 +2818,7 @@ pantothenic acid polypeptide: 0
|
|
2818
2818
|
papaver orientale: 0
|
2819
2819
|
papaver rhoeas: 0
|
2820
2820
|
paraffin: 3
|
2821
|
-
paraffinum liquidum: 3
|
2821
|
+
paraffinum liquidum (mineral oil): 3
|
2822
2822
|
parietaria officinalis: 0
|
2823
2823
|
passiflora incarnata: 0
|
2824
2824
|
passiflora quadrangularis: 0
|
data/lib/inci_score/cli.rb
CHANGED
@@ -15,7 +15,7 @@ module InciScore
|
|
15
15
|
|
16
16
|
def call
|
17
17
|
parser.parse!(args)
|
18
|
-
return io.puts(%q{Specify
|
18
|
+
return io.puts(%q{Specify INCI list as: --src='aqua, parfum, etc'}) unless src
|
19
19
|
computer = Computer.new(src: src)
|
20
20
|
io.puts computer.call
|
21
21
|
end
|
data/lib/inci_score/computer.rb
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Computer
|
5
|
-
TOLERANCE =
|
5
|
+
TOLERANCE = 50.0
|
6
6
|
DECIMALS = 2
|
7
7
|
|
8
|
-
attr_reader :src, :rules, :ingredients, :components, :unrecognized
|
8
|
+
attr_reader :src, :rules, :ingredients, :components, :unrecognized, :precisions
|
9
9
|
|
10
10
|
def initialize(src:, rules: Normalizer::DEFAULT_RULES)
|
11
11
|
@unrecognized = []
|
12
|
+
@precisions = []
|
12
13
|
@src = src
|
13
14
|
@rules = rules
|
14
15
|
@ingredients = Normalizer.new(src: src, rules: rules).call
|
@@ -28,7 +29,7 @@ module InciScore
|
|
28
29
|
end
|
29
30
|
|
30
31
|
def precision
|
31
|
-
(
|
32
|
+
(precisions.sum / ingredients.size).round(DECIMALS)
|
32
33
|
end
|
33
34
|
|
34
35
|
def valid?
|
@@ -39,7 +40,9 @@ module InciScore
|
|
39
40
|
|
40
41
|
def fetch_components
|
41
42
|
ingredients.map do |ingredient|
|
42
|
-
Recognizer.new(ingredient)
|
43
|
+
recognizer = Recognizer.new(ingredient)
|
44
|
+
recognizer.call.tap do |component|
|
45
|
+
precisions << recognizer.precision
|
43
46
|
unrecognized << ingredient unless component
|
44
47
|
end
|
45
48
|
end.compact
|
data/lib/inci_score/recognizer.rb
CHANGED
@@ -3,19 +3,31 @@
|
|
3
3
|
module InciScore
|
4
4
|
class Recognizer
|
5
5
|
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
|
6
|
+
PRECISION_BASE = 4
|
6
7
|
|
7
8
|
attr_reader :ingredient, :rules, :applied
|
9
|
+
attr_accessor :found
|
8
10
|
|
9
11
|
def initialize(ingredient, rules = DEFAULT_RULES)
|
10
12
|
@ingredient = Ingredient.new(ingredient)
|
11
13
|
@rules = rules
|
12
14
|
@applied = []
|
13
|
-
|
15
|
+
@found = false
|
14
16
|
end
|
15
17
|
|
16
18
|
def call
|
17
19
|
return if ingredient.to_s.empty?
|
18
|
-
find_component
|
20
|
+
find_component.tap do |c|
|
21
|
+
self.found = true if c
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def precision
|
26
|
+
return 0.0 unless found
|
27
|
+
rule = applied.last
|
28
|
+
index = rules.index(rule) + PRECISION_BASE
|
29
|
+
ratio = Math.log(index, PRECISION_BASE)
|
30
|
+
(100 / ratio).round(2)
|
19
31
|
end
|
20
32
|
|
21
33
|
private
|
data/lib/inci_score/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inci_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.6.
|
4
|
+
version: 4.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- costajob
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|