inci_score 4.6.0 → 4.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -5
- data/config/catalog.yml +1 -1
- data/lib/inci_score/cli.rb +1 -1
- data/lib/inci_score/computer.rb +7 -4
- data/lib/inci_score/recognizer.rb +14 -2
- data/lib/inci_score/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fbbf88644bed7f8afcd802d796c2b49a2428a2eac2bbf68761079b83bc485866
|
|
4
|
+
data.tar.gz: 014d402eacfdb5512b350fdb7131dee8ebc793df1091153b30c04e000e47d161
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 44d96ce1e40671e04de3f9120465a129abbbc6bbd104c0e639d78be6d03be52645dedad5f5c22f7ff0c2d02c115fbf32d6941e38d9aa86867f9e4e38763719d3
|
|
7
|
+
data.tar.gz: 4dcfd975146932d346b4ba693621fd00762b83071b6cf91db73757bec4271d530ab253b42a2e647c661d8e37712682e4cef51c9befa0a5b6717232ddc785b77d
|
data/README.md
CHANGED
|
@@ -31,11 +31,13 @@ The total score is then calculated on a percent basis.
|
|
|
31
31
|
Since the ingredients list could come from an unreliable source (e.g. data scanned from a captured image), the gem tries to fuzzy match the ingredients by using different algorithms:
|
|
32
32
|
* exact matching
|
|
33
33
|
* [edit distance](https://en.wikipedia.org/wiki/Levenshtein_distance) within a specified tolerance
|
|
34
|
+
* known hazards (i.e. ending in `ethicone`)
|
|
34
35
|
* first relevant matching digits
|
|
35
36
|
* matching split tokens
|
|
36
37
|
|
|
37
38
|
### Sources
|
|
38
|
-
The library accepts the list of ingredients as a single string of text.
|
|
39
|
+
The library accepts the list of ingredients as a single string of text.
|
|
40
|
+
Since this source could come from an OCR program, the library performs a normalization by stripping invalid characters and removing the unimportant parts.
|
|
39
41
|
The ingredients are typically separated by commas, although the normalizer will detect the most appropriate separator:
|
|
40
42
|
|
|
41
43
|
```
|
|
@@ -44,6 +46,7 @@ The ingredients are typically separated by comma, although normalizer will detec
|
|
|
44
46
|
|
|
45
47
|
## Installation
|
|
46
48
|
Install the gem from your shell:
|
|
49
|
+
|
|
47
50
|
```shell
|
|
48
51
|
gem install inci_score
|
|
49
52
|
```
|
|
@@ -58,18 +61,21 @@ require "inci_score"
|
|
|
58
61
|
|
|
59
62
|
inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
|
|
60
63
|
inci.score # 56.25
|
|
64
|
+
inci.precision # 100.0
|
|
61
65
|
```
|
|
62
66
|
|
|
63
|
-
As you see the results are wrapped by an *InciScore::Response* object, this is useful when dealing with the CLI
|
|
67
|
+
As you can see, the results are wrapped in an *InciScore::Response* object; this is useful when dealing with the CLI (read below).
|
|
64
68
|
|
|
65
69
|
#### Unrecognized components
|
|
66
|
-
The API treats unrecognized components as a common case by just marking the object as non valid
|
|
70
|
+
The API treats unrecognized components as a common case by just marking the object as invalid.
|
|
67
71
|
In such a case, the score is computed anyway by considering only the recognized components.
|
|
68
|
-
|
|
72
|
+
You can check the `precision` value, which is zero for unrecognized components, and changes based on the applied recognizer rule (100% when exact matching).
|
|
69
73
|
|
|
70
74
|
```ruby
|
|
71
75
|
inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2')
|
|
72
76
|
inci.valid? # false
|
|
77
|
+
inci.score # 100.0
|
|
78
|
+
inci.precision # 33.33
|
|
73
79
|
inci.unrecognized # ["noent1", "noent2"]
|
|
74
80
|
```
|
|
75
81
|
|
|
@@ -82,7 +88,7 @@ inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
|
|
|
82
88
|
TOTAL SCORE:
|
|
83
89
|
53.22
|
|
84
90
|
PRECISION:
|
|
85
|
-
|
|
91
|
+
71.54
|
|
86
92
|
COMPONENTS:
|
|
87
93
|
aqua (0), dimethicone (4), peg-10 (3)
|
|
88
94
|
UNRECOGNIZED:
|
|
@@ -91,6 +97,7 @@ UNRECOGNIZED:
|
|
|
91
97
|
|
|
92
98
|
#### Getting help
|
|
93
99
|
You can get CLI interface help by:
|
|
100
|
+
|
|
94
101
|
```shell
|
|
95
102
|
Usage: inci_score --src="aqua, parfum, etc"
|
|
96
103
|
-s, --src=SRC The INCI list: "aqua, parfum, etc"
|
data/config/catalog.yml
CHANGED
|
@@ -2818,7 +2818,7 @@ pantothenic acid polypeptide: 0
|
|
|
2818
2818
|
papaver orientale: 0
|
|
2819
2819
|
papaver rhoeas: 0
|
|
2820
2820
|
paraffin: 3
|
|
2821
|
-
paraffinum liquidum: 3
|
|
2821
|
+
paraffinum liquidum (mineral oil): 3
|
|
2822
2822
|
parietaria officinalis: 0
|
|
2823
2823
|
passiflora incarnata: 0
|
|
2824
2824
|
passiflora quadrangularis: 0
|
data/lib/inci_score/cli.rb
CHANGED
|
@@ -15,7 +15,7 @@ module InciScore
|
|
|
15
15
|
|
|
16
16
|
def call
|
|
17
17
|
parser.parse!(args)
|
|
18
|
-
return io.puts(%q{Specify
|
|
18
|
+
return io.puts(%q{Specify INCI list as: --src='aqua, parfum, etc'}) unless src
|
|
19
19
|
computer = Computer.new(src: src)
|
|
20
20
|
io.puts computer.call
|
|
21
21
|
end
|
data/lib/inci_score/computer.rb
CHANGED
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
module InciScore
|
|
4
4
|
class Computer
|
|
5
|
-
TOLERANCE =
|
|
5
|
+
TOLERANCE = 50.0
|
|
6
6
|
DECIMALS = 2
|
|
7
7
|
|
|
8
|
-
attr_reader :src, :rules, :ingredients, :components, :unrecognized
|
|
8
|
+
attr_reader :src, :rules, :ingredients, :components, :unrecognized, :precisions
|
|
9
9
|
|
|
10
10
|
def initialize(src:, rules: Normalizer::DEFAULT_RULES)
|
|
11
11
|
@unrecognized = []
|
|
12
|
+
@precisions = []
|
|
12
13
|
@src = src
|
|
13
14
|
@rules = rules
|
|
14
15
|
@ingredients = Normalizer.new(src: src, rules: rules).call
|
|
@@ -28,7 +29,7 @@ module InciScore
|
|
|
28
29
|
end
|
|
29
30
|
|
|
30
31
|
def precision
|
|
31
|
-
(
|
|
32
|
+
(precisions.sum / ingredients.size).round(DECIMALS)
|
|
32
33
|
end
|
|
33
34
|
|
|
34
35
|
def valid?
|
|
@@ -39,7 +40,9 @@ module InciScore
|
|
|
39
40
|
|
|
40
41
|
def fetch_components
|
|
41
42
|
ingredients.map do |ingredient|
|
|
42
|
-
Recognizer.new(ingredient)
|
|
43
|
+
recognizer = Recognizer.new(ingredient)
|
|
44
|
+
recognizer.call.tap do |component|
|
|
45
|
+
precisions << recognizer.precision
|
|
43
46
|
unrecognized << ingredient unless component
|
|
44
47
|
end
|
|
45
48
|
end.compact
|
|
@@ -3,19 +3,31 @@
|
|
|
3
3
|
module InciScore
|
|
4
4
|
class Recognizer
|
|
5
5
|
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
|
|
6
|
+
PRECISION_BASE = 4
|
|
6
7
|
|
|
7
8
|
attr_reader :ingredient, :rules, :applied
|
|
9
|
+
attr_accessor :found
|
|
8
10
|
|
|
9
11
|
def initialize(ingredient, rules = DEFAULT_RULES)
|
|
10
12
|
@ingredient = Ingredient.new(ingredient)
|
|
11
13
|
@rules = rules
|
|
12
14
|
@applied = []
|
|
13
|
-
|
|
15
|
+
@found = false
|
|
14
16
|
end
|
|
15
17
|
|
|
16
18
|
def call
|
|
17
19
|
return if ingredient.to_s.empty?
|
|
18
|
-
find_component
|
|
20
|
+
find_component.tap do |c|
|
|
21
|
+
self.found = true if c
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def precision
|
|
26
|
+
return 0.0 unless found
|
|
27
|
+
rule = applied.last
|
|
28
|
+
index = rules.index(rule) + PRECISION_BASE
|
|
29
|
+
ratio = Math.log(index, PRECISION_BASE)
|
|
30
|
+
(100 / ratio).round(2)
|
|
19
31
|
end
|
|
20
32
|
|
|
21
33
|
private
|
data/lib/inci_score/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: inci_score
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.6.0
|
|
4
|
+
version: 4.6.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- costajob
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-01-
|
|
11
|
+
date: 2023-01-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: oj
|