inci_score 4.6.0 → 4.6.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c1ec290c92d1fb34798dbd04ec8f93a6ad1db12fd124ff2f7d0c4a50c108aa7
4
- data.tar.gz: dad169f8a2486aeeac3a22df47097235d965b9435350d4800d662c9022f0a36c
3
+ metadata.gz: fbbf88644bed7f8afcd802d796c2b49a2428a2eac2bbf68761079b83bc485866
4
+ data.tar.gz: 014d402eacfdb5512b350fdb7131dee8ebc793df1091153b30c04e000e47d161
5
5
  SHA512:
6
- metadata.gz: 34394e8b825952f6ff98696ba899592f23a0acd8e3b95331c1d645903ac9795569dc6e115ff1dca5df0ae4aee551b54aae0e32b91faa302b4338b4e37769d797
7
- data.tar.gz: 1b932c8c80adfc5b0989383a7c31c7b78432890dd3b86c54c887b1d3146f6629332cb7abd16e50b70343113bf3beb76233f4cd0d77fd684e26e7377ca0a8ee7f
6
+ metadata.gz: 44d96ce1e40671e04de3f9120465a129abbbc6bbd104c0e639d78be6d03be52645dedad5f5c22f7ff0c2d02c115fbf32d6941e38d9aa86867f9e4e38763719d3
7
+ data.tar.gz: 4dcfd975146932d346b4ba693621fd00762b83071b6cf91db73757bec4271d530ab253b42a2e647c661d8e37712682e4cef51c9befa0a5b6717232ddc785b77d
data/README.md CHANGED
@@ -31,11 +31,13 @@ The total score is then calculated on a percent basis.
31
31
  Since the ingredients list could come from an unreliable source (e.g. data scanned from a captured image), the gem tries to fuzzy match the ingredients by using different algorithms:
32
32
  * exact matching
33
33
  * [edit distance](https://en.wikipedia.org/wiki/Levenshtein_distance) within a specified tolerance
34
+ * known hazards (i.e. ending in `ethicone`)
34
35
  * first relevant matching digits
35
36
  * matching split tokens
36
37
 
37
38
  ### Sources
38
- The library accepts the list of ingredients as a single string of text. Since this source could come from an OCR program, the library performs a normalization by stripping invalid characters and removing the unimportant parts.
39
+ The library accepts the list of ingredients as a single string of text.
40
+ Since this source could come from an OCR program, the library performs a normalization by stripping invalid characters and removing the unimportant parts.
39
41
  The ingredients are typically separated by commas, although the normalizer will detect the most appropriate separator:
40
42
 
41
43
  ```
@@ -44,6 +46,7 @@ The ingredients are typically separated by comma, although normalizer will detec
44
46
 
45
47
  ## Installation
46
48
  Install the gem from your shell:
49
+
47
50
  ```shell
48
51
  gem install inci_score
49
52
  ```
@@ -58,18 +61,21 @@ require "inci_score"
58
61
 
59
62
  inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
60
63
  inci.score # 56.25
64
+ inci.precision # 100.0
61
65
  ```
62
66
 
63
- As you see the results are wrapped by an *InciScore::Response* object, this is useful when dealing with the CLI and HTTP interfaces (read below).
67
+ As you can see, the results are wrapped in an *InciScore::Response* object; this is useful when dealing with the CLI (read below).
64
68
 
65
69
  #### Unrecognized components
66
- The API treats unrecognized components as a common case by just marking the object as non valid and raise a warning in case more than 30% of the ingredients are not found.
70
+ The API treats unrecognized components as a common case: it simply marks the object as not valid.
67
71
  In that case the score is still computed, considering only the recognized components.
68
- Is still possible to query the object for its state:
72
+ You can check the `precision` value, which is zero for unrecognized components and otherwise depends on the recognizer rule that was applied (100% for an exact match).
69
73
 
70
74
  ```ruby
71
75
  inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2')
72
76
  inci.valid? # false
77
+ inci.score # 100.0
78
+ inci.precision # 33.33
73
79
  inci.unrecognized # ["noent1", "noent2"]
74
80
  ```
75
81
 
@@ -82,7 +88,7 @@ inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
82
88
  TOTAL SCORE:
83
89
  53.22
84
90
  PRECISION:
85
- 75.0
91
+ 71.54
86
92
  COMPONENTS:
87
93
  aqua (0), dimethicone (4), peg-10 (3)
88
94
  UNRECOGNIZED:
@@ -91,6 +97,7 @@ UNRECOGNIZED:
91
97
 
92
98
  #### Getting help
93
99
  You can get help on the CLI with:
100
+
94
101
  ```shell
95
102
  Usage: inci_score --src="aqua, parfum, etc"
96
103
  -s, --src=SRC The INCI list: "aqua, parfum, etc"
data/config/catalog.yml CHANGED
@@ -2818,7 +2818,7 @@ pantothenic acid polypeptide: 0
2818
2818
  papaver orientale: 0
2819
2819
  papaver rhoeas: 0
2820
2820
  paraffin: 3
2821
- paraffinum liquidum: 3
2821
+ paraffinum liquidum (mineral oil): 3
2822
2822
  parietaria officinalis: 0
2823
2823
  passiflora incarnata: 0
2824
2824
  passiflora quadrangularis: 0
@@ -15,7 +15,7 @@ module InciScore
15
15
 
16
16
  def call
17
17
  parser.parse!(args)
18
- return io.puts(%q{Specify inci list as: --src='aqua, parfum, etc'}) unless src
18
+ return io.puts(%q{Specify INCI list as: --src='aqua, parfum, etc'}) unless src
19
19
  computer = Computer.new(src: src)
20
20
  io.puts computer.call
21
21
  end
@@ -2,13 +2,14 @@
2
2
 
3
3
  module InciScore
4
4
  class Computer
5
- TOLERANCE = 30.0
5
+ TOLERANCE = 50.0
6
6
  DECIMALS = 2
7
7
 
8
- attr_reader :src, :rules, :ingredients, :components, :unrecognized
8
+ attr_reader :src, :rules, :ingredients, :components, :unrecognized, :precisions
9
9
 
10
10
  def initialize(src:, rules: Normalizer::DEFAULT_RULES)
11
11
  @unrecognized = []
12
+ @precisions = []
12
13
  @src = src
13
14
  @rules = rules
14
15
  @ingredients = Normalizer.new(src: src, rules: rules).call
@@ -28,7 +29,7 @@ module InciScore
28
29
  end
29
30
 
30
31
  def precision
31
- (100 - ((unrecognized.size / Float(ingredients.size)) * 100)).round(DECIMALS)
32
+ (precisions.sum / ingredients.size).round(DECIMALS)
32
33
  end
33
34
 
34
35
  def valid?
@@ -39,7 +40,9 @@ module InciScore
39
40
 
40
41
  def fetch_components
41
42
  ingredients.map do |ingredient|
42
- Recognizer.new(ingredient).call.tap do |component|
43
+ recognizer = Recognizer.new(ingredient)
44
+ recognizer.call.tap do |component|
45
+ precisions << recognizer.precision
43
46
  unrecognized << ingredient unless component
44
47
  end
45
48
  end.compact
@@ -3,19 +3,31 @@
3
3
  module InciScore
4
4
  class Recognizer
5
5
  DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Hazard, Rules::Prefix, Rules::Tokens].freeze
6
+ PRECISION_BASE = 4
6
7
 
7
8
  attr_reader :ingredient, :rules, :applied
9
+ attr_accessor :found
8
10
 
9
11
  def initialize(ingredient, rules = DEFAULT_RULES)
10
12
  @ingredient = Ingredient.new(ingredient)
11
13
  @rules = rules
12
14
  @applied = []
13
- freeze
15
+ @found = false
14
16
  end
15
17
 
16
18
  def call
17
19
  return if ingredient.to_s.empty?
18
- find_component
20
+ find_component.tap do |c|
21
+ self.found = true if c
22
+ end
23
+ end
24
+
25
+ def precision
26
+ return 0.0 unless found
27
+ rule = applied.last
28
+ index = rules.index(rule) + PRECISION_BASE
29
+ ratio = Math.log(index, PRECISION_BASE)
30
+ (100 / ratio).round(2)
19
31
  end
20
32
 
21
33
  private
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module InciScore
4
- VERSION = '4.6.0'
4
+ VERSION = '4.6.3'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: inci_score
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.6.0
4
+ version: 4.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - costajob
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-05 00:00:00.000000000 Z
11
+ date: 2023-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: oj