inci_score 2.5.1 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +34 -0
- data/README.md +14 -25
- data/config/catalog.yml +3 -4
- data/config.ru +2 -2
- data/inci_score.gemspec +2 -3
- data/lib/inci_score/{app.rb → api.rb} +1 -1
- data/lib/inci_score/cli.rb +4 -14
- data/lib/inci_score/computer.rb +15 -6
- data/lib/inci_score/ingredient.rb +45 -0
- data/lib/inci_score/normalizer.rb +1 -1
- data/lib/inci_score/normalizer_rules.rb +0 -24
- data/lib/inci_score/recognizer.rb +25 -7
- data/lib/inci_score/recognizer_rules.rb +19 -22
- data/lib/inci_score/refinements.rb +1 -1
- data/lib/inci_score/response.rb +1 -1
- data/lib/inci_score/score.rb +1 -3
- data/lib/inci_score/scorer.rb +7 -9
- data/lib/inci_score/version.rb +1 -1
- data/lib/inci_score.rb +4 -5
- data/spec/bench/levenshtein_bench.rb +17 -0
- data/spec/bench/normalizer_rules_bench.rb +40 -0
- data/spec/bench/recognizer_rules_bench.rb +24 -0
- data/spec/helper.rb +6 -0
- data/spec/integration/api_spec.rb +23 -0
- data/spec/stubs.rb +170 -0
- data/spec/unit/catalog_spec.rb +7 -0
- data/spec/unit/cli_spec.rb +29 -0
- data/spec/unit/computer_spec.rb +31 -0
- data/spec/unit/ingredient_spec.rb +34 -0
- data/spec/unit/levenshtein_spec.rb +19 -0
- data/spec/unit/normalizer_rules_spec.rb +58 -0
- data/spec/unit/normalizer_spec.rb +31 -0
- data/spec/unit/recognizer_rules_spec.rb +46 -0
- data/spec/unit/recognizer_spec.rb +49 -0
- data/spec/unit/response_spec.rb +8 -0
- data/spec/unit/score_spec.rb +12 -0
- data/spec/unit/scorer_spec.rb +11 -0
- data/spec/unit/server_spec.rb +30 -0
- metadata +24 -23
- data/.gitignore +0 -13
- data/.travis.yml +0 -6
- data/bin/console +0 -7
- data/bin/setup +0 -6
- data/lib/inci_score/fetcher.rb +0 -41
- data/log/.gitignore +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3acb19f9ac522f6a50378ec318c6466ae9b67257
|
4
|
+
data.tar.gz: 3d46458825c9d1fb3ff74db0779b329b63c5f97a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ab029ca732a8605502a7263fa3d7fc96ad20be2ca6aaf9b142e599746eae8216bd326ae318b92aed19a24e157456fe63dd99642d52f536c0a3ea0a4cd59e5bd
|
7
|
+
data.tar.gz: 6d23d330b4267abc0d2ba4241c52d0f55d94922bc1dbb776842e3266afd699f366ade1c648f2e76c3ba2a85db7a680aac59257f683d9d85b5fb2b9b352eae536
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
inci_score (3.0.1)
|
5
|
+
RubyInline (~> 3)
|
6
|
+
puma (~> 3)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
RubyInline (3.12.4)
|
12
|
+
ZenTest (~> 4.3)
|
13
|
+
ZenTest (4.11.1)
|
14
|
+
benchmark-ips (2.7.2)
|
15
|
+
minitest (5.10.3)
|
16
|
+
puma (3.10.0)
|
17
|
+
rack (2.0.3)
|
18
|
+
rack-test (0.7.0)
|
19
|
+
rack (>= 1.0, < 3)
|
20
|
+
rake (10.5.0)
|
21
|
+
|
22
|
+
PLATFORMS
|
23
|
+
ruby
|
24
|
+
|
25
|
+
DEPENDENCIES
|
26
|
+
benchmark-ips (~> 2)
|
27
|
+
bundler (~> 1.11)
|
28
|
+
inci_score!
|
29
|
+
minitest (~> 5.0)
|
30
|
+
rack-test (~> 0.6)
|
31
|
+
rake (~> 10.0)
|
32
|
+
|
33
|
+
BUNDLED WITH
|
34
|
+
1.15.4
|
data/README.md
CHANGED
@@ -8,7 +8,6 @@
|
|
8
8
|
* [API](#api)
|
9
9
|
* [Unrecognized components](#unrecognized-components)
|
10
10
|
* [CLI](#cli)
|
11
|
-
* [Refresh catalog](#refresh-catalog)
|
12
11
|
* [HTTP server](#http-server)
|
13
12
|
* [Triggering a request](#triggering-a-request)
|
14
13
|
* [Getting help](#getting-help)
|
@@ -52,9 +51,7 @@ The API of the gem is pretty simple, you can open irb by *bundle console* and st
|
|
52
51
|
|
53
52
|
```ruby
|
54
53
|
inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
|
55
|
-
|
56
|
-
inci.score
|
57
|
-
=> 53.762874945799766
|
54
|
+
inci.score # 53.7629
|
58
55
|
```
|
59
56
|
|
60
57
|
As you see the results are wrapped by an *InciScore::Response* object, this is useful when dealing with the CLI and HTTP interfaces (read below).
|
@@ -66,11 +63,8 @@ Is still possible to query the object for its state:
|
|
66
63
|
|
67
64
|
```ruby
|
68
65
|
inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2').call
|
69
|
-
|
70
|
-
inci.
|
71
|
-
=> false
|
72
|
-
inci.unrecognized
|
73
|
-
=> ["noent1", "noent2"]
|
66
|
+
inci.valid # false
|
67
|
+
inci.unrecognized # ["noent1", "noent2"]
|
74
68
|
```
|
75
69
|
|
76
70
|
## CLI
|
@@ -80,7 +74,7 @@ You can collect INCI data by using the available CLI interface:
|
|
80
74
|
inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
|
81
75
|
|
82
76
|
TOTAL SCORE:
|
83
|
-
47.
|
77
|
+
47.1803
|
84
78
|
VALID STATE:
|
85
79
|
true
|
86
80
|
COMPONENTS (hazard - name):
|
@@ -91,12 +85,6 @@ UNRECOGNIZED:
|
|
91
85
|
noent
|
92
86
|
```
|
93
87
|
|
94
|
-
### Refresh catalog
|
95
|
-
You also have the option to fetch a fresh catalog from www.biodizionario.it by specifyng a flag:
|
96
|
-
```shell
|
97
|
-
inci_score --fresh --src="aqua, dimethicone"
|
98
|
-
```
|
99
|
-
|
100
88
|
### HTTP server
|
101
89
|
The CLI interface exposes a Web layer based on the [Puma](http://puma.io/) application server.
|
102
90
|
The HTTP server is started on the specified port by spawning as many workers as your current workstation supports:
|
@@ -111,17 +99,16 @@ You can pass the source string directly as a HTTP parameter (URI escaped):
|
|
111
99
|
|
112
100
|
```shell
|
113
101
|
curl http://127.0.0.1:9292?src=aqua,dimethicone
|
114
|
-
=> {"components":{"aqua":0,"dimethicone":4},"unrecognized":[],"score":53.
|
102
|
+
=> {"components":{"aqua":0,"dimethicone":4},"unrecognized":[],"score":53.7629,"valid":true}
|
115
103
|
```
|
116
104
|
|
117
105
|
### Getting help
|
118
106
|
You can get CLI interface help by:
|
119
107
|
```shell
|
120
|
-
Usage: inci_score --src="aqua, parfum, etc" --
|
108
|
+
Usage: inci_score --src="aqua, parfum, etc" --precise
|
121
109
|
-s, --src=SRC The INCI list: "aqua, parfum, etc"
|
122
|
-
-f, --fresh Fetch a fresh catalog from remote
|
123
110
|
-p, --precise Compute components more precisely (slower)
|
124
|
-
--http=PORT Start
|
111
|
+
--http=PORT Start HTTP server on the specified port
|
125
112
|
-h, --help Prints this help
|
126
113
|
```
|
127
114
|
|
@@ -140,13 +127,15 @@ I registered these benchmarks with a MacBook PRO 15 mid 2015 having these specs:
|
|
140
127
|
* Ruby 2.4
|
141
128
|
|
142
129
|
### Wrk
|
143
|
-
As always i used [wrk](https://github.com/wg/wrk) as the loading tool
|
130
|
+
As always i used [wrk](https://github.com/wg/wrk) as the loading tool.
|
144
131
|
I measured the library three times, picking the best lap.
|
145
132
|
```shell
|
146
|
-
wrk -t 4 -c 100 -d 30s --timeout 2000 http://0.0.0.0:9292/?src=
|
133
|
+
wrk -t 4 -c 100 -d 30s --timeout 2000 "http://0.0.0.0:9292/?src=<source>&precise=true"
|
147
134
|
```
|
148
135
|
|
149
136
|
### Results
|
150
|
-
| Throughput (req/s) |
|
151
|
-
|
|
152
|
-
|
|
137
|
+
| Source | Throughput (req/s) |
|
138
|
+
| --------------------------: | -----------------: |
|
139
|
+
| aqua,parfum,zeolite | 18784.21 |
|
140
|
+
| agua,porfum,zeolithe | 1087.88 |
|
141
|
+
| agua/water,porfum/fragrance | 1599.47 |
|
data/config/catalog.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
parfum:
|
4
|
-
fragrance:
|
2
|
+
aqua: 0
|
3
|
+
parfum: 3
|
4
|
+
fragrance: 3
|
5
5
|
phosphatidylcholine: 1
|
6
6
|
1-naphthol: 4
|
7
7
|
1,2,4-benzenetriacetate: 4
|
@@ -296,7 +296,6 @@ apricot kernel oil peg-6 esters: 3
|
|
296
296
|
apricotamide dea: 3
|
297
297
|
apricotamidopropyl betaine: 2
|
298
298
|
apricotamidopropyl ethyldimonium ethosulfate: 2
|
299
|
-
aqua: 0
|
300
299
|
arachideth-20: 3
|
301
300
|
arachidic acid: 1
|
302
301
|
arachidonic acid: 1
|
data/config.ru
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/api"
|
2
2
|
|
3
|
-
run InciScore::
|
3
|
+
run InciScore::Api
|
data/inci_score.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
lib = File.expand_path('../lib', __FILE__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
-
require
|
3
|
+
require "inci_score/version"
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "inci_score"
|
@@ -9,14 +9,13 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.email = ["costajob@gmail.com"]
|
10
10
|
s.summary = %q{A library that computes the hazard of cosmetic products components, based on the Biodizionario data.}
|
11
11
|
s.homepage = "https://github.com/costajob/inci_score.git"
|
12
|
-
s.files =
|
12
|
+
s.files = %w(README.md Rakefile inci_score.gemspec Gemfile Gemfile.lock config.ru config/catalog.yml ext/levenshtein.c bin/inci_score) + Dir["{spec,lib}/**/*.rb"]
|
13
13
|
s.bindir = "bin"
|
14
14
|
s.executables << "inci_score"
|
15
15
|
s.require_paths = ["lib"]
|
16
16
|
s.license = "MIT"
|
17
17
|
s.required_ruby_version = ">= 2.2.2"
|
18
18
|
|
19
|
-
s.add_runtime_dependency "nokogiri", "~> 1.6"
|
20
19
|
s.add_runtime_dependency "puma", "~> 3"
|
21
20
|
s.add_runtime_dependency "RubyInline", "~> 3"
|
22
21
|
|
data/lib/inci_score/cli.rb
CHANGED
@@ -9,35 +9,30 @@ module InciScore
|
|
9
9
|
@io = io
|
10
10
|
@catalog = catalog
|
11
11
|
@src = nil
|
12
|
-
@fresh = nil
|
13
12
|
@port = nil
|
14
13
|
@precise = nil
|
15
14
|
end
|
16
15
|
|
17
|
-
def call(server_klass: Server, computer_klass: Computer
|
16
|
+
def call(server_klass: Server, computer_klass: Computer)
|
18
17
|
parser.parse!(@args)
|
19
18
|
return server_klass.new(port: @port, preload: true).run if @port
|
20
19
|
return @io.puts(%q{Specify inci list as: --src="aqua, parfum, etc"}) unless @src
|
21
|
-
@io.puts computer_klass.new(src: @src, catalog: catalog
|
20
|
+
@io.puts computer_klass.new(src: @src, catalog: @catalog, precise: @precise).call
|
22
21
|
end
|
23
22
|
|
24
23
|
private def parser
|
25
24
|
OptionParser.new do |opts|
|
26
|
-
opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --
|
25
|
+
opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --precise}
|
27
26
|
|
28
27
|
opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
|
29
28
|
@src = src
|
30
29
|
end
|
31
30
|
|
32
|
-
opts.on("-f", "--fresh", "Fetch a fresh catalog from remote") do |fresh|
|
33
|
-
@fresh = fresh
|
34
|
-
end
|
35
|
-
|
36
31
|
opts.on("-p", "--precise", "Compute components more precisely (slower)") do |precise|
|
37
32
|
@precise = precise
|
38
33
|
end
|
39
34
|
|
40
|
-
opts.on("--http=PORT", "Start
|
35
|
+
opts.on("--http=PORT", "Start HTTP server on the specified port") do |port|
|
41
36
|
@port = port
|
42
37
|
end
|
43
38
|
|
@@ -47,10 +42,5 @@ module InciScore
|
|
47
42
|
end
|
48
43
|
end
|
49
44
|
end
|
50
|
-
|
51
|
-
private def catalog(fetcher)
|
52
|
-
return @catalog unless @fresh
|
53
|
-
fetcher.call
|
54
|
-
end
|
55
45
|
end
|
56
46
|
end
|
data/lib/inci_score/computer.rb
CHANGED
@@ -1,13 +1,19 @@
|
|
1
|
+
require "inci_score/ingredient"
|
1
2
|
require "inci_score/normalizer"
|
2
3
|
require "inci_score/recognizer"
|
3
|
-
require "inci_score/scorer"
|
4
4
|
require "inci_score/response"
|
5
|
+
require "inci_score/scorer"
|
5
6
|
|
6
7
|
module InciScore
|
7
8
|
class Computer
|
8
9
|
TOLERANCE = 30.0
|
10
|
+
PERCENT = 100.0
|
9
11
|
|
10
|
-
def initialize(src:,
|
12
|
+
def initialize(src:,
|
13
|
+
catalog: Catalog.fetch,
|
14
|
+
tolerance: TOLERANCE,
|
15
|
+
rules: Normalizer::DEFAULT_RULES,
|
16
|
+
precise: false)
|
11
17
|
@src = src
|
12
18
|
@catalog = catalog
|
13
19
|
@tolerance = Float(tolerance)
|
@@ -17,18 +23,21 @@ module InciScore
|
|
17
23
|
end
|
18
24
|
|
19
25
|
def call
|
20
|
-
@response ||= Response.new(components: components.map(&:
|
26
|
+
@response ||= Response.new(components: components.map(&:name),
|
21
27
|
unrecognized: @unrecognized,
|
22
28
|
score: score,
|
23
29
|
valid: valid?)
|
24
30
|
end
|
25
31
|
|
26
32
|
private def score
|
27
|
-
Scorer.new(components.map(&:
|
33
|
+
Scorer.new(components.map(&:hazard)).call
|
28
34
|
end
|
29
35
|
|
30
36
|
private def ingredients
|
31
|
-
@ingredients ||=
|
37
|
+
@ingredients ||= begin
|
38
|
+
tokens = Normalizer.new(src: @src, rules: @rules).call
|
39
|
+
Ingredient.bulk(tokens)
|
40
|
+
end
|
32
41
|
end
|
33
42
|
|
34
43
|
private def components
|
@@ -40,7 +49,7 @@ module InciScore
|
|
40
49
|
end
|
41
50
|
|
42
51
|
private def valid?
|
43
|
-
@unrecognized.size / (ingredients.size /
|
52
|
+
@unrecognized.size / (ingredients.size / PERCENT) <= @tolerance
|
44
53
|
end
|
45
54
|
end
|
46
55
|
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module InciScore
|
2
|
+
class Ingredient
|
3
|
+
SLASH = "/"
|
4
|
+
SLASH_RULE = /(?<!ate)\//
|
5
|
+
PARENTHESIS = %w[( ) [ ]]
|
6
|
+
DETAILS_RULE = /(\(.+\)|\[.+\])/
|
7
|
+
|
8
|
+
def self.bulk(tokens)
|
9
|
+
tokens.map { |raw| new(raw) }
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize(raw)
|
13
|
+
@raw = raw
|
14
|
+
@tokens = raw.split(SLASH_RULE).map(&:strip)
|
15
|
+
end
|
16
|
+
|
17
|
+
def to_s
|
18
|
+
values.join(SLASH)
|
19
|
+
end
|
20
|
+
|
21
|
+
def values
|
22
|
+
@values ||= synonims.unshift(name).compact
|
23
|
+
end
|
24
|
+
|
25
|
+
private def name
|
26
|
+
return @tokens.first unless parenthesis?
|
27
|
+
@raw.sub(DETAILS_RULE, "").strip
|
28
|
+
end
|
29
|
+
|
30
|
+
private def synonims
|
31
|
+
@tokens[1, @tokens.size]
|
32
|
+
end
|
33
|
+
|
34
|
+
private def details
|
35
|
+
return unless parenthesis?
|
36
|
+
@raw.match(DETAILS_RULE)[1].delete(PARENTHESIS.join("|"))
|
37
|
+
end
|
38
|
+
|
39
|
+
private def parenthesis?
|
40
|
+
PARENTHESIS.each_slice(2).any? do |pair|
|
41
|
+
pair.all? { |p| @raw.index(p) }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -2,7 +2,7 @@ require "inci_score/normalizer_rules"
|
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Normalizer
|
5
|
-
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::
|
5
|
+
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Uniquifier]
|
6
6
|
|
7
7
|
attr_reader :src
|
8
8
|
|
@@ -77,30 +77,6 @@ module InciScore
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
|
-
module Desynonymizer
|
81
|
-
extend self
|
82
|
-
|
83
|
-
SYNONYM = /\/.*/
|
84
|
-
|
85
|
-
def call(src)
|
86
|
-
Array(src).map do |token|
|
87
|
-
token.sub(SYNONYM, '').strip
|
88
|
-
end.reject(&:empty?)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
module Deparenthesizer
|
93
|
-
extend self
|
94
|
-
|
95
|
-
PARENTHESIS = /\(.+?\)|\[.+?\]/
|
96
|
-
|
97
|
-
def call(src)
|
98
|
-
Array(src).map do |token|
|
99
|
-
token.sub(PARENTHESIS, '').strip
|
100
|
-
end.reject(&:empty?)
|
101
|
-
end
|
102
|
-
end
|
103
|
-
|
104
80
|
module Uniquifier
|
105
81
|
extend self
|
106
82
|
|
@@ -4,19 +4,37 @@ module InciScore
|
|
4
4
|
class Recognizer
|
5
5
|
DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Digits, Rules::Tokens]
|
6
6
|
|
7
|
-
|
8
|
-
|
7
|
+
Component = Struct.new(:name, :hazard)
|
8
|
+
|
9
|
+
attr_reader :applied
|
10
|
+
|
11
|
+
def initialize(ingredient, catalog, rules = DEFAULT_RULES)
|
12
|
+
@ingredient = ingredient
|
9
13
|
@catalog = catalog
|
10
14
|
@rules = rules
|
15
|
+
@applied = []
|
11
16
|
end
|
12
17
|
|
13
18
|
def call(precise = false)
|
14
|
-
|
19
|
+
return if @ingredient.to_s.empty?
|
20
|
+
component = find_component(precise)
|
21
|
+
return unless component
|
22
|
+
Component.new(component, @catalog[component])
|
23
|
+
end
|
24
|
+
|
25
|
+
private def find_component(precise)
|
26
|
+
@rules.reduce(nil) do |component, rule|
|
15
27
|
break(component) if component
|
16
|
-
|
17
|
-
rule
|
28
|
+
applied << rule
|
29
|
+
apply(rule, precise)
|
18
30
|
end
|
19
|
-
|
20
|
-
|
31
|
+
end
|
32
|
+
|
33
|
+
private def apply(rule, precise)
|
34
|
+
return rule.call(@ingredient.to_s, @catalog) unless precise
|
35
|
+
@ingredient.values.map do |value|
|
36
|
+
rule.call(value, @catalog)
|
37
|
+
end.find(&:itself)
|
38
|
+
end
|
21
39
|
end
|
22
40
|
end
|
@@ -9,7 +9,7 @@ module InciScore
|
|
9
9
|
module Key
|
10
10
|
extend self
|
11
11
|
|
12
|
-
def call(src, catalog
|
12
|
+
def call(src, catalog)
|
13
13
|
src if catalog.has_key?(src)
|
14
14
|
end
|
15
15
|
end
|
@@ -17,20 +17,24 @@ module InciScore
|
|
17
17
|
module Levenshtein
|
18
18
|
extend self
|
19
19
|
|
20
|
-
|
20
|
+
Result = Struct.new(:name, :distance) do
|
21
|
+
def tolerable?(size)
|
22
|
+
distance < TOLERANCE && distance <= (size-1)
|
23
|
+
end
|
24
|
+
end
|
21
25
|
|
22
|
-
def call(src, catalog
|
26
|
+
def call(src, catalog)
|
23
27
|
size = src.size
|
28
|
+
farthest = Result.new(nil, size)
|
24
29
|
initial = src[0]
|
25
|
-
|
26
|
-
next
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
min
|
30
|
+
result = catalog.reduce(farthest) do |nearest, (component, _)|
|
31
|
+
next nearest unless component.start_with?(initial)
|
32
|
+
next nearest if component.size > (size + TOLERANCE)
|
33
|
+
d = src.distance(component)
|
34
|
+
nearest = Result.new(component, d) if d < nearest.distance
|
35
|
+
nearest
|
32
36
|
end
|
33
|
-
|
37
|
+
result.name if result.tolerable?(size)
|
34
38
|
end
|
35
39
|
end
|
36
40
|
|
@@ -39,17 +43,10 @@ module InciScore
|
|
39
43
|
|
40
44
|
MIN_MEANINGFUL = 7
|
41
45
|
|
42
|
-
def call(src, catalog
|
46
|
+
def call(src, catalog)
|
43
47
|
return if src.size < TOLERANCE
|
44
|
-
digits = src[0,
|
45
|
-
catalog.detect
|
46
|
-
component.matches?(/^#{Regexp::escape(digits)}/)
|
47
|
-
end.to_a.first
|
48
|
-
end
|
49
|
-
|
50
|
-
def min_meaningful(precise)
|
51
|
-
return MIN_MEANINGFUL unless precise
|
52
|
-
MIN_MEANINGFUL + 2
|
48
|
+
digits = src[0, MIN_MEANINGFUL]
|
49
|
+
catalog.detect { |component, _| component.start_with?(digits) }.to_a.first
|
53
50
|
end
|
54
51
|
end
|
55
52
|
|
@@ -58,7 +55,7 @@ module InciScore
|
|
58
55
|
|
59
56
|
UNMATCHABLE = %w[extract oil sodium acid sulfate]
|
60
57
|
|
61
|
-
def call(src, catalog
|
58
|
+
def call(src, catalog)
|
62
59
|
tokens(src).each do |token|
|
63
60
|
catalog.each do |component, _|
|
64
61
|
return component if component.matches?(/\b#{Regexp.escape(token)}\b/)
|
data/lib/inci_score/response.rb
CHANGED
data/lib/inci_score/score.rb
CHANGED
data/lib/inci_score/scorer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/score"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Scorer
|
@@ -12,33 +12,31 @@ module InciScore
|
|
12
12
|
|
13
13
|
def call
|
14
14
|
return 0 if @hazards.empty?
|
15
|
-
100 - avg * HAZARD_PERCENT
|
15
|
+
(100 - avg * HAZARD_PERCENT).round(4)
|
16
16
|
end
|
17
17
|
|
18
|
-
private
|
19
|
-
|
20
|
-
def avg
|
18
|
+
private def avg
|
21
19
|
avg_weighted / @size.to_f
|
22
20
|
end
|
23
21
|
|
24
|
-
def avg_weighted
|
22
|
+
private def avg_weighted
|
25
23
|
return @hazards.reduce(&:+) if same_hazard?
|
26
24
|
weighted.reduce(0.0) do |acc,score|
|
27
25
|
acc += score.value
|
28
26
|
end
|
29
27
|
end
|
30
28
|
|
31
|
-
def same_hazard?
|
29
|
+
private def same_hazard?
|
32
30
|
@hazards.uniq.size == 1
|
33
31
|
end
|
34
32
|
|
35
|
-
def weighted
|
33
|
+
private def weighted
|
36
34
|
@hazards.each_with_index.map do |h,i|
|
37
35
|
Score.new(h, weight(i))
|
38
36
|
end
|
39
37
|
end
|
40
38
|
|
41
|
-
def weight(index)
|
39
|
+
private def weight(index)
|
42
40
|
Math.log(index+1, @size * WEIGHT_FACTOR)
|
43
41
|
end
|
44
42
|
end
|
data/lib/inci_score/version.rb
CHANGED
data/lib/inci_score.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require 'inci_score/cli'
|
1
|
+
require "inci_score/version"
|
2
|
+
require "inci_score/catalog"
|
3
|
+
require "inci_score/cli"
|
4
|
+
require "inci_score/api"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
s, t = "agua", "aqua"
|
4
|
+
lev_ruby = InciScore::Levenshtein.new(s, t)
|
5
|
+
lev_c = InciScore::LevenshteinC.new
|
6
|
+
|
7
|
+
Benchmark.ips do |x|
|
8
|
+
x.report("levenshtein ruby") do
|
9
|
+
lev_ruby.call
|
10
|
+
end
|
11
|
+
|
12
|
+
x.report("levenshtein C") do
|
13
|
+
lev_c.call(s, s.size, t, t.size)
|
14
|
+
end
|
15
|
+
|
16
|
+
x.compare!
|
17
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
replacer = InciScore::Normalizer::Rules::Replacer
|
4
|
+
downcaser = InciScore::Normalizer::Rules::Downcaser
|
5
|
+
beheader = InciScore::Normalizer::Rules::Beheader
|
6
|
+
separator = InciScore::Normalizer::Rules::Separator
|
7
|
+
tokenizer = InciScore::Normalizer::Rules::Tokenizer
|
8
|
+
sanitizer = InciScore::Normalizer::Rules::Sanitizer
|
9
|
+
uniquifier = InciScore::Normalizer::Rules::Uniquifier
|
10
|
+
src = "‘INGREDIENTS‘:\n\nCOCO—BETANE,AQUA/WATER,DIMETHICONE"
|
11
|
+
|
12
|
+
Benchmark.ips do |x|
|
13
|
+
x.report("replacer") do
|
14
|
+
replacer.call(src)
|
15
|
+
end
|
16
|
+
|
17
|
+
x.report("downcaser") do
|
18
|
+
downcaser.call(src)
|
19
|
+
end
|
20
|
+
|
21
|
+
x.report("beheader") do
|
22
|
+
beheader.call(src)
|
23
|
+
end
|
24
|
+
|
25
|
+
x.report("separator") do
|
26
|
+
separator.call(src)
|
27
|
+
end
|
28
|
+
|
29
|
+
x.report("tokenizer") do
|
30
|
+
tokenizer.call(src)
|
31
|
+
end
|
32
|
+
|
33
|
+
x.report("sanitizer") do
|
34
|
+
sanitizer.call(src)
|
35
|
+
end
|
36
|
+
|
37
|
+
x.report("uniquifier") do
|
38
|
+
uniquifier.call(src)
|
39
|
+
end
|
40
|
+
end
|