inci_score 2.2.1 → 2.3.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -5
- data/Rakefile +11 -11
- data/bin/console +4 -4
- data/lib/inci_score/app.rb +5 -4
- data/lib/inci_score/catalog.rb +2 -2
- data/lib/inci_score/cli.rb +9 -4
- data/lib/inci_score/computer.rb +7 -6
- data/lib/inci_score/fetcher.rb +5 -5
- data/lib/inci_score/levenshtein.rb +2 -2
- data/lib/inci_score/normalizer.rb +5 -5
- data/lib/inci_score/normalizer_rules.rb +8 -0
- data/lib/inci_score/recognizer.rb +3 -3
- data/lib/inci_score/recognizer_rules.rb +14 -9
- data/lib/inci_score/server.rb +13 -4
- data/lib/inci_score/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d5e69b680cde0fed823fd4672b8b4ec16638fe9
|
4
|
+
data.tar.gz: 488e31a72fd25148a61b32c5fa2a589b6ef438c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e9eb717b3460fd139330acd40f7eec880b8b42f6c806f2ff71efdd9f8b2f8a4376a82c2f4133fcc619fa102781e7b96f4a17bb8bb8b81ed245ba11e76a85271
|
7
|
+
data.tar.gz: 7b29bf4e4ab728772be548ba3f81f87ecb6e4b14d8f8e6952a194469314d1445996d9761c90372d19fa99de3030a61f33be7761aaddc98580bc72ce0100ac020
|
data/README.md
CHANGED
@@ -145,11 +145,10 @@ I measured the library three times, picking the best lap.
|
|
145
145
|
The following script command is used:
|
146
146
|
|
147
147
|
```shell
|
148
|
-
wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src
|
148
|
+
wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src=aqua,parfum,zeolithe
|
149
149
|
```
|
150
150
|
|
151
151
|
### Results
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
| agua,porfum,zaolite | 953.44 | 14.67/5.15/82.31 |
|
152
|
+
| Throughput (req/s) | Latency in ms (avg/stdev/max) |
|
153
|
+
| -----------------: | ----------------------------: |
|
154
|
+
| 50060.92 | 0.31/0.45/3.83 |
|
data/Rakefile
CHANGED
@@ -1,23 +1,23 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
3
|
|
4
4
|
namespace :spec do
|
5
5
|
Rake::TestTask.new(:unit) do |t|
|
6
|
-
t.libs <<
|
7
|
-
t.libs <<
|
8
|
-
t.test_files = FileList[
|
6
|
+
t.libs << "spec"
|
7
|
+
t.libs << "lib"
|
8
|
+
t.test_files = FileList["spec/unit/*_spec.rb"]
|
9
9
|
end
|
10
10
|
|
11
11
|
Rake::TestTask.new(:integration) do |t|
|
12
|
-
t.libs <<
|
13
|
-
t.libs <<
|
14
|
-
t.test_files = FileList[
|
12
|
+
t.libs << "spec"
|
13
|
+
t.libs << "lib"
|
14
|
+
t.test_files = FileList["spec/integration/*_spec.rb"]
|
15
15
|
end
|
16
16
|
|
17
17
|
Rake::TestTask.new(:bench) do |t|
|
18
|
-
t.libs <<
|
19
|
-
t.libs <<
|
20
|
-
t.test_files = FileList[
|
18
|
+
t.libs << "spec"
|
19
|
+
t.libs << "lib"
|
20
|
+
t.test_files = FileList["spec/bench/*_bench.rb"]
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
data/bin/console
CHANGED
data/lib/inci_score/app.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "rack"
|
2
|
+
require "inci_score"
|
3
3
|
|
4
4
|
module InciScore
|
5
5
|
module App
|
@@ -12,8 +12,9 @@ module InciScore
|
|
12
12
|
def call(env)
|
13
13
|
req = Rack::Request.new(env)
|
14
14
|
src = req.params["src"]
|
15
|
-
|
16
|
-
|
15
|
+
precise = req.params["precise"]
|
16
|
+
json = src ? Computer.new(src: src, catalog: catalog, precise: !!precise).call.to_json : %q({"error": "no valid source"})
|
17
|
+
["200", {"Content-Type" => "application/json"}, [json]]
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
data/lib/inci_score/catalog.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
1
|
+
require "yaml"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
module Catalog
|
5
5
|
extend self
|
6
6
|
|
7
|
-
YAML_PATH = File::expand_path(
|
7
|
+
YAML_PATH = File::expand_path("../../../config/catalog.yml", __FILE__)
|
8
8
|
|
9
9
|
def fetch(src = File.read(YAML_PATH))
|
10
10
|
YAML::load(src)
|
data/lib/inci_score/cli.rb
CHANGED
@@ -11,20 +11,21 @@ module InciScore
|
|
11
11
|
@src = nil
|
12
12
|
@fresh = nil
|
13
13
|
@port = nil
|
14
|
+
@precise = nil
|
14
15
|
end
|
15
16
|
|
16
17
|
def call(server_klass: Server, computer_klass: Computer, fetcher: Fetcher.new)
|
17
18
|
parser.parse!(@args)
|
18
19
|
return server_klass.new(port: @port, preload: true).run if @port
|
19
|
-
return @io.puts(
|
20
|
-
@io.puts computer_klass.new(src: @src, catalog: catalog(fetcher)).call
|
20
|
+
return @io.puts(%q{Specify inci list as: --src="aqua, parfum, etc"}) unless @src
|
21
|
+
@io.puts computer_klass.new(src: @src, catalog: catalog(fetcher), precise: @precise).call
|
21
22
|
end
|
22
23
|
|
23
24
|
private def parser
|
24
25
|
OptionParser.new do |opts|
|
25
|
-
opts.banner = %q{Usage:
|
26
|
+
opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --fresh --precise}
|
26
27
|
|
27
|
-
opts.on("-sSRC", "--src=SRC",
|
28
|
+
opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
|
28
29
|
@src = src
|
29
30
|
end
|
30
31
|
|
@@ -32,6 +33,10 @@ module InciScore
|
|
32
33
|
@fresh = fresh
|
33
34
|
end
|
34
35
|
|
36
|
+
opts.on("-p", "--precise", "Compute components more precisely (slower)") do |precise|
|
37
|
+
@precise = precise
|
38
|
+
end
|
39
|
+
|
35
40
|
opts.on("--http=PORT", "Start Puma server on the specified port") do |port|
|
36
41
|
@port = port
|
37
42
|
end
|
data/lib/inci_score/computer.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require "inci_score/normalizer"
|
2
|
+
require "inci_score/recognizer"
|
3
|
+
require "inci_score/scorer"
|
4
|
+
require "inci_score/response"
|
5
5
|
|
6
6
|
module InciScore
|
7
7
|
class Computer
|
8
8
|
TOLERANCE = 30.0
|
9
9
|
|
10
|
-
def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES)
|
10
|
+
def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES, precise: false)
|
11
11
|
@src = src
|
12
12
|
@catalog = catalog
|
13
13
|
@tolerance = Float(tolerance)
|
14
14
|
@rules = rules
|
15
15
|
@unrecognized = []
|
16
|
+
@precise = precise
|
16
17
|
end
|
17
18
|
|
18
19
|
def call
|
@@ -32,7 +33,7 @@ module InciScore
|
|
32
33
|
|
33
34
|
private def components
|
34
35
|
@components ||= ingredients.map do |ingredient|
|
35
|
-
Recognizer.new(ingredient, @catalog).call.tap do |component|
|
36
|
+
Recognizer.new(ingredient, @catalog).call(@precise).tap do |component|
|
36
37
|
@unrecognized << ingredient unless component
|
37
38
|
end
|
38
39
|
end.compact
|
data/lib/inci_score/fetcher.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
1
|
+
require "nokogiri"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Fetcher
|
5
|
-
BIODIZIO_URI =
|
5
|
+
BIODIZIO_URI = "http://www.biodizionario.it/biodizio.php"
|
6
6
|
SEMAPHORES = %w[vv v g r rr]
|
7
|
-
CSS_QUERY =
|
7
|
+
CSS_QUERY = %q{table[width="751"] > tr > td img}
|
8
8
|
|
9
9
|
def initialize(src = nil)
|
10
10
|
@src = src || Thread.new { open(BIODIZIO_URI) }
|
@@ -12,7 +12,7 @@ module InciScore
|
|
12
12
|
|
13
13
|
def call
|
14
14
|
@components ||= Nokogiri::HTML(doc).css(CSS_QUERY).inject({}) do |acc, img|
|
15
|
-
hazard = semaphore(img.attr(
|
15
|
+
hazard = semaphore(img.attr("src"))
|
16
16
|
name = img.next_sibling.next_sibling
|
17
17
|
desc = name.next_sibling.next_sibling
|
18
18
|
name, desc = desc, name if swap?(desc.text)
|
@@ -26,7 +26,7 @@ module InciScore
|
|
26
26
|
end
|
27
27
|
|
28
28
|
private def semaphore(src)
|
29
|
-
src.match(/(#{SEMAPHORES.join(
|
29
|
+
src.match(/(#{SEMAPHORES.join("|")}).gif$/)[1]
|
30
30
|
end
|
31
31
|
|
32
32
|
private def normalize(node)
|
data/lib/inci_score/levenshtein.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
require
|
1
|
+
require "inline"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class LevenshteinC
|
5
|
-
C_PROGRAM = File::expand_path(
|
5
|
+
C_PROGRAM = File::expand_path("../../../ext/levenshtein.c", __FILE__)
|
6
6
|
|
7
7
|
inline(:C) do |builder|
|
8
8
|
builder.c File::read(C_PROGRAM)
|
data/lib/inci_score/normalizer.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/normalizer_rules"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Normalizer
|
5
|
-
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer]
|
5
|
+
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer, Rules::Uniquifier]
|
6
6
|
|
7
7
|
attr_reader :src
|
8
8
|
|
9
|
-
def initialize(
|
10
|
-
@src =
|
11
|
-
@rules =
|
9
|
+
def initialize(src:, rules: DEFAULT_RULES)
|
10
|
+
@src = src
|
11
|
+
@rules = rules
|
12
12
|
end
|
13
13
|
|
14
14
|
def call
|
data/lib/inci_score/normalizer_rules.rb
CHANGED
data/lib/inci_score/recognizer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/recognizer_rules"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Recognizer
|
@@ -10,11 +10,11 @@ module InciScore
|
|
10
10
|
@rules = rules
|
11
11
|
end
|
12
12
|
|
13
|
-
def call
|
13
|
+
def call(precise = false)
|
14
14
|
@component = @rules.reduce(nil) do |component, rule|
|
15
15
|
break(component) if component
|
16
16
|
yield(rule) if block_given?
|
17
|
-
rule.call(@src, @catalog)
|
17
|
+
rule.call(@src, @catalog, precise)
|
18
18
|
end
|
19
19
|
[@component, @catalog[@component]] if @component
|
20
20
|
end
|
data/lib/inci_score/recognizer_rules.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/refinements"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
using Refinements
|
@@ -9,7 +9,7 @@ module InciScore
|
|
9
9
|
module Key
|
10
10
|
extend self
|
11
11
|
|
12
|
-
def call(src, catalog)
|
12
|
+
def call(src, catalog, precise = false)
|
13
13
|
src if catalog.has_key?(src)
|
14
14
|
end
|
15
15
|
end
|
@@ -17,13 +17,13 @@ module InciScore
|
|
17
17
|
module Levenshtein
|
18
18
|
extend self
|
19
19
|
|
20
|
-
ALTERNATE_SEP =
|
20
|
+
ALTERNATE_SEP = "/"
|
21
21
|
|
22
|
-
def call(src, catalog)
|
22
|
+
def call(src, catalog, precise = false)
|
23
23
|
size = src.size
|
24
24
|
initial = src[0]
|
25
25
|
component, distance = catalog.reduce([nil, size]) do |min, (_component, _)|
|
26
|
-
next min unless _component.start_with?(initial)
|
26
|
+
next min unless precise || _component.start_with?(initial)
|
27
27
|
match = (n = _component.index(ALTERNATE_SEP)) ? _component[0, n] : _component
|
28
28
|
next min if match.size > (size + TOLERANCE)
|
29
29
|
dist = src.distance(match)
|
@@ -39,13 +39,18 @@ module InciScore
|
|
39
39
|
|
40
40
|
MIN_MEANINGFUL = 7
|
41
41
|
|
42
|
-
def call(src, catalog)
|
42
|
+
def call(src, catalog, precise = false)
|
43
43
|
return if src.size < TOLERANCE
|
44
|
-
digits = src[0,
|
44
|
+
digits = src[0, min_meaningful(precise)]
|
45
45
|
catalog.detect do |component, _|
|
46
46
|
component.matches?(/^#{Regexp::escape(digits)}/)
|
47
47
|
end.to_a.first
|
48
48
|
end
|
49
|
+
|
50
|
+
def min_meaningful(precise)
|
51
|
+
return MIN_MEANINGFUL unless precise
|
52
|
+
MIN_MEANINGFUL + 2
|
53
|
+
end
|
49
54
|
end
|
50
55
|
|
51
56
|
module Tokens
|
@@ -53,7 +58,7 @@ module InciScore
|
|
53
58
|
|
54
59
|
UNMATCHABLE = %w[extract oil sodium acid sulfate]
|
55
60
|
|
56
|
-
def call(src, catalog)
|
61
|
+
def call(src, catalog, precise = false)
|
57
62
|
tokens(src).each do |token|
|
58
63
|
catalog.each do |component, _|
|
59
64
|
return component if component.matches?(/\b#{Regexp.escape(token)}\b/)
|
@@ -63,7 +68,7 @@ module InciScore
|
|
63
68
|
end
|
64
69
|
|
65
70
|
def tokens(src)
|
66
|
-
(src.split(
|
71
|
+
(src.split(" ") - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort_by!(&:size).reverse!
|
67
72
|
end
|
68
73
|
end
|
69
74
|
end
|
data/lib/inci_score/server.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "etc"
|
2
|
+
require "puma/launcher"
|
3
|
+
require "puma/configuration"
|
3
4
|
|
4
5
|
module InciScore
|
5
6
|
class Server
|
@@ -29,13 +30,21 @@ module InciScore
|
|
29
30
|
c.rackup RACKUP_FILE
|
30
31
|
c.bind "tcp://#{DEFAULT_HOST}:#{@port}"
|
31
32
|
c.workers @workers if workers?
|
32
|
-
c.threads
|
33
|
+
c.threads(*@threads)
|
33
34
|
c.preload_app! if @preload
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
37
38
|
private def workers?
|
38
|
-
@workers > 1 && !
|
39
|
+
@workers > 1 && !java? && !windows?
|
40
|
+
end
|
41
|
+
|
42
|
+
private def java?
|
43
|
+
RUBY_VERSION == "java"
|
44
|
+
end
|
45
|
+
|
46
|
+
private def windows?
|
47
|
+
Gem.win_platform?
|
39
48
|
end
|
40
49
|
end
|
41
50
|
end
|
data/lib/inci_score/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inci_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- costajob
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|