inci_score 2.2.1 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -5
- data/Rakefile +11 -11
- data/bin/console +4 -4
- data/lib/inci_score/app.rb +5 -4
- data/lib/inci_score/catalog.rb +2 -2
- data/lib/inci_score/cli.rb +9 -4
- data/lib/inci_score/computer.rb +7 -6
- data/lib/inci_score/fetcher.rb +5 -5
- data/lib/inci_score/levenshtein.rb +2 -2
- data/lib/inci_score/normalizer.rb +5 -5
- data/lib/inci_score/normalizer_rules.rb +8 -0
- data/lib/inci_score/recognizer.rb +3 -3
- data/lib/inci_score/recognizer_rules.rb +14 -9
- data/lib/inci_score/server.rb +13 -4
- data/lib/inci_score/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d5e69b680cde0fed823fd4672b8b4ec16638fe9
|
4
|
+
data.tar.gz: 488e31a72fd25148a61b32c5fa2a589b6ef438c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e9eb717b3460fd139330acd40f7eec880b8b42f6c806f2ff71efdd9f8b2f8a4376a82c2f4133fcc619fa102781e7b96f4a17bb8bb8b81ed245ba11e76a85271
|
7
|
+
data.tar.gz: 7b29bf4e4ab728772be548ba3f81f87ecb6e4b14d8f8e6952a194469314d1445996d9761c90372d19fa99de3030a61f33be7761aaddc98580bc72ce0100ac020
|
data/README.md
CHANGED
@@ -145,11 +145,10 @@ I measured the library three times, picking the best lap.
|
|
145
145
|
The following script command is used:
|
146
146
|
|
147
147
|
```shell
|
148
|
-
wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src
|
148
|
+
wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src=aqua,parfum,zeolithe
|
149
149
|
```
|
150
150
|
|
151
151
|
### Results
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
| agua,porfum,zaolite | 953.44 | 14.67/5.15/82.31 |
|
152
|
+
| Throughput (req/s) | Latency in ms (avg/stdev/max) |
|
153
|
+
| -----------------: | ----------------------------: |
|
154
|
+
| 50060.92 | 0.31/0.45/3.83 |
|
data/Rakefile
CHANGED
@@ -1,23 +1,23 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
3
|
|
4
4
|
namespace :spec do
|
5
5
|
Rake::TestTask.new(:unit) do |t|
|
6
|
-
t.libs <<
|
7
|
-
t.libs <<
|
8
|
-
t.test_files = FileList[
|
6
|
+
t.libs << "spec"
|
7
|
+
t.libs << "lib"
|
8
|
+
t.test_files = FileList["spec/unit/*_spec.rb"]
|
9
9
|
end
|
10
10
|
|
11
11
|
Rake::TestTask.new(:integration) do |t|
|
12
|
-
t.libs <<
|
13
|
-
t.libs <<
|
14
|
-
t.test_files = FileList[
|
12
|
+
t.libs << "spec"
|
13
|
+
t.libs << "lib"
|
14
|
+
t.test_files = FileList["spec/integration/*_spec.rb"]
|
15
15
|
end
|
16
16
|
|
17
17
|
Rake::TestTask.new(:bench) do |t|
|
18
|
-
t.libs <<
|
19
|
-
t.libs <<
|
20
|
-
t.test_files = FileList[
|
18
|
+
t.libs << "spec"
|
19
|
+
t.libs << "lib"
|
20
|
+
t.test_files = FileList["spec/bench/*_bench.rb"]
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
data/bin/console
CHANGED
data/lib/inci_score/app.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "rack"
|
2
|
+
require "inci_score"
|
3
3
|
|
4
4
|
module InciScore
|
5
5
|
module App
|
@@ -12,8 +12,9 @@ module InciScore
|
|
12
12
|
def call(env)
|
13
13
|
req = Rack::Request.new(env)
|
14
14
|
src = req.params["src"]
|
15
|
-
|
16
|
-
|
15
|
+
precise = req.params["precise"]
|
16
|
+
json = src ? Computer.new(src: src, catalog: catalog, precise: !!precise).call.to_json : %q({"error": "no valid source"})
|
17
|
+
["200", {"Content-Type" => "application/json"}, [json]]
|
17
18
|
end
|
18
19
|
end
|
19
20
|
end
|
data/lib/inci_score/catalog.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
1
|
+
require "yaml"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
module Catalog
|
5
5
|
extend self
|
6
6
|
|
7
|
-
YAML_PATH = File::expand_path(
|
7
|
+
YAML_PATH = File::expand_path("../../../config/catalog.yml", __FILE__)
|
8
8
|
|
9
9
|
def fetch(src = File.read(YAML_PATH))
|
10
10
|
YAML::load(src)
|
data/lib/inci_score/cli.rb
CHANGED
@@ -11,20 +11,21 @@ module InciScore
|
|
11
11
|
@src = nil
|
12
12
|
@fresh = nil
|
13
13
|
@port = nil
|
14
|
+
@precise = nil
|
14
15
|
end
|
15
16
|
|
16
17
|
def call(server_klass: Server, computer_klass: Computer, fetcher: Fetcher.new)
|
17
18
|
parser.parse!(@args)
|
18
19
|
return server_klass.new(port: @port, preload: true).run if @port
|
19
|
-
return @io.puts(
|
20
|
-
@io.puts computer_klass.new(src: @src, catalog: catalog(fetcher)).call
|
20
|
+
return @io.puts(%q{Specify inci list as: --src="aqua, parfum, etc"}) unless @src
|
21
|
+
@io.puts computer_klass.new(src: @src, catalog: catalog(fetcher), precise: @precise).call
|
21
22
|
end
|
22
23
|
|
23
24
|
private def parser
|
24
25
|
OptionParser.new do |opts|
|
25
|
-
opts.banner = %q{Usage:
|
26
|
+
opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --fresh --precise}
|
26
27
|
|
27
|
-
opts.on("-sSRC", "--src=SRC",
|
28
|
+
opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
|
28
29
|
@src = src
|
29
30
|
end
|
30
31
|
|
@@ -32,6 +33,10 @@ module InciScore
|
|
32
33
|
@fresh = fresh
|
33
34
|
end
|
34
35
|
|
36
|
+
opts.on("-p", "--precise", "Compute components more precisely (slower)") do |precise|
|
37
|
+
@precise = precise
|
38
|
+
end
|
39
|
+
|
35
40
|
opts.on("--http=PORT", "Start Puma server on the specified port") do |port|
|
36
41
|
@port = port
|
37
42
|
end
|
data/lib/inci_score/computer.rb
CHANGED
@@ -1,18 +1,19 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require "inci_score/normalizer"
|
2
|
+
require "inci_score/recognizer"
|
3
|
+
require "inci_score/scorer"
|
4
|
+
require "inci_score/response"
|
5
5
|
|
6
6
|
module InciScore
|
7
7
|
class Computer
|
8
8
|
TOLERANCE = 30.0
|
9
9
|
|
10
|
-
def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES)
|
10
|
+
def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES, precise: false)
|
11
11
|
@src = src
|
12
12
|
@catalog = catalog
|
13
13
|
@tolerance = Float(tolerance)
|
14
14
|
@rules = rules
|
15
15
|
@unrecognized = []
|
16
|
+
@precise = precise
|
16
17
|
end
|
17
18
|
|
18
19
|
def call
|
@@ -32,7 +33,7 @@ module InciScore
|
|
32
33
|
|
33
34
|
private def components
|
34
35
|
@components ||= ingredients.map do |ingredient|
|
35
|
-
Recognizer.new(ingredient, @catalog).call.tap do |component|
|
36
|
+
Recognizer.new(ingredient, @catalog).call(@precise).tap do |component|
|
36
37
|
@unrecognized << ingredient unless component
|
37
38
|
end
|
38
39
|
end.compact
|
data/lib/inci_score/fetcher.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
1
|
+
require "nokogiri"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Fetcher
|
5
|
-
BIODIZIO_URI =
|
5
|
+
BIODIZIO_URI = "http://www.biodizionario.it/biodizio.php"
|
6
6
|
SEMAPHORES = %w[vv v g r rr]
|
7
|
-
CSS_QUERY =
|
7
|
+
CSS_QUERY = %q{table[width="751"] > tr > td img}
|
8
8
|
|
9
9
|
def initialize(src = nil)
|
10
10
|
@src = src || Thread.new { open(BIODIZIO_URI) }
|
@@ -12,7 +12,7 @@ module InciScore
|
|
12
12
|
|
13
13
|
def call
|
14
14
|
@components ||= Nokogiri::HTML(doc).css(CSS_QUERY).inject({}) do |acc, img|
|
15
|
-
hazard = semaphore(img.attr(
|
15
|
+
hazard = semaphore(img.attr("src"))
|
16
16
|
name = img.next_sibling.next_sibling
|
17
17
|
desc = name.next_sibling.next_sibling
|
18
18
|
name, desc = desc, name if swap?(desc.text)
|
@@ -26,7 +26,7 @@ module InciScore
|
|
26
26
|
end
|
27
27
|
|
28
28
|
private def semaphore(src)
|
29
|
-
src.match(/(#{SEMAPHORES.join(
|
29
|
+
src.match(/(#{SEMAPHORES.join("|")}).gif$/)[1]
|
30
30
|
end
|
31
31
|
|
32
32
|
private def normalize(node)
|
@@ -1,8 +1,8 @@
|
|
1
|
-
require
|
1
|
+
require "inline"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class LevenshteinC
|
5
|
-
C_PROGRAM = File::expand_path(
|
5
|
+
C_PROGRAM = File::expand_path("../../../ext/levenshtein.c", __FILE__)
|
6
6
|
|
7
7
|
inline(:C) do |builder|
|
8
8
|
builder.c File::read(C_PROGRAM)
|
@@ -1,14 +1,14 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/normalizer_rules"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Normalizer
|
5
|
-
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer]
|
5
|
+
DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer, Rules::Uniquifier]
|
6
6
|
|
7
7
|
attr_reader :src
|
8
8
|
|
9
|
-
def initialize(
|
10
|
-
@src =
|
11
|
-
@rules =
|
9
|
+
def initialize(src:, rules: DEFAULT_RULES)
|
10
|
+
@src = src
|
11
|
+
@rules = rules
|
12
12
|
end
|
13
13
|
|
14
14
|
def call
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/recognizer_rules"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
class Recognizer
|
@@ -10,11 +10,11 @@ module InciScore
|
|
10
10
|
@rules = rules
|
11
11
|
end
|
12
12
|
|
13
|
-
def call
|
13
|
+
def call(precise = false)
|
14
14
|
@component = @rules.reduce(nil) do |component, rule|
|
15
15
|
break(component) if component
|
16
16
|
yield(rule) if block_given?
|
17
|
-
rule.call(@src, @catalog)
|
17
|
+
rule.call(@src, @catalog, precise)
|
18
18
|
end
|
19
19
|
[@component, @catalog[@component]] if @component
|
20
20
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "inci_score/refinements"
|
2
2
|
|
3
3
|
module InciScore
|
4
4
|
using Refinements
|
@@ -9,7 +9,7 @@ module InciScore
|
|
9
9
|
module Key
|
10
10
|
extend self
|
11
11
|
|
12
|
-
def call(src, catalog)
|
12
|
+
def call(src, catalog, precise = false)
|
13
13
|
src if catalog.has_key?(src)
|
14
14
|
end
|
15
15
|
end
|
@@ -17,13 +17,13 @@ module InciScore
|
|
17
17
|
module Levenshtein
|
18
18
|
extend self
|
19
19
|
|
20
|
-
ALTERNATE_SEP =
|
20
|
+
ALTERNATE_SEP = "/"
|
21
21
|
|
22
|
-
def call(src, catalog)
|
22
|
+
def call(src, catalog, precise = false)
|
23
23
|
size = src.size
|
24
24
|
initial = src[0]
|
25
25
|
component, distance = catalog.reduce([nil, size]) do |min, (_component, _)|
|
26
|
-
next min unless _component.start_with?(initial)
|
26
|
+
next min unless precise || _component.start_with?(initial)
|
27
27
|
match = (n = _component.index(ALTERNATE_SEP)) ? _component[0, n] : _component
|
28
28
|
next min if match.size > (size + TOLERANCE)
|
29
29
|
dist = src.distance(match)
|
@@ -39,13 +39,18 @@ module InciScore
|
|
39
39
|
|
40
40
|
MIN_MEANINGFUL = 7
|
41
41
|
|
42
|
-
def call(src, catalog)
|
42
|
+
def call(src, catalog, precise = false)
|
43
43
|
return if src.size < TOLERANCE
|
44
|
-
digits = src[0,
|
44
|
+
digits = src[0, min_meaningful(precise)]
|
45
45
|
catalog.detect do |component, _|
|
46
46
|
component.matches?(/^#{Regexp::escape(digits)}/)
|
47
47
|
end.to_a.first
|
48
48
|
end
|
49
|
+
|
50
|
+
def min_meaningful(precise)
|
51
|
+
return MIN_MEANINGFUL unless precise
|
52
|
+
MIN_MEANINGFUL + 2
|
53
|
+
end
|
49
54
|
end
|
50
55
|
|
51
56
|
module Tokens
|
@@ -53,7 +58,7 @@ module InciScore
|
|
53
58
|
|
54
59
|
UNMATCHABLE = %w[extract oil sodium acid sulfate]
|
55
60
|
|
56
|
-
def call(src, catalog)
|
61
|
+
def call(src, catalog, precise = false)
|
57
62
|
tokens(src).each do |token|
|
58
63
|
catalog.each do |component, _|
|
59
64
|
return component if component.matches?(/\b#{Regexp.escape(token)}\b/)
|
@@ -63,7 +68,7 @@ module InciScore
|
|
63
68
|
end
|
64
69
|
|
65
70
|
def tokens(src)
|
66
|
-
(src.split(
|
71
|
+
(src.split(" ") - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort_by!(&:size).reverse!
|
67
72
|
end
|
68
73
|
end
|
69
74
|
end
|
data/lib/inci_score/server.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require "etc"
|
2
|
+
require "puma/launcher"
|
3
|
+
require "puma/configuration"
|
3
4
|
|
4
5
|
module InciScore
|
5
6
|
class Server
|
@@ -29,13 +30,21 @@ module InciScore
|
|
29
30
|
c.rackup RACKUP_FILE
|
30
31
|
c.bind "tcp://#{DEFAULT_HOST}:#{@port}"
|
31
32
|
c.workers @workers if workers?
|
32
|
-
c.threads
|
33
|
+
c.threads(*@threads)
|
33
34
|
c.preload_app! if @preload
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
37
38
|
private def workers?
|
38
|
-
@workers > 1 && !
|
39
|
+
@workers > 1 && !java? && !windows?
|
40
|
+
end
|
41
|
+
|
42
|
+
private def java?
|
43
|
+
RUBY_VERSION == "java"
|
44
|
+
end
|
45
|
+
|
46
|
+
private def windows?
|
47
|
+
Gem.win_platform?
|
39
48
|
end
|
40
49
|
end
|
41
50
|
end
|
data/lib/inci_score/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inci_score
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- costajob
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|