inci_score 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bc9d1f6ee80c730c5d5f12837fed8d774950369
4
- data.tar.gz: ccb0394552ba0e2966c5260e84c338aa61b58a09
3
+ metadata.gz: 9d5e69b680cde0fed823fd4672b8b4ec16638fe9
4
+ data.tar.gz: 488e31a72fd25148a61b32c5fa2a589b6ef438c3
5
5
  SHA512:
6
- metadata.gz: 629adf023e81fa545af1dbb675abe4467ef9aae42bb9f22e73d691339a8e9e585e7719902cf2f3b09c9a4d64c46032d17e9d72d92a378ec6a8d4a4beb9ff8e25
7
- data.tar.gz: 36af9780ea2534d18a92be8d122649596a692f18720d5da5eb63b1a44cc9d7c4afe6c55b9a57fc33810f61d8fba5811912481ec0f70b975c751fc7d26334a203
6
+ metadata.gz: 9e9eb717b3460fd139330acd40f7eec880b8b42f6c806f2ff71efdd9f8b2f8a4376a82c2f4133fcc619fa102781e7b96f4a17bb8bb8b81ed245ba11e76a85271
7
+ data.tar.gz: 7b29bf4e4ab728772be548ba3f81f87ecb6e4b14d8f8e6952a194469314d1445996d9761c90372d19fa99de3030a61f33be7761aaddc98580bc72ce0100ac020
data/README.md CHANGED
@@ -145,11 +145,10 @@ I measured the library three times, picking the best lap.
145
145
  The following script command is used:
146
146
 
147
147
  ```shell
148
- wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src=<list_of_ingredients>
148
+ wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src=aqua,parfum,zeolithe
149
149
  ```
150
150
 
151
151
  ### Results
152
- | Ingredients | Throughput (req/s) | Latency in ms (avg/stdev/max) |
153
- | :----------------------- | -----------------: | ----------------------------: |
154
- | aqua,parfum,zeolite | 26054.91 | 0.63/1.03/79.86 |
155
- | agua,porfum,zaolite | 953.44 | 14.67/5.15/82.31 |
152
+ | Throughput (req/s) | Latency in ms (avg/stdev/max) |
153
+ | -----------------: | ----------------------------: |
154
+ | 50060.92 | 0.31/0.45/3.83 |
data/Rakefile CHANGED
@@ -1,23 +1,23 @@
1
- require 'bundler/gem_tasks'
2
- require 'rake/testtask'
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
3
 
4
4
  namespace :spec do
5
5
  Rake::TestTask.new(:unit) do |t|
6
- t.libs << 'spec'
7
- t.libs << 'lib'
8
- t.test_files = FileList['spec/unit/*_spec.rb']
6
+ t.libs << "spec"
7
+ t.libs << "lib"
8
+ t.test_files = FileList["spec/unit/*_spec.rb"]
9
9
  end
10
10
 
11
11
  Rake::TestTask.new(:integration) do |t|
12
- t.libs << 'spec'
13
- t.libs << 'lib'
14
- t.test_files = FileList['spec/integration/*_spec.rb']
12
+ t.libs << "spec"
13
+ t.libs << "lib"
14
+ t.test_files = FileList["spec/integration/*_spec.rb"]
15
15
  end
16
16
 
17
17
  Rake::TestTask.new(:bench) do |t|
18
- t.libs << 'spec'
19
- t.libs << 'lib'
20
- t.test_files = FileList['spec/bench/*_bench.rb']
18
+ t.libs << "spec"
19
+ t.libs << "lib"
20
+ t.test_files = FileList["spec/bench/*_bench.rb"]
21
21
  end
22
22
  end
23
23
 
data/bin/console CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'bundler/setup'
4
- require 'inci_score'
5
- require 'irb'
6
- require 'irb/completion'
3
+ require "bundler/setup"
4
+ require "inci_score"
5
+ require "irb"
6
+ require "irb/completion"
7
7
  IRB.start
@@ -1,5 +1,5 @@
1
- require 'rack'
2
- require 'inci_score'
1
+ require "rack"
2
+ require "inci_score"
3
3
 
4
4
  module InciScore
5
5
  module App
@@ -12,8 +12,9 @@ module InciScore
12
12
  def call(env)
13
13
  req = Rack::Request.new(env)
14
14
  src = req.params["src"]
15
- json = src ? Computer.new(src: src, catalog: catalog).call.to_json : %q({"error": "no valid source"})
16
- ['200', {'Content-Type' => 'application/json'}, [json]]
15
+ precise = req.params["precise"]
16
+ json = src ? Computer.new(src: src, catalog: catalog, precise: !!precise).call.to_json : %q({"error": "no valid source"})
17
+ ["200", {"Content-Type" => "application/json"}, [json]]
17
18
  end
18
19
  end
19
20
  end
@@ -1,10 +1,10 @@
1
- require 'yaml'
1
+ require "yaml"
2
2
 
3
3
  module InciScore
4
4
  module Catalog
5
5
  extend self
6
6
 
7
- YAML_PATH = File::expand_path('../../../config/catalog.yml', __FILE__)
7
+ YAML_PATH = File::expand_path("../../../config/catalog.yml", __FILE__)
8
8
 
9
9
  def fetch(src = File.read(YAML_PATH))
10
10
  YAML::load(src)
@@ -11,20 +11,21 @@ module InciScore
11
11
  @src = nil
12
12
  @fresh = nil
13
13
  @port = nil
14
+ @precise = nil
14
15
  end
15
16
 
16
17
  def call(server_klass: Server, computer_klass: Computer, fetcher: Fetcher.new)
17
18
  parser.parse!(@args)
18
19
  return server_klass.new(port: @port, preload: true).run if @port
19
- return @io.puts("Specify inci list as: --src='aqua, parfum, etc'") unless @src
20
- @io.puts computer_klass.new(src: @src, catalog: catalog(fetcher)).call
20
+ return @io.puts(%q{Specify inci list as: --src="aqua, parfum, etc"}) unless @src
21
+ @io.puts computer_klass.new(src: @src, catalog: catalog(fetcher), precise: @precise).call
21
22
  end
22
23
 
23
24
  private def parser
24
25
  OptionParser.new do |opts|
25
- opts.banner = %q{Usage: ./bin/inci_score --src='aqua, parfum, etc' --fresh}
26
+ opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --fresh --precise}
26
27
 
27
- opts.on("-sSRC", "--src=SRC", "The INCI list: 'aqua, parfum, etc'") do |src|
28
+ opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
28
29
  @src = src
29
30
  end
30
31
 
@@ -32,6 +33,10 @@ module InciScore
32
33
  @fresh = fresh
33
34
  end
34
35
 
36
+ opts.on("-p", "--precise", "Compute components more precisely (slower)") do |precise|
37
+ @precise = precise
38
+ end
39
+
35
40
  opts.on("--http=PORT", "Start Puma server on the specified port") do |port|
36
41
  @port = port
37
42
  end
@@ -1,18 +1,19 @@
1
- require 'inci_score/normalizer'
2
- require 'inci_score/recognizer'
3
- require 'inci_score/scorer'
4
- require 'inci_score/response'
1
+ require "inci_score/normalizer"
2
+ require "inci_score/recognizer"
3
+ require "inci_score/scorer"
4
+ require "inci_score/response"
5
5
 
6
6
  module InciScore
7
7
  class Computer
8
8
  TOLERANCE = 30.0
9
9
 
10
- def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES)
10
+ def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES, precise: false)
11
11
  @src = src
12
12
  @catalog = catalog
13
13
  @tolerance = Float(tolerance)
14
14
  @rules = rules
15
15
  @unrecognized = []
16
+ @precise = precise
16
17
  end
17
18
 
18
19
  def call
@@ -32,7 +33,7 @@ module InciScore
32
33
 
33
34
  private def components
34
35
  @components ||= ingredients.map do |ingredient|
35
- Recognizer.new(ingredient, @catalog).call.tap do |component|
36
+ Recognizer.new(ingredient, @catalog).call(@precise).tap do |component|
36
37
  @unrecognized << ingredient unless component
37
38
  end
38
39
  end.compact
@@ -1,10 +1,10 @@
1
- require 'nokogiri'
1
+ require "nokogiri"
2
2
 
3
3
  module InciScore
4
4
  class Fetcher
5
- BIODIZIO_URI = 'http://www.biodizionario.it/biodizio.php'
5
+ BIODIZIO_URI = "http://www.biodizionario.it/biodizio.php"
6
6
  SEMAPHORES = %w[vv v g r rr]
7
- CSS_QUERY = 'table[width="751"] > tr > td img'
7
+ CSS_QUERY = %q{table[width="751"] > tr > td img}
8
8
 
9
9
  def initialize(src = nil)
10
10
  @src = src || Thread.new { open(BIODIZIO_URI) }
@@ -12,7 +12,7 @@ module InciScore
12
12
 
13
13
  def call
14
14
  @components ||= Nokogiri::HTML(doc).css(CSS_QUERY).inject({}) do |acc, img|
15
- hazard = semaphore(img.attr('src'))
15
+ hazard = semaphore(img.attr("src"))
16
16
  name = img.next_sibling.next_sibling
17
17
  desc = name.next_sibling.next_sibling
18
18
  name, desc = desc, name if swap?(desc.text)
@@ -26,7 +26,7 @@ module InciScore
26
26
  end
27
27
 
28
28
  private def semaphore(src)
29
- src.match(/(#{SEMAPHORES.join('|')}).gif$/)[1]
29
+ src.match(/(#{SEMAPHORES.join("|")}).gif$/)[1]
30
30
  end
31
31
 
32
32
  private def normalize(node)
@@ -1,8 +1,8 @@
1
- require 'inline'
1
+ require "inline"
2
2
 
3
3
  module InciScore
4
4
  class LevenshteinC
5
- C_PROGRAM = File::expand_path('../../../ext/levenshtein.c', __FILE__)
5
+ C_PROGRAM = File::expand_path("../../../ext/levenshtein.c", __FILE__)
6
6
 
7
7
  inline(:C) do |builder|
8
8
  builder.c File::read(C_PROGRAM)
@@ -1,14 +1,14 @@
1
- require 'inci_score/normalizer_rules'
1
+ require "inci_score/normalizer_rules"
2
2
 
3
3
  module InciScore
4
4
  class Normalizer
5
- DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer]
5
+ DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer, Rules::Uniquifier]
6
6
 
7
7
  attr_reader :src
8
8
 
9
- def initialize(options = {})
10
- @src = options[:src] || fail(ArgumentError, 'missing src')
11
- @rules = options.fetch(:rules) { DEFAULT_RULES }
9
+ def initialize(src:, rules: DEFAULT_RULES)
10
+ @src = src
11
+ @rules = rules
12
12
  end
13
13
 
14
14
  def call
@@ -88,6 +88,14 @@ module InciScore
88
88
  end.reject(&:empty?)
89
89
  end
90
90
  end
91
+
92
+ module Uniquifier
93
+ extend self
94
+
95
+ def call(src)
96
+ Array(src).uniq
97
+ end
98
+ end
91
99
  end
92
100
  end
93
101
  end
@@ -1,4 +1,4 @@
1
- require 'inci_score/recognizer_rules'
1
+ require "inci_score/recognizer_rules"
2
2
 
3
3
  module InciScore
4
4
  class Recognizer
@@ -10,11 +10,11 @@ module InciScore
10
10
  @rules = rules
11
11
  end
12
12
 
13
- def call
13
+ def call(precise = false)
14
14
  @component = @rules.reduce(nil) do |component, rule|
15
15
  break(component) if component
16
16
  yield(rule) if block_given?
17
- rule.call(@src, @catalog)
17
+ rule.call(@src, @catalog, precise)
18
18
  end
19
19
  [@component, @catalog[@component]] if @component
20
20
  end
@@ -1,4 +1,4 @@
1
- require 'inci_score/refinements'
1
+ require "inci_score/refinements"
2
2
 
3
3
  module InciScore
4
4
  using Refinements
@@ -9,7 +9,7 @@ module InciScore
9
9
  module Key
10
10
  extend self
11
11
 
12
- def call(src, catalog)
12
+ def call(src, catalog, precise = false)
13
13
  src if catalog.has_key?(src)
14
14
  end
15
15
  end
@@ -17,13 +17,13 @@ module InciScore
17
17
  module Levenshtein
18
18
  extend self
19
19
 
20
- ALTERNATE_SEP = '/'
20
+ ALTERNATE_SEP = "/"
21
21
 
22
- def call(src, catalog)
22
+ def call(src, catalog, precise = false)
23
23
  size = src.size
24
24
  initial = src[0]
25
25
  component, distance = catalog.reduce([nil, size]) do |min, (_component, _)|
26
- next min unless _component.start_with?(initial)
26
+ next min unless precise || _component.start_with?(initial)
27
27
  match = (n = _component.index(ALTERNATE_SEP)) ? _component[0, n] : _component
28
28
  next min if match.size > (size + TOLERANCE)
29
29
  dist = src.distance(match)
@@ -39,13 +39,18 @@ module InciScore
39
39
 
40
40
  MIN_MEANINGFUL = 7
41
41
 
42
- def call(src, catalog)
42
+ def call(src, catalog, precise = false)
43
43
  return if src.size < TOLERANCE
44
- digits = src[0, MIN_MEANINGFUL]
44
+ digits = src[0, min_meaningful(precise)]
45
45
  catalog.detect do |component, _|
46
46
  component.matches?(/^#{Regexp::escape(digits)}/)
47
47
  end.to_a.first
48
48
  end
49
+
50
+ def min_meaningful(precise)
51
+ return MIN_MEANINGFUL unless precise
52
+ MIN_MEANINGFUL + 2
53
+ end
49
54
  end
50
55
 
51
56
  module Tokens
@@ -53,7 +58,7 @@ module InciScore
53
58
 
54
59
  UNMATCHABLE = %w[extract oil sodium acid sulfate]
55
60
 
56
- def call(src, catalog)
61
+ def call(src, catalog, precise = false)
57
62
  tokens(src).each do |token|
58
63
  catalog.each do |component, _|
59
64
  return component if component.matches?(/\b#{Regexp.escape(token)}\b/)
@@ -63,7 +68,7 @@ module InciScore
63
68
  end
64
69
 
65
70
  def tokens(src)
66
- (src.split(' ') - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort_by!(&:size).reverse!
71
+ (src.split(" ") - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort_by!(&:size).reverse!
67
72
  end
68
73
  end
69
74
  end
@@ -1,5 +1,6 @@
1
- require 'etc'
2
- require 'puma'
1
+ require "etc"
2
+ require "puma/launcher"
3
+ require "puma/configuration"
3
4
 
4
5
  module InciScore
5
6
  class Server
@@ -29,13 +30,21 @@ module InciScore
29
30
  c.rackup RACKUP_FILE
30
31
  c.bind "tcp://#{DEFAULT_HOST}:#{@port}"
31
32
  c.workers @workers if workers?
32
- c.threads *@threads
33
+ c.threads(*@threads)
33
34
  c.preload_app! if @preload
34
35
  end
35
36
  end
36
37
 
37
38
  private def workers?
38
- @workers > 1 && !Puma.jruby? && !Puma.windows?
39
+ @workers > 1 && !java? && !windows?
40
+ end
41
+
42
+ private def java?
43
+ RUBY_VERSION == "java"
44
+ end
45
+
46
+ private def windows?
47
+ Gem.win_platform?
39
48
  end
40
49
  end
41
50
  end
@@ -1,3 +1,3 @@
1
1
  module InciScore
2
- VERSION = "2.2.1"
2
+ VERSION = "2.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: inci_score
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.1
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - costajob
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-05 00:00:00.000000000 Z
11
+ date: 2017-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri