inci_score 2.2.1 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bc9d1f6ee80c730c5d5f12837fed8d774950369
4
- data.tar.gz: ccb0394552ba0e2966c5260e84c338aa61b58a09
3
+ metadata.gz: 9d5e69b680cde0fed823fd4672b8b4ec16638fe9
4
+ data.tar.gz: 488e31a72fd25148a61b32c5fa2a589b6ef438c3
5
5
  SHA512:
6
- metadata.gz: 629adf023e81fa545af1dbb675abe4467ef9aae42bb9f22e73d691339a8e9e585e7719902cf2f3b09c9a4d64c46032d17e9d72d92a378ec6a8d4a4beb9ff8e25
7
- data.tar.gz: 36af9780ea2534d18a92be8d122649596a692f18720d5da5eb63b1a44cc9d7c4afe6c55b9a57fc33810f61d8fba5811912481ec0f70b975c751fc7d26334a203
6
+ metadata.gz: 9e9eb717b3460fd139330acd40f7eec880b8b42f6c806f2ff71efdd9f8b2f8a4376a82c2f4133fcc619fa102781e7b96f4a17bb8bb8b81ed245ba11e76a85271
7
+ data.tar.gz: 7b29bf4e4ab728772be548ba3f81f87ecb6e4b14d8f8e6952a194469314d1445996d9761c90372d19fa99de3030a61f33be7761aaddc98580bc72ce0100ac020
data/README.md CHANGED
@@ -145,11 +145,10 @@ I measured the library three times, picking the best lap.
145
145
  The following script command is used:
146
146
 
147
147
  ```shell
148
- wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src=<list_of_ingredients>
148
+ wrk -t 4 -c 100 -d 30s --timeout 2000 http://127.0.0.1:9292/?src=aqua,parfum,zeolithe
149
149
  ```
150
150
 
151
151
  ### Results
152
- | Ingredients | Throughput (req/s) | Latency in ms (avg/stdev/max) |
153
- | :----------------------- | -----------------: | ----------------------------: |
154
- | aqua,parfum,zeolite | 26054.91 | 0.63/1.03/79.86 |
155
- | agua,porfum,zaolite | 953.44 | 14.67/5.15/82.31 |
152
+ | Throughput (req/s) | Latency in ms (avg/stdev/max) |
153
+ | -----------------: | ----------------------------: |
154
+ | 50060.92 | 0.31/0.45/3.83 |
data/Rakefile CHANGED
@@ -1,23 +1,23 @@
1
- require 'bundler/gem_tasks'
2
- require 'rake/testtask'
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
3
 
4
4
  namespace :spec do
5
5
  Rake::TestTask.new(:unit) do |t|
6
- t.libs << 'spec'
7
- t.libs << 'lib'
8
- t.test_files = FileList['spec/unit/*_spec.rb']
6
+ t.libs << "spec"
7
+ t.libs << "lib"
8
+ t.test_files = FileList["spec/unit/*_spec.rb"]
9
9
  end
10
10
 
11
11
  Rake::TestTask.new(:integration) do |t|
12
- t.libs << 'spec'
13
- t.libs << 'lib'
14
- t.test_files = FileList['spec/integration/*_spec.rb']
12
+ t.libs << "spec"
13
+ t.libs << "lib"
14
+ t.test_files = FileList["spec/integration/*_spec.rb"]
15
15
  end
16
16
 
17
17
  Rake::TestTask.new(:bench) do |t|
18
- t.libs << 'spec'
19
- t.libs << 'lib'
20
- t.test_files = FileList['spec/bench/*_bench.rb']
18
+ t.libs << "spec"
19
+ t.libs << "lib"
20
+ t.test_files = FileList["spec/bench/*_bench.rb"]
21
21
  end
22
22
  end
23
23
 
data/bin/console CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'bundler/setup'
4
- require 'inci_score'
5
- require 'irb'
6
- require 'irb/completion'
3
+ require "bundler/setup"
4
+ require "inci_score"
5
+ require "irb"
6
+ require "irb/completion"
7
7
  IRB.start
@@ -1,5 +1,5 @@
1
- require 'rack'
2
- require 'inci_score'
1
+ require "rack"
2
+ require "inci_score"
3
3
 
4
4
  module InciScore
5
5
  module App
@@ -12,8 +12,9 @@ module InciScore
12
12
  def call(env)
13
13
  req = Rack::Request.new(env)
14
14
  src = req.params["src"]
15
- json = src ? Computer.new(src: src, catalog: catalog).call.to_json : %q({"error": "no valid source"})
16
- ['200', {'Content-Type' => 'application/json'}, [json]]
15
+ precise = req.params["precise"]
16
+ json = src ? Computer.new(src: src, catalog: catalog, precise: !!precise).call.to_json : %q({"error": "no valid source"})
17
+ ["200", {"Content-Type" => "application/json"}, [json]]
17
18
  end
18
19
  end
19
20
  end
@@ -1,10 +1,10 @@
1
- require 'yaml'
1
+ require "yaml"
2
2
 
3
3
  module InciScore
4
4
  module Catalog
5
5
  extend self
6
6
 
7
- YAML_PATH = File::expand_path('../../../config/catalog.yml', __FILE__)
7
+ YAML_PATH = File::expand_path("../../../config/catalog.yml", __FILE__)
8
8
 
9
9
  def fetch(src = File.read(YAML_PATH))
10
10
  YAML::load(src)
@@ -11,20 +11,21 @@ module InciScore
11
11
  @src = nil
12
12
  @fresh = nil
13
13
  @port = nil
14
+ @precise = nil
14
15
  end
15
16
 
16
17
  def call(server_klass: Server, computer_klass: Computer, fetcher: Fetcher.new)
17
18
  parser.parse!(@args)
18
19
  return server_klass.new(port: @port, preload: true).run if @port
19
- return @io.puts("Specify inci list as: --src='aqua, parfum, etc'") unless @src
20
- @io.puts computer_klass.new(src: @src, catalog: catalog(fetcher)).call
20
+ return @io.puts(%q{Specify inci list as: --src="aqua, parfum, etc"}) unless @src
21
+ @io.puts computer_klass.new(src: @src, catalog: catalog(fetcher), precise: @precise).call
21
22
  end
22
23
 
23
24
  private def parser
24
25
  OptionParser.new do |opts|
25
- opts.banner = %q{Usage: ./bin/inci_score --src='aqua, parfum, etc' --fresh}
26
+ opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --fresh --precise}
26
27
 
27
- opts.on("-sSRC", "--src=SRC", "The INCI list: 'aqua, parfum, etc'") do |src|
28
+ opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
28
29
  @src = src
29
30
  end
30
31
 
@@ -32,6 +33,10 @@ module InciScore
32
33
  @fresh = fresh
33
34
  end
34
35
 
36
+ opts.on("-p", "--precise", "Compute components more precisely (slower)") do |precise|
37
+ @precise = precise
38
+ end
39
+
35
40
  opts.on("--http=PORT", "Start Puma server on the specified port") do |port|
36
41
  @port = port
37
42
  end
@@ -1,18 +1,19 @@
1
- require 'inci_score/normalizer'
2
- require 'inci_score/recognizer'
3
- require 'inci_score/scorer'
4
- require 'inci_score/response'
1
+ require "inci_score/normalizer"
2
+ require "inci_score/recognizer"
3
+ require "inci_score/scorer"
4
+ require "inci_score/response"
5
5
 
6
6
  module InciScore
7
7
  class Computer
8
8
  TOLERANCE = 30.0
9
9
 
10
- def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES)
10
+ def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES, precise: false)
11
11
  @src = src
12
12
  @catalog = catalog
13
13
  @tolerance = Float(tolerance)
14
14
  @rules = rules
15
15
  @unrecognized = []
16
+ @precise = precise
16
17
  end
17
18
 
18
19
  def call
@@ -32,7 +33,7 @@ module InciScore
32
33
 
33
34
  private def components
34
35
  @components ||= ingredients.map do |ingredient|
35
- Recognizer.new(ingredient, @catalog).call.tap do |component|
36
+ Recognizer.new(ingredient, @catalog).call(@precise).tap do |component|
36
37
  @unrecognized << ingredient unless component
37
38
  end
38
39
  end.compact
@@ -1,10 +1,10 @@
1
- require 'nokogiri'
1
+ require "nokogiri"
2
2
 
3
3
  module InciScore
4
4
  class Fetcher
5
- BIODIZIO_URI = 'http://www.biodizionario.it/biodizio.php'
5
+ BIODIZIO_URI = "http://www.biodizionario.it/biodizio.php"
6
6
  SEMAPHORES = %w[vv v g r rr]
7
- CSS_QUERY = 'table[width="751"] > tr > td img'
7
+ CSS_QUERY = %q{table[width="751"] > tr > td img}
8
8
 
9
9
  def initialize(src = nil)
10
10
  @src = src || Thread.new { open(BIODIZIO_URI) }
@@ -12,7 +12,7 @@ module InciScore
12
12
 
13
13
  def call
14
14
  @components ||= Nokogiri::HTML(doc).css(CSS_QUERY).inject({}) do |acc, img|
15
- hazard = semaphore(img.attr('src'))
15
+ hazard = semaphore(img.attr("src"))
16
16
  name = img.next_sibling.next_sibling
17
17
  desc = name.next_sibling.next_sibling
18
18
  name, desc = desc, name if swap?(desc.text)
@@ -26,7 +26,7 @@ module InciScore
26
26
  end
27
27
 
28
28
  private def semaphore(src)
29
- src.match(/(#{SEMAPHORES.join('|')}).gif$/)[1]
29
+ src.match(/(#{SEMAPHORES.join("|")}).gif$/)[1]
30
30
  end
31
31
 
32
32
  private def normalize(node)
@@ -1,8 +1,8 @@
1
- require 'inline'
1
+ require "inline"
2
2
 
3
3
  module InciScore
4
4
  class LevenshteinC
5
- C_PROGRAM = File::expand_path('../../../ext/levenshtein.c', __FILE__)
5
+ C_PROGRAM = File::expand_path("../../../ext/levenshtein.c", __FILE__)
6
6
 
7
7
  inline(:C) do |builder|
8
8
  builder.c File::read(C_PROGRAM)
@@ -1,14 +1,14 @@
1
- require 'inci_score/normalizer_rules'
1
+ require "inci_score/normalizer_rules"
2
2
 
3
3
  module InciScore
4
4
  class Normalizer
5
- DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer]
5
+ DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer, Rules::Uniquifier]
6
6
 
7
7
  attr_reader :src
8
8
 
9
- def initialize(options = {})
10
- @src = options[:src] || fail(ArgumentError, 'missing src')
11
- @rules = options.fetch(:rules) { DEFAULT_RULES }
9
+ def initialize(src:, rules: DEFAULT_RULES)
10
+ @src = src
11
+ @rules = rules
12
12
  end
13
13
 
14
14
  def call
@@ -88,6 +88,14 @@ module InciScore
88
88
  end.reject(&:empty?)
89
89
  end
90
90
  end
91
+
92
# Normalizer rule that removes repeated entries from the ingredient list.
module Uniquifier
  extend self

  # Coerces +src+ to an array (nil becomes [], a scalar is wrapped) and
  # returns a new array holding only the first occurrence of each entry,
  # in the original order.
  def call(src)
    entries = Array(src)
    entries.uniq
  end
end
91
99
  end
92
100
  end
93
101
  end
@@ -1,4 +1,4 @@
1
- require 'inci_score/recognizer_rules'
1
+ require "inci_score/recognizer_rules"
2
2
 
3
3
  module InciScore
4
4
  class Recognizer
@@ -10,11 +10,11 @@ module InciScore
10
10
  @rules = rules
11
11
  end
12
12
 
13
- def call
13
+ def call(precise = false)
14
14
  @component = @rules.reduce(nil) do |component, rule|
15
15
  break(component) if component
16
16
  yield(rule) if block_given?
17
- rule.call(@src, @catalog)
17
+ rule.call(@src, @catalog, precise)
18
18
  end
19
19
  [@component, @catalog[@component]] if @component
20
20
  end
@@ -1,4 +1,4 @@
1
- require 'inci_score/refinements'
1
+ require "inci_score/refinements"
2
2
 
3
3
  module InciScore
4
4
  using Refinements
@@ -9,7 +9,7 @@ module InciScore
9
9
  module Key
10
10
  extend self
11
11
 
12
- def call(src, catalog)
12
+ def call(src, catalog, precise = false)
13
13
  src if catalog.has_key?(src)
14
14
  end
15
15
  end
@@ -17,13 +17,13 @@ module InciScore
17
17
  module Levenshtein
18
18
  extend self
19
19
 
20
- ALTERNATE_SEP = '/'
20
+ ALTERNATE_SEP = "/"
21
21
 
22
- def call(src, catalog)
22
+ def call(src, catalog, precise = false)
23
23
  size = src.size
24
24
  initial = src[0]
25
25
  component, distance = catalog.reduce([nil, size]) do |min, (_component, _)|
26
- next min unless _component.start_with?(initial)
26
+ next min unless precise || _component.start_with?(initial)
27
27
  match = (n = _component.index(ALTERNATE_SEP)) ? _component[0, n] : _component
28
28
  next min if match.size > (size + TOLERANCE)
29
29
  dist = src.distance(match)
@@ -39,13 +39,18 @@ module InciScore
39
39
 
40
40
  MIN_MEANINGFUL = 7
41
41
 
42
- def call(src, catalog)
42
+ def call(src, catalog, precise = false)
43
43
  return if src.size < TOLERANCE
44
- digits = src[0, MIN_MEANINGFUL]
44
+ digits = src[0, min_meaningful(precise)]
45
45
  catalog.detect do |component, _|
46
46
  component.matches?(/^#{Regexp::escape(digits)}/)
47
47
  end.to_a.first
48
48
  end
49
+
50
# Length of the leading slice of the source that the rule's +call+ matches
# against the catalog: MIN_MEANINGFUL by default, two characters longer
# when +precise+ is truthy.
def min_meaningful(precise)
  precise ? MIN_MEANINGFUL + 2 : MIN_MEANINGFUL
end
49
54
  end
50
55
 
51
56
  module Tokens
@@ -53,7 +58,7 @@ module InciScore
53
58
 
54
59
  UNMATCHABLE = %w[extract oil sodium acid sulfate]
55
60
 
56
- def call(src, catalog)
61
+ def call(src, catalog, precise = false)
57
62
  tokens(src).each do |token|
58
63
  catalog.each do |component, _|
59
64
  return component if component.matches?(/\b#{Regexp.escape(token)}\b/)
@@ -63,7 +68,7 @@ module InciScore
63
68
  end
64
69
 
65
70
  def tokens(src)
66
- (src.split(' ') - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort_by!(&:size).reverse!
71
+ (src.split(" ") - UNMATCHABLE).reject { |t| t.size < TOLERANCE }.sort_by!(&:size).reverse!
67
72
  end
68
73
  end
69
74
  end
@@ -1,5 +1,6 @@
1
- require 'etc'
2
- require 'puma'
1
+ require "etc"
2
+ require "puma/launcher"
3
+ require "puma/configuration"
3
4
 
4
5
  module InciScore
5
6
  class Server
@@ -29,13 +30,21 @@ module InciScore
29
30
  c.rackup RACKUP_FILE
30
31
  c.bind "tcp://#{DEFAULT_HOST}:#{@port}"
31
32
  c.workers @workers if workers?
32
- c.threads *@threads
33
+ c.threads(*@threads)
33
34
  c.preload_app! if @preload
34
35
  end
35
36
  end
36
37
 
37
38
  private def workers?
38
- @workers > 1 && !Puma.jruby? && !Puma.windows?
39
+ @workers > 1 && !java? && !windows?
40
+ end
41
+
42
# True when the interpreter runs on the Java platform (JRuby).
#
# RUBY_PLATFORM is "java" on JRuby, whereas RUBY_VERSION only carries the
# language version (e.g. "2.4.1") and therefore can never equal "java";
# checking the platform is what lets workers? skip worker processes there.
private def java?
  RUBY_PLATFORM == "java"
end
45
+
46
+ private def windows?
47
+ Gem.win_platform?
39
48
  end
40
49
  end
41
50
  end
@@ -1,3 +1,3 @@
1
1
  module InciScore
2
- VERSION = "2.2.1"
2
+ VERSION = "2.3.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: inci_score
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.1
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - costajob
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-05 00:00:00.000000000 Z
11
+ date: 2017-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri