inci_score 2.5.1 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +34 -0
  3. data/README.md +14 -25
  4. data/config/catalog.yml +3 -4
  5. data/config.ru +2 -2
  6. data/inci_score.gemspec +2 -3
  7. data/lib/inci_score/{app.rb → api.rb} +1 -1
  8. data/lib/inci_score/cli.rb +4 -14
  9. data/lib/inci_score/computer.rb +15 -6
  10. data/lib/inci_score/ingredient.rb +45 -0
  11. data/lib/inci_score/normalizer.rb +1 -1
  12. data/lib/inci_score/normalizer_rules.rb +0 -24
  13. data/lib/inci_score/recognizer.rb +25 -7
  14. data/lib/inci_score/recognizer_rules.rb +19 -22
  15. data/lib/inci_score/refinements.rb +1 -1
  16. data/lib/inci_score/response.rb +1 -1
  17. data/lib/inci_score/score.rb +1 -3
  18. data/lib/inci_score/scorer.rb +7 -9
  19. data/lib/inci_score/version.rb +1 -1
  20. data/lib/inci_score.rb +4 -5
  21. data/spec/bench/levenshtein_bench.rb +17 -0
  22. data/spec/bench/normalizer_rules_bench.rb +40 -0
  23. data/spec/bench/recognizer_rules_bench.rb +24 -0
  24. data/spec/helper.rb +6 -0
  25. data/spec/integration/api_spec.rb +23 -0
  26. data/spec/stubs.rb +170 -0
  27. data/spec/unit/catalog_spec.rb +7 -0
  28. data/spec/unit/cli_spec.rb +29 -0
  29. data/spec/unit/computer_spec.rb +31 -0
  30. data/spec/unit/ingredient_spec.rb +34 -0
  31. data/spec/unit/levenshtein_spec.rb +19 -0
  32. data/spec/unit/normalizer_rules_spec.rb +58 -0
  33. data/spec/unit/normalizer_spec.rb +31 -0
  34. data/spec/unit/recognizer_rules_spec.rb +46 -0
  35. data/spec/unit/recognizer_spec.rb +49 -0
  36. data/spec/unit/response_spec.rb +8 -0
  37. data/spec/unit/score_spec.rb +12 -0
  38. data/spec/unit/scorer_spec.rb +11 -0
  39. data/spec/unit/server_spec.rb +30 -0
  40. metadata +24 -23
  41. data/.gitignore +0 -13
  42. data/.travis.yml +0 -6
  43. data/bin/console +0 -7
  44. data/bin/setup +0 -6
  45. data/lib/inci_score/fetcher.rb +0 -41
  46. data/log/.gitignore +0 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e60cb7510ccdae05853733d9da9aefac5fd96c92
4
- data.tar.gz: 338c5331d203e29c7f3022a72867ea9eb23ff6df
3
+ metadata.gz: 3acb19f9ac522f6a50378ec318c6466ae9b67257
4
+ data.tar.gz: 3d46458825c9d1fb3ff74db0779b329b63c5f97a
5
5
  SHA512:
6
- metadata.gz: 355b1ea92c7a1d7fd02f47ef8dd5aed401a3840d6e8a9b6f9e94e54dd5c2b1e4ad4406a379b02fc640ae8ec438c221dd8004ba08451bb2935b0e48188b76f3aa
7
- data.tar.gz: 5749ed66ce6fcf83e277d21dd1d4f1e938d76fa77279c9a61c16760f006bb6f71a499289400614fb560d7f2d4fec13c1ee49e9b9662b2e9b3faa769037d938dd
6
+ metadata.gz: 9ab029ca732a8605502a7263fa3d7fc96ad20be2ca6aaf9b142e599746eae8216bd326ae318b92aed19a24e157456fe63dd99642d52f536c0a3ea0a4cd59e5bd
7
+ data.tar.gz: 6d23d330b4267abc0d2ba4241c52d0f55d94922bc1dbb776842e3266afd699f366ade1c648f2e76c3ba2a85db7a680aac59257f683d9d85b5fb2b9b352eae536
data/Gemfile.lock ADDED
@@ -0,0 +1,34 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ inci_score (3.0.1)
5
+ RubyInline (~> 3)
6
+ puma (~> 3)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ RubyInline (3.12.4)
12
+ ZenTest (~> 4.3)
13
+ ZenTest (4.11.1)
14
+ benchmark-ips (2.7.2)
15
+ minitest (5.10.3)
16
+ puma (3.10.0)
17
+ rack (2.0.3)
18
+ rack-test (0.7.0)
19
+ rack (>= 1.0, < 3)
20
+ rake (10.5.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ benchmark-ips (~> 2)
27
+ bundler (~> 1.11)
28
+ inci_score!
29
+ minitest (~> 5.0)
30
+ rack-test (~> 0.6)
31
+ rake (~> 10.0)
32
+
33
+ BUNDLED WITH
34
+ 1.15.4
data/README.md CHANGED
@@ -8,7 +8,6 @@
8
8
  * [API](#api)
9
9
  * [Unrecognized components](#unrecognized-components)
10
10
  * [CLI](#cli)
11
- * [Refresh catalog](#refresh-catalog)
12
11
  * [HTTP server](#http-server)
13
12
  * [Triggering a request](#triggering-a-request)
14
13
  * [Getting help](#getting-help)
@@ -52,9 +51,7 @@ The API of the gem is pretty simple, you can open irb by *bundle console* and st
52
51
 
53
52
  ```ruby
54
53
  inci = InciScore::Computer.new(src: 'aqua, dimethicone').call
55
- => #<InciScore::Response:0x000000029f8100 @components={"aqua"=>0, "dimethicone"=>4}, @score=53.762874945799766, @unrecognized=[], @valid=true>
56
- inci.score
57
- => 53.762874945799766
54
+ inci.score # 53.7629
58
55
  ```
59
56
 
60
57
  As you can see, the results are wrapped in an *InciScore::Response* object; this is useful when dealing with the CLI and HTTP interfaces (read below).
@@ -66,11 +63,8 @@ Is still possible to query the object for its state:
66
63
 
67
64
  ```ruby
68
65
  inci = InciScore::Computer.new(src: 'ingredients:aqua,noent1,noent2').call
69
- => #<InciScore::Response:0x000000030c16d0 @components={"aqua"=>0}, @score=100.0, @unrecognized=["noent1", "noent2"], @valid=false>
70
- inci.valid
71
- => false
72
- inci.unrecognized
73
- => ["noent1", "noent2"]
66
+ inci.valid # false
67
+ inci.unrecognized # ["noent1", "noent2"]
74
68
  ```
75
69
 
76
70
  ## CLI
@@ -80,7 +74,7 @@ You can collect INCI data by using the available CLI interface:
80
74
  inci_score --src="ingredients: aqua, dimethicone, pej-10, noent"
81
75
 
82
76
  TOTAL SCORE:
83
- 47.18034913243358
77
+ 47.1803
84
78
  VALID STATE:
85
79
  true
86
80
  COMPONENTS (hazard - name):
@@ -91,12 +85,6 @@ UNRECOGNIZED:
91
85
  noent
92
86
  ```
93
87
 
94
- ### Refresh catalog
95
- You also have the option to fetch a fresh catalog from www.biodizionario.it by specifyng a flag:
96
- ```shell
97
- inci_score --fresh --src="aqua, dimethicone"
98
- ```
99
-
100
88
  ### HTTP server
101
89
  The CLI interface exposes a Web layer based on the [Puma](http://puma.io/) application server.
102
90
  The HTTP server is started on the specified port by spawning as many workers as your current workstation supports:
@@ -111,17 +99,16 @@ You can pass the source string directly as a HTTP parameter (URI escaped):
111
99
 
112
100
  ```shell
113
101
  curl http://127.0.0.1:9292?src=aqua,dimethicone
114
- => {"components":{"aqua":0,"dimethicone":4},"unrecognized":[],"score":53.762874945799766,"valid":true}
102
+ => {"components":{"aqua":0,"dimethicone":4},"unrecognized":[],"score":53.7629,"valid":true}
115
103
  ```
116
104
 
117
105
  ### Getting help
118
106
  You can get CLI interface help by:
119
107
  ```shell
120
- Usage: inci_score --src="aqua, parfum, etc" --fresh --precise
108
+ Usage: inci_score --src="aqua, parfum, etc" --precise
121
109
  -s, --src=SRC The INCI list: "aqua, parfum, etc"
122
- -f, --fresh Fetch a fresh catalog from remote
123
110
  -p, --precise Compute components more precisely (slower)
124
- --http=PORT Start Puma server on the specified port
111
+ --http=PORT Start HTTP server on the specified port
125
112
  -h, --help Prints this help
126
113
  ```
127
114
 
@@ -140,13 +127,15 @@ I registered these benchmarks with a MacBook PRO 15 mid 2015 having these specs:
140
127
  * Ruby 2.4
141
128
 
142
129
  ### Wrk
143
- As always i used [wrk](https://github.com/wg/wrk) as the loading tool, executed on a dedicated workstation.
130
+ As always, I used [wrk](https://github.com/wg/wrk) as the loading tool.
144
131
  I measured the library three times, picking the best lap.
145
132
  ```shell
146
- wrk -t 4 -c 100 -d 30s --timeout 2000 http://0.0.0.0:9292/?src=aqua,parfum,zeolithe
133
+ wrk -t 4 -c 100 -d 30s --timeout 2000 "http://0.0.0.0:9292/?src=<source>&precise=true"
147
134
  ```
148
135
 
149
136
  ### Results
150
- | Throughput (req/s) | Latency (avg/stdev/max) |
151
- | -----------------: | ------------------------: |
152
- | 2908.63 | 42.23ms/47.17ms/461.36ms |
137
+ | Source | Throughput (req/s) |
138
+ | --------------------------: | -----------------: |
139
+ | aqua,parfum,zeolite | 18784.21 |
140
+ | agua,porfum,zeolithe | 1087.88 |
141
+ | agua/water,porfum/fragrance | 1599.47 |
data/config/catalog.yml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- water: 0
3
- parfum: 0
4
- fragrance: 0
2
+ aqua: 0
3
+ parfum: 3
4
+ fragrance: 3
5
5
  phosphatidylcholine: 1
6
6
  1-naphthol: 4
7
7
  1,2,4-benzenetriacetate: 4
@@ -296,7 +296,6 @@ apricot kernel oil peg-6 esters: 3
296
296
  apricotamide dea: 3
297
297
  apricotamidopropyl betaine: 2
298
298
  apricotamidopropyl ethyldimonium ethosulfate: 2
299
- aqua: 0
300
299
  arachideth-20: 3
301
300
  arachidic acid: 1
302
301
  arachidonic acid: 1
data/config.ru CHANGED
@@ -1,3 +1,3 @@
1
- require 'inci_score/app'
1
+ require "inci_score/api"
2
2
 
3
- run InciScore::App
3
+ run InciScore::Api
data/inci_score.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  lib = File.expand_path('../lib', __FILE__)
2
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
- require 'inci_score/version'
3
+ require "inci_score/version"
4
4
 
5
5
  Gem::Specification.new do |s|
6
6
  s.name = "inci_score"
@@ -9,14 +9,13 @@ Gem::Specification.new do |s|
9
9
  s.email = ["costajob@gmail.com"]
10
10
  s.summary = %q{A library that computes the hazard of cosmetic products components, based on the Biodizionario data.}
11
11
  s.homepage = "https://github.com/costajob/inci_score.git"
12
- s.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|test|s|features)/}) }
12
+ s.files = %w(README.md Rakefile inci_score.gemspec Gemfile Gemfile.lock config.ru config/catalog.yml ext/levenshtein.c bin/inci_score) + Dir["{spec,lib}/**/*.rb"]
13
13
  s.bindir = "bin"
14
14
  s.executables << "inci_score"
15
15
  s.require_paths = ["lib"]
16
16
  s.license = "MIT"
17
17
  s.required_ruby_version = ">= 2.2.2"
18
18
 
19
- s.add_runtime_dependency "nokogiri", "~> 1.6"
20
19
  s.add_runtime_dependency "puma", "~> 3"
21
20
  s.add_runtime_dependency "RubyInline", "~> 3"
22
21
 
@@ -2,7 +2,7 @@ require "rack"
2
2
  require "inci_score"
3
3
 
4
4
  module InciScore
5
- module App
5
+ module Api
6
6
  extend self
7
7
 
8
8
  def catalog
@@ -9,35 +9,30 @@ module InciScore
9
9
  @io = io
10
10
  @catalog = catalog
11
11
  @src = nil
12
- @fresh = nil
13
12
  @port = nil
14
13
  @precise = nil
15
14
  end
16
15
 
17
- def call(server_klass: Server, computer_klass: Computer, fetcher: Fetcher.new)
16
+ def call(server_klass: Server, computer_klass: Computer)
18
17
  parser.parse!(@args)
19
18
  return server_klass.new(port: @port, preload: true).run if @port
20
19
  return @io.puts(%q{Specify inci list as: --src="aqua, parfum, etc"}) unless @src
21
- @io.puts computer_klass.new(src: @src, catalog: catalog(fetcher), precise: @precise).call
20
+ @io.puts computer_klass.new(src: @src, catalog: @catalog, precise: @precise).call
22
21
  end
23
22
 
24
23
  private def parser
25
24
  OptionParser.new do |opts|
26
- opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --fresh --precise}
25
+ opts.banner = %q{Usage: inci_score --src="aqua, parfum, etc" --precise}
27
26
 
28
27
  opts.on("-sSRC", "--src=SRC", %q{The INCI list: "aqua, parfum, etc"}) do |src|
29
28
  @src = src
30
29
  end
31
30
 
32
- opts.on("-f", "--fresh", "Fetch a fresh catalog from remote") do |fresh|
33
- @fresh = fresh
34
- end
35
-
36
31
  opts.on("-p", "--precise", "Compute components more precisely (slower)") do |precise|
37
32
  @precise = precise
38
33
  end
39
34
 
40
- opts.on("--http=PORT", "Start Puma server on the specified port") do |port|
35
+ opts.on("--http=PORT", "Start HTTP server on the specified port") do |port|
41
36
  @port = port
42
37
  end
43
38
 
@@ -47,10 +42,5 @@ module InciScore
47
42
  end
48
43
  end
49
44
  end
50
-
51
- private def catalog(fetcher)
52
- return @catalog unless @fresh
53
- fetcher.call
54
- end
55
45
  end
56
46
  end
@@ -1,13 +1,19 @@
1
+ require "inci_score/ingredient"
1
2
  require "inci_score/normalizer"
2
3
  require "inci_score/recognizer"
3
- require "inci_score/scorer"
4
4
  require "inci_score/response"
5
+ require "inci_score/scorer"
5
6
 
6
7
  module InciScore
7
8
  class Computer
8
9
  TOLERANCE = 30.0
10
+ PERCENT = 100.0
9
11
 
10
- def initialize(src:, catalog:, tolerance: TOLERANCE, rules: Normalizer::DEFAULT_RULES, precise: false)
12
+ def initialize(src:,
13
+ catalog: Catalog.fetch,
14
+ tolerance: TOLERANCE,
15
+ rules: Normalizer::DEFAULT_RULES,
16
+ precise: false)
11
17
  @src = src
12
18
  @catalog = catalog
13
19
  @tolerance = Float(tolerance)
@@ -17,18 +23,21 @@ module InciScore
17
23
  end
18
24
 
19
25
  def call
20
- @response ||= Response.new(components: components.map(&:first),
26
+ @response ||= Response.new(components: components.map(&:name),
21
27
  unrecognized: @unrecognized,
22
28
  score: score,
23
29
  valid: valid?)
24
30
  end
25
31
 
26
32
  private def score
27
- Scorer.new(components.map(&:last)).call
33
+ Scorer.new(components.map(&:hazard)).call
28
34
  end
29
35
 
30
36
  private def ingredients
31
- @ingredients ||= Normalizer.new(src: @src, rules: @rules).call
37
+ @ingredients ||= begin
38
+ tokens = Normalizer.new(src: @src, rules: @rules).call
39
+ Ingredient.bulk(tokens)
40
+ end
32
41
  end
33
42
 
34
43
  private def components
@@ -40,7 +49,7 @@ module InciScore
40
49
  end
41
50
 
42
51
  private def valid?
43
- @unrecognized.size / (ingredients.size / 100.0) <= @tolerance
52
+ @unrecognized.size / (ingredients.size / PERCENT) <= @tolerance
44
53
  end
45
54
  end
46
55
  end
@@ -0,0 +1,45 @@
1
+ module InciScore
2
+ class Ingredient
3
+ SLASH = "/"
4
+ SLASH_RULE = /(?<!ate)\//
5
+ PARENTHESIS = %w[( ) [ ]]
6
+ DETAILS_RULE = /(\(.+\)|\[.+\])/
7
+
8
+ def self.bulk(tokens)
9
+ tokens.map { |raw| new(raw) }
10
+ end
11
+
12
+ def initialize(raw)
13
+ @raw = raw
14
+ @tokens = raw.split(SLASH_RULE).map(&:strip)
15
+ end
16
+
17
+ def to_s
18
+ values.join(SLASH)
19
+ end
20
+
21
+ def values
22
+ @values ||= synonims.unshift(name).compact
23
+ end
24
+
25
+ private def name
26
+ return @tokens.first unless parenthesis?
27
+ @raw.sub(DETAILS_RULE, "").strip
28
+ end
29
+
30
+ private def synonims
31
+ @tokens[1, @tokens.size]
32
+ end
33
+
34
+ private def details
35
+ return unless parenthesis?
36
+ @raw.match(DETAILS_RULE)[1].delete(PARENTHESIS.join("|"))
37
+ end
38
+
39
+ private def parenthesis?
40
+ PARENTHESIS.each_slice(2).any? do |pair|
41
+ pair.all? { |p| @raw.index(p) }
42
+ end
43
+ end
44
+ end
45
+ end
@@ -2,7 +2,7 @@ require "inci_score/normalizer_rules"
2
2
 
3
3
  module InciScore
4
4
  class Normalizer
5
- DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Desynonymizer, Rules::Deparenthesizer, Rules::Uniquifier]
5
+ DEFAULT_RULES = [Rules::Replacer, Rules::Downcaser, Rules::Beheader, Rules::Separator, Rules::Tokenizer, Rules::Sanitizer, Rules::Uniquifier]
6
6
 
7
7
  attr_reader :src
8
8
 
@@ -77,30 +77,6 @@ module InciScore
77
77
  end
78
78
  end
79
79
 
80
- module Desynonymizer
81
- extend self
82
-
83
- SYNONYM = /\/.*/
84
-
85
- def call(src)
86
- Array(src).map do |token|
87
- token.sub(SYNONYM, '').strip
88
- end.reject(&:empty?)
89
- end
90
- end
91
-
92
- module Deparenthesizer
93
- extend self
94
-
95
- PARENTHESIS = /\(.+?\)|\[.+?\]/
96
-
97
- def call(src)
98
- Array(src).map do |token|
99
- token.sub(PARENTHESIS, '').strip
100
- end.reject(&:empty?)
101
- end
102
- end
103
-
104
80
  module Uniquifier
105
81
  extend self
106
82
 
@@ -4,19 +4,37 @@ module InciScore
4
4
  class Recognizer
5
5
  DEFAULT_RULES = [Rules::Key, Rules::Levenshtein, Rules::Digits, Rules::Tokens]
6
6
 
7
- def initialize(src, catalog, rules = DEFAULT_RULES)
8
- @src = src
7
+ Component = Struct.new(:name, :hazard)
8
+
9
+ attr_reader :applied
10
+
11
+ def initialize(ingredient, catalog, rules = DEFAULT_RULES)
12
+ @ingredient = ingredient
9
13
  @catalog = catalog
10
14
  @rules = rules
15
+ @applied = []
11
16
  end
12
17
 
13
18
  def call(precise = false)
14
- @component = @rules.reduce(nil) do |component, rule|
19
+ return if @ingredient.to_s.empty?
20
+ component = find_component(precise)
21
+ return unless component
22
+ Component.new(component, @catalog[component])
23
+ end
24
+
25
+ private def find_component(precise)
26
+ @rules.reduce(nil) do |component, rule|
15
27
  break(component) if component
16
- yield(rule) if block_given?
17
- rule.call(@src, @catalog, precise)
28
+ applied << rule
29
+ apply(rule, precise)
18
30
  end
19
- [@component, @catalog[@component]] if @component
20
- end
31
+ end
32
+
33
+ private def apply(rule, precise)
34
+ return rule.call(@ingredient.to_s, @catalog) unless precise
35
+ @ingredient.values.map do |value|
36
+ rule.call(value, @catalog)
37
+ end.find(&:itself)
38
+ end
21
39
  end
22
40
  end
@@ -9,7 +9,7 @@ module InciScore
9
9
  module Key
10
10
  extend self
11
11
 
12
- def call(src, catalog, precise = false)
12
+ def call(src, catalog)
13
13
  src if catalog.has_key?(src)
14
14
  end
15
15
  end
@@ -17,20 +17,24 @@ module InciScore
17
17
  module Levenshtein
18
18
  extend self
19
19
 
20
- ALTERNATE_SEP = "/"
20
+ Result = Struct.new(:name, :distance) do
21
+ def tolerable?(size)
22
+ distance < TOLERANCE && distance <= (size-1)
23
+ end
24
+ end
21
25
 
22
- def call(src, catalog, precise = false)
26
+ def call(src, catalog)
23
27
  size = src.size
28
+ farthest = Result.new(nil, size)
24
29
  initial = src[0]
25
- component, distance = catalog.reduce([nil, size]) do |min, (_component, _)|
26
- next min unless precise || _component.start_with?(initial)
27
- match = (n = _component.index(ALTERNATE_SEP)) ? _component[0, n] : _component
28
- next min if match.size > (size + TOLERANCE)
29
- dist = src.distance(match)
30
- min = [_component, dist] if dist < min[1]
31
- min
30
+ result = catalog.reduce(farthest) do |nearest, (component, _)|
31
+ next nearest unless component.start_with?(initial)
32
+ next nearest if component.size > (size + TOLERANCE)
33
+ d = src.distance(component)
34
+ nearest = Result.new(component, d) if d < nearest.distance
35
+ nearest
32
36
  end
33
- component unless distance > TOLERANCE || distance >= (size-1)
37
+ result.name if result.tolerable?(size)
34
38
  end
35
39
  end
36
40
 
@@ -39,17 +43,10 @@ module InciScore
39
43
 
40
44
  MIN_MEANINGFUL = 7
41
45
 
42
- def call(src, catalog, precise = false)
46
+ def call(src, catalog)
43
47
  return if src.size < TOLERANCE
44
- digits = src[0, min_meaningful(precise)]
45
- catalog.detect do |component, _|
46
- component.matches?(/^#{Regexp::escape(digits)}/)
47
- end.to_a.first
48
- end
49
-
50
- def min_meaningful(precise)
51
- return MIN_MEANINGFUL unless precise
52
- MIN_MEANINGFUL + 2
48
+ digits = src[0, MIN_MEANINGFUL]
49
+ catalog.detect { |component, _| component.start_with?(digits) }.to_a.first
53
50
  end
54
51
  end
55
52
 
@@ -58,7 +55,7 @@ module InciScore
58
55
 
59
56
  UNMATCHABLE = %w[extract oil sodium acid sulfate]
60
57
 
61
- def call(src, catalog, precise = false)
58
+ def call(src, catalog)
62
59
  tokens(src).each do |token|
63
60
  catalog.each do |component, _|
64
61
  return component if component.matches?(/\b#{Regexp.escape(token)}\b/)
@@ -1,4 +1,4 @@
1
- require 'inci_score/levenshtein'
1
+ require "inci_score/levenshtein"
2
2
 
3
3
  module InciScore
4
4
  module Refinements
@@ -1,4 +1,4 @@
1
- require 'json'
1
+ require "json"
2
2
 
3
3
  module InciScore
4
4
  class Response
@@ -8,9 +8,7 @@ module InciScore
8
8
  @value = compute
9
9
  end
10
10
 
11
- private
12
-
13
- def compute
11
+ private def compute
14
12
  (@hazard - @weight).tap do |s|
15
13
  return 0.0 if s < 0
16
14
  end
@@ -1,4 +1,4 @@
1
- require 'inci_score/score'
1
+ require "inci_score/score"
2
2
 
3
3
  module InciScore
4
4
  class Scorer
@@ -12,33 +12,31 @@ module InciScore
12
12
 
13
13
  def call
14
14
  return 0 if @hazards.empty?
15
- 100 - avg * HAZARD_PERCENT
15
+ (100 - avg * HAZARD_PERCENT).round(4)
16
16
  end
17
17
 
18
- private
19
-
20
- def avg
18
+ private def avg
21
19
  avg_weighted / @size.to_f
22
20
  end
23
21
 
24
- def avg_weighted
22
+ private def avg_weighted
25
23
  return @hazards.reduce(&:+) if same_hazard?
26
24
  weighted.reduce(0.0) do |acc,score|
27
25
  acc += score.value
28
26
  end
29
27
  end
30
28
 
31
- def same_hazard?
29
+ private def same_hazard?
32
30
  @hazards.uniq.size == 1
33
31
  end
34
32
 
35
- def weighted
33
+ private def weighted
36
34
  @hazards.each_with_index.map do |h,i|
37
35
  Score.new(h, weight(i))
38
36
  end
39
37
  end
40
38
 
41
- def weight(index)
39
+ private def weight(index)
42
40
  Math.log(index+1, @size * WEIGHT_FACTOR)
43
41
  end
44
42
  end
@@ -1,3 +1,3 @@
1
1
  module InciScore
2
- VERSION = "2.5.1"
2
+ VERSION = "3.0.3"
3
3
  end
data/lib/inci_score.rb CHANGED
@@ -1,5 +1,4 @@
1
- require 'open-uri'
2
- require 'inci_score/version'
3
- require 'inci_score/fetcher'
4
- require 'inci_score/catalog'
5
- require 'inci_score/cli'
1
+ require "inci_score/version"
2
+ require "inci_score/catalog"
3
+ require "inci_score/cli"
4
+ require "inci_score/api"
@@ -0,0 +1,17 @@
1
+ require "helper"
2
+
3
+ s, t = "agua", "aqua"
4
+ lev_ruby = InciScore::Levenshtein.new(s, t)
5
+ lev_c = InciScore::LevenshteinC.new
6
+
7
+ Benchmark.ips do |x|
8
+ x.report("levenshtein ruby") do
9
+ lev_ruby.call
10
+ end
11
+
12
+ x.report("levenshtein C") do
13
+ lev_c.call(s, s.size, t, t.size)
14
+ end
15
+
16
+ x.compare!
17
+ end
@@ -0,0 +1,40 @@
1
+ require "helper"
2
+
3
+ replacer = InciScore::Normalizer::Rules::Replacer
4
+ downcaser = InciScore::Normalizer::Rules::Downcaser
5
+ beheader = InciScore::Normalizer::Rules::Beheader
6
+ separator = InciScore::Normalizer::Rules::Separator
7
+ tokenizer = InciScore::Normalizer::Rules::Tokenizer
8
+ sanitizer = InciScore::Normalizer::Rules::Sanitizer
9
+ uniquifier = InciScore::Normalizer::Rules::Uniquifier
10
+ src = "‘INGREDIENTS‘:\n\nCOCO—BETANE,AQUA/WATER,DIMETHICONE"
11
+
12
+ Benchmark.ips do |x|
13
+ x.report("replacer") do
14
+ replacer.call(src)
15
+ end
16
+
17
+ x.report("downcaser") do
18
+ downcaser.call(src)
19
+ end
20
+
21
+ x.report("beheader") do
22
+ beheader.call(src)
23
+ end
24
+
25
+ x.report("separator") do
26
+ separator.call(src)
27
+ end
28
+
29
+ x.report("tokenizer") do
30
+ tokenizer.call(src)
31
+ end
32
+
33
+ x.report("sanitizer") do
34
+ sanitizer.call(src)
35
+ end
36
+
37
+ x.report("uniquifier") do
38
+ uniquifier.call(src)
39
+ end
40
+ end