edits 0.2.1 → 0.2.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e40f01c34af084cdf604c0fa99586d2c45b9e76ac6324ba6c279ef9cc0d02062
4
- data.tar.gz: efceac3eee3f9a6997378751b1cc32970622027d5c7e68e32e02fc15c77048f2
3
+ metadata.gz: f1b79624e5ce72cbf1e7623ecb909c42b1fc6a82763e922b30b25589a040f2c1
4
+ data.tar.gz: 26175c109066ecc63c4df37493493011724e9e4dfe0c57cbae47411f6a4daf85
5
5
  SHA512:
6
- metadata.gz: 998ed35ade1b507c8fb21070ef79e1be31d5edaf187a5e5b7499cbcb2605cbce3661fd9af42c598e90c8a191616a5dad57417e704b8a33002931b4b68d106f66
7
- data.tar.gz: cad3e77feebc2d5d7f56f482f5ba9c26d12cead3a2d140bbe3c290f01c8d26dd7ae31cc9068f8ebfab51ed10b3b4d0f7b15506af3b2d8b0e8c3c81229f0dec9c
6
+ metadata.gz: 7c4b25223ef3db51afad11160fab94028fe5ab9d35f8918b63ac6cc5c3894245bea843247717ef1f93e627923dbdaf6c5844a4a0863f00598515bf0e07aa87a8
7
+ data.tar.gz: 4d060b4eebc4a2cad7b37b1ba6e4e30a4c5c2f38640ce2244960bf5ee0cba06b62ac577beb271e0919df784c999e69a740d4ec47daedac0cf1f06f4c395d7ce4
@@ -1,5 +1,5 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ TargetRubyVersion: 2.5
3
3
  Metrics/AbcSize:
4
4
  Enabled: false
5
5
  Metrics/CyclomaticComplexity:
@@ -3,8 +3,9 @@ language: ruby
3
3
  cache: bundler
4
4
  rvm:
5
5
  - 2.3
6
- - 2.4.3
7
- - 2.5
6
+ - 2.4
7
+ - 2.5.3
8
+ - 2.6
8
9
  - ruby-head
9
10
  - rbx-3
10
11
  matrix:
@@ -19,7 +20,7 @@ jobs:
19
20
  include:
20
21
  - stage: gem release
21
22
  if: tag IS present
22
- rvm: 2.4.3
23
+ rvm: 2.5.3
23
24
  script: echo "Deploying to rubygems.org ..."
24
25
  deploy:
25
26
  provider: rubygems
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Edits
2
2
 
3
3
  [![Build Status](https://travis-ci.org/tcrouch/edits.svg?branch=master)](https://travis-ci.org/tcrouch/edits)
4
- [![codebeat badge](https://codebeat.co/badges/005039fa-25c2-4005-8974-97c1ae9ff6eb)](https://codebeat.co/projects/github-com-tcrouch-edits-master)
4
+ [![Codacy Badge](https://api.codacy.com/project/badge/Grade/64cb50b8e9ce4ec2a752d091e441b09d)](https://app.codacy.com/app/t.crouch/edits?utm_source=github.com&utm_medium=referral&utm_content=tcrouch/edits&utm_campaign=Badge_Grade_Dashboard)
5
5
  [![Inline docs](http://inch-ci.org/github/tcrouch/edits.svg?branch=master)](http://inch-ci.org/github/tcrouch/edits)
6
6
  [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/github/tcrouch/edits)
7
7
 
@@ -95,6 +95,13 @@ Edits::JaroWinkler.distance "information", "informant"
95
95
  # => 0.05858585858585863
96
96
  ```
97
97
 
98
+ ### Hamming
99
+
100
+ ```ruby
101
+ Edits::Hamming.distance("explorer", "exploded")
102
+ # => 2
103
+ ```
104
+
98
105
  ## Development
99
106
 
100
107
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- lib = File.expand_path("../lib", __FILE__)
3
+ lib = File.expand_path("lib", __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "edits/version"
6
6
 
@@ -27,7 +27,8 @@ Gem::Specification.new do |spec|
27
27
  spec.require_paths = ["lib"]
28
28
 
29
29
  spec.add_development_dependency "benchmark-ips", "~> 2.7"
30
- spec.add_development_dependency "bundler", "~> 1.15"
30
+ spec.add_development_dependency "bundler", "~> 2.0"
31
+ spec.add_development_dependency "codacy-coverage", "~> 2.1"
31
32
  spec.add_development_dependency "rake", "~> 12.1"
32
33
  spec.add_development_dependency "redcarpet", "~> 3.4"
33
34
  spec.add_development_dependency "rspec", "~> 3.6"
@@ -18,11 +18,13 @@ module Edits
18
18
  # @return [String, nil] most similar string, or nil for empty array
19
19
  def most_similar(prototype, strings)
20
20
  return nil if strings.empty?
21
+
21
22
  min_s = strings[0]
22
23
  min_d = distance(prototype, min_s)
23
24
 
24
25
  strings[1..-1].each do |s|
25
26
  return min_s if min_d.zero?
27
+
26
28
  d = distance_with_max(prototype, s, min_d)
27
29
  if d < min_d
28
30
  min_d = d
@@ -16,7 +16,7 @@ module Edits
16
16
  # # => 3
17
17
  # @param seq1 [String, Array]
18
18
  # @param seq2 [String, Array]
19
- # @return [Integer]
19
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
20
20
  def self.distance(seq1, seq2)
21
21
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
22
22
 
@@ -8,10 +8,12 @@ module Edits
8
8
  # Calculate the Hamming distance between two sequences.
9
9
  #
10
10
  # @note A true distance metric, satisfies triangle inequality.
11
- #
11
+ # @example
12
+ # Edits::Hamming.distance("explorer", "exploded")
13
+ # # => 2
12
14
  # @param seq1 [String, Array]
13
15
  # @param seq2 [String, Array]
14
- # @return [Integer] Hamming distance
16
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
15
17
  def self.distance(seq1, seq2)
16
18
  # if seq1.is_a?(Integer) && seq2.is_a?(Integer)
17
19
  # return (seq1 ^ seq2).to_s(2).count("1")
@@ -16,7 +16,7 @@ module Edits
16
16
  # # => 0.9023569023569024
17
17
  # @param seq1 [String, Array]
18
18
  # @param seq2 [String, Array]
19
- # @return [Float] similarity, between 0.0 (none) and 1.0 (identical)
19
+ # @return [Float] similarity, from 0.0 (none) to 1.0 (identical)
20
20
  def self.similarity(seq1, seq2)
21
21
  return 1.0 if seq1 == seq2
22
22
  return 0.0 if seq1.empty? || seq2.empty?
@@ -39,7 +39,7 @@ module Edits
39
39
  # Edits::Jaro.distance("information", "informant")
40
40
  # # => 0.09764309764309764
41
41
  # @param (see #distance)
42
- # @return [Float] distance, between 0.0 (identical) and 1.0 (distant)
42
+ # @return [Float] distance, from 0.0 (identical) to 1.0 (distant)
43
43
  def self.distance(str1, str2)
44
44
  1.0 - similarity(str1, str2)
45
45
  end
@@ -87,6 +87,7 @@ module Edits
87
87
  seq1.length.times do |i|
88
88
  # find a match in first string
89
89
  next unless seq1_flags[i] == true
90
+
90
91
  # go to location of next match on second string
91
92
  j += 1 until seq2_flags[j]
92
93
 
@@ -9,7 +9,7 @@ module Edits
9
9
  # Should not exceed 0.25 or metric range will leave 0..1
10
10
  WINKLER_PREFIX_WEIGHT = 0.1
11
11
 
12
- # Threshold for boosting Jaro with winkler prefix multiplier.
12
+ # Threshold for boosting Jaro with Winkler prefix multiplier.
13
13
  # Default is 0.7
14
14
  WINKLER_THRESHOLD = 0.7
15
15
 
@@ -31,27 +31,24 @@ module Edits
31
31
  # @param seq2 [String, Array]
32
32
  # @param threshold [Float] threshold for applying Winkler prefix weighting
33
33
  # @param weight [Float] weighting for common prefix, should not exceed 0.25
34
- # @return [Float] similarity, between 0.0 (none) and 1.0 (identical)
34
+ # @return [Float] similarity, from 0.0 (none) to 1.0 (identical)
35
35
  def self.similarity(
36
36
  seq1, seq2,
37
37
  threshold: WINKLER_THRESHOLD,
38
38
  weight: WINKLER_PREFIX_WEIGHT
39
39
  )
40
40
 
41
- dj = Jaro.similarity(seq1, seq2)
41
+ sj = Jaro.similarity(seq1, seq2)
42
+ return sj unless sj > threshold
42
43
 
43
- if dj > threshold
44
- # size of common prefix, max 4
45
- max_bound = seq1.length > seq2.length ? seq2.length : seq1.length
46
- max_bound = 4 if max_bound > 4
44
+ # size of common prefix, max 4
45
+ max_bound = seq1.length > seq2.length ? seq2.length : seq1.length
46
+ max_bound = 4 if max_bound > 4
47
47
 
48
- l = 0
49
- l += 1 until seq1[l] != seq2[l] || l >= max_bound
48
+ l = 0
49
+ l += 1 until seq1[l] != seq2[l] || l >= max_bound
50
50
 
51
- l < 1 ? dj : dj + (l * weight * (1 - dj))
52
- else
53
- dj
54
- end
51
+ l < 1 ? sj : sj + (l * weight * (1 - sj))
55
52
  end
56
53
 
57
54
  # Calculate Jaro-Winkler distance
@@ -62,7 +59,7 @@ module Edits
62
59
  # Edits::JaroWinkler.distance("information", "informant")
63
60
  # # => 0.05858585858585863
64
61
  # @param (see #distance)
65
- # @return [Float] distance, between 0.0 (identical) and 1.0 (distant)
62
+ # @return [Float] distance, from 0.0 (identical) to 1.0 (distant)
66
63
  def self.distance(
67
64
  seq1, seq2,
68
65
  threshold: WINKLER_THRESHOLD,
@@ -7,6 +7,8 @@ module Edits
7
7
  # * Insertion
8
8
  # * Deletion
9
9
  # * Substitution
10
+ #
11
+ # @see https://en.wikipedia.org/wiki/Levenshtein_distance
10
12
  module Levenshtein
11
13
  extend Compare
12
14
 
@@ -14,11 +16,11 @@ module Edits
14
16
  #
15
17
  # @note A true distance metric, satisfies triangle inequality.
16
18
  # @example
17
- # Levenshtein.distance('sand', 'hands')
19
+ # Levenshtein.distance("sand", "hands")
18
20
  # # => 2
19
21
  # @param seq1 [String, Array]
20
22
  # @param seq2 [String, Array]
21
- # @return [Integer]
23
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
22
24
  def self.distance(seq1, seq2)
23
25
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
24
26
 
@@ -74,7 +76,7 @@ module Edits
74
76
  # @param seq1 [String, Array]
75
77
  # @param seq2 [String, Array]
76
78
  # @param max [Integer] maximum distance
77
- # @return [Integer]
79
+ # @return [Integer] distance, from 0 (identical) to max (more distant)
78
80
  def self.distance_with_max(seq1, seq2, max)
79
81
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
80
82
 
@@ -24,7 +24,7 @@ module Edits
24
24
  # # => 3
25
25
  # @param seq1 [String, Array]
26
26
  # @param seq2 [String, Array]
27
- # @return [Integer]
27
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
28
28
  def self.distance(seq1, seq2)
29
29
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
30
30
 
@@ -96,7 +96,7 @@ module Edits
96
96
  # @param seq1 [String, Array]
97
97
  # @param seq2 [String, Array]
98
98
  # @param max [Integer] maximum distance
99
- # @return [Integer]
99
+ # @return [Integer] distance, from 0 (identical) to max (more distant)
100
100
  def self.distance_with_max(seq1, seq2, max)
101
101
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
102
102
 
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Edits
4
4
  # Current gem version
5
- VERSION = "0.2.1"
5
+ VERSION = "0.2.2"
6
6
  end
@@ -5,7 +5,7 @@ require "benchmark/ips"
5
5
  require "edits"
6
6
 
7
7
  namespace :benchmark do
8
- desc "levenshtein distance vs. distance_with_max (x100)"
8
+ desc "levenshtein distance vs distance_with_max (x100)"
9
9
  task :lev_max do
10
10
  words = File.read("/usr/share/dict/words")
11
11
  .split(/\n/).compact.shuffle(random: Random.new(1))
@@ -64,7 +64,7 @@ namespace :benchmark do
64
64
  end
65
65
  end
66
66
 
67
- desc "restricted distance vs. distance_with_max (x100)"
67
+ desc "restricted distance vs distance_with_max (x100)"
68
68
  task :restricted_max do
69
69
  words = File.read("/usr/share/dict/words")
70
70
  .split(/\n/).compact.shuffle(random: Random.new(1))
@@ -123,7 +123,7 @@ namespace :benchmark do
123
123
  end
124
124
  end
125
125
 
126
- desc "most_similar vs. min_by (100 words)"
126
+ desc "most_similar vs min_by (100 words)"
127
127
  task :lev_similar do
128
128
  words = File.read("/usr/share/dict/words")
129
129
  .split(/\n/).compact.shuffle(random: Random.new(1))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edits
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Crouch
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-02-19 00:00:00.000000000 Z
11
+ date: 2019-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: benchmark-ips
@@ -30,14 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.15'
33
+ version: '2.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.15'
40
+ version: '2.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: codacy-coverage
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.1'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
146
160
  - !ruby/object:Gem::Version
147
161
  version: '0'
148
162
  requirements: []
149
- rubyforge_project:
150
- rubygems_version: 2.7.6
163
+ rubygems_version: 3.0.3
151
164
  signing_key:
152
165
  specification_version: 4
153
166
  summary: A collection of edit distance algorithms.