edits 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e40f01c34af084cdf604c0fa99586d2c45b9e76ac6324ba6c279ef9cc0d02062
4
- data.tar.gz: efceac3eee3f9a6997378751b1cc32970622027d5c7e68e32e02fc15c77048f2
3
+ metadata.gz: f1b79624e5ce72cbf1e7623ecb909c42b1fc6a82763e922b30b25589a040f2c1
4
+ data.tar.gz: 26175c109066ecc63c4df37493493011724e9e4dfe0c57cbae47411f6a4daf85
5
5
  SHA512:
6
- metadata.gz: 998ed35ade1b507c8fb21070ef79e1be31d5edaf187a5e5b7499cbcb2605cbce3661fd9af42c598e90c8a191616a5dad57417e704b8a33002931b4b68d106f66
7
- data.tar.gz: cad3e77feebc2d5d7f56f482f5ba9c26d12cead3a2d140bbe3c290f01c8d26dd7ae31cc9068f8ebfab51ed10b3b4d0f7b15506af3b2d8b0e8c3c81229f0dec9c
6
+ metadata.gz: 7c4b25223ef3db51afad11160fab94028fe5ab9d35f8918b63ac6cc5c3894245bea843247717ef1f93e627923dbdaf6c5844a4a0863f00598515bf0e07aa87a8
7
+ data.tar.gz: 4d060b4eebc4a2cad7b37b1ba6e4e30a4c5c2f38640ce2244960bf5ee0cba06b62ac577beb271e0919df784c999e69a740d4ec47daedac0cf1f06f4c395d7ce4
@@ -1,5 +1,5 @@
1
1
  AllCops:
2
- TargetRubyVersion: 2.4
2
+ TargetRubyVersion: 2.5
3
3
  Metrics/AbcSize:
4
4
  Enabled: false
5
5
  Metrics/CyclomaticComplexity:
@@ -3,8 +3,9 @@ language: ruby
3
3
  cache: bundler
4
4
  rvm:
5
5
  - 2.3
6
- - 2.4.3
7
- - 2.5
6
+ - 2.4
7
+ - 2.5.3
8
+ - 2.6
8
9
  - ruby-head
9
10
  - rbx-3
10
11
  matrix:
@@ -19,7 +20,7 @@ jobs:
19
20
  include:
20
21
  - stage: gem release
21
22
  if: tag IS present
22
- rvm: 2.4.3
23
+ rvm: 2.5.3
23
24
  script: echo "Deploying to rubygems.org ..."
24
25
  deploy:
25
26
  provider: rubygems
data/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Edits
2
2
 
3
3
  [![Build Status](https://travis-ci.org/tcrouch/edits.svg?branch=master)](https://travis-ci.org/tcrouch/edits)
4
- [![codebeat badge](https://codebeat.co/badges/005039fa-25c2-4005-8974-97c1ae9ff6eb)](https://codebeat.co/projects/github-com-tcrouch-edits-master)
4
+ [![Codacy Badge](https://api.codacy.com/project/badge/Grade/64cb50b8e9ce4ec2a752d091e441b09d)](https://app.codacy.com/app/t.crouch/edits?utm_source=github.com&utm_medium=referral&utm_content=tcrouch/edits&utm_campaign=Badge_Grade_Dashboard)
5
5
  [![Inline docs](http://inch-ci.org/github/tcrouch/edits.svg?branch=master)](http://inch-ci.org/github/tcrouch/edits)
6
6
  [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/github/tcrouch/edits)
7
7
 
@@ -95,6 +95,13 @@ Edits::JaroWinkler.distance "information", "informant"
95
95
  # => 0.05858585858585863
96
96
  ```
97
97
 
98
+ ### Hamming
99
+
100
+ ```ruby
101
+ Edits::Hamming.distance("explorer", "exploded")
102
+ # => 2
103
+ ```
104
+
98
105
  ## Development
99
106
 
100
107
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- lib = File.expand_path("../lib", __FILE__)
3
+ lib = File.expand_path("lib", __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "edits/version"
6
6
 
@@ -27,7 +27,8 @@ Gem::Specification.new do |spec|
27
27
  spec.require_paths = ["lib"]
28
28
 
29
29
  spec.add_development_dependency "benchmark-ips", "~> 2.7"
30
- spec.add_development_dependency "bundler", "~> 1.15"
30
+ spec.add_development_dependency "bundler", "~> 2.0"
31
+ spec.add_development_dependency "codacy-coverage", "~> 2.1"
31
32
  spec.add_development_dependency "rake", "~> 12.1"
32
33
  spec.add_development_dependency "redcarpet", "~> 3.4"
33
34
  spec.add_development_dependency "rspec", "~> 3.6"
@@ -18,11 +18,13 @@ module Edits
18
18
  # @return [String, nil] most similar string, or nil for empty array
19
19
  def most_similar(prototype, strings)
20
20
  return nil if strings.empty?
21
+
21
22
  min_s = strings[0]
22
23
  min_d = distance(prototype, min_s)
23
24
 
24
25
  strings[1..-1].each do |s|
25
26
  return min_s if min_d.zero?
27
+
26
28
  d = distance_with_max(prototype, s, min_d)
27
29
  if d < min_d
28
30
  min_d = d
@@ -16,7 +16,7 @@ module Edits
16
16
  # # => 3
17
17
  # @param seq1 [String, Array]
18
18
  # @param seq2 [String, Array]
19
- # @return [Integer]
19
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
20
20
  def self.distance(seq1, seq2)
21
21
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
22
22
 
@@ -8,10 +8,12 @@ module Edits
8
8
  # Calculate the Hamming distance between two sequences.
9
9
  #
10
10
  # @note A true distance metric, satisfies triangle inequality.
11
- #
11
+ # @example
12
+ # Edits::Hamming.distance("explorer", "exploded")
13
+ # # => 2
12
14
  # @param seq1 [String, Array]
13
15
  # @param seq2 [String, Array]
14
- # @return [Integer] Hamming distance
16
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
15
17
  def self.distance(seq1, seq2)
16
18
  # if seq1.is_a?(Integer) && seq2.is_a?(Integer)
17
19
  # return (seq1 ^ seq2).to_s(2).count("1")
@@ -16,7 +16,7 @@ module Edits
16
16
  # # => 0.9023569023569024
17
17
  # @param seq1 [String, Array]
18
18
  # @param seq2 [String, Array]
19
- # @return [Float] similarity, between 0.0 (none) and 1.0 (identical)
19
+ # @return [Float] similarity, from 0.0 (none) to 1.0 (identical)
20
20
  def self.similarity(seq1, seq2)
21
21
  return 1.0 if seq1 == seq2
22
22
  return 0.0 if seq1.empty? || seq2.empty?
@@ -39,7 +39,7 @@ module Edits
39
39
  # Edits::Jaro.distance("information", "informant")
40
40
  # # => 0.09764309764309764
41
41
  # @param (see #distance)
42
- # @return [Float] distance, between 0.0 (identical) and 1.0 (distant)
42
+ # @return [Float] distance, from 0.0 (identical) to 1.0 (distant)
43
43
  def self.distance(str1, str2)
44
44
  1.0 - similarity(str1, str2)
45
45
  end
@@ -87,6 +87,7 @@ module Edits
87
87
  seq1.length.times do |i|
88
88
  # find a match in first string
89
89
  next unless seq1_flags[i] == true
90
+
90
91
  # go to location of next match on second string
91
92
  j += 1 until seq2_flags[j]
92
93
 
@@ -9,7 +9,7 @@ module Edits
9
9
  # Should not exceed 0.25 or metric range will leave 0..1
10
10
  WINKLER_PREFIX_WEIGHT = 0.1
11
11
 
12
- # Threshold for boosting Jaro with winkler prefix multiplier.
12
+ # Threshold for boosting Jaro with Winkler prefix multiplier.
13
13
  # Default is 0.7
14
14
  WINKLER_THRESHOLD = 0.7
15
15
 
@@ -31,27 +31,24 @@ module Edits
31
31
  # @param seq2 [String, Array]
32
32
  # @param threshold [Float] threshold for applying Winkler prefix weighting
33
33
  # @param weight [Float] weighting for common prefix, should not exceed 0.25
34
- # @return [Float] similarity, between 0.0 (none) and 1.0 (identical)
34
+ # @return [Float] similarity, from 0.0 (none) to 1.0 (identical)
35
35
  def self.similarity(
36
36
  seq1, seq2,
37
37
  threshold: WINKLER_THRESHOLD,
38
38
  weight: WINKLER_PREFIX_WEIGHT
39
39
  )
40
40
 
41
- dj = Jaro.similarity(seq1, seq2)
41
+ sj = Jaro.similarity(seq1, seq2)
42
+ return sj unless sj > threshold
42
43
 
43
- if dj > threshold
44
- # size of common prefix, max 4
45
- max_bound = seq1.length > seq2.length ? seq2.length : seq1.length
46
- max_bound = 4 if max_bound > 4
44
+ # size of common prefix, max 4
45
+ max_bound = seq1.length > seq2.length ? seq2.length : seq1.length
46
+ max_bound = 4 if max_bound > 4
47
47
 
48
- l = 0
49
- l += 1 until seq1[l] != seq2[l] || l >= max_bound
48
+ l = 0
49
+ l += 1 until seq1[l] != seq2[l] || l >= max_bound
50
50
 
51
- l < 1 ? dj : dj + (l * weight * (1 - dj))
52
- else
53
- dj
54
- end
51
+ l < 1 ? sj : sj + (l * weight * (1 - sj))
55
52
  end
56
53
 
57
54
  # Calculate Jaro-Winkler distance
@@ -62,7 +59,7 @@ module Edits
62
59
  # Edits::JaroWinkler.distance("information", "informant")
63
60
  # # => 0.05858585858585863
64
61
  # @param (see #distance)
65
- # @return [Float] distance, between 0.0 (identical) and 1.0 (distant)
62
+ # @return [Float] distance, from 0.0 (identical) to 1.0 (distant)
66
63
  def self.distance(
67
64
  seq1, seq2,
68
65
  threshold: WINKLER_THRESHOLD,
@@ -7,6 +7,8 @@ module Edits
7
7
  # * Insertion
8
8
  # * Deletion
9
9
  # * Substitution
10
+ #
11
+ # @see https://en.wikipedia.org/wiki/Levenshtein_distance
10
12
  module Levenshtein
11
13
  extend Compare
12
14
 
@@ -14,11 +16,11 @@ module Edits
14
16
  #
15
17
  # @note A true distance metric, satisfies triangle inequality.
16
18
  # @example
17
- # Levenshtein.distance('sand', 'hands')
19
+ # Levenshtein.distance("sand", "hands")
18
20
  # # => 2
19
21
  # @param seq1 [String, Array]
20
22
  # @param seq2 [String, Array]
21
- # @return [Integer]
23
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
22
24
  def self.distance(seq1, seq2)
23
25
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
24
26
 
@@ -74,7 +76,7 @@ module Edits
74
76
  # @param seq1 [String, Array]
75
77
  # @param seq2 [String, Array]
76
78
  # @param max [Integer] maximum distance
77
- # @return [Integer]
79
+ # @return [Integer] distance, from 0 (identical) to max (more distant)
78
80
  def self.distance_with_max(seq1, seq2, max)
79
81
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
80
82
 
@@ -24,7 +24,7 @@ module Edits
24
24
  # # => 3
25
25
  # @param seq1 [String, Array]
26
26
  # @param seq2 [String, Array]
27
- # @return [Integer]
27
+ # @return [Integer] distance, 0 (identical) or greater (more distant)
28
28
  def self.distance(seq1, seq2)
29
29
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
30
30
 
@@ -96,7 +96,7 @@ module Edits
96
96
  # @param seq1 [String, Array]
97
97
  # @param seq2 [String, Array]
98
98
  # @param max [Integer] maximum distance
99
- # @return [Integer]
99
+ # @return [Integer] distance, from 0 (identical) to max (more distant)
100
100
  def self.distance_with_max(seq1, seq2, max)
101
101
  seq1, seq2 = seq2, seq1 if seq1.length > seq2.length
102
102
 
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Edits
4
4
  # Current gem version
5
- VERSION = "0.2.1"
5
+ VERSION = "0.2.2"
6
6
  end
@@ -5,7 +5,7 @@ require "benchmark/ips"
5
5
  require "edits"
6
6
 
7
7
  namespace :benchmark do
8
- desc "levenshtein distance vs. distance_with_max (x100)"
8
+ desc "levenshtein distance vs distance_with_max (x100)"
9
9
  task :lev_max do
10
10
  words = File.read("/usr/share/dict/words")
11
11
  .split(/\n/).compact.shuffle(random: Random.new(1))
@@ -64,7 +64,7 @@ namespace :benchmark do
64
64
  end
65
65
  end
66
66
 
67
- desc "restricted distance vs. distance_with_max (x100)"
67
+ desc "restricted distance vs distance_with_max (x100)"
68
68
  task :restricted_max do
69
69
  words = File.read("/usr/share/dict/words")
70
70
  .split(/\n/).compact.shuffle(random: Random.new(1))
@@ -123,7 +123,7 @@ namespace :benchmark do
123
123
  end
124
124
  end
125
125
 
126
- desc "most_similar vs. min_by (100 words)"
126
+ desc "most_similar vs min_by (100 words)"
127
127
  task :lev_similar do
128
128
  words = File.read("/usr/share/dict/words")
129
129
  .split(/\n/).compact.shuffle(random: Random.new(1))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edits
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Crouch
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-02-19 00:00:00.000000000 Z
11
+ date: 2019-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: benchmark-ips
@@ -30,14 +30,28 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.15'
33
+ version: '2.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.15'
40
+ version: '2.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: codacy-coverage
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.1'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -146,8 +160,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
146
160
  - !ruby/object:Gem::Version
147
161
  version: '0'
148
162
  requirements: []
149
- rubyforge_project:
150
- rubygems_version: 2.7.6
163
+ rubygems_version: 3.0.3
151
164
  signing_key:
152
165
  specification_version: 4
153
166
  summary: A collection of edit distance algorithms.