string-similarity 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 46587328eab62395108cbef8c3195326428dab84
4
- data.tar.gz: b6066809ed34953ebe8f0ef6111050a05650516d
2
+ SHA256:
3
+ metadata.gz: 48616210ccc1800386ed7330f6895b7f44eb34008fd7a5e6e27da083c27e9d44
4
+ data.tar.gz: e0e63ad2772ae5632670f1453a2462b9f481346dc5d9067401fd65edb0ce759d
5
5
  SHA512:
6
- metadata.gz: 42f448b519b73d26c9db02c14fe2abfb130777bd5e439dba48e71e85edc6e8f898782a70140cfa1f4baefb369e95fa4d99d52d9222d2f52309c4b23b27f27d34
7
- data.tar.gz: d4e91894f077ed938907aeba93cc35a0242dafe775e0e28e50598c7e3de27acd4285096885969997be782d2bfff7bfb9ec1dfe553de584e4734089d8d6152fa9
6
+ metadata.gz: aaaeda7e3961cd455ed4f6679410f4064f4e92d6bd590cd93534a404d502fa2064f8aef858589a03985c8276d46faf6706cc1745784686c5688b52503ef9c5a9
7
+ data.tar.gz: 76e6389bd17d93a81f3b403a2dee3eedb398affa668e6567568e9e4491b7dc360b50b8f624a1857b8086ac44215a50ecf20ecf51675367d7041a33054d5b5c02
@@ -2,14 +2,16 @@ language: ruby
2
2
  cache: bundler
3
3
 
4
4
  rvm:
5
- - 2.4
6
- - 2.3
7
- - 2.2
5
+ - 2.7
6
+ - 2.6
7
+ - 2.5
8
8
  - ruby-head
9
9
  matrix:
10
10
  allow_failures:
11
11
  - rvm: ruby-head
12
12
 
13
+ before_install:
14
+ - gem install bundler
13
15
  before_script:
14
16
  - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
15
17
  - chmod +x ./cc-test-reporter
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [2.1.0] - 2020-03-17
10
+ ### Added
11
+ - Extended `cosine` to allow usage of N-grams (thanks @imustafin)
12
+ ### Fixed
13
- Updated outdated development dependencies
14
+
9
15
  ## [2.0.1] - 2017-11-22
10
16
  ### Fixed
11
17
  - `require 'string-similarity'` now actually loads the module. (thanks @wppurking)
@@ -32,7 +38,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
32
38
  - Proper Documentation
33
39
 
34
40
 
35
- [Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.0.1...HEAD
41
+ [Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.1.0...HEAD
42
+ [2.1.0]: https://github.com/mhutter/string-similarity/compare/v2.0.1...v2.1.0
36
43
  [2.0.1]: https://github.com/mhutter/string-similarity/compare/v2.0.0...v2.0.1
37
44
  [2.0.0]: https://github.com/mhutter/string-similarity/compare/v1.1.1...v2.0.0
38
45
  [1.1.1]: https://github.com/mhutter/string-similarity/compare/v1.1.0...v1.1.1
@@ -12,17 +12,20 @@ module String::Similarity
12
12
  #
13
13
  # @param str1 [String] first string
14
14
  # @param str2 [String] second string
15
+ # @param ngram [Integer] how many characters to group into each N-gram (must be >= 1)
15
16
  # @return [Float] cosine similarity of the two arguments.
16
17
  # - +1.0+ if the strings are identical
17
18
  # - +0.0+ if the strings are completely different
18
19
  # - +0.0+ if one of the strings is empty
19
- def self.cosine(str1, str2)
20
+ def self.cosine(str1, str2, ngram: 1)
21
+ raise ArgumentError.new('ngram should be >= 1') if ngram < 1
22
+
20
23
  return 1.0 if str1 == str2
21
24
  return 0.0 if str1.empty? || str2.empty?
22
25
 
23
26
  # convert both texts to vectors
24
- v1 = vector(str1)
25
- v2 = vector(str2)
27
+ v1 = vector(str1, ngram)
28
+ v2 = vector(str2, ngram)
26
29
 
27
30
  # calculate the dot product
28
31
  dot_product = dot(v1, v2)
@@ -94,13 +97,33 @@ module String::Similarity
94
97
  end
95
98
 
96
99
  # create a vector from +str+
100
+ # keys have a special format:
101
+ # '[left padding, right padding, "string"]'
97
102
  #
98
103
  # @example
99
- # v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
100
- # v1["x"] # => 0
101
- def self.vector(str)
104
+ # v1 = vector('aba', 1) # => {'[0, 0, "a"]' => 2, '[0, 0, "b"]' => 1}
105
+ # v1['[0, 0, "x"]'] # => 0
106
+ # @example
107
+ # vector('abacaba', 2) # => {
108
+ # # '[1, 0, "a"]' => 1,
109
+ # # '[0, 0, "ab"]' => 2,
110
+ # # '[0, 0, "ba"]' => 2,
111
+ # # '[0, 0, "ac"]' => 1,
112
+ # # '[0, 0, "ca"]' => 1,
113
+ # # '[0, 1, "a"]' => 1
114
+ # # }
115
+ def self.vector(str, ngram)
102
116
  v = Hash.new(0)
103
- str.each_char { |c| v[c] += 1 }
117
+
118
+ ((1 - ngram)..(str.length - 1)).each do |i|
119
+ before = [-i, 0].max
120
+ after = [ngram - (str.length - i), 0].max
121
+ slice = str[[i, 0].max .. [i + ngram - 1, str.length - 1].min]
122
+ key = [before, after, slice].to_s
123
+
124
+ v[key] += 1
125
+ end
126
+
104
127
  v
105
128
  end
106
129
 
@@ -1,6 +1,6 @@
1
1
  class String
2
2
  module Similarity
3
3
  # Gem version
4
- VERSION = '2.0.1'
4
+ VERSION = '2.1.0'
5
5
  end
6
6
  end
@@ -0,0 +1,5 @@
1
+ {
2
+ "extends": [
3
+ "config:base"
4
+ ]
5
+ }
@@ -32,8 +32,8 @@ This gem provides some methods for calculating similarities of two strings.
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
33
  spec.require_paths = ['lib']
34
34
 
35
- spec.add_development_dependency 'bundler', '~> 1.10'
36
- spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'bundler', '~> 2.0'
36
+ spec.add_development_dependency 'rake', '~> 13.0'
37
37
  spec.add_development_dependency 'rspec'
38
38
  spec.add_development_dependency 'pry'
39
39
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Hutter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-22 00:00:00.000000000 Z
11
+ date: 2020-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.10'
19
+ version: '2.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.10'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '13.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '13.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -101,6 +101,7 @@ files:
101
101
  - lib/string/similarity.rb
102
102
  - lib/string/similarity/version.rb
103
103
  - lib/string/similarity_refinements.rb
104
+ - renovate.json
104
105
  - string-similarity.gemspec
105
106
  homepage: https://github.com/mhutter/string-similarity
106
107
  licenses:
@@ -121,8 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
121
122
  - !ruby/object:Gem::Version
122
123
  version: '0'
123
124
  requirements: []
124
- rubyforge_project:
125
- rubygems_version: 2.6.14
125
+ rubygems_version: 3.1.2
126
126
  signing_key:
127
127
  specification_version: 4
128
128
  summary: Various methods for calculating string similarities.