string-similarity 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 46587328eab62395108cbef8c3195326428dab84
4
- data.tar.gz: b6066809ed34953ebe8f0ef6111050a05650516d
2
+ SHA256:
3
+ metadata.gz: 48616210ccc1800386ed7330f6895b7f44eb34008fd7a5e6e27da083c27e9d44
4
+ data.tar.gz: e0e63ad2772ae5632670f1453a2462b9f481346dc5d9067401fd65edb0ce759d
5
5
  SHA512:
6
- metadata.gz: 42f448b519b73d26c9db02c14fe2abfb130777bd5e439dba48e71e85edc6e8f898782a70140cfa1f4baefb369e95fa4d99d52d9222d2f52309c4b23b27f27d34
7
- data.tar.gz: d4e91894f077ed938907aeba93cc35a0242dafe775e0e28e50598c7e3de27acd4285096885969997be782d2bfff7bfb9ec1dfe553de584e4734089d8d6152fa9
6
+ metadata.gz: aaaeda7e3961cd455ed4f6679410f4064f4e92d6bd590cd93534a404d502fa2064f8aef858589a03985c8276d46faf6706cc1745784686c5688b52503ef9c5a9
7
+ data.tar.gz: 76e6389bd17d93a81f3b403a2dee3eedb398affa668e6567568e9e4491b7dc360b50b8f624a1857b8086ac44215a50ecf20ecf51675367d7041a33054d5b5c02
@@ -2,14 +2,16 @@ language: ruby
2
2
  cache: bundler
3
3
 
4
4
  rvm:
5
- - 2.4
6
- - 2.3
7
- - 2.2
5
+ - 2.7
6
+ - 2.6
7
+ - 2.5
8
8
  - ruby-head
9
9
  matrix:
10
10
  allow_failures:
11
11
  - rvm: ruby-head
12
12
 
13
+ before_install:
14
+ - gem install bundler
13
15
  before_script:
14
16
  - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
15
17
  - chmod +x ./cc-test-reporter
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [2.1.0] - 2020-03-17
10
+ ### Added
11
+ - Extended `cosine` to allow usage of N-grams (thanks @imustafin)
12
+ ### Fixed
13
+ - updated outdated development dependencies
14
+
9
15
  ## [2.0.1] - 2017-11-22
10
16
  ### Fixed
11
17
  - `require 'string-similarity'` now actually loads the module. (thanks @wppurking)
@@ -32,7 +38,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
32
38
  - Proper Documentation
33
39
 
34
40
 
35
- [Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.0.1...HEAD
41
+ [Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.1.0...HEAD
42
+ [2.1.0]: https://github.com/mhutter/string-similarity/compare/v2.0.1...v2.1.0
36
43
  [2.0.1]: https://github.com/mhutter/string-similarity/compare/v2.0.0...v2.0.1
37
44
  [2.0.0]: https://github.com/mhutter/string-similarity/compare/v1.1.1...v2.0.0
38
45
  [1.1.1]: https://github.com/mhutter/string-similarity/compare/v1.1.0...v1.1.1
@@ -12,17 +12,20 @@ module String::Similarity
12
12
  #
13
13
  # @param str1 [String] first string
14
14
  # @param str2 [String] second string
15
+ # @param ngram [Integer] number of consecutive characters per n-gram (must be >= 1)
15
16
  # @return [Float] cosine similarity of the two arguments.
16
17
  # - +1.0+ if the strings are identical
17
18
  # - +0.0+ if the strings are completely different
18
19
  # - +0.0+ if one of the strings is empty
19
- def self.cosine(str1, str2)
20
+ def self.cosine(str1, str2, ngram: 1)
21
+ raise ArgumentError.new('ngram should be >= 1') if ngram < 1
22
+
20
23
  return 1.0 if str1 == str2
21
24
  return 0.0 if str1.empty? || str2.empty?
22
25
 
23
26
  # convert both texts to vectors
24
- v1 = vector(str1)
25
- v2 = vector(str2)
27
+ v1 = vector(str1, ngram)
28
+ v2 = vector(str2, ngram)
26
29
 
27
30
  # calculate the dot product
28
31
  dot_product = dot(v1, v2)
@@ -94,13 +97,33 @@ module String::Similarity
94
97
  end
95
98
 
96
99
  # create a vector from +str+
100
+ # keys have a special format:
101
+ # '[left padding, right padding, "string"]'
97
102
  #
98
103
  # @example
99
- # v1 = vector('hello') # => {"h"=>1, "e"=>1, "l"=>2, "o"=>1}
100
- # v1["x"] # => 0
101
- def self.vector(str)
104
+ # v1 = vector('aba', 1) # => {'[0, 0, "a"]' => 2, '[0, 0, "b"]' => 1}
105
+ # v1['[0, 0, "x"]'] # => 0
106
+ # @example
107
+ # vector('abacaba', 2) # => {
108
+ # # '[1, 0, "a"]' => 1,
109
+ # # '[0, 0, "ab"]' => 2,
110
+ # # '[0, 0, "ba"]' => 2,
111
+ # # '[0, 0, "ac"]' => 1,
112
+ # # '[0, 0, "ca"]' => 1,
113
+ # # '[0, 1, "a"]' => 1
114
+ # # }
115
+ def self.vector(str, ngram)
102
116
  v = Hash.new(0)
103
- str.each_char { |c| v[c] += 1 }
117
+
118
+ ((1 - ngram)..(str.length - 1)).each do |i|
119
+ before = [-i, 0].max
120
+ after = [ngram - (str.length - i), 0].max
121
+ slice = str[[i, 0].max .. [i + ngram - 1, str.length - 1].min]
122
+ key = [before, after, slice].to_s
123
+
124
+ v[key] += 1
125
+ end
126
+
104
127
  v
105
128
  end
106
129
 
@@ -1,6 +1,6 @@
1
1
  class String
2
2
  module Similarity
3
3
  # Gem version
4
- VERSION = '2.0.1'
4
+ VERSION = '2.1.0'
5
5
  end
6
6
  end
@@ -0,0 +1,5 @@
1
+ {
2
+ "extends": [
3
+ "config:base"
4
+ ]
5
+ }
@@ -32,8 +32,8 @@ This gem provides some methods for calculating similarities of two strings.
32
32
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
33
  spec.require_paths = ['lib']
34
34
 
35
- spec.add_development_dependency 'bundler', '~> 1.10'
36
- spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'bundler', '~> 2.0'
36
+ spec.add_development_dependency 'rake', '~> 13.0'
37
37
  spec.add_development_dependency 'rspec'
38
38
  spec.add_development_dependency 'pry'
39
39
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-similarity
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Hutter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-22 00:00:00.000000000 Z
11
+ date: 2020-03-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.10'
19
+ version: '2.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.10'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '13.0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '13.0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -101,6 +101,7 @@ files:
101
101
  - lib/string/similarity.rb
102
102
  - lib/string/similarity/version.rb
103
103
  - lib/string/similarity_refinements.rb
104
+ - renovate.json
104
105
  - string-similarity.gemspec
105
106
  homepage: https://github.com/mhutter/string-similarity
106
107
  licenses:
@@ -121,8 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
121
122
  - !ruby/object:Gem::Version
122
123
  version: '0'
123
124
  requirements: []
124
- rubyforge_project:
125
- rubygems_version: 2.6.14
125
+ rubygems_version: 3.1.2
126
126
  signing_key:
127
127
  specification_version: 4
128
128
  summary: Various methods for calculating string similarities.