string-similarity 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +5 -3
- data/CHANGELOG.md +8 -1
- data/lib/string/similarity.rb +30 -7
- data/lib/string/similarity/version.rb +1 -1
- data/renovate.json +5 -0
- data/string-similarity.gemspec +2 -2
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 48616210ccc1800386ed7330f6895b7f44eb34008fd7a5e6e27da083c27e9d44
|
4
|
+
data.tar.gz: e0e63ad2772ae5632670f1453a2462b9f481346dc5d9067401fd65edb0ce759d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aaaeda7e3961cd455ed4f6679410f4064f4e92d6bd590cd93534a404d502fa2064f8aef858589a03985c8276d46faf6706cc1745784686c5688b52503ef9c5a9
|
7
|
+
data.tar.gz: 76e6389bd17d93a81f3b403a2dee3eedb398affa668e6567568e9e4491b7dc360b50b8f624a1857b8086ac44215a50ecf20ecf51675367d7041a33054d5b5c02
|
data/.travis.yml
CHANGED
@@ -2,14 +2,16 @@ language: ruby
|
|
2
2
|
cache: bundler
|
3
3
|
|
4
4
|
rvm:
|
5
|
-
- 2.
|
6
|
-
- 2.
|
7
|
-
- 2.
|
5
|
+
- 2.7
|
6
|
+
- 2.6
|
7
|
+
- 2.5
|
8
8
|
- ruby-head
|
9
9
|
matrix:
|
10
10
|
allow_failures:
|
11
11
|
- rvm: ruby-head
|
12
12
|
|
13
|
+
before_install:
|
14
|
+
- gem install bundler
|
13
15
|
before_script:
|
14
16
|
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
15
17
|
- chmod +x ./cc-test-reporter
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [2.1.0] - 2020-03-17
|
10
|
+
### Added
|
11
|
+
- Extended `cosine` to allow usage of N-grams (thanks @imustafin)
|
12
|
+
### Fixed
|
13
|
+
- updated outdated development dependencies
|
14
|
+
|
9
15
|
## [2.0.1] - 2017-11-22
|
10
16
|
### Fixed
|
11
17
|
- `require 'string-similarity'` now actually loads the module. (thanks @wppurking)
|
@@ -32,7 +38,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
32
38
|
- Proper Documentation
|
33
39
|
|
34
40
|
|
35
|
-
[Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.0
|
41
|
+
[Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.1.0...HEAD
|
42
|
+
[2.1.0]: https://github.com/mhutter/string-similarity/compare/v2.0.1...v2.1.0
|
36
43
|
[2.0.1]: https://github.com/mhutter/string-similarity/compare/v2.0.0...v2.0.1
|
37
44
|
[2.0.0]: https://github.com/mhutter/string-similarity/compare/v1.1.1...v2.0.0
|
38
45
|
[1.1.1]: https://github.com/mhutter/string-similarity/compare/v1.1.0...v1.1.1
|
data/lib/string/similarity.rb
CHANGED
@@ -12,17 +12,20 @@ module String::Similarity
|
|
12
12
|
#
|
13
13
|
# @param str1 [String] first string
|
14
14
|
# @param str2 [String] second string
|
15
|
+
# @param ngram [Integer] how many characters at once to use
|
15
16
|
# @return [Float] cosine similarity of the two arguments.
|
16
17
|
# - +1.0+ if the strings are identical
|
17
18
|
# - +0.0+ if the strings are completely different
|
18
19
|
# - +0.0+ if one of the strings is empty
|
19
|
-
def self.cosine(str1, str2)
|
20
|
+
def self.cosine(str1, str2, ngram: 1)
|
21
|
+
raise ArgumentError.new('ngram should be >= 1') if ngram < 1
|
22
|
+
|
20
23
|
return 1.0 if str1 == str2
|
21
24
|
return 0.0 if str1.empty? || str2.empty?
|
22
25
|
|
23
26
|
# convert both texts to vectors
|
24
|
-
v1 = vector(str1)
|
25
|
-
v2 = vector(str2)
|
27
|
+
v1 = vector(str1, ngram)
|
28
|
+
v2 = vector(str2, ngram)
|
26
29
|
|
27
30
|
# calculate the dot product
|
28
31
|
dot_product = dot(v1, v2)
|
@@ -94,13 +97,33 @@ module String::Similarity
|
|
94
97
|
end
|
95
98
|
|
96
99
|
# create a vector from +str+
|
100
|
+
# keys have a special format:
|
101
|
+
# '[left padding, right padding, "string"]'
|
97
102
|
#
|
98
103
|
# @example
|
99
|
-
# v1 = vector('
|
100
|
-
# v1["x"] # => 0
|
101
|
-
|
104
|
+
# v1 = vector('aba', 1) # => {'[0, 0, "a"]' => 2, '[0, 0, "b"]' => 1}
|
105
|
+
# v1['[0, 0, "x"]'] # => 0
|
106
|
+
# @example
|
107
|
+
# vector('abacaba', 2) # => {
|
108
|
+
# # '[1, 0, "a"]' => 1,
|
109
|
+
# # '[0, 0, "ab"]' => 2,
|
110
|
+
# # '[0, 0, "ba"]' => 2,
|
111
|
+
# # '[0, 0, "ac"]' => 1,
|
112
|
+
#   #   '[0, 0, "ca"]' => 1,
|
113
|
+
# # '[0, 1, "a"]' => 1
|
114
|
+
# # }
|
115
|
+
def self.vector(str, ngram)
|
102
116
|
v = Hash.new(0)
|
103
|
-
|
117
|
+
|
118
|
+
((1 - ngram)..(str.length - 1)).each do |i|
|
119
|
+
before = [-i, 0].max
|
120
|
+
after = [ngram - (str.length - i), 0].max
|
121
|
+
slice = str[[i, 0].max .. [i + ngram - 1, str.length - 1].min]
|
122
|
+
key = [before, after, slice].to_s
|
123
|
+
|
124
|
+
v[key] += 1
|
125
|
+
end
|
126
|
+
|
104
127
|
v
|
105
128
|
end
|
106
129
|
|
data/string-similarity.gemspec
CHANGED
@@ -32,8 +32,8 @@ This gem provides some methods for calculating similarities of two strings.
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ['lib']
|
34
34
|
|
35
|
-
spec.add_development_dependency 'bundler', '~>
|
36
|
-
spec.add_development_dependency 'rake', '~>
|
35
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
36
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
37
37
|
spec.add_development_dependency 'rspec'
|
38
38
|
spec.add_development_dependency 'pry'
|
39
39
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string-similarity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Hutter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '2.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- lib/string/similarity.rb
|
102
102
|
- lib/string/similarity/version.rb
|
103
103
|
- lib/string/similarity_refinements.rb
|
104
|
+
- renovate.json
|
104
105
|
- string-similarity.gemspec
|
105
106
|
homepage: https://github.com/mhutter/string-similarity
|
106
107
|
licenses:
|
@@ -121,8 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
122
|
- !ruby/object:Gem::Version
|
122
123
|
version: '0'
|
123
124
|
requirements: []
|
124
|
-
|
125
|
-
rubygems_version: 2.6.14
|
125
|
+
rubygems_version: 3.1.2
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: Various methods for calculating string similarities.
|