string-similarity 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +5 -3
- data/CHANGELOG.md +8 -1
- data/lib/string/similarity.rb +30 -7
- data/lib/string/similarity/version.rb +1 -1
- data/renovate.json +5 -0
- data/string-similarity.gemspec +2 -2
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 48616210ccc1800386ed7330f6895b7f44eb34008fd7a5e6e27da083c27e9d44
|
4
|
+
data.tar.gz: e0e63ad2772ae5632670f1453a2462b9f481346dc5d9067401fd65edb0ce759d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aaaeda7e3961cd455ed4f6679410f4064f4e92d6bd590cd93534a404d502fa2064f8aef858589a03985c8276d46faf6706cc1745784686c5688b52503ef9c5a9
|
7
|
+
data.tar.gz: 76e6389bd17d93a81f3b403a2dee3eedb398affa668e6567568e9e4491b7dc360b50b8f624a1857b8086ac44215a50ecf20ecf51675367d7041a33054d5b5c02
|
data/.travis.yml
CHANGED
@@ -2,14 +2,16 @@ language: ruby
|
|
2
2
|
cache: bundler
|
3
3
|
|
4
4
|
rvm:
|
5
|
-
- 2.
|
6
|
-
- 2.
|
7
|
-
- 2.
|
5
|
+
- 2.7
|
6
|
+
- 2.6
|
7
|
+
- 2.5
|
8
8
|
- ruby-head
|
9
9
|
matrix:
|
10
10
|
allow_failures:
|
11
11
|
- rvm: ruby-head
|
12
12
|
|
13
|
+
before_install:
|
14
|
+
- gem install bundler
|
13
15
|
before_script:
|
14
16
|
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
15
17
|
- chmod +x ./cc-test-reporter
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [2.1.0] - 2020-03-17
|
10
|
+
### Added
|
11
|
+
- Extended `cosine` to allow usage of N-grams (thanks @imustafin)
|
12
|
+
### Fixed
|
13
|
+
- updated outdated development dependencies
|
14
|
+
|
9
15
|
## [2.0.1] - 2017-11-22
|
10
16
|
### Fixed
|
11
17
|
- `require 'string-similarity'` now actually loads the module. (thanks @wppurking)
|
@@ -32,7 +38,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
32
38
|
- Proper Documentation
|
33
39
|
|
34
40
|
|
35
|
-
[Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.0.1...HEAD
|
41
|
+
[Unreleased]: https://github.com/mhutter/string-similarity/compare/v2.1.0...HEAD
|
42
|
+
[2.1.0]: https://github.com/mhutter/string-similarity/compare/v2.0.1...v2.1.0
|
36
43
|
[2.0.1]: https://github.com/mhutter/string-similarity/compare/v2.0.0...v2.0.1
|
37
44
|
[2.0.0]: https://github.com/mhutter/string-similarity/compare/v1.1.1...v2.0.0
|
38
45
|
[1.1.1]: https://github.com/mhutter/string-similarity/compare/v1.1.0...v1.1.1
|
data/lib/string/similarity.rb
CHANGED
@@ -12,17 +12,20 @@ module String::Similarity
|
|
12
12
|
#
|
13
13
|
# @param str1 [String] first string
|
14
14
|
# @param str2 [String] second string
|
15
|
+
# @param ngram [Integer] how many characters at once to use
|
15
16
|
# @return [Float] cosine similarity of the two arguments.
|
16
17
|
# - +1.0+ if the strings are identical
|
17
18
|
# - +0.0+ if the strings are completely different
|
18
19
|
# - +0.0+ if one of the strings is empty
|
19
|
-
def self.cosine(str1, str2)
|
20
|
+
def self.cosine(str1, str2, ngram: 1)
|
21
|
+
raise ArgumentError.new('ngram should be >= 1') if ngram < 1
|
22
|
+
|
20
23
|
return 1.0 if str1 == str2
|
21
24
|
return 0.0 if str1.empty? || str2.empty?
|
22
25
|
|
23
26
|
# convert both texts to vectors
|
24
|
-
v1 = vector(str1)
|
25
|
-
v2 = vector(str2)
|
27
|
+
v1 = vector(str1, ngram)
|
28
|
+
v2 = vector(str2, ngram)
|
26
29
|
|
27
30
|
# calculate the dot product
|
28
31
|
dot_product = dot(v1, v2)
|
@@ -94,13 +97,33 @@ module String::Similarity
|
|
94
97
|
end
|
95
98
|
|
96
99
|
# create a vector from +str+
|
100
|
+
# keys have a special format:
|
101
|
+
# '[left padding, right padding, "string"]'
|
97
102
|
#
|
98
103
|
# @example
|
99
|
-
# v1 = vector('
|
100
|
-
# v1["x"] # => 0
|
101
|
-
|
104
|
+
# v1 = vector('aba', 1) # => {'[0, 0, "a"]' => 2, '[0, 0, "b"]' => 1}
|
105
|
+
# v1['[0, 0, "x"]'] # => 0
|
106
|
+
# @example
|
107
|
+
# vector('abacaba', 2) # => {
|
108
|
+
# # '[1, 0, "a"]' => 1,
|
109
|
+
# # '[0, 0, "ab"]' => 2,
|
110
|
+
# # '[0, 0, "ba"]' => 2,
|
111
|
+
# # '[0, 0, "ac"]' => 1,
|
112
|
+
# # '[0, 0, "ca"]' => 1,
|
113
|
+
# # '[0, 1, "a"]' => 1
|
114
|
+
# # }
|
115
|
+
def self.vector(str, ngram)
|
102
116
|
v = Hash.new(0)
|
103
|
-
|
117
|
+
|
118
|
+
((1 - ngram)..(str.length - 1)).each do |i|
|
119
|
+
before = [-i, 0].max
|
120
|
+
after = [ngram - (str.length - i), 0].max
|
121
|
+
slice = str[[i, 0].max .. [i + ngram - 1, str.length - 1].min]
|
122
|
+
key = [before, after, slice].to_s
|
123
|
+
|
124
|
+
v[key] += 1
|
125
|
+
end
|
126
|
+
|
104
127
|
v
|
105
128
|
end
|
106
129
|
|
data/string-similarity.gemspec
CHANGED
@@ -32,8 +32,8 @@ This gem provides some methods for calculating similarities of two strings.
|
|
32
32
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
33
33
|
spec.require_paths = ['lib']
|
34
34
|
|
35
|
-
spec.add_development_dependency 'bundler', '~>
|
36
|
-
spec.add_development_dependency 'rake', '~>
|
35
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
36
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
37
37
|
spec.add_development_dependency 'rspec'
|
38
38
|
spec.add_development_dependency 'pry'
|
39
39
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string-similarity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Hutter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-03-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '2.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +101,7 @@ files:
|
|
101
101
|
- lib/string/similarity.rb
|
102
102
|
- lib/string/similarity/version.rb
|
103
103
|
- lib/string/similarity_refinements.rb
|
104
|
+
- renovate.json
|
104
105
|
- string-similarity.gemspec
|
105
106
|
homepage: https://github.com/mhutter/string-similarity
|
106
107
|
licenses:
|
@@ -121,8 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
122
|
- !ruby/object:Gem::Version
|
122
123
|
version: '0'
|
123
124
|
requirements: []
|
124
|
-
|
125
|
-
rubygems_version: 2.6.14
|
125
|
+
rubygems_version: 3.1.2
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: Various methods for calculating string similarities.
|