jaccard 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -8
- data/README.md +10 -0
- data/lib/jaccard.rb +5 -2
- metadata +82 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a7f59e59910e3a93f27753822076ab579651c5b138406d71802779c14996726b
|
4
|
+
data.tar.gz: 711d75975eca08a4d6f1ac79d4926bef9d359baa6e80bab2bd54c2f8929259e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 979b053c4a4ca1fe294d532fc0d53bbdfef7d4d61ad644ed114181526095a85a961ae69e7a65208659ac84e685b6ab8679312e9141671bfbb81a61fb1090ef0a
|
7
|
+
data.tar.gz: 41e8b09e279c6afb490ebb30bf834bfd5bccc1dc19d29b6d3bb021c66b50841b1ac51a2e96257d5c31b9de671f60aeeb9c63059d50ba2fdeea13ed068024b070
|
data/Gemfile
CHANGED
@@ -1,9 +1,5 @@
|
|
1
|
-
source
|
1
|
+
source "https://rubygems.org"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
gem "rspec", "> 2"
|
7
|
-
gem "autotest"
|
8
|
-
gem "ruby-debug", :platform => :ruby_18
|
9
|
-
gem "ruby-debug19", :platform => :ruby_19
|
3
|
+
ruby ">= 1.9.2"
|
4
|
+
|
5
|
+
gemspec
|
data/README.md
CHANGED
@@ -8,6 +8,9 @@ Examples
|
|
8
8
|
|
9
9
|
Calculate how similar two sets are:
|
10
10
|
|
11
|
+
```ruby
|
12
|
+
require 'jaccard'
|
13
|
+
|
11
14
|
a = ["likes:jeans", "likes:blue"]
|
12
15
|
b = ["likes:jeans", "likes:women", "likes:red"]
|
13
16
|
c = ["likes:women", "likes:red"]
|
@@ -23,29 +26,36 @@ Calculate how similar two sets are:
|
|
23
26
|
#=> 0.6666666666666666
|
24
27
|
|
25
28
|
# According to the input data, b and c have the most similar likes.
|
29
|
+
```
|
26
30
|
|
27
31
|
We can also extract the distance quite easily:
|
28
32
|
|
33
|
+
```ruby
|
29
34
|
Jaccard.distance(a, b)
|
30
35
|
#=> 0.75
|
36
|
+
```
|
31
37
|
|
32
38
|
The Jaccard distance is the complement of the coefficient: `1 - coefficient`.
|
33
39
|
|
34
40
|
Find out which set is closest to a given set of attributes (returns the candidate set whose distance is the smallest):
|
35
41
|
|
42
|
+
```ruby
|
36
43
|
Jaccard.closest_to(a, [b, c])
|
37
44
|
#=> ["likes:jeans", "likes:women", "likes:red"]
|
38
45
|
|
39
46
|
Jaccard.closest_to(b, [a, c])
|
40
47
|
#=> ["likes:women", "likes:red"]
|
48
|
+
```
|
41
49
|
|
42
50
|
Finally, we can find the best pair in a set:
|
43
51
|
|
52
|
+
```ruby
|
44
53
|
require "pp"
|
45
54
|
pp Jaccard.best_match([a, b, c])
|
46
55
|
# [["likes:jeans", "likes:women", "likes:red"],
|
47
56
|
# ["likes:women", "likes:red"]]
|
48
57
|
#=> nil
|
58
|
+
```
|
49
59
|
|
50
60
|
Notes on scalability
|
51
61
|
====================
|
data/lib/jaccard.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
# We must keep this due to Ruby 2.7 being supported
|
2
|
+
# rubocop:disable Lint/RedundantRequireStatement
|
1
3
|
require "set"
|
4
|
+
# rubocop:enable Lint/RedundantRequireStatement
|
2
5
|
|
3
6
|
# Helpers to calculate the Jaccard Coefficient Index and related metrics easily.
|
4
7
|
#
|
@@ -36,7 +39,7 @@ module Jaccard
|
|
36
39
|
raise ArgumentError, "#{a.inspect} does not implement #+" unless a.respond_to?(:+)
|
37
40
|
|
38
41
|
intersection = a & b
|
39
|
-
union
|
42
|
+
union = a + b
|
40
43
|
|
41
44
|
# Set does not implement #uniq or #uniq! since elements are
|
42
45
|
# always guaranteed to be present only once. That's the only
|
@@ -109,6 +112,6 @@ module Jaccard
|
|
109
112
|
end
|
110
113
|
end
|
111
114
|
|
112
|
-
matches.
|
115
|
+
matches.min.last
|
113
116
|
end
|
114
117
|
end
|
metadata
CHANGED
@@ -1,38 +1,95 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jaccard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- François Beausoleil
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2023-06-20 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 13.0.6
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '14.0'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 13.0.6
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '14.0'
|
14
33
|
- !ruby/object:Gem::Dependency
|
15
34
|
name: rspec
|
16
|
-
requirement:
|
17
|
-
none: false
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
18
36
|
requirements:
|
19
|
-
- -
|
37
|
+
- - ">="
|
20
38
|
- !ruby/object:Gem::Version
|
21
39
|
version: 1.2.9
|
40
|
+
- - "<"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '4.0'
|
22
43
|
type: :development
|
23
44
|
prerelease: false
|
24
|
-
version_requirements:
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.9
|
50
|
+
- - "<"
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '4.0'
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: standardrb
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.0.1
|
60
|
+
- - "<"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '2.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.0.1
|
70
|
+
- - "<"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '2.0'
|
25
73
|
- !ruby/object:Gem::Dependency
|
26
74
|
name: yard
|
27
|
-
requirement:
|
28
|
-
none: false
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
29
76
|
requirements:
|
30
|
-
- -
|
77
|
+
- - ">="
|
31
78
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
79
|
+
version: 0.9.34
|
80
|
+
- - "<"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.0'
|
33
83
|
type: :development
|
34
84
|
prerelease: false
|
35
|
-
version_requirements:
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.9.34
|
90
|
+
- - "<"
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '1.0'
|
36
93
|
description: The Jaccard Coefficient Index is a measure of how similar two sets are.
|
37
94
|
This library makes calculating the coefficient very easy, and provides useful helpers.
|
38
95
|
email: francois@teksol.info
|
@@ -40,33 +97,31 @@ executables: []
|
|
40
97
|
extensions: []
|
41
98
|
extra_rdoc_files: []
|
42
99
|
files:
|
43
|
-
- lib/jaccard.rb
|
44
|
-
- README.md
|
45
|
-
- LICENSE
|
46
100
|
- Gemfile
|
101
|
+
- LICENSE
|
102
|
+
- README.md
|
103
|
+
- lib/jaccard.rb
|
47
104
|
homepage: http://github.com/francois/jaccard
|
48
|
-
licenses:
|
49
|
-
|
105
|
+
licenses:
|
106
|
+
- MIT
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
50
109
|
rdoc_options: []
|
51
110
|
require_paths:
|
52
111
|
- lib
|
53
112
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
-
none: false
|
55
113
|
requirements:
|
56
|
-
- -
|
114
|
+
- - ">="
|
57
115
|
- !ruby/object:Gem::Version
|
58
116
|
version: '0'
|
59
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
-
none: false
|
61
118
|
requirements:
|
62
|
-
- -
|
119
|
+
- - ">="
|
63
120
|
- !ruby/object:Gem::Version
|
64
121
|
version: '0'
|
65
122
|
requirements: []
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
specification_version: 3
|
123
|
+
rubygems_version: 3.4.10
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
70
126
|
summary: A library to make calculating the Jaccard Coefficient Index a snap
|
71
127
|
test_files: []
|
72
|
-
has_rdoc:
|