jaccard 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -8
- data/README.md +10 -0
- data/lib/jaccard.rb +5 -2
- metadata +82 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: a7f59e59910e3a93f27753822076ab579651c5b138406d71802779c14996726b
|
4
|
+
data.tar.gz: 711d75975eca08a4d6f1ac79d4926bef9d359baa6e80bab2bd54c2f8929259e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 979b053c4a4ca1fe294d532fc0d53bbdfef7d4d61ad644ed114181526095a85a961ae69e7a65208659ac84e685b6ab8679312e9141671bfbb81a61fb1090ef0a
|
7
|
+
data.tar.gz: 41e8b09e279c6afb490ebb30bf834bfd5bccc1dc19d29b6d3bb021c66b50841b1ac51a2e96257d5c31b9de671f60aeeb9c63059d50ba2fdeea13ed068024b070
|
data/Gemfile
CHANGED
@@ -1,9 +1,5 @@
|
|
1
|
-
source
|
1
|
+
source "https://rubygems.org"
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
gem "rspec", "> 2"
|
7
|
-
gem "autotest"
|
8
|
-
gem "ruby-debug", :platform => :ruby_18
|
9
|
-
gem "ruby-debug19", :platform => :ruby_19
|
3
|
+
ruby ">= 1.9.2"
|
4
|
+
|
5
|
+
gemspec
|
data/README.md
CHANGED
@@ -8,6 +8,9 @@ Examples
|
|
8
8
|
|
9
9
|
Calculate how similar two sets are:
|
10
10
|
|
11
|
+
```ruby
|
12
|
+
require 'jaccard'
|
13
|
+
|
11
14
|
a = ["likes:jeans", "likes:blue"]
|
12
15
|
b = ["likes:jeans", "likes:women", "likes:red"]
|
13
16
|
c = ["likes:women", "likes:red"]
|
@@ -23,29 +26,36 @@ Calculate how similar two sets are:
|
|
23
26
|
#=> 0.6666666666666666
|
24
27
|
|
25
28
|
# According to the input data, b and c have the most similar likes.
|
29
|
+
```
|
26
30
|
|
27
31
|
We can also extract the distance quite easily:
|
28
32
|
|
33
|
+
```ruby
|
29
34
|
Jaccard.distance(a, b)
|
30
35
|
#=> 0.75
|
36
|
+
```
|
31
37
|
|
32
38
|
The Jaccard distance is the inverse relation of the coefficient: `1 - coefficient`.
|
33
39
|
|
34
40
|
Find out which set is closest to a given set of attributes (return a value where the distance is the minimum):
|
35
41
|
|
42
|
+
```ruby
|
36
43
|
Jaccard.closest_to(a, [b, c])
|
37
44
|
#=> ["likes:jeans", "likes:women", "likes:red"]
|
38
45
|
|
39
46
|
Jaccard.closest_to(b, [a, c])
|
40
47
|
#=> ["likes:women", "likes:red"]
|
48
|
+
```
|
41
49
|
|
42
50
|
Finally, we can find the best pair in a set:
|
43
51
|
|
52
|
+
```ruby
|
44
53
|
require "pp"
|
45
54
|
pp Jaccard.best_match([a, b, c])
|
46
55
|
# [["likes:jeans", "likes:women", "likes:red"],
|
47
56
|
# ["likes:women", "likes:red"]]
|
48
57
|
#=> nil
|
58
|
+
```
|
49
59
|
|
50
60
|
Notes on scalability
|
51
61
|
====================
|
data/lib/jaccard.rb
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
+
# We must keep this due to Ruby 2.7 being supported
|
2
|
+
# rubocop:disable Lint/RedundantRequireStatement
|
1
3
|
require "set"
|
4
|
+
# rubocop:enable Lint/RedundantRequireStatement
|
2
5
|
|
3
6
|
# Helpers to calculate the Jaccard Coefficient Index and related metrics easily.
|
4
7
|
#
|
@@ -36,7 +39,7 @@ module Jaccard
|
|
36
39
|
raise ArgumentError, "#{a.inspect} does not implement #+" unless a.respond_to?(:+)
|
37
40
|
|
38
41
|
intersection = a & b
|
39
|
-
union
|
42
|
+
union = a + b
|
40
43
|
|
41
44
|
# Set does not implement #uniq or #uniq! since elements are
|
42
45
|
# always guaranteed to be present only once. That's the only
|
@@ -109,6 +112,6 @@ module Jaccard
|
|
109
112
|
end
|
110
113
|
end
|
111
114
|
|
112
|
-
matches.
|
115
|
+
matches.min.last
|
113
116
|
end
|
114
117
|
end
|
metadata
CHANGED
@@ -1,38 +1,95 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jaccard
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- François Beausoleil
|
9
|
-
autorequire:
|
8
|
+
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2023-06-20 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 13.0.6
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '14.0'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 13.0.6
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '14.0'
|
14
33
|
- !ruby/object:Gem::Dependency
|
15
34
|
name: rspec
|
16
|
-
requirement:
|
17
|
-
none: false
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
18
36
|
requirements:
|
19
|
-
- -
|
37
|
+
- - ">="
|
20
38
|
- !ruby/object:Gem::Version
|
21
39
|
version: 1.2.9
|
40
|
+
- - "<"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '4.0'
|
22
43
|
type: :development
|
23
44
|
prerelease: false
|
24
|
-
version_requirements:
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.9
|
50
|
+
- - "<"
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '4.0'
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: standardrb
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.0.1
|
60
|
+
- - "<"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '2.0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.0.1
|
70
|
+
- - "<"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '2.0'
|
25
73
|
- !ruby/object:Gem::Dependency
|
26
74
|
name: yard
|
27
|
-
requirement:
|
28
|
-
none: false
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
29
76
|
requirements:
|
30
|
-
- -
|
77
|
+
- - ">="
|
31
78
|
- !ruby/object:Gem::Version
|
32
|
-
version:
|
79
|
+
version: 0.9.34
|
80
|
+
- - "<"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.0'
|
33
83
|
type: :development
|
34
84
|
prerelease: false
|
35
|
-
version_requirements:
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.9.34
|
90
|
+
- - "<"
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '1.0'
|
36
93
|
description: The Jaccard Coefficient Index is a measure of how similar two sets are.
|
37
94
|
This library makes calculating the coefficient very easy, and provides useful helpers.
|
38
95
|
email: francois@teksol.info
|
@@ -40,33 +97,31 @@ executables: []
|
|
40
97
|
extensions: []
|
41
98
|
extra_rdoc_files: []
|
42
99
|
files:
|
43
|
-
- lib/jaccard.rb
|
44
|
-
- README.md
|
45
|
-
- LICENSE
|
46
100
|
- Gemfile
|
101
|
+
- LICENSE
|
102
|
+
- README.md
|
103
|
+
- lib/jaccard.rb
|
47
104
|
homepage: http://github.com/francois/jaccard
|
48
|
-
licenses:
|
49
|
-
|
105
|
+
licenses:
|
106
|
+
- MIT
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
50
109
|
rdoc_options: []
|
51
110
|
require_paths:
|
52
111
|
- lib
|
53
112
|
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
-
none: false
|
55
113
|
requirements:
|
56
|
-
- -
|
114
|
+
- - ">="
|
57
115
|
- !ruby/object:Gem::Version
|
58
116
|
version: '0'
|
59
117
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
60
|
-
none: false
|
61
118
|
requirements:
|
62
|
-
- -
|
119
|
+
- - ">="
|
63
120
|
- !ruby/object:Gem::Version
|
64
121
|
version: '0'
|
65
122
|
requirements: []
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
specification_version: 3
|
123
|
+
rubygems_version: 3.4.10
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
70
126
|
summary: A library to make calculating the Jaccard Coefficient Index a snap
|
71
127
|
test_files: []
|
72
|
-
has_rdoc:
|