doc_sim 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gem_rbs_collection/ast/2.4/.rbs_meta.yaml +9 -0
- data/.gem_rbs_collection/ast/2.4/ast.rbs +73 -0
- data/.gem_rbs_collection/parallel/1.20/.rbs_meta.yaml +9 -0
- data/.gem_rbs_collection/parallel/1.20/parallel.rbs +86 -0
- data/.gem_rbs_collection/rainbow/3.0/.rbs_meta.yaml +9 -0
- data/.gem_rbs_collection/rainbow/3.0/global.rbs +7 -0
- data/.gem_rbs_collection/rainbow/3.0/presenter.rbs +209 -0
- data/.gem_rbs_collection/rainbow/3.0/rainbow.rbs +5 -0
- data/README.md +3 -5
- data/Rakefile +2 -0
- data/Steepfile +21 -21
- data/lib/doc_sim/minhash.rb +5 -2
- data/lib/doc_sim/shingling.rb +2 -3
- data/lib/doc_sim/version.rb +1 -1
- data/rbs_collection.lock.yaml +42 -0
- data/rbs_collection.yaml +17 -0
- data/sig/doc_sim/locality_sensitive_hashing.rbs +3 -3
- data/sig/doc_sim/minhash.rbs +5 -0
- data/sig/doc_sim/shingling.rbs +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d347e8691cd16dac62f9ae78c1a8cda24e8372e24948cc3a081f7bf881c24fb6
|
4
|
+
data.tar.gz: 4fc1200c3388a67832fb97022ea13ff99174be2460f07d3e39d39b2d4162d84c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e98e60ed5cb2a7ceca3a5112c7f787bb48621cf5f8566ac3d75ea422ff202412c6dd2c0976a21f0ab2fd33cb0cd1d571765ab5694a3ff9c09fd6c658c7eb972
|
7
|
+
data.tar.gz: a143cf5b4ba9a0319b9c31f66107cc0bd7234a2cfd434cad97e90d809482736548e1ce98cb0d273ebdb5baec356b245f55390015ca89a4d608c791e1498926c3
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module AST
|
2
|
+
interface _ToAst
|
3
|
+
def to_ast: () -> Node
|
4
|
+
end
|
5
|
+
|
6
|
+
interface _ToSym
|
7
|
+
def to_sym: () -> Symbol
|
8
|
+
end
|
9
|
+
|
10
|
+
class Node
|
11
|
+
public
|
12
|
+
|
13
|
+
attr_reader children: Array[untyped]
|
14
|
+
attr_reader hash: String
|
15
|
+
attr_reader type: Symbol
|
16
|
+
|
17
|
+
alias + concat
|
18
|
+
|
19
|
+
alias << append
|
20
|
+
|
21
|
+
def ==: (untyped other) -> bool
|
22
|
+
|
23
|
+
def append: (untyped element) -> self
|
24
|
+
|
25
|
+
alias clone dup
|
26
|
+
|
27
|
+
def concat: (_ToA[untyped] array) -> self
|
28
|
+
|
29
|
+
def dup: () -> self
|
30
|
+
|
31
|
+
def eql?: (untyped other) -> bool
|
32
|
+
|
33
|
+
def inspect: (?Integer indent) -> String
|
34
|
+
|
35
|
+
alias to_a children
|
36
|
+
|
37
|
+
def to_ast: () -> self
|
38
|
+
|
39
|
+
alias to_s to_sexp
|
40
|
+
|
41
|
+
def to_sexp: (?Integer indent) -> String
|
42
|
+
|
43
|
+
def to_sexp_array: () -> Array[untyped]
|
44
|
+
|
45
|
+
def updated: (?_ToSym? `type`, ?_ToA[untyped]? children, ?Hash[Symbol, untyped]? properties) -> self
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
def initialize: (_ToSym `type`, ?_ToA[untyped]? children, ?Hash[Symbol, untyped] properties) -> void
|
50
|
+
|
51
|
+
alias original_dup dup
|
52
|
+
end
|
53
|
+
|
54
|
+
class Processor
|
55
|
+
include Mixin
|
56
|
+
|
57
|
+
module Mixin
|
58
|
+
public
|
59
|
+
|
60
|
+
def handler_missing: (Node node) -> Node?
|
61
|
+
|
62
|
+
def process: (_ToAst? node) -> Node?
|
63
|
+
|
64
|
+
def process_all: (Array[_ToAst] nodes) -> Array[Node]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
module Sexp
|
69
|
+
public
|
70
|
+
|
71
|
+
def s: (_ToSym `type`, *untyped children) -> Node
|
72
|
+
end
|
73
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Parallel
|
2
|
+
class Break < StandardError
|
3
|
+
attr_reader value: untyped
|
4
|
+
|
5
|
+
def initialize: (?untyped value) -> void
|
6
|
+
end
|
7
|
+
|
8
|
+
class Kill < Break
|
9
|
+
end
|
10
|
+
|
11
|
+
type stop = Object
|
12
|
+
Stop: stop
|
13
|
+
|
14
|
+
VERSION: String
|
15
|
+
|
16
|
+
Version: String
|
17
|
+
|
18
|
+
type callable_source[T] = ^() -> (T | stop)
|
19
|
+
|
20
|
+
def self.all?: [T] (Enumerable[T] | callable_source[T] source,
|
21
|
+
?in_processes: Integer,
|
22
|
+
?in_threads: Integer,
|
23
|
+
?progress: _ToStr,
|
24
|
+
?start: ^(T item, Integer index) -> void,
|
25
|
+
?finish: ^(T item, Integer index, boolish result) -> void) { (T) -> boolish } -> bool
|
26
|
+
|
27
|
+
def self.any?: [T] (Enumerable[T] | callable_source[T] source,
|
28
|
+
?in_processes: Integer,
|
29
|
+
?in_threads: Integer,
|
30
|
+
?progress: _ToStr,
|
31
|
+
?start: ^(T item, Integer index) -> void,
|
32
|
+
?finish: ^(T item, Integer index, boolish result) -> void) { (T) -> boolish } -> bool
|
33
|
+
|
34
|
+
def self.each: [T, U] (Enumerable[T] source,
|
35
|
+
?in_processes: Integer,
|
36
|
+
?in_threads: Integer,
|
37
|
+
?progress: _ToStr,
|
38
|
+
?start: ^(T item, Integer index) -> void,
|
39
|
+
?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> Enumerable[U]
|
40
|
+
| [T, U] (callable_source[T] source,
|
41
|
+
?in_processes: Integer,
|
42
|
+
?in_threads: Integer,
|
43
|
+
?progress: _ToStr,
|
44
|
+
?start: ^(T item, Integer index) -> void,
|
45
|
+
?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> callable_source[T]
|
46
|
+
|
47
|
+
def self.each_with_index: [T, U] (Enumerable[T] source,
|
48
|
+
?in_processes: Integer,
|
49
|
+
?in_threads: Integer,
|
50
|
+
?progress: _ToStr,
|
51
|
+
?start: ^(T item, Integer index) -> void,
|
52
|
+
?finish: ^(T item, Integer index, U result) -> void) { (T, Integer) -> U } -> Enumerable[T]
|
53
|
+
| [T, U] (callable_source[T] array,
|
54
|
+
?in_processes: Integer,
|
55
|
+
?in_threads: Integer,
|
56
|
+
?progress: _ToStr,
|
57
|
+
?start: ^(T item, Integer index) -> void,
|
58
|
+
?finish: ^(T item, Integer index, U result) -> void) { (T, Integer) -> U } -> callable_source[U]
|
59
|
+
|
60
|
+
def self.flat_map: [T, U] (Enumerable[T] | callable_source[T] src,
|
61
|
+
?in_processes: Integer,
|
62
|
+
?in_threads: Integer,
|
63
|
+
?progress: _ToStr,
|
64
|
+
?start: ^(T item, Integer index) -> void,
|
65
|
+
?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> Array[U]
|
66
|
+
|
67
|
+
def self.map: [T, U] (Enumerable[T] | callable_source[T] | Thread::Queue source,
|
68
|
+
?in_processes: Integer,
|
69
|
+
?in_threads: Integer,
|
70
|
+
?progress: _ToStr,
|
71
|
+
?start: ^(T item, Integer index) -> void,
|
72
|
+
?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> Array[U]
|
73
|
+
|
74
|
+
def self.map_with_index: [T, U] (Enumerable[T] | callable_source[T] source,
|
75
|
+
?in_processes: Integer,
|
76
|
+
?in_threads: Integer,
|
77
|
+
?progress: _ToStr,
|
78
|
+
?start: ^(T item, Integer index) -> untyped,
|
79
|
+
?finish: ^(T item, Integer index, U result) -> untyped) { (T, Integer) -> U } -> Array[U]
|
80
|
+
|
81
|
+
def self.physical_processor_count: () -> Integer
|
82
|
+
|
83
|
+
def self.processor_count: () -> Integer
|
84
|
+
|
85
|
+
def self.worker_number: () -> Integer
|
86
|
+
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
module Rainbow
|
2
|
+
class Presenter < String
|
3
|
+
# Sets color of this text.
|
4
|
+
def color: (*Symbol values) -> instance
|
5
|
+
|
6
|
+
alias foreground color
|
7
|
+
|
8
|
+
alias fg color
|
9
|
+
|
10
|
+
# Sets background color of this text.
|
11
|
+
def background: (*Symbol values) -> instance
|
12
|
+
|
13
|
+
alias bg background
|
14
|
+
|
15
|
+
# Resets terminal to default colors/backgrounds.
|
16
|
+
#
|
17
|
+
# It shouldn't be needed to use this method because all methods
|
18
|
+
# append terminal reset code to end of string.
|
19
|
+
def reset: () -> instance
|
20
|
+
|
21
|
+
# Turns on bright/bold for this text.
|
22
|
+
def bright: () -> instance
|
23
|
+
|
24
|
+
alias bold bright
|
25
|
+
|
26
|
+
# Turns on faint/dark for this text (not well supported by terminal
|
27
|
+
# emulators).
|
28
|
+
def faint: () -> instance
|
29
|
+
|
30
|
+
# Turns on italic style for this text (not well supported by terminal
|
31
|
+
# emulators).
|
32
|
+
def italic: () -> instance
|
33
|
+
|
34
|
+
# Turns on underline decoration for this text.
|
35
|
+
def underline: () -> instance
|
36
|
+
|
37
|
+
# Turns on blinking attribute for this text (not well supported by terminal
|
38
|
+
# emulators).
|
39
|
+
def blink: () -> instance
|
40
|
+
|
41
|
+
# Inverts current foreground/background colors.
|
42
|
+
def inverse: () -> instance
|
43
|
+
|
44
|
+
# Hides this text (set its color to the same as background).
|
45
|
+
def hide: () -> instance
|
46
|
+
|
47
|
+
def black: () -> instance
|
48
|
+
|
49
|
+
def red: () -> instance
|
50
|
+
|
51
|
+
def green: () -> instance
|
52
|
+
|
53
|
+
def yellow: () -> instance
|
54
|
+
|
55
|
+
def blue: () -> instance
|
56
|
+
|
57
|
+
def magenta: () -> instance
|
58
|
+
|
59
|
+
def cyan: () -> instance
|
60
|
+
|
61
|
+
def white: () -> instance
|
62
|
+
|
63
|
+
# We take care of X11 color method call here.
|
64
|
+
# Such as #aqua, #ghostwhite.
|
65
|
+
def method_missing: (untyped method_name, *untyped args) -> untyped
|
66
|
+
|
67
|
+
def respond_to_missing?: (untyped method_name, *untyped args) -> bool
|
68
|
+
|
69
|
+
def wrap_with_sgr: (untyped codes) -> instance
|
70
|
+
|
71
|
+
def aliceblue: () -> instance
|
72
|
+
def antiquewhite: () -> instance
|
73
|
+
def aqua: () -> instance
|
74
|
+
def aquamarine: () -> instance
|
75
|
+
def azure: () -> instance
|
76
|
+
def beige: () -> instance
|
77
|
+
def bisque: () -> instance
|
78
|
+
def blanchedalmond: () -> instance
|
79
|
+
def blueviolet: () -> instance
|
80
|
+
def brown: () -> instance
|
81
|
+
def burlywood: () -> instance
|
82
|
+
def cadetblue: () -> instance
|
83
|
+
def chartreuse: () -> instance
|
84
|
+
def chocolate: () -> instance
|
85
|
+
def coral: () -> instance
|
86
|
+
def cornflower: () -> instance
|
87
|
+
def cornsilk: () -> instance
|
88
|
+
def crimson: () -> instance
|
89
|
+
def darkblue: () -> instance
|
90
|
+
def darkcyan: () -> instance
|
91
|
+
def darkgoldenrod: () -> instance
|
92
|
+
def darkgray: () -> instance
|
93
|
+
def darkgreen: () -> instance
|
94
|
+
def darkkhaki: () -> instance
|
95
|
+
def darkmagenta: () -> instance
|
96
|
+
def darkolivegreen: () -> instance
|
97
|
+
def darkorange: () -> instance
|
98
|
+
def darkorchid: () -> instance
|
99
|
+
def darkred: () -> instance
|
100
|
+
def darksalmon: () -> instance
|
101
|
+
def darkseagreen: () -> instance
|
102
|
+
def darkslateblue: () -> instance
|
103
|
+
def darkslategray: () -> instance
|
104
|
+
def darkturquoise: () -> instance
|
105
|
+
def darkviolet: () -> instance
|
106
|
+
def deeppink: () -> instance
|
107
|
+
def deepskyblue: () -> instance
|
108
|
+
def dimgray: () -> instance
|
109
|
+
def dodgerblue: () -> instance
|
110
|
+
def firebrick: () -> instance
|
111
|
+
def floralwhite: () -> instance
|
112
|
+
def forestgreen: () -> instance
|
113
|
+
def fuchsia: () -> instance
|
114
|
+
def gainsboro: () -> instance
|
115
|
+
def ghostwhite: () -> instance
|
116
|
+
def gold: () -> instance
|
117
|
+
def goldenrod: () -> instance
|
118
|
+
def gray: () -> instance
|
119
|
+
def greenyellow: () -> instance
|
120
|
+
def honeydew: () -> instance
|
121
|
+
def hotpink: () -> instance
|
122
|
+
def indianred: () -> instance
|
123
|
+
def indigo: () -> instance
|
124
|
+
def ivory: () -> instance
|
125
|
+
def khaki: () -> instance
|
126
|
+
def lavender: () -> instance
|
127
|
+
def lavenderblush: () -> instance
|
128
|
+
def lawngreen: () -> instance
|
129
|
+
def lemonchiffon: () -> instance
|
130
|
+
def lightblue: () -> instance
|
131
|
+
def lightcoral: () -> instance
|
132
|
+
def lightcyan: () -> instance
|
133
|
+
def lightgoldenrod: () -> instance
|
134
|
+
def lightgray: () -> instance
|
135
|
+
def lightgreen: () -> instance
|
136
|
+
def lightpink: () -> instance
|
137
|
+
def lightsalmon: () -> instance
|
138
|
+
def lightseagreen: () -> instance
|
139
|
+
def lightskyblue: () -> instance
|
140
|
+
def lightslategray: () -> instance
|
141
|
+
def lightsteelblue: () -> instance
|
142
|
+
def lightyellow: () -> instance
|
143
|
+
def lime: () -> instance
|
144
|
+
def limegreen: () -> instance
|
145
|
+
def linen: () -> instance
|
146
|
+
def maroon: () -> instance
|
147
|
+
def mediumaquamarine: () -> instance
|
148
|
+
def mediumblue: () -> instance
|
149
|
+
def mediumorchid: () -> instance
|
150
|
+
def mediumpurple: () -> instance
|
151
|
+
def mediumseagreen: () -> instance
|
152
|
+
def mediumslateblue: () -> instance
|
153
|
+
def mediumspringgreen: () -> instance
|
154
|
+
def mediumturquoise: () -> instance
|
155
|
+
def mediumvioletred: () -> instance
|
156
|
+
def midnightblue: () -> instance
|
157
|
+
def mintcream: () -> instance
|
158
|
+
def mistyrose: () -> instance
|
159
|
+
def moccasin: () -> instance
|
160
|
+
def navajowhite: () -> instance
|
161
|
+
def navyblue: () -> instance
|
162
|
+
def oldlace: () -> instance
|
163
|
+
def olive: () -> instance
|
164
|
+
def olivedrab: () -> instance
|
165
|
+
def orange: () -> instance
|
166
|
+
def orangered: () -> instance
|
167
|
+
def orchid: () -> instance
|
168
|
+
def palegoldenrod: () -> instance
|
169
|
+
def palegreen: () -> instance
|
170
|
+
def paleturquoise: () -> instance
|
171
|
+
def palevioletred: () -> instance
|
172
|
+
def papayawhip: () -> instance
|
173
|
+
def peachpuff: () -> instance
|
174
|
+
def peru: () -> instance
|
175
|
+
def pink: () -> instance
|
176
|
+
def plum: () -> instance
|
177
|
+
def powderblue: () -> instance
|
178
|
+
def purple: () -> instance
|
179
|
+
def rebeccapurple: () -> instance
|
180
|
+
def rosybrown: () -> instance
|
181
|
+
def royalblue: () -> instance
|
182
|
+
def saddlebrown: () -> instance
|
183
|
+
def salmon: () -> instance
|
184
|
+
def sandybrown: () -> instance
|
185
|
+
def seagreen: () -> instance
|
186
|
+
def seashell: () -> instance
|
187
|
+
def sienna: () -> instance
|
188
|
+
def silver: () -> instance
|
189
|
+
def skyblue: () -> instance
|
190
|
+
def slateblue: () -> instance
|
191
|
+
def slategray: () -> instance
|
192
|
+
def snow: () -> instance
|
193
|
+
def springgreen: () -> instance
|
194
|
+
def steelblue: () -> instance
|
195
|
+
def tan: () -> instance
|
196
|
+
def teal: () -> instance
|
197
|
+
def thistle: () -> instance
|
198
|
+
def tomato: () -> instance
|
199
|
+
def turquoise: () -> instance
|
200
|
+
def violet: () -> instance
|
201
|
+
def webgray: () -> instance
|
202
|
+
def webgreen: () -> instance
|
203
|
+
def webmaroon: () -> instance
|
204
|
+
def webpurple: () -> instance
|
205
|
+
def wheat: () -> instance
|
206
|
+
def whitesmoke: () -> instance
|
207
|
+
def yellowgreen: () -> instance
|
208
|
+
end
|
209
|
+
end
|
data/README.md
CHANGED
@@ -1,18 +1,16 @@
|
|
1
|
-
#
|
1
|
+
# Doc Sim - Efficient algorithm for calculating approximate document similarity
|
2
2
|
|
3
3
|
A Ruby implementation of the document similarity algorithm from [Mining of Massive Datasets](http://www.mmds.org/). It uses Minhash and Locality Sensitive Hashing to efficiently find documents with a high probability of being similar.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
7
|
-
TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_PRIOR_TO_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
|
8
|
-
|
9
7
|
Install the gem and add to the application's Gemfile by executing:
|
10
8
|
|
11
|
-
$ bundle add
|
9
|
+
$ bundle add doc_sim
|
12
10
|
|
13
11
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
14
12
|
|
15
|
-
$ gem install
|
13
|
+
$ gem install doc_sim
|
16
14
|
|
17
15
|
## Usage
|
18
16
|
|
data/Rakefile
CHANGED
data/Steepfile
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#
|
11
|
-
|
12
|
-
#
|
13
|
-
#
|
14
|
-
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
|
2
|
+
|
3
|
+
D = Steep::Diagnostic
|
4
|
+
|
5
|
+
target :lib do
|
6
|
+
signature "sig"
|
7
|
+
|
8
|
+
check "lib" # Directory name
|
9
|
+
ignore "Gemfile"
|
10
|
+
# ignore "lib/templates/*.rb"
|
11
|
+
|
12
|
+
# library "pathname" # Standard libraries
|
13
|
+
# library "strong_json" # Gems
|
14
|
+
|
15
|
+
# configure_code_diagnostics(D::Ruby.default) # `default` diagnostics setting (applies by default)
|
16
|
+
# configure_code_diagnostics(D::Ruby.strict) # `strict` diagnostics setting
|
17
|
+
# configure_code_diagnostics(D::Ruby.lenient) # `lenient` diagnostics setting
|
18
|
+
# configure_code_diagnostics(D::Ruby.silent) # `silent` diagnostics setting
|
19
|
+
# configure_code_diagnostics do |hash| # You can setup everything yourself
|
20
|
+
# hash[D::Ruby::NoMethod] = :information
|
21
|
+
# end
|
22
|
+
end
|
23
23
|
|
24
24
|
# target :test do
|
25
25
|
# signature "sig", "sig-private"
|
data/lib/doc_sim/minhash.rb
CHANGED
@@ -7,6 +7,9 @@ module Minhash
|
|
7
7
|
class Minhash
|
8
8
|
attr_reader :seed_root
|
9
9
|
|
10
|
+
# Initial value for the running minimum in #signature; hashes are 32-bit, so every hash is below this
|
11
|
+
HASH_MAX = (2**32) + 1
|
12
|
+
|
10
13
|
def initialize(n_hashes = 1, seed_root = rand(2**32))
|
11
14
|
@seed_root = seed_root
|
12
15
|
@hashes = Array.new(n_hashes) do |seed|
|
@@ -16,11 +19,11 @@ module Minhash
|
|
16
19
|
|
17
20
|
# Produces the Minhash signature for a given Set
|
18
21
|
#
|
19
|
-
# @param set [Set] the set to produce the signature for
|
22
|
+
# @param set [Set[String]] the set to produce the signature for
|
20
23
|
#
|
21
24
|
# @return [Array[Integer]] 32 bit integer array of length n_hashes
|
22
25
|
def signature(set)
|
23
|
-
counter = Array.new(@hashes.length,
|
26
|
+
counter = Array.new(@hashes.length, Minhash::HASH_MAX)
|
24
27
|
set.each do |elem|
|
25
28
|
@hashes.each_with_index do |hash_func, i|
|
26
29
|
counter[i] = [counter[i], hash_func.call(elem)].min
|
data/lib/doc_sim/shingling.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
# Shingle a document
|
3
4
|
module Shingling
|
4
5
|
def self.shingle(document, k)
|
5
6
|
max_index = document.length - k + 1
|
6
|
-
max_index.times.
|
7
|
-
document[i...(i + k)]
|
8
|
-
end
|
7
|
+
max_index.times.to_set { |i| document[i...(i + k)] }
|
9
8
|
end
|
10
9
|
end
|
data/lib/doc_sim/version.rb
CHANGED
@@ -0,0 +1,42 @@
|
|
1
|
+
---
|
2
|
+
sources:
|
3
|
+
- type: git
|
4
|
+
name: ruby/gem_rbs_collection
|
5
|
+
revision: a4c633634493ab7ae73219022f56acff56ab69af
|
6
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
7
|
+
repo_dir: gems
|
8
|
+
path: ".gem_rbs_collection"
|
9
|
+
gems:
|
10
|
+
- name: ast
|
11
|
+
version: '2.4'
|
12
|
+
source:
|
13
|
+
type: git
|
14
|
+
name: ruby/gem_rbs_collection
|
15
|
+
revision: a4c633634493ab7ae73219022f56acff56ab69af
|
16
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
17
|
+
repo_dir: gems
|
18
|
+
- name: base64
|
19
|
+
version: '0'
|
20
|
+
source:
|
21
|
+
type: stdlib
|
22
|
+
- name: json
|
23
|
+
version: '0'
|
24
|
+
source:
|
25
|
+
type: stdlib
|
26
|
+
- name: parallel
|
27
|
+
version: '1.20'
|
28
|
+
source:
|
29
|
+
type: git
|
30
|
+
name: ruby/gem_rbs_collection
|
31
|
+
revision: a4c633634493ab7ae73219022f56acff56ab69af
|
32
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
33
|
+
repo_dir: gems
|
34
|
+
- name: rainbow
|
35
|
+
version: '3.0'
|
36
|
+
source:
|
37
|
+
type: git
|
38
|
+
name: ruby/gem_rbs_collection
|
39
|
+
revision: a4c633634493ab7ae73219022f56acff56ab69af
|
40
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
41
|
+
repo_dir: gems
|
42
|
+
gemfile_lock_path: Gemfile.lock
|
data/rbs_collection.yaml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# Download sources
|
2
|
+
sources:
|
3
|
+
- name: ruby/gem_rbs_collection
|
4
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
5
|
+
revision: main
|
6
|
+
repo_dir: gems
|
7
|
+
|
8
|
+
# A directory to install the downloaded RBSs
|
9
|
+
path: .gem_rbs_collection
|
10
|
+
|
11
|
+
gems:
|
12
|
+
# Skip loading rbs gem's RBS.
|
13
|
+
# It's unnecessary if you don't use rbs as a library.
|
14
|
+
- name: rbs
|
15
|
+
ignore: true
|
16
|
+
- name: steep
|
17
|
+
ignore: true
|
@@ -3,14 +3,14 @@
|
|
3
3
|
# Classes
|
4
4
|
module LocalitySensitiveHashing
|
5
5
|
class LocalitySensitiveHashing
|
6
|
-
@buckets: Array[Hash[
|
6
|
+
@buckets: Array[Hash[Array[Integer], Array[Integer]]]
|
7
7
|
@n_rows: Integer
|
8
8
|
|
9
9
|
def initialize: (Integer n_rows, Integer n_bands) -> void
|
10
|
-
def insert: (
|
10
|
+
def insert: (Array[Integer] signature, Integer id) -> void
|
11
11
|
def similar_pairs: -> Set[Array[Integer]]
|
12
12
|
|
13
13
|
private
|
14
|
-
def generate_band_bucket: -> Hash[
|
14
|
+
def generate_band_bucket: -> Hash[Array[Integer], Array[Integer]]
|
15
15
|
end
|
16
16
|
end
|
data/sig/doc_sim/minhash.rbs
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
module Minhash
|
2
2
|
# Class for generating Minhash signature
|
3
3
|
class Minhash
|
4
|
+
@seed_root: Integer
|
5
|
+
@hashes: Array[^(String) -> Integer]
|
6
|
+
|
4
7
|
attr_reader seed_root: Integer
|
5
8
|
|
9
|
+
HASH_MAX: Integer
|
10
|
+
|
6
11
|
def initialize: (?Integer n_hashes, ?Integer seed_root) -> void
|
7
12
|
|
8
13
|
# Produces the Minhash signature for a given Set
|
data/sig/doc_sim/shingling.rbs
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc_sim
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Forthoney
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-08-
|
11
|
+
date: 2023-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: murmurhash3
|
@@ -73,6 +73,14 @@ executables: []
|
|
73
73
|
extensions: []
|
74
74
|
extra_rdoc_files: []
|
75
75
|
files:
|
76
|
+
- ".gem_rbs_collection/ast/2.4/.rbs_meta.yaml"
|
77
|
+
- ".gem_rbs_collection/ast/2.4/ast.rbs"
|
78
|
+
- ".gem_rbs_collection/parallel/1.20/.rbs_meta.yaml"
|
79
|
+
- ".gem_rbs_collection/parallel/1.20/parallel.rbs"
|
80
|
+
- ".gem_rbs_collection/rainbow/3.0/.rbs_meta.yaml"
|
81
|
+
- ".gem_rbs_collection/rainbow/3.0/global.rbs"
|
82
|
+
- ".gem_rbs_collection/rainbow/3.0/presenter.rbs"
|
83
|
+
- ".gem_rbs_collection/rainbow/3.0/rainbow.rbs"
|
76
84
|
- ".rspec"
|
77
85
|
- ".rubocop.yml"
|
78
86
|
- ".ruby-version"
|
@@ -87,6 +95,8 @@ files:
|
|
87
95
|
- lib/doc_sim/minhash.rb
|
88
96
|
- lib/doc_sim/shingling.rb
|
89
97
|
- lib/doc_sim/version.rb
|
98
|
+
- rbs_collection.lock.yaml
|
99
|
+
- rbs_collection.yaml
|
90
100
|
- sig/doc_sim.rbs
|
91
101
|
- sig/doc_sim/locality_sensitive_hashing.rbs
|
92
102
|
- sig/doc_sim/minhash.rbs
|