doc_sim 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 829be4b580fb7cfe5d0d1014e4b70aa9404606f284c929a6be1bf1272b0d6f5c
4
- data.tar.gz: d1cda541ae00d55ba0d0bd6aa073993ae2ed70c1f7778870576926adcf1e0b61
3
+ metadata.gz: d347e8691cd16dac62f9ae78c1a8cda24e8372e24948cc3a081f7bf881c24fb6
4
+ data.tar.gz: 4fc1200c3388a67832fb97022ea13ff99174be2460f07d3e39d39b2d4162d84c
5
5
  SHA512:
6
- metadata.gz: 8f842f40a808d33564e617fad4c562c68acc35f232358ccfd6f9c1c82445d57137cd7f91f945f4b8da0df65f97d6deddebf3a4d201c39944a9cee7ff2918176d
7
- data.tar.gz: 7ec44e6bcf2e5cc8339344599a3c841776aa593bd0370525f7b9ca6b2748e38fe03ac855519afcfbc0f3d8c0f8633a50d9d12d8f78e6dcf274029dc67b72a65f
6
+ metadata.gz: 2e98e60ed5cb2a7ceca3a5112c7f787bb48621cf5f8566ac3d75ea422ff202412c6dd2c0976a21f0ab2fd33cb0cd1d571765ab5694a3ff9c09fd6c658c7eb972
7
+ data.tar.gz: a143cf5b4ba9a0319b9c31f66107cc0bd7234a2cfd434cad97e90d809482736548e1ce98cb0d273ebdb5baec356b245f55390015ca89a4d608c791e1498926c3
@@ -0,0 +1,9 @@
1
+ ---
2
+ name: ast
3
+ version: '2.4'
4
+ source:
5
+ type: git
6
+ name: ruby/gem_rbs_collection
7
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
8
+ remote: https://github.com/ruby/gem_rbs_collection.git
9
+ repo_dir: gems
@@ -0,0 +1,73 @@
1
+ module AST
2
+ interface _ToAst
3
+ def to_ast: () -> Node
4
+ end
5
+
6
+ interface _ToSym
7
+ def to_sym: () -> Symbol
8
+ end
9
+
10
+ class Node
11
+ public
12
+
13
+ attr_reader children: Array[untyped]
14
+ attr_reader hash: String
15
+ attr_reader type: Symbol
16
+
17
+ alias + concat
18
+
19
+ alias << append
20
+
21
+ def ==: (untyped other) -> bool
22
+
23
+ def append: (untyped element) -> self
24
+
25
+ alias clone dup
26
+
27
+ def concat: (_ToA[untyped] array) -> self
28
+
29
+ def dup: () -> self
30
+
31
+ def eql?: (untyped other) -> bool
32
+
33
+ def inspect: (?Integer indent) -> String
34
+
35
+ alias to_a children
36
+
37
+ def to_ast: () -> self
38
+
39
+ alias to_s to_sexp
40
+
41
+ def to_sexp: (?Integer indent) -> String
42
+
43
+ def to_sexp_array: () -> Array[untyped]
44
+
45
+ def updated: (?_ToSym? `type`, ?_ToA[untyped]? children, ?Hash[Symbol, untyped]? properties) -> self
46
+
47
+ private
48
+
49
+ def initialize: (_ToSym `type`, ?_ToA[untyped]? children, ?Hash[Symbol, untyped] properties) -> void
50
+
51
+ alias original_dup dup
52
+ end
53
+
54
+ class Processor
55
+ include Mixin
56
+
57
+ module Mixin
58
+ public
59
+
60
+ def handler_missing: (Node node) -> Node?
61
+
62
+ def process: (_ToAst? node) -> Node?
63
+
64
+ def process_all: (Array[_ToAst] nodes) -> Array[Node]
65
+ end
66
+ end
67
+
68
+ module Sexp
69
+ public
70
+
71
+ def s: (_ToSym `type`, *untyped children) -> Node
72
+ end
73
+ end
@@ -0,0 +1,9 @@
1
+ ---
2
+ name: parallel
3
+ version: '1.20'
4
+ source:
5
+ type: git
6
+ name: ruby/gem_rbs_collection
7
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
8
+ remote: https://github.com/ruby/gem_rbs_collection.git
9
+ repo_dir: gems
@@ -0,0 +1,86 @@
1
+ module Parallel
2
+ class Break < StandardError
3
+ attr_reader value: untyped
4
+
5
+ def initialize: (?untyped value) -> void
6
+ end
7
+
8
+ class Kill < Break
9
+ end
10
+
11
+ type stop = Object
12
+ Stop: stop
13
+
14
+ VERSION: String
15
+
16
+ Version: String
17
+
18
+ type callable_source[T] = ^() -> (T | stop)
19
+
20
+ def self.all?: [T] (Enumerable[T] | callable_source[T] source,
21
+ ?in_processes: Integer,
22
+ ?in_threads: Integer,
23
+ ?progress: _ToStr,
24
+ ?start: ^(T item, Integer index) -> void,
25
+ ?finish: ^(T item, Integer index, boolish result) -> void) { (T) -> boolish } -> bool
26
+
27
+ def self.any?: [T] (Enumerable[T] | callable_source[T] source,
28
+ ?in_processes: Integer,
29
+ ?in_threads: Integer,
30
+ ?progress: _ToStr,
31
+ ?start: ^(T item, Integer index) -> void,
32
+ ?finish: ^(T item, Integer index, boolish result) -> void) { (T) -> boolish } -> bool
33
+
34
+ def self.each: [T, U] (Enumerable[T] source,
35
+ ?in_processes: Integer,
36
+ ?in_threads: Integer,
37
+ ?progress: _ToStr,
38
+ ?start: ^(T item, Integer index) -> void,
39
+ ?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> Enumerable[U]
40
+ | [T, U] (callable_source[T] source,
41
+ ?in_processes: Integer,
42
+ ?in_threads: Integer,
43
+ ?progress: _ToStr,
44
+ ?start: ^(T item, Integer index) -> void,
45
+ ?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> callable_source[T]
46
+
47
+ def self.each_with_index: [T, U] (Enumerable[T] source,
48
+ ?in_processes: Integer,
49
+ ?in_threads: Integer,
50
+ ?progress: _ToStr,
51
+ ?start: ^(T item, Integer index) -> void,
52
+ ?finish: ^(T item, Integer index, U result) -> void) { (T, Integer) -> U } -> Enumerable[T]
53
+ | [T, U] (callable_source[T] array,
54
+ ?in_processes: Integer,
55
+ ?in_threads: Integer,
56
+ ?progress: _ToStr,
57
+ ?start: ^(T item, Integer index) -> void,
58
+ ?finish: ^(T item, Integer index, U result) -> void) { (T, Integer) -> U } -> callable_source[U]
59
+
60
+ def self.flat_map: [T, U] (Enumerable[T] | callable_source[T] src,
61
+ ?in_processes: Integer,
62
+ ?in_threads: Integer,
63
+ ?progress: _ToStr,
64
+ ?start: ^(T item, Integer index) -> void,
65
+ ?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> Array[U]
66
+
67
+ def self.map: [T, U] (Enumerable[T] | callable_source[T] | Thread::Queue source,
68
+ ?in_processes: Integer,
69
+ ?in_threads: Integer,
70
+ ?progress: _ToStr,
71
+ ?start: ^(T item, Integer index) -> void,
72
+ ?finish: ^(T item, Integer index, U result) -> void) { (T) -> U } -> Array[U]
73
+
74
+ def self.map_with_index: [T, U] (Enumerable[T] | callable_source[T] source,
75
+ ?in_processes: Integer,
76
+ ?in_threads: Integer,
77
+ ?progress: _ToStr,
78
+ ?start: ^(T item, Integer index) -> untyped,
79
+ ?finish: ^(T item, Integer index, U result) -> untyped) { (T, Integer) -> U } -> Array[U]
80
+
81
+ def self.physical_processor_count: () -> Integer
82
+
83
+ def self.processor_count: () -> Integer
84
+
85
+ def self.worker_number: () -> Integer
86
+ end
@@ -0,0 +1,9 @@
1
+ ---
2
+ name: rainbow
3
+ version: '3.0'
4
+ source:
5
+ type: git
6
+ name: ruby/gem_rbs_collection
7
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
8
+ remote: https://github.com/ruby/gem_rbs_collection.git
9
+ repo_dir: gems
@@ -0,0 +1,7 @@
1
+ module Rainbow
2
+ def self.enabled: () -> bool
3
+
4
+ def self.enabled=: (bool value) -> bool
5
+
6
+ def self.uncolor: (String string) -> String
7
+ end
@@ -0,0 +1,209 @@
1
+ module Rainbow
2
+ class Presenter < String
3
+ # Sets color of this text.
4
+ def color: (*Symbol values) -> instance
5
+
6
+ alias foreground color
7
+
8
+ alias fg color
9
+
10
+ # Sets background color of this text.
11
+ def background: (*Symbol values) -> instance
12
+
13
+ alias bg background
14
+
15
+ # Resets terminal to default colors/backgrounds.
16
+ #
17
+ # It shouldn't be needed to use this method because all methods
18
+ # append terminal reset code to end of string.
19
+ def reset: () -> instance
20
+
21
+ # Turns on bright/bold for this text.
22
+ def bright: () -> instance
23
+
24
+ alias bold bright
25
+
26
+ # Turns on faint/dark for this text (not well supported by terminal
27
+ # emulators).
28
+ def faint: () -> instance
29
+
30
+ # Turns on italic style for this text (not well supported by terminal
31
+ # emulators).
32
+ def italic: () -> instance
33
+
34
+ # Turns on underline decoration for this text.
35
+ def underline: () -> instance
36
+
37
+ # Turns on blinking attribute for this text (not well supported by terminal
38
+ # emulators).
39
+ def blink: () -> instance
40
+
41
+ # Inverses current foreground/background colors.
42
+ def inverse: () -> instance
43
+
44
+ # Hides this text (set its color to the same as background).
45
+ def hide: () -> instance
46
+
47
+ def black: () -> instance
48
+
49
+ def red: () -> instance
50
+
51
+ def green: () -> instance
52
+
53
+ def yellow: () -> instance
54
+
55
+ def blue: () -> instance
56
+
57
+ def magenta: () -> instance
58
+
59
+ def cyan: () -> instance
60
+
61
+ def white: () -> instance
62
+
63
+ # We take care of X11 color method call here.
64
+ # Such as #aqua, #ghostwhite.
65
+ def method_missing: (untyped method_name, *untyped args) -> untyped
66
+
67
+ def respond_to_missing?: (untyped method_name, *untyped args) -> bool
68
+
69
+ def wrap_with_sgr: (untyped codes) -> instance
70
+
71
+ def aliceblue: () -> instance
72
+ def antiquewhite: () -> instance
73
+ def aqua: () -> instance
74
+ def aquamarine: () -> instance
75
+ def azure: () -> instance
76
+ def beige: () -> instance
77
+ def bisque: () -> instance
78
+ def blanchedalmond: () -> instance
79
+ def blueviolet: () -> instance
80
+ def brown: () -> instance
81
+ def burlywood: () -> instance
82
+ def cadetblue: () -> instance
83
+ def chartreuse: () -> instance
84
+ def chocolate: () -> instance
85
+ def coral: () -> instance
86
+ def cornflower: () -> instance
87
+ def cornsilk: () -> instance
88
+ def crimson: () -> instance
89
+ def darkblue: () -> instance
90
+ def darkcyan: () -> instance
91
+ def darkgoldenrod: () -> instance
92
+ def darkgray: () -> instance
93
+ def darkgreen: () -> instance
94
+ def darkkhaki: () -> instance
95
+ def darkmagenta: () -> instance
96
+ def darkolivegreen: () -> instance
97
+ def darkorange: () -> instance
98
+ def darkorchid: () -> instance
99
+ def darkred: () -> instance
100
+ def darksalmon: () -> instance
101
+ def darkseagreen: () -> instance
102
+ def darkslateblue: () -> instance
103
+ def darkslategray: () -> instance
104
+ def darkturquoise: () -> instance
105
+ def darkviolet: () -> instance
106
+ def deeppink: () -> instance
107
+ def deepskyblue: () -> instance
108
+ def dimgray: () -> instance
109
+ def dodgerblue: () -> instance
110
+ def firebrick: () -> instance
111
+ def floralwhite: () -> instance
112
+ def forestgreen: () -> instance
113
+ def fuchsia: () -> instance
114
+ def gainsboro: () -> instance
115
+ def ghostwhite: () -> instance
116
+ def gold: () -> instance
117
+ def goldenrod: () -> instance
118
+ def gray: () -> instance
119
+ def greenyellow: () -> instance
120
+ def honeydew: () -> instance
121
+ def hotpink: () -> instance
122
+ def indianred: () -> instance
123
+ def indigo: () -> instance
124
+ def ivory: () -> instance
125
+ def khaki: () -> instance
126
+ def lavender: () -> instance
127
+ def lavenderblush: () -> instance
128
+ def lawngreen: () -> instance
129
+ def lemonchiffon: () -> instance
130
+ def lightblue: () -> instance
131
+ def lightcoral: () -> instance
132
+ def lightcyan: () -> instance
133
+ def lightgoldenrod: () -> instance
134
+ def lightgray: () -> instance
135
+ def lightgreen: () -> instance
136
+ def lightpink: () -> instance
137
+ def lightsalmon: () -> instance
138
+ def lightseagreen: () -> instance
139
+ def lightskyblue: () -> instance
140
+ def lightslategray: () -> instance
141
+ def lightsteelblue: () -> instance
142
+ def lightyellow: () -> instance
143
+ def lime: () -> instance
144
+ def limegreen: () -> instance
145
+ def linen: () -> instance
146
+ def maroon: () -> instance
147
+ def mediumaquamarine: () -> instance
148
+ def mediumblue: () -> instance
149
+ def mediumorchid: () -> instance
150
+ def mediumpurple: () -> instance
151
+ def mediumseagreen: () -> instance
152
+ def mediumslateblue: () -> instance
153
+ def mediumspringgreen: () -> instance
154
+ def mediumturquoise: () -> instance
155
+ def mediumvioletred: () -> instance
156
+ def midnightblue: () -> instance
157
+ def mintcream: () -> instance
158
+ def mistyrose: () -> instance
159
+ def moccasin: () -> instance
160
+ def navajowhite: () -> instance
161
+ def navyblue: () -> instance
162
+ def oldlace: () -> instance
163
+ def olive: () -> instance
164
+ def olivedrab: () -> instance
165
+ def orange: () -> instance
166
+ def orangered: () -> instance
167
+ def orchid: () -> instance
168
+ def palegoldenrod: () -> instance
169
+ def palegreen: () -> instance
170
+ def paleturquoise: () -> instance
171
+ def palevioletred: () -> instance
172
+ def papayawhip: () -> instance
173
+ def peachpuff: () -> instance
174
+ def peru: () -> instance
175
+ def pink: () -> instance
176
+ def plum: () -> instance
177
+ def powderblue: () -> instance
178
+ def purple: () -> instance
179
+ def rebeccapurple: () -> instance
180
+ def rosybrown: () -> instance
181
+ def royalblue: () -> instance
182
+ def saddlebrown: () -> instance
183
+ def salmon: () -> instance
184
+ def sandybrown: () -> instance
185
+ def seagreen: () -> instance
186
+ def seashell: () -> instance
187
+ def sienna: () -> instance
188
+ def silver: () -> instance
189
+ def skyblue: () -> instance
190
+ def slateblue: () -> instance
191
+ def slategray: () -> instance
192
+ def snow: () -> instance
193
+ def springgreen: () -> instance
194
+ def steelblue: () -> instance
195
+ def tan: () -> instance
196
+ def teal: () -> instance
197
+ def thistle: () -> instance
198
+ def tomato: () -> instance
199
+ def turquoise: () -> instance
200
+ def violet: () -> instance
201
+ def webgray: () -> instance
202
+ def webgreen: () -> instance
203
+ def webmaroon: () -> instance
204
+ def webpurple: () -> instance
205
+ def wheat: () -> instance
206
+ def whitesmoke: () -> instance
207
+ def yellowgreen: () -> instance
208
+ end
209
+ end
@@ -0,0 +1,5 @@
1
+ class Object
2
+ private
3
+
4
+ def Rainbow: (String) -> Rainbow::Presenter
5
+ end
data/README.md CHANGED
@@ -1,18 +1,16 @@
1
- # Document Similarity - Efficient probablistic algorithm for calculating document similarity
1
+ # Doc Sim - Efficient algorithm for calculating approximate document similarity
2
2
 
3
3
  A Ruby implementation of [Mining of Massive Datasets](http://www.mmds.org/)'s document similarity algorithm. It uses Minhash and Locality Sensitive Hashing to efficiently find documents with a high probability of being similar.
4
4
 
5
5
  ## Installation
6
6
 
7
- TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_PRIOR_TO_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
8
-
9
7
  Install the gem and add to the application's Gemfile by executing:
10
8
 
11
- $ bundle add UPDATE_WITH_YOUR_GEM_NAME_PRIOR_TO_RELEASE_TO_RUBYGEMS_ORG
9
+ $ bundle add doc_sim
12
10
 
13
11
  If bundler is not being used to manage dependencies, install the gem by executing:
14
12
 
15
- $ gem install UPDATE_WITH_YOUR_GEM_NAME_PRIOR_TO_RELEASE_TO_RUBYGEMS_ORG
13
+ $ gem install doc_sim
16
14
 
17
15
  ## Usage
18
16
 
data/Rakefile CHANGED
@@ -10,3 +10,5 @@ require "rubocop/rake_task"
10
10
  RuboCop::RakeTask.new
11
11
 
12
12
  task default: %i[spec rubocop]
13
+
14
+ # Steep is not included because it's too unstable as of August 2023
data/Steepfile CHANGED
@@ -1,25 +1,25 @@
1
1
  # frozen_string_literal: true
2
- # D = Steep::Diagnostic
3
- #
4
- # target :lib do
5
- # signature "sig"
6
- #
7
- # check "lib" # Directory name
8
- # check "Gemfile" # File name
9
- # check "app/models/**/*.rb" # Glob
10
- # # ignore "lib/templates/*.rb"
11
- #
12
- # # library "pathname" # Standard libraries
13
- # # library "strong_json" # Gems
14
- #
15
- # # configure_code_diagnostics(D::Ruby.default) # `default` diagnostics setting (applies by default)
16
- # # configure_code_diagnostics(D::Ruby.strict) # `strict` diagnostics setting
17
- # # configure_code_diagnostics(D::Ruby.lenient) # `lenient` diagnostics setting
18
- # # configure_code_diagnostics(D::Ruby.silent) # `silent` diagnostics setting
19
- # # configure_code_diagnostics do |hash| # You can setup everything yourself
20
- # # hash[D::Ruby::NoMethod] = :information
21
- # # end
22
- # end
2
+
3
+ D = Steep::Diagnostic
4
+
5
+ target :lib do
6
+ signature "sig"
7
+
8
+ check "lib" # Directory name
9
+ ignore "Gemfile"
10
+ # ignore "lib/templates/*.rb"
11
+
12
+ # library "pathname" # Standard libraries
13
+ # library "strong_json" # Gems
14
+
15
+ # configure_code_diagnostics(D::Ruby.default) # `default` diagnostics setting (applies by default)
16
+ # configure_code_diagnostics(D::Ruby.strict) # `strict` diagnostics setting
17
+ # configure_code_diagnostics(D::Ruby.lenient) # `lenient` diagnostics setting
18
+ # configure_code_diagnostics(D::Ruby.silent) # `silent` diagnostics setting
19
+ # configure_code_diagnostics do |hash| # You can setup everything yourself
20
+ # hash[D::Ruby::NoMethod] = :information
21
+ # end
22
+ end
23
23
 
24
24
  # target :test do
25
25
  # signature "sig", "sig-private"
@@ -7,6 +7,9 @@ module Minhash
7
7
  class Minhash
8
8
  attr_reader :seed_root
9
9
 
10
+ # Hashes will always be <= 2**32
11
+ HASH_MAX = (2**32) + 1
12
+
10
13
  def initialize(n_hashes = 1, seed_root = rand(2**32))
11
14
  @seed_root = seed_root
12
15
  @hashes = Array.new(n_hashes) do |seed|
@@ -16,11 +19,11 @@ module Minhash
16
19
 
17
20
  # Produces the Minhash signature for a given Set
18
21
  #
19
- # @param set [Set] the set to produce the signature for
22
+ # @param set [Set[String]] the set to produce the signature for
20
23
  #
21
24
  # @return [Array[Integer]] 32 bit integer array of length n_hashes
22
25
  def signature(set)
23
- counter = Array.new(@hashes.length, Float::INFINITY)
26
+ counter = Array.new(@hashes.length, Minhash::HASH_MAX)
24
27
  set.each do |elem|
25
28
  @hashes.each_with_index do |hash_func, i|
26
29
  counter[i] = [counter[i], hash_func.call(elem)].min
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Shingle a document
3
4
  module Shingling
4
5
  def self.shingle(document, k)
5
6
  max_index = document.length - k + 1
6
- max_index.times.map do |i|
7
- document[i...(i + k)]
8
- end
7
+ max_index.times.to_set { |i| document[i...(i + k)] }
9
8
  end
10
9
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DocumentSimilarity
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
@@ -0,0 +1,42 @@
1
+ ---
2
+ sources:
3
+ - type: git
4
+ name: ruby/gem_rbs_collection
5
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
6
+ remote: https://github.com/ruby/gem_rbs_collection.git
7
+ repo_dir: gems
8
+ path: ".gem_rbs_collection"
9
+ gems:
10
+ - name: ast
11
+ version: '2.4'
12
+ source:
13
+ type: git
14
+ name: ruby/gem_rbs_collection
15
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
16
+ remote: https://github.com/ruby/gem_rbs_collection.git
17
+ repo_dir: gems
18
+ - name: base64
19
+ version: '0'
20
+ source:
21
+ type: stdlib
22
+ - name: json
23
+ version: '0'
24
+ source:
25
+ type: stdlib
26
+ - name: parallel
27
+ version: '1.20'
28
+ source:
29
+ type: git
30
+ name: ruby/gem_rbs_collection
31
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
32
+ remote: https://github.com/ruby/gem_rbs_collection.git
33
+ repo_dir: gems
34
+ - name: rainbow
35
+ version: '3.0'
36
+ source:
37
+ type: git
38
+ name: ruby/gem_rbs_collection
39
+ revision: a4c633634493ab7ae73219022f56acff56ab69af
40
+ remote: https://github.com/ruby/gem_rbs_collection.git
41
+ repo_dir: gems
42
+ gemfile_lock_path: Gemfile.lock
@@ -0,0 +1,17 @@
1
+ # Download sources
2
+ sources:
3
+ - name: ruby/gem_rbs_collection
4
+ remote: https://github.com/ruby/gem_rbs_collection.git
5
+ revision: main
6
+ repo_dir: gems
7
+
8
+ # A directory to install the downloaded RBSs
9
+ path: .gem_rbs_collection
10
+
11
+ gems:
12
+ # Skip loading rbs gem's RBS.
13
+ # It's unnecessary if you don't use rbs as a library.
14
+ - name: rbs
15
+ ignore: true
16
+ - name: steep
17
+ ignore: true
@@ -3,14 +3,14 @@
3
3
  # Classes
4
4
  module LocalitySensitiveHashing
5
5
  class LocalitySensitiveHashing
6
- @buckets: Array[Hash[Set[Integer], Array[Integer]]]
6
+ @buckets: Array[Hash[Array[Integer], Array[Integer]]]
7
7
  @n_rows: Integer
8
8
 
9
9
  def initialize: (Integer n_rows, Integer n_bands) -> void
10
- def insert: (Set[Integer] signature, Integer id) -> void
10
+ def insert: (Array[Integer] signature, Integer id) -> void
11
11
  def similar_pairs: -> Set[Array[Integer]]
12
12
 
13
13
  private
14
- def generate_band_bucket: -> Hash[Set[Integer], Array[Integer]]
14
+ def generate_band_bucket: -> Hash[Array[Integer], Array[Integer]]
15
15
  end
16
16
  end
@@ -1,8 +1,13 @@
1
1
  module Minhash
2
2
  # Class for generating Minhash signature
3
3
  class Minhash
4
+ @seed_root: Integer
5
+ @hashes: Array[^(String) -> Integer]
6
+
4
7
  attr_reader seed_root: Integer
5
8
 
9
+ HASH_MAX: Integer
10
+
6
11
  def initialize: (?Integer n_hashes, ?Integer seed_root) -> void
7
12
 
8
13
  # Produces the Minhash signature for a given Set
@@ -1,3 +1,3 @@
1
1
  module Shingling
2
- def self.shingle: (String document, Integer k) -> Array[String]
2
+ def self.shingle: (String document, Integer k) -> Set[String]
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_sim
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Forthoney
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-20 00:00:00.000000000 Z
11
+ date: 2023-08-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: murmurhash3
@@ -73,6 +73,14 @@ executables: []
73
73
  extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
+ - ".gem_rbs_collection/ast/2.4/.rbs_meta.yaml"
77
+ - ".gem_rbs_collection/ast/2.4/ast.rbs"
78
+ - ".gem_rbs_collection/parallel/1.20/.rbs_meta.yaml"
79
+ - ".gem_rbs_collection/parallel/1.20/parallel.rbs"
80
+ - ".gem_rbs_collection/rainbow/3.0/.rbs_meta.yaml"
81
+ - ".gem_rbs_collection/rainbow/3.0/global.rbs"
82
+ - ".gem_rbs_collection/rainbow/3.0/presenter.rbs"
83
+ - ".gem_rbs_collection/rainbow/3.0/rainbow.rbs"
76
84
  - ".rspec"
77
85
  - ".rubocop.yml"
78
86
  - ".ruby-version"
@@ -87,6 +95,8 @@ files:
87
95
  - lib/doc_sim/minhash.rb
88
96
  - lib/doc_sim/shingling.rb
89
97
  - lib/doc_sim/version.rb
98
+ - rbs_collection.lock.yaml
99
+ - rbs_collection.yaml
90
100
  - sig/doc_sim.rbs
91
101
  - sig/doc_sim/locality_sensitive_hashing.rbs
92
102
  - sig/doc_sim/minhash.rbs