classifier 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/classifier/tfidf.rb ADDED
@@ -0,0 +1,408 @@
+ # rbs_inline: enabled
+
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
+ # Copyright:: Copyright (c) 2024 Lucas Carlson
+ # License:: LGPL
+
+ require 'json'
+
+ module Classifier
+   # TF-IDF vectorizer: transforms text to weighted feature vectors.
+   # Downweights common words, upweights discriminative terms.
+   #
+   # Example:
+   #   tfidf = Classifier::TFIDF.new
+   #   tfidf.fit(["Dogs are great pets", "Cats are independent"])
+   #   tfidf.transform("Dogs are loyal") # => {:dog=>1.0} ("loyal" is not in the fitted vocabulary, so only :dog survives)
+   #
+   class TFIDF
+     include Streaming
+
+     # @rbs @min_df: Integer | Float
+     # @rbs @max_df: Integer | Float
+     # @rbs @ngram_range: Array[Integer]
+     # @rbs @sublinear_tf: bool
+     # @rbs @vocabulary: Hash[Symbol, Integer]
+     # @rbs @idf: Hash[Symbol, Float]
+     # @rbs @num_documents: Integer
+     # @rbs @fitted: bool
+     # @rbs @dirty: bool
+     # @rbs @storage: Storage::Base?
+
+     attr_reader :vocabulary, :idf, :num_documents
+     attr_accessor :storage
+
+     # Creates a new TF-IDF vectorizer.
+     # - min_df/max_df: filter terms by document frequency (Integer for count, Float for proportion)
+     # - ngram_range: [1,1] for unigrams, [1,2] for unigrams+bigrams
+     # - sublinear_tf: use 1 + log(tf) instead of raw term frequency
+     #
+     # @rbs (?min_df: Integer | Float, ?max_df: Integer | Float,
+     #      ?ngram_range: Array[Integer], ?sublinear_tf: bool) -> void
+     def initialize(min_df: 1, max_df: 1.0, ngram_range: [1, 1], sublinear_tf: false)
+       validate_df!(min_df, 'min_df')
+       validate_df!(max_df, 'max_df')
+       validate_ngram_range!(ngram_range)
+
+       @min_df = min_df
+       @max_df = max_df
+       @ngram_range = ngram_range
+       @sublinear_tf = sublinear_tf
+       @vocabulary = {}
+       @idf = {}
+       @num_documents = 0
+       @fitted = false
+       @dirty = false
+       @storage = nil
+     end
+
+     # Learns vocabulary and IDF weights from the corpus.
+     # @rbs (Array[String]) -> self
+     def fit(documents)
+       raise ArgumentError, 'documents must be an array' unless documents.is_a?(Array)
+       raise ArgumentError, 'documents cannot be empty' if documents.empty?
+
+       @num_documents = documents.size
+       document_frequencies = Hash.new(0)
+
+       documents.each do |doc|
+         terms = extract_terms(doc)
+         terms.each_key { |term| document_frequencies[term] += 1 }
+       end
+
+       @vocabulary = {}
+       @idf = {}
+       vocab_index = 0
+
+       document_frequencies.each do |term, df|
+         next unless within_df_bounds?(df, @num_documents)
+
+         @vocabulary[term] = vocab_index
+         vocab_index += 1
+
+         # IDF: log((N + 1) / (df + 1)) + 1
+         @idf[term] = Math.log((@num_documents + 1).to_f / (df + 1)) + 1
+       end
+
+       @fitted = true
+       @dirty = true
+       self
+     end
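
To make the IDF line above concrete: the smoothed form `log((N + 1) / (df + 1)) + 1` shrinks a term's weight as it appears in more documents but never drives it to zero. A quick sketch with hypothetical counts:

```ruby
# Hypothetical: a 3-document corpus where :cat appears in 2 documents.
n  = 3  # @num_documents
df = 2  # document frequency of :cat
idf = Math.log((n + 1).to_f / (df + 1)) + 1
# => log(4/3) + 1 ≈ 1.288; a term present in all 3 docs gets log(4/4) + 1 = 1.0
```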
+
+     # Transforms a document into a normalized TF-IDF vector.
+     # @rbs (String) -> Hash[Symbol, Float]
+     def transform(document)
+       raise NotFittedError, 'TFIDF has not been fitted. Call fit first.' unless @fitted
+
+       terms = extract_terms(document)
+       result = {} #: Hash[Symbol, Float]
+
+       terms.each do |term, tf|
+         next unless @vocabulary.key?(term)
+
+         tf_value = @sublinear_tf && tf.positive? ? 1 + Math.log(tf) : tf.to_f
+         result[term] = (tf_value * @idf[term]).to_f
+       end
+
+       normalize_vector(result)
+     end
+
+     # Fits and transforms in one step.
+     # @rbs (Array[String]) -> Array[Hash[Symbol, Float]]
+     def fit_transform(documents)
+       fit(documents)
+       documents.map { |doc| transform(doc) }
+     end
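
A minimal usage sketch of the public API above; the term symbols and weights are illustrative, since the gem's stemming and stop-word list shape the actual vocabulary:

```ruby
require 'classifier'

tfidf = Classifier::TFIDF.new(sublinear_tf: true)
tfidf.fit([
  'Dogs are great pets',
  'Cats are independent pets',
  'Parrots can mimic speech'
])

tfidf.transform('Dogs are great')
# => roughly {:dog=>0.7071..., :great=>0.7071...} — L2-normalized, unknown words dropped

# Fit and vectorize a corpus in one call:
vectors = tfidf.fit_transform(['Dogs bark', 'Cats purr'])
```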
+
+     # Returns vocabulary terms in index order.
+     # @rbs () -> Array[Symbol]
+     def feature_names
+       @vocabulary.keys.sort_by { |term| @vocabulary[term] }
+     end
+
+     # @rbs () -> bool
+     def fitted?
+       @fitted
+     end
+
+     # Returns true if there are unsaved changes.
+     # @rbs () -> bool
+     def dirty?
+       @dirty
+     end
+
+     # Saves the vectorizer to the configured storage.
+     # @rbs () -> void
+     def save
+       raise ArgumentError, 'No storage configured' unless storage
+
+       storage.write(to_json)
+       @dirty = false
+     end
+
+     # Saves the vectorizer state to a file.
+     # @rbs (String) -> Integer
+     def save_to_file(path)
+       result = File.write(path, to_json)
+       @dirty = false
+       result
+     end
+
+     # Loads a vectorizer from the configured storage.
+     # @rbs (storage: Storage::Base) -> TFIDF
+     def self.load(storage:)
+       data = storage.read
+       raise StorageError, 'No saved state found' unless data
+
+       instance = from_json(data)
+       instance.storage = storage
+       instance
+     end
+
+     # Loads a vectorizer from a file.
+     # @rbs (String) -> TFIDF
+     def self.load_from_file(path)
+       from_json(File.read(path))
+     end
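
A hedged sketch of the persistence round trip; the `Storage::File.new(path:)` constructor is the same one `load_checkpoint` uses further down:

```ruby
tfidf = Classifier::TFIDF.new
tfidf.fit(['Dogs are great pets', 'Cats are independent'])

# Direct file round trip:
tfidf.save_to_file('tfidf.json')
restored = Classifier::TFIDF.load_from_file('tfidf.json')
restored.fitted? # => true

# Or via a configured storage backend:
tfidf.storage = Classifier::Storage::File.new(path: 'tfidf.json')
tfidf.save
copy = Classifier::TFIDF.load(storage: tfidf.storage)
```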
+
+     # Reloads the vectorizer from storage, raising if there are unsaved changes.
+     # @rbs () -> self
+     def reload
+       raise ArgumentError, 'No storage configured' unless storage
+       raise UnsavedChangesError, 'Unsaved changes would be lost. Call save first or use reload!' if @dirty
+
+       data = storage.read
+       raise StorageError, 'No saved state found' unless data
+
+       restore_from_json(data)
+       @dirty = false
+       self
+     end
+
+     # Force-reloads the vectorizer from storage, discarding any unsaved changes.
+     # @rbs () -> self
+     def reload!
+       raise ArgumentError, 'No storage configured' unless storage
+
+       data = storage.read
+       raise StorageError, 'No saved state found' unless data
+
+       restore_from_json(data)
+       @dirty = false
+       self
+     end
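
The dirty flag gives `reload` and `reload!` their contrasting contracts; a small sketch, with `more_documents` as a hypothetical corpus and a storage backend already configured:

```ruby
tfidf.fit(more_documents)  # refitting marks the vectorizer dirty
tfidf.dirty?               # => true
tfidf.reload               # raises UnsavedChangesError
tfidf.save                 # persist the new state first...
tfidf.reload               # ...now reloading succeeds
tfidf.reload!              # or force it and discard unsaved work
```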
+
+     # @rbs (?untyped) -> Hash[Symbol, untyped]
+     def as_json(_options = nil)
+       {
+         version: 1,
+         type: 'tfidf',
+         min_df: @min_df,
+         max_df: @max_df,
+         ngram_range: @ngram_range,
+         sublinear_tf: @sublinear_tf,
+         vocabulary: @vocabulary,
+         idf: @idf,
+         num_documents: @num_documents,
+         fitted: @fitted
+       }
+     end
+
+     # @rbs (?untyped) -> String
+     def to_json(_options = nil)
+       JSON.generate(as_json)
+     end
+
+     # Loads a vectorizer from JSON.
+     # @rbs (String | Hash[String, untyped]) -> TFIDF
+     def self.from_json(json)
+       data = json.is_a?(String) ? JSON.parse(json) : json
+       raise ArgumentError, "Invalid vectorizer type: #{data['type']}" unless data['type'] == 'tfidf'
+
+       instance = new(
+         min_df: data['min_df'],
+         max_df: data['max_df'],
+         ngram_range: data['ngram_range'],
+         sublinear_tf: data['sublinear_tf']
+       )
+
+       instance.instance_variable_set(:@vocabulary, symbolize_keys(data['vocabulary']))
+       instance.instance_variable_set(:@idf, symbolize_keys(data['idf']))
+       instance.instance_variable_set(:@num_documents, data['num_documents'])
+       instance.instance_variable_set(:@fitted, data['fitted'])
+       instance.instance_variable_set(:@dirty, false)
+       instance.instance_variable_set(:@storage, nil)
+
+       instance
+     end
+
+     # @rbs () -> Array[untyped]
+     def marshal_dump
+       [@min_df, @max_df, @ngram_range, @sublinear_tf, @vocabulary, @idf, @num_documents, @fitted]
+     end
+
+     # @rbs (Array[untyped]) -> void
+     def marshal_load(data)
+       @min_df, @max_df, @ngram_range, @sublinear_tf, @vocabulary, @idf, @num_documents, @fitted = data
+       @dirty = false
+       @storage = nil
+     end
+
+     # Loads a vectorizer from a checkpoint.
+     #
+     # @rbs (storage: Storage::Base, checkpoint_id: String) -> TFIDF
+     def self.load_checkpoint(storage:, checkpoint_id:)
+       raise ArgumentError, 'Storage must be File storage for checkpoints' unless storage.is_a?(Storage::File)
+
+       dir = File.dirname(storage.path)
+       base = File.basename(storage.path, '.*')
+       ext = File.extname(storage.path)
+       checkpoint_path = File.join(dir, "#{base}_checkpoint_#{checkpoint_id}#{ext}")
+
+       checkpoint_storage = Storage::File.new(path: checkpoint_path)
+       instance = load(storage: checkpoint_storage)
+       instance.storage = storage
+       instance
+     end
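
The path arithmetic above keeps checkpoints beside the primary save file. With a hypothetical main path of `models/tfidf.json`:

```ruby
storage = Classifier::Storage::File.new(path: 'models/tfidf.json')

# Reads models/tfidf_checkpoint_epoch3.json, then points the
# instance back at the main file so later saves land there.
tfidf = Classifier::TFIDF.load_checkpoint(storage: storage, checkpoint_id: 'epoch3')
```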
+
+     # Fits the vectorizer from an IO stream.
+     # Collects all documents from the stream, then fits the model.
+     # Note: All documents must be collected in memory for IDF calculation.
+     #
+     # @example Fit from a file
+     #   tfidf.fit_from_stream(File.open('corpus.txt'))
+     #
+     # @example With progress tracking
+     #   tfidf.fit_from_stream(io, batch_size: 500) do |progress|
+     #     puts "#{progress.completed} documents loaded"
+     #   end
+     #
+     # @rbs (IO, ?batch_size: Integer) { (Streaming::Progress) -> void } -> self
+     def fit_from_stream(io, batch_size: Streaming::DEFAULT_BATCH_SIZE)
+       reader = Streaming::LineReader.new(io, batch_size: batch_size)
+       total = reader.estimate_line_count
+       progress = Streaming::Progress.new(total: total)
+
+       documents = [] #: Array[String]
+
+       reader.each_batch do |batch|
+         documents.concat(batch)
+         progress.completed += batch.size
+         progress.current_batch += 1
+         yield progress if block_given?
+       end
+
+       fit(documents) unless documents.empty?
+       self
+     end
+
+     # TFIDF doesn't support train_from_stream (use fit_from_stream instead).
+     # This method raises NotImplementedError with guidance.
+     #
+     # @rbs (*untyped, **untyped) -> void
+     def train_from_stream(*) # steep:ignore
+       raise NotImplementedError, 'TFIDF uses fit_from_stream instead of train_from_stream'
+     end
+
+     # TFIDF doesn't support train_batch (use fit instead).
+     # This method raises NotImplementedError with guidance.
+     #
+     # @rbs (*untyped, **untyped) -> void
+     def train_batch(*) # steep:ignore
+       raise NotImplementedError, 'TFIDF uses fit instead of train_batch'
+     end
+
+     private
+
+     # Restores vectorizer state from a JSON string.
+     # @rbs (String) -> void
+     def restore_from_json(json)
+       data = JSON.parse(json)
+
+       @min_df = data['min_df']
+       @max_df = data['max_df']
+       @ngram_range = data['ngram_range']
+       @sublinear_tf = data['sublinear_tf']
+       @vocabulary = self.class.send(:symbolize_keys, data['vocabulary'])
+       @idf = self.class.send(:symbolize_keys, data['idf'])
+       @num_documents = data['num_documents']
+       @fitted = data['fitted']
+     end
+
+     # @rbs (String) -> Hash[Symbol, Integer]
+     def extract_terms(document)
+       result = Hash.new(0)
+
+       if @ngram_range[0] <= 1
+         word_hash = document.clean_word_hash
+         word_hash.each { |term, count| result[term] += count }
+       end
+
+       return result if @ngram_range[1] <= 1
+
+       tokens = tokenize_for_ngrams(document)
+       (2..@ngram_range[1]).each do |n|
+         next if n < @ngram_range[0]
+
+         generate_ngrams(tokens, n).each { |ngram| result[ngram] += 1 }
+       end
+
+       result
+     end
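
To trace the branches above with `ngram_range: [1, 2]`: unigram counts come from `clean_word_hash` (downcased, stop-worded, stemmed) and bigrams from `generate_ngrams`, all merged into one counts hash. The stems shown are illustrative, since they depend on the gem's stemmer:

```ruby
tfidf = Classifier::TFIDF.new(ngram_range: [1, 2])
tfidf.send(:extract_terms, 'machine learning rocks') # private, shown via send
# => roughly {:machin=>1, :learn=>1, :rock=>1, :machin_learn=>1, :learn_rock=>1}
# With ngram_range: [2, 2] the unigram branch is skipped entirely.
```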
+
+     # @rbs (String) -> Array[String]
+     def tokenize_for_ngrams(document)
+       document
+         .gsub(/[^\w\s]/, '')
+         .split
+         .map(&:downcase)
+         .reject { |w| w.length <= 2 || String::CORPUS_SKIP_WORDS.include?(w) }
+         .map(&:stem)
+     end
+
+     # @rbs (Array[String], Integer) -> Array[Symbol]
+     def generate_ngrams(tokens, n) # rubocop:disable Naming/MethodParameterName
+       return [] if tokens.size < n
+
+       tokens.each_cons(n).map { |gram| gram.join('_').intern }
+     end
+
+     # @rbs (Integer, Integer) -> bool
+     def within_df_bounds?(doc_freq, num_docs)
+       doc_freq.between?(
+         @min_df.is_a?(Float) ? (@min_df * num_docs).ceil : @min_df,
+         @max_df.is_a?(Float) ? (@max_df * num_docs).floor : @max_df
+       )
+     end
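
A worked example of the bounds check: Integer thresholds are absolute document counts, Float thresholds are proportions of the corpus (ceil on the lower bound, floor on the upper). With hypothetical settings:

```ruby
# min_df: 2 (count), max_df: 0.5 (proportion), corpus of 200 documents
lower = 2                 # Integer passes through unchanged
upper = (0.5 * 200).floor # => 100
# df = 1 is dropped (too rare), df = 150 is dropped (too common), df = 50 survives
```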
+
+     # @rbs (Hash[Symbol, Float]) -> Hash[Symbol, Float]
+     def normalize_vector(vector)
+       return vector if vector.empty?
+
+       magnitude = Math.sqrt(vector.values.sum { |v| v * v })
+       return vector if magnitude.zero?
+
+       vector.transform_values { |v| v / magnitude }
+     end
+
+     # @rbs (Integer | Float, String) -> void
+     def validate_df!(value, name)
+       raise ArgumentError, "#{name} must be an Integer or Float" unless value.is_a?(Float) || value.is_a?(Integer)
+       raise ArgumentError, "#{name} must be between 0.0 and 1.0" if value.is_a?(Float) && !value.between?(0.0, 1.0)
+       raise ArgumentError, "#{name} must be non-negative" if value.is_a?(Integer) && value.negative?
+     end
+
+     # @rbs (Array[Integer]) -> void
+     def validate_ngram_range!(range)
+       raise ArgumentError, 'ngram_range must be an array of two integers' unless range.is_a?(Array) && range.size == 2
+       raise ArgumentError, 'ngram_range values must be positive integers' unless range.all?(Integer) && range.all?(&:positive?)
+       raise ArgumentError, 'ngram_range[0] must be <= ngram_range[1]' if range[0] > range[1]
+     end
+
+     # @rbs (Hash[String, untyped]) -> Hash[Symbol, untyped]
+     def self.symbolize_keys(hash)
+       hash.transform_keys(&:to_sym)
+     end
+     private_class_method :symbolize_keys
+   end
+ end
data/lib/classifier.rb CHANGED
@@ -25,7 +25,13 @@
  # License:: LGPL

  require 'rubygems'
+ require 'classifier/errors'
+ require 'classifier/storage'
+ require 'classifier/streaming'
  require 'classifier/extensions/string'
  require 'classifier/extensions/vector'
  require 'classifier/bayes'
  require 'classifier/lsi'
+ require 'classifier/knn'
+ require 'classifier/tfidf'
+ require 'classifier/logistic_regression'
sig/vendor/json.rbs ADDED
@@ -0,0 +1,4 @@
+ module JSON
+   def self.parse: (String source, ?symbolize_names: bool) -> untyped
+   def self.generate: (untyped obj) -> String
+ end
sig/vendor/matrix.rbs CHANGED
@@ -1,26 +1,37 @@
  # Type stubs for matrix gem
- class Vector[T]
+ # Using untyped elements since our usage is primarily with Floats/Numerics
+ class Vector
    EPSILON: Float

-   def self.[]: [T] (*T) -> Vector[T]
+   def self.[]: (*untyped) -> Vector
    def size: () -> Integer
-   def []: (Integer) -> T
+   def []: (Integer) -> untyped
    def magnitude: () -> Float
-   def normalize: () -> Vector[T]
-   def each: () { (T) -> void } -> void
-   def collect: [U] () { (T) -> U } -> Vector[U]
-   def to_a: () -> Array[T]
+   def normalize: () -> Vector
+   def each: () { (untyped) -> void } -> void
+   def collect: () { (untyped) -> untyped } -> Vector
+   def to_a: () -> Array[untyped]
    def *: (untyped) -> untyped
+   def -: (Vector) -> Vector
+   def is_a?: (untyped) -> bool
  end

- class Matrix[T]
-   def self.rows: [T] (Array[Array[T]]) -> Matrix[T]
-   def self.[]: [T] (*Array[T]) -> Matrix[T]
-   def self.diag: (untyped) -> Matrix[untyped]
-   def trans: () -> Matrix[T]
+ class Matrix
+   def self.rows: (Array[Array[untyped]]) -> Matrix
+   def self.[]: (*Array[untyped]) -> Matrix
+   def self.diag: (untyped) -> Matrix
+   def self.columns: (Array[Array[untyped]]) -> Matrix
+   def self.empty: (Integer, Integer) -> Matrix
+   def self.zero: (Integer, Integer) -> Matrix
+   def self.vstack: (Matrix, Matrix) -> Matrix
+   def trans: () -> Matrix
+   def transpose: () -> Matrix
    def *: (untyped) -> untyped
    def row_size: () -> Integer
    def column_size: () -> Integer
-   def column: (Integer) -> Vector[T]
-   def SV_decomp: () -> [Matrix[T], Matrix[T], untyped]
+   def row: (Integer) -> Vector
+   def column: (Integer) -> Vector
+   def SV_decomp: () -> [Matrix, Matrix, untyped]
+   def is_a?: (untyped) -> bool
+   def respond_to?: (Symbol) -> bool
  end
sig/vendor/mutex_m.rbs ADDED
@@ -0,0 +1,16 @@
+ # Type stubs for mutex_m gem
+ module Mutex_m
+   def mu_initialize: () -> void
+   def mu_lock: () -> void
+   def mu_unlock: () -> void
+   def mu_synchronize: [T] () { () -> T } -> T
+   def mu_try_lock: () -> bool
+   def mu_locked?: () -> bool
+
+   # Aliases
+   alias lock mu_lock
+   alias unlock mu_unlock
+   alias synchronize mu_synchronize
+   alias try_lock mu_try_lock
+   alias locked? mu_locked?
+ end
sig/vendor/streaming.rbs ADDED
@@ -0,0 +1,14 @@
+ # Type stubs for Streaming module
+ # Defines the interface that including classes must implement
+
+ module Classifier
+   # Interface for classes that include Streaming
+   interface _StreamingHost
+     def storage: () -> Storage::Base?
+     def storage=: (Storage::Base?) -> void
+     def save: () -> void
+   end
+
+   module Streaming : _StreamingHost
+   end
+ end
data/test/test_helper.rb CHANGED
@@ -12,4 +12,6 @@ $LOAD_PATH.unshift("#{File.dirname(__FILE__)}/../lib")

  require 'minitest'
  require 'minitest/autorun'
+ require 'tmpdir'
+ require 'json'
  require 'classifier'
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: classifier
  version: !ruby/object:Gem::Version
-   version: 2.0.0
+   version: 2.2.0
  platform: ruby
  authors:
  - Lucas Carlson
@@ -107,10 +107,28 @@ dependencies:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- description: A general classifier module to allow Bayesian and other types of classifications.
+ - !ruby/object:Gem::Dependency
+   name: rake-compiler
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: A Ruby library for text classification featuring Naive Bayes, LSI (Latent
+   Semantic Indexing), Logistic Regression, and k-Nearest Neighbors classifiers. Includes
+   TF-IDF vectorization, streaming/incremental training, pluggable persistence backends,
+   thread safety, and a native C extension for fast LSI operations.
  email: lucas@rufy.com
  executables: []
- extensions: []
+ extensions:
+ - ext/classifier/extconf.rb
  extra_rdoc_files: []
  files:
  - CLAUDE.md
@@ -118,24 +136,49 @@ files:
  - README.md
  - bin/bayes.rb
  - bin/summarize.rb
+ - ext/classifier/classifier_ext.c
+ - ext/classifier/extconf.rb
+ - ext/classifier/incremental_svd.c
+ - ext/classifier/linalg.h
+ - ext/classifier/matrix.c
+ - ext/classifier/svd.c
+ - ext/classifier/vector.c
  - lib/classifier.rb
  - lib/classifier/bayes.rb
+ - lib/classifier/errors.rb
  - lib/classifier/extensions/string.rb
  - lib/classifier/extensions/vector.rb
- - lib/classifier/extensions/vector_serialize.rb
  - lib/classifier/extensions/word_hash.rb
+ - lib/classifier/knn.rb
+ - lib/classifier/logistic_regression.rb
  - lib/classifier/lsi.rb
  - lib/classifier/lsi/content_node.rb
+ - lib/classifier/lsi/incremental_svd.rb
  - lib/classifier/lsi/summary.rb
  - lib/classifier/lsi/word_list.rb
+ - lib/classifier/storage.rb
+ - lib/classifier/storage/base.rb
+ - lib/classifier/storage/file.rb
+ - lib/classifier/storage/memory.rb
+ - lib/classifier/streaming.rb
+ - lib/classifier/streaming/line_reader.rb
+ - lib/classifier/streaming/progress.rb
+ - lib/classifier/tfidf.rb
  - sig/vendor/fast_stemmer.rbs
  - sig/vendor/gsl.rbs
+ - sig/vendor/json.rbs
  - sig/vendor/matrix.rbs
+ - sig/vendor/mutex_m.rbs
+ - sig/vendor/streaming.rbs
  - test/test_helper.rb
- homepage: https://github.com/cardmagic/classifier
+ homepage: https://rubyclassifier.com
  licenses:
  - LGPL
- metadata: {}
+ metadata:
+   documentation_uri: https://rubyclassifier.com/docs
+   source_code_uri: https://github.com/cardmagic/classifier
+   bug_tracker_uri: https://github.com/cardmagic/classifier/issues
+   changelog_uri: https://github.com/cardmagic/classifier/releases
  rdoc_options: []
  require_paths:
  - lib
@@ -143,7 +186,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
-       version: '0'
+       version: '3.1'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
@@ -152,5 +195,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  requirements: []
  rubygems_version: 4.0.3
  specification_version: 4
- summary: A general classifier module to allow Bayesian and other types of classifications.
+ summary: Text classification with Bayesian, LSI, Logistic Regression, kNN, and TF-IDF
+   vectorization.
  test_files: []
data/lib/classifier/extensions/vector_serialize.rb DELETED
@@ -1,18 +0,0 @@
- module GSL
-   class Vector
-     def _dump(_v)
-       Marshal.dump(to_a)
-     end
-
-     def self._load(arr)
-       arry = Marshal.load(arr)
-       GSL::Vector.alloc(arry)
-     end
-   end
-
-   class Matrix
-     class << self
-       alias diag diagonal
-     end
-   end
- end