classifier 1.3.4 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,54 +3,55 @@
3
3
  # License:: LGPL
4
4
 
5
5
  begin
6
- raise LoadError if ENV['NATIVE_VECTOR'] == "true" # to test the native vector class, try `rake test NATIVE_VECTOR=true`
7
-
8
- require 'gsl' # requires http://rb-gsl.rubyforge.org/
9
- require 'classifier/extensions/vector_serialize'
10
- $GSL = true
11
-
6
+ # to test the native vector class, try `rake test NATIVE_VECTOR=true`
7
+ raise LoadError if ENV['NATIVE_VECTOR'] == 'true'
8
+
9
+ require 'gsl' # requires https://github.com/SciRuby/rb-gsl/
10
+ require 'classifier/extensions/vector_serialize'
11
+ $GSL = true
12
12
  rescue LoadError
13
- warn "Notice: for 10x faster LSI support, please install http://rb-gsl.rubyforge.org/"
14
- require 'classifier/extensions/vector'
13
+ warn 'Notice: for 10x faster LSI support, please install https://github.com/SciRuby/rb-gsl/'
14
+ $GSL = false
15
+ require 'classifier/extensions/vector'
15
16
  end
16
-
17
+
17
18
  require 'classifier/lsi/word_list'
18
19
  require 'classifier/lsi/content_node'
19
20
  require 'classifier/lsi/summary'
20
21
 
21
22
  module Classifier
22
-
23
23
  # This class implements a Latent Semantic Indexer, which can search, classify and cluster
24
24
  # data based on underlying semantic relations. For more information on the algorithms used,
25
25
  # please consult Wikipedia[http://en.wikipedia.org/wiki/Latent_Semantic_Indexing].
26
26
  class LSI
27
-
28
27
  attr_reader :word_list
29
28
  attr_accessor :auto_rebuild
30
-
29
+
31
30
  # Create a fresh index.
32
31
  # If you want to call #build_index manually, use
33
32
  # Classifier::LSI.new :auto_rebuild => false
34
33
  #
35
34
  def initialize(options = {})
36
35
  @auto_rebuild = true unless options[:auto_rebuild] == false
37
- @word_list, @items = WordList.new, {}
38
- @version, @built_at_version = 0, -1
36
+ @word_list = WordList.new
37
+ @items = {}
38
+ @version = 0
39
+ @built_at_version = -1
39
40
  end
40
-
41
+
41
42
  # Returns true if the index needs to be rebuilt. The index needs
42
43
  # to be built after all information is added, but before you start
43
44
  # using it for search, classification and cluster detection.
44
45
  def needs_rebuild?
45
46
  (@items.keys.size > 1) && (@version != @built_at_version)
46
47
  end
47
-
48
- # Adds an item to the index. item is assumed to be a string, but
48
+
49
+ # Adds an item to the index. item is assumed to be a string, but
49
50
  # any item may be indexed so long as it responds to #to_s or if
50
- # you provide an optional block explaining how the indexer can
51
+ # you provide an optional block explaining how the indexer can
51
52
  # fetch fresh string data. This optional block is passed the item,
52
53
  # so the item may only be a reference to a URL or file name.
53
- #
54
+ #
54
55
  # For example:
55
56
  # lsi = Classifier::LSI.new
56
57
  # lsi.add_item "This is just plain text"
@@ -58,226 +59,252 @@ module Classifier
58
59
  # ar = ActiveRecordObject.find( :all )
59
60
  # lsi.add_item ar, *ar.categories { |x| ar.content }
60
61
  #
61
- def add_item( item, *categories, &block )
62
+ def add_item(item, *categories, &block)
62
63
  clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
63
64
  @items[item] = ContentNode.new(clean_word_hash, *categories)
64
65
  @version += 1
65
66
  build_index if @auto_rebuild
66
67
  end
67
68
 
68
- # A less flexible shorthand for add_item that assumes
69
+ # A less flexible shorthand for add_item that assumes
69
70
  # you are passing in a string with no categories. item
70
- # will be duck typed via to_s .
71
+ # will be duck typed via to_s .
71
72
  #
72
- def <<( item )
73
- add_item item
73
+ def <<(item)
74
+ add_item(item)
74
75
  end
75
-
76
+
76
77
  # Returns the categories for a given indexed item. You are free to add and remove
77
78
  # items from this as you see fit. It does not invalidate an index to change its categories.
78
79
  def categories_for(item)
79
80
  return [] unless @items[item]
80
- return @items[item].categories
81
+
82
+ @items[item].categories
81
83
  end
82
84
 
83
- # Removes an item from the database, if it is indexed.
85
+ # Removes an item from the database, if it is indexed.
84
86
  #
85
- def remove_item( item )
86
- if @items.keys.contain? item
87
- @items.remove item
88
- @version += 1
89
- end
87
+ def remove_item(item)
88
+ return unless @items.key?(item)
89
+
90
+ @items.delete(item)
91
+ @version += 1
90
92
  end
91
-
92
- # Returns an array of items that are indexed.
93
+
94
+ # Returns an array of items that are indexed.
93
95
  def items
94
96
  @items.keys
95
97
  end
96
-
97
- # Returns the categories for a given indexed items. You are free to add and remove
98
- # items from this as you see fit. It does not invalide an index to change its categories.
99
- def categories_for(item)
100
- return [] unless @items[item]
101
- return @items[item].categories
102
- end
103
98
 
104
99
  # This function rebuilds the index if needs_rebuild? returns true.
105
100
  # For very large document spaces, this indexing operation may take some
106
- # time to complete, so it may be wise to place the operation in another
107
- # thread.
101
+ # time to complete, so it may be wise to place the operation in another
102
+ # thread.
108
103
  #
109
104
  # As a rule, indexing will be fairly swift on modern machines until
110
- # you have well over 500 documents indexed, or have an incredibly diverse
111
- # vocabulary for your documents.
105
+ # you have well over 500 documents indexed, or have an incredibly diverse
106
+ # vocabulary for your documents.
112
107
  #
113
108
  # The optional parameter "cutoff" is a tuning parameter. When the index is
114
- # built, a certain number of s-values are discarded from the system. The
109
+ # built, a certain number of s-values are discarded from the system. The
115
110
  # cutoff parameter tells the indexer how many of these values to keep.
116
111
  # A value of 1 for cutoff means that no semantic analysis will take place,
117
112
  # turning the LSI class into a simple vector search engine.
118
- def build_index( cutoff=0.75 )
113
+ def build_index(cutoff = 0.75)
119
114
  return unless needs_rebuild?
115
+
120
116
  make_word_list
121
-
117
+
122
118
  doc_list = @items.values
123
- tda = doc_list.collect { |node| node.raw_vector_with( @word_list ) }
124
-
119
+ tda = doc_list.collect { |node| node.raw_vector_with(@word_list) }
120
+
125
121
  if $GSL
126
- tdm = GSL::Matrix.alloc(*tda).trans
127
- ntdm = build_reduced_matrix(tdm, cutoff)
128
-
129
- ntdm.size[1].times do |col|
130
- vec = GSL::Vector.alloc( ntdm.column(col) ).row
131
- doc_list[col].lsi_vector = vec
132
- doc_list[col].lsi_norm = vec.normalize
133
- end
122
+ tdm = GSL::Matrix.alloc(*tda).trans
123
+ ntdm = build_reduced_matrix(tdm, cutoff)
124
+
125
+ ntdm.size[1].times do |col|
126
+ vec = GSL::Vector.alloc(ntdm.column(col)).row
127
+ doc_list[col].lsi_vector = vec
128
+ doc_list[col].lsi_norm = vec.normalize
129
+ end
134
130
  else
135
- tdm = Matrix.rows(tda).trans
136
- ntdm = build_reduced_matrix(tdm, cutoff)
137
-
138
- ntdm.row_size.times do |col|
139
- doc_list[col].lsi_vector = ntdm.column(col) if doc_list[col]
140
- doc_list[col].lsi_norm = ntdm.column(col).normalize if doc_list[col]
141
- end
131
+ tdm = Matrix.rows(tda).trans
132
+ ntdm = build_reduced_matrix(tdm, cutoff)
133
+
134
+ ntdm.row_size.times do |col|
135
+ doc_list[col].lsi_vector = ntdm.column(col) if doc_list[col]
136
+ doc_list[col].lsi_norm = ntdm.column(col).normalize if doc_list[col]
137
+ end
142
138
  end
143
-
139
+
144
140
  @built_at_version = @version
145
141
  end
146
-
142
+
147
143
  # This method returns max_chunks entries, ordered by their average semantic rating.
148
144
  # Essentially, the average distance of each entry from all other entries is calculated,
149
145
  # the highest are returned.
150
146
  #
151
147
  # This can be used to build a summary service, or to provide more information about
152
148
  # your dataset's general content. For example, if you were to use categorize on the
153
- # results of this data, you could gather information on what your dataset is generally
149
+ # results of this data, you could gather information on what your dataset is generally
154
150
  # about.
155
- def highest_relative_content( max_chunks=10 )
156
- return [] if needs_rebuild?
157
-
158
- avg_density = Hash.new
159
- @items.each_key { |x| avg_density[x] = proximity_array_for_content(x).inject(0.0) { |x,y| x + y[1]} }
160
-
161
- avg_density.keys.sort_by { |x| avg_density[x] }.reverse[0..max_chunks-1].map
151
+ def highest_relative_content(max_chunks = 10)
152
+ return [] if needs_rebuild?
153
+
154
+ avg_density = {}
155
+ @items.each_key { |x| avg_density[x] = proximity_array_for_content(x).inject(0.0) { |x, y| x + y[1] } }
156
+
157
+ avg_density.keys.sort_by { |x| avg_density[x] }.reverse[0..max_chunks - 1].map
162
158
  end
163
159
 
164
- # This function is the primitive that find_related and classify
160
+ # This function is the primitive that find_related and classify
165
161
  # build upon. It returns an array of 2-element arrays. The first element
166
162
  # of this array is a document, and the second is its "score", defining
167
163
  # how "close" it is to other indexed items.
168
- #
164
+ #
169
165
  # These values are somewhat arbitrary, having to do with the vector space
170
166
  # created by your content, so the magnitude is interpretable but not always
171
- # meaningful between indexes.
167
+ # meaningful between indexes.
172
168
  #
173
169
  # The parameter doc is the content to compare. If that content is not
174
- # indexed, you can pass an optional block to define how to create the
175
- # text data. See add_item for examples of how this works.
176
- def proximity_array_for_content( doc, &block )
170
+ # indexed, you can pass an optional block to define how to create the
171
+ # text data. See add_item for examples of how this works.
172
+ def proximity_array_for_content(doc, &block)
177
173
  return [] if needs_rebuild?
178
-
179
- content_node = node_for_content( doc, &block )
180
- result =
174
+
175
+ content_node = node_for_content(doc, &block)
176
+ result =
181
177
  @items.keys.collect do |item|
182
- if $GSL
183
- val = content_node.search_vector * @items[item].search_vector.col
184
- else
185
- val = (Matrix[content_node.search_vector] * @items[item].search_vector)[0]
186
- end
178
+ val = if $GSL
179
+ content_node.search_vector * @items[item].search_vector.col
180
+ else
181
+ (Matrix[content_node.search_vector] * @items[item].search_vector)[0]
182
+ end
187
183
  [item, val]
188
184
  end
189
185
  result.sort_by { |x| x[1] }.reverse
190
- end
191
-
186
+ end
187
+
192
188
  # Similar to proximity_array_for_content, this function takes similar
193
189
  # arguments and returns a similar array. However, it uses the normalized
194
- # calculated vectors instead of their full versions. This is useful when
190
+ # calculated vectors instead of their full versions. This is useful when
195
191
  # you're trying to perform operations on content that is much smaller than
196
192
  # the text you're working with. search uses this primitive.
197
- def proximity_norms_for_content( doc, &block )
193
+ def proximity_norms_for_content(doc, &block)
198
194
  return [] if needs_rebuild?
199
-
200
- content_node = node_for_content( doc, &block )
201
- result =
195
+
196
+ content_node = node_for_content(doc, &block)
197
+ result =
202
198
  @items.keys.collect do |item|
203
- if $GSL
204
- val = content_node.search_norm * @items[item].search_norm.col
205
- else
206
- val = (Matrix[content_node.search_norm] * @items[item].search_norm)[0]
207
- end
199
+ val = if $GSL
200
+ content_node.search_norm * @items[item].search_norm.col
201
+ else
202
+ (Matrix[content_node.search_norm] * @items[item].search_norm)[0]
203
+ end
208
204
  [item, val]
209
205
  end
210
206
  result.sort_by { |x| x[1] }.reverse
211
- end
212
-
207
+ end
208
+
213
209
  # This function allows for text-based search of your index. Unlike other functions
214
210
  # like find_related and classify, search only takes short strings. It will also ignore
215
- # factors like repeated words. It is best for short, google-like search terms.
216
- # A search will first priortize lexical relationships, then semantic ones.
211
+ # factors like repeated words. It is best for short, google-like search terms.
212
+ # A search will first priortize lexical relationships, then semantic ones.
217
213
  #
218
214
  # While this may seem backwards compared to the other functions that LSI supports,
219
215
  # it is actually the same algorithm, just applied on a smaller document.
220
- def search( string, max_nearest=3 )
216
+ def search(string, max_nearest = 3)
221
217
  return [] if needs_rebuild?
222
- carry = proximity_norms_for_content( string )
218
+
219
+ carry = proximity_norms_for_content(string)
223
220
  result = carry.collect { |x| x[0] }
224
- return result[0..max_nearest-1]
221
+ result[0..max_nearest - 1]
225
222
  end
226
-
223
+
227
224
  # This function takes content and finds other documents
228
225
  # that are semantically "close", returning an array of documents sorted
229
226
  # from most to least relevant.
230
- # max_nearest specifies the number of documents to return. A value of
231
- # 0 means that it returns all the indexed documents, sorted by relavence.
227
+ # max_nearest specifies the number of documents to return. A value of
228
+ # 0 means that it returns all the indexed documents, sorted by relavence.
232
229
  #
233
- # This is particularly useful for identifing clusters in your document space.
230
+ # This is particularly useful for identifing clusters in your document space.
234
231
  # For example you may want to identify several "What's Related" items for weblog
235
232
  # articles, or find paragraphs that relate to each other in an essay.
236
- def find_related( doc, max_nearest=3, &block )
237
- carry =
238
- proximity_array_for_content( doc, &block ).reject { |pair| pair[0] == doc }
233
+ def find_related(doc, max_nearest = 3, &block)
234
+ carry =
235
+ proximity_array_for_content(doc, &block).reject { |pair| pair[0] == doc }
239
236
  result = carry.collect { |x| x[0] }
240
- return result[0..max_nearest-1]
237
+ result[0..max_nearest - 1]
241
238
  end
242
-
243
- # This function uses a voting system to categorize documents, based on
244
- # the categories of other documents. It uses the same logic as the
239
+
240
+ # This function uses a voting system to categorize documents, based on
241
+ # the categories of other documents. It uses the same logic as the
245
242
  # find_related function to find related documents, then returns the
246
- # most obvious category from this list.
243
+ # most obvious category from this list.
247
244
  #
248
- # cutoff signifies the number of documents to consider when clasifying
249
- # text. A cutoff of 1 means that every document in the index votes on
245
+ # cutoff signifies the number of documents to consider when clasifying
246
+ # text. A cutoff of 1 means that every document in the index votes on
250
247
  # what category the document is in. This may not always make sense.
251
248
  #
252
- def classify( doc, cutoff=0.30, &block )
249
+ def classify(doc, cutoff = 0.30, &block)
250
+ votes = vote(doc, cutoff, &block)
251
+
252
+ ranking = votes.keys.sort_by { |x| votes[x] }
253
+ ranking[-1]
254
+ end
255
+
256
+ def vote(doc, cutoff = 0.30, &block)
253
257
  icutoff = (@items.size * cutoff).round
254
- carry = proximity_array_for_content( doc, &block )
255
- carry = carry[0..icutoff-1]
258
+ carry = proximity_array_for_content(doc, &block)
259
+ carry = carry[0..icutoff - 1]
256
260
  votes = {}
257
261
  carry.each do |pair|
258
262
  categories = @items[pair[0]].categories
259
- categories.each do |category|
263
+ categories.each do |category|
260
264
  votes[category] ||= 0.0
261
- votes[category] += pair[1]
265
+ votes[category] += pair[1]
262
266
  end
263
267
  end
264
-
268
+ votes
269
+ end
270
+
271
+ # Returns the same category as classify() but also returns
272
+ # a confidence value derived from the vote share that the
273
+ # winning category got.
274
+ #
275
+ # e.g.
276
+ # category,confidence = classify_with_confidence(doc)
277
+ # if confidence < 0.3
278
+ # category = nil
279
+ # end
280
+ #
281
+ #
282
+ # See classify() for argument docs
283
+ def classify_with_confidence(doc, cutoff = 0.30, &block)
284
+ votes = vote(doc, cutoff, &block)
285
+ votes_sum = votes.values.inject(0.0) { |sum, v| sum + v }
286
+ return [nil, nil] if votes_sum.zero?
287
+
265
288
  ranking = votes.keys.sort_by { |x| votes[x] }
266
- return ranking[-1]
289
+ winner = ranking[-1]
290
+ vote_share = votes[winner] / votes_sum.to_f
291
+ [winner, vote_share]
267
292
  end
268
-
293
+
269
294
  # Prototype, only works on indexed documents.
270
295
  # I have no clue if this is going to work, but in theory
271
296
  # it's supposed to.
272
- def highest_ranked_stems( doc, count=3 )
273
- raise "Requested stem ranking on non-indexed content!" unless @items[doc]
297
+ def highest_ranked_stems(doc, count = 3)
298
+ raise 'Requested stem ranking on non-indexed content!' unless @items[doc]
299
+
274
300
  arr = node_for_content(doc).lsi_vector.to_a
275
- top_n = arr.sort.reverse[0..count-1]
276
- return top_n.collect { |x| @word_list.word_for_index(arr.index(x))}
301
+ top_n = arr.sort.reverse[0..count - 1]
302
+ top_n.collect { |x| @word_list.word_for_index(arr.index(x)) }
277
303
  end
278
304
 
279
305
  private
280
- def build_reduced_matrix( matrix, cutoff=0.75 )
306
+
307
+ def build_reduced_matrix(matrix, cutoff = 0.75)
281
308
  # TODO: Check that M>=N on these dimensions! Transpose helps assure this
282
309
  u, v, s = matrix.SV_decomp
283
310
 
@@ -287,32 +314,28 @@ module Classifier
287
314
  s[ord] = 0.0 if s[ord] < s_cutoff
288
315
  end
289
316
  # Reconstruct the term document matrix, only with reduced rank
290
- u * ($GSL ? GSL::Matrix : ::Matrix).diag( s ) * v.trans
317
+ u * ($GSL ? GSL::Matrix : ::Matrix).diag(s) * v.trans
291
318
  end
292
-
293
- def node_for_content(item, &block)
294
- if @items[item]
295
- return @items[item]
296
- else
297
- clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
298
319
 
299
- cn = ContentNode.new(clean_word_hash, &block) # make the node and extract the data
320
+ def node_for_content(item, &block)
321
+ return @items[item] if @items[item]
300
322
 
301
- unless needs_rebuild?
302
- cn.raw_vector_with( @word_list ) # make the lsi raw and norm vectors
303
- end
323
+ clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
324
+
325
+ cn = ContentNode.new(clean_word_hash, &block) # make the node and extract the data
326
+
327
+ unless needs_rebuild?
328
+ cn.raw_vector_with(@word_list) # make the lsi raw and norm vectors
304
329
  end
305
-
306
- return cn
330
+
331
+ cn
307
332
  end
308
-
333
+
309
334
  def make_word_list
310
335
  @word_list = WordList.new
311
336
  @items.each_value do |node|
312
337
  node.word_hash.each_key { |key| @word_list.add_word key }
313
338
  end
314
339
  end
315
-
316
340
  end
317
341
  end
318
-
data/lib/classifier.rb CHANGED
@@ -26,5 +26,6 @@
26
26
 
27
27
  require 'rubygems'
28
28
  require 'classifier/extensions/string'
29
+ require 'classifier/extensions/vector'
29
30
  require 'classifier/bayes'
30
- require 'classifier/lsi'
31
+ require 'classifier/lsi'
data/test/test_helper.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  $:.unshift(File.dirname(__FILE__) + '/../lib')
2
2
 
3
- require 'test/unit'
4
- require 'classifier'
3
+ require 'minitest'
4
+ require 'minitest/autorun'
5
+ require 'classifier'
metadata CHANGED
@@ -1,36 +1,80 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.4
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson
8
- autorequire: classifier
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-31 00:00:00.000000000 Z
11
+ date: 2024-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast-stemmer
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '>='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 1.0.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '>='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
26
  version: 1.0.0
27
- description: |2
28
- A general classifier module to allow Bayesian and other types of classifications.
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rdoc
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: A general classifier module to allow Bayesian and other types of classifications.
29
70
  email: lucas@rufy.com
30
71
  executables: []
31
72
  extensions: []
32
73
  extra_rdoc_files: []
33
74
  files:
75
+ - LICENSE
76
+ - bin/bayes.rb
77
+ - bin/summarize.rb
34
78
  - lib/classifier.rb
35
79
  - lib/classifier/bayes.rb
36
80
  - lib/classifier/extensions/string.rb
@@ -41,39 +85,28 @@ files:
41
85
  - lib/classifier/lsi/content_node.rb
42
86
  - lib/classifier/lsi/summary.rb
43
87
  - lib/classifier/lsi/word_list.rb
44
- - bin/bayes.rb
45
- - bin/summarize.rb
46
- - test/bayes/bayesian_test.rb
47
- - test/extensions/word_hash_test.rb
48
- - test/lsi/lsi_test.rb
49
88
  - test/test_helper.rb
50
- - Gemfile
51
- - Gemfile.lock
52
- - LICENSE
53
- - README.markdown
54
- - Rakefile
55
- homepage: http://classifier.rufy.com/
56
- licenses: []
89
+ homepage: https://github.com/cardmagic/classifier
90
+ licenses:
91
+ - LGPL
57
92
  metadata: {}
58
- post_install_message:
93
+ post_install_message:
59
94
  rdoc_options: []
60
95
  require_paths:
61
96
  - lib
62
97
  required_ruby_version: !ruby/object:Gem::Requirement
63
98
  requirements:
64
- - - '>='
99
+ - - ">="
65
100
  - !ruby/object:Gem::Version
66
101
  version: '0'
67
102
  required_rubygems_version: !ruby/object:Gem::Requirement
68
103
  requirements:
69
- - - '>='
104
+ - - ">="
70
105
  - !ruby/object:Gem::Version
71
106
  version: '0'
72
- requirements:
73
- - A porter-stemmer module to split word stems.
74
- rubyforge_project:
75
- rubygems_version: 2.0.3
76
- signing_key:
107
+ requirements: []
108
+ rubygems_version: 3.5.9
109
+ signing_key:
77
110
  specification_version: 4
78
111
  summary: A general classifier module to allow Bayesian and other types of classifications.
79
112
  test_files: []
data/Gemfile DELETED
@@ -1,5 +0,0 @@
1
- source 'https://rubygems.org'
2
- gem 'rake'
3
- gem 'rspec', :require => 'spec'
4
- gem 'rdoc'
5
- gem 'fast-stemmer'
data/Gemfile.lock DELETED
@@ -1,26 +0,0 @@
1
- GEM
2
- remote: https://rubygems.org/
3
- specs:
4
- diff-lcs (1.2.5)
5
- fast-stemmer (1.0.2)
6
- json (1.8.1)
7
- rake (10.1.1)
8
- rdoc (4.1.0)
9
- json (~> 1.4)
10
- rspec (2.14.1)
11
- rspec-core (~> 2.14.0)
12
- rspec-expectations (~> 2.14.0)
13
- rspec-mocks (~> 2.14.0)
14
- rspec-core (2.14.7)
15
- rspec-expectations (2.14.4)
16
- diff-lcs (>= 1.1.3, < 2.0)
17
- rspec-mocks (2.14.4)
18
-
19
- PLATFORMS
20
- ruby
21
-
22
- DEPENDENCIES
23
- fast-stemmer
24
- rake
25
- rdoc
26
- rspec