classifier 1.3.4 → 1.4.0

@@ -3,54 +3,55 @@
 # License:: LGPL
 
 begin
-  raise LoadError if ENV['NATIVE_VECTOR'] == "true" # to test the native vector class, try `rake test NATIVE_VECTOR=true`
-
-  require 'gsl' # requires http://rb-gsl.rubyforge.org/
-  require 'classifier/extensions/vector_serialize'
-  $GSL = true
-
+  # to test the native vector class, try `rake test NATIVE_VECTOR=true`
+  raise LoadError if ENV['NATIVE_VECTOR'] == 'true'
+
+  require 'gsl' # requires https://github.com/SciRuby/rb-gsl/
+  require 'classifier/extensions/vector_serialize'
+  $GSL = true
 rescue LoadError
-  warn "Notice: for 10x faster LSI support, please install http://rb-gsl.rubyforge.org/"
-  require 'classifier/extensions/vector'
+  warn 'Notice: for 10x faster LSI support, please install https://github.com/SciRuby/rb-gsl/'
+  $GSL = false
+  require 'classifier/extensions/vector'
 end
-
+
 require 'classifier/lsi/word_list'
 require 'classifier/lsi/content_node'
 require 'classifier/lsi/summary'
 
 module Classifier
-
   # This class implements a Latent Semantic Indexer, which can search, classify and cluster
   # data based on underlying semantic relations. For more information on the algorithms used,
   # please consult Wikipedia[http://en.wikipedia.org/wiki/Latent_Semantic_Indexing].
   class LSI
-
     attr_reader :word_list
     attr_accessor :auto_rebuild
-
+
     # Create a fresh index.
     # If you want to call #build_index manually, use
     #   Classifier::LSI.new :auto_rebuild => false
     #
     def initialize(options = {})
       @auto_rebuild = true unless options[:auto_rebuild] == false
-      @word_list, @items = WordList.new, {}
-      @version, @built_at_version = 0, -1
+      @word_list = WordList.new
+      @items = {}
+      @version = 0
+      @built_at_version = -1
     end
-
+
     # Returns true if the index needs to be rebuilt. The index needs
     # to be built after all informaton is added, but before you start
     # using it for search, classification and cluster detection.
     def needs_rebuild?
       (@items.keys.size > 1) && (@version != @built_at_version)
     end
-
-    # Adds an item to the index. item is assumed to be a string, but
+
+    # Adds an item to the index. item is assumed to be a string, but
     # any item may be indexed so long as it responds to #to_s or if
-    # you provide an optional block explaining how the indexer can
+    # you provide an optional block explaining how the indexer can
     # fetch fresh string data. This optional block is passed the item,
     # so the item may only be a reference to a URL or file name.
-    #
+    #
     # For example:
     #   lsi = Classifier::LSI.new
     #   lsi.add_item "This is just plain text"
@@ -58,226 +59,252 @@ module Classifier
     #   ar = ActiveRecordObject.find( :all )
     #   lsi.add_item ar, *ar.categories { |x| ar.content }
     #
-    def add_item( item, *categories, &block )
+    def add_item(item, *categories, &block)
       clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
       @items[item] = ContentNode.new(clean_word_hash, *categories)
       @version += 1
       build_index if @auto_rebuild
     end
 
-    # A less flexible shorthand for add_item that assumes
+    # A less flexible shorthand for add_item that assumes
     # you are passing in a string with no categorries. item
-    # will be duck typed via to_s .
+    # will be duck typed via to_s .
     #
-    def <<( item )
-      add_item item
+    def <<(item)
+      add_item(item)
     end
-
+
     # Returns the categories for a given indexed items. You are free to add and remove
     # items from this as you see fit. It does not invalide an index to change its categories.
     def categories_for(item)
       return [] unless @items[item]
-      return @items[item].categories
+
+      @items[item].categories
     end
 
-    # Removes an item from the database, if it is indexed.
+    # Removes an item from the database, if it is indexed.
     #
-    def remove_item( item )
-      if @items.keys.contain? item
-        @items.remove item
-        @version += 1
-      end
+    def remove_item(item)
+      return unless @items.key?(item)
+
+      @items.delete(item)
+      @version += 1
     end
-
-    # Returns an array of items that are indexed.
+
+    # Returns an array of items that are indexed.
     def items
       @items.keys
     end
-
-    # Returns the categories for a given indexed items. You are free to add and remove
-    # items from this as you see fit. It does not invalide an index to change its categories.
-    def categories_for(item)
-      return [] unless @items[item]
-      return @items[item].categories
-    end
 
     # This function rebuilds the index if needs_rebuild? returns true.
     # For very large document spaces, this indexing operation may take some
-    # time to complete, so it may be wise to place the operation in another
-    # thread.
+    # time to complete, so it may be wise to place the operation in another
+    # thread.
     #
     # As a rule, indexing will be fairly swift on modern machines until
-    # you have well over 500 documents indexed, or have an incredibly diverse
-    # vocabulary for your documents.
+    # you have well over 500 documents indexed, or have an incredibly diverse
+    # vocabulary for your documents.
     #
     # The optional parameter "cutoff" is a tuning parameter. When the index is
-    # built, a certain number of s-values are discarded from the system. The
+    # built, a certain number of s-values are discarded from the system. The
     # cutoff parameter tells the indexer how many of these values to keep.
     # A value of 1 for cutoff means that no semantic analysis will take place,
     # turning the LSI class into a simple vector search engine.
-    def build_index( cutoff=0.75 )
+    def build_index(cutoff = 0.75)
       return unless needs_rebuild?
+
       make_word_list
-
+
       doc_list = @items.values
-      tda = doc_list.collect { |node| node.raw_vector_with( @word_list ) }
-
+      tda = doc_list.collect { |node| node.raw_vector_with(@word_list) }
+
       if $GSL
-        tdm = GSL::Matrix.alloc(*tda).trans
-        ntdm = build_reduced_matrix(tdm, cutoff)
-
-        ntdm.size[1].times do |col|
-          vec = GSL::Vector.alloc( ntdm.column(col) ).row
-          doc_list[col].lsi_vector = vec
-          doc_list[col].lsi_norm = vec.normalize
-        end
+        tdm = GSL::Matrix.alloc(*tda).trans
+        ntdm = build_reduced_matrix(tdm, cutoff)
+
+        ntdm.size[1].times do |col|
+          vec = GSL::Vector.alloc(ntdm.column(col)).row
+          doc_list[col].lsi_vector = vec
+          doc_list[col].lsi_norm = vec.normalize
+        end
       else
-        tdm = Matrix.rows(tda).trans
-        ntdm = build_reduced_matrix(tdm, cutoff)
-
-        ntdm.row_size.times do |col|
-          doc_list[col].lsi_vector = ntdm.column(col) if doc_list[col]
-          doc_list[col].lsi_norm = ntdm.column(col).normalize if doc_list[col]
-        end
+        tdm = Matrix.rows(tda).trans
+        ntdm = build_reduced_matrix(tdm, cutoff)
+
+        ntdm.row_size.times do |col|
+          doc_list[col].lsi_vector = ntdm.column(col) if doc_list[col]
+          doc_list[col].lsi_norm = ntdm.column(col).normalize if doc_list[col]
+        end
       end
-
+
       @built_at_version = @version
     end
-
+
     # This method returns max_chunks entries, ordered by their average semantic rating.
     # Essentially, the average distance of each entry from all other entries is calculated,
     # the highest are returned.
     #
     # This can be used to build a summary service, or to provide more information about
     # your dataset's general content. For example, if you were to use categorize on the
-    # results of this data, you could gather information on what your dataset is generally
+    # results of this data, you could gather information on what your dataset is generally
     # about.
-    def highest_relative_content( max_chunks=10 )
-      return [] if needs_rebuild?
-
-      avg_density = Hash.new
-      @items.each_key { |x| avg_density[x] = proximity_array_for_content(x).inject(0.0) { |x,y| x + y[1]} }
-
-      avg_density.keys.sort_by { |x| avg_density[x] }.reverse[0..max_chunks-1].map
+    def highest_relative_content(max_chunks = 10)
+      return [] if needs_rebuild?
+
+      avg_density = {}
+      @items.each_key { |x| avg_density[x] = proximity_array_for_content(x).inject(0.0) { |x, y| x + y[1] } }
+
+      avg_density.keys.sort_by { |x| avg_density[x] }.reverse[0..max_chunks - 1].map
     end
 
-    # This function is the primitive that find_related and classify
+    # This function is the primitive that find_related and classify
     # build upon. It returns an array of 2-element arrays. The first element
     # of this array is a document, and the second is its "score", defining
     # how "close" it is to other indexed items.
-    #
+    #
     # These values are somewhat arbitrary, having to do with the vector space
     # created by your content, so the magnitude is interpretable but not always
-    # meaningful between indexes.
+    # meaningful between indexes.
     #
     # The parameter doc is the content to compare. If that content is not
-    # indexed, you can pass an optional block to define how to create the
-    # text data. See add_item for examples of how this works.
-    def proximity_array_for_content( doc, &block )
+    # indexed, you can pass an optional block to define how to create the
+    # text data. See add_item for examples of how this works.
+    def proximity_array_for_content(doc, &block)
       return [] if needs_rebuild?
-
-      content_node = node_for_content( doc, &block )
-      result =
+
+      content_node = node_for_content(doc, &block)
+      result =
         @items.keys.collect do |item|
-          if $GSL
-            val = content_node.search_vector * @items[item].search_vector.col
-          else
-            val = (Matrix[content_node.search_vector] * @items[item].search_vector)[0]
-          end
+          val = if $GSL
+                  content_node.search_vector * @items[item].search_vector.col
+                else
+                  (Matrix[content_node.search_vector] * @items[item].search_vector)[0]
+                end
           [item, val]
         end
       result.sort_by { |x| x[1] }.reverse
-    end
-
+    end
+
     # Similar to proximity_array_for_content, this function takes similar
     # arguments and returns a similar array. However, it uses the normalized
-    # calculated vectors instead of their full versions. This is useful when
+    # calculated vectors instead of their full versions. This is useful when
     # you're trying to perform operations on content that is much smaller than
     # the text you're working with. search uses this primitive.
-    def proximity_norms_for_content( doc, &block )
+    def proximity_norms_for_content(doc, &block)
       return [] if needs_rebuild?
-
-      content_node = node_for_content( doc, &block )
-      result =
+
+      content_node = node_for_content(doc, &block)
+      result =
         @items.keys.collect do |item|
-          if $GSL
-            val = content_node.search_norm * @items[item].search_norm.col
-          else
-            val = (Matrix[content_node.search_norm] * @items[item].search_norm)[0]
-          end
+          val = if $GSL
+                  content_node.search_norm * @items[item].search_norm.col
+                else
+                  (Matrix[content_node.search_norm] * @items[item].search_norm)[0]
+                end
           [item, val]
         end
       result.sort_by { |x| x[1] }.reverse
-    end
-
+    end
+
     # This function allows for text-based search of your index. Unlike other functions
     # like find_related and classify, search only takes short strings. It will also ignore
-    # factors like repeated words. It is best for short, google-like search terms.
-    # A search will first priortize lexical relationships, then semantic ones.
+    # factors like repeated words. It is best for short, google-like search terms.
+    # A search will first priortize lexical relationships, then semantic ones.
     #
     # While this may seem backwards compared to the other functions that LSI supports,
     # it is actually the same algorithm, just applied on a smaller document.
-    def search( string, max_nearest=3 )
+    def search(string, max_nearest = 3)
       return [] if needs_rebuild?
-      carry = proximity_norms_for_content( string )
+
+      carry = proximity_norms_for_content(string)
       result = carry.collect { |x| x[0] }
-      return result[0..max_nearest-1]
+      result[0..max_nearest - 1]
     end
-
+
     # This function takes content and finds other documents
     # that are semantically "close", returning an array of documents sorted
     # from most to least relavant.
-    # max_nearest specifies the number of documents to return. A value of
-    # 0 means that it returns all the indexed documents, sorted by relavence.
+    # max_nearest specifies the number of documents to return. A value of
+    # 0 means that it returns all the indexed documents, sorted by relavence.
     #
-    # This is particularly useful for identifing clusters in your document space.
+    # This is particularly useful for identifing clusters in your document space.
     # For example you may want to identify several "What's Related" items for weblog
     # articles, or find paragraphs that relate to each other in an essay.
-    def find_related( doc, max_nearest=3, &block )
-      carry =
-        proximity_array_for_content( doc, &block ).reject { |pair| pair[0] == doc }
+    def find_related(doc, max_nearest = 3, &block)
+      carry =
+        proximity_array_for_content(doc, &block).reject { |pair| pair[0] == doc }
       result = carry.collect { |x| x[0] }
-      return result[0..max_nearest-1]
+      result[0..max_nearest - 1]
     end
-
-    # This function uses a voting system to categorize documents, based on
-    # the categories of other documents. It uses the same logic as the
+
+    # This function uses a voting system to categorize documents, based on
+    # the categories of other documents. It uses the same logic as the
     # find_related function to find related documents, then returns the
-    # most obvious category from this list.
+    # most obvious category from this list.
     #
-    # cutoff signifies the number of documents to consider when clasifying
-    # text. A cutoff of 1 means that every document in the index votes on
+    # cutoff signifies the number of documents to consider when clasifying
+    # text. A cutoff of 1 means that every document in the index votes on
     # what category the document is in. This may not always make sense.
     #
-    def classify( doc, cutoff=0.30, &block )
+    def classify(doc, cutoff = 0.30, &block)
+      votes = vote(doc, cutoff, &block)
+
+      ranking = votes.keys.sort_by { |x| votes[x] }
+      ranking[-1]
+    end
+
+    def vote(doc, cutoff = 0.30, &block)
       icutoff = (@items.size * cutoff).round
-      carry = proximity_array_for_content( doc, &block )
-      carry = carry[0..icutoff-1]
+      carry = proximity_array_for_content(doc, &block)
+      carry = carry[0..icutoff - 1]
       votes = {}
       carry.each do |pair|
         categories = @items[pair[0]].categories
-        categories.each do |category|
+        categories.each do |category|
           votes[category] ||= 0.0
-          votes[category] += pair[1]
+          votes[category] += pair[1]
         end
       end
-
+      votes
+    end
+
+    # Returns the same category as classify() but also returns
+    # a confidence value derived from the vote share that the
+    # winning category got.
+    #
+    # e.g.
+    # category,confidence = classify_with_confidence(doc)
+    # if confidence < 0.3
+    #    category = nil
+    # end
+    #
+    #
+    # See classify() for argument docs
+    def classify_with_confidence(doc, cutoff = 0.30, &block)
+      votes = vote(doc, cutoff, &block)
+      votes_sum = votes.values.inject(0.0) { |sum, v| sum + v }
+      return [nil, nil] if votes_sum.zero?
+
       ranking = votes.keys.sort_by { |x| votes[x] }
-      return ranking[-1]
+      winner = ranking[-1]
+      vote_share = votes[winner] / votes_sum.to_f
+      [winner, vote_share]
     end
-
+
     # Prototype, only works on indexed documents.
     # I have no clue if this is going to work, but in theory
     # it's supposed to.
-    def highest_ranked_stems( doc, count=3 )
-      raise "Requested stem ranking on non-indexed content!" unless @items[doc]
+    def highest_ranked_stems(doc, count = 3)
+      raise 'Requested stem ranking on non-indexed content!' unless @items[doc]
+
       arr = node_for_content(doc).lsi_vector.to_a
-      top_n = arr.sort.reverse[0..count-1]
-      return top_n.collect { |x| @word_list.word_for_index(arr.index(x))}
+      top_n = arr.sort.reverse[0..count - 1]
+      top_n.collect { |x| @word_list.word_for_index(arr.index(x)) }
     end
 
     private
-    def build_reduced_matrix( matrix, cutoff=0.75 )
+
+    def build_reduced_matrix(matrix, cutoff = 0.75)
       # TODO: Check that M>=N on these dimensions! Transpose helps assure this
       u, v, s = matrix.SV_decomp
 
@@ -287,32 +314,28 @@ module Classifier
         s[ord] = 0.0 if s[ord] < s_cutoff
       end
       # Reconstruct the term document matrix, only with reduced rank
-      u * ($GSL ? GSL::Matrix : ::Matrix).diag( s ) * v.trans
+      u * ($GSL ? GSL::Matrix : ::Matrix).diag(s) * v.trans
     end
-
-    def node_for_content(item, &block)
-      if @items[item]
-        return @items[item]
-      else
-        clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
 
-        cn = ContentNode.new(clean_word_hash, &block) # make the node and extract the data
+    def node_for_content(item, &block)
+      return @items[item] if @items[item]
 
-        unless needs_rebuild?
-          cn.raw_vector_with( @word_list ) # make the lsi raw and norm vectors
-        end
+      clean_word_hash = block ? block.call(item).clean_word_hash : item.to_s.clean_word_hash
+
+      cn = ContentNode.new(clean_word_hash, &block) # make the node and extract the data
+
+      unless needs_rebuild?
+        cn.raw_vector_with(@word_list) # make the lsi raw and norm vectors
       end
-
-      return cn
+
+      cn
     end
-
+
     def make_word_list
       @word_list = WordList.new
       @items.each_value do |node|
         node.word_hash.each_key { |key| @word_list.add_word key }
       end
     end
-
   end
 end
-
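
The notable API change in this file is that the voting logic formerly inlined in classify has been extracted into a public vote method, and the new classify_with_confidence returns the winning category together with its share of the total vote mass (or [nil, nil] when nothing voted). A minimal usage sketch grounded in the diff above — the training strings and categories are made up for illustration:

    require 'classifier'

    lsi = Classifier::LSI.new
    lsi.add_item 'Dogs bark and chase cats', :animal
    lsi.add_item 'Cats meow and chase mice', :animal
    lsi.add_item 'Ruby blocks and procs are closures', :code

    category, confidence = lsi.classify_with_confidence('A dog chased a cat')
    # Per the doc comment's own suggestion, discard low-confidence wins:
    category = nil if confidence && confidence < 0.3
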
data/lib/classifier.rb CHANGED
@@ -26,5 +26,6 @@
 
 require 'rubygems'
 require 'classifier/extensions/string'
+require 'classifier/extensions/vector'
 require 'classifier/bayes'
-require 'classifier/lsi'
+require 'classifier/lsi'
data/test/test_helper.rb CHANGED
@@ -1,4 +1,5 @@
 $:.unshift(File.dirname(__FILE__) + '/../lib')
 
-require 'test/unit'
-require 'classifier'
+require 'minitest'
+require 'minitest/autorun'
+require 'classifier'
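
With test_helper.rb now booting minitest rather than test/unit, test files using this helper are expected to define Minitest::Test subclasses. A hypothetical smoke test (class name and data invented here) might look like:

    require_relative 'test_helper'

    class LSISmokeTest < Minitest::Test
      def test_classify_returns_a_known_category
        lsi = Classifier::LSI.new
        lsi.add_item 'Dogs bark', :animal
        lsi.add_item 'Cats meow', :animal
        lsi.add_item 'Ruby has blocks', :code
        # The winner should be one of the trained categories.
        assert_includes %i[animal code], lsi.classify('A dog barked')
      end
    end
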
metadata CHANGED
@@ -1,36 +1,80 @@
 --- !ruby/object:Gem::Specification
 name: classifier
 version: !ruby/object:Gem::Version
-  version: 1.3.4
+  version: 1.4.0
 platform: ruby
 authors:
 - Lucas Carlson
-autorequire: classifier
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-12-31 00:00:00.000000000 Z
+date: 2024-07-31 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fast-stemmer
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - "~>"
       - !ruby/object:Gem::Version
         version: 1.0.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - "~>"
       - !ruby/object:Gem::Version
         version: 1.0.0
-description: |2
-  A general classifier module to allow Bayesian and other types of classifications.
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: minitest
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: A general classifier module to allow Bayesian and other types of classifications.
 email: lucas@rufy.com
 executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- LICENSE
+- bin/bayes.rb
+- bin/summarize.rb
 - lib/classifier.rb
 - lib/classifier/bayes.rb
 - lib/classifier/extensions/string.rb
@@ -41,39 +85,28 @@ files:
 - lib/classifier/lsi/content_node.rb
 - lib/classifier/lsi/summary.rb
 - lib/classifier/lsi/word_list.rb
-- bin/bayes.rb
-- bin/summarize.rb
-- test/bayes/bayesian_test.rb
-- test/extensions/word_hash_test.rb
-- test/lsi/lsi_test.rb
 - test/test_helper.rb
-- Gemfile
-- Gemfile.lock
-- LICENSE
-- README.markdown
-- Rakefile
-homepage: http://classifier.rufy.com/
-licenses: []
+homepage: https://github.com/cardmagic/classifier
+licenses:
+- LGPL
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ">="
     - !ruby/object:Gem::Version
       version: '0'
-requirements:
-- A porter-stemmer module to split word stems.
-rubyforge_project:
-rubygems_version: 2.0.3
-signing_key:
+requirements: []
+rubygems_version: 3.5.9
+signing_key:
 specification_version: 4
 summary: A general classifier module to allow Bayesian and other types of classifications.
 test_files: []
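
Per the metadata above, fast-stemmer is now pinned with a pessimistic (~>) constraint and rake is promoted to a runtime dependency, with minitest and rdoc added for development. An application consuming this release would declare it along these lines:

    # Gemfile
    source 'https://rubygems.org'
    gem 'classifier', '~> 1.4'
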
data/Gemfile DELETED
@@ -1,5 +0,0 @@
-source 'https://rubygems.org'
-gem 'rake'
-gem 'rspec', :require => 'spec'
-gem 'rdoc'
-gem 'fast-stemmer'
data/Gemfile.lock DELETED
@@ -1,26 +0,0 @@
-GEM
-  remote: https://rubygems.org/
-  specs:
-    diff-lcs (1.2.5)
-    fast-stemmer (1.0.2)
-    json (1.8.1)
-    rake (10.1.1)
-    rdoc (4.1.0)
-      json (~> 1.4)
-    rspec (2.14.1)
-      rspec-core (~> 2.14.0)
-      rspec-expectations (~> 2.14.0)
-      rspec-mocks (~> 2.14.0)
-    rspec-core (2.14.7)
-    rspec-expectations (2.14.4)
-      diff-lcs (>= 1.1.3, < 2.0)
-    rspec-mocks (2.14.4)
-
-PLATFORMS
-  ruby
-
-DEPENDENCIES
-  fast-stemmer
-  rake
-  rdoc
-  rspec