yury-classifier 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/classifier/lsi.rb ADDED
@@ -0,0 +1,319 @@
+ # Author:: David Fayram (mailto:dfayram@lensmen.net)
+ # Copyright:: Copyright (c) 2005 David Fayram II
+ # License:: LGPL
+
+ begin
+   raise LoadError if ENV['NATIVE_VECTOR'] == "true" # to test the native vector class, try `rake test NATIVE_VECTOR=true`
+
+   require 'gsl' # requires http://rb-gsl.rubyforge.org/
+   require 'classifier/extensions/vector_serialize'
+   $GSL = true
+
+ rescue LoadError
+   warn "Notice: for 10x faster LSI support, please install http://rb-gsl.rubyforge.org/"
+   require 'classifier/extensions/vector'
+ end
+
+ require 'classifier/lsi/word_list'
+ require 'classifier/lsi/content_node'
+ require 'classifier/lsi/summary'
+
+ module Classifier
+
+   # This class implements a Latent Semantic Indexer, which can search, classify and cluster
+   # data based on underlying semantic relations. For more information on the algorithms used,
+   # please consult Wikipedia[http://en.wikipedia.org/wiki/Latent_Semantic_Indexing].
+   class LSI < Classifier::Base
+
+     attr_reader :word_list
+     attr_accessor :auto_rebuild
+
+     # Creates a fresh index.
+     # If you want to call #build_index manually, use
+     #   Classifier::LSI.new :auto_rebuild => false
+     #
+     def initialize(options = {})
+       @auto_rebuild = true unless options[:auto_rebuild] == false
+       @word_list, @items = WordList.new, {}
+       @version, @built_at_version = 0, -1
+       super
+     end
+
+     # Returns true if the index needs to be rebuilt. The index needs
+     # to be built after all information is added, but before you start
+     # using it for search, classification and cluster detection.
+     def needs_rebuild?
+       (@items.keys.size > 1) && (@version != @built_at_version)
+     end
+
+     # Adds an item to the index. item is assumed to be a string, but
+     # any item may be indexed so long as it responds to #to_s, or if
+     # you provide an optional block explaining how the indexer can
+     # fetch fresh string data. This optional block is passed the item,
+     # so the item may only be a reference to a URL or file name.
+     #
+     # For example:
+     #   lsi = Classifier::LSI.new
+     #   lsi.add_item "This is just plain text"
+     #   lsi.add_item("/home/me/filename.txt") { |x| File.read x }
+     #   ar = ActiveRecordObject.find( :all )
+     #   lsi.add_item(ar, *ar.categories) { |x| ar.content }
+     #
+     def add_item( item, *categories, &block )
+       clean_word_hash = block ? clean_word_hash(block.call(item)) : clean_word_hash(item.to_s)
+       @items[item] = ContentNode.new(clean_word_hash, *categories)
+       @version += 1
+       build_index if @auto_rebuild
+     end
+
+     # A less flexible shorthand for add_item that assumes
+     # you are passing in a string with no categories. item
+     # will be duck typed via to_s .
+     #
+     def <<( item )
+       add_item item
+     end
+
+     # Returns the categories for a given indexed item. You are free to add and remove
+     # items from this as you see fit. It does not invalidate an index to change its categories.
+     def categories_for(item)
+       return [] unless @items[item]
+       return @items[item].categories
+     end
+
+     # Removes an item from the database, if it is indexed.
+     #
+     def remove_item( item )
+       if @items.keys.include? item
+         @items.delete item
+         @version += 1
+       end
+     end
+
+     # Returns an array of items that are indexed.
+     def items
+       @items.keys
+     end
+
+     # This function rebuilds the index if needs_rebuild? returns true.
+     # For very large document spaces, this indexing operation may take some
+     # time to complete, so it may be wise to place the operation in another
+     # thread.
+     #
+     # As a rule, indexing will be fairly swift on modern machines until
+     # you have well over 500 documents indexed, or have an incredibly diverse
+     # vocabulary for your documents.
+     #
+     # The optional parameter "cutoff" is a tuning parameter. When the index is
+     # built, a certain number of s-values are discarded from the system. The
+     # cutoff parameter tells the indexer how many of these values to keep.
+     # A value of 1 for cutoff means that no semantic analysis will take place,
+     # turning the LSI class into a simple vector search engine.
+     def build_index( cutoff=0.75 )
+       return unless needs_rebuild?
+       make_word_list
+
+       doc_list = @items.values
+       tda = doc_list.collect { |node| node.raw_vector_with( @word_list ) }
+
+       if $GSL
+         tdm = GSL::Matrix.alloc(*tda).trans
+         ntdm = build_reduced_matrix(tdm, cutoff)
+
+         ntdm.size[1].times do |col|
+           vec = GSL::Vector.alloc( ntdm.column(col) ).row
+           doc_list[col].lsi_vector = vec
+           doc_list[col].lsi_norm = vec.normalize
+         end
+       else
+         tdm = Matrix.rows(tda).trans
+         ntdm = build_reduced_matrix(tdm, cutoff)
+
+         ntdm.row_size.times do |col|
+           doc_list[col].lsi_vector = ntdm.column(col) if doc_list[col]
+           doc_list[col].lsi_norm = ntdm.column(col).normalize if doc_list[col]
+         end
+       end
+
+       @built_at_version = @version
+     end
+
+     # This method returns max_chunks entries, ordered by their average semantic rating.
+     # Essentially, the average distance of each entry from all other entries is calculated,
+     # and the highest are returned.
+     #
+     # This can be used to build a summary service, or to provide more information about
+     # your dataset's general content. For example, if you were to use categorize on the
+     # results of this data, you could gather information on what your dataset is generally
+     # about.
+     def highest_relative_content( max_chunks=10 )
+       return [] if needs_rebuild?
+
+       avg_density = Hash.new
+       @items.each_key { |item| avg_density[item] = proximity_array_for_content(item).inject(0.0) { |sum, pair| sum + pair[1] } }
+
+       avg_density.keys.sort_by { |x| avg_density[x] }.reverse[0..max_chunks-1]
+     end
+
+     # This function is the primitive that find_related and classify
+     # build upon. It returns an array of 2-element arrays. The first element
+     # of this array is a document, and the second is its "score", defining
+     # how "close" it is to other indexed items.
+     #
+     # These values are somewhat arbitrary, having to do with the vector space
+     # created by your content, so the magnitude is interpretable but not always
+     # meaningful between indexes.
+     #
+     # The parameter doc is the content to compare. If that content is not
+     # indexed, you can pass an optional block to define how to create the
+     # text data. See add_item for examples of how this works.
+     def proximity_array_for_content( doc, &block )
+       return [] if needs_rebuild?
+
+       content_node = node_for_content( doc, &block )
+       result =
+         @items.keys.collect do |item|
+           if $GSL
+             val = content_node.search_vector * @items[item].search_vector.col
+           else
+             val = (Matrix[content_node.search_vector] * @items[item].search_vector)[0]
+           end
+           [item, val]
+         end
+       result.sort_by { |x| x[1] }.reverse
+     end
+
+     # Similar to proximity_array_for_content, this function takes similar
+     # arguments and returns a similar array. However, it uses the normalized
+     # calculated vectors instead of their full versions. This is useful when
+     # you're trying to perform operations on content that is much smaller than
+     # the text you're working with. search uses this primitive.
+     def proximity_norms_for_content( doc, &block )
+       return [] if needs_rebuild?
+
+       content_node = node_for_content( doc, &block )
+       result =
+         @items.keys.collect do |item|
+           if $GSL
+             val = content_node.search_norm * @items[item].search_norm.col
+           else
+             val = (Matrix[content_node.search_norm] * @items[item].search_norm)[0]
+           end
+           [item, val]
+         end
+       result.sort_by { |x| x[1] }.reverse
+     end
+
+     # This function allows for text-based search of your index. Unlike other functions
+     # like find_related and classify, search only takes short strings. It will also ignore
+     # factors like repeated words. It is best for short, google-like search terms.
+     # A search will first prioritize lexical relationships, then semantic ones.
+     #
+     # While this may seem backwards compared to the other functions that LSI supports,
+     # it is actually the same algorithm, just applied on a smaller document.
+     def search( string, max_nearest=3 )
+       return [] if needs_rebuild?
+       carry = proximity_norms_for_content( string )
+       result = carry.collect { |x| x[0] }
+       return result[0..max_nearest-1]
+     end
+
+     # This function takes content and finds other documents
+     # that are semantically "close", returning an array of documents sorted
+     # from most to least relevant.
+     # max_nearest specifies the number of documents to return. A value of
+     # 0 means that it returns all the indexed documents, sorted by relevance.
+     #
+     # This is particularly useful for identifying clusters in your document space.
+     # For example, you may want to identify several "What's Related" items for weblog
+     # articles, or find paragraphs that relate to each other in an essay.
+     def find_related( doc, max_nearest=3, &block )
+       carry =
+         proximity_array_for_content( doc, &block ).reject { |pair| pair[0] == doc }
+       result = carry.collect { |x| x[0] }
+       return result[0..max_nearest-1]
+     end
+
+     # This function uses a voting system to categorize documents, based on
+     # the categories of other documents. It uses the same logic as the
+     # find_related function to find related documents, then returns the
+     # most obvious category from this list.
+     #
+     # cutoff signifies the number of documents to consider when classifying
+     # text. A cutoff of 1 means that every document in the index votes on
+     # what category the document is in. This may not always make sense.
+     #
+     def classify( doc, cutoff=0.30, &block )
+       icutoff = (@items.size * cutoff).round
+       carry = proximity_array_for_content( doc, &block )
+       carry = carry[0..icutoff-1]
+       votes = {}
+       carry.each do |pair|
+         categories = @items[pair[0]].categories
+         categories.each do |category|
+           votes[category] ||= 0.0
+           votes[category] += pair[1]
+         end
+       end
+
+       ranking = votes.keys.sort_by { |x| votes[x] }
+       return ranking[-1]
+     end
+
+     # Prototype, only works on indexed documents.
+     # I have no clue if this is going to work, but in theory
+     # it's supposed to.
+     def highest_ranked_stems( doc, count=3 )
+       raise "Requested stem ranking on non-indexed content!" unless @items[doc]
+       arr = node_for_content(doc).lsi_vector.to_a
+       top_n = arr.sort.reverse[0..count-1]
+       return top_n.collect { |x| @word_list.word_for_index(arr.index(x)) }
+     end
+
+     private
+
+     def build_reduced_matrix( matrix, cutoff=0.75 )
+       # TODO: Check that M>=N on these dimensions! Transpose helps assure this
+       u, v, s = matrix.SV_decomp
+
+       # TODO: Better than 75% term, please. :\
+       s_cutoff = s.sort.reverse[(s.size * cutoff).round - 1]
+       s.size.times do |ord|
+         s[ord] = 0.0 if s[ord] < s_cutoff
+       end
+       # Reconstruct the term document matrix, only with reduced rank
+       u * Matrix.diag( s ) * v.trans
+     end
+
+     def node_for_content(item, &block)
+       if @items[item]
+         return @items[item]
+       else
+         clean_word_hash = block ? clean_word_hash(block.call(item)) : clean_word_hash(item.to_s)
+
+         cn = ContentNode.new(clean_word_hash, &block) # make the node and extract the data
+
+         unless needs_rebuild?
+           cn.raw_vector_with( @word_list ) # make the lsi raw and norm vectors
+         end
+       end
+
+       return cn
+     end
+
+     def make_word_list
+       @word_list = WordList.new
+       @items.each_value do |node|
+         node.word_hash.each_key { |key| @word_list.add_word key }
+       end
+     end
+
+   end
+ end
+
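For reference, the rank reduction in build_reduced_matrix above is a truncated SVD, the core of LSI. A sketch of the linear algebra the method implements (notation mine; T is the term-document matrix assembled from the raw word vectors):

    T = U \Sigma V^{\top}, \qquad
    \hat{\sigma}_i = \begin{cases} \sigma_i & \text{if } \sigma_i \ge \sigma_{\mathrm{cutoff}} \\ 0 & \text{otherwise} \end{cases}, \qquad
    \hat{T} = U \,\mathrm{diag}(\hat{\sigma})\, V^{\top}

Here sigma_cutoff is the singular value at position round(cutoff * s.size) of the descending-sorted spectrum, so roughly a cutoff fraction of the s-values survive. Zeroing the rest and recomposing via u * Matrix.diag( s ) * v.trans yields the reduced-rank matrix whose columns become each document's lsi_vector.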
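Putting the public API of this file together, here is a minimal usage sketch. The documents and categories are made up for illustration, and the results noted in comments are what one would expect from this corpus, not guaranteed outputs:

    require 'classifier'

    lsi = Classifier::LSI.new :auto_rebuild => false

    # Index a few documents, tagging each with a category.
    lsi.add_item "Dogs are loyal companions and love to play fetch.", "Dog"
    lsi.add_item "Cats prefer independence and quiet afternoons.", "Cat"
    lsi.add_item "Parrots are birds that can mimic human speech.", "Bird"

    # With :auto_rebuild => false the index must be built explicitly.
    lsi.build_index

    lsi.classify "My dog chased the ball."    # expected: "Dog"
    lsi.search "cats", 2                      # the two documents nearest "cats"
    lsi.find_related "Dogs are loyal companions and love to play fetch.", 2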
data/lib/classifier.rb ADDED
@@ -0,0 +1,32 @@
+ #--
+ # Copyright (c) 2005 Lucas Carlson
+ #
+ # Permission is hereby granted, free of charge, to any person obtaining
+ # a copy of this software and associated documentation files (the
+ # "Software"), to deal in the Software without restriction, including
+ # without limitation the rights to use, copy, modify, merge, publish,
+ # distribute, sublicense, and/or sell copies of the Software, and to
+ # permit persons to whom the Software is furnished to do so, subject to
+ # the following conditions:
+ #
+ # The above copyright notice and this permission notice shall be
+ # included in all copies or substantial portions of the Software.
+ #
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #++
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
+ # License:: LGPL
+
+ require 'rubygems'
+ require 'activesupport'
+ require 'lingua/stemmer'
+ require 'classifier/base'
+ require 'classifier/bayes'
+ require 'classifier/lsi'
data/lib/init.rb ADDED
@@ -0,0 +1 @@
+ require 'classifier'
data/test/base_test.rb ADDED
@@ -0,0 +1,17 @@
+ require File.dirname(__FILE__) + '/test_helper'
+ class HelpersTest < Test::Unit::TestCase
+
+   def test_word_hash
+     c = Classifier::Base.new
+     hash = {:good=>1, :"!"=>1, :hope=>1, :"'"=>1, :"."=>1, :love=>1, :word=>1, :them=>1, :test=>1}
+     assert_equal hash, c.word_hash("here are some good words of test's. I hope you love them!")
+   end
+
+   def test_clean_word_hash
+     c = Classifier::Base.new
+     hash = {:good=>1, :word=>1, :hope=>1, :love=>1, :them=>1, :test=>1}
+     assert_equal hash, c.clean_word_hash("here are some good words of test's. I hope you love them!")
+   end
+
+ end
data/test/bayes/bayesian_test.rb ADDED
@@ -0,0 +1,40 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+ class BayesianTest < Test::Unit::TestCase
+   def setup
+     @classifier = Classifier::Bayes.new :categories => ['Interesting', 'Uninteresting']
+   end
+
+   def test_good_training
+     assert_nothing_raised { @classifier.train_interesting "love" }
+   end
+
+   def test_bad_training
+     assert_raise(StandardError) { @classifier.train_no_category "words" }
+   end
+
+   def test_bad_method
+     assert_raise(NoMethodError) { @classifier.forget_everything_you_know "" }
+   end
+
+   def test_categories
+     assert_equal ['Interesting', 'Uninteresting'].sort, @classifier.categories.sort
+   end
+
+   def test_add_category
+     @classifier.add_category 'Test'
+     assert_equal ['Test', 'Interesting', 'Uninteresting'].sort, @classifier.categories.sort
+   end
+
+   def test_classification
+     @classifier.train_interesting "here are some good words. I hope you love them"
+     @classifier.train_uninteresting "here are some bad words, I hate you"
+     assert_equal 'Uninteresting', @classifier.classify("I hate bad words and you")
+   end
+
+   def test_ru_classification
+     c = Classifier::Bayes.new :categories => ['Interesting', 'Uninteresting'], :language => "ru"
+     c.train_interesting "вот несколько хороших слов. Я надеюсь вам они понравились"
+     c.train_uninteresting "вот несколько плохих слов. Я тебя ненавижу"
+     assert_equal 'Uninteresting', c.classify("Я ненавижу плохие слова и тебя")
+   end
+ end
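The tests above lean on Bayes's dynamic trainer methods (train_interesting, train_no_category, and so on). Since bayes.rb itself is not part of this diff, the following is only a hypothetical sketch of the kind of method_missing dispatch that would satisfy these tests — including the StandardError vs. NoMethodError split in test_bad_training and test_bad_method — and not the gem's actual implementation:

    # Hypothetical sketch; bayes.rb is not shown in this diff.
    class BayesSketch
      def initialize(categories)
        # One word-count table per category, e.g. {"interesting" => {"love" => 1}}.
        @categories = categories.each_with_object({}) { |c, h| h[c.downcase] = Hash.new(0) }
      end

      def train(category, text)
        raise StandardError, "No such category: #{category}" unless @categories[category]
        text.split(/\s+/).each { |w| @categories[category][w.downcase] += 1 }
      end

      # Maps train_interesting("...") to train("interesting", "...").
      # Unknown categories raise StandardError; anything that is not a
      # train_* call falls through to super and raises NoMethodError.
      def method_missing(name, *args)
        if name.to_s =~ /\Atrain_(\w+)\z/
          train($1, *args)
        else
          super
        end
      end
    end

    BayesSketch.new(['Interesting', 'Uninteresting']).train_interesting "love"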
data/test/lsi/lsi_test.rb ADDED
@@ -0,0 +1,123 @@
+ require File.dirname(__FILE__) + '/../test_helper'
+ class LSITest < Test::Unit::TestCase
+   def setup
+     # We repeat principal words to help weight them.
+     # This test is rather delicate, since this system is mostly noise.
+     @str1 = "This text deals with dogs. Dogs."
+     @str2 = "This text involves dogs too. Dogs! "
+     @str3 = "This text revolves around cats. Cats."
+     @str4 = "This text also involves cats. Cats!"
+     @str5 = "This text involves birds. Birds."
+   end
+
+   def test_basic_indexing
+     lsi = Classifier::LSI.new
+     [@str1, @str2, @str3, @str4, @str5].each { |x| lsi << x }
+     assert ! lsi.needs_rebuild?
+
+     # Note that the closest match to str1 is str2, even though it is not
+     # the closest text match.
+     assert_equal [@str2, @str5, @str3], lsi.find_related(@str1, 3)
+   end
+
+   def test_not_auto_rebuild
+     lsi = Classifier::LSI.new :auto_rebuild => false
+     lsi.add_item @str1, "Dog"
+     lsi.add_item @str2, "Dog"
+     assert lsi.needs_rebuild?
+     lsi.build_index
+     assert ! lsi.needs_rebuild?
+   end
+
+   def test_basic_categorizing
+     lsi = Classifier::LSI.new
+     lsi.add_item @str2, "Dog"
+     lsi.add_item @str3, "Cat"
+     lsi.add_item @str4, "Cat"
+     lsi.add_item @str5, "Bird"
+
+     assert_equal "Dog", lsi.classify( @str1 )
+     assert_equal "Cat", lsi.classify( @str3 )
+     assert_equal "Bird", lsi.classify( @str5 )
+   end
+
+   def test_external_classifying
+     lsi = Classifier::LSI.new
+     bayes = Classifier::Bayes.new :categories => ['Dog', 'Cat', 'Bird']
+     lsi.add_item @str1, "Dog" ; bayes.train_dog @str1
+     lsi.add_item @str2, "Dog" ; bayes.train_dog @str2
+     lsi.add_item @str3, "Cat" ; bayes.train_cat @str3
+     lsi.add_item @str4, "Cat" ; bayes.train_cat @str4
+     lsi.add_item @str5, "Bird" ; bayes.train_bird @str5
+
+     # We're talking about dogs, even though the text matches the corpus on
+     # cats better. Dogs have more semantic weight than cats, so Bayes
+     # will fail here, but the LSI recognizes the content.
+     tricky_case = "This text revolves around dogs."
+     assert_equal "Dog", lsi.classify( tricky_case )
+     assert_not_equal "Dog", bayes.classify( tricky_case )
+   end
+
+   def test_recategorize_interface
+     lsi = Classifier::LSI.new
+     lsi.add_item @str1, "Dog"
+     lsi.add_item @str2, "Dog"
+     lsi.add_item @str3, "Cat"
+     lsi.add_item @str4, "Cat"
+     lsi.add_item @str5, "Bird"
+
+     tricky_case = "This text revolves around dogs."
+     assert_equal "Dog", lsi.classify( tricky_case )
+
+     # Recategorize as needed.
+     lsi.categories_for(@str1).clear.push "Cow"
+     lsi.categories_for(@str2).clear.push "Cow"
+
+     assert !lsi.needs_rebuild?
+     assert_equal "Cow", lsi.classify( tricky_case )
+   end
+
+   def test_search
+     lsi = Classifier::LSI.new
+     [@str1, @str2, @str3, @str4, @str5].each { |x| lsi << x }
+
+     # Searching by content and text; note that @str2 comes up first, because
+     # both "dog" and "involve" are present. But the next match is @str1 instead
+     # of @str4, because "dog" carries more weight than "involves".
+     assert_equal( [@str2, @str1, @str4, @str5, @str3],
+                   lsi.search("dog involves", 100) )
+
+     # Keyword search shows how the space is mapped out in relation to
+     # "dog" when magnitude is removed. Note the relations: we move from dog
+     # through involve and then finally to other words.
+     assert_equal( [@str1, @str2, @str4, @str5, @str3],
+                   lsi.search("dog", 5) )
+   end
+
+   def test_serialize_safe
+     lsi = Classifier::LSI.new
+     [@str1, @str2, @str3, @str4, @str5].each { |x| lsi << x }
+
+     lsi_md = Marshal.dump lsi
+     lsi_m = Marshal.load lsi_md
+
+     assert_equal lsi_m.search("cat", 3), lsi.search("cat", 3)
+     assert_equal lsi_m.find_related(@str1, 3), lsi.find_related(@str1, 3)
+   end
+
+   def test_keyword_search
+     lsi = Classifier::LSI.new
+     lsi.add_item @str1, "Dog"
+     lsi.add_item @str2, "Dog"
+     lsi.add_item @str3, "Cat"
+     lsi.add_item @str4, "Cat"
+     lsi.add_item @str5, "Bird"
+
+     assert_equal [:dog, :text, :deal], lsi.highest_ranked_stems(@str1)
+   end
+
+   def test_summary
+     assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
+   end
+
+ end
data/test/test_helper.rb ADDED
@@ -0,0 +1,4 @@
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
+
+ require 'test/unit'
+ require 'classifier'
metadata ADDED
@@ -0,0 +1,113 @@
+ --- !ruby/object:Gem::Specification
+ name: yury-classifier
+ version: !ruby/object:Gem::Version
+   version: 1.3.1
+ platform: ruby
+ authors:
+ - Yury Korolev
+ autorequire:
+ bindir: bin
+ cert_chain: []
+
+ date: 2009-01-24 00:00:00 -08:00
+ default_executable:
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: activesupport
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: "0"
+     - - "="
+       - !ruby/object:Gem::Version
+         version: 2.2.2
+     version:
+ - !ruby/object:Gem::Dependency
+   name: ruby-stemmer
+   version_requirement:
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: "0"
+     - - "="
+       - !ruby/object:Gem::Version
+         version: 0.5.1
+     version:
+ description: A general classifier module to allow Bayesian and other types of classifications.
+ email: yury.korolev@gmail.com
+ executables: []
+
+ extensions: []
+
+ extra_rdoc_files:
+ - lib/classifier/base.rb
+ - lib/classifier/bayes.rb
+ - lib/classifier/extensions/vector.rb
+ - lib/classifier/extensions/vector_serialize.rb
+ - lib/classifier/lsi/content_node.rb
+ - lib/classifier/lsi/summary.rb
+ - lib/classifier/lsi/word_list.rb
+ - lib/classifier/lsi.rb
+ - lib/classifier.rb
+ - lib/init.rb
+ - LICENSE
+ - README
+ files:
+ - lib/classifier/base.rb
+ - lib/classifier/bayes.rb
+ - lib/classifier/extensions/vector.rb
+ - lib/classifier/extensions/vector_serialize.rb
+ - lib/classifier/lsi/content_node.rb
+ - lib/classifier/lsi/summary.rb
+ - lib/classifier/lsi/word_list.rb
+ - lib/classifier/lsi.rb
+ - lib/classifier.rb
+ - lib/init.rb
+ - LICENSE
+ - Rakefile
+ - README
+ - test/base_test.rb
+ - test/bayes/bayesian_test.rb
+ - test/lsi/lsi_test.rb
+ - test/test_helper.rb
+ - Manifest
+ - classifier.gemspec
+ has_rdoc: true
+ homepage: http://github.com/yury/classifier
+ post_install_message:
+ rdoc_options:
+ - --line-numbers
+ - --inline-source
+ - --title
+ - Classifier
+ - --main
+ - README
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "0"
+   version:
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: "1.2"
+   version:
+ requirements: []
+
+ rubyforge_project: classifier
+ rubygems_version: 1.2.0
+ signing_key:
+ specification_version: 2
+ summary: A general classifier module to allow Bayesian and other types of classifications.
+ test_files:
+ - test/base_test.rb
+ - test/bayes/bayesian_test.rb
+ - test/lsi/lsi_test.rb
+ - test/test_helper.rb