yury-classifier 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,319 @@
1
+ # Author:: David Fayram (mailto:dfayram@lensmen.net)
2
+ # Copyright:: Copyright (c) 2005 David Fayram II
3
+ # License:: LGPL
4
+
5
begin
  # To exercise the pure-Ruby vector class instead of GSL, run:
  #   rake test NATIVE_VECTOR=true
  raise LoadError if ENV['NATIVE_VECTOR'] == "true"

  require 'gsl' # requires http://rb-gsl.rubyforge.org/
  require 'classifier/extensions/vector_serialize'
  $GSL = true
rescue LoadError
  warn "Notice: for 10x faster LSI support, please install http://rb-gsl.rubyforge.org/"
  require 'classifier/extensions/vector'
end
16
+
17
+ require 'classifier/lsi/word_list'
18
+ require 'classifier/lsi/content_node'
19
+ require 'classifier/lsi/summary'
20
+
21
module Classifier

  # This class implements a Latent Semantic Indexer, which can search, classify and cluster
  # data based on underlying semantic relations. For more information on the algorithms used,
  # please consult Wikipedia[http://en.wikipedia.org/wiki/Latent_Semantic_Indexing].
  class LSI < Classifier::Base

    attr_reader :word_list
    attr_accessor :auto_rebuild

    # Create a fresh index.
    # If you want to call #build_index manually, use
    #   Classifier::LSI.new :auto_rebuild => false
    #
    def initialize(options = {})
      @auto_rebuild = true unless options[:auto_rebuild] == false
      @word_list, @items = WordList.new, {}
      @version, @built_at_version = 0, -1
      super
    end

    # Returns true if the index needs to be rebuilt. The index needs
    # to be built after all information is added, but before you start
    # using it for search, classification and cluster detection.
    def needs_rebuild?
      (@items.keys.size > 1) && (@version != @built_at_version)
    end

    # Adds an item to the index. item is assumed to be a string, but
    # any item may be indexed so long as it responds to #to_s or if
    # you provide an optional block explaining how the indexer can
    # fetch fresh string data. This optional block is passed the item,
    # so the item may only be a reference to a URL or file name.
    #
    # For example:
    #   lsi = Classifier::LSI.new
    #   lsi.add_item "This is just plain text"
    #   lsi.add_item "/home/me/filename.txt" { |x| File.read x }
    #   ar = ActiveRecordObject.find( :all )
    #   lsi.add_item ar, *ar.categories { |x| ar.content }
    #
    def add_item( item, *categories, &block )
      # Use a distinct local name so we don't shadow the #clean_word_hash helper.
      word_hash = block ? clean_word_hash(block.call(item)) : clean_word_hash(item.to_s)
      @items[item] = ContentNode.new(word_hash, *categories)
      @version += 1
      build_index if @auto_rebuild
    end

    # A less flexible shorthand for add_item that assumes
    # you are passing in a string with no categories. item
    # will be duck typed via to_s .
    #
    def <<( item )
      add_item item
    end

    # Returns the categories for a given indexed item. You are free to add and remove
    # items from this as you see fit. It does not invalidate an index to change its categories.
    # (The original file defined this method twice, verbatim; the duplicate has been removed.)
    def categories_for(item)
      return [] unless @items[item]
      @items[item].categories
    end

    # Removes an item from the database, if it is indexed.
    #
    # Fix: the original called the nonexistent Array#contain? and Hash#remove,
    # which raised NoMethodError at runtime; Hash#key? and Hash#delete are the real API.
    def remove_item( item )
      if @items.key? item
        @items.delete item
        @version += 1
      end
    end

    # Returns an array of items that are indexed.
    def items
      @items.keys
    end

    # This function rebuilds the index if needs_rebuild? returns true.
    # For very large document spaces, this indexing operation may take some
    # time to complete, so it may be wise to place the operation in another
    # thread.
    #
    # As a rule, indexing will be fairly swift on modern machines until
    # you have well over 500 documents indexed, or have an incredibly diverse
    # vocabulary for your documents.
    #
    # The optional parameter "cutoff" is a tuning parameter. When the index is
    # built, a certain number of s-values are discarded from the system. The
    # cutoff parameter tells the indexer how many of these values to keep.
    # A value of 1 for cutoff means that no semantic analysis will take place,
    # turning the LSI class into a simple vector search engine.
    def build_index( cutoff=0.75 )
      return unless needs_rebuild?
      make_word_list

      doc_list = @items.values
      tda = doc_list.collect { |node| node.raw_vector_with( @word_list ) }

      if $GSL
        tdm = GSL::Matrix.alloc(*tda).trans
        ntdm = build_reduced_matrix(tdm, cutoff)

        ntdm.size[1].times do |col|
          vec = GSL::Vector.alloc( ntdm.column(col) ).row
          doc_list[col].lsi_vector = vec
          doc_list[col].lsi_norm = vec.normalize
        end
      else
        tdm = Matrix.rows(tda).trans
        ntdm = build_reduced_matrix(tdm, cutoff)

        ntdm.row_size.times do |col|
          doc_list[col].lsi_vector = ntdm.column(col) if doc_list[col]
          doc_list[col].lsi_norm = ntdm.column(col).normalize if doc_list[col]
        end
      end

      @built_at_version = @version
    end

    # This method returns max_chunks entries, ordered by their average semantic rating.
    # Essentially, the average distance of each entry from all other entries is calculated,
    # the highest are returned.
    #
    # This can be used to build a summary service, or to provide more information about
    # your dataset's general content. For example, if you were to use categorize on the
    # results of this data, you could gather information on what your dataset is generally
    # about.
    def highest_relative_content( max_chunks=10 )
      return [] if needs_rebuild?

      avg_density = Hash.new
      @items.each_key do |item|
        avg_density[item] = proximity_array_for_content(item).inject(0.0) { |sum, pair| sum + pair[1] }
      end

      # Fix: the original chained a block-less #map here, which returned an
      # Enumerator instead of the promised array of entries.
      avg_density.keys.sort_by { |item| avg_density[item] }.reverse[0..max_chunks-1]
    end

    # This function is the primitive that find_related and classify
    # build upon. It returns an array of 2-element arrays. The first element
    # of this array is a document, and the second is its "score", defining
    # how "close" it is to other indexed items.
    #
    # These values are somewhat arbitrary, having to do with the vector space
    # created by your content, so the magnitude is interpretable but not always
    # meaningful between indexes.
    #
    # The parameter doc is the content to compare. If that content is not
    # indexed, you can pass an optional block to define how to create the
    # text data. See add_item for examples of how this works.
    def proximity_array_for_content( doc, &block )
      return [] if needs_rebuild?

      content_node = node_for_content( doc, &block )
      result =
        @items.keys.collect do |item|
          if $GSL
            val = content_node.search_vector * @items[item].search_vector.col
          else
            val = (Matrix[content_node.search_vector] * @items[item].search_vector)[0]
          end
          [item, val]
        end
      result.sort_by { |pair| pair[1] }.reverse
    end

    # Similar to proximity_array_for_content, this function takes similar
    # arguments and returns a similar array. However, it uses the normalized
    # calculated vectors instead of their full versions. This is useful when
    # you're trying to perform operations on content that is much smaller than
    # the text you're working with. search uses this primitive.
    def proximity_norms_for_content( doc, &block )
      return [] if needs_rebuild?

      content_node = node_for_content( doc, &block )
      result =
        @items.keys.collect do |item|
          if $GSL
            val = content_node.search_norm * @items[item].search_norm.col
          else
            val = (Matrix[content_node.search_norm] * @items[item].search_norm)[0]
          end
          [item, val]
        end
      result.sort_by { |pair| pair[1] }.reverse
    end

    # This function allows for text-based search of your index. Unlike other functions
    # like find_related and classify, search only takes short strings. It will also ignore
    # factors like repeated words. It is best for short, google-like search terms.
    # A search will first prioritize lexical relationships, then semantic ones.
    #
    # While this may seem backwards compared to the other functions that LSI supports,
    # it is actually the same algorithm, just applied on a smaller document.
    def search( string, max_nearest=3 )
      return [] if needs_rebuild?
      carry = proximity_norms_for_content( string )
      result = carry.collect { |pair| pair[0] }
      result[0..max_nearest-1]
    end

    # This function takes content and finds other documents
    # that are semantically "close", returning an array of documents sorted
    # from most to least relevant.
    # max_nearest specifies the number of documents to return. A value of
    # 0 means that it returns all the indexed documents, sorted by relevance.
    #
    # This is particularly useful for identifying clusters in your document space.
    # For example you may want to identify several "What's Related" items for weblog
    # articles, or find paragraphs that relate to each other in an essay.
    def find_related( doc, max_nearest=3, &block )
      carry =
        proximity_array_for_content( doc, &block ).reject { |pair| pair[0] == doc }
      result = carry.collect { |pair| pair[0] }
      result[0..max_nearest-1]
    end

    # This function uses a voting system to categorize documents, based on
    # the categories of other documents. It uses the same logic as the
    # find_related function to find related documents, then returns the
    # most obvious category from this list.
    #
    # cutoff signifies the number of documents to consider when classifying
    # text. A cutoff of 1 means that every document in the index votes on
    # what category the document is in. This may not always make sense.
    #
    def classify( doc, cutoff=0.30, &block )
      icutoff = (@items.size * cutoff).round
      carry = proximity_array_for_content( doc, &block )
      carry = carry[0..icutoff-1]
      votes = {}
      carry.each do |pair|
        categories = @items[pair[0]].categories
        categories.each do |category|
          votes[category] ||= 0.0
          votes[category] += pair[1]
        end
      end

      # Highest-scoring category wins.
      ranking = votes.keys.sort_by { |category| votes[category] }
      ranking[-1]
    end

    # Prototype, only works on indexed documents.
    # I have no clue if this is going to work, but in theory
    # it's supposed to.
    def highest_ranked_stems( doc, count=3 )
      raise "Requested stem ranking on non-indexed content!" unless @items[doc]
      arr = node_for_content(doc).lsi_vector.to_a
      top_n = arr.sort.reverse[0..count-1]
      top_n.collect { |x| @word_list.word_for_index(arr.index(x)) }
    end

    private

    # Zeroes out the smallest singular values of the term-document matrix,
    # reconstructing it at reduced rank. cutoff controls what fraction of
    # s-values survive.
    def build_reduced_matrix( matrix, cutoff=0.75 )
      # TODO: Check that M>=N on these dimensions! Transpose helps assure this
      u, v, s = matrix.SV_decomp

      # TODO: Better than 75% term, please. :\
      s_cutoff = s.sort.reverse[(s.size * cutoff).round - 1]
      s.size.times do |ord|
        s[ord] = 0.0 if s[ord] < s_cutoff
      end
      # Reconstruct the term document matrix, only with reduced rank
      u * Matrix.diag( s ) * v.trans
    end

    # Returns the ContentNode for an already-indexed item, or builds a
    # transient node for unindexed content (optionally via the block, as in
    # add_item).
    def node_for_content(item, &block)
      return @items[item] if @items[item]

      word_hash = block ? clean_word_hash(block.call(item)) : clean_word_hash(item.to_s)

      cn = ContentNode.new(word_hash, &block) # make the node and extract the data

      unless needs_rebuild?
        cn.raw_vector_with( @word_list ) # make the lsi raw and norm vectors
      end

      cn
    end

    # Rebuilds @word_list from every indexed item's word hash.
    def make_word_list
      @word_list = WordList.new
      @items.each_value do |node|
        node.word_hash.each_key { |key| @word_list.add_word key }
      end
    end

  end
end
319
+
data/lib/classifier.rb ADDED
@@ -0,0 +1,32 @@
1
+ #--
2
+ # Copyright (c) 2005 Lucas Carlson
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
24
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
25
+ # License:: LGPL
26
+
27
+ require 'rubygems'
28
+ require 'activesupport'
29
+ require 'lingua/stemmer'
30
+ require 'classifier/base'
31
+ require 'classifier/bayes'
32
+ require 'classifier/lsi'
data/lib/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'classifier'
data/test/base_test.rb ADDED
@@ -0,0 +1,17 @@
1
require File.dirname(__FILE__) + '/test_helper'

# Exercises the word-hashing helpers on Classifier::Base.
class HelpersTest < Test::Unit::TestCase

  def test_word_hash
    classifier = Classifier::Base.new
    expected = {:good=>1, :"!"=>1, :hope=>1, :"'"=>1, :"."=>1, :love=>1, :word=>1, :them=>1, :test=>1}
    assert_equal expected, classifier.word_hash("here are some good words of test's. I hope you love them!")
  end

  def test_clean_word_hash
    classifier = Classifier::Base.new
    expected = {:good=>1, :word=>1, :hope=>1, :love=>1, :them=>1, :test=>1}
    assert_equal expected, classifier.clean_word_hash("here are some good words of test's. I hope you love them!")
  end

end
@@ -0,0 +1,40 @@
1
require File.dirname(__FILE__) + '/../test_helper'

# Behavioral tests for the Bayes classifier: training, categories and
# classification (including a Russian-language case).
class BayesianTest < Test::Unit::TestCase

  def setup
    @classifier = Classifier::Bayes.new :categories => ['Interesting', 'Uninteresting']
  end

  def test_good_training
    assert_nothing_raised { @classifier.train_interesting "love" }
  end

  def test_bad_training
    assert_raise(StandardError) { @classifier.train_no_category "words" }
  end

  def test_bad_method
    assert_raise(NoMethodError) { @classifier.forget_everything_you_know "" }
  end

  def test_categories
    assert_equal ['Interesting', 'Uninteresting'].sort, @classifier.categories.sort
  end

  def test_add_category
    @classifier.add_category 'Test'
    assert_equal ['Test', 'Interesting', 'Uninteresting'].sort, @classifier.categories.sort
  end

  def test_classification
    @classifier.train_interesting "here are some good words. I hope you love them"
    @classifier.train_uninteresting "here are some bad words, I hate you"
    assert_equal 'Uninteresting', @classifier.classify("I hate bad words and you")
  end

  def test_ru_classification
    classifier = Classifier::Bayes.new :categories => ['Interesting', 'Uninteresting'], :language => "ru"
    classifier.train_interesting "вот несколько хороших слов. Я надеюсь вам они понравились"
    classifier.train_uninteresting "вот несколько плохих слов. Я тебя ненавижу"
    assert_equal 'Uninteresting', classifier.classify("Я ненавижу плохие слова и тебя")
  end

end
@@ -0,0 +1,123 @@
1
require File.dirname(__FILE__) + '/../test_helper'

# End-to-end tests for the Latent Semantic Indexer: indexing, rebuild
# bookkeeping, classification, search, serialization and stem ranking.
class LSITest < Test::Unit::TestCase

  def setup
    # we repeat principle words to help weight them.
    # This test is rather delicate, since this system is mostly noise.
    @str1 = "This text deals with dogs. Dogs."
    @str2 = "This text involves dogs too. Dogs! "
    @str3 = "This text revolves around cats. Cats."
    @str4 = "This text also involves cats. Cats!"
    @str5 = "This text involves birds. Birds."
  end

  def test_basic_indexing
    lsi = Classifier::LSI.new
    [@str1, @str2, @str3, @str4, @str5].each { |text| lsi << text }
    assert !lsi.needs_rebuild?

    # note that the closest match to str1 is str2, even though it is not
    # the closest text match.
    assert_equal [@str2, @str5, @str3], lsi.find_related(@str1, 3)
  end

  def test_not_auto_rebuild
    lsi = Classifier::LSI.new :auto_rebuild => false
    lsi.add_item @str1, "Dog"
    lsi.add_item @str2, "Dog"
    assert lsi.needs_rebuild?
    lsi.build_index
    assert !lsi.needs_rebuild?
  end

  def test_basic_categorizing
    lsi = Classifier::LSI.new
    lsi.add_item @str2, "Dog"
    lsi.add_item @str3, "Cat"
    lsi.add_item @str4, "Cat"
    lsi.add_item @str5, "Bird"

    assert_equal "Dog",  lsi.classify(@str1)
    assert_equal "Cat",  lsi.classify(@str3)
    assert_equal "Bird", lsi.classify(@str5)
  end

  def test_external_classifying
    lsi = Classifier::LSI.new
    bayes = Classifier::Bayes.new :categories => ['Dog', 'Cat', 'Bird']
    lsi.add_item @str1, "Dog"  ; bayes.train_dog @str1
    lsi.add_item @str2, "Dog"  ; bayes.train_dog @str2
    lsi.add_item @str3, "Cat"  ; bayes.train_cat @str3
    lsi.add_item @str4, "Cat"  ; bayes.train_cat @str4
    lsi.add_item @str5, "Bird" ; bayes.train_bird @str5

    # We're talking about dogs. Even though the text matches the corpus on
    # cats better. Dogs have more semantic weight than cats. So bayes
    # will fail here, but the LSI recognizes content.
    tricky_case = "This text revolves around dogs."
    assert_equal "Dog", lsi.classify(tricky_case)
    assert_not_equal "Dog", bayes.classify(tricky_case)
  end

  def test_recategorize_interface
    lsi = Classifier::LSI.new
    lsi.add_item @str1, "Dog"
    lsi.add_item @str2, "Dog"
    lsi.add_item @str3, "Cat"
    lsi.add_item @str4, "Cat"
    lsi.add_item @str5, "Bird"

    tricky_case = "This text revolves around dogs."
    assert_equal "Dog", lsi.classify(tricky_case)

    # Recategorize as needed.
    lsi.categories_for(@str1).clear.push "Cow"
    lsi.categories_for(@str2).clear.push "Cow"

    assert !lsi.needs_rebuild?
    assert_equal "Cow", lsi.classify(tricky_case)
  end

  def test_search
    lsi = Classifier::LSI.new
    [@str1, @str2, @str3, @str4, @str5].each { |text| lsi << text }

    # Searching by content and text, note that @str2 comes up first, because
    # both "dog" and "involve" are present. But, the next match is @str1 instead
    # of @str4, because "dog" carries more weight than involves.
    assert_equal([@str2, @str1, @str4, @str5, @str3],
                 lsi.search("dog involves", 100))

    # Keyword search shows how the space is mapped out in relation to
    # dog when magnitude is remove. Note the relations. We move from dog
    # through involve and then finally to other words.
    assert_equal([@str1, @str2, @str4, @str5, @str3],
                 lsi.search("dog", 5))
  end

  def test_serialize_safe
    lsi = Classifier::LSI.new
    [@str1, @str2, @str3, @str4, @str5].each { |text| lsi << text }

    dumped   = Marshal.dump lsi
    restored = Marshal.load dumped

    assert_equal restored.search("cat", 3), lsi.search("cat", 3)
    assert_equal restored.find_related(@str1, 3), lsi.find_related(@str1, 3)
  end

  def test_keyword_search
    lsi = Classifier::LSI.new
    lsi.add_item @str1, "Dog"
    lsi.add_item @str2, "Dog"
    lsi.add_item @str3, "Cat"
    lsi.add_item @str4, "Cat"
    lsi.add_item @str5, "Bird"

    assert_equal [:dog, :text, :deal], lsi.highest_ranked_stems(@str1)
  end

  def test_summary
    assert_equal "This text involves dogs too [...] This text also involves cats", [@str1, @str2, @str3, @str4, @str5].join.summary(2)
  end

end
@@ -0,0 +1,4 @@
1
# Put the gem's lib directory on the load path before requiring it.
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')

require 'test/unit'
require 'classifier'
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yury-classifier
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.3.1
5
+ platform: ruby
6
+ authors:
7
+ - Yury Korolev
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-24 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: activesupport
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ - - "="
24
+ - !ruby/object:Gem::Version
25
+ version: 2.2.2
26
+ version:
27
+ - !ruby/object:Gem::Dependency
28
+ name: ruby-stemmer
29
+ version_requirement:
30
+ version_requirements: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ - - "="
36
+ - !ruby/object:Gem::Version
37
+ version: 0.5.1
38
+ version:
39
+ description: A general classifier module to allow Bayesian and other types of classifications.
40
+ email: yury.korolev@gmail.com
41
+ executables: []
42
+
43
+ extensions: []
44
+
45
+ extra_rdoc_files:
46
+ - lib/classifier/base.rb
47
+ - lib/classifier/bayes.rb
48
+ - lib/classifier/extensions/vector.rb
49
+ - lib/classifier/extensions/vector_serialize.rb
50
+ - lib/classifier/lsi/content_node.rb
51
+ - lib/classifier/lsi/summary.rb
52
+ - lib/classifier/lsi/word_list.rb
53
+ - lib/classifier/lsi.rb
54
+ - lib/classifier.rb
55
+ - lib/init.rb
56
+ - LICENSE
57
+ - README
58
+ files:
59
+ - lib/classifier/base.rb
60
+ - lib/classifier/bayes.rb
61
+ - lib/classifier/extensions/vector.rb
62
+ - lib/classifier/extensions/vector_serialize.rb
63
+ - lib/classifier/lsi/content_node.rb
64
+ - lib/classifier/lsi/summary.rb
65
+ - lib/classifier/lsi/word_list.rb
66
+ - lib/classifier/lsi.rb
67
+ - lib/classifier.rb
68
+ - lib/init.rb
69
+ - LICENSE
70
+ - Rakefile
71
+ - README
72
+ - test/base_test.rb
73
+ - test/bayes/bayesian_test.rb
74
+ - test/lsi/lsi_test.rb
75
+ - test/test_helper.rb
76
+ - Manifest
77
+ - classifier.gemspec
78
+ has_rdoc: true
79
+ homepage: http://github.com/yury/classifier
80
+ post_install_message:
81
+ rdoc_options:
82
+ - --line-numbers
83
+ - --inline-source
84
+ - --title
85
+ - Classifier
86
+ - --main
87
+ - README
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: "0"
95
+ version:
96
+ required_rubygems_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: "1.2"
101
+ version:
102
+ requirements: []
103
+
104
+ rubyforge_project: classifier
105
+ rubygems_version: 1.2.0
106
+ signing_key:
107
+ specification_version: 2
108
+ summary: A general classifier module to allow Bayesian and other types of classifications.
109
+ test_files:
110
+ - test/base_test.rb
111
+ - test/bayes/bayesian_test.rb
112
+ - test/lsi/lsi_test.rb
113
+ - test/test_helper.rb