classifier 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
data/LICENSE ADDED
@@ -0,0 +1,341 @@
1
+
2
+ GNU GENERAL PUBLIC LICENSE
3
+ Version 2, June 1991
4
+
5
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
6
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
7
+ Everyone is permitted to copy and distribute verbatim copies
8
+ of this license document, but changing it is not allowed.
9
+
10
+ Preamble
11
+
12
+ The licenses for most software are designed to take away your
13
+ freedom to share and change it. By contrast, the GNU General Public
14
+ License is intended to guarantee your freedom to share and change free
15
+ software--to make sure the software is free for all its users. This
16
+ General Public License applies to most of the Free Software
17
+ Foundation's software and to any other program whose authors commit to
18
+ using it. (Some other Free Software Foundation software is covered by
19
+ the GNU Library General Public License instead.) You can apply it to
20
+ your programs, too.
21
+
22
+ When we speak of free software, we are referring to freedom, not
23
+ price. Our General Public Licenses are designed to make sure that you
24
+ have the freedom to distribute copies of free software (and charge for
25
+ this service if you wish), that you receive source code or can get it
26
+ if you want it, that you can change the software or use pieces of it
27
+ in new free programs; and that you know you can do these things.
28
+
29
+ To protect your rights, we need to make restrictions that forbid
30
+ anyone to deny you these rights or to ask you to surrender the rights.
31
+ These restrictions translate to certain responsibilities for you if you
32
+ distribute copies of the software, or if you modify it.
33
+
34
+ For example, if you distribute copies of such a program, whether
35
+ gratis or for a fee, you must give the recipients all the rights that
36
+ you have. You must make sure that they, too, receive or can get the
37
+ source code. And you must show them these terms so they know their
38
+ rights.
39
+
40
+ We protect your rights with two steps: (1) copyright the software, and
41
+ (2) offer you this license which gives you legal permission to copy,
42
+ distribute and/or modify the software.
43
+
44
+ Also, for each author's protection and ours, we want to make certain
45
+ that everyone understands that there is no warranty for this free
46
+ software. If the software is modified by someone else and passed on, we
47
+ want its recipients to know that what they have is not the original, so
48
+ that any problems introduced by others will not reflect on the original
49
+ authors' reputations.
50
+
51
+ Finally, any free program is threatened constantly by software
52
+ patents. We wish to avoid the danger that redistributors of a free
53
+ program will individually obtain patent licenses, in effect making the
54
+ program proprietary. To prevent this, we have made it clear that any
55
+ patent must be licensed for everyone's free use or not licensed at all.
56
+
57
+ The precise terms and conditions for copying, distribution and
58
+ modification follow.
59
+
60
+ GNU GENERAL PUBLIC LICENSE
61
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
62
+
63
+ 0. This License applies to any program or other work which contains
64
+ a notice placed by the copyright holder saying it may be distributed
65
+ under the terms of this General Public License. The "Program", below,
66
+ refers to any such program or work, and a "work based on the Program"
67
+ means either the Program or any derivative work under copyright law:
68
+ that is to say, a work containing the Program or a portion of it,
69
+ either verbatim or with modifications and/or translated into another
70
+ language. (Hereinafter, translation is included without limitation in
71
+ the term "modification".) Each licensee is addressed as "you".
72
+
73
+ Activities other than copying, distribution and modification are not
74
+ covered by this License; they are outside its scope. The act of
75
+ running the Program is not restricted, and the output from the Program
76
+ is covered only if its contents constitute a work based on the
77
+ Program (independent of having been made by running the Program).
78
+ Whether that is true depends on what the Program does.
79
+
80
+ 1. You may copy and distribute verbatim copies of the Program's
81
+ source code as you receive it, in any medium, provided that you
82
+ conspicuously and appropriately publish on each copy an appropriate
83
+ copyright notice and disclaimer of warranty; keep intact all the
84
+ notices that refer to this License and to the absence of any warranty;
85
+ and give any other recipients of the Program a copy of this License
86
+ along with the Program.
87
+
88
+ You may charge a fee for the physical act of transferring a copy, and
89
+ you may at your option offer warranty protection in exchange for a fee.
90
+
91
+ 2. You may modify your copy or copies of the Program or any portion
92
+ of it, thus forming a work based on the Program, and copy and
93
+ distribute such modifications or work under the terms of Section 1
94
+ above, provided that you also meet all of these conditions:
95
+
96
+ a) You must cause the modified files to carry prominent notices
97
+ stating that you changed the files and the date of any change.
98
+
99
+ b) You must cause any work that you distribute or publish, that in
100
+ whole or in part contains or is derived from the Program or any
101
+ part thereof, to be licensed as a whole at no charge to all third
102
+ parties under the terms of this License.
103
+
104
+ c) If the modified program normally reads commands interactively
105
+ when run, you must cause it, when started running for such
106
+ interactive use in the most ordinary way, to print or display an
107
+ announcement including an appropriate copyright notice and a
108
+ notice that there is no warranty (or else, saying that you provide
109
+ a warranty) and that users may redistribute the program under
110
+ these conditions, and telling the user how to view a copy of this
111
+ License. (Exception: if the Program itself is interactive but
112
+ does not normally print such an announcement, your work based on
113
+ the Program is not required to print an announcement.)
114
+
115
+ These requirements apply to the modified work as a whole. If
116
+ identifiable sections of that work are not derived from the Program,
117
+ and can be reasonably considered independent and separate works in
118
+ themselves, then this License, and its terms, do not apply to those
119
+ sections when you distribute them as separate works. But when you
120
+ distribute the same sections as part of a whole which is a work based
121
+ on the Program, the distribution of the whole must be on the terms of
122
+ this License, whose permissions for other licensees extend to the
123
+ entire whole, and thus to each and every part regardless of who wrote it.
124
+
125
+ Thus, it is not the intent of this section to claim rights or contest
126
+ your rights to work written entirely by you; rather, the intent is to
127
+ exercise the right to control the distribution of derivative or
128
+ collective works based on the Program.
129
+
130
+ In addition, mere aggregation of another work not based on the Program
131
+ with the Program (or with a work based on the Program) on a volume of
132
+ a storage or distribution medium does not bring the other work under
133
+ the scope of this License.
134
+
135
+ 3. You may copy and distribute the Program (or a work based on it,
136
+ under Section 2) in object code or executable form under the terms of
137
+ Sections 1 and 2 above provided that you also do one of the following:
138
+
139
+ a) Accompany it with the complete corresponding machine-readable
140
+ source code, which must be distributed under the terms of Sections
141
+ 1 and 2 above on a medium customarily used for software interchange; or,
142
+
143
+ b) Accompany it with a written offer, valid for at least three
144
+ years, to give any third party, for a charge no more than your
145
+ cost of physically performing source distribution, a complete
146
+ machine-readable copy of the corresponding source code, to be
147
+ distributed under the terms of Sections 1 and 2 above on a medium
148
+ customarily used for software interchange; or,
149
+
150
+ c) Accompany it with the information you received as to the offer
151
+ to distribute corresponding source code. (This alternative is
152
+ allowed only for noncommercial distribution and only if you
153
+ received the program in object code or executable form with such
154
+ an offer, in accord with Subsection b above.)
155
+
156
+ The source code for a work means the preferred form of the work for
157
+ making modifications to it. For an executable work, complete source
158
+ code means all the source code for all modules it contains, plus any
159
+ associated interface definition files, plus the scripts used to
160
+ control compilation and installation of the executable. However, as a
161
+ special exception, the source code distributed need not include
162
+ anything that is normally distributed (in either source or binary
163
+ form) with the major components (compiler, kernel, and so on) of the
164
+ operating system on which the executable runs, unless that component
165
+ itself accompanies the executable.
166
+
167
+ If distribution of executable or object code is made by offering
168
+ access to copy from a designated place, then offering equivalent
169
+ access to copy the source code from the same place counts as
170
+ distribution of the source code, even though third parties are not
171
+ compelled to copy the source along with the object code.
172
+
173
+ 4. You may not copy, modify, sublicense, or distribute the Program
174
+ except as expressly provided under this License. Any attempt
175
+ otherwise to copy, modify, sublicense or distribute the Program is
176
+ void, and will automatically terminate your rights under this License.
177
+ However, parties who have received copies, or rights, from you under
178
+ this License will not have their licenses terminated so long as such
179
+ parties remain in full compliance.
180
+
181
+ 5. You are not required to accept this License, since you have not
182
+ signed it. However, nothing else grants you permission to modify or
183
+ distribute the Program or its derivative works. These actions are
184
+ prohibited by law if you do not accept this License. Therefore, by
185
+ modifying or distributing the Program (or any work based on the
186
+ Program), you indicate your acceptance of this License to do so, and
187
+ all its terms and conditions for copying, distributing or modifying
188
+ the Program or works based on it.
189
+
190
+ 6. Each time you redistribute the Program (or any work based on the
191
+ Program), the recipient automatically receives a license from the
192
+ original licensor to copy, distribute or modify the Program subject to
193
+ these terms and conditions. You may not impose any further
194
+ restrictions on the recipients' exercise of the rights granted herein.
195
+ You are not responsible for enforcing compliance by third parties to
196
+ this License.
197
+
198
+ 7. If, as a consequence of a court judgment or allegation of patent
199
+ infringement or for any other reason (not limited to patent issues),
200
+ conditions are imposed on you (whether by court order, agreement or
201
+ otherwise) that contradict the conditions of this License, they do not
202
+ excuse you from the conditions of this License. If you cannot
203
+ distribute so as to satisfy simultaneously your obligations under this
204
+ License and any other pertinent obligations, then as a consequence you
205
+ may not distribute the Program at all. For example, if a patent
206
+ license would not permit royalty-free redistribution of the Program by
207
+ all those who receive copies directly or indirectly through you, then
208
+ the only way you could satisfy both it and this License would be to
209
+ refrain entirely from distribution of the Program.
210
+
211
+ If any portion of this section is held invalid or unenforceable under
212
+ any particular circumstance, the balance of the section is intended to
213
+ apply and the section as a whole is intended to apply in other
214
+ circumstances.
215
+
216
+ It is not the purpose of this section to induce you to infringe any
217
+ patents or other property right claims or to contest validity of any
218
+ such claims; this section has the sole purpose of protecting the
219
+ integrity of the free software distribution system, which is
220
+ implemented by public license practices. Many people have made
221
+ generous contributions to the wide range of software distributed
222
+ through that system in reliance on consistent application of that
223
+ system; it is up to the author/donor to decide if he or she is willing
224
+ to distribute software through any other system and a licensee cannot
225
+ impose that choice.
226
+
227
+ This section is intended to make thoroughly clear what is believed to
228
+ be a consequence of the rest of this License.
229
+
230
+ 8. If the distribution and/or use of the Program is restricted in
231
+ certain countries either by patents or by copyrighted interfaces, the
232
+ original copyright holder who places the Program under this License
233
+ may add an explicit geographical distribution limitation excluding
234
+ those countries, so that distribution is permitted only in or among
235
+ countries not thus excluded. In such case, this License incorporates
236
+ the limitation as if written in the body of this License.
237
+
238
+ 9. The Free Software Foundation may publish revised and/or new versions
239
+ of the General Public License from time to time. Such new versions will
240
+ be similar in spirit to the present version, but may differ in detail to
241
+ address new problems or concerns.
242
+
243
+ Each version is given a distinguishing version number. If the Program
244
+ specifies a version number of this License which applies to it and "any
245
+ later version", you have the option of following the terms and conditions
246
+ either of that version or of any later version published by the Free
247
+ Software Foundation. If the Program does not specify a version number of
248
+ this License, you may choose any version ever published by the Free Software
249
+ Foundation.
250
+
251
+ 10. If you wish to incorporate parts of the Program into other free
252
+ programs whose distribution conditions are different, write to the author
253
+ to ask for permission. For software which is copyrighted by the Free
254
+ Software Foundation, write to the Free Software Foundation; we sometimes
255
+ make exceptions for this. Our decision will be guided by the two goals
256
+ of preserving the free status of all derivatives of our free software and
257
+ of promoting the sharing and reuse of software generally.
258
+
259
+ NO WARRANTY
260
+
261
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
262
+ FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
263
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
264
+ PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
265
+ OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
266
+ MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
267
+ TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
268
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
269
+ REPAIR OR CORRECTION.
270
+
271
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
272
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
273
+ REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
274
+ INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
275
+ OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
276
+ TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
277
+ YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
278
+ PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
279
+ POSSIBILITY OF SUCH DAMAGES.
280
+
281
+ END OF TERMS AND CONDITIONS
282
+
283
+ How to Apply These Terms to Your New Programs
284
+
285
+ If you develop a new program, and you want it to be of the greatest
286
+ possible use to the public, the best way to achieve this is to make it
287
+ free software which everyone can redistribute and change under these terms.
288
+
289
+ To do so, attach the following notices to the program. It is safest
290
+ to attach them to the start of each source file to most effectively
291
+ convey the exclusion of warranty; and each file should have at least
292
+ the "copyright" line and a pointer to where the full notice is found.
293
+
294
+ <one line to give the program's name and a brief idea of what it does.>
295
+ Copyright (C) <year> <name of author>
296
+
297
+ This program is free software; you can redistribute it and/or modify
298
+ it under the terms of the GNU General Public License as published by
299
+ the Free Software Foundation; either version 2 of the License, or
300
+ (at your option) any later version.
301
+
302
+ This program is distributed in the hope that it will be useful,
303
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
304
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
305
+ GNU General Public License for more details.
306
+
307
+ You should have received a copy of the GNU General Public License
308
+ along with this program; if not, write to the Free Software
309
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
310
+
311
+
312
+ Also add information on how to contact you by electronic and paper mail.
313
+
314
+ If the program is interactive, make it output a short notice like this
315
+ when it starts in an interactive mode:
316
+
317
+ Gnomovision version 69, Copyright (C) year name of author
318
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
319
+ This is free software, and you are welcome to redistribute it
320
+ under certain conditions; type `show c' for details.
321
+
322
+ The hypothetical commands `show w' and `show c' should show the appropriate
323
+ parts of the General Public License. Of course, the commands you use may
324
+ be called something other than `show w' and `show c'; they could even be
325
+ mouse-clicks or menu items--whatever suits your program.
326
+
327
+ You should also get your employer (if you work as a programmer) or your
328
+ school, if any, to sign a "copyright disclaimer" for the program, if
329
+ necessary. Here is a sample; alter the names:
330
+
331
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
332
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
333
+
334
+ <signature of Ty Coon>, 1 April 1989
335
+ Ty Coon, President of Vice
336
+
337
+ This General Public License does not permit incorporating your program into
338
+ proprietary programs. If your program is a subroutine library, you may
339
+ consider it more useful to permit linking proprietary applications with the
340
+ library. If this is what you want to do, use the GNU Library General
341
+ Public License instead of this License.
data/README CHANGED
@@ -2,7 +2,25 @@
2
2
 
3
3
  Classifier is a general module to allow Bayesian and other types of classifications.
4
4
 
5
- == Usage
5
+ == Download
6
+
7
+ * http://rubyforge.org/projects/classifier
8
+ * gem install classifier
9
+ * svn co http://rufy.com/svn/classifier/trunk
10
+
11
+ == Dependencies
12
+ If you install Classifier from source, you'll need to install Martin Porter's stemmer algorithm with RubyGems as follows:
13
+ gem install stemmer
14
+
15
+ For LSI, you will need the following libraries:
16
+ GNU GSL:: http://www.gnu.org/software/gsl
17
+ rb-gsl:: http://rb-gsl.rubyforge.org
18
+
19
+ == Bayes
20
+ A Bayesian classifier by Lucas Carlson. Bayesian Classifiers are accurate, fast,
21
+ and have modest memory requirements.
22
+
23
+ === Usage
6
24
  require 'classifier'
7
25
  b = Classifier::Bayes.new 'Interesting', 'Uninteresting'
8
26
  b.train_interesting "here are some good words. I hope you love them"
@@ -20,14 +38,49 @@ Classifier is a general module to allow Bayesian and other types of classificati
20
38
 
21
39
  Using Madeleine, your application can persist the learned data over time.
22
40
 
23
- == Bayesian Classification
41
+ === Bayesian Classification
24
42
 
25
43
  * http://www.process.com/precisemail/bayesian_filtering.htm
26
44
  * http://en.wikipedia.org/wiki/Bayesian_filtering
27
45
  * http://www.paulgraham.com/spam.html
28
46
 
29
- == About
47
+ == LSI
48
+ An experimental Latent Semantic Indexer by David Fayram. Latent Semantic Indexing engines
49
+ are not as fast or as small as Bayesian classifiers, but are more flexible, providing
50
+ fast search and clustering detection as well as semantic analysis of the text that
51
+ theoretically simulates human learning.
52
+
53
+ === Usage
54
+ require 'classifier'
55
+ lsi = Classifier::LSI.new
56
+ strings = [ ["This text deals with dogs. Dogs.", :dog],
57
+ ["This text involves dogs too. Dogs! ", :dog],
58
+ ["This text revolves around cats. Cats.", :cat],
59
+ ["This text also involves cats. Cats!", :cat],
60
+ ["This text involves birds. Birds.",:bird ]]
61
+ strings.each {|x| lsi.add_item x.first, x.last}
62
+
63
+ lsi.search("dog", 3)
64
+ # returns => ["This text deals with dogs. Dogs.", "This text involves dogs too. Dogs! ",
65
+ # "This text also involves cats. Cats!"]
66
+
67
+ lsi.find_related(strings[2], 2)
68
+ # returns => ["This text revolves around cats. Cats.", "This text also involves cats. Cats!"]
69
+
70
+ lsi.classify "This text is also about dogs!"
71
+ # returns => :dog
72
+
73
+ Please see the Classifier::LSI documentation for more information. It is possible to index, search and classify
74
+ with more than just simple strings.
75
+
76
+ === Latent Semantic Indexing
77
+ * http://www.c2.com/cgi/wiki?LatentSemanticIndexing
78
+ * http://www.chadfowler.com/index.cgi/Computing/LatentSemanticIndexing.rdoc
79
+ * http://en.wikipedia.org/wiki/Latent_semantic_analysis
80
+
81
+ == Authors
82
+ * Lucas Carlson (mailto:lucas@rufy.com)
83
+ * David Fayram II (mailto:dfayram@lensmen.net)
84
+
85
+ This library is released under the terms of the GNU GPL. See LICENSE for more details.
30
86
 
31
- Author:: Lucas Carlson (mailto:lucas@rufy.com)
32
- Copyright:: Copyright (c) 2005 Lucas Carlson
33
- License:: LGPL
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ require 'rake/rdoctask'
5
5
  require 'rake/gempackagetask'
6
6
  require 'rake/contrib/rubyforgepublisher'
7
7
 
8
- PKG_VERSION = "1.1.1"
8
+ PKG_VERSION = "1.2.0"
9
9
 
10
10
  PKG_FILES = FileList[
11
11
  "lib/**/*", "bin/*", "test/**/*", "[A-Z]*", "Rakefile", "doc/**/*"
@@ -22,6 +22,12 @@ Rake::TestTask.new("test") { |t|
22
22
  t.verbose = true
23
23
  }
24
24
 
25
+ # Make a console, useful when working on tests
26
+ desc "Generate a test console"
27
+ task :console do
28
+ verbose( false ) { sh "irb -I lib/ -r 'classifier'" }
29
+ end
30
+
25
31
  # Genereate the RDoc documentation
26
32
  desc "Create documentation"
27
33
  Rake::RDocTask.new("doc") { |rdoc|
@@ -57,11 +63,16 @@ spec = Gem::Specification.new do |s|
57
63
 
58
64
  s.has_rdoc = true
59
65
 
66
+ #### Dependencies and requirements.
67
+
68
+ s.add_dependency('stemmer', '>= 1.0.0')
69
+ s.requirements << "A porter-stemmer module to split word stems."
70
+
60
71
  #### Author and project details.
61
72
 
62
73
  s.author = "Lucas Carlson"
63
74
  s.email = "lucas@rufy.com"
64
- s.homepage = "http://rubyforge.org/projects/classifier/"
75
+ s.homepage = "http://classifier.rufy.com/"
65
76
  end
66
77
 
67
78
  Rake::GemPackageTask.new(spec) do |pkg|
@@ -69,7 +80,8 @@ Rake::GemPackageTask.new(spec) do |pkg|
69
80
  pkg.need_tar = true
70
81
  end
71
82
 
72
- desc "Publish to RubyForge"
73
- task :rubyforge do
83
+ desc "Publish new documentation"
84
+ task :publish do
85
+ `ssh rufy update-classifier-doc`
74
86
  Rake::RubyForgePublisher.new('classifier', 'cardmagic').upload
75
87
  end