ruby_odeum 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. data/COPYING +504 -0
  2. data/LICENSE +504 -0
  3. data/README +50 -0
  4. data/bin/odeum_mgr +106 -0
  5. data/doc/rdoc/classes/Odeum.html +235 -0
  6. data/doc/rdoc/classes/Odeum.src/M000010.html +25 -0
  7. data/doc/rdoc/classes/Odeum.src/M000011.html +22 -0
  8. data/doc/rdoc/classes/Odeum.src/M000012.html +27 -0
  9. data/doc/rdoc/classes/Odeum.src/M000013.html +27 -0
  10. data/doc/rdoc/classes/Odeum.src/M000014.html +28 -0
  11. data/doc/rdoc/classes/Odeum/Document.html +382 -0
  12. data/doc/rdoc/classes/Odeum/Document.src/M000040.html +25 -0
  13. data/doc/rdoc/classes/Odeum/Document.src/M000041.html +22 -0
  14. data/doc/rdoc/classes/Odeum/Document.src/M000042.html +23 -0
  15. data/doc/rdoc/classes/Odeum/Document.src/M000043.html +23 -0
  16. data/doc/rdoc/classes/Odeum/Document.src/M000044.html +24 -0
  17. data/doc/rdoc/classes/Odeum/Document.src/M000045.html +32 -0
  18. data/doc/rdoc/classes/Odeum/Document.src/M000046.html +22 -0
  19. data/doc/rdoc/classes/Odeum/Document.src/M000047.html +22 -0
  20. data/doc/rdoc/classes/Odeum/Document.src/M000048.html +22 -0
  21. data/doc/rdoc/classes/Odeum/Document.src/M000049.html +22 -0
  22. data/doc/rdoc/classes/Odeum/Document.src/M000050.html +24 -0
  23. data/doc/rdoc/classes/Odeum/Document.src/M000051.html +27 -0
  24. data/doc/rdoc/classes/Odeum/Index.html +662 -0
  25. data/doc/rdoc/classes/Odeum/Index.src/M000015.html +46 -0
  26. data/doc/rdoc/classes/Odeum/Index.src/M000016.html +33 -0
  27. data/doc/rdoc/classes/Odeum/Index.src/M000017.html +35 -0
  28. data/doc/rdoc/classes/Odeum/Index.src/M000018.html +23 -0
  29. data/doc/rdoc/classes/Odeum/Index.src/M000019.html +22 -0
  30. data/doc/rdoc/classes/Odeum/Index.src/M000020.html +22 -0
  31. data/doc/rdoc/classes/Odeum/Index.src/M000021.html +22 -0
  32. data/doc/rdoc/classes/Odeum/Index.src/M000022.html +22 -0
  33. data/doc/rdoc/classes/Odeum/Index.src/M000023.html +22 -0
  34. data/doc/rdoc/classes/Odeum/Index.src/M000024.html +29 -0
  35. data/doc/rdoc/classes/Odeum/Index.src/M000025.html +23 -0
  36. data/doc/rdoc/classes/Odeum/Index.src/M000026.html +24 -0
  37. data/doc/rdoc/classes/Odeum/Index.src/M000027.html +23 -0
  38. data/doc/rdoc/classes/Odeum/Index.src/M000028.html +26 -0
  39. data/doc/rdoc/classes/Odeum/Index.src/M000029.html +24 -0
  40. data/doc/rdoc/classes/Odeum/Index.src/M000030.html +20 -0
  41. data/doc/rdoc/classes/Odeum/Index.src/M000031.html +22 -0
  42. data/doc/rdoc/classes/Odeum/Index.src/M000032.html +22 -0
  43. data/doc/rdoc/classes/Odeum/Index.src/M000033.html +22 -0
  44. data/doc/rdoc/classes/Odeum/Index.src/M000034.html +22 -0
  45. data/doc/rdoc/classes/Odeum/Index.src/M000035.html +20 -0
  46. data/doc/rdoc/classes/Odeum/Index.src/M000036.html +20 -0
  47. data/doc/rdoc/classes/Odeum/Index.src/M000037.html +22 -0
  48. data/doc/rdoc/classes/Odeum/Index.src/M000038.html +22 -0
  49. data/doc/rdoc/classes/Odeum/Index.src/M000039.html +22 -0
  50. data/doc/rdoc/classes/OdeumTest.html +257 -0
  51. data/doc/rdoc/classes/OdeumTest.src/M000001.html +18 -0
  52. data/doc/rdoc/classes/OdeumTest.src/M000002.html +19 -0
  53. data/doc/rdoc/classes/OdeumTest.src/M000003.html +27 -0
  54. data/doc/rdoc/classes/OdeumTest.src/M000004.html +25 -0
  55. data/doc/rdoc/classes/OdeumTest.src/M000005.html +44 -0
  56. data/doc/rdoc/classes/OdeumTest.src/M000006.html +20 -0
  57. data/doc/rdoc/classes/OdeumTest.src/M000007.html +39 -0
  58. data/doc/rdoc/classes/OdeumTest.src/M000008.html +59 -0
  59. data/doc/rdoc/classes/OdeumTest.src/M000009.html +41 -0
  60. data/doc/rdoc/created.rid +1 -0
  61. data/doc/rdoc/files/COPYING.html +756 -0
  62. data/doc/rdoc/files/LICENSE.html +756 -0
  63. data/doc/rdoc/files/README.html +175 -0
  64. data/doc/rdoc/files/ext/odeum_index/odeum_index_c.html +101 -0
  65. data/doc/rdoc/files/test/test_odeum_rb.html +109 -0
  66. data/doc/rdoc/fr_class_index.html +30 -0
  67. data/doc/rdoc/fr_file_index.html +31 -0
  68. data/doc/rdoc/fr_method_index.html +77 -0
  69. data/doc/rdoc/index.html +24 -0
  70. data/doc/rdoc/rdoc-style.css +208 -0
  71. data/ext/odeum_index/cabin.c +2735 -0
  72. data/ext/odeum_index/cabin.h +1040 -0
  73. data/ext/odeum_index/curia.c +1114 -0
  74. data/ext/odeum_index/curia.h +430 -0
  75. data/ext/odeum_index/depot.c +1910 -0
  76. data/ext/odeum_index/depot.h +439 -0
  77. data/ext/odeum_index/extconf.rb +10 -0
  78. data/ext/odeum_index/myconf.c +668 -0
  79. data/ext/odeum_index/myconf.h +523 -0
  80. data/ext/odeum_index/odeum.c +1743 -0
  81. data/ext/odeum_index/odeum.h +541 -0
  82. data/ext/odeum_index/odeum_index.c +991 -0
  83. data/ext/odeum_index/villa.c +1923 -0
  84. data/ext/odeum_index/villa.h +470 -0
  85. data/ext/odeum_index/vista.c +159 -0
  86. data/ext/odeum_index/vista.h +111 -0
  87. data/test/test_odeum.rb +174 -0
  88. metadata +138 -0
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>normalizeword (Odeum)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre>/**
14
+ * call-seq:
15
+ * Odeum::normalizeword(asis) -&gt; normal
16
+ *
17
+ * Given a word from breaktext (which is considered &quot;as-is&quot;)
18
+ * it will &quot;normalize&quot; it in a consistent way which is suitable
19
+ * for searching. The normalization effectively strips punctuation
20
+ * and spacing, and then lowercases the word. If there is nothing
21
+ * but &quot;removed&quot; chars in the asis string then the return is empty.
22
+ * Check for this so you don't try to search for nothing.
23
+ */
24
+ VALUE Odeum_normalizeword(VALUE self, VALUE asis) {
25
+ </pre>
26
+ </body>
27
+ </html>
@@ -0,0 +1,28 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>settuning (Odeum)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre>/**
14
+ * call-seq:
15
+ * Odeum::settuning(ibnum, idnum, cbnum, csiz) -&gt; nil
16
+ *
17
+ * ibnum=32749: Number of buckets for inverted indexes.
18
+ * idnum=7: Division number of inverted index.
19
+ * cbnum=262139: Number of buckets for dirty buffers.
20
+ * csiz=8388608: Maximum bytes to use memory for dirty buffers.
21
+ *
22
+ * This is set globally for all Indexes. Not sure what would happen
23
+ * if you changed this mid-stream, so don't. Make sure everything is closed.
24
+ */
25
+ VALUE Odeum_settuning(VALUE self, VALUE ibnum, VALUE idnum, VALUE cbnum, VALUE csiz) {
26
+ </pre>
27
+ </body>
28
+ </html>
@@ -0,0 +1,382 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Odeum::Document</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Odeum::Document</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/ext/odeum_index/odeum_index_c.html">
59
+ ext/odeum_index/odeum_index.c
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ Object
69
+ </td>
70
+ </tr>
71
+ </table>
72
+ </div>
73
+ <!-- banner header -->
74
+
75
+ <div id="bodyContent">
76
+
77
+
78
+
79
+ <div id="contextContent">
80
+
81
+
82
+
83
+ </div>
84
+
85
+ <div id="method-list">
86
+ <h3 class="section-bar">Methods</h3>
87
+
88
+ <div class="name-list">
89
+ <a href="#M000042">[]</a>&nbsp;&nbsp;
90
+ <a href="#M000041">[]=</a>&nbsp;&nbsp;
91
+ <a href="#M000045">add_content</a>&nbsp;&nbsp;
92
+ <a href="#M000044">add_word_list</a>&nbsp;&nbsp;
93
+ <a href="#M000043">addword</a>&nbsp;&nbsp;
94
+ <a href="#M000049">asis_words</a>&nbsp;&nbsp;
95
+ <a href="#M000051">close</a>&nbsp;&nbsp;
96
+ <a href="#M000046">id</a>&nbsp;&nbsp;
97
+ <a href="#M000040">new</a>&nbsp;&nbsp;
98
+ <a href="#M000048">normal_words</a>&nbsp;&nbsp;
99
+ <a href="#M000050">scores</a>&nbsp;&nbsp;
100
+ <a href="#M000047">uri</a>&nbsp;&nbsp;
101
+ </div>
102
+ </div>
103
+
104
+ </div>
105
+
106
+
107
+ <!-- if includes -->
108
+
109
+ <div id="section">
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+ <!-- if method_list -->
119
+ <div id="methods">
120
+ <h3 class="section-bar">Public Class methods</h3>
121
+
122
+ <div id="method-M000040" class="method-detail">
123
+ <a name="M000040"></a>
124
+
125
+ <div class="method-heading">
126
+ <a href="Document.src/M000040.html" target="Code" class="method-signature"
127
+ onclick="popupCode('Document.src/M000040.html');return false;">
128
+ <span class="method-name">Document.new uri &rarr; Document<br />
129
+ </span>
130
+ </a>
131
+ </div>
132
+
133
+ <div class="method-description">
134
+ <p>
135
+ The uri should be specified if you&#8217;re calling this. Internally the
136
+ Ruby/<a href="../Odeum.html">Odeum</a> library kind of &quot;cheats&quot;
137
+ and passes a Qnil for the uri so that the ODDOC can be assigned externally.
138
+ You should not (and probably cannot) do this from Ruby.
139
+ </p>
140
+ </div>
141
+ </div>
142
+
143
+ <h3 class="section-bar">Public Instance methods</h3>
144
+
145
+ <div id="method-M000042" class="method-detail">
146
+ <a name="M000042"></a>
147
+
148
+ <div class="method-heading">
149
+ <a href="Document.src/M000042.html" target="Code" class="method-signature"
150
+ onclick="popupCode('Document.src/M000042.html');return false;">
151
+ <span class="method-name">document[name] &rarr; String<br />
152
+ </span>
153
+ </a>
154
+ </div>
155
+
156
+ <div class="method-description">
157
+ <p>
158
+ Gets the meta-data attribute for the given name. The name must be a String.
159
+ </p>
160
+ </div>
161
+ </div>
162
+
163
+ <div id="method-M000041" class="method-detail">
164
+ <a name="M000041"></a>
165
+
166
+ <div class="method-heading">
167
+ <a href="Document.src/M000041.html" target="Code" class="method-signature"
168
+ onclick="popupCode('Document.src/M000041.html');return false;">
169
+ <span class="method-name">doc[attr] = value<br />
170
+ </span>
171
+ </a>
172
+ </div>
173
+
174
+ <div class="method-description">
175
+ <p>
176
+ Adds meta-data to the document. They should be Strings only.
177
+ </p>
178
+ </div>
179
+ </div>
180
+
181
+ <div id="method-M000045" class="method-detail">
182
+ <a name="M000045"></a>
183
+
184
+ <div class="method-heading">
185
+ <a href="Document.src/M000045.html" target="Code" class="method-signature"
186
+ onclick="popupCode('Document.src/M000045.html');return false;">
187
+ <span class="method-name">document.add_content(content) &rarr; document<br />
188
+ </span>
189
+ </a>
190
+ </div>
191
+
192
+ <div class="method-description">
193
+ <p>
194
+ Takes the contents, breaks the words up, and then puts them in the document
195
+ in normalized form. This is the common pattern that people use a <a
196
+ href="Document.html">Document</a> with. You may also use <a
197
+ href="Document.html#M000043">Document.addword</a> to add one word at a time,
198
+ and <a href="Document.html#M000044">Document.add_word_list</a> to add a
199
+ list of words.
200
+ </p>
201
+ <p>
202
+ It uses the default Odeum::breaktext method to break up the text, so
203
+ don&#8217;t use this if you need specialized stuffs.
204
+ </p>
205
+ <p>
206
+ If the process of normalizing a word creates an empty word, then it is not
207
+ added to the document&#8217;s words. This usually happens for punctuation
208
+ that isn&#8217;t usually searched for anyway.
209
+ </p>
210
+ </div>
211
+ </div>
212
+
213
+ <div id="method-M000044" class="method-detail">
214
+ <a name="M000044"></a>
215
+
216
+ <div class="method-heading">
217
+ <a href="Document.src/M000044.html" target="Code" class="method-signature"
218
+ onclick="popupCode('Document.src/M000044.html');return false;">
219
+ <span class="method-name">document.add_word_list(asis) &rarr; document<br />
220
+ </span>
221
+ </a>
222
+ </div>
223
+
224
+ <div class="method-description">
225
+ <p>
226
+ Takes an array of &quot;as-is&quot; words, normalizes them, and puts them
227
+ in the document. It assumes that the array is composed of asis words and
228
+ normalizes them before putting them in the document.
229
+ </p>
230
+ </div>
231
+ </div>
232
+
233
+ <div id="method-M000043" class="method-detail">
234
+ <a name="M000043"></a>
235
+
236
+ <div class="method-heading">
237
+ <a href="Document.src/M000043.html" target="Code" class="method-signature"
238
+ onclick="popupCode('Document.src/M000043.html');return false;">
239
+ <span class="method-name">document.addword(normal, asis)<br />
240
+ </span>
241
+ </a>
242
+ </div>
243
+
244
+ <div class="method-description">
245
+ <p>
246
+ The basic call to add a normal and asis version of a word to the document
247
+ for indexing.
248
+ </p>
249
+ </div>
250
+ </div>
251
+
252
+ <div id="method-M000049" class="method-detail">
253
+ <a name="M000049"></a>
254
+
255
+ <div class="method-heading">
256
+ <a href="Document.src/M000049.html" target="Code" class="method-signature"
257
+ onclick="popupCode('Document.src/M000049.html');return false;">
258
+ <span class="method-name">document.asis_words &rarr; [word1, word2, ...]<br />
259
+ </span>
260
+ </a>
261
+ </div>
262
+
263
+ <div class="method-description">
264
+ <p>
265
+ Returns all of the asis or &quot;appearance form&quot; words in the
266
+ document.
267
+ </p>
268
+ </div>
269
+ </div>
270
+
271
+ <div id="method-M000051" class="method-detail">
272
+ <a name="M000051"></a>
273
+
274
+ <div class="method-heading">
275
+ <a href="Document.src/M000051.html" target="Code" class="method-signature"
276
+ onclick="popupCode('Document.src/M000051.html');return false;">
277
+ <span class="method-name">doc.close &rarr; nil<br />
278
+ </span>
279
+ </a>
280
+ </div>
281
+
282
+ <div class="method-description">
283
+ <p>
284
+ Explicitly closes a document. Because of what I can only decide is a bug in
285
+ how an each iterator works, you must explicitly close a document if you are
286
+ not storing it and you are in an each. There are probably subtle things
287
+ about Ruby memory management I&#8217;m missing, but my tests show that all
288
+ <a href="Document.html">Document</a> objects created with <a
289
+ href="Index.html#M000020">Index.get</a> do not get garbage collected until
290
+ they exit a block.
291
+ </p>
292
+ </div>
293
+ </div>
294
+
295
+ <div id="method-M000046" class="method-detail">
296
+ <a name="M000046"></a>
297
+
298
+ <div class="method-heading">
299
+ <a href="Document.src/M000046.html" target="Code" class="method-signature"
300
+ onclick="popupCode('Document.src/M000046.html');return false;">
301
+ <span class="method-name">document.id &rarr; Fixnum<br />
302
+ </span>
303
+ </a>
304
+ </div>
305
+
306
+ <div class="method-description">
307
+ <p>
308
+ Gives you the <a href="Index.html">Odeum::Index</a> id used for the
309
+ document.
310
+ </p>
311
+ </div>
312
+ </div>
313
+
314
+ <div id="method-M000048" class="method-detail">
315
+ <a name="M000048"></a>
316
+
317
+ <div class="method-heading">
318
+ <a href="Document.src/M000048.html" target="Code" class="method-signature"
319
+ onclick="popupCode('Document.src/M000048.html');return false;">
320
+ <span class="method-name">document.normal_words &rarr; [word1, word2, ... ]<br />
321
+ </span>
322
+ </a>
323
+ </div>
324
+
325
+ <div class="method-description">
326
+ <p>
327
+ Returns the list of &quot;normal&quot; words in this document.
328
+ </p>
329
+ </div>
330
+ </div>
331
+
332
+ <div id="method-M000050" class="method-detail">
333
+ <a name="M000050"></a>
334
+
335
+ <div class="method-heading">
336
+ <a href="Document.src/M000050.html" target="Code" class="method-signature"
337
+ onclick="popupCode('Document.src/M000050.html');return false;">
338
+ <span class="method-name">document.scores(max, index) &rarr; { word => score, word => score, ...}<br />
339
+ </span>
340
+ </a>
341
+ </div>
342
+
343
+ <div class="method-description">
344
+ <p>
345
+ Get the normalized words and their scores in the document. The strange
346
+ thing is that the scores are returned as Strings, but they are decimal
347
+ strings.
348
+ </p>
349
+ </div>
350
+ </div>
351
+
352
+ <div id="method-M000047" class="method-detail">
353
+ <a name="M000047"></a>
354
+
355
+ <div class="method-heading">
356
+ <a href="Document.src/M000047.html" target="Code" class="method-signature"
357
+ onclick="popupCode('Document.src/M000047.html');return false;">
358
+ <span class="method-name">document.uri &rarr; String<br />
359
+ </span>
360
+ </a>
361
+ </div>
362
+
363
+ <div class="method-description">
364
+ <p>
365
+ Gets the uri that this document represents.
366
+ </p>
367
+ </div>
368
+ </div>
369
+
370
+
371
+ </div>
372
+
373
+
374
+ </div>
375
+
376
+
377
+ <div id="validator-badges">
378
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
379
+ </div>
380
+
381
+ </body>
382
+ </html>
@@ -0,0 +1,25 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>new (Odeum::Document)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre>/**
14
+ * call-seq:
15
+ * Document.new uri -&gt; Document
16
+ *
17
+ * The uri should be specified if you're calling this. Internally the
18
+ * Ruby/Odeum library kind of &quot;cheats&quot; and passes a Qnil for the uri
19
+ * so that the ODDOC can be assigned externally. You should not
20
+ * (and probably cannot) do this from Ruby.
21
+ */
22
+ VALUE Document_initialize(VALUE self, VALUE uri) {
23
+ </pre>
24
+ </body>
25
+ </html>