jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/CHANGELOG ADDED
@@ -0,0 +1,24 @@
1
+ Fri Oct 20 22:25:37 JST 2006
2
+ * Added Filter#bits method to built-in Filters.
3
+ * Added MappingFilter < TokenFilter that can be used to map strings to other
4
+ strings during analysis. A possible use of this is to filter utf-8
5
+ characters to ascii characters.
6
+
7
+ Fri Oct 13 09:18:31 JST 2006
8
+ * Changed documentation to state truthfully that FULL_ENGLISH_STOP_WORDS is
9
+ being used by default in StandardAnalyzer and StopwordFilter.
10
+ * Removed 'will', 's' and 't' from ENGLISH_STOP_WORDS so that all words in
11
+ ENGLISH_STOP_WORDS can be found in FULL_ENGLISH_STOP_WORDS, that is
12
+ ENGLISH_STOP_WORDS is a subset of FULL_ENGLISH_STOP_WORDS.
13
+
14
+ Thu Oct 12 23:04:19 JST 2006
15
+ * Fixed adding SortField to Sort object in Ruby. Garbage collection wasn't
16
+ working.
17
+ * Can now set :sort => SortField#new
18
+
19
+ Tue Oct 10 14:42:17 JST 2006
20
+ * Fixed MultiTermDocEnum bug introduced in version 0.10.10 during
21
+ performance enhancements.
22
+ * Added Filter#bits(index_reader) method to C implemented filters so that
23
+ they can be used in Ruby.
24
+
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2005-2006 David Balmain
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,90 @@
1
+ = Ferret
2
+
3
+ Ferret is a Ruby search library inspired by the Apache Lucene search engine for
4
+ Java (http://jakarta.apache.org/lucene/). In the same way as Lucene, it is not
5
+ a standalone application, but a library you can use to index documents and
6
+ search for things in them later.
7
+
8
+ == Requirements
9
+
10
+ * Ruby 1.8
11
+ * C compiler to build the extension. Tested with gcc, VC6
12
+ * make (or nmake on windows)
13
+
14
+ == Installation
15
+
16
+ $ sudo gem install ferret
17
+
18
+ If you don't have rubygems installed you can still install Ferret. Just
19
+ download one of the zipped up versions of Ferret, unzip it and change into the
20
+ unzipped directory. Then run the following set of commands;
21
+
22
+ $ ruby setup.rb config
23
+ $ ruby setup.rb setup
24
+ $ sudo ruby setup.rb install
25
+
26
+ == Usage
27
+
28
+ You can read the TUTORIAL which you'll find in the same directory as this
29
+ README. You can also check the following modules for more specific
30
+ documentation.
31
+
32
+ * Ferret::Analysis: for more information on how the data is processed when it
33
+ is tokenized. There are a number of things you can do with your data such as
34
+ adding stop lists or perhaps a porter stemmer. There are also a number of
35
+ analyzers already available and it is almost trivial to create a new one
36
+ with a simple regular expression.
37
+
38
+ * Ferret::Search: for more information on querying the index. There are a
39
+ number of already available queries and it's unlikely you'll need to create
40
+ your own. You may however want to take advantage of the sorting or filtering
41
+ abilities of Ferret to present your data the best way you see fit.
42
+
43
+ * Ferret::Document: to find out how to create documents. This part of Ferret
44
+ is relatively straightforward. If you know how Strings, Hashes and Arrays work
45
+ in Ruby then you'll be able to create Documents.
46
+
47
+ * Ferret::QueryParser: if you want to find out more about what you can do with
48
+ Ferret's Query Parser, this is the place to look. The query parser is one
49
+ area that could use a bit of work so please send your suggestions.
50
+
51
+ * Ferret::Index: for more advanced access to the index you'll probably want to
52
+ use the Ferret::Index::IndexWriter and Ferret::Index::IndexReader. This is
53
+ the place to look for more information on them.
54
+
55
+ * Ferret::Store: This is the module used to access the actual index storage
56
+ and won't be of much interest to most people.
57
+
58
+ === Performance
59
+
60
+ We are unaware of any alternatives that can out-perform Ferret while still
61
+ matching it in features.
62
+
63
+ == Contact
64
+
65
+ For bug reports and patches I have set up Trac here;
66
+
67
+ http://ferret.davebalmain.com/trac
68
+
69
+ Queries, discussion etc should be addressed to the mailing lists here;
70
+
71
+ http://rubyforge.org/projects/ferret/
72
+
73
+ Alternatively you could create a new page for discussion on the Ferret wiki;
74
+
75
+ http://ferret.davebalmain.com/trac
76
+
77
+ Of course, since Ferret was ported from Apache Lucene, most of what you can
78
+ do with Lucene you can also do with Ferret.
79
+
80
+ == Authors
81
+
82
+ [<b>David Balmain</b>] Port to Ruby
83
+
84
+ [The Apache Software Foundation (Doug Cutting and friends)] Original Apache Lucene
85
+
86
+ == License
87
+
88
+ Ferret is available under an MIT-style license.
89
+
90
+ :include: MIT-LICENSE
data/RELEASE_CHANGES ADDED
@@ -0,0 +1,137 @@
1
+ (in /home/dave/w/ferret/ruby)
2
+ ------------------------------------------------------------------------
3
+ r830 | dbalmain | 2008-03-01 14:10:47 +1100 (Sat, 01 Mar 2008) | 1 line
4
+
5
+ A few more updates to the build system
6
+ ------------------------------------------------------------------------
7
+ r829 | dbalmain | 2008-03-01 13:55:16 +1100 (Sat, 01 Mar 2008) | 1 line
8
+
9
+ A few more updates to the build system
10
+ ------------------------------------------------------------------------
11
+ r828 | dbalmain | 2008-02-29 10:48:33 +1100 (Fri, 29 Feb 2008) | 1 line
12
+
13
+ Moved largefile test to a new long_running test directory so that tests can be run more easily
14
+ ------------------------------------------------------------------------
15
+ r826 | dbalmain | 2008-02-29 10:09:48 +1100 (Fri, 29 Feb 2008) | 1 line
16
+
17
+ Updated references to ferret/version.rb. Updated Rakefile to allow release task testing
18
+ ------------------------------------------------------------------------
19
+ r825 | dbalmain | 2008-02-29 10:02:27 +1100 (Fri, 29 Feb 2008) | 1 line
20
+
21
+ Updated Rakefile to clean it up (added namespaces).
22
+ ------------------------------------------------------------------------
23
+ r824 | dbalmain | 2008-02-29 10:01:08 +1100 (Fri, 29 Feb 2008) | 1 line
24
+
25
+ Updated :filter_proc so that custom PostFilter extensions can be built and applied to searches. See ruby/examples/c_extensions/age_filter
26
+ ------------------------------------------------------------------------
27
+ r823 | dbalmain | 2008-02-26 18:44:39 +1100 (Tue, 26 Feb 2008) | 1 line
28
+
29
+ Fixed ticket #277. This will also help with the correct highlighting of matching urls in queries
30
+ ------------------------------------------------------------------------
31
+ r822 | dbalmain | 2008-02-22 12:46:48 +1100 (Fri, 22 Feb 2008) | 1 line
32
+
33
+ Updated to latest posh.h => http://poshlib.hookatooka.com/poshlib/
34
+ ------------------------------------------------------------------------
35
+ r821 | dbalmain | 2008-02-22 09:05:25 +1100 (Fri, 22 Feb 2008) | 1 line
36
+
37
+ Added ruby bindings to MultiMapper for testing in Ruby
38
+ ------------------------------------------------------------------------
39
+ r820 | dbalmain | 2008-02-09 14:30:55 +1100 (Sat, 09 Feb 2008) | 1 line
40
+
41
+ Updated svn:ignore properties to handle new stemmer files
42
+ ------------------------------------------------------------------------
43
+ r819 | dbalmain | 2008-02-09 14:27:46 +1100 (Sat, 09 Feb 2008) | 9 lines
44
+
45
+ Fixed Ticket #337. StemFilter.new now works with 'English', :english or
46
+ 'EnGlIsH' and 'UTF_8' or 'utf-8' etc.
47
+
48
+ Also, good news for Norwegians, Romanians, Turks and Finns. We now have 3 new
49
+ stemmers for Norwegian, Romanian and Turkish and 2 new stop-word lists for
50
+ Finnish and Hungarian. Please try them out and let me know if there are any
51
+ problems.
52
+
53
+
54
+ ------------------------------------------------------------------------
55
+ r818 | dbalmain | 2008-02-09 13:20:37 +1100 (Sat, 09 Feb 2008) | 1 line
56
+
57
+ Removed old version of stemmer. Making way for new version. This will break build temporarily
58
+ ------------------------------------------------------------------------
59
+ r817 | dbalmain | 2008-02-09 11:55:02 +1100 (Sat, 09 Feb 2008) | 1 line
60
+
61
+ Added a very useful group_by example.
62
+ ------------------------------------------------------------------------
63
+ r816 | dbalmain | 2008-02-09 09:26:20 +1100 (Sat, 09 Feb 2008) | 1 line
64
+
65
+ Updated documentation for :filter_proc to indicate that you can return a Float to be used to modify the score.
66
+ ------------------------------------------------------------------------
67
+ r815 | dbalmain | 2008-02-09 00:27:58 +1100 (Sat, 09 Feb 2008) | 1 line
68
+
69
+ Made TypedRangeQuery the default range query when used from Ferret::Index::Index
70
+ ------------------------------------------------------------------------
71
+ r814 | dbalmain | 2008-02-08 23:16:55 +1100 (Fri, 08 Feb 2008) | 1 line
72
+
73
+ Made the TypedRangeQuery optional in the query parser
74
+ ------------------------------------------------------------------------
75
+ r813 | dbalmain | 2008-02-08 23:12:13 +1100 (Fri, 08 Feb 2008) | 1 line
76
+
77
+ Added TypedRangeQuery and TypedRangeFilter to the ruby bindings
78
+ ------------------------------------------------------------------------
79
+ r812 | dbalmain | 2008-02-08 22:19:31 +1100 (Fri, 08 Feb 2008) | 1 line
80
+
81
+ Added TypedRangeQuery so that you can do range queries with unpadded numbers
82
+ ------------------------------------------------------------------------
83
+ r811 | dbalmain | 2008-02-08 16:22:06 +1100 (Fri, 08 Feb 2008) | 3 lines
84
+
85
+ Whoops, quick fix. Had unnecessarily nested locks.
86
+
87
+
88
+ ------------------------------------------------------------------------
89
+ r810 | dbalmain | 2008-02-08 16:17:33 +1100 (Fri, 08 Feb 2008) | 1 line
90
+
91
+ Added patch for Ticket #340 which adds batch updating and deleting. Made significant modifications from the patch.
92
+ ------------------------------------------------------------------------
93
+ r809 | dbalmain | 2008-02-08 13:49:07 +1100 (Fri, 08 Feb 2008) | 18 lines
94
+
95
+ Added score filter. This enables you to filter the results and modify the score
96
+ to change the sort order.
97
+
98
+ For example, to modify the scoring so that a document with today's date gets
99
+ twice the score factor as a document 50 days ago and four times the score
100
+ factor of a document 100 days ago (ie a half life of 50 days) you would do
101
+ this;
102
+
103
+ fifty_day_half_life_filter = lambda do |doc, score, searcher|
104
+ days = (Date.today() - Date.parse(searcher[doc][:date])).to_i
105
+ 1.0 / (2.0 ** (days.to_f / 50.0))
106
+ end
107
+
108
+ top_docs = @searcher.search(q, :filter_proc => fifty_day_half_life_filter)
109
+
110
+
111
+
112
+
113
+ ------------------------------------------------------------------------
114
+ r808 | dbalmain | 2008-01-11 07:14:01 +1100 (Fri, 11 Jan 2008) | 1 line
115
+
116
+ Changed unsigned long longs to f_u64 type to fix ticket #336
117
+ ------------------------------------------------------------------------
118
+ r807 | dbalmain | 2008-01-11 07:12:40 +1100 (Fri, 11 Jan 2008) | 1 line
119
+
120
+ Changed unsigned long longs to f_u64 type to fix ticket #336
121
+ ------------------------------------------------------------------------
122
+ r806 | dbalmain | 2008-01-11 07:01:00 +1100 (Fri, 11 Jan 2008) | 1 line
123
+
124
+ Minor comment correction
125
+ ------------------------------------------------------------------------
126
+ r805 | dbalmain | 2007-12-12 10:28:23 +1100 (Wed, 12 Dec 2007) | 1 line
127
+
128
+ Fixed Ticket #332. Added spaces so that code parses correctly.
129
+ ------------------------------------------------------------------------
130
+ r804 | dbalmain | 2007-12-03 11:20:34 +1100 (Mon, 03 Dec 2007) | 1 line
131
+
132
+ Added test for ticket #324
133
+ ------------------------------------------------------------------------
134
+ r803 | dbalmain | 2007-12-03 11:12:55 +1100 (Mon, 03 Dec 2007) | 1 line
135
+
136
+ Fixed Ferret::Index::Index#query_update for ticket #324. Was only updating a maximum of 10 records.
137
+ ------------------------------------------------------------------------
data/RELEASE_NOTES ADDED
@@ -0,0 +1,60 @@
1
+ The most significant update in this release is that you can now alter the
2
+ scoring, thereby altering the ordering of search results. A great application of
3
+ this is that you can now change the weight of a document based on its age. In
4
+ this example we have a 365-day half-life, i.e. a year-old document has half the
5
+ weight of a new document and twice the weight of a 2-year-old document;
6
+
7
+ require 'ferret'
8
+
9
+ age_weight = lambda do |doc, score, searcher|
10
+ age = (Date.today - Date.parse(searcher[doc][:date])).to_i
11
+ 1 / 2 ** (age.to_f/365)
12
+ end
13
+
14
+ index = Ferret::I.new
15
+
16
+ sales = [
17
+ { :artist => 'Giovanni Bellini',
18
+ :date => '2006-10-23',
19
+ :work => 'Transfiguration'
20
+ },
21
+ { :artist => 'Giovanni Bellini',
22
+ :date => '2008-01-05',
23
+ :work => 'Pesaro Altarpiece'
24
+ },
25
+ { :artist => 'Gentile Bellini',
26
+ :date => '2008-02-10',
27
+ :work => 'St. Dominic'
28
+ },
29
+ ].each {|doc| index << doc}
30
+
31
+ puts index.search('artist:(Giovanni Bellini)').to_s(:work)
32
+ # =>
33
+ # TopDocs: total_hits = 3, max_score = 0.767351 [
34
+ # 0 "Transfiguration": 0.767351
35
+ # 1 "Pesaro Altarpiece": 0.767351
36
+ # 2 "St. Dominic": 0.129147
37
+ # ]
38
+
39
+ puts index.search('artist:(Giovanni Bellini)',
40
+ :filter_proc => age_weight).to_s(:work)
41
+ # =>
42
+ # TopDocs: total_hits = 3, max_score = 0.718006 [
43
+ # 1 "Pesaro Altarpiece": 0.718006
44
+ # 0 "Transfiguration": 0.311937
45
+ # 2 "St. Dominic": 0.129147
46
+ # ]
47
+
48
+ You can also now write your own C extensions to filter the search results. You
49
+ can see an example of this by downloading the source and looking at;
50
+
51
+ ferret_unzipped/ruby/examples/c_extensions/age_filter/
52
+
53
+ Alternatively, you can view the code at;
54
+
55
+ http://ferret.davebalmain.com/trac/browser/trunk/ruby/examples/c_extensions?rev=828
56
+
57
+ Also, good news for Norwegians, Romanians, Turks and Finns. We now have 3 new
58
+ stemmers for Norwegian, Romanian and Turkish and 2 new stop-word lists for
59
+ Finnish and Hungarian. Please try them out and let me know if there are any
60
+ problems.
data/Rakefile ADDED
@@ -0,0 +1,443 @@
1
+ require 'rake'
2
+ require 'rake/clean'
3
+ require 'rake/gempackagetask'
4
+ require 'rake/rdoctask'
5
+ require 'rake/testtask'
6
+
7
+ $:. << 'lib'
8
+ require 'ferret/version'
9
+
10
+
11
+ def say(msg='')
12
+ STDERR.puts msg
13
+ end
14
+
15
+ def prompt(msg)
16
+ STDERR.print "#{msg} [Yna]: "
17
+ while true
18
+ case STDIN.gets.chomp!
19
+ when /^(y(es)?)?$/i then return true
20
+ when /^no?$/i then return false
21
+ when /^a(bort)?$/i then fail('aborted')
22
+ else
23
+ STDERR.print "Sorry, I don't understand. Please type y, n or a: "
24
+ end
25
+ end
26
+ end
27
+
28
+ windows = (RUBY_PLATFORM =~ /win32|cygwin/) rescue nil
29
+ SUDO = windows ? "" : "sudo "
30
+
31
+
32
+ task :default => 'test:unit'
33
+ #task :default => :build do
34
+ # sh "ruby test/unit/index/tc_index.rb"
35
+ #end
36
+
37
+ BZLIB_SRC = FileList["../c/lib/bzlib/*.h"] +
38
+ FileList["../c/lib/bzlib/*.c"].map do |fn|
39
+ fn.gsub(%r{/([^/]*.c)}, '/BZ_\1')
40
+ end
41
+ ##############################################################################
42
+ # Building
43
+ ##############################################################################
44
+
45
+ task :build => 'build:compile'
46
+ namespace :build do
47
+ EXT = "ferret_ext.so"
48
+ # Note: libstemmer.[h] is necessary so that the file isn't included when it
49
+ # doesn't exist. It needs to have one regular expression element.
50
+ EXT_SRC = FileList["../c/src/*.[ch]", "../c/include/*.h",
51
+ "../c/lib/bzlib/*.[ch]",
52
+ "../c/lib/libstemmer_c/src_c/*.[ch]",
53
+ "../c/lib/libstemmer_c/runtime/*.[ch]",
54
+ "../c/lib/libstemmer_c/libstemmer/*.[ch]",
55
+ "../c/lib/libstemmer_c/include/libstemmer.[h]"]
56
+ EXT_SRC.exclude('../c/**/ind.[ch]',
57
+ '../c/**/symbol.[ch]',
58
+ '../c/include/threading.h',
59
+ '../c/include/scanner.h',
60
+ '../c/include/internal.h',
61
+ '../c/src/lang.c',
62
+ '../c/include/lang.h')
63
+
64
+ EXT_SRC_MAP = {}
65
+ EXT_SRC_DEST = EXT_SRC.map do |fn|
66
+ ext_fn = File.join("ext", File.basename(fn))
67
+ if fn =~ /.c$/ and fn =~ /(bzlib|stemmer)/
68
+ prefix = $1.upcase
69
+ ext_fn.gsub!(/ext\//, "ext/#{prefix}_")
70
+ end
71
+ EXT_SRC_MAP[fn] = ext_fn
72
+ end
73
+ SRC = FileList["ext/*.[ch]", EXT_SRC_DEST, 'ext/internal.h'].uniq
74
+
75
+ CLEAN.include ['**/*.o', '**/*.obj', '.config', 'ext/cferret.c']
76
+ CLOBBER.include ['doc/api', 'ext/*.so', 'ext/Makefile',
77
+ 'ext/internal.h', EXT_SRC_DEST]
78
+
79
+ # The following block creates file tasks for all of the c files. They
80
+ # belong in the ../c directory in the source working copy and they need
81
+ # to be linked to in the ext directory
82
+ EXT_SRC.each do |fn|
83
+ dest_fn = EXT_SRC_MAP[fn]
84
+ # prepend lib files to avoid conflicts
85
+ file dest_fn => fn do |t|
86
+ ln_sf File.expand_path(fn), File.expand_path(dest_fn)
87
+
88
+ if fn =~ /stemmer/
89
+ # flatten the directory structure for lib_stemmer
90
+ open(dest_fn) do |in_f|
91
+ open(dest_fn + ".out", "w") do |out_f|
92
+ in_f.each do |line|
93
+ out_f.write(line.sub(/(#include ["<])[.a-z_\/]*\//, '\1'))
94
+ end
95
+ end
96
+ end
97
+ mv dest_fn + ".out", dest_fn
98
+ end
99
+ end
100
+ end if File.exists?("../c")
101
+
102
+ file 'ext/internal.h' => '../c/include/internal.h' do
103
+ File.open('ext/internal.h', 'w') do |f|
104
+ File.readlines('../c/include/internal.h').each do |l|
105
+ next if l =~ /ALLOC/ and l !~ /ZERO|MP_/
106
+ f.puts(l)
107
+ end
108
+ end
109
+ end
110
+
111
+ desc "Build the extension (ferret_ext.so). You'll need a C compiler and Make."
112
+ task :compile => ["ext/#{EXT}"] + SRC
113
+
114
+ file "ext/#{EXT}" => "ext/Makefile" do
115
+ cd "ext"
116
+ if windows and ENV['make'].nil?
117
+ begin
118
+ sh "nmake"
119
+ rescue Exception => e
120
+ path = ':\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT'
121
+ if File.exists? "f#{path}"
122
+ sh "f#{path}"
123
+ elsif File.exists? "c#{path}"
124
+ sh "c#{path}"
125
+ else
126
+ say
127
+ say "***************************************************************"
128
+ say "You need to have Visual C++ 6 to build Ferret on Windows."
129
+ say "If you have it installed, you may need to run;"
130
+ say ' C:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT'
131
+ say "***************************************************************"
132
+ say
133
+ raise e
134
+ end
135
+ sh "nmake"
136
+ end
137
+ else
138
+ sh "make"
139
+ end
140
+ cd ".."
141
+ end
142
+
143
+ file "ext/Makefile" => SRC do
144
+ cd "ext"
145
+ `ruby extconf.rb`
146
+ cd ".."
147
+ end
148
+ end
149
+
150
+ ##############################################################################
151
+ # Testing
152
+ ##############################################################################
153
+
154
+ task :test => 'test:units'
155
+ namespace :test do
156
+ desc "Run tests with Valgrind"
157
+ task :valgrind do
158
+ sh "valgrind --suppressions=ferret_valgrind.supp " +
159
+ "--leak-check=yes --show-reachable=yes " +
160
+ "-v ruby test/unit/index/tc_index_reader.rb"
161
+ end
162
+
163
+ desc "Run all tests"
164
+ task :all => [ :units ]
165
+
166
+ desc "run unit tests in test/unit"
167
+ Rake::TestTask.new("units" => :build) do |t|
168
+ t.libs << "test/unit"
169
+ t.pattern = 'test/unit/t[cs]_*.rb'
170
+ t.verbose = true
171
+ end
172
+ task :unit => :units
173
+
174
+ desc "run tests using locally installed gem"
175
+ Rake::TestTask.new("installed") do |t|
176
+ t.libs << "test/unit"
177
+ t.ruby_opts << '-rtest/test_installed'
178
+ t.pattern = 'test/unit/t[cs]_*.rb'
179
+ t.verbose = true
180
+ end
181
+ end
182
+
183
+ ##############################################################################
184
+ # Documentation
185
+ ##############################################################################
186
+
187
desc "Generate API documentation"
task :doc => 'doc:rdoc'

namespace :doc do
  # Path to the allison RDoc template, or nil when the allison gem is not
  # installed (in which case no template override is set below).
  allison = Gem.cache.find_name('allison').last
  allison_template = allison && File.join(allison.full_gem_path, 'lib/allison.rb')

  desc "Generate documentation for the application"
  # The task object is stored in a global so code outside this namespace can
  # read its file list.
  $rd = Rake::RDocTask.new do |rdoc|
    rdoc.rdoc_dir = 'doc/api'
    rdoc.title = "Ferret Search Library Documentation"
    rdoc.options << '--line-numbers' << '--inline-source' << '--charset=utf-8'
    rdoc.template = allison_template if allison_template
    rdoc.rdoc_files.include('README', 'TODO', 'TUTORIAL', 'MIT-LICENSE',
                            'lib/**/*.rb', 'ext/r_*.c', 'ext/ferret.c')
  end

  desc "Look for TODO and FIXME tags in the code"
  task :todo do
    # Matches FIXME/TODO/TBD (any case) that follow a '#' or '*' comment marker.
    FileList['**/*.rb', 'ext/*.[ch]'].egrep(/[#*].*(FIXME|TODO|TBD)/i)
  end
end
215
+
216
+ ##############################################################################
217
+ # Packaging and Installing
218
+ ##############################################################################
219
+
220
# Every file that goes into the packaged gem: top-level project files
# (names starting with a capital letter or '-'), the Ruby library with its
# web assets, the tests and their word file, the rake helpers, and the
# extension sources listed in SRC.
PKG_FILES = FileList.new(
  'setup.rb',
  '[-A-Z]*',
  'lib/**/*.rb',
  'lib/**/*.rhtml',
  'lib/**/*.css',
  'lib/**/*.js',
  'test/**/*.rb',
  'test/**/wordfile',
  'rake_utils/**/*.rb',
  'Rakefile',
  SRC
)
233
+
234
# Gem specification for the jk-ferret package.
#
# On Windows the compiled extension ("ext/#{EXT}") is shipped instead of the
# extconf.rb build step; everywhere else the gem is a plain Ruby-platform gem
# that compiles its extension at install time.
spec = Gem::Specification.new do |s|

  #### Basic information.
  s.name = 'jk-ferret'
  s.version = Ferret::VERSION
  s.summary = "Ruby indexing library."
  s.description = "Ferret is a super fast, highly configurable search library."

  #### Dependencies and requirements.
  s.add_dependency('rake')
  s.files = PKG_FILES.to_a
  s.extensions << "ext/extconf.rb"
  s.require_path = 'lib'
  s.bindir = 'bin'
  s.executables = ['ferret-browser']
  s.default_executable = 'ferret-browser'

  #### Author and project details.
  s.author = "David Balmain"
  s.email = "dbalmain@gmail.com"
  #s.homepage = "http://ferret.davebalmain.com/trac"
  s.homepage = "http://github.com/jkraemer/ferret"
  s.rubyforge_project = "ferret"

  s.has_rdoc = true
  # Non-Ruby files from the RDoc task's file list are shipped as extra
  # documentation files.
  s.extra_rdoc_files = $rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
  s.rdoc_options <<
    '--title' << 'Ferret -- Ruby Search Library' <<
    '--main' << 'README' << '--line-numbers' <<
    'TUTORIAL' << 'TODO'

  # Sign the gem only when both the private key and the public certificate
  # are present. File.exist? is used because File.exists? no longer exists
  # in current Ruby versions.
  key_file = File.expand_path('~/.gem/gem-private_key.pem')
  key_file = nil unless File.exist?(key_file)
  cert_file = File.expand_path('~/.gem/gem-public_cert.pem')
  cert_file = nil unless File.exist?(cert_file)
  if key_file and cert_file
    s.signing_key = key_file
    s.cert_chain = cert_file
  end

  if windows
    # Ship the prebuilt extension and skip the build step.
    s.files = PKG_FILES.to_a + ["ext/#{EXT}"]
    s.extensions.clear
    s.platform = Gem::Platform::WIN32
  else
    s.platform = Gem::Platform::RUBY
  end
end
282
+
283
# Builds the gem package; zip and tar archives are additionally requested
# on every platform except Windows.
package_task = Rake::GemPackageTask.new(spec) do |pkg|
  next if windows

  pkg.need_zip = true
  pkg.need_tar = true
end
289
+
290
# Install/uninstall helpers.
#
# The gem file and gem name are taken from the gem specification rather than
# hard-coded as "ferret": the specification names the gem 'jk-ferret', so the
# package task writes pkg/jk-ferret-<version>.gem, and that is also the name
# under which it gets installed. spec.full_name is "<name>-<version>", with a
# platform suffix for platform-specific gems.

desc "Run :gem and install the resulting gem"
task :install => :gem do
  sh "#{SUDO}gem install pkg/#{spec.full_name}.gem --no-rdoc --no-ri -l"
end

desc "Run :clobber and uninstall the .gem"
task :uninstall => :clobber do
  sh "#{SUDO}gem uninstall #{spec.name}"
end

desc "Same as :install but you must be root"
task :root_install => :gem do
  sh "gem install pkg/#{spec.full_name}.gem --no-rdoc --no-ri -l"
end

desc "Same as :uninstall but you must be root"
task :root_uninstall => :clobber do
  sh "gem uninstall #{spec.name}"
end
309
+
310
# Returns the output of `svn log` for the parent directory, from HEAD back to
# the revision at which the last release tag was created.
#
# The last release tag is the final entry printed by `svn list` for the tags
# directory; the revision that created it is read from the oldest entry of
# that tag's `svn log --stop-on-copy` output.
#
# Raises RuntimeError when no tag is listed or when the tag's log cannot be
# parsed (for example because svn is unavailable), instead of failing later
# with a NoMethodError on nil.
def list_changes_since_last_release
  tags_url = 'svn://davebalmain.com/ferret/tags'

  last_tag = `svn list #{tags_url}`.split("\n").last
  fail "No release tags found at #{tags_url}" if last_tag.nil?

  tag_log = `svn log --stop-on-copy #{tags_url}/#{last_tag}`
  # Log entries are separated by dashed rules and the output ends with one,
  # so the second-to-last piece is the oldest entry.
  oldest_entry = tag_log.split(/-------+/)[-2]
  revision = oldest_entry && /^r(\d+)\s+\|/.match(oldest_entry)
  if revision.nil?
    fail "Could not determine the revision that created tag #{last_tag}"
  end

  `svn log .. -rHEAD:#{revision[1]}`
end
318
+
319
desc "List changes since last release"
# Prints whatever list_changes_since_last_release returns.
task(:changes) { puts list_changes_since_last_release }
323
+
324
+ if ENV['FERRET_DEV']
325
+ ##############################################################################
326
+ # Releasing
327
+ ##############################################################################
328
+
329
+ desc "Generate and upload a new release"
330
+ task :release => 'release:release'
331
+ namespace :release do
332
# Runs the full release sequence (status check, all tests, packaging,
# tagging), prints a completion banner, then asks for the next version
# number and records it in lib/ferret/version.rb.
task :release => [:status_check, 'test:all', :package, :tag] do
  banner_rule = "**************************************************************"
  banner = [
    banner_rule,
    "* Release #{Ferret::VERSION} Complete.",
    "* Packages ready to upload.",
    banner_rule
  ]

  say
  banner.each { |banner_line| say banner_line }
  say
  reversion("lib/ferret/version.rb")
end
341
+
342
# Validate that everything is ready to go for a release.
task :status_check do
  # Are all source files checked in? Any non-blank output from svn means
  # there are pending changes. The output is tested as a whole: checking for
  # "an empty line somewhere in the output" would wrongly report a clean
  # tree whenever svn prints a blank separator line between entries.
  pending_changes = `svn -q --ignore-externals status`
  unless pending_changes.strip.empty?
    fail "'svn -q status' is not clean ... do you have unchecked-in files?"
  end

  say "No outstanding checkins found ... OK"
end
351
+
352
# Interactively asks for a new version number and writes it into the file
# +fn+, then commits that file to SVN.
#
# fn:: path of the Ruby file holding the `VERSION = '...'` assignment.
#
# The question is repeated until prompt() confirms the answer. When the
# RELTEST environment variable is set nothing is modified; the actions that
# would be taken are only reported via say().
#
# Raises RuntimeError if standard input ends before a version was entered.
def reversion(fn)
  new_version = nil
  loop do
    print "Ferret is currently at #{Ferret::VERSION}. What version now? "
    answer = $stdin.gets
    fail "No version entered (end of input)" if answer.nil?
    # chomp (not chomp!) so an answer without a trailing newline is kept;
    # chomp! returns nil when there is nothing to strip.
    new_version = answer.chomp
    break if prompt("Change to version #{new_version}?")
  end

  if ENV['RELTEST']
    say "Would change the version in #{fn} from"
    say " #{Ferret::VERSION} => #{new_version}"
    say "and then commit the changes with the command"
    say " svn ci -m \"Updated to version #{new_version}\" #{fn}"
  else
    # Write the updated copy next to the original, then move it into place.
    File.open(fn) do |ferret_in|
      File.open(fn + ".new", "w") do |ferret_out|
        ferret_in.each do |line|
          # Replace the VERSION assignment, keeping the line's indentation.
          if line =~ /^([ \t]*)VERSION\s*=\s*/
            ferret_out.puts "#{$1}VERSION = '#{new_version}'"
          else
            ferret_out.puts line
          end
        end
      end
    end
    mv fn + ".new", fn
    sh %{svn ci -m "Updated to version #{new_version}" #{fn}}
  end
end
381
+
382
# Tag all the SVN files with the latest release number
task :tag => :status_check do
  reltag = "REL-#{Ferret::VERSION}"
  # Built once; it is either echoed (RELTEST dry run) or executed.
  copy_cmd = %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}}

  say "Tagging SVN with [#{reltag}]"
  if ENV['RELTEST']
    say "Release Task Testing, skipping SVN tagging. Would do;"
    say copy_cmd
  else
    sh copy_cmd
  end
end
393
+
394
+ end
395
+
396
+ ##############################################################################
397
+ # Publishing
398
+ ##############################################################################
399
+
400
namespace :publish do
  # Confirmation question shown before anything is uploaded.
  PUBLISH_PROMPT = <<-EOF
    Make sure you updated RELEASE_NOTES and RELEASE_CHANGES and that the
    package exists. Are you sure you want to continue?
  EOF
  desc "Publish gem on rubyforge for download. Will only do the linux version"
  task :release do
    exit unless prompt(PUBLISH_PROMPT)
    # Loaded lazily so the rubyforge gem is only needed when publishing.
    require 'rubyforge'
    require 'rake/contrib/rubyforgepublisher'
    version = Ferret::VERSION

    # The package task names its output after the gem specification, so the
    # file names are derived from spec.name rather than hard-coded.
    packages = %w(gem tgz zip).map {|ext| "pkg/#{spec.name}-#{version}.#{ext}"}

    rubyforge = RubyForge.new
    rubyforge.login
    rubyforge.add_release('ferret', 'ferret',
                          "ferret-#{version}", *packages)
  end

  desc "Publish the documentation"
  task :docs => 'doc:rdoc' do
    sh %{rsync -rzv --delete -e 'ssh -p 8900' doc/api/ davebalmain.com:/var/www/ferret/api}
  end

  desc "Publish the documentation and release"
  # Depends on :docs (upload), not :doc: there is no publish:doc task, so
  # :doc would resolve to the top-level task that only generates the
  # documentation locally.
  task :all => [:docs, :release]
end
428
+ end
429
+
430
+
431
+
432
+
433
+ #
434
+ # In case I ever need to add another racc parser, here's how
435
+ #
436
+ # # Make Parsers ---------------------------------------------------------------
437
+ #
438
+ # RACC_SRC = FileList["lib/**/*.y"]
439
+ #
440
+ # task :parsers => RACC_OUT
441
+ # rule(/\.tab\.rb$/ => [proc {|tn| tn.sub(/\.tab\.rb$/, '.y')}]) do |t|
442
+ # sh "racc #{t.source}"
443
+ # end