np-ferret 0.11.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/Makefile +218 -0
  9. data/ext/analysis.c +1584 -0
  10. data/ext/analysis.h +219 -0
  11. data/ext/analysis.o +0 -0
  12. data/ext/api.c +69 -0
  13. data/ext/api.h +27 -0
  14. data/ext/api.o +0 -0
  15. data/ext/array.c +123 -0
  16. data/ext/array.h +53 -0
  17. data/ext/array.o +0 -0
  18. data/ext/bitvector.c +540 -0
  19. data/ext/bitvector.h +272 -0
  20. data/ext/bitvector.o +0 -0
  21. data/ext/compound_io.c +383 -0
  22. data/ext/compound_io.o +0 -0
  23. data/ext/config.h +42 -0
  24. data/ext/document.c +156 -0
  25. data/ext/document.h +53 -0
  26. data/ext/document.o +0 -0
  27. data/ext/except.c +120 -0
  28. data/ext/except.h +168 -0
  29. data/ext/except.o +0 -0
  30. data/ext/extconf.rb +14 -0
  31. data/ext/ferret.c +402 -0
  32. data/ext/ferret.h +91 -0
  33. data/ext/ferret.o +0 -0
  34. data/ext/ferret_ext.bundle +0 -0
  35. data/ext/filter.c +156 -0
  36. data/ext/filter.o +0 -0
  37. data/ext/fs_store.c +484 -0
  38. data/ext/fs_store.o +0 -0
  39. data/ext/global.c +418 -0
  40. data/ext/global.h +117 -0
  41. data/ext/global.o +0 -0
  42. data/ext/hash.c +598 -0
  43. data/ext/hash.h +475 -0
  44. data/ext/hash.o +0 -0
  45. data/ext/hashset.c +170 -0
  46. data/ext/hashset.h +187 -0
  47. data/ext/hashset.o +0 -0
  48. data/ext/header.h +58 -0
  49. data/ext/helper.c +62 -0
  50. data/ext/helper.h +13 -0
  51. data/ext/helper.o +0 -0
  52. data/ext/inc/lang.h +48 -0
  53. data/ext/inc/threading.h +31 -0
  54. data/ext/index.c +6510 -0
  55. data/ext/index.h +964 -0
  56. data/ext/index.o +0 -0
  57. data/ext/lang.h +66 -0
  58. data/ext/libstemmer.c +92 -0
  59. data/ext/libstemmer.h +79 -0
  60. data/ext/libstemmer.o +0 -0
  61. data/ext/mempool.c +87 -0
  62. data/ext/mempool.h +35 -0
  63. data/ext/mempool.o +0 -0
  64. data/ext/modules.h +162 -0
  65. data/ext/multimapper.c +310 -0
  66. data/ext/multimapper.h +51 -0
  67. data/ext/multimapper.o +0 -0
  68. data/ext/posh.c +1006 -0
  69. data/ext/posh.h +1007 -0
  70. data/ext/posh.o +0 -0
  71. data/ext/priorityqueue.c +151 -0
  72. data/ext/priorityqueue.h +143 -0
  73. data/ext/priorityqueue.o +0 -0
  74. data/ext/q_boolean.c +1608 -0
  75. data/ext/q_boolean.o +0 -0
  76. data/ext/q_const_score.c +165 -0
  77. data/ext/q_const_score.o +0 -0
  78. data/ext/q_filtered_query.c +209 -0
  79. data/ext/q_filtered_query.o +0 -0
  80. data/ext/q_fuzzy.c +335 -0
  81. data/ext/q_fuzzy.o +0 -0
  82. data/ext/q_match_all.c +148 -0
  83. data/ext/q_match_all.o +0 -0
  84. data/ext/q_multi_term.c +677 -0
  85. data/ext/q_multi_term.o +0 -0
  86. data/ext/q_parser.c +2825 -0
  87. data/ext/q_parser.o +0 -0
  88. data/ext/q_phrase.c +1126 -0
  89. data/ext/q_phrase.o +0 -0
  90. data/ext/q_prefix.c +100 -0
  91. data/ext/q_prefix.o +0 -0
  92. data/ext/q_range.c +356 -0
  93. data/ext/q_range.o +0 -0
  94. data/ext/q_span.c +2402 -0
  95. data/ext/q_span.o +0 -0
  96. data/ext/q_term.c +337 -0
  97. data/ext/q_term.o +0 -0
  98. data/ext/q_wildcard.c +171 -0
  99. data/ext/q_wildcard.o +0 -0
  100. data/ext/r_analysis.c +2636 -0
  101. data/ext/r_analysis.o +0 -0
  102. data/ext/r_index.c +3509 -0
  103. data/ext/r_index.o +0 -0
  104. data/ext/r_qparser.c +585 -0
  105. data/ext/r_qparser.o +0 -0
  106. data/ext/r_search.c +4240 -0
  107. data/ext/r_search.o +0 -0
  108. data/ext/r_store.c +513 -0
  109. data/ext/r_store.o +0 -0
  110. data/ext/r_utils.c +963 -0
  111. data/ext/r_utils.o +0 -0
  112. data/ext/ram_store.c +471 -0
  113. data/ext/ram_store.o +0 -0
  114. data/ext/search.c +1743 -0
  115. data/ext/search.h +885 -0
  116. data/ext/search.o +0 -0
  117. data/ext/similarity.c +150 -0
  118. data/ext/similarity.h +82 -0
  119. data/ext/similarity.o +0 -0
  120. data/ext/sort.c +985 -0
  121. data/ext/sort.o +0 -0
  122. data/ext/stem_ISO_8859_1_danish.c +338 -0
  123. data/ext/stem_ISO_8859_1_danish.h +16 -0
  124. data/ext/stem_ISO_8859_1_danish.o +0 -0
  125. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  126. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.o +0 -0
  128. data/ext/stem_ISO_8859_1_english.c +1156 -0
  129. data/ext/stem_ISO_8859_1_english.h +16 -0
  130. data/ext/stem_ISO_8859_1_english.o +0 -0
  131. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  132. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  133. data/ext/stem_ISO_8859_1_finnish.o +0 -0
  134. data/ext/stem_ISO_8859_1_french.c +1276 -0
  135. data/ext/stem_ISO_8859_1_french.h +16 -0
  136. data/ext/stem_ISO_8859_1_french.o +0 -0
  137. data/ext/stem_ISO_8859_1_german.c +512 -0
  138. data/ext/stem_ISO_8859_1_german.h +16 -0
  139. data/ext/stem_ISO_8859_1_german.o +0 -0
  140. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  141. data/ext/stem_ISO_8859_1_italian.h +16 -0
  142. data/ext/stem_ISO_8859_1_italian.o +0 -0
  143. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  144. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  145. data/ext/stem_ISO_8859_1_norwegian.o +0 -0
  146. data/ext/stem_ISO_8859_1_porter.c +776 -0
  147. data/ext/stem_ISO_8859_1_porter.h +16 -0
  148. data/ext/stem_ISO_8859_1_porter.o +0 -0
  149. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  150. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  151. data/ext/stem_ISO_8859_1_portuguese.o +0 -0
  152. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  153. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  154. data/ext/stem_ISO_8859_1_spanish.o +0 -0
  155. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  156. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  157. data/ext/stem_ISO_8859_1_swedish.o +0 -0
  158. data/ext/stem_KOI8_R_russian.c +701 -0
  159. data/ext/stem_KOI8_R_russian.h +16 -0
  160. data/ext/stem_KOI8_R_russian.o +0 -0
  161. data/ext/stem_UTF_8_danish.c +344 -0
  162. data/ext/stem_UTF_8_danish.h +16 -0
  163. data/ext/stem_UTF_8_danish.o +0 -0
  164. data/ext/stem_UTF_8_dutch.c +653 -0
  165. data/ext/stem_UTF_8_dutch.h +16 -0
  166. data/ext/stem_UTF_8_dutch.o +0 -0
  167. data/ext/stem_UTF_8_english.c +1176 -0
  168. data/ext/stem_UTF_8_english.h +16 -0
  169. data/ext/stem_UTF_8_english.o +0 -0
  170. data/ext/stem_UTF_8_finnish.c +808 -0
  171. data/ext/stem_UTF_8_finnish.h +16 -0
  172. data/ext/stem_UTF_8_finnish.o +0 -0
  173. data/ext/stem_UTF_8_french.c +1296 -0
  174. data/ext/stem_UTF_8_french.h +16 -0
  175. data/ext/stem_UTF_8_french.o +0 -0
  176. data/ext/stem_UTF_8_german.c +526 -0
  177. data/ext/stem_UTF_8_german.h +16 -0
  178. data/ext/stem_UTF_8_german.o +0 -0
  179. data/ext/stem_UTF_8_italian.c +1113 -0
  180. data/ext/stem_UTF_8_italian.h +16 -0
  181. data/ext/stem_UTF_8_italian.o +0 -0
  182. data/ext/stem_UTF_8_norwegian.c +302 -0
  183. data/ext/stem_UTF_8_norwegian.h +16 -0
  184. data/ext/stem_UTF_8_norwegian.o +0 -0
  185. data/ext/stem_UTF_8_porter.c +794 -0
  186. data/ext/stem_UTF_8_porter.h +16 -0
  187. data/ext/stem_UTF_8_porter.o +0 -0
  188. data/ext/stem_UTF_8_portuguese.c +1055 -0
  189. data/ext/stem_UTF_8_portuguese.h +16 -0
  190. data/ext/stem_UTF_8_portuguese.o +0 -0
  191. data/ext/stem_UTF_8_russian.c +709 -0
  192. data/ext/stem_UTF_8_russian.h +16 -0
  193. data/ext/stem_UTF_8_russian.o +0 -0
  194. data/ext/stem_UTF_8_spanish.c +1137 -0
  195. data/ext/stem_UTF_8_spanish.h +16 -0
  196. data/ext/stem_UTF_8_spanish.o +0 -0
  197. data/ext/stem_UTF_8_swedish.c +313 -0
  198. data/ext/stem_UTF_8_swedish.h +16 -0
  199. data/ext/stem_UTF_8_swedish.o +0 -0
  200. data/ext/stopwords.c +401 -0
  201. data/ext/stopwords.o +0 -0
  202. data/ext/store.c +692 -0
  203. data/ext/store.h +777 -0
  204. data/ext/store.o +0 -0
  205. data/ext/term_vectors.c +352 -0
  206. data/ext/term_vectors.o +0 -0
  207. data/ext/threading.h +31 -0
  208. data/ext/utilities.c +446 -0
  209. data/ext/utilities.o +0 -0
  210. data/ext/win32.h +54 -0
  211. data/ferret.gemspec +39 -0
  212. data/lib/ferret.rb +29 -0
  213. data/lib/ferret/browser.rb +246 -0
  214. data/lib/ferret/browser/s/global.js +192 -0
  215. data/lib/ferret/browser/s/style.css +148 -0
  216. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  217. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  218. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  219. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  220. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  221. data/lib/ferret/browser/views/layout.rhtml +22 -0
  222. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  223. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  224. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  225. data/lib/ferret/browser/webrick.rb +14 -0
  226. data/lib/ferret/document.rb +130 -0
  227. data/lib/ferret/field_infos.rb +44 -0
  228. data/lib/ferret/index.rb +786 -0
  229. data/lib/ferret/number_tools.rb +157 -0
  230. data/lib/ferret_ext.bundle +0 -0
  231. data/lib/ferret_version.rb +3 -0
  232. data/pkg/ferret-0.11.6.gem +0 -0
  233. data/pkg/ferret-0.11.6.tgz +0 -0
  234. data/pkg/ferret-0.11.6.zip +0 -0
  235. data/setup.rb +1555 -0
  236. data/test/test_all.rb +5 -0
  237. data/test/test_helper.rb +24 -0
  238. data/test/threading/number_to_spoken.rb +132 -0
  239. data/test/threading/thread_safety_index_test.rb +79 -0
  240. data/test/threading/thread_safety_read_write_test.rb +76 -0
  241. data/test/threading/thread_safety_test.rb +133 -0
  242. data/test/unit/analysis/tc_analyzer.rb +548 -0
  243. data/test/unit/analysis/tc_token_stream.rb +646 -0
  244. data/test/unit/index/tc_index.rb +762 -0
  245. data/test/unit/index/tc_index_reader.rb +699 -0
  246. data/test/unit/index/tc_index_writer.rb +437 -0
  247. data/test/unit/index/th_doc.rb +315 -0
  248. data/test/unit/largefile/tc_largefile.rb +46 -0
  249. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  250. data/test/unit/search/tc_filter.rb +135 -0
  251. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  252. data/test/unit/search/tc_index_searcher.rb +61 -0
  253. data/test/unit/search/tc_multi_searcher.rb +128 -0
  254. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  255. data/test/unit/search/tc_search_and_sort.rb +179 -0
  256. data/test/unit/search/tc_sort.rb +49 -0
  257. data/test/unit/search/tc_sort_field.rb +27 -0
  258. data/test/unit/search/tc_spans.rb +190 -0
  259. data/test/unit/search/tm_searcher.rb +384 -0
  260. data/test/unit/store/tc_fs_store.rb +77 -0
  261. data/test/unit/store/tc_ram_store.rb +35 -0
  262. data/test/unit/store/tm_store.rb +34 -0
  263. data/test/unit/store/tm_store_lock.rb +68 -0
  264. data/test/unit/tc_document.rb +81 -0
  265. data/test/unit/ts_analysis.rb +2 -0
  266. data/test/unit/ts_index.rb +2 -0
  267. data/test/unit/ts_largefile.rb +4 -0
  268. data/test/unit/ts_query_parser.rb +2 -0
  269. data/test/unit/ts_search.rb +2 -0
  270. data/test/unit/ts_store.rb +2 -0
  271. data/test/unit/ts_utils.rb +2 -0
  272. data/test/unit/utils/tc_bit_vector.rb +295 -0
  273. data/test/unit/utils/tc_number_tools.rb +117 -0
  274. data/test/unit/utils/tc_priority_queue.rb +106 -0
  275. metadata +392 -0
data/CHANGELOG ADDED
@@ -0,0 +1,24 @@
1
+ Fri Oct 20 22:25:37 JST 2006
2
+ * Added Filter#bits method to built-in Filters.
3
+ * Added MappingFilter < TokenFilter that can be used to map strings to other
4
+ strings during analysis. A possible use of this is to filter utf-8
5
+ characters to ascii characters.
6
+
7
+ Fri Oct 13 09:18:31 JST 2006
8
+ * Changed documentation to state truthfully that FULL_ENGLISH_STOP_WORDS is
9
+ being used by default in StandardAnalyzer and StopwordFilter.
10
+ * Removed 'will', 's' and 't' from ENGLISH_STOP_WORDS so that all words in
11
+ ENGLISH_STOP_WORDS can be found in FULL_ENGLISH_STOP_WORDS, that is
12
+ ENGLISH_STOP_WORDS is a subset of FULL_ENGLISH_STOP_WORDS.
13
+
14
+ Thu Oct 12 23:04:19 JST 2006
15
+ * Fixed adding SortField to Sort object in Ruby. Garbage collection wasn't
16
+ working.
17
+ * Can now set :sort => SortField#new
18
+
19
+ Tue Oct 10 14:42:17 JST 2006
20
+ * Fixed MultiTermDocEnum bug introduced in version 0.10.10 during
21
+ performance enhancements.
22
+ * Added Filter#bits(index_reader) method to C implemented filters so that
23
+ they can be used in Ruby.
24
+
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2005-2006 David Balmain
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,102 @@
1
+ = Ferret
2
+
3
+ Ferret is a Ruby port of the Java Lucene search engine.
4
+ (http://jakarta.apache.org/lucene/) In the same way as Lucene, it is not a
5
+ standalone application, but a library you can use to index documents and
6
+ search for things in them later.
7
+
8
+ == Requirements
9
+
10
+ * Ruby 1.8
11
+ * C compiler to build the extension. Tested with gcc, VC6 and VC2005
12
+
13
+ == Installation
14
+
15
+ If you have gems installed you can simply do;
16
+
17
+ gem install ferret
18
+
19
+ Otherwise, you will need Rake installed. De-compress the archive and enter its top directory.
20
+
21
+ tar zxpvf ferret-<version>.tar.gz
22
+ cd ferret-<version>
23
+
24
+ Run the following;
25
+
26
+ $ rake ext
27
+ $ ruby setup.rb config
28
+ $ ruby setup.rb setup
29
+ # ruby setup.rb install
30
+
31
+ These simple steps install ferret in the default location of Ruby libraries.
32
+ You can also install files into your favorite directory by supplying setup.rb
33
+ some options. Try;
34
+
35
+ $ ruby setup.rb --help
36
+
37
+
38
+ == Usage
39
+
40
+ You can read the TUTORIAL which you'll find in the same directory as this
41
+ README. You can also check the following modules for more specific
42
+ documentation.
43
+
44
+ * Ferret::Analysis: for more information on how the data is processed when it
45
+ is tokenized. There are a number of things you can do with your data such as
46
+ adding stop lists or perhaps a porter stemmer. There are also a number of
47
+ analyzers already available and it is almost trivial to create a new one
48
+ with a simple regular expression.
49
+
50
+ * Ferret::Search: for more information on querying the index. There are a
51
+ number of already available queries and it's unlikely you'll need to create
52
+ your own. You may however want to take advantage of the sorting or filtering
53
+ abilities of Ferret to present your data the best way you see fit.
54
+
55
+ * Ferret::Document: to find out how to create documents. This part of Ferret
56
+ is relatively straightforward. If you know how Strings, Hashes and Arrays work
57
+ in Ferret then you'll be able to create Documents.
58
+
59
+ * Ferret::QueryParser: if you want to find out more about what you can do with
60
+ Ferret's Query Parser, this is the place to look. The query parser is one
61
+ area that could use a bit of work so please send your suggestions.
62
+
63
+ * Ferret::Index: for more advanced access to the index you'll probably want to
64
+ use the Ferret::Index::IndexWriter and Ferret::Index::IndexReader. This is
65
+ the place to look for more information on them.
66
+
67
+ * Ferret::Store: This is the module used to access the actual index storage
68
+ and won't be of much interest to most people.
69
+
70
+ === Performance
71
+
72
+ We are unaware of any alternatives that can out-perform Ferret while still
73
+ matching it in features.
74
+
75
+ == Contact
76
+
77
+ For bug reports and patches I have set up Trac here;
78
+
79
+ http://ferret.davebalmain.com/trac
80
+
81
+ Queries, discussion etc should be addressed to the mailing lists here;
82
+
83
+ http://rubyforge.org/projects/ferret/
84
+
85
+ Alternatively you could create a new page for discussion on the Ferret wiki;
86
+
87
+ http://ferret.davebalmain.com/trac
88
+
89
+ Of course, since Ferret was ported from Apache Lucene, most of what you can
90
+ do with Lucene you can also do with Ferret.
91
+
92
+ == Authors
93
+
94
+ [<b>David Balmain</b>] Port to Ruby
95
+
96
+ [The Apache Software Foundation (Doug Cutting and friends)] Original Apache Lucene
97
+
98
+ == License
99
+
100
+ Ferret is available under an MIT-style license.
101
+
102
+ :include: MIT-LICENSE
data/Rakefile ADDED
@@ -0,0 +1,338 @@
1
+ $:. << 'lib'
2
+ # Some parts of this Rakefile were taken from Jim Weirich's Rakefile for
3
+ # Rake. Other parts were taken from David Heinemeier Hansson's Rails
4
+ # Rakefile. Both are under MIT-LICENSE. Thanks to both for their excellent
5
+ # projects.
6
+
7
+ require 'rake'
8
+ require 'rake/testtask'
9
+ require 'rake/rdoctask'
10
+ require 'rake/clean'
11
+ require 'ferret_version'
12
+
13
+ begin
14
+ require 'rubygems'
15
+ require 'rake/gempackagetask'
16
+ rescue Exception
17
+ nil
18
+ end
19
+
20
+ CURRENT_VERSION = Ferret::VERSION
21
+ if ENV['REL']
22
+ PKG_VERSION = ENV['REL']
23
+ else
24
+ PKG_VERSION = CURRENT_VERSION
25
+ end
26
+
27
+ def announce(msg='')
28
+ STDERR.puts msg
29
+ end
30
+
31
+ EXT = "ferret_ext.so"
32
+ EXT_SRC = FileList["../c/src/*.[c]", "../c/include/*.h",
33
+ "../c/lib/libstemmer_c/src_c/*.[ch]",
34
+ "../c/lib/libstemmer_c/runtime/*.[ch]",
35
+ "../c/lib/libstemmer_c/libstemmer/*.[ch]",
36
+ "../c/lib/libstemmer_c/include/libstemmer.h"]
37
+ EXT_SRC.exclude('../**/ind.[ch]')
38
+
39
+ EXT_SRC_DEST = EXT_SRC.map {|fn| File.join("ext", File.basename(fn))}
40
+ SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
41
+
42
+ CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles',
43
+ '.config', 'ext/cferret.c'])
44
+ CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
45
+ POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
46
+
47
+ desc "Clean specifically for the release."
48
+ task :polish => [:clean] do
49
+ POLISH.each { |fn| rm_r fn rescue nil }
50
+ end
51
+
52
+ desc "Run tests with Valgrind"
53
+ task :valgrind do
54
+ sh "valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp " +
55
+ "--leak-check=yes --show-reachable=yes -v ruby test/test_all.rb"
56
+ #sh "valgrind --suppressions=ferret_valgrind.supp " +
57
+ # "--leak-check=yes --show-reachable=yes -v ruby test/unit/index/tc_index_reader.rb"
58
+ #valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp --leak-check=yes --show-reachable=yes -v ruby test/test_all.rb
59
+ end
60
+
61
+ task :default => :test_all
62
+ #task :default => :ext do
63
+ # sh "ruby test/unit/index/tc_index.rb"
64
+ #end
65
+
66
+ desc "Run all tests"
67
+ task :test_all => [ :test_units ]
68
+
69
+ desc "Generate API documentation"
70
+ task :doc => [ :appdoc ]
71
+
72
+ desc "run unit tests in test/unit"
73
+ Rake::TestTask.new("test_units" => :ext) do |t|
74
+ t.libs << "test/unit"
75
+ t.pattern = 'test/unit/t[cs]_*.rb'
76
+ #t.pattern = 'test/unit/search/tc_index_searcher.rb'
77
+ t.verbose = true
78
+ end
79
+
80
+ desc "Generate documentation for the application"
81
+ rd = Rake::RDocTask.new("appdoc") do |rdoc|
82
+ rdoc.rdoc_dir = 'doc/api'
83
+ rdoc.title = "Ferret Search Library Documentation"
84
+ rdoc.options << '--line-numbers'
85
+ rdoc.options << '--inline-source'
86
+ rdoc.options << '--charset=utf-8'
87
+ rdoc.rdoc_files.include('README')
88
+ rdoc.rdoc_files.include('TODO')
89
+ rdoc.rdoc_files.include('TUTORIAL')
90
+ rdoc.rdoc_files.include('MIT-LICENSE')
91
+ rdoc.rdoc_files.include('lib/**/*.rb')
92
+ rdoc.rdoc_files.include('ext/r_*.c')
93
+ rdoc.rdoc_files.include('ext/ferret.c')
94
+ end
95
+
96
+ EXT_SRC.each do |fn|
97
+ dest_fn = File.join("ext", File.basename(fn))
98
+ file dest_fn => fn do |t|
99
+ begin
100
+ raise "copy for release" if ENV["REL"]
101
+ ln_s File.join("..", fn), dest_fn
102
+ rescue Exception => e
103
+ cp File.expand_path(fn), dest_fn
104
+ end
105
+
106
+ if fn =~ /stemmer/
107
+ # flatten the directory structure for lib_stemmer
108
+ open(dest_fn) do |in_f|
109
+ open(dest_fn + ".out", "w") do |out_f|
110
+ in_f.each {|line| out_f.write(line.sub(/(#include ["<])[.a-z_\/]*\//) {"#{$1}"})}
111
+ end
112
+ end
113
+ mv dest_fn + ".out", dest_fn
114
+ end
115
+ end
116
+ end if File.exists?("../c")
117
+
118
+ desc "Build the extension"
119
+ task :ext => ["ext/#{EXT}"] + SRC do
120
+ rm_f 'ext/mem_pool.*'
121
+ rm_f 'ext/defines.h'
122
+ end
123
+
124
+ file "ext/#{EXT}" => ["ext/Makefile"] do
125
+ cp "ext/inc/lang.h", "ext/lang.h"
126
+ cp "ext/inc/threading.h", "ext/threading.h"
127
+ cd "ext"
128
+ if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
129
+ begin
130
+ sh "nmake"
131
+ rescue Exception => e
132
+ puts
133
+ puts "**********************************************************************"
134
+ puts "You may need to call VCVARS32.BAT to set the environment variables."
135
+ puts ' "f:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT"'
136
+ puts "**********************************************************************"
137
+ puts
138
+ raise e
139
+ end
140
+ else
141
+ sh "make"
142
+ end
143
+ cd ".."
144
+ end
145
+
146
+ file "ext/lang.h" => ["ext/inc/lang.h"] do
147
+ rm_f "ext/lang.h"
148
+ cp "ext/inc/lang.h", "ext/lang.h"
149
+ end
150
+
151
+ file "ext/threading.h" => ["ext/inc/threading.h"] do
152
+ rm_f "ext/threading.h"
153
+ cp "ext/inc/threading.h", "ext/threading.h"
154
+ end
155
+
156
+ file "ext/Makefile" => SRC do
157
+ cd "ext"
158
+ `ruby extconf.rb`
159
+ cd ".."
160
+ end
161
+
162
+ # Make Parsers ---------------------------------------------------------------
163
+
164
+ RACC_SRC = FileList["lib/**/*.y"]
165
+ RACC_OUT = RACC_SRC.collect { |fn| fn.sub(/\.y$/, '.tab.rb') }
166
+
167
+ task :parsers => RACC_OUT
168
+ rule(/\.tab\.rb$/ => [proc {|tn| tn.sub(/\.tab\.rb$/, '.y')}]) do |t|
169
+ sh "racc #{t.source}"
170
+ end
171
+
172
+ # Create Packages ------------------------------------------------------------
173
+
174
+ PKG_FILES = FileList[
175
+ 'setup.rb',
176
+ '[-A-Z]*',
177
+ 'ext/**/*.[ch]',
178
+ 'lib/**/*.rb',
179
+ 'lib/**/*.rhtml',
180
+ 'lib/**/*.css',
181
+ 'lib/**/*.js',
182
+ 'test/**/*.rb',
183
+ 'test/**/wordfile',
184
+ 'rake_utils/**/*.rb',
185
+ 'Rakefile'
186
+ ]
187
+ PKG_FILES.exclude('**/*.o')
188
+ PKG_FILES.exclude('**/Makefile')
189
+ PKG_FILES.exclude('ext/ferret_ext.so')
190
+
191
+
192
+ if ! defined?(Gem)
193
+ puts "Package Target requires RubyGEMs"
194
+ else
195
+ spec = Gem::Specification.new do |s|
196
+
197
+ #### Basic information.
198
+ s.name = 'ferret'
199
+ s.version = PKG_VERSION
200
+ s.summary = "Ruby indexing library."
201
+ s.description = <<-EOF
202
+ Ferret is a port of the Java Lucene project. It is a powerful
203
+ indexing and search library.
204
+ EOF
205
+
206
+ #### Dependencies and requirements.
207
+ s.add_dependency('rake')
208
+ s.files = PKG_FILES.to_a
209
+ s.extensions << "ext/extconf.rb"
210
+ s.require_path = 'lib'
211
+ s.autorequire = 'ferret'
212
+ s.bindir = 'bin'
213
+ s.executables = ['ferret-browser']
214
+ s.default_executable = 'ferret-browser'
215
+
216
+ #### Author and project details.
217
+ s.author = "David Balmain"
218
+ s.email = "dbalmain@gmail.com"
219
+ s.homepage = "http://ferret.davebalmain.com/trac"
220
+ s.rubyforge_project = "ferret"
221
+
222
+ s.has_rdoc = true
223
+ s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
224
+ s.rdoc_options <<
225
+ '--title' << 'Ferret -- Ruby Indexer' <<
226
+ '--main' << 'README' << '--line-numbers' <<
227
+ 'TUTORIAL' << 'TODO'
228
+
229
+ if RUBY_PLATFORM =~ /mswin/
230
+ s.files = PKG_FILES.to_a + ["ext/#{EXT}"]
231
+ s.extensions.clear
232
+ s.platform = Gem::Platform::WIN32
233
+ end
234
+ end
235
+
236
+ package_task = Rake::GemPackageTask.new(spec) do |pkg|
237
+ unless RUBY_PLATFORM =~ /mswin/
238
+ pkg.need_zip = true
239
+ pkg.need_tar = true
240
+ end
241
+ end
242
+ end
243
+
244
+ # Support Tasks ------------------------------------------------------
245
+
246
+ desc "Look for TODO and FIXME tags in the code"
247
+ task :todo do
248
+ FileList['**/*.rb'].egrep /#.*(FIXME|TODO|TBD)/
249
+ end
250
+ # --------------------------------------------------------------------
251
+ # Creating a release
252
+
253
+ desc "Make a new release"
254
+ task :release => [
255
+ :prerelease,
256
+ :polish,
257
+ :test_all,
258
+ :update_version,
259
+ :package,
260
+ :tag] do
261
+ announce
262
+ announce "**************************************************************"
263
+ announce "* Release #{PKG_VERSION} Complete."
264
+ announce "* Packages ready to upload."
265
+ announce "**************************************************************"
266
+ announce
267
+ end
268
+
269
+ # Validate that everything is ready to go for a release.
270
+ task :prerelease do
271
+ announce
272
+ announce "**************************************************************"
273
+ announce "* Making RubyGem Release #{PKG_VERSION}"
274
+ announce "* (current version #{CURRENT_VERSION})"
275
+ announce "**************************************************************"
276
+ announce
277
+
278
+ # Is a release number supplied?
279
+ unless ENV['REL']
280
+ fail "Usage: rake release REL=x.y.z [REUSE=tag_suffix]"
281
+ end
282
+
283
+ # Is the release different from the current release?
284
+ # (or is REUSE set?)
285
+ if PKG_VERSION == CURRENT_VERSION && ! ENV['REUSE']
286
+ fail "Current version is #{PKG_VERSION}, must specify REUSE=tag_suffix to reuse version"
287
+ end
288
+
289
+ # Are all source files checked in?
290
+ data = `svn -q --ignore-externals status`
291
+ unless data =~ /^$/
292
+ fail "'svn -q status' is not clean ... do you have unchecked-in files?"
293
+ end
294
+
295
+ announce "No outstanding checkins found ... OK"
296
+ end
297
+
298
+ def reversion(fn)
299
+ open(fn) do |ferret_in|
300
+ open(fn + ".new", "w") do |ferret_out|
301
+ ferret_in.each do |line|
302
+ if line =~ /^ VERSION\s*=\s*/
303
+ ferret_out.puts " VERSION = '#{PKG_VERSION}'"
304
+ else
305
+ ferret_out.puts line
306
+ end
307
+ end
308
+ end
309
+ end
310
+ mv fn + ".new", fn
311
+ end
312
+
313
+ task :update_version => [:prerelease] do
314
+ if PKG_VERSION == CURRENT_VERSION
315
+ announce "No version change ... skipping version update"
316
+ else
317
+ announce "Updating Ferret version to #{PKG_VERSION}"
318
+ reversion("lib/ferret_version.rb")
319
+ if ENV['RELTEST']
320
+ announce "Release Task Testing, skipping commiting of new version"
321
+ else
322
+ sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/ferret_version.rb}
323
+ end
324
+ end
325
+ end
326
+
327
+ desc "Tag all the SVN files with the latest release number (REL=x.y.z)"
328
+ task :tag => [:prerelease] do
329
+ reltag = "REL-#{PKG_VERSION}"
330
+ reltag << ENV['REUSE'] if ENV['REUSE']
331
+ announce "Tagging SVN with [#{reltag}]"
332
+ if ENV['RELTEST']
333
+ announce "Release Task Testing, skipping SVN tagging. Would do the following;"
334
+ announce %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}}
335
+ else
336
+ sh %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}}
337
+ end
338
+ end