np-ferret 0.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +102 -0
- data/Rakefile +338 -0
- data/TODO +17 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/Makefile +218 -0
- data/ext/analysis.c +1584 -0
- data/ext/analysis.h +219 -0
- data/ext/analysis.o +0 -0
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/api.o +0 -0
- data/ext/array.c +123 -0
- data/ext/array.h +53 -0
- data/ext/array.o +0 -0
- data/ext/bitvector.c +540 -0
- data/ext/bitvector.h +272 -0
- data/ext/bitvector.o +0 -0
- data/ext/compound_io.c +383 -0
- data/ext/compound_io.o +0 -0
- data/ext/config.h +42 -0
- data/ext/document.c +156 -0
- data/ext/document.h +53 -0
- data/ext/document.o +0 -0
- data/ext/except.c +120 -0
- data/ext/except.h +168 -0
- data/ext/except.o +0 -0
- data/ext/extconf.rb +14 -0
- data/ext/ferret.c +402 -0
- data/ext/ferret.h +91 -0
- data/ext/ferret.o +0 -0
- data/ext/ferret_ext.bundle +0 -0
- data/ext/filter.c +156 -0
- data/ext/filter.o +0 -0
- data/ext/fs_store.c +484 -0
- data/ext/fs_store.o +0 -0
- data/ext/global.c +418 -0
- data/ext/global.h +117 -0
- data/ext/global.o +0 -0
- data/ext/hash.c +598 -0
- data/ext/hash.h +475 -0
- data/ext/hash.o +0 -0
- data/ext/hashset.c +170 -0
- data/ext/hashset.h +187 -0
- data/ext/hashset.o +0 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +62 -0
- data/ext/helper.h +13 -0
- data/ext/helper.o +0 -0
- data/ext/inc/lang.h +48 -0
- data/ext/inc/threading.h +31 -0
- data/ext/index.c +6510 -0
- data/ext/index.h +964 -0
- data/ext/index.o +0 -0
- data/ext/lang.h +66 -0
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/libstemmer.o +0 -0
- data/ext/mempool.c +87 -0
- data/ext/mempool.h +35 -0
- data/ext/mempool.o +0 -0
- data/ext/modules.h +162 -0
- data/ext/multimapper.c +310 -0
- data/ext/multimapper.h +51 -0
- data/ext/multimapper.o +0 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/posh.o +0 -0
- data/ext/priorityqueue.c +151 -0
- data/ext/priorityqueue.h +143 -0
- data/ext/priorityqueue.o +0 -0
- data/ext/q_boolean.c +1608 -0
- data/ext/q_boolean.o +0 -0
- data/ext/q_const_score.c +165 -0
- data/ext/q_const_score.o +0 -0
- data/ext/q_filtered_query.c +209 -0
- data/ext/q_filtered_query.o +0 -0
- data/ext/q_fuzzy.c +335 -0
- data/ext/q_fuzzy.o +0 -0
- data/ext/q_match_all.c +148 -0
- data/ext/q_match_all.o +0 -0
- data/ext/q_multi_term.c +677 -0
- data/ext/q_multi_term.o +0 -0
- data/ext/q_parser.c +2825 -0
- data/ext/q_parser.o +0 -0
- data/ext/q_phrase.c +1126 -0
- data/ext/q_phrase.o +0 -0
- data/ext/q_prefix.c +100 -0
- data/ext/q_prefix.o +0 -0
- data/ext/q_range.c +356 -0
- data/ext/q_range.o +0 -0
- data/ext/q_span.c +2402 -0
- data/ext/q_span.o +0 -0
- data/ext/q_term.c +337 -0
- data/ext/q_term.o +0 -0
- data/ext/q_wildcard.c +171 -0
- data/ext/q_wildcard.o +0 -0
- data/ext/r_analysis.c +2636 -0
- data/ext/r_analysis.o +0 -0
- data/ext/r_index.c +3509 -0
- data/ext/r_index.o +0 -0
- data/ext/r_qparser.c +585 -0
- data/ext/r_qparser.o +0 -0
- data/ext/r_search.c +4240 -0
- data/ext/r_search.o +0 -0
- data/ext/r_store.c +513 -0
- data/ext/r_store.o +0 -0
- data/ext/r_utils.c +963 -0
- data/ext/r_utils.o +0 -0
- data/ext/ram_store.c +471 -0
- data/ext/ram_store.o +0 -0
- data/ext/search.c +1743 -0
- data/ext/search.h +885 -0
- data/ext/search.o +0 -0
- data/ext/similarity.c +150 -0
- data/ext/similarity.h +82 -0
- data/ext/similarity.o +0 -0
- data/ext/sort.c +985 -0
- data/ext/sort.o +0 -0
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_danish.o +0 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.o +0 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_english.o +0 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.o +0 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_french.o +0 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_german.o +0 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_italian.o +0 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.o +0 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_porter.o +0 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.o +0 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.o +0 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.o +0 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_KOI8_R_russian.o +0 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_danish.o +0 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_dutch.o +0 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_english.o +0 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_finnish.o +0 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_french.o +0 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_german.o +0 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_italian.o +0 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_norwegian.o +0 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_porter.o +0 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_portuguese.o +0 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_russian.o +0 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_spanish.o +0 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stem_UTF_8_swedish.o +0 -0
- data/ext/stopwords.c +401 -0
- data/ext/stopwords.o +0 -0
- data/ext/store.c +692 -0
- data/ext/store.h +777 -0
- data/ext/store.o +0 -0
- data/ext/term_vectors.c +352 -0
- data/ext/term_vectors.o +0 -0
- data/ext/threading.h +31 -0
- data/ext/utilities.c +446 -0
- data/ext/utilities.o +0 -0
- data/ext/win32.h +54 -0
- data/ferret.gemspec +39 -0
- data/lib/ferret.rb +29 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/index.rb +786 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_ext.bundle +0 -0
- data/lib/ferret_version.rb +3 -0
- data/pkg/ferret-0.11.6.gem +0 -0
- data/pkg/ferret-0.11.6.tgz +0 -0
- data/pkg/ferret-0.11.6.zip +0 -0
- data/setup.rb +1555 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +24 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +79 -0
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +548 -0
- data/test/unit/analysis/tc_token_stream.rb +646 -0
- data/test/unit/index/tc_index.rb +762 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +437 -0
- data/test/unit/index/th_doc.rb +315 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +135 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +61 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +384 -0
- data/test/unit/store/tc_fs_store.rb +77 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +392 -0
data/CHANGELOG
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Fri Oct 20 22:25:37 JST 2006
|
|
2
|
+
* Added Filter#bits method to built-in Filters.
|
|
3
|
+
* Added MappingFilter < TokenFilter that can be used to map strings to other
|
|
4
|
+
strings during analysis. A possible use of this is to filter utf-8
|
|
5
|
+
characters to ascii characters.
|
|
6
|
+
|
|
7
|
+
Fri Oct 13 09:18:31 JST 2006
|
|
8
|
+
* Changed documentation to state truthfully that FULL_ENGLISH_STOP_WORDS is
|
|
9
|
+
being used by default in StandardAnalyzer and StopwordFilter.
|
|
10
|
+
* Removed 'will', 's' and 't' from ENGLISH_STOP_WORDS so that all words in
|
|
11
|
+
ENGLISH_STOP_WORDS can be found in FULL_ENGLISH_STOP_WORDS, that is
|
|
12
|
+
ENGLISH_STOP_WORDS is a subset of FULL_ENGLISH_STOP_WORDS.
|
|
13
|
+
|
|
14
|
+
Thu Oct 12 23:04:19 JST 2006
|
|
15
|
+
* Fixed adding SortField to Sort object in Ruby. Garbage collection wasn't
|
|
16
|
+
working.
|
|
17
|
+
* Can now set :sort => SortField#new
|
|
18
|
+
|
|
19
|
+
Tue Oct 10 14:42:17 JST 2006
|
|
20
|
+
* Fixed MultiTermDocEnum bug introduced in version 0.10.10 during
|
|
21
|
+
performance enhancements.
|
|
22
|
+
* Added Filter#bits(index_reader) method to C implemented filters so that
|
|
23
|
+
they can be used in Ruby.
|
|
24
|
+
|
data/MIT-LICENSE
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Copyright (c) 2005-2006 David Balmain
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
4
|
+
a copy of this software and associated documentation files (the
|
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
9
|
+
the following conditions:
|
|
10
|
+
|
|
11
|
+
The above copyright notice and this permission notice shall be
|
|
12
|
+
included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
= Ferret
|
|
2
|
+
|
|
3
|
+
Ferret is a Ruby port of the Java Lucene search engine.
|
|
4
|
+
(http://jakarta.apache.org/lucene/) In the same way as Lucene, it is not a
|
|
5
|
+
standalone application, but a library you can use to index documents and
|
|
6
|
+
search for things in them later.
|
|
7
|
+
|
|
8
|
+
== Requirements
|
|
9
|
+
|
|
10
|
+
* Ruby 1.8
|
|
11
|
+
* C compiler to build the extension. Tested with gcc, VC6 and VC2005
|
|
12
|
+
|
|
13
|
+
== Installation
|
|
14
|
+
|
|
15
|
+
If you have gems installed you can simply do;
|
|
16
|
+
|
|
17
|
+
gem install ferret
|
|
18
|
+
|
|
19
|
+
Otherwise, you will need Rake installed. De-compress the archive and enter its top directory.
|
|
20
|
+
|
|
21
|
+
tar zxpvf ferret-<version>.tar.gz
|
|
22
|
+
cd ferret-<version>
|
|
23
|
+
|
|
24
|
+
Run the following;
|
|
25
|
+
|
|
26
|
+
$ rake ext
|
|
27
|
+
$ ruby setup.rb config
|
|
28
|
+
$ ruby setup.rb setup
|
|
29
|
+
# ruby setup.rb install
|
|
30
|
+
|
|
31
|
+
These simple steps install ferret in the default location of Ruby libraries.
|
|
32
|
+
You can also install files into your favorite directory by supplying setup.rb
|
|
33
|
+
some options. Try;
|
|
34
|
+
|
|
35
|
+
$ ruby setup.rb --help
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
== Usage
|
|
39
|
+
|
|
40
|
+
You can read the TUTORIAL which you'll find in the same directory as this
|
|
41
|
+
README. You can also check the following modules for more specific
|
|
42
|
+
documentation.
|
|
43
|
+
|
|
44
|
+
* Ferret::Analysis: for more information on how the data is processed when it
|
|
45
|
+
is tokenized. There are a number of things you can do with your data such as
|
|
46
|
+
adding stop lists or perhaps a porter stemmer. There are also a number of
|
|
47
|
+
analyzers already available and it is almost trivial to create a new one
|
|
48
|
+
with a simple regular expression.
|
|
49
|
+
|
|
50
|
+
* Ferret::Search: for more information on querying the index. There are a
|
|
51
|
+
number of already available queries and it's unlikely you'll need to create
|
|
52
|
+
your own. You may however want to take advantage of the sorting or filtering
|
|
53
|
+
abilities of Ferret to present your data the best way you see fit.
|
|
54
|
+
|
|
55
|
+
* Ferret::Document: to find out how to create documents. This part of Ferret
|
|
56
|
+
is relatively straightforward. If you know how Strings, Hashes and Arrays work
|
|
57
|
+
then you'll be able to create Ferret Documents.
|
|
58
|
+
|
|
59
|
+
* Ferret::QueryParser: if you want to find out more about what you can do with
|
|
60
|
+
Ferret's Query Parser, this is the place to look. The query parser is one
|
|
61
|
+
area that could use a bit of work so please send your suggestions.
|
|
62
|
+
|
|
63
|
+
* Ferret::Index: for more advanced access to the index you'll probably want to
|
|
64
|
+
use the Ferret::Index::IndexWriter and Ferret::Index::IndexReader. This is
|
|
65
|
+
the place to look for more information on them.
|
|
66
|
+
|
|
67
|
+
* Ferret::Store: This is the module used to access the actual index storage
|
|
68
|
+
and won't be of much interest to most people.
|
|
69
|
+
|
|
70
|
+
=== Performance
|
|
71
|
+
|
|
72
|
+
We are unaware of any alternatives that can out-perform Ferret while still
|
|
73
|
+
matching it in features.
|
|
74
|
+
|
|
75
|
+
== Contact
|
|
76
|
+
|
|
77
|
+
For bug reports and patches I have set up Trac here;
|
|
78
|
+
|
|
79
|
+
http://ferret.davebalmain.com/trac
|
|
80
|
+
|
|
81
|
+
Queries, discussion etc should be addressed to the mailing lists here;
|
|
82
|
+
|
|
83
|
+
http://rubyforge.org/projects/ferret/
|
|
84
|
+
|
|
85
|
+
Alternatively you could create a new page for discussion on the Ferret wiki;
|
|
86
|
+
|
|
87
|
+
http://ferret.davebalmain.com/trac
|
|
88
|
+
|
|
89
|
+
Of course, since Ferret was ported from Apache Lucene, most of what you can
|
|
90
|
+
do with Lucene you can also do with Ferret.
|
|
91
|
+
|
|
92
|
+
== Authors
|
|
93
|
+
|
|
94
|
+
[<b>David Balmain</b>] Port to Ruby
|
|
95
|
+
|
|
96
|
+
[The Apache Software Foundation (Doug Cutting and friends)] Original Apache Lucene
|
|
97
|
+
|
|
98
|
+
== License
|
|
99
|
+
|
|
100
|
+
Ferret is available under an MIT-style license.
|
|
101
|
+
|
|
102
|
+
:include: MIT-LICENSE
|
data/Rakefile
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
$:. << 'lib'
|
|
2
|
+
# Some parts of this Rakefile were taken from Jim Weirich's Rakefile for
|
|
3
|
+
# Rake. Other parts were taken from David Heinemeier Hansson's Rails
|
|
4
|
+
# Rakefile. Both are under MIT-LICENSE. Thanks to both for their excellent
|
|
5
|
+
# projects.
|
|
6
|
+
|
|
7
|
+
require 'rake'
|
|
8
|
+
require 'rake/testtask'
|
|
9
|
+
require 'rake/rdoctask'
|
|
10
|
+
require 'rake/clean'
|
|
11
|
+
require 'ferret_version'
|
|
12
|
+
|
|
13
|
+
begin
|
|
14
|
+
require 'rubygems'
|
|
15
|
+
require 'rake/gempackagetask'
|
|
16
|
+
rescue Exception
|
|
17
|
+
nil
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
CURRENT_VERSION = Ferret::VERSION
|
|
21
|
+
if ENV['REL']
|
|
22
|
+
PKG_VERSION = ENV['REL']
|
|
23
|
+
else
|
|
24
|
+
PKG_VERSION = CURRENT_VERSION
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def announce(msg='')
|
|
28
|
+
STDERR.puts msg
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
EXT = "ferret_ext.so"
|
|
32
|
+
EXT_SRC = FileList["../c/src/*.[c]", "../c/include/*.h",
|
|
33
|
+
"../c/lib/libstemmer_c/src_c/*.[ch]",
|
|
34
|
+
"../c/lib/libstemmer_c/runtime/*.[ch]",
|
|
35
|
+
"../c/lib/libstemmer_c/libstemmer/*.[ch]",
|
|
36
|
+
"../c/lib/libstemmer_c/include/libstemmer.h"]
|
|
37
|
+
EXT_SRC.exclude('../**/ind.[ch]')
|
|
38
|
+
|
|
39
|
+
EXT_SRC_DEST = EXT_SRC.map {|fn| File.join("ext", File.basename(fn))}
|
|
40
|
+
SRC = (FileList["ext/*.[ch]"] + EXT_SRC_DEST).uniq
|
|
41
|
+
|
|
42
|
+
CLEAN.include(FileList['**/*.o', '**/*.obj', 'InstalledFiles',
|
|
43
|
+
'.config', 'ext/cferret.c'])
|
|
44
|
+
CLOBBER.include(FileList['**/*.so'], 'ext/Makefile', EXT_SRC_DEST)
|
|
45
|
+
POLISH = Rake::FileList.new.include(FileList['**/*.so'], 'ext/Makefile')
|
|
46
|
+
|
|
47
|
+
desc "Clean specifically for the release."
|
|
48
|
+
task :polish => [:clean] do
|
|
49
|
+
POLISH.each { |fn| rm_r fn rescue nil }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
desc "Run tests with Valgrind"
|
|
53
|
+
task :valgrind do
|
|
54
|
+
sh "valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp " +
|
|
55
|
+
"--leak-check=yes --show-reachable=yes -v ruby test/test_all.rb"
|
|
56
|
+
#sh "valgrind --suppressions=ferret_valgrind.supp " +
|
|
57
|
+
# "--leak-check=yes --show-reachable=yes -v ruby test/unit/index/tc_index_reader.rb"
|
|
58
|
+
#valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp --leak-check=yes --show-reachable=yes -v ruby test/test_all.rb
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
task :default => :test_all
|
|
62
|
+
#task :default => :ext do
|
|
63
|
+
# sh "ruby test/unit/index/tc_index.rb"
|
|
64
|
+
#end
|
|
65
|
+
|
|
66
|
+
desc "Run all tests"
|
|
67
|
+
task :test_all => [ :test_units ]
|
|
68
|
+
|
|
69
|
+
desc "Generate API documentation"
|
|
70
|
+
task :doc => [ :appdoc ]
|
|
71
|
+
|
|
72
|
+
desc "run unit tests in test/unit"
|
|
73
|
+
Rake::TestTask.new("test_units" => :ext) do |t|
|
|
74
|
+
t.libs << "test/unit"
|
|
75
|
+
t.pattern = 'test/unit/t[cs]_*.rb'
|
|
76
|
+
#t.pattern = 'test/unit/search/tc_index_searcher.rb'
|
|
77
|
+
t.verbose = true
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
desc "Generate documentation for the application"
|
|
81
|
+
rd = Rake::RDocTask.new("appdoc") do |rdoc|
|
|
82
|
+
rdoc.rdoc_dir = 'doc/api'
|
|
83
|
+
rdoc.title = "Ferret Search Library Documentation"
|
|
84
|
+
rdoc.options << '--line-numbers'
|
|
85
|
+
rdoc.options << '--inline-source'
|
|
86
|
+
rdoc.options << '--charset=utf-8'
|
|
87
|
+
rdoc.rdoc_files.include('README')
|
|
88
|
+
rdoc.rdoc_files.include('TODO')
|
|
89
|
+
rdoc.rdoc_files.include('TUTORIAL')
|
|
90
|
+
rdoc.rdoc_files.include('MIT-LICENSE')
|
|
91
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
92
|
+
rdoc.rdoc_files.include('ext/r_*.c')
|
|
93
|
+
rdoc.rdoc_files.include('ext/ferret.c')
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
EXT_SRC.each do |fn|
|
|
97
|
+
dest_fn = File.join("ext", File.basename(fn))
|
|
98
|
+
file dest_fn => fn do |t|
|
|
99
|
+
begin
|
|
100
|
+
raise "copy for release" if ENV["REL"]
|
|
101
|
+
ln_s File.join("..", fn), dest_fn
|
|
102
|
+
rescue Exception => e
|
|
103
|
+
cp File.expand_path(fn), dest_fn
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
if fn =~ /stemmer/
|
|
107
|
+
# flatten the directory structure for lib_stemmer
|
|
108
|
+
open(dest_fn) do |in_f|
|
|
109
|
+
open(dest_fn + ".out", "w") do |out_f|
|
|
110
|
+
in_f.each {|line| out_f.write(line.sub(/(#include ["<])[.a-z_\/]*\//) {"#{$1}"})}
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
mv dest_fn + ".out", dest_fn
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
end if File.exists?("../c")
|
|
117
|
+
|
|
118
|
+
desc "Build the extension"
|
|
119
|
+
task :ext => ["ext/#{EXT}"] + SRC do
|
|
120
|
+
rm_f 'ext/mem_pool.*'
|
|
121
|
+
rm_f 'ext/defines.h'
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
file "ext/#{EXT}" => ["ext/Makefile"] do
|
|
125
|
+
cp "ext/inc/lang.h", "ext/lang.h"
|
|
126
|
+
cp "ext/inc/threading.h", "ext/threading.h"
|
|
127
|
+
cd "ext"
|
|
128
|
+
if (/mswin/ =~ RUBY_PLATFORM) and ENV['make'].nil?
|
|
129
|
+
begin
|
|
130
|
+
sh "nmake"
|
|
131
|
+
rescue Exception => e
|
|
132
|
+
puts
|
|
133
|
+
puts "**********************************************************************"
|
|
134
|
+
puts "You may need to call VCVARS32.BAT to set the environment variables."
|
|
135
|
+
puts ' "f:\Program Files\Microsoft Visual Studio\VC98\Bin\VCVARS32.BAT"'
|
|
136
|
+
puts "**********************************************************************"
|
|
137
|
+
puts
|
|
138
|
+
raise e
|
|
139
|
+
end
|
|
140
|
+
else
|
|
141
|
+
sh "make"
|
|
142
|
+
end
|
|
143
|
+
cd ".."
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
file "ext/lang.h" => ["ext/inc/lang.h"] do
|
|
147
|
+
rm_f "ext/lang.h"
|
|
148
|
+
cp "ext/inc/lang.h", "ext/lang.h"
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
file "ext/threading.h" => ["ext/inc/threading.h"] do
|
|
152
|
+
rm_f "ext/threading.h"
|
|
153
|
+
cp "ext/inc/threading.h", "ext/threading.h"
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
file "ext/Makefile" => SRC do
|
|
157
|
+
cd "ext"
|
|
158
|
+
`ruby extconf.rb`
|
|
159
|
+
cd ".."
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
# Make Parsers ---------------------------------------------------------------
|
|
163
|
+
|
|
164
|
+
RACC_SRC = FileList["lib/**/*.y"]
|
|
165
|
+
RACC_OUT = RACC_SRC.collect { |fn| fn.sub(/\.y$/, '.tab.rb') }
|
|
166
|
+
|
|
167
|
+
task :parsers => RACC_OUT
|
|
168
|
+
rule(/\.tab\.rb$/ => [proc {|tn| tn.sub(/\.tab\.rb$/, '.y')}]) do |t|
|
|
169
|
+
sh "racc #{t.source}"
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
# Create Packages ------------------------------------------------------------
|
|
173
|
+
|
|
174
|
+
PKG_FILES = FileList[
|
|
175
|
+
'setup.rb',
|
|
176
|
+
'[-A-Z]*',
|
|
177
|
+
'ext/**/*.[ch]',
|
|
178
|
+
'lib/**/*.rb',
|
|
179
|
+
'lib/**/*.rhtml',
|
|
180
|
+
'lib/**/*.css',
|
|
181
|
+
'lib/**/*.js',
|
|
182
|
+
'test/**/*.rb',
|
|
183
|
+
'test/**/wordfile',
|
|
184
|
+
'rake_utils/**/*.rb',
|
|
185
|
+
'Rakefile'
|
|
186
|
+
]
|
|
187
|
+
PKG_FILES.exclude('**/*.o')
|
|
188
|
+
PKG_FILES.exclude('**/Makefile')
|
|
189
|
+
PKG_FILES.exclude('ext/ferret_ext.so')
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
if ! defined?(Gem)
|
|
193
|
+
puts "Package Target requires RubyGEMs"
|
|
194
|
+
else
|
|
195
|
+
spec = Gem::Specification.new do |s|
|
|
196
|
+
|
|
197
|
+
#### Basic information.
|
|
198
|
+
s.name = 'ferret'
|
|
199
|
+
s.version = PKG_VERSION
|
|
200
|
+
s.summary = "Ruby indexing library."
|
|
201
|
+
s.description = <<-EOF
|
|
202
|
+
Ferret is a port of the Java Lucene project. It is a powerful
|
|
203
|
+
indexing and search library.
|
|
204
|
+
EOF
|
|
205
|
+
|
|
206
|
+
#### Dependencies and requirements.
|
|
207
|
+
s.add_dependency('rake')
|
|
208
|
+
s.files = PKG_FILES.to_a
|
|
209
|
+
s.extensions << "ext/extconf.rb"
|
|
210
|
+
s.require_path = 'lib'
|
|
211
|
+
s.autorequire = 'ferret'
|
|
212
|
+
s.bindir = 'bin'
|
|
213
|
+
s.executables = ['ferret-browser']
|
|
214
|
+
s.default_executable = 'ferret-browser'
|
|
215
|
+
|
|
216
|
+
#### Author and project details.
|
|
217
|
+
s.author = "David Balmain"
|
|
218
|
+
s.email = "dbalmain@gmail.com"
|
|
219
|
+
s.homepage = "http://ferret.davebalmain.com/trac"
|
|
220
|
+
s.rubyforge_project = "ferret"
|
|
221
|
+
|
|
222
|
+
s.has_rdoc = true
|
|
223
|
+
s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
|
|
224
|
+
s.rdoc_options <<
|
|
225
|
+
'--title' << 'Ferret -- Ruby Indexer' <<
|
|
226
|
+
'--main' << 'README' << '--line-numbers' <<
|
|
227
|
+
'TUTORIAL' << 'TODO'
|
|
228
|
+
|
|
229
|
+
if RUBY_PLATFORM =~ /mswin/
|
|
230
|
+
s.files = PKG_FILES.to_a + ["ext/#{EXT}"]
|
|
231
|
+
s.extensions.clear
|
|
232
|
+
s.platform = Gem::Platform::WIN32
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
package_task = Rake::GemPackageTask.new(spec) do |pkg|
|
|
237
|
+
unless RUBY_PLATFORM =~ /mswin/
|
|
238
|
+
pkg.need_zip = true
|
|
239
|
+
pkg.need_tar = true
|
|
240
|
+
end
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Support Tasks ------------------------------------------------------
|
|
245
|
+
|
|
246
|
+
desc "Look for TODO and FIXME tags in the code"
|
|
247
|
+
task :todo do
|
|
248
|
+
FileList['**/*.rb'].egrep /#.*(FIXME|TODO|TBD)/
|
|
249
|
+
end
|
|
250
|
+
# --------------------------------------------------------------------
|
|
251
|
+
# Creating a release
|
|
252
|
+
|
|
253
|
+
desc "Make a new release"
|
|
254
|
+
task :release => [
|
|
255
|
+
:prerelease,
|
|
256
|
+
:polish,
|
|
257
|
+
:test_all,
|
|
258
|
+
:update_version,
|
|
259
|
+
:package,
|
|
260
|
+
:tag] do
|
|
261
|
+
announce
|
|
262
|
+
announce "**************************************************************"
|
|
263
|
+
announce "* Release #{PKG_VERSION} Complete."
|
|
264
|
+
announce "* Packages ready to upload."
|
|
265
|
+
announce "**************************************************************"
|
|
266
|
+
announce
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
# Validate that everything is ready to go for a release.
|
|
270
|
+
task :prerelease do
|
|
271
|
+
announce
|
|
272
|
+
announce "**************************************************************"
|
|
273
|
+
announce "* Making RubyGem Release #{PKG_VERSION}"
|
|
274
|
+
announce "* (current version #{CURRENT_VERSION})"
|
|
275
|
+
announce "**************************************************************"
|
|
276
|
+
announce
|
|
277
|
+
|
|
278
|
+
# Is a release number supplied?
|
|
279
|
+
unless ENV['REL']
|
|
280
|
+
fail "Usage: rake release REL=x.y.z [REUSE=tag_suffix]"
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
# Is the release different from the current release?
|
|
284
|
+
# (or is REUSE set?)
|
|
285
|
+
if PKG_VERSION == CURRENT_VERSION && ! ENV['REUSE']
|
|
286
|
+
fail "Current version is #{PKG_VERSION}, must specify REUSE=tag_suffix to reuse version"
|
|
287
|
+
end
|
|
288
|
+
|
|
289
|
+
# Are all source files checked in?
|
|
290
|
+
data = `svn -q --ignore-externals status`
|
|
291
|
+
unless data =~ /^$/
|
|
292
|
+
fail "'svn -q status' is not clean ... do you have unchecked-in files?"
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
announce "No outstanding checkins found ... OK"
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
def reversion(fn)
|
|
299
|
+
open(fn) do |ferret_in|
|
|
300
|
+
open(fn + ".new", "w") do |ferret_out|
|
|
301
|
+
ferret_in.each do |line|
|
|
302
|
+
if line =~ /^ VERSION\s*=\s*/
|
|
303
|
+
ferret_out.puts " VERSION = '#{PKG_VERSION}'"
|
|
304
|
+
else
|
|
305
|
+
ferret_out.puts line
|
|
306
|
+
end
|
|
307
|
+
end
|
|
308
|
+
end
|
|
309
|
+
end
|
|
310
|
+
mv fn + ".new", fn
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
task :update_version => [:prerelease] do
|
|
314
|
+
if PKG_VERSION == CURRENT_VERSION
|
|
315
|
+
announce "No version change ... skipping version update"
|
|
316
|
+
else
|
|
317
|
+
announce "Updating Ferret version to #{PKG_VERSION}"
|
|
318
|
+
reversion("lib/ferret_version.rb")
|
|
319
|
+
if ENV['RELTEST']
|
|
320
|
+
announce "Release Task Testing, skipping commiting of new version"
|
|
321
|
+
else
|
|
322
|
+
sh %{svn ci -m "Updated to version #{PKG_VERSION}" lib/ferret_version.rb}
|
|
323
|
+
end
|
|
324
|
+
end
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
desc "Tag all the SVN files with the latest release number (REL=x.y.z)"
|
|
328
|
+
task :tag => [:prerelease] do
|
|
329
|
+
reltag = "REL-#{PKG_VERSION}"
|
|
330
|
+
reltag << ENV['REUSE'] if ENV['REUSE']
|
|
331
|
+
announce "Tagging SVN with [#{reltag}]"
|
|
332
|
+
if ENV['RELTEST']
|
|
333
|
+
announce "Release Task Testing, skipping SVN tagging. Would do the following;"
|
|
334
|
+
announce %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}}
|
|
335
|
+
else
|
|
336
|
+
sh %{svn copy -m "creating release #{reltag}" svn://www.davebalmain.com/ferret/trunk svn://www.davebalmain.com/ferret/tags/#{reltag}}
|
|
337
|
+
end
|
|
338
|
+
end
|