ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_store.c
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
#include "store.h"
|
3
3
|
|
4
4
|
VALUE cLock;
|
5
|
+
VALUE cLockError;
|
5
6
|
VALUE cDirectory;
|
6
7
|
VALUE cRAMDirectory;
|
7
8
|
VALUE cFSDirectory;
|
@@ -15,69 +16,120 @@ VALUE cFSDirectory;
|
|
15
16
|
void
|
16
17
|
frt_lock_free(void *p)
|
17
18
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
Lock *lock = (Lock *)p;
|
20
|
+
if (object_get(lock->store) != Qnil) {
|
21
|
+
lock->store->close_lock(lock);
|
22
|
+
} else {
|
23
|
+
free(lock->name);
|
24
|
+
free(lock);
|
25
|
+
}
|
25
26
|
}
|
26
27
|
|
27
28
|
void
|
28
29
|
frt_lock_mark(void *p)
|
29
30
|
{
|
30
|
-
|
31
|
-
|
31
|
+
Lock *lock = (Lock *)p;
|
32
|
+
frt_gc_mark(lock->store);
|
32
33
|
}
|
33
34
|
|
34
|
-
#define GET_LOCK
|
35
|
+
#define GET_LOCK(lock, self) Data_Get_Struct(self, Lock, lock)
|
36
|
+
|
37
|
+
/*
|
38
|
+
* call-seq:
|
39
|
+
* lock.obtain(timeout = 1) -> bool
|
40
|
+
*
|
41
|
+
* Obtain a lock. Returns true if lock was successfully obtained. Make sure
|
42
|
+
* the lock is released using Lock#release. Otherwise you'll be left with a
|
43
|
+
* stale lock file.
|
44
|
+
*
|
45
|
+
* The timeout defaults to 1 second and 5 attempts are made to obtain the
|
46
|
+
* lock. If you're doing large batch updates on the index with multiple
|
47
|
+
* processes you may need to increase the lock timeout but 1 second will be
|
48
|
+
* substantial in most cases.
|
49
|
+
*
|
50
|
+
* timeout:: seconds to wait to obtain lock before timing out and returning
|
51
|
+
* false
|
52
|
+
* return:: true if lock was successfully obtained. Raises a
|
53
|
+
* Lock::LockError otherwise.
|
54
|
+
*/
|
35
55
|
static VALUE
|
36
56
|
frt_lock_obtain(int argc, VALUE *argv, VALUE self)
|
37
57
|
{
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
58
|
+
VALUE rtimeout;
|
59
|
+
int timeout = 1;
|
60
|
+
Lock *lock;
|
61
|
+
GET_LOCK(lock, self);
|
62
|
+
|
63
|
+
if (rb_scan_args(argc, argv, "01", &rtimeout) > 0) {
|
64
|
+
timeout = FIX2INT(rtimeout);
|
65
|
+
}
|
66
|
+
/* TODO: use the lock timeout */
|
67
|
+
if (!lock->obtain(lock)) {
|
68
|
+
rb_raise(cLockError, "could not obtain lock: #%s", lock->name);
|
69
|
+
}
|
70
|
+
return Qtrue;
|
49
71
|
}
|
50
72
|
|
73
|
+
/*
|
74
|
+
* call-seq:
|
75
|
+
* lock.while_locked(timeout = 1) { do_something() } -> bool
|
76
|
+
*
|
77
|
+
* Run the code in a block while a lock is obtained, automatically releasing
|
78
|
+
* the lock when the block returns.
|
79
|
+
*
|
80
|
+
* See Lock#obtain for more information on lock timeout.
|
81
|
+
*
|
82
|
+
* timeout:: seconds to wait to obtain lock before timing out and returning
|
83
|
+
* false
|
84
|
+
* return:: true if lock was successfully obtained. Raises a
|
85
|
+
* Lock::LockError otherwise.
|
86
|
+
*/
|
51
87
|
static VALUE
|
52
88
|
frt_lock_while_locked(int argc, VALUE *argv, VALUE self)
|
53
89
|
{
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
90
|
+
VALUE rtimeout;
|
91
|
+
int timeout = 1;
|
92
|
+
Lock *lock;
|
93
|
+
GET_LOCK(lock, self);
|
94
|
+
if (rb_scan_args(argc, argv, "01", &rtimeout) > 0) {
|
95
|
+
timeout = FIX2INT(rtimeout);
|
96
|
+
}
|
97
|
+
if (!lock->obtain(lock)) {
|
98
|
+
rb_raise(cLockError, "could not obtain lock: #%s", lock->name);
|
99
|
+
}
|
100
|
+
rb_yield(Qnil);
|
101
|
+
lock->release(lock);
|
102
|
+
return Qtrue;
|
66
103
|
}
|
67
104
|
|
105
|
+
/*
|
106
|
+
* call-seq:
|
107
|
+
* lock.locked? -> bool
|
108
|
+
*
|
109
|
+
* Returns true if the lock has been obtained.
|
110
|
+
*/
|
68
111
|
static VALUE
|
69
112
|
frt_lock_is_locked(VALUE self)
|
70
113
|
{
|
71
|
-
|
72
|
-
|
114
|
+
Lock *lock;
|
115
|
+
GET_LOCK(lock, self);
|
116
|
+
return lock->is_locked(lock) ? Qtrue : Qfalse;
|
73
117
|
}
|
74
118
|
|
119
|
+
/*
|
120
|
+
* call-seq:
|
121
|
+
* lock.release() -> self
|
122
|
+
*
|
123
|
+
* Release the lock. This should only be called by the process which obtains
|
124
|
+
* the lock.
|
125
|
+
*/
|
75
126
|
static VALUE
|
76
127
|
frt_lock_release(VALUE self)
|
77
128
|
{
|
78
|
-
|
79
|
-
|
80
|
-
|
129
|
+
Lock *lock;
|
130
|
+
GET_LOCK(lock, self);
|
131
|
+
lock->release(lock);
|
132
|
+
return self;
|
81
133
|
}
|
82
134
|
|
83
135
|
/****************************************************************************
|
@@ -89,86 +141,142 @@ frt_lock_release(VALUE self)
|
|
89
141
|
void
|
90
142
|
frt_dir_free(Store *store)
|
91
143
|
{
|
92
|
-
|
93
|
-
|
144
|
+
object_del(store);
|
145
|
+
store_deref(store);
|
94
146
|
}
|
95
147
|
|
96
|
-
#define GET_STORE
|
148
|
+
#define GET_STORE(store, self) Data_Get_Struct(self, Store, store)
|
149
|
+
/*
|
150
|
+
* call-seq:
|
151
|
+
* dir.close() -> nil
|
152
|
+
*
|
153
|
+
* It is a good idea to close a directory when you have finished using it.
|
154
|
+
* Although the garbage collector will currently handle this for you, this
|
155
|
+
* behaviour may change in future.
|
156
|
+
*/
|
97
157
|
static VALUE
|
98
158
|
frt_dir_close(VALUE self)
|
99
159
|
{
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
160
|
+
/*
|
161
|
+
* No need to do anything here. Leave it to the garbage collector
|
162
|
+
GET_STORE;
|
163
|
+
Frt_Unwrap_Struct(self);
|
164
|
+
object_del(store);
|
165
|
+
store_deref(store);
|
166
|
+
*/
|
167
|
+
return Qnil;
|
108
168
|
}
|
109
169
|
|
170
|
+
/*
|
171
|
+
* call-seq:
|
172
|
+
* dir.exists?(file_name) -> bool
|
173
|
+
*
|
174
|
+
* Return true if a file with the name +file_name+ exists in the directory.
|
175
|
+
*/
|
110
176
|
static VALUE
|
111
177
|
frt_dir_exists(VALUE self, VALUE rfname)
|
112
178
|
{
|
113
|
-
|
114
|
-
|
115
|
-
|
179
|
+
Store *store;
|
180
|
+
GET_STORE(store, self);
|
181
|
+
StringValue(rfname);
|
182
|
+
return store->exists(store, RSTRING(rfname)->ptr) ? Qtrue : Qfalse;
|
116
183
|
}
|
117
184
|
|
185
|
+
/*
|
186
|
+
* call-seq:
|
187
|
+
* dir.touch(file_name) -> nil
|
188
|
+
*
|
189
|
+
* Create an empty file in the directory with the name +file_name+.
|
190
|
+
*/
|
118
191
|
static VALUE
|
119
192
|
frt_dir_touch(VALUE self, VALUE rfname)
|
120
193
|
{
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
194
|
+
Store *store;
|
195
|
+
GET_STORE(store, self);
|
196
|
+
StringValue(rfname);
|
197
|
+
store->touch(store, RSTRING(rfname)->ptr);
|
198
|
+
return Qnil;
|
125
199
|
}
|
126
200
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
201
|
+
/*
|
202
|
+
* call-seq:
|
203
|
+
* dir.delete(file_name) -> bool
|
204
|
+
*
|
205
|
+
* Remove file +file_name+ from the directory. Returns true if successful.
|
206
|
+
*/
|
132
207
|
static VALUE
|
133
208
|
frt_dir_delete(VALUE self, VALUE rfname)
|
134
209
|
{
|
135
|
-
|
136
|
-
|
137
|
-
|
210
|
+
Store *store;
|
211
|
+
GET_STORE(store, self);
|
212
|
+
StringValue(rfname);
|
213
|
+
return (store->remove(store, RSTRING(rfname)->ptr) == 0) ? Qtrue : Qfalse;
|
138
214
|
}
|
139
215
|
|
216
|
+
/*
|
217
|
+
* call-seq:
|
218
|
+
* dir.file_count -> integer
|
219
|
+
*
|
220
|
+
* Return a count of the number of files in the directory.
|
221
|
+
*/
|
140
222
|
static VALUE
|
141
223
|
frt_dir_file_count(VALUE self)
|
142
224
|
{
|
143
|
-
|
144
|
-
|
225
|
+
Store *store;
|
226
|
+
GET_STORE(store, self);
|
227
|
+
return INT2FIX(store->count(store));
|
145
228
|
}
|
146
229
|
|
230
|
+
/*
|
231
|
+
* call-seq:
|
232
|
+
* dir.refresh -> self
|
233
|
+
*
|
234
|
+
* Delete all files in the directory. It gives you a clean slate.
|
235
|
+
*/
|
147
236
|
static VALUE
|
148
237
|
frt_dir_refresh(VALUE self)
|
149
238
|
{
|
150
|
-
|
151
|
-
|
152
|
-
|
239
|
+
Store *store;
|
240
|
+
GET_STORE(store, self);
|
241
|
+
store->clear_all(store);
|
242
|
+
return self;
|
153
243
|
}
|
154
244
|
|
245
|
+
/*
|
246
|
+
* call-seq:
|
247
|
+
* dir.rename(from, to) -> self
|
248
|
+
*
|
249
|
+
* Rename a file from +from+ to +to+. An error will be raised if the file
|
250
|
+
* doesn't exist or there is some other type of IOError.
|
251
|
+
*/
|
155
252
|
static VALUE
|
156
253
|
frt_dir_rename(VALUE self, VALUE rfrom, VALUE rto)
|
157
254
|
{
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
255
|
+
Store *store;
|
256
|
+
GET_STORE(store, self);
|
257
|
+
StringValue(rfrom);
|
258
|
+
StringValue(rto);
|
259
|
+
store->rename(store, RSTRING(rfrom)->ptr, RSTRING(rto)->ptr);
|
260
|
+
return self;
|
163
261
|
}
|
164
262
|
|
263
|
+
/*
|
264
|
+
* call-seq:
|
265
|
+
* dir.make_lock(lock_name) -> lock
|
266
|
+
*
|
267
|
+
* Make a lock with the name +lock_name+. Note that lockfiles will be stored
|
268
|
+
* in the directory with other files but they won't be visible to you. You
|
269
|
+
* should avoid using files with a .lck extension as this extension is
|
270
|
+
* reserved for lock files
|
271
|
+
*/
|
165
272
|
static VALUE
|
166
273
|
frt_dir_make_lock(VALUE self, VALUE rlock_name)
|
167
274
|
{
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
275
|
+
Store *store;
|
276
|
+
GET_STORE(store, self);
|
277
|
+
StringValue(rlock_name);
|
278
|
+
return Data_Wrap_Struct(cLock, &frt_lock_mark, &frt_lock_free,
|
279
|
+
store->open_lock(store, RSTRING(rlock_name)->ptr));
|
172
280
|
}
|
173
281
|
|
174
282
|
/****************************************************************************
|
@@ -177,26 +285,35 @@ frt_dir_make_lock(VALUE self, VALUE rlock_name)
|
|
177
285
|
*
|
178
286
|
****************************************************************************/
|
179
287
|
|
288
|
+
/*
|
289
|
+
* call-seq:
|
290
|
+
* RAMDirectory.new(dir = nil)
|
291
|
+
*
|
292
|
+
* Create a new RAMDirectory.
|
293
|
+
*
|
294
|
+
* You can optionally load another Directory (usually a FSDirectory) into
|
295
|
+
* memory. This may be useful to speed up search performance but usually the
|
296
|
+
* speedup won't be worth the trouble. Be sure to benchmark.
|
297
|
+
*
|
298
|
+
* dir:: Directory to load into memory
|
299
|
+
*/
|
180
300
|
static VALUE
|
181
301
|
frt_ramdir_init(int argc, VALUE *argv, VALUE self)
|
182
302
|
{
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
Frt_Wrap_Struct(self, NULL, frt_dir_free, store);
|
198
|
-
object_add(store, self);
|
199
|
-
return self;
|
303
|
+
VALUE rdir;
|
304
|
+
Store *store;
|
305
|
+
switch (rb_scan_args(argc, argv, "01", &rdir)) {
|
306
|
+
case 1: {
|
307
|
+
Store *ostore;
|
308
|
+
Data_Get_Struct(rdir, Store, ostore);
|
309
|
+
store = open_ram_store_and_copy(ostore, false);
|
310
|
+
break;
|
311
|
+
}
|
312
|
+
default: store = open_ram_store();
|
313
|
+
}
|
314
|
+
Frt_Wrap_Struct(self, NULL, frt_dir_free, store);
|
315
|
+
object_add(store, self);
|
316
|
+
return self;
|
200
317
|
}
|
201
318
|
|
202
319
|
/****************************************************************************
|
@@ -205,29 +322,46 @@ frt_ramdir_init(int argc, VALUE *argv, VALUE self)
|
|
205
322
|
*
|
206
323
|
****************************************************************************/
|
207
324
|
|
325
|
+
/*
|
326
|
+
* call-seq:
|
327
|
+
* FSDirectory.new(/path/to/index/, create = false)
|
328
|
+
*
|
329
|
+
* Create a new FSDirectory at +/path/to/index/+ which must be a valid path
|
330
|
+
* on your file system. If it doesn't exist it will be created. You can also
|
331
|
+
* specify the +create+ parameter. If +create+ is true the FSDirectory will
|
332
|
+
* be refreshed as new. That is to say, any existing files in the directory
|
333
|
+
* will be deleted. The default value for +create+ is false.
|
334
|
+
*
|
335
|
+
* path:: path to index directory. Must be a valid path on your system
|
336
|
+
* create:: set to true if you want any existing files in the directory to be
|
337
|
+
* deleted
|
338
|
+
*/
|
208
339
|
static VALUE
|
209
|
-
frt_fsdir_new(
|
340
|
+
frt_fsdir_new(int argc, VALUE *argv, VALUE klass)
|
210
341
|
{
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
342
|
+
VALUE self, rpath, rcreate;
|
343
|
+
Store *store;
|
344
|
+
bool create;
|
345
|
+
|
346
|
+
rb_scan_args(argc, argv, "11", &rpath, &rcreate);
|
347
|
+
StringValue(rpath);
|
348
|
+
create = RTEST(rcreate);
|
349
|
+
if (create) {
|
350
|
+
frt_create_dir(rpath);
|
351
|
+
}
|
352
|
+
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
353
|
+
rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
|
354
|
+
"create one.", RSTRING(rpath)->ptr);
|
355
|
+
}
|
356
|
+
store = open_fs_store(RSTRING(rpath)->ptr);
|
357
|
+
if (create) store->clear_all(store);
|
358
|
+
if ((self = object_get(store)) == Qnil) {
|
359
|
+
self = Data_Wrap_Struct(klass, NULL, &frt_dir_free, store);
|
360
|
+
object_add(store, self);
|
361
|
+
} else {
|
362
|
+
store_deref(store);
|
363
|
+
}
|
364
|
+
return self;
|
231
365
|
}
|
232
366
|
|
233
367
|
/****************************************************************************
|
@@ -236,33 +370,128 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
|
|
236
370
|
*
|
237
371
|
****************************************************************************/
|
238
372
|
|
373
|
+
/*
|
374
|
+
* Document-class: Ferret::Store::Directory
|
375
|
+
*
|
376
|
+
* A Directory is an object which is used to access the index storage.
|
377
|
+
* Ruby's IO API is not used so that we can use different storage
|
378
|
+
* mechanisms to store the index. Some examples are;
|
379
|
+
*
|
380
|
+
* * File system based storage (currently implemented as FSDirectory)
|
381
|
+
* * RAM based storage (currently implemented as RAMDirectory)
|
382
|
+
* * Database based storage
|
383
|
+
*
|
384
|
+
* NOTE: Once a file has been written and closed, it can no longer be
|
385
|
+
* modified. To make any changes to the file it must be deleted and
|
386
|
+
* rewritten. For this reason, the method to open a file for writing is
|
387
|
+
* called _create_output_, while the method to open a file for reading is
|
388
|
+
* called _open_input_. If there is a risk of simultaneous modifications of
|
389
|
+
* the files then locks should be used. See Lock to find out how.
|
390
|
+
*/
|
391
|
+
void
|
392
|
+
Init_Directory(void)
|
393
|
+
{
|
394
|
+
cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
|
395
|
+
rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
|
396
|
+
rb_define_method(cDirectory, "close", frt_dir_close, 0);
|
397
|
+
rb_define_method(cDirectory, "exists?", frt_dir_exists, 1);
|
398
|
+
rb_define_method(cDirectory, "touch", frt_dir_touch, 1);
|
399
|
+
rb_define_method(cDirectory, "delete", frt_dir_delete, 1);
|
400
|
+
rb_define_method(cDirectory, "file_count", frt_dir_file_count, 0);
|
401
|
+
rb_define_method(cDirectory, "refresh", frt_dir_refresh, 0);
|
402
|
+
rb_define_method(cDirectory, "rename", frt_dir_rename, 2);
|
403
|
+
rb_define_method(cDirectory, "make_lock", frt_dir_make_lock, 1);
|
404
|
+
}
|
405
|
+
|
406
|
+
/*
|
407
|
+
* Document-class: Ferret::Store::Lock
|
408
|
+
*
|
409
|
+
* A Lock is used to lock a data source so that not more than one
|
410
|
+
* output stream can access a data source at one time. It is possible
|
411
|
+
* that locks could be disabled. For example a read only index stored
|
412
|
+
* on a CDROM would have no need for a lock.
|
413
|
+
*
|
414
|
+
* You can use a lock in two ways. Firstly:
|
415
|
+
*
|
416
|
+
* write_lock = @directory.make_lock(LOCK_NAME)
|
417
|
+
* write_lock.obtain(WRITE_LOCK_TIME_OUT)
|
418
|
+
* ... # Do your file modifications # ...
|
419
|
+
* write_lock.release()
|
420
|
+
*
|
421
|
+
* Alternatively you could use the while locked method. This ensures that
|
422
|
+
* the lock will be released once processing has finished.
|
423
|
+
*
|
424
|
+
* write_lock = @directory.make_lock(LOCK_NAME)
|
425
|
+
* write_lock.while_locked(WRITE_LOCK_TIME_OUT) do
|
426
|
+
* ... # Do your file modifications # ...
|
427
|
+
* end
|
428
|
+
*/
|
429
|
+
void
|
430
|
+
Init_Lock(void)
|
431
|
+
{
|
432
|
+
cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
|
433
|
+
rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
|
434
|
+
rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
|
435
|
+
rb_define_method(cLock, "release", frt_lock_release, 0);
|
436
|
+
rb_define_method(cLock, "locked?", frt_lock_is_locked, 0);
|
437
|
+
|
438
|
+
cLockError = rb_define_class_under(cLock, "LockError", rb_eStandardError);
|
439
|
+
}
|
440
|
+
|
441
|
+
/*
|
442
|
+
* Document-class: Ferret::Store::RAMDirectory
|
443
|
+
*
|
444
|
+
* Memory resident Directory implementation. You should use a RAMDirectory
|
445
|
+
* during testing but otherwise you should stick with FSDirectory. While
|
446
|
+
* loading an index into memory may slightly speed things up, on most
|
447
|
+
* operating systems there won't be much difference so it wouldn't be worth
|
448
|
+
* your trouble.
|
449
|
+
*/
|
450
|
+
void
|
451
|
+
Init_RAMDirectory(void)
|
452
|
+
{
|
453
|
+
cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
|
454
|
+
rb_define_alloc_func(cRAMDirectory, frt_data_alloc);
|
455
|
+
rb_define_method(cRAMDirectory, "initialize", frt_ramdir_init, -1);
|
456
|
+
}
|
457
|
+
|
458
|
+
/*
|
459
|
+
* Document-class: Ferret::Store::FSDirectory
|
460
|
+
*
|
461
|
+
* File-system resident Directory implementation. The FSDirectory will use a
|
462
|
+
* single directory to store all of its files. You should not otherwise
|
463
|
+
* touch this directory. Modifying the files in the directory will corrupt
|
464
|
+
* the index. The one exception to this rule is you may need to delete stale
|
465
|
+
* lock files which have a ".lck" extension.
|
466
|
+
*/
|
467
|
+
void
|
468
|
+
Init_FSDirectory(void)
|
469
|
+
{
|
470
|
+
cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
|
471
|
+
rb_define_alloc_func(cFSDirectory, frt_data_alloc);
|
472
|
+
rb_define_singleton_method(cFSDirectory, "new", frt_fsdir_new, -1);
|
473
|
+
}
|
474
|
+
|
475
|
+
/* rdoc hack
|
476
|
+
extern VALUE mFerret = rb_define_module("Ferret");
|
477
|
+
*/
|
478
|
+
|
479
|
+
/*
|
480
|
+
* Document-module: Ferret::Store
|
481
|
+
*
|
482
|
+
* The Store module contains all the classes required to handle the storing
|
483
|
+
* of an index.
|
484
|
+
*
|
485
|
+
* NOTE: You can currently store an index on a file-system or in memory. If
|
486
|
+
* you want to add a different type of Directory, like a database Directory
|
487
|
+
* for instance, you will need to implement it in C.
|
488
|
+
*/
|
239
489
|
void
|
240
|
-
|
490
|
+
Init_Store(void)
|
241
491
|
{
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
|
249
|
-
rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(LOCK_PREFIX));
|
250
|
-
rb_define_method(cDirectory, "close", frt_dir_close, 0);\
|
251
|
-
rb_define_method(cDirectory, "exists?", frt_dir_exists, 1);\
|
252
|
-
rb_define_method(cDirectory, "touch", frt_dir_touch, 1);\
|
253
|
-
rb_define_method(cDirectory, "delete", frt_dir_delete, 1);\
|
254
|
-
rb_define_method(cDirectory, "file_count", frt_dir_file_count, 0);\
|
255
|
-
rb_define_method(cDirectory, "refresh", frt_dir_refresh, 0);\
|
256
|
-
rb_define_method(cDirectory, "rename", frt_dir_rename, 2);\
|
257
|
-
rb_define_method(cDirectory, "make_lock", frt_dir_make_lock, 1);
|
258
|
-
|
259
|
-
/* RAMDirectory */
|
260
|
-
cRAMDirectory = rb_define_class_under(mStore, "RAMDirectory", cDirectory);
|
261
|
-
rb_define_alloc_func(cRAMDirectory, frt_data_alloc);
|
262
|
-
rb_define_method(cRAMDirectory, "initialize", frt_ramdir_init, -1);
|
263
|
-
|
264
|
-
/* FSDirectory */
|
265
|
-
cFSDirectory = rb_define_class_under(mStore, "FSDirectory", cDirectory);
|
266
|
-
rb_define_alloc_func(cFSDirectory, frt_data_alloc);
|
267
|
-
rb_define_singleton_method(cFSDirectory, "new", frt_fsdir_new, 2);
|
492
|
+
mStore = rb_define_module_under(mFerret, "Store");
|
493
|
+
Init_Directory();
|
494
|
+
Init_Lock();
|
495
|
+
Init_RAMDirectory();
|
496
|
+
Init_FSDirectory();
|
268
497
|
}
|