ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/r_index_io.c
DELETED
@@ -1,1021 +0,0 @@
|
|
1
|
-
#include "ferret.h"
|
2
|
-
#include "index.h"
|
3
|
-
|
4
|
-
VALUE cTVOffsetInfo;
|
5
|
-
VALUE cTermVector;
|
6
|
-
VALUE cTermDocEnum;
|
7
|
-
VALUE cIndexWriter;
|
8
|
-
VALUE cIndexReader;
|
9
|
-
VALUE cTermEnum;
|
10
|
-
|
11
|
-
VALUE ranalyzer_key;
|
12
|
-
VALUE rclose_dir_key;
|
13
|
-
VALUE rcreate_key;
|
14
|
-
VALUE rcreate_if_missing_key;
|
15
|
-
VALUE ruse_compound_file_key;
|
16
|
-
VALUE rmerge_factor_key;
|
17
|
-
VALUE rmin_merge_docs_key;
|
18
|
-
VALUE rmax_merge_docs_key;
|
19
|
-
VALUE rmax_field_length_key;
|
20
|
-
VALUE rterm_index_interval_key;
|
21
|
-
|
22
|
-
extern void frt_set_term(VALUE rterm, Term *t);
|
23
|
-
extern VALUE frt_get_rterm(char *field, char *text);
|
24
|
-
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
25
|
-
|
26
|
-
/****************************************************************************
|
27
|
-
*
|
28
|
-
* TermEnum Methods
|
29
|
-
*
|
30
|
-
****************************************************************************/
|
31
|
-
|
32
|
-
static void
|
33
|
-
frt_te_free(void *p)
|
34
|
-
{
|
35
|
-
TermEnum *te = (TermEnum *)p;
|
36
|
-
te->close(te);
|
37
|
-
}
|
38
|
-
|
39
|
-
#define GET_TE TermEnum *te = (TermEnum *)DATA_PTR(self)
|
40
|
-
static VALUE
|
41
|
-
frt_te_next(VALUE self)
|
42
|
-
{
|
43
|
-
GET_TE;
|
44
|
-
return te->next(te) ? Qtrue : Qfalse;
|
45
|
-
}
|
46
|
-
|
47
|
-
static VALUE
|
48
|
-
frt_te_term(VALUE self)
|
49
|
-
{
|
50
|
-
GET_TE;
|
51
|
-
if (!te->tb_curr) return Qnil;
|
52
|
-
return frt_get_rterm(te->tb_curr->field, te->tb_curr->text);
|
53
|
-
}
|
54
|
-
|
55
|
-
static VALUE
|
56
|
-
frt_te_doc_freq(VALUE self)
|
57
|
-
{
|
58
|
-
GET_TE;
|
59
|
-
if (!te->tb_curr) return Qnil;
|
60
|
-
return INT2FIX(te->ti_curr->doc_freq);
|
61
|
-
}
|
62
|
-
|
63
|
-
static VALUE
|
64
|
-
frt_te_close(VALUE self)
|
65
|
-
{
|
66
|
-
GET_TE;
|
67
|
-
Frt_Unwrap_Struct(self);
|
68
|
-
te->close(te);
|
69
|
-
return Qnil;
|
70
|
-
}
|
71
|
-
|
72
|
-
static VALUE
|
73
|
-
frt_te_skip_to(VALUE self, VALUE rterm)
|
74
|
-
{
|
75
|
-
GET_TE;
|
76
|
-
Term t;
|
77
|
-
frt_set_term(rterm, &t);
|
78
|
-
|
79
|
-
return te_skip_to(te, &t) ? Qtrue : Qfalse;
|
80
|
-
}
|
81
|
-
|
82
|
-
/****************************************************************************
|
83
|
-
*
|
84
|
-
* TermVectorOffsetInfo Methods
|
85
|
-
*
|
86
|
-
****************************************************************************/
|
87
|
-
|
88
|
-
void
|
89
|
-
frt_tvoi_free(void *p)
|
90
|
-
{
|
91
|
-
object_del(p);
|
92
|
-
tvoi_destroy(p);
|
93
|
-
}
|
94
|
-
|
95
|
-
static VALUE
|
96
|
-
frt_tvoi_init(VALUE self, VALUE rstart, VALUE rend)
|
97
|
-
{
|
98
|
-
TVOffsetInfo *tvoi = tvoi_create(FIX2INT(rstart), FIX2INT(rend));
|
99
|
-
Frt_Wrap_Struct(self, NULL, &frt_tvoi_free, tvoi);
|
100
|
-
object_add(tvoi, self);
|
101
|
-
return self;
|
102
|
-
}
|
103
|
-
|
104
|
-
#define GET_TVOI TVOffsetInfo *tvoi = (TVOffsetInfo *)DATA_PTR(self)
|
105
|
-
|
106
|
-
static VALUE
|
107
|
-
frt_tvoi_set_start(VALUE self, VALUE rstart)
|
108
|
-
{
|
109
|
-
GET_TVOI;
|
110
|
-
tvoi->start = FIX2INT(rstart);
|
111
|
-
return Qnil;
|
112
|
-
}
|
113
|
-
|
114
|
-
static VALUE
|
115
|
-
frt_tvoi_get_start(VALUE self)
|
116
|
-
{
|
117
|
-
GET_TVOI;
|
118
|
-
return INT2FIX(tvoi->start);
|
119
|
-
}
|
120
|
-
|
121
|
-
static VALUE
|
122
|
-
frt_tvoi_set_end(VALUE self, VALUE rend)
|
123
|
-
{
|
124
|
-
GET_TVOI;
|
125
|
-
tvoi->end = FIX2INT(rend);
|
126
|
-
return Qnil;
|
127
|
-
}
|
128
|
-
|
129
|
-
static VALUE
|
130
|
-
frt_tvoi_get_end(VALUE self)
|
131
|
-
{
|
132
|
-
GET_TVOI;
|
133
|
-
return INT2FIX(tvoi->end);
|
134
|
-
}
|
135
|
-
|
136
|
-
static VALUE
|
137
|
-
frt_tvoi_eql(VALUE self, VALUE rother)
|
138
|
-
{
|
139
|
-
GET_TVOI;
|
140
|
-
TVOffsetInfo *other;
|
141
|
-
if (TYPE(rother) != T_DATA) return Qfalse;
|
142
|
-
Data_Get_Struct(rother, TVOffsetInfo, other);
|
143
|
-
|
144
|
-
return ((tvoi->start == other->start) && (tvoi->end == other->end))
|
145
|
-
? Qtrue : Qfalse;
|
146
|
-
}
|
147
|
-
|
148
|
-
static VALUE
|
149
|
-
frt_tvoi_hash(VALUE self, VALUE rother)
|
150
|
-
{
|
151
|
-
GET_TVOI;
|
152
|
-
return INT2FIX(29 * tvoi->start + tvoi->end);
|
153
|
-
}
|
154
|
-
|
155
|
-
static VALUE
|
156
|
-
frt_tvoi_to_s(VALUE self)
|
157
|
-
{
|
158
|
-
char buf[60];
|
159
|
-
GET_TVOI;
|
160
|
-
sprintf(buf, "TermVectorOffsetInfo(%d:%d)", tvoi->start, tvoi->end);
|
161
|
-
return rb_str_new2(buf);
|
162
|
-
}
|
163
|
-
|
164
|
-
/****************************************************************************
|
165
|
-
*
|
166
|
-
* TermVector Methods
|
167
|
-
*
|
168
|
-
****************************************************************************/
|
169
|
-
|
170
|
-
void
|
171
|
-
frt_tv_free(void *p)
|
172
|
-
{
|
173
|
-
int i;
|
174
|
-
TermVector *tv = (TermVector *)p;
|
175
|
-
for (i = 0; i < tv->tcnt; i++) {
|
176
|
-
free(tv->terms[i]);
|
177
|
-
}
|
178
|
-
free(tv->terms);
|
179
|
-
if (tv->positions) {
|
180
|
-
for (i = 0; i < tv->tcnt; i++) {
|
181
|
-
free(tv->positions[i]);
|
182
|
-
}
|
183
|
-
free(tv->positions);
|
184
|
-
}
|
185
|
-
if (tv->offsets) {
|
186
|
-
for (i = 0; i < tv->tcnt; i++) {
|
187
|
-
free(tv->offsets[i]);
|
188
|
-
}
|
189
|
-
free(tv->offsets);
|
190
|
-
}
|
191
|
-
free(tv->freqs);
|
192
|
-
object_del(p);
|
193
|
-
free(p);
|
194
|
-
}
|
195
|
-
|
196
|
-
void
|
197
|
-
frt_tv_mark(void *p)
|
198
|
-
{
|
199
|
-
int i, j;
|
200
|
-
TermVector *tv = (TermVector *)p;
|
201
|
-
if (tv->offsets != NULL) {
|
202
|
-
for (i = 0; i < tv->tcnt; i++) {
|
203
|
-
for (j = 0; j < tv->freqs[i]; j++) {
|
204
|
-
frt_gc_mark(tv->offsets[i][j]);
|
205
|
-
}
|
206
|
-
}
|
207
|
-
}
|
208
|
-
}
|
209
|
-
|
210
|
-
static VALUE
|
211
|
-
frt_get_tv(TermVector *tv)
|
212
|
-
{
|
213
|
-
VALUE self = Qnil;
|
214
|
-
if (tv) {
|
215
|
-
self = object_get(tv);
|
216
|
-
if (self == Qnil) {
|
217
|
-
self = Data_Wrap_Struct(cTermVector, &frt_tv_mark, &frt_tv_free, tv);
|
218
|
-
if (tv->offsets) {
|
219
|
-
TVOffsetInfo *tvoi;
|
220
|
-
VALUE rtvoi;
|
221
|
-
int i, j;
|
222
|
-
for (i = 0; i < tv->tcnt; i++) {
|
223
|
-
for (j = 0; j < tv->freqs[i]; j++) {
|
224
|
-
tvoi = tv->offsets[i][j];
|
225
|
-
if (object_get(tvoi) == Qnil) {
|
226
|
-
rtvoi = Data_Wrap_Struct(cTVOffsetInfo, NULL, &frt_tvoi_free, tvoi);
|
227
|
-
object_add(tvoi, rtvoi);
|
228
|
-
}
|
229
|
-
}
|
230
|
-
}
|
231
|
-
}
|
232
|
-
object_add(tv, self);
|
233
|
-
}
|
234
|
-
}
|
235
|
-
return self;
|
236
|
-
}
|
237
|
-
|
238
|
-
#define GET_TV TermVector *tv = (TermVector *)DATA_PTR(self)
|
239
|
-
|
240
|
-
static VALUE
|
241
|
-
frt_tv_get_field(VALUE self)
|
242
|
-
{
|
243
|
-
GET_TV;
|
244
|
-
return rb_str_new2(tv->field);
|
245
|
-
}
|
246
|
-
|
247
|
-
static VALUE
|
248
|
-
frt_tv_get_terms(VALUE self)
|
249
|
-
{
|
250
|
-
int i;
|
251
|
-
GET_TV;
|
252
|
-
VALUE rterms = rb_ary_new2(tv->tcnt);
|
253
|
-
for (i = 0; i < tv->tcnt; i++) {
|
254
|
-
rb_ary_push(rterms, rb_str_new2(tv->terms[i]));
|
255
|
-
}
|
256
|
-
return rterms;
|
257
|
-
}
|
258
|
-
|
259
|
-
static VALUE
|
260
|
-
frt_tv_get_freqs(VALUE self)
|
261
|
-
{
|
262
|
-
int i;
|
263
|
-
GET_TV;
|
264
|
-
VALUE rfreqs = rb_ary_new2(tv->tcnt);
|
265
|
-
for (i = 0; i < tv->tcnt; i++) {
|
266
|
-
rb_ary_push(rfreqs, INT2FIX(tv->freqs[i]));
|
267
|
-
}
|
268
|
-
return rfreqs;
|
269
|
-
}
|
270
|
-
|
271
|
-
static VALUE
|
272
|
-
frt_tv_get_positions(VALUE self)
|
273
|
-
{
|
274
|
-
int i, j, freq;
|
275
|
-
GET_TV;
|
276
|
-
VALUE rpositions, rpositionss;
|
277
|
-
|
278
|
-
if (!tv->positions) return Qnil;
|
279
|
-
rpositionss = rb_ary_new2(tv->tcnt);
|
280
|
-
for (i = 0; i < tv->tcnt; i++) {
|
281
|
-
freq = tv->freqs[i];
|
282
|
-
rpositions = rb_ary_new2(freq);
|
283
|
-
for (j = 0; j < freq; j++) {
|
284
|
-
rb_ary_push(rpositions, INT2FIX(tv->positions[i][j]));
|
285
|
-
}
|
286
|
-
rb_ary_push(rpositionss, rpositions);
|
287
|
-
}
|
288
|
-
return rpositionss;
|
289
|
-
}
|
290
|
-
|
291
|
-
static VALUE
|
292
|
-
frt_tv_get_offsets(VALUE self)
|
293
|
-
{
|
294
|
-
int i, j, freq;
|
295
|
-
GET_TV;
|
296
|
-
VALUE roffsetss, roffsets, roffset;
|
297
|
-
if (!tv->offsets) return Qnil;
|
298
|
-
roffsetss = rb_ary_new2(tv->tcnt);
|
299
|
-
|
300
|
-
for (i = 0; i < tv->tcnt; i++) {
|
301
|
-
freq = tv->freqs[i];
|
302
|
-
roffsets = rb_ary_new2(freq);
|
303
|
-
for (j = 0; j < freq; j++) {
|
304
|
-
roffset = object_get(tv->offsets[i][j]);
|
305
|
-
rb_ary_push(roffsets, roffset);
|
306
|
-
}
|
307
|
-
rb_ary_push(roffsetss, roffsets);
|
308
|
-
}
|
309
|
-
return roffsetss;
|
310
|
-
}
|
311
|
-
|
312
|
-
/****************************************************************************
|
313
|
-
*
|
314
|
-
* TermDocEnum Methods
|
315
|
-
*
|
316
|
-
****************************************************************************/
|
317
|
-
|
318
|
-
void
|
319
|
-
frt_tde_free(void *p)
|
320
|
-
{
|
321
|
-
TermDocEnum *tde = (TermDocEnum *)p;
|
322
|
-
tde->close(tde);
|
323
|
-
}
|
324
|
-
|
325
|
-
static VALUE
|
326
|
-
frt_get_tde(TermDocEnum *tde)
|
327
|
-
{
|
328
|
-
return Data_Wrap_Struct(cTermDocEnum, NULL, &frt_tde_free, tde);
|
329
|
-
}
|
330
|
-
|
331
|
-
#define GET_TDE TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self)
|
332
|
-
|
333
|
-
static VALUE
|
334
|
-
frt_tde_close(VALUE self)
|
335
|
-
{
|
336
|
-
GET_TDE;
|
337
|
-
Frt_Unwrap_Struct(self);
|
338
|
-
tde->close(tde);
|
339
|
-
return Qnil;
|
340
|
-
}
|
341
|
-
|
342
|
-
static VALUE
|
343
|
-
frt_tde_seek(VALUE self, VALUE rterm)
|
344
|
-
{
|
345
|
-
GET_TDE;
|
346
|
-
Term t;
|
347
|
-
frt_set_term(rterm, &t);
|
348
|
-
tde->seek(tde, &t);
|
349
|
-
return Qnil;
|
350
|
-
}
|
351
|
-
|
352
|
-
static VALUE
|
353
|
-
frt_tde_doc(VALUE self)
|
354
|
-
{
|
355
|
-
GET_TDE;
|
356
|
-
return INT2FIX(tde->doc_num(tde));
|
357
|
-
}
|
358
|
-
|
359
|
-
static VALUE
|
360
|
-
frt_tde_freq(VALUE self)
|
361
|
-
{
|
362
|
-
GET_TDE;
|
363
|
-
return INT2FIX(tde->freq(tde));
|
364
|
-
}
|
365
|
-
|
366
|
-
static VALUE
|
367
|
-
frt_tde_next(VALUE self)
|
368
|
-
{
|
369
|
-
GET_TDE;
|
370
|
-
return tde->next(tde) ? Qtrue : Qfalse;
|
371
|
-
}
|
372
|
-
|
373
|
-
static VALUE
|
374
|
-
frt_tde_next_position(VALUE self)
|
375
|
-
{
|
376
|
-
GET_TDE;
|
377
|
-
return INT2FIX(tde->next_position(tde));
|
378
|
-
}
|
379
|
-
|
380
|
-
static VALUE
|
381
|
-
frt_tde_read(VALUE self, VALUE rdocs, VALUE rfreqs)
|
382
|
-
{
|
383
|
-
int i, req_num, cnt;
|
384
|
-
GET_TDE;
|
385
|
-
Check_Type(rdocs, T_ARRAY);
|
386
|
-
Check_Type(rfreqs, T_ARRAY);
|
387
|
-
req_num = MIN(RARRAY(rdocs)->len, RARRAY(rfreqs)->len);
|
388
|
-
cnt = tde->read(tde, (int *)RARRAY(rdocs)->ptr,
|
389
|
-
(int *)RARRAY(rfreqs)->ptr, req_num);
|
390
|
-
for (i = 0; i < cnt; i++) {
|
391
|
-
RARRAY(rdocs)->ptr[i] = INT2FIX(RARRAY(rdocs)->ptr[i]);
|
392
|
-
RARRAY(rfreqs)->ptr[i] = INT2FIX(RARRAY(rfreqs)->ptr[i]);
|
393
|
-
}
|
394
|
-
return INT2FIX(cnt);
|
395
|
-
}
|
396
|
-
|
397
|
-
static VALUE
|
398
|
-
frt_tde_skip_to(VALUE self, VALUE rtarget)
|
399
|
-
{
|
400
|
-
GET_TDE;
|
401
|
-
return tde->skip_to(tde, FIX2INT(rtarget)) ? Qtrue : Qfalse;
|
402
|
-
}
|
403
|
-
|
404
|
-
/****************************************************************************
|
405
|
-
*
|
406
|
-
* IndexWriter Methods
|
407
|
-
*
|
408
|
-
****************************************************************************/
|
409
|
-
|
410
|
-
void
|
411
|
-
frt_iw_free(void *p)
|
412
|
-
{
|
413
|
-
IndexWriter *iw = (IndexWriter *)p;
|
414
|
-
iw_close(iw);
|
415
|
-
}
|
416
|
-
|
417
|
-
void
|
418
|
-
frt_iw_mark(void *p)
|
419
|
-
{
|
420
|
-
IndexWriter *iw = (IndexWriter *)p;
|
421
|
-
frt_gc_mark(iw->analyzer);
|
422
|
-
frt_gc_mark(iw->store);
|
423
|
-
}
|
424
|
-
|
425
|
-
#define SET_INT_ATTR(attr) \
|
426
|
-
if (RTEST(rval = rb_hash_aref(roptions, r##attr##_key)))\
|
427
|
-
iw->attr = FIX2INT(rval);
|
428
|
-
|
429
|
-
static VALUE
|
430
|
-
frt_iw_init(int argc, VALUE *argv, VALUE self)
|
431
|
-
{
|
432
|
-
VALUE rdir, roptions, rval;
|
433
|
-
bool create = false;
|
434
|
-
bool use_compound_file = true;
|
435
|
-
Store *store;
|
436
|
-
Analyzer *analyzer = NULL;
|
437
|
-
IndexWriter *iw;
|
438
|
-
rb_scan_args(argc, argv, "02", &rdir, &roptions);
|
439
|
-
if (argc > 0) {
|
440
|
-
if (TYPE(rdir) == T_DATA) {
|
441
|
-
store = DATA_PTR(rdir);
|
442
|
-
ref(store);
|
443
|
-
} else {
|
444
|
-
StringValue(rdir);
|
445
|
-
frt_create_dir(rdir);
|
446
|
-
store = open_fs_store(RSTRING(rdir)->ptr);
|
447
|
-
}
|
448
|
-
} else {
|
449
|
-
store = open_ram_store();
|
450
|
-
}
|
451
|
-
if (argc == 2) {
|
452
|
-
Check_Type(roptions, T_HASH);
|
453
|
-
/* Let ruby's GC handle the closing of the store
|
454
|
-
if (!close_dir) {
|
455
|
-
close_dir = RTEST(rb_hash_aref(roptions, rclose_dir_key));
|
456
|
-
}
|
457
|
-
*/
|
458
|
-
/* use_compound_file defaults to true */
|
459
|
-
use_compound_file =
|
460
|
-
(rb_hash_aref(roptions, ruse_compound_file_key) == Qfalse) ? false : true;
|
461
|
-
|
462
|
-
rval = rb_hash_aref(roptions, ranalyzer_key);
|
463
|
-
if (rval == Qnil) {
|
464
|
-
analyzer = mb_standard_analyzer_create(true);
|
465
|
-
} else {
|
466
|
-
analyzer = frt_get_cwrapped_analyzer(rval);
|
467
|
-
}
|
468
|
-
create = RTEST(rb_hash_aref(roptions, rcreate_key));
|
469
|
-
if (!create && RTEST(rb_hash_aref(roptions, rcreate_if_missing_key))) {
|
470
|
-
if (!store->exists(store, "segments")) {
|
471
|
-
create = true;
|
472
|
-
}
|
473
|
-
}
|
474
|
-
}
|
475
|
-
iw = iw_open(store, analyzer, create);
|
476
|
-
store_deref(store);
|
477
|
-
iw->use_compound_file = use_compound_file;
|
478
|
-
|
479
|
-
SET_INT_ATTR(merge_factor);
|
480
|
-
SET_INT_ATTR(min_merge_docs);
|
481
|
-
SET_INT_ATTR(max_merge_docs);
|
482
|
-
SET_INT_ATTR(max_field_length);
|
483
|
-
SET_INT_ATTR(term_index_interval);
|
484
|
-
|
485
|
-
Frt_Wrap_Struct(self, &frt_iw_mark, &frt_iw_free, iw);
|
486
|
-
return self;
|
487
|
-
}
|
488
|
-
|
489
|
-
#define GET_IW IndexWriter *iw = (IndexWriter *)DATA_PTR(self)
|
490
|
-
|
491
|
-
static VALUE
|
492
|
-
frt_iw_close(VALUE self)
|
493
|
-
{
|
494
|
-
GET_IW;
|
495
|
-
Frt_Unwrap_Struct(self);
|
496
|
-
iw_close(iw);
|
497
|
-
return Qnil;
|
498
|
-
}
|
499
|
-
|
500
|
-
static VALUE
|
501
|
-
frt_iw_add_doc(VALUE self, VALUE rdoc)
|
502
|
-
{
|
503
|
-
GET_IW;
|
504
|
-
Document *doc;
|
505
|
-
Data_Get_Struct(rdoc, Document, doc);
|
506
|
-
iw_add_doc(iw, doc);
|
507
|
-
return Qnil;
|
508
|
-
}
|
509
|
-
|
510
|
-
static VALUE
|
511
|
-
frt_iw_set_merge_factor(VALUE self, VALUE val)
|
512
|
-
{
|
513
|
-
GET_IW;
|
514
|
-
iw->merge_factor = FIX2INT(val);
|
515
|
-
return Qnil;
|
516
|
-
}
|
517
|
-
|
518
|
-
static VALUE
|
519
|
-
frt_iw_set_min_merge_docs(VALUE self, VALUE val)
|
520
|
-
{
|
521
|
-
GET_IW;
|
522
|
-
iw->min_merge_docs = FIX2INT(val);
|
523
|
-
return Qnil;
|
524
|
-
}
|
525
|
-
|
526
|
-
static VALUE
|
527
|
-
frt_iw_set_max_merge_docs(VALUE self, VALUE val)
|
528
|
-
{
|
529
|
-
GET_IW;
|
530
|
-
iw->max_merge_docs = FIX2INT(val);
|
531
|
-
return Qnil;
|
532
|
-
}
|
533
|
-
|
534
|
-
static VALUE
|
535
|
-
frt_iw_set_max_field_length(VALUE self, VALUE val)
|
536
|
-
{
|
537
|
-
GET_IW;
|
538
|
-
iw->max_field_length = FIX2INT(val);
|
539
|
-
return Qnil;
|
540
|
-
}
|
541
|
-
|
542
|
-
static VALUE
|
543
|
-
frt_iw_set_term_index_interval(VALUE self, VALUE val)
|
544
|
-
{
|
545
|
-
GET_IW;
|
546
|
-
iw->term_index_interval = FIX2INT(val);
|
547
|
-
return Qnil;
|
548
|
-
}
|
549
|
-
|
550
|
-
static VALUE
|
551
|
-
frt_iw_set_use_compound_file(VALUE self, VALUE val)
|
552
|
-
{
|
553
|
-
GET_IW;
|
554
|
-
iw->use_compound_file = FIX2INT(val);
|
555
|
-
return Qnil;
|
556
|
-
}
|
557
|
-
|
558
|
-
static VALUE
|
559
|
-
frt_iw_get_doc_count(VALUE self)
|
560
|
-
{
|
561
|
-
GET_IW;
|
562
|
-
return INT2FIX(iw_doc_count(iw));
|
563
|
-
}
|
564
|
-
|
565
|
-
static VALUE
|
566
|
-
frt_iw_get_merge_factor(VALUE self)
|
567
|
-
{
|
568
|
-
GET_IW;
|
569
|
-
return INT2FIX(iw->merge_factor);
|
570
|
-
}
|
571
|
-
|
572
|
-
static VALUE
|
573
|
-
frt_iw_get_min_merge_docs(VALUE self)
|
574
|
-
{
|
575
|
-
GET_IW;
|
576
|
-
return INT2FIX(iw->min_merge_docs);
|
577
|
-
}
|
578
|
-
|
579
|
-
static VALUE
|
580
|
-
frt_iw_get_max_merge_docs(VALUE self)
|
581
|
-
{
|
582
|
-
GET_IW;
|
583
|
-
return INT2FIX(iw->max_merge_docs);
|
584
|
-
}
|
585
|
-
|
586
|
-
static VALUE
|
587
|
-
frt_iw_get_max_field_length(VALUE self)
|
588
|
-
{
|
589
|
-
GET_IW;
|
590
|
-
return INT2FIX(iw->max_field_length);
|
591
|
-
}
|
592
|
-
|
593
|
-
static VALUE
|
594
|
-
frt_iw_get_term_index_interval(VALUE self)
|
595
|
-
{
|
596
|
-
GET_IW;
|
597
|
-
return INT2FIX(iw->term_index_interval);
|
598
|
-
}
|
599
|
-
|
600
|
-
static VALUE
|
601
|
-
frt_iw_get_use_compound_file(VALUE self)
|
602
|
-
{
|
603
|
-
GET_IW;
|
604
|
-
return INT2FIX(iw->use_compound_file);
|
605
|
-
}
|
606
|
-
|
607
|
-
static VALUE
|
608
|
-
frt_iw_optimize(VALUE self)
|
609
|
-
{
|
610
|
-
GET_IW;
|
611
|
-
iw_optimize(iw);
|
612
|
-
return Qnil;
|
613
|
-
}
|
614
|
-
|
615
|
-
/****************************************************************************
|
616
|
-
*
|
617
|
-
* IndexReader Methods
|
618
|
-
*
|
619
|
-
****************************************************************************/
|
620
|
-
|
621
|
-
void
|
622
|
-
frt_ir_free(void *p)
|
623
|
-
{
|
624
|
-
object_del(p);
|
625
|
-
ir_close((IndexReader *)p);
|
626
|
-
}
|
627
|
-
|
628
|
-
void
|
629
|
-
frt_ir_mark(void *p)
|
630
|
-
{
|
631
|
-
IndexReader *ir = (IndexReader *)p;
|
632
|
-
frt_gc_mark(ir->store);
|
633
|
-
}
|
634
|
-
|
635
|
-
static VALUE
|
636
|
-
frt_ir_init(int argc, VALUE *argv, VALUE self)
|
637
|
-
{
|
638
|
-
VALUE rdir, rclose_dir;
|
639
|
-
//bool close_dir = false;
|
640
|
-
Store *store = NULL;
|
641
|
-
IndexReader *ir;
|
642
|
-
switch (rb_scan_args(argc, argv, "11", &rdir, &rclose_dir)) {
|
643
|
-
case 2: //close_dir = RTEST(rclose_dir);
|
644
|
-
case 1:
|
645
|
-
if (TYPE(rdir) == T_DATA) {
|
646
|
-
store = DATA_PTR(rdir);
|
647
|
-
} else {
|
648
|
-
rdir = rb_obj_as_string(rdir);
|
649
|
-
frt_create_dir(rdir);
|
650
|
-
store = open_fs_store(RSTRING(rdir)->ptr);
|
651
|
-
deref(store);
|
652
|
-
}
|
653
|
-
}
|
654
|
-
ir = ir_open(store);
|
655
|
-
Frt_Wrap_Struct(self, &frt_ir_mark, &frt_ir_free, ir);
|
656
|
-
object_add(ir, self);
|
657
|
-
return self;
|
658
|
-
}
|
659
|
-
|
660
|
-
static VALUE
|
661
|
-
frt_ir_open(int argc, VALUE *argv, VALUE klass)
|
662
|
-
{
|
663
|
-
VALUE self = Frt_Make_Struct(klass);
|
664
|
-
return frt_ir_init(argc, argv, self);
|
665
|
-
}
|
666
|
-
|
667
|
-
#define GET_IR IndexReader *ir = (IndexReader *)DATA_PTR(self)
|
668
|
-
|
669
|
-
static VALUE
|
670
|
-
frt_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval)
|
671
|
-
{
|
672
|
-
GET_IR;
|
673
|
-
rfield = rb_obj_as_string(rfield);
|
674
|
-
ir_set_norm(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr, NUM2CHR(rval));
|
675
|
-
return Qnil;
|
676
|
-
}
|
677
|
-
|
678
|
-
static VALUE
|
679
|
-
frt_ir_get_norms(VALUE self, VALUE rfield)
|
680
|
-
{
|
681
|
-
GET_IR;
|
682
|
-
uchar *norms;
|
683
|
-
rfield = rb_obj_as_string(rfield);
|
684
|
-
norms = ir->get_norms(ir, RSTRING(rfield)->ptr);
|
685
|
-
if (norms) {
|
686
|
-
return rb_str_new((char *)norms, ir->max_doc(ir));
|
687
|
-
} else {
|
688
|
-
return Qnil;
|
689
|
-
}
|
690
|
-
}
|
691
|
-
|
692
|
-
static VALUE
|
693
|
-
frt_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
|
694
|
-
{
|
695
|
-
GET_IR;
|
696
|
-
int offset;
|
697
|
-
rfield = rb_obj_as_string(rfield);
|
698
|
-
offset = FIX2INT(roffset);
|
699
|
-
Check_Type(rnorms, T_STRING);
|
700
|
-
if (RSTRING(rnorms)->len < offset + ir->max_doc(ir)) {
|
701
|
-
rb_raise(rb_eArgError, "supplied a string of length:%d to IndexReader#get_norms_into but needed a string of length offset:%d + maxdoc:%d", RSTRING(rnorms)->len, offset, ir->max_doc(ir));
|
702
|
-
}
|
703
|
-
|
704
|
-
ir->get_norms_into(ir, RSTRING(rfield)->ptr, (uchar *)RSTRING(rnorms)->ptr, offset);
|
705
|
-
return Qnil;
|
706
|
-
}
|
707
|
-
|
708
|
-
static VALUE
|
709
|
-
frt_ir_commit(VALUE self)
|
710
|
-
{
|
711
|
-
GET_IR;
|
712
|
-
ir_commit(ir);
|
713
|
-
return Qnil;
|
714
|
-
}
|
715
|
-
|
716
|
-
static VALUE
|
717
|
-
frt_ir_close(VALUE self)
|
718
|
-
{
|
719
|
-
GET_IR;
|
720
|
-
object_del(ir);
|
721
|
-
Frt_Unwrap_Struct(self);
|
722
|
-
ir_close(ir);
|
723
|
-
return Qnil;
|
724
|
-
}
|
725
|
-
|
726
|
-
static VALUE
|
727
|
-
frt_ir_has_deletions(VALUE self)
|
728
|
-
{
|
729
|
-
GET_IR;
|
730
|
-
return ir->has_deletions(ir) ? Qtrue : Qfalse;
|
731
|
-
}
|
732
|
-
|
733
|
-
static VALUE
|
734
|
-
frt_ir_delete(VALUE self, VALUE rdoc_num)
|
735
|
-
{
|
736
|
-
GET_IR;
|
737
|
-
int doc_num = FIX2INT(rdoc_num);
|
738
|
-
ir_delete_doc(ir, doc_num);
|
739
|
-
return Qnil;
|
740
|
-
}
|
741
|
-
|
742
|
-
static VALUE
|
743
|
-
frt_ir_is_deleted(VALUE self, VALUE rdoc_num)
|
744
|
-
{
|
745
|
-
GET_IR;
|
746
|
-
int doc_num = FIX2INT(rdoc_num);
|
747
|
-
return ir->is_deleted(ir, doc_num) ? Qtrue : Qfalse;
|
748
|
-
}
|
749
|
-
|
750
|
-
static VALUE
|
751
|
-
frt_ir_max_doc(VALUE self)
|
752
|
-
{
|
753
|
-
GET_IR;
|
754
|
-
return INT2FIX(ir->max_doc(ir));
|
755
|
-
}
|
756
|
-
|
757
|
-
static VALUE
|
758
|
-
frt_ir_num_docs(VALUE self)
|
759
|
-
{
|
760
|
-
GET_IR;
|
761
|
-
return INT2FIX(ir->num_docs(ir));
|
762
|
-
}
|
763
|
-
|
764
|
-
static VALUE
|
765
|
-
frt_ir_undelete_all(VALUE self)
|
766
|
-
{
|
767
|
-
GET_IR;
|
768
|
-
ir_undelete_all(ir);
|
769
|
-
return Qnil;
|
770
|
-
}
|
771
|
-
|
772
|
-
static VALUE
|
773
|
-
frt_ir_get_doc(VALUE self, VALUE rdoc_num)
|
774
|
-
{
|
775
|
-
GET_IR;
|
776
|
-
Document *doc = ir->get_doc(ir, FIX2INT(rdoc_num));
|
777
|
-
return frt_get_doc(doc);
|
778
|
-
}
|
779
|
-
|
780
|
-
static VALUE
|
781
|
-
frt_ir_is_latest(VALUE self)
|
782
|
-
{
|
783
|
-
GET_IR;
|
784
|
-
return ir_is_latest(ir) ? Qtrue : Qfalse;
|
785
|
-
}
|
786
|
-
|
787
|
-
static VALUE
|
788
|
-
frt_ir_get_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield)
|
789
|
-
{
|
790
|
-
GET_IR;
|
791
|
-
TermVector *tv;
|
792
|
-
rfield = rb_obj_as_string(rfield);
|
793
|
-
tv = ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
|
794
|
-
return frt_get_tv(tv);
|
795
|
-
}
|
796
|
-
|
797
|
-
static VALUE
|
798
|
-
frt_ir_get_term_vectors(VALUE self, VALUE rdoc_num)
|
799
|
-
{
|
800
|
-
int i;
|
801
|
-
GET_IR;
|
802
|
-
Array *tvs = ir->get_term_vectors(ir, FIX2INT(rdoc_num));
|
803
|
-
VALUE rtvs = rb_ary_new2(tvs->size);
|
804
|
-
VALUE rtv;
|
805
|
-
for (i = 0; i < tvs->size; i++) {
|
806
|
-
rtv = frt_get_tv(tvs->elems[i]);
|
807
|
-
rb_ary_push(rtvs, rtv);
|
808
|
-
}
|
809
|
-
tvs->free_elem = NULL;
|
810
|
-
ary_destroy(tvs);
|
811
|
-
|
812
|
-
return rtvs;
|
813
|
-
}
|
814
|
-
|
815
|
-
static VALUE
|
816
|
-
frt_ir_term_docs(VALUE self)
|
817
|
-
{
|
818
|
-
GET_IR;
|
819
|
-
return frt_get_tde(ir->term_docs(ir));
|
820
|
-
}
|
821
|
-
|
822
|
-
static VALUE
|
823
|
-
frt_ir_term_docs_for(VALUE self, VALUE rterm)
|
824
|
-
{
|
825
|
-
GET_IR;
|
826
|
-
Term t;
|
827
|
-
frt_set_term(rterm, &t);
|
828
|
-
return frt_get_tde(ir_term_docs_for(ir, &t));
|
829
|
-
}
|
830
|
-
|
831
|
-
static VALUE
|
832
|
-
frt_ir_term_positions(VALUE self)
|
833
|
-
{
|
834
|
-
GET_IR;
|
835
|
-
return frt_get_tde(ir->term_positions(ir));
|
836
|
-
}
|
837
|
-
|
838
|
-
static VALUE
|
839
|
-
frt_ir_term_positions_for(VALUE self, VALUE rterm)
|
840
|
-
{
|
841
|
-
GET_IR;
|
842
|
-
Term t;
|
843
|
-
frt_set_term(rterm, &t);
|
844
|
-
return frt_get_tde(ir_term_positions_for(ir, &t));
|
845
|
-
}
|
846
|
-
|
847
|
-
static VALUE
|
848
|
-
frt_ir_doc_freq(VALUE self, VALUE rterm)
|
849
|
-
{
|
850
|
-
GET_IR;
|
851
|
-
Term t;
|
852
|
-
frt_set_term(rterm, &t);
|
853
|
-
return INT2FIX(ir->doc_freq(ir, &t));
|
854
|
-
}
|
855
|
-
|
856
|
-
static VALUE
|
857
|
-
frt_ir_terms(VALUE self)
|
858
|
-
{
|
859
|
-
TermEnum *te;
|
860
|
-
GET_IR;
|
861
|
-
te = ir->terms(ir);
|
862
|
-
return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
|
863
|
-
}
|
864
|
-
|
865
|
-
static VALUE
|
866
|
-
frt_ir_terms_from(VALUE self, VALUE rterm)
|
867
|
-
{
|
868
|
-
TermEnum *te;
|
869
|
-
Term t;
|
870
|
-
GET_IR;
|
871
|
-
frt_set_term(rterm, &t);
|
872
|
-
te = ir->terms_from(ir, &t);
|
873
|
-
return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
|
874
|
-
}
|
875
|
-
|
876
|
-
static VALUE
|
877
|
-
frt_ir_get_field_names(VALUE self)
|
878
|
-
{
|
879
|
-
GET_IR;
|
880
|
-
VALUE rfnames;
|
881
|
-
HashSet *fnames = ir->get_field_names(ir, IR_ALL);
|
882
|
-
rfnames = frt_hs_to_rb_ary(fnames);
|
883
|
-
hs_destroy(fnames);
|
884
|
-
return rfnames;
|
885
|
-
}
|
886
|
-
|
887
|
-
/****************************************************************************
|
888
|
-
*
|
889
|
-
* Init Function
|
890
|
-
*
|
891
|
-
****************************************************************************/
|
892
|
-
|
893
|
-
void
|
894
|
-
Init_index_io(void)
|
895
|
-
{
|
896
|
-
ranalyzer_key = ID2SYM(rb_intern("analyzer"));
|
897
|
-
rclose_dir_key = ID2SYM(rb_intern("close_dir"));
|
898
|
-
rcreate_key = ID2SYM(rb_intern("create"));
|
899
|
-
rcreate_if_missing_key = ID2SYM(rb_intern("create_if_missing"));
|
900
|
-
ruse_compound_file_key = ID2SYM(rb_intern("use_compound_file"));
|
901
|
-
rmerge_factor_key = ID2SYM(rb_intern("merge_factor"));
|
902
|
-
rmin_merge_docs_key = ID2SYM(rb_intern("min_merge_docs"));
|
903
|
-
rmax_merge_docs_key = ID2SYM(rb_intern("max_merge_docs"));
|
904
|
-
rmax_field_length_key = ID2SYM(rb_intern("max_field_length"));
|
905
|
-
rterm_index_interval_key = ID2SYM(rb_intern("term_index_interval"));
|
906
|
-
|
907
|
-
/* TermEnum */
|
908
|
-
cTermEnum = rb_define_class_under(mIndex, "TermEnum", rb_cObject);
|
909
|
-
rb_define_alloc_func(cTermEnum, frt_data_alloc);
|
910
|
-
|
911
|
-
rb_define_method(cTermEnum, "next?", frt_te_next, 0);
|
912
|
-
rb_define_method(cTermEnum, "term", frt_te_term, 0);
|
913
|
-
rb_define_method(cTermEnum, "doc_freq", frt_te_doc_freq, 0);
|
914
|
-
rb_define_method(cTermEnum, "skip_to", frt_te_skip_to, 1);
|
915
|
-
rb_define_method(cTermEnum, "close", frt_te_close, 0);
|
916
|
-
|
917
|
-
/* TermVectorOffsetInfo */
|
918
|
-
cTVOffsetInfo = rb_define_class_under(mIndex, "TermVectorOffsetInfo", rb_cObject);
|
919
|
-
rb_define_alloc_func(cTVOffsetInfo, frt_data_alloc);
|
920
|
-
|
921
|
-
rb_define_method(cTVOffsetInfo, "initialize", frt_tvoi_init, 2);
|
922
|
-
rb_define_method(cTVOffsetInfo, "start=", frt_tvoi_set_start, 1);
|
923
|
-
rb_define_method(cTVOffsetInfo, "start", frt_tvoi_get_start, 0);
|
924
|
-
rb_define_method(cTVOffsetInfo, "end=", frt_tvoi_set_end, 1);
|
925
|
-
rb_define_method(cTVOffsetInfo, "end", frt_tvoi_get_end, 0);
|
926
|
-
rb_define_method(cTVOffsetInfo, "eql?", frt_tvoi_eql, 1);
|
927
|
-
rb_define_method(cTVOffsetInfo, "==", frt_tvoi_eql, 1);
|
928
|
-
rb_define_method(cTVOffsetInfo, "hash", frt_tvoi_hash, 0);
|
929
|
-
rb_define_method(cTVOffsetInfo, "to_s", frt_tvoi_to_s, 0);
|
930
|
-
|
931
|
-
/* TermVector */
|
932
|
-
cTermVector = rb_define_class_under(mIndex, "TermVector", rb_cObject);
|
933
|
-
rb_define_alloc_func(cTermVector, frt_data_alloc);
|
934
|
-
rb_define_method(cTermVector, "field", frt_tv_get_field, 0);
|
935
|
-
rb_define_method(cTermVector, "terms", frt_tv_get_terms, 0);
|
936
|
-
rb_define_method(cTermVector, "freqs", frt_tv_get_freqs, 0);
|
937
|
-
rb_define_method(cTermVector, "positions", frt_tv_get_positions, 0);
|
938
|
-
rb_define_method(cTermVector, "offsets", frt_tv_get_offsets, 0);
|
939
|
-
|
940
|
-
/* TermDocEnum */
|
941
|
-
cTermDocEnum = rb_define_class_under(mIndex, "TermDocEnum", rb_cObject);
|
942
|
-
rb_define_alloc_func(cTermDocEnum, frt_data_alloc);
|
943
|
-
rb_define_method(cTermDocEnum, "close", frt_tde_close, 0);
|
944
|
-
rb_define_method(cTermDocEnum, "seek", frt_tde_seek, 1);
|
945
|
-
rb_define_method(cTermDocEnum, "doc", frt_tde_doc, 0);
|
946
|
-
rb_define_method(cTermDocEnum, "freq", frt_tde_freq, 0);
|
947
|
-
rb_define_method(cTermDocEnum, "next?", frt_tde_next, 0);
|
948
|
-
rb_define_method(cTermDocEnum, "next_position", frt_tde_next_position, 0);
|
949
|
-
rb_define_method(cTermDocEnum, "read", frt_tde_read, 2);
|
950
|
-
rb_define_method(cTermDocEnum, "skip_to", frt_tde_skip_to, 1);
|
951
|
-
|
952
|
-
/* IndexWriter */
|
953
|
-
cIndexWriter = rb_define_class_under(mIndex, "IndexWriter", rb_cObject);
|
954
|
-
rb_define_alloc_func(cIndexWriter, frt_data_alloc);
|
955
|
-
|
956
|
-
rb_define_const(cIndexWriter, "WRITE_LOCK_TIMEOUT", INT2FIX(1));
|
957
|
-
rb_define_const(cIndexWriter, "COMMIT_LOCK_TIMEOUT", INT2FIX(10));
|
958
|
-
rb_define_const(cIndexWriter, "WRITE_LOCK_NAME",
|
959
|
-
rb_str_new2(WRITE_LOCK_NAME));
|
960
|
-
rb_define_const(cIndexWriter, "COMMIT_LOCK_NAME",
|
961
|
-
rb_str_new2(COMMIT_LOCK_NAME));
|
962
|
-
rb_define_const(cIndexWriter, "DEFAULT_MERGE_FACTOR",
|
963
|
-
INT2FIX(config.merge_factor));
|
964
|
-
rb_define_const(cIndexWriter, "DEFAULT_MIN_MERGE_DOCS",
|
965
|
-
INT2FIX(config.min_merge_docs));
|
966
|
-
rb_define_const(cIndexWriter, "DEFAULT_MAX_MERGE_DOCS",
|
967
|
-
INT2FIX(config.max_merge_docs));
|
968
|
-
rb_define_const(cIndexWriter, "DEFAULT_MAX_FIELD_LENGTH",
|
969
|
-
INT2FIX(config.max_field_length));
|
970
|
-
rb_define_const(cIndexWriter, "DEFAULT_TERM_INDEX_INTERVAL",
|
971
|
-
INT2FIX(config.term_index_interval));
|
972
|
-
|
973
|
-
rb_define_method(cIndexWriter, "initialize", frt_iw_init, -1);
|
974
|
-
rb_define_method(cIndexWriter, "close", frt_iw_close, 0);
|
975
|
-
rb_define_method(cIndexWriter, "add_document", frt_iw_add_doc, 1);
|
976
|
-
rb_define_method(cIndexWriter, "<<", frt_iw_add_doc, 1);
|
977
|
-
rb_define_method(cIndexWriter, "merge_factor", frt_iw_get_merge_factor, 0);
|
978
|
-
rb_define_method(cIndexWriter, "min_merge_docs", frt_iw_get_min_merge_docs, 0);
|
979
|
-
rb_define_method(cIndexWriter, "max_merge_docs", frt_iw_get_max_merge_docs, 0);
|
980
|
-
rb_define_method(cIndexWriter, "max_field_length", frt_iw_get_max_field_length, 0);
|
981
|
-
rb_define_method(cIndexWriter, "term_index_interval", frt_iw_get_term_index_interval, 0);
|
982
|
-
rb_define_method(cIndexWriter, "use_compound_file", frt_iw_get_use_compound_file, 0);
|
983
|
-
rb_define_method(cIndexWriter, "doc_count", frt_iw_get_doc_count, 0);
|
984
|
-
rb_define_method(cIndexWriter, "merge_factor=", frt_iw_set_merge_factor, 1);
|
985
|
-
rb_define_method(cIndexWriter, "min_merge_docs=", frt_iw_set_min_merge_docs, 1);
|
986
|
-
rb_define_method(cIndexWriter, "max_merge_docs=", frt_iw_set_max_merge_docs, 1);
|
987
|
-
rb_define_method(cIndexWriter, "max_field_length=", frt_iw_set_max_field_length, 1);
|
988
|
-
rb_define_method(cIndexWriter, "term_index_interval=", frt_iw_set_term_index_interval, 1);
|
989
|
-
rb_define_method(cIndexWriter, "use_compound_file=", frt_iw_set_use_compound_file, 1);
|
990
|
-
rb_define_method(cIndexWriter, "optimize", frt_iw_optimize, 0);
|
991
|
-
|
992
|
-
/* IndexReader */
|
993
|
-
cIndexReader = rb_define_class_under(mIndex, "IndexReader", rb_cObject);
|
994
|
-
rb_define_alloc_func(cIndexReader, frt_data_alloc);
|
995
|
-
rb_define_singleton_method(cIndexReader, "open", frt_ir_open, -1);
|
996
|
-
rb_define_method(cIndexReader, "initialize", frt_ir_init, -1);
|
997
|
-
rb_define_method(cIndexReader, "set_norm", frt_ir_set_norm, 3);
|
998
|
-
rb_define_method(cIndexReader, "get_norms", frt_ir_get_norms, 1);
|
999
|
-
rb_define_method(cIndexReader, "get_norms_into", frt_ir_get_norms_into, 3);
|
1000
|
-
rb_define_method(cIndexReader, "commit", frt_ir_commit, 0);
|
1001
|
-
rb_define_method(cIndexReader, "close", frt_ir_close, 0);
|
1002
|
-
rb_define_method(cIndexReader, "has_deletions?", frt_ir_has_deletions, 0);
|
1003
|
-
rb_define_method(cIndexReader, "delete", frt_ir_delete, 1);
|
1004
|
-
rb_define_method(cIndexReader, "deleted?", frt_ir_is_deleted, 1);
|
1005
|
-
rb_define_method(cIndexReader, "max_doc", frt_ir_max_doc, 0);
|
1006
|
-
rb_define_method(cIndexReader, "num_docs", frt_ir_num_docs, 0);
|
1007
|
-
rb_define_method(cIndexReader, "undelete_all", frt_ir_undelete_all, 0);
|
1008
|
-
rb_define_method(cIndexReader, "latest?", frt_ir_is_latest, 0);
|
1009
|
-
rb_define_method(cIndexReader, "get_document", frt_ir_get_doc, 1);
|
1010
|
-
rb_define_method(cIndexReader, "[]", frt_ir_get_doc, 1);
|
1011
|
-
rb_define_method(cIndexReader, "get_term_vector", frt_ir_get_term_vector, 2);
|
1012
|
-
rb_define_method(cIndexReader, "get_term_vectors", frt_ir_get_term_vectors, 1);
|
1013
|
-
rb_define_method(cIndexReader, "term_docs", frt_ir_term_docs, 0);
|
1014
|
-
rb_define_method(cIndexReader, "term_positions", frt_ir_term_positions, 0);
|
1015
|
-
rb_define_method(cIndexReader, "term_docs_for", frt_ir_term_docs_for, 1);
|
1016
|
-
rb_define_method(cIndexReader, "term_positions_for", frt_ir_term_positions_for, 1);
|
1017
|
-
rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 1);
|
1018
|
-
rb_define_method(cIndexReader, "terms", frt_ir_terms, 0);
|
1019
|
-
rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 1);
|
1020
|
-
rb_define_method(cIndexReader, "get_field_names", frt_ir_get_field_names, 0);
|
1021
|
-
}
|