ferret 0.3.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/ext/term_buffer.c
DELETED
@@ -1,230 +0,0 @@
|
|
1
|
-
#include "ferret.h"
|
2
|
-
|
3
|
-
/* Interned "@name" (assigned in Init_term_buffer); frt_termbuffer_read uses
 * it to fetch the name from a field-info entry. */
ID id_field_name;
|
4
|
-
/* Interned "@fi_array" (assigned in Init_term_buffer); frt_termbuffer_read
 * uses it to fetch the array of field-info entries from its second argument. */
ID id_field_array;
|
5
|
-
|
6
|
-
/****************************************************************************
|
7
|
-
*
|
8
|
-
* TermBuffer Methods
|
9
|
-
*
|
10
|
-
****************************************************************************/
|
11
|
-
|
12
|
-
void
|
13
|
-
frt_termbuffer_free(void *p)
|
14
|
-
{
|
15
|
-
Term *tb = (Term *)p;
|
16
|
-
free(tb->text);
|
17
|
-
free(p);
|
18
|
-
}
|
19
|
-
|
20
|
-
void
|
21
|
-
frt_termbuffer_mark(void *p)
|
22
|
-
{
|
23
|
-
Term *tb = (Term *)p;
|
24
|
-
rb_gc_mark(tb->field);
|
25
|
-
}
|
26
|
-
|
27
|
-
static VALUE
|
28
|
-
frt_termbuffer_alloc(VALUE klass)
|
29
|
-
{
|
30
|
-
Term *tb = ALLOC(Term);
|
31
|
-
MEMZERO(tb, Term, 1);
|
32
|
-
tb->field = Qnil;
|
33
|
-
return Data_Wrap_Struct(klass, frt_termbuffer_mark, frt_termbuffer_free, tb);
|
34
|
-
}
|
35
|
-
|
36
|
-
static VALUE
|
37
|
-
frt_termbuffer_init(VALUE self)
|
38
|
-
{
|
39
|
-
rb_iv_set(self, "@term", Qnil);
|
40
|
-
return Qnil;
|
41
|
-
}
|
42
|
-
|
43
|
-
/* Declares a local `Term *tb` and points it at the struct wrapped by `self`.
 * Expands to a declaration followed by a statement, so it must be used where
 * a declaration is allowed and `self` must be in scope. */
#define GET_TB Term *tb; Data_Get_Struct(self, Term, tb)
|
44
|
-
static VALUE
|
45
|
-
frt_termbuffer_get_text_length(VALUE self)
|
46
|
-
{
|
47
|
-
GET_TB;
|
48
|
-
return INT2FIX(tb->tlen);
|
49
|
-
}
|
50
|
-
|
51
|
-
static VALUE
|
52
|
-
frt_termbuffer_get_text(VALUE self)
|
53
|
-
{
|
54
|
-
GET_TB;
|
55
|
-
return rb_str_new(tb->text, tb->tlen);
|
56
|
-
}
|
57
|
-
|
58
|
-
static VALUE
|
59
|
-
frt_termbuffer_get_field_name(VALUE self)
|
60
|
-
{
|
61
|
-
GET_TB;
|
62
|
-
return tb->field;
|
63
|
-
}
|
64
|
-
|
65
|
-
static VALUE
|
66
|
-
frt_termbuffer_reset(VALUE self)
|
67
|
-
{
|
68
|
-
GET_TB;
|
69
|
-
|
70
|
-
free(tb->text);
|
71
|
-
MEMZERO(tb, Term, 1);
|
72
|
-
tb->field = Qnil;
|
73
|
-
|
74
|
-
return Qnil;
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
frt_termbuffer_to_term(VALUE self)
|
79
|
-
{
|
80
|
-
GET_TB;
|
81
|
-
|
82
|
-
if(NIL_P(tb->field)) {
|
83
|
-
return Qnil;
|
84
|
-
} else {
|
85
|
-
VALUE args[2];
|
86
|
-
args[0] = tb->field;
|
87
|
-
args[1] = rb_str_new(tb->text, tb->tlen);
|
88
|
-
return rb_class_new_instance(2, args, cTerm);
|
89
|
-
}
|
90
|
-
}
|
91
|
-
|
92
|
-
int
|
93
|
-
frt_termbuffer_compare_to_int(VALUE self, VALUE rother)
|
94
|
-
{
|
95
|
-
Term *other;
|
96
|
-
GET_TB;
|
97
|
-
Data_Get_Struct(rother, Term, other);
|
98
|
-
return frt_term_cmp(tb, other);
|
99
|
-
}
|
100
|
-
|
101
|
-
VALUE
|
102
|
-
frt_termbuffer_lt(VALUE self, VALUE rother)
|
103
|
-
{
|
104
|
-
return frt_termbuffer_compare_to_int(self, rother) < 0 ? Qtrue : Qfalse;
|
105
|
-
}
|
106
|
-
|
107
|
-
VALUE
|
108
|
-
frt_termbuffer_gt(VALUE self, VALUE rother)
|
109
|
-
{
|
110
|
-
return frt_termbuffer_compare_to_int(self, rother) > 0 ? Qtrue : Qfalse;
|
111
|
-
}
|
112
|
-
|
113
|
-
VALUE
|
114
|
-
frt_termbuffer_le(VALUE self, VALUE rother)
|
115
|
-
{
|
116
|
-
return frt_termbuffer_compare_to_int(self, rother) <= 0 ? Qtrue : Qfalse;
|
117
|
-
}
|
118
|
-
|
119
|
-
VALUE
|
120
|
-
frt_termbuffer_ge(VALUE self, VALUE rother)
|
121
|
-
{
|
122
|
-
return frt_termbuffer_compare_to_int(self, rother) >= 0 ? Qtrue : Qfalse;
|
123
|
-
}
|
124
|
-
|
125
|
-
VALUE
|
126
|
-
frt_termbuffer_eq(VALUE self, VALUE rother)
|
127
|
-
{
|
128
|
-
if (rother == Qnil)
|
129
|
-
return Qfalse;
|
130
|
-
return frt_termbuffer_compare_to_int(self, rother) == 0 ? Qtrue : Qfalse;
|
131
|
-
}
|
132
|
-
|
133
|
-
static VALUE
|
134
|
-
frt_termbuffer_compare_to(VALUE self, VALUE rother)
|
135
|
-
{
|
136
|
-
return INT2FIX(frt_termbuffer_compare_to_int(self, rother));
|
137
|
-
}
|
138
|
-
|
139
|
-
VALUE
|
140
|
-
frt_termbuffer_init_copy(VALUE self, VALUE rother)
|
141
|
-
{
|
142
|
-
Term *tb_other;
|
143
|
-
int tlen;
|
144
|
-
GET_TB;
|
145
|
-
Data_Get_Struct(rother, Term, tb_other);
|
146
|
-
|
147
|
-
tlen = tb_other->tlen;
|
148
|
-
REALLOC_N(tb->text, char, tlen+1);
|
149
|
-
tb->tlen = tlen;
|
150
|
-
MEMCPY(tb->text, tb_other->text, char, tlen);
|
151
|
-
|
152
|
-
tb->field = tb_other->field;
|
153
|
-
|
154
|
-
return Qnil;
|
155
|
-
}
|
156
|
-
|
157
|
-
VALUE
|
158
|
-
frt_termbuffer_read(VALUE self, VALUE rinput, VALUE rfield_infos)
|
159
|
-
{
|
160
|
-
IndexBuffer *input;
|
161
|
-
int tlen, start, length, fnum;
|
162
|
-
GET_TB;
|
163
|
-
Data_Get_Struct(rinput, IndexBuffer, input);
|
164
|
-
|
165
|
-
start = frt_read_vint(rinput, input);
|
166
|
-
length = frt_read_vint(rinput, input);
|
167
|
-
tlen = start + length;
|
168
|
-
REALLOC_N(tb->text, char, tlen+1);
|
169
|
-
|
170
|
-
frt_read_chars(rinput, tb->text, start, length);
|
171
|
-
fnum = frt_read_vint(rinput, input);
|
172
|
-
if (fnum < 0) {
|
173
|
-
tb->field = rb_str_new("", 0);
|
174
|
-
} else {
|
175
|
-
tb->field = rb_ivar_get(
|
176
|
-
rb_ary_entry(rb_ivar_get(rfield_infos, id_field_array), fnum),
|
177
|
-
id_field_name);
|
178
|
-
}
|
179
|
-
|
180
|
-
tb->tlen = tlen;
|
181
|
-
return Qnil;
|
182
|
-
}
|
183
|
-
|
184
|
-
static VALUE
|
185
|
-
frt_termbuffer_hash(VALUE self)
|
186
|
-
{
|
187
|
-
GET_TB;
|
188
|
-
return INT2FIX(frt_hash(tb->text, tb->tlen) +
|
189
|
-
frt_hash(RSTRING(tb->field)->ptr, RSTRING(tb->field)->len));
|
190
|
-
}
|
191
|
-
|
192
|
-
/****************************************************************************
|
193
|
-
*
|
194
|
-
* Init Function
|
195
|
-
*
|
196
|
-
****************************************************************************/
|
197
|
-
|
198
|
-
|
199
|
-
void
|
200
|
-
Init_term_buffer(void) {
|
201
|
-
/* IDs */
|
202
|
-
id_field_name = rb_intern("@name");
|
203
|
-
id_field_array = rb_intern("@fi_array");
|
204
|
-
|
205
|
-
/* TermBuffer */
|
206
|
-
cTermBuffer = rb_define_class_under(mIndex, "TermBuffer", rb_cObject);
|
207
|
-
rb_define_alloc_func(cTermBuffer, frt_termbuffer_alloc);
|
208
|
-
rb_include_module(cTermBuffer, rb_mComparable);
|
209
|
-
|
210
|
-
/* Methods */
|
211
|
-
rb_define_method(cTermBuffer, "initialize", frt_termbuffer_init, 0);
|
212
|
-
rb_define_method(cTermBuffer, "initialize_copy", frt_termbuffer_init_copy, 1);
|
213
|
-
rb_define_method(cTermBuffer, "text", frt_termbuffer_get_text, 0);
|
214
|
-
rb_define_method(cTermBuffer, "field", frt_termbuffer_get_field_name, 0);
|
215
|
-
rb_define_method(cTermBuffer, "text_length", frt_termbuffer_get_text_length, 0);
|
216
|
-
rb_define_method(cTermBuffer, "<=>", frt_termbuffer_compare_to, 1);
|
217
|
-
rb_define_method(cTermBuffer, "<", frt_termbuffer_lt, 1);
|
218
|
-
rb_define_method(cTermBuffer, ">", frt_termbuffer_gt, 1);
|
219
|
-
rb_define_method(cTermBuffer, "<=", frt_termbuffer_le, 1);
|
220
|
-
rb_define_method(cTermBuffer, ">=", frt_termbuffer_ge, 1);
|
221
|
-
rb_define_method(cTermBuffer, "eql?", frt_termbuffer_eq, 1);
|
222
|
-
rb_define_method(cTermBuffer, "==", frt_termbuffer_eq, 1);
|
223
|
-
rb_define_method(cTermBuffer, "hash", frt_termbuffer_hash, 0);
|
224
|
-
rb_define_method(cTermBuffer, "read", frt_termbuffer_read, 2);
|
225
|
-
rb_define_method(cTermBuffer, "reset", frt_termbuffer_reset, 0);
|
226
|
-
rb_define_method(cTermBuffer, "to_term", frt_termbuffer_to_term, 0);
|
227
|
-
rb_define_method(cTermBuffer, "term", frt_termbuffer_to_term, 0);
|
228
|
-
rb_define_method(cTermBuffer, "term=", frt_termbuffer_init_copy, 1);
|
229
|
-
rb_define_method(cTermBuffer, "set!", frt_termbuffer_init_copy, 1);
|
230
|
-
}
|
data/ext/term_infos_reader.c
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
#include "ferret.h"
|
2
|
-
|
3
|
-
/* Interned "@index_terms" (assigned in Init_term_infos_reader); names the
 * instance variable holding the array searched by frt_tir_get_index_offset. */
static ID frt_id_index_terms;
|
4
|
-
/****************************************************************************
|
5
|
-
*
|
6
|
-
* TermInfosReader Methods
|
7
|
-
*
|
8
|
-
****************************************************************************/
|
9
|
-
|
10
|
-
static VALUE
|
11
|
-
frt_tir_get_index_offset(VALUE self, VALUE rterm)
|
12
|
-
{
|
13
|
-
VALUE index_terms = rb_ivar_get(self, frt_id_index_terms);
|
14
|
-
|
15
|
-
register int lo = 0; // binary search @index_terms[]
|
16
|
-
register int hi = RARRAY(index_terms)->len - 1;
|
17
|
-
register int mid, delta;
|
18
|
-
|
19
|
-
Term *term, *tmp_term;
|
20
|
-
Data_Get_Struct(rterm, Term, term);
|
21
|
-
|
22
|
-
while (hi >= lo) {
|
23
|
-
mid = (lo + hi) >> 1;
|
24
|
-
|
25
|
-
Data_Get_Struct(RARRAY(index_terms)->ptr[mid], Term, tmp_term);
|
26
|
-
delta = frt_term_cmp(term, tmp_term);
|
27
|
-
if (delta < 0) {
|
28
|
-
hi = mid - 1;
|
29
|
-
} else if (delta > 0) {
|
30
|
-
lo = mid + 1;
|
31
|
-
} else {
|
32
|
-
return INT2FIX(mid);
|
33
|
-
}
|
34
|
-
}
|
35
|
-
return INT2FIX(hi);
|
36
|
-
}
|
37
|
-
|
38
|
-
/****************************************************************************
|
39
|
-
*
|
40
|
-
* Init Function
|
41
|
-
*
|
42
|
-
****************************************************************************/
|
43
|
-
|
44
|
-
void
|
45
|
-
Init_term_infos_reader(void)
|
46
|
-
{
|
47
|
-
/* IDs */
|
48
|
-
frt_id_index_terms = rb_intern("@index_terms");
|
49
|
-
|
50
|
-
/* TermInfosReader */
|
51
|
-
cTermInfosReader = rb_define_class_under(mIndex, "TermInfosReader", rb_cObject);
|
52
|
-
|
53
|
-
rb_define_method(cTermInfosReader, "get_index_offset", frt_tir_get_index_offset, 1);
|
54
|
-
}
|
data/ext/terminfo.c
DELETED
@@ -1,160 +0,0 @@
|
|
1
|
-
#include "ferret.h"
|
2
|
-
|
3
|
-
|
4
|
-
/****************************************************************************
|
5
|
-
*
|
6
|
-
* TermInfo Methods
|
7
|
-
*
|
8
|
-
****************************************************************************/
|
9
|
-
|
10
|
-
/* GC free hook for a wrapped TermInfo: the struct owns no other memory, so
 * releasing the struct itself is all that is needed. */
void
frt_ti_free(void *p)
{
  free(p);
}
|
15
|
-
|
16
|
-
static VALUE
|
17
|
-
frt_ti_alloc(VALUE klass)
|
18
|
-
{
|
19
|
-
TermInfo *ti = (TermInfo *)ALLOC(TermInfo);
|
20
|
-
VALUE rbuffer = Data_Wrap_Struct(klass, NULL, frt_ti_free, ti);
|
21
|
-
return rbuffer;
|
22
|
-
}
|
23
|
-
|
24
|
-
/* Declares a local `TermInfo *ti` and points it at the struct wrapped by
 * `self`.  Expands to a declaration followed by a statement, so it must be
 * used where a declaration is allowed and `self` must be in scope. */
#define GET_TI TermInfo *ti; Data_Get_Struct(self, TermInfo, ti)
|
25
|
-
inline VALUE
|
26
|
-
frt_ti_set(int argc, VALUE *argv, VALUE self)
|
27
|
-
{
|
28
|
-
VALUE df, fp, pp, so;
|
29
|
-
GET_TI;
|
30
|
-
MEMZERO(ti, TermInfo, 1);
|
31
|
-
rb_scan_args(argc, argv, "04", &df, &fp, &pp, &so);
|
32
|
-
switch (argc) {
|
33
|
-
case 4:
|
34
|
-
ti->skip_offset = FIX2INT(so);
|
35
|
-
case 3:
|
36
|
-
ti->prox_pointer = FIX2INT(pp);
|
37
|
-
case 2:
|
38
|
-
ti->freq_pointer = FIX2INT(fp);
|
39
|
-
case 1:
|
40
|
-
ti->doc_freq = FIX2INT(df);
|
41
|
-
case 0:
|
42
|
-
break;
|
43
|
-
}
|
44
|
-
return Qnil;
|
45
|
-
}
|
46
|
-
|
47
|
-
static VALUE
|
48
|
-
frt_ti_init(int argc, VALUE *argv, VALUE self)
|
49
|
-
{
|
50
|
-
frt_ti_set(argc, argv, self);
|
51
|
-
return self;
|
52
|
-
}
|
53
|
-
|
54
|
-
static VALUE
|
55
|
-
frt_ti_init_copy(VALUE self, VALUE rother)
|
56
|
-
{
|
57
|
-
TermInfo *other_ti;
|
58
|
-
GET_TI;
|
59
|
-
Data_Get_Struct(rother, TermInfo, other_ti);
|
60
|
-
MEMCPY(ti, other_ti, TermInfo, 1);
|
61
|
-
return self;
|
62
|
-
}
|
63
|
-
|
64
|
-
static VALUE
|
65
|
-
frt_ti_eql(VALUE self, VALUE rother)
|
66
|
-
{
|
67
|
-
TermInfo *other_ti;
|
68
|
-
GET_TI;
|
69
|
-
if (NIL_P(rother)) return Qfalse;
|
70
|
-
Data_Get_Struct(rother, TermInfo, other_ti);
|
71
|
-
return (MEMCMP(ti, other_ti, TermInfo, 1) == 0) ? Qtrue : Qfalse;
|
72
|
-
}
|
73
|
-
|
74
|
-
static VALUE
|
75
|
-
frt_ti_get_df(VALUE self)
|
76
|
-
{
|
77
|
-
GET_TI;
|
78
|
-
return INT2FIX(ti->doc_freq);
|
79
|
-
}
|
80
|
-
|
81
|
-
static VALUE
|
82
|
-
frt_ti_get_fp(VALUE self)
|
83
|
-
{
|
84
|
-
GET_TI;
|
85
|
-
return INT2FIX(ti->freq_pointer);
|
86
|
-
}
|
87
|
-
|
88
|
-
static VALUE
|
89
|
-
frt_ti_get_pp(VALUE self)
|
90
|
-
{
|
91
|
-
GET_TI;
|
92
|
-
return INT2FIX(ti->prox_pointer);
|
93
|
-
}
|
94
|
-
|
95
|
-
static VALUE
|
96
|
-
frt_ti_get_so(VALUE self)
|
97
|
-
{
|
98
|
-
GET_TI;
|
99
|
-
return INT2FIX(ti->skip_offset);
|
100
|
-
}
|
101
|
-
|
102
|
-
static VALUE
|
103
|
-
frt_ti_set_df(VALUE self, VALUE val)
|
104
|
-
{
|
105
|
-
GET_TI;
|
106
|
-
ti->doc_freq = FIX2INT(val);
|
107
|
-
return Qnil;
|
108
|
-
}
|
109
|
-
|
110
|
-
static VALUE
|
111
|
-
frt_ti_set_fp(VALUE self, VALUE val)
|
112
|
-
{
|
113
|
-
GET_TI;
|
114
|
-
ti->freq_pointer = FIX2INT(val);
|
115
|
-
return Qnil;
|
116
|
-
}
|
117
|
-
|
118
|
-
static VALUE
|
119
|
-
frt_ti_set_pp(VALUE self, VALUE val)
|
120
|
-
{
|
121
|
-
GET_TI;
|
122
|
-
ti->prox_pointer = FIX2INT(val);
|
123
|
-
return Qnil;
|
124
|
-
}
|
125
|
-
|
126
|
-
static VALUE
|
127
|
-
frt_ti_set_so(VALUE self, VALUE val)
|
128
|
-
{
|
129
|
-
GET_TI;
|
130
|
-
ti->skip_offset = FIX2INT(val);
|
131
|
-
return Qnil;
|
132
|
-
}
|
133
|
-
|
134
|
-
/****************************************************************************
|
135
|
-
*
|
136
|
-
* Init Function
|
137
|
-
*
|
138
|
-
****************************************************************************/
|
139
|
-
|
140
|
-
void
|
141
|
-
Init_term_info(void)
|
142
|
-
{
|
143
|
-
/* TermInfo */
|
144
|
-
cTermInfo = rb_define_class_under(mIndex, "TermInfo", rb_cObject);
|
145
|
-
rb_define_alloc_func(cTermInfo, frt_ti_alloc);
|
146
|
-
|
147
|
-
rb_define_method(cTermInfo, "initialize", frt_ti_init, -1);
|
148
|
-
rb_define_method(cTermInfo, "set_values!", frt_ti_set, -1);
|
149
|
-
rb_define_method(cTermInfo, "initialize_copy", frt_ti_init_copy, 1);
|
150
|
-
rb_define_method(cTermInfo, "set!", frt_ti_init_copy, 1);
|
151
|
-
rb_define_method(cTermInfo, "==", frt_ti_eql, 1);
|
152
|
-
rb_define_method(cTermInfo, "doc_freq", frt_ti_get_df, 0);
|
153
|
-
rb_define_method(cTermInfo, "doc_freq=", frt_ti_set_df, 1);
|
154
|
-
rb_define_method(cTermInfo, "freq_pointer", frt_ti_get_fp, 0);
|
155
|
-
rb_define_method(cTermInfo, "freq_pointer=", frt_ti_set_fp, 1);
|
156
|
-
rb_define_method(cTermInfo, "prox_pointer", frt_ti_get_pp, 0);
|
157
|
-
rb_define_method(cTermInfo, "prox_pointer=", frt_ti_set_pp, 1);
|
158
|
-
rb_define_method(cTermInfo, "skip_offset", frt_ti_get_so, 0);
|
159
|
-
rb_define_method(cTermInfo, "skip_offset=", frt_ti_set_so, 1);
|
160
|
-
}
|
data/ext/token.c
DELETED
@@ -1,93 +0,0 @@
|
|
1
|
-
#include "ferret.h"
|
2
|
-
|
3
|
-
/****************************************************************************
|
4
|
-
*
|
5
|
-
* Token Methods
|
6
|
-
*
|
7
|
-
****************************************************************************/
|
8
|
-
|
9
|
-
/* Interned instance-variable names of Token (assigned in Init_token):
 * "@term_text", "@position_increment", "@start_offset", "@end_offset" and
 * "@type" respectively. */
ID id_tk_text, id_tk_pos_inc, id_tk_start_offset, id_tk_end_offset, id_tk_type;
|
10
|
-
/* Interned method name "position_increment=" (assigned in Init_token);
 * frt_token_eql checks that the other object responds to it before
 * comparing. */
ID id_tk_pos_inc_set;
|
11
|
-
|
12
|
-
static VALUE
|
13
|
-
frt_token_pos_inc (VALUE self, VALUE pI)
|
14
|
-
{
|
15
|
-
if(FIX2INT(pI) < 0)
|
16
|
-
rb_raise(rb_eArgError, "position_increment < 0");
|
17
|
-
rb_ivar_set(self, id_tk_pos_inc, pI);
|
18
|
-
return self;
|
19
|
-
}
|
20
|
-
|
21
|
-
static VALUE
|
22
|
-
frt_token_init(int argc, VALUE *argv, VALUE self)
|
23
|
-
{
|
24
|
-
VALUE text, start_offset, end_offset, type, pos_inc;
|
25
|
-
rb_scan_args(argc, argv, "32", &text,
|
26
|
-
&start_offset, &end_offset, &type, &pos_inc);
|
27
|
-
rb_ivar_set(self, id_tk_text, text);
|
28
|
-
rb_ivar_set(self, id_tk_start_offset, start_offset);
|
29
|
-
rb_ivar_set(self, id_tk_end_offset, end_offset);
|
30
|
-
if (argc < 4) {
|
31
|
-
rb_ivar_set(self, id_tk_type, rb_str_new("word", 4));
|
32
|
-
} else {
|
33
|
-
rb_ivar_set(self, id_tk_type, type);
|
34
|
-
}
|
35
|
-
if (argc < 5) {
|
36
|
-
rb_ivar_set(self, id_tk_pos_inc, INT2FIX(1));
|
37
|
-
} else {
|
38
|
-
rb_ivar_set(self, id_tk_pos_inc, pos_inc);
|
39
|
-
}
|
40
|
-
return self;
|
41
|
-
}
|
42
|
-
|
43
|
-
static VALUE
|
44
|
-
frt_token_eql(VALUE self, VALUE other)
|
45
|
-
{
|
46
|
-
VALUE rself_text, rother_text;
|
47
|
-
char *self_text, *other_text;
|
48
|
-
if (!rb_respond_to(other, id_tk_pos_inc_set))
|
49
|
-
return Qfalse;
|
50
|
-
rself_text = rb_ivar_get(self, id_tk_text);
|
51
|
-
rother_text = rb_ivar_get(other, id_tk_text);
|
52
|
-
self_text = StringValuePtr(rself_text);
|
53
|
-
other_text = StringValuePtr(rother_text);
|
54
|
-
if (rb_ivar_get(self, id_tk_start_offset) == rb_ivar_get(other, id_tk_start_offset) &&
|
55
|
-
rb_ivar_get(self, id_tk_end_offset) == rb_ivar_get(other, id_tk_end_offset) &&
|
56
|
-
(strcmp(self_text, other_text) == 0))
|
57
|
-
return Qtrue;
|
58
|
-
else
|
59
|
-
return Qfalse;
|
60
|
-
}
|
61
|
-
|
62
|
-
/****************************************************************************
|
63
|
-
*
|
64
|
-
* Init Function
|
65
|
-
*
|
66
|
-
****************************************************************************/
|
67
|
-
|
68
|
-
void
|
69
|
-
Init_token(void)
|
70
|
-
{
|
71
|
-
/* IDs */
|
72
|
-
id_tk_text = rb_intern("@term_text");
|
73
|
-
id_tk_start_offset = rb_intern("@start_offset");
|
74
|
-
id_tk_end_offset = rb_intern("@end_offset");
|
75
|
-
id_tk_type = rb_intern("@type");
|
76
|
-
id_tk_pos_inc = rb_intern("@position_increment");
|
77
|
-
id_tk_pos_inc_set = rb_intern("position_increment=");
|
78
|
-
|
79
|
-
|
80
|
-
/* IndexWriter */
|
81
|
-
cToken = rb_define_class_under(mAnalysis, "Token", rb_cObject);
|
82
|
-
|
83
|
-
rb_define_method(cToken, "initialize", frt_token_init, -1);
|
84
|
-
rb_define_method(cToken, "position_increment=", frt_token_pos_inc, 1);
|
85
|
-
rb_define_method(cToken, "==", frt_token_eql, 1);
|
86
|
-
rb_define_method(cToken, "eql", frt_token_eql, 1);
|
87
|
-
|
88
|
-
rb_define_attr(cToken, "term_text", 1, 1);
|
89
|
-
rb_define_attr(cToken, "position_increment", 1, 0);
|
90
|
-
rb_define_attr(cToken, "start_offset", 1, 0);
|
91
|
-
rb_define_attr(cToken, "end_offset", 1, 0);
|
92
|
-
rb_define_attr(cToken, "type", 1, 1);
|
93
|
-
}
|