ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/term_buffer.c DELETED
@@ -1,230 +0,0 @@
1
- #include "ferret.h"
2
-
3
- ID id_field_name;
4
- ID id_field_array;
5
-
6
- /****************************************************************************
7
- *
8
- * TermBuffer Methods
9
- *
10
- ****************************************************************************/
11
-
12
- void
13
- frt_termbuffer_free(void *p)
14
- {
15
- Term *tb = (Term *)p;
16
- free(tb->text);
17
- free(p);
18
- }
19
-
20
- void
21
- frt_termbuffer_mark(void *p)
22
- {
23
- Term *tb = (Term *)p;
24
- rb_gc_mark(tb->field);
25
- }
26
-
27
- static VALUE
28
- frt_termbuffer_alloc(VALUE klass)
29
- {
30
- Term *tb = ALLOC(Term);
31
- MEMZERO(tb, Term, 1);
32
- tb->field = Qnil;
33
- return Data_Wrap_Struct(klass, frt_termbuffer_mark, frt_termbuffer_free, tb);
34
- }
35
-
36
- static VALUE
37
- frt_termbuffer_init(VALUE self)
38
- {
39
- rb_iv_set(self, "@term", Qnil);
40
- return Qnil;
41
- }
42
-
43
- #define GET_TB Term *tb; Data_Get_Struct(self, Term, tb)
44
- static VALUE
45
- frt_termbuffer_get_text_length(VALUE self)
46
- {
47
- GET_TB;
48
- return INT2FIX(tb->tlen);
49
- }
50
-
51
- static VALUE
52
- frt_termbuffer_get_text(VALUE self)
53
- {
54
- GET_TB;
55
- return rb_str_new(tb->text, tb->tlen);
56
- }
57
-
58
- static VALUE
59
- frt_termbuffer_get_field_name(VALUE self)
60
- {
61
- GET_TB;
62
- return tb->field;
63
- }
64
-
65
- static VALUE
66
- frt_termbuffer_reset(VALUE self)
67
- {
68
- GET_TB;
69
-
70
- free(tb->text);
71
- MEMZERO(tb, Term, 1);
72
- tb->field = Qnil;
73
-
74
- return Qnil;
75
- }
76
-
77
- VALUE
78
- frt_termbuffer_to_term(VALUE self)
79
- {
80
- GET_TB;
81
-
82
- if(NIL_P(tb->field)) {
83
- return Qnil;
84
- } else {
85
- VALUE args[2];
86
- args[0] = tb->field;
87
- args[1] = rb_str_new(tb->text, tb->tlen);
88
- return rb_class_new_instance(2, args, cTerm);
89
- }
90
- }
91
-
92
- int
93
- frt_termbuffer_compare_to_int(VALUE self, VALUE rother)
94
- {
95
- Term *other;
96
- GET_TB;
97
- Data_Get_Struct(rother, Term, other);
98
- return frt_term_cmp(tb, other);
99
- }
100
-
101
- VALUE
102
- frt_termbuffer_lt(VALUE self, VALUE rother)
103
- {
104
- return frt_termbuffer_compare_to_int(self, rother) < 0 ? Qtrue : Qfalse;
105
- }
106
-
107
- VALUE
108
- frt_termbuffer_gt(VALUE self, VALUE rother)
109
- {
110
- return frt_termbuffer_compare_to_int(self, rother) > 0 ? Qtrue : Qfalse;
111
- }
112
-
113
- VALUE
114
- frt_termbuffer_le(VALUE self, VALUE rother)
115
- {
116
- return frt_termbuffer_compare_to_int(self, rother) <= 0 ? Qtrue : Qfalse;
117
- }
118
-
119
- VALUE
120
- frt_termbuffer_ge(VALUE self, VALUE rother)
121
- {
122
- return frt_termbuffer_compare_to_int(self, rother) >= 0 ? Qtrue : Qfalse;
123
- }
124
-
125
- VALUE
126
- frt_termbuffer_eq(VALUE self, VALUE rother)
127
- {
128
- if (rother == Qnil)
129
- return Qfalse;
130
- return frt_termbuffer_compare_to_int(self, rother) == 0 ? Qtrue : Qfalse;
131
- }
132
-
133
- static VALUE
134
- frt_termbuffer_compare_to(VALUE self, VALUE rother)
135
- {
136
- return INT2FIX(frt_termbuffer_compare_to_int(self, rother));
137
- }
138
-
139
- VALUE
140
- frt_termbuffer_init_copy(VALUE self, VALUE rother)
141
- {
142
- Term *tb_other;
143
- int tlen;
144
- GET_TB;
145
- Data_Get_Struct(rother, Term, tb_other);
146
-
147
- tlen = tb_other->tlen;
148
- REALLOC_N(tb->text, char, tlen+1);
149
- tb->tlen = tlen;
150
- MEMCPY(tb->text, tb_other->text, char, tlen);
151
-
152
- tb->field = tb_other->field;
153
-
154
- return Qnil;
155
- }
156
-
157
- VALUE
158
- frt_termbuffer_read(VALUE self, VALUE rinput, VALUE rfield_infos)
159
- {
160
- IndexBuffer *input;
161
- int tlen, start, length, fnum;
162
- GET_TB;
163
- Data_Get_Struct(rinput, IndexBuffer, input);
164
-
165
- start = frt_read_vint(rinput, input);
166
- length = frt_read_vint(rinput, input);
167
- tlen = start + length;
168
- REALLOC_N(tb->text, char, tlen+1);
169
-
170
- frt_read_chars(rinput, tb->text, start, length);
171
- fnum = frt_read_vint(rinput, input);
172
- if (fnum < 0) {
173
- tb->field = rb_str_new("", 0);
174
- } else {
175
- tb->field = rb_ivar_get(
176
- rb_ary_entry(rb_ivar_get(rfield_infos, id_field_array), fnum),
177
- id_field_name);
178
- }
179
-
180
- tb->tlen = tlen;
181
- return Qnil;
182
- }
183
-
184
- static VALUE
185
- frt_termbuffer_hash(VALUE self)
186
- {
187
- GET_TB;
188
- return INT2FIX(frt_hash(tb->text, tb->tlen) +
189
- frt_hash(RSTRING(tb->field)->ptr, RSTRING(tb->field)->len));
190
- }
191
-
192
- /****************************************************************************
193
- *
194
- * Init Function
195
- *
196
- ****************************************************************************/
197
-
198
-
199
- void
200
- Init_term_buffer(void) {
201
- /* IDs */
202
- id_field_name = rb_intern("@name");
203
- id_field_array = rb_intern("@fi_array");
204
-
205
- /* TermBuffer */
206
- cTermBuffer = rb_define_class_under(mIndex, "TermBuffer", rb_cObject);
207
- rb_define_alloc_func(cTermBuffer, frt_termbuffer_alloc);
208
- rb_include_module(cTermBuffer, rb_mComparable);
209
-
210
- /* Methods */
211
- rb_define_method(cTermBuffer, "initialize", frt_termbuffer_init, 0);
212
- rb_define_method(cTermBuffer, "initialize_copy", frt_termbuffer_init_copy, 1);
213
- rb_define_method(cTermBuffer, "text", frt_termbuffer_get_text, 0);
214
- rb_define_method(cTermBuffer, "field", frt_termbuffer_get_field_name, 0);
215
- rb_define_method(cTermBuffer, "text_length", frt_termbuffer_get_text_length, 0);
216
- rb_define_method(cTermBuffer, "<=>", frt_termbuffer_compare_to, 1);
217
- rb_define_method(cTermBuffer, "<", frt_termbuffer_lt, 1);
218
- rb_define_method(cTermBuffer, ">", frt_termbuffer_gt, 1);
219
- rb_define_method(cTermBuffer, "<=", frt_termbuffer_le, 1);
220
- rb_define_method(cTermBuffer, ">=", frt_termbuffer_ge, 1);
221
- rb_define_method(cTermBuffer, "eql?", frt_termbuffer_eq, 1);
222
- rb_define_method(cTermBuffer, "==", frt_termbuffer_eq, 1);
223
- rb_define_method(cTermBuffer, "hash", frt_termbuffer_hash, 0);
224
- rb_define_method(cTermBuffer, "read", frt_termbuffer_read, 2);
225
- rb_define_method(cTermBuffer, "reset", frt_termbuffer_reset, 0);
226
- rb_define_method(cTermBuffer, "to_term", frt_termbuffer_to_term, 0);
227
- rb_define_method(cTermBuffer, "term", frt_termbuffer_to_term, 0);
228
- rb_define_method(cTermBuffer, "term=", frt_termbuffer_init_copy, 1);
229
- rb_define_method(cTermBuffer, "set!", frt_termbuffer_init_copy, 1);
230
- }
data/ext/term_infos_reader.c DELETED
@@ -1,54 +0,0 @@
1
- #include "ferret.h"
2
-
3
- static ID frt_id_index_terms;
4
- /****************************************************************************
5
- *
6
- * TermInfosReader Methods
7
- *
8
- ****************************************************************************/
9
-
10
- static VALUE
11
- frt_tir_get_index_offset(VALUE self, VALUE rterm)
12
- {
13
- VALUE index_terms = rb_ivar_get(self, frt_id_index_terms);
14
-
15
- register int lo = 0; // binary search @index_terms[]
16
- register int hi = RARRAY(index_terms)->len - 1;
17
- register int mid, delta;
18
-
19
- Term *term, *tmp_term;
20
- Data_Get_Struct(rterm, Term, term);
21
-
22
- while (hi >= lo) {
23
- mid = (lo + hi) >> 1;
24
-
25
- Data_Get_Struct(RARRAY(index_terms)->ptr[mid], Term, tmp_term);
26
- delta = frt_term_cmp(term, tmp_term);
27
- if (delta < 0) {
28
- hi = mid - 1;
29
- } else if (delta > 0) {
30
- lo = mid + 1;
31
- } else {
32
- return INT2FIX(mid);
33
- }
34
- }
35
- return INT2FIX(hi);
36
- }
37
-
38
- /****************************************************************************
39
- *
40
- * Init Function
41
- *
42
- ****************************************************************************/
43
-
44
- void
45
- Init_term_infos_reader(void)
46
- {
47
- /* IDs */
48
- frt_id_index_terms = rb_intern("@index_terms");
49
-
50
- /* TermInfosReader */
51
- cTermInfosReader = rb_define_class_under(mIndex, "TermInfosReader", rb_cObject);
52
-
53
- rb_define_method(cTermInfosReader, "get_index_offset", frt_tir_get_index_offset, 1);
54
- }
data/ext/terminfo.c DELETED
@@ -1,160 +0,0 @@
1
- #include "ferret.h"
2
-
3
-
4
- /****************************************************************************
5
- *
6
- * TermInfo Methods
7
- *
8
- ****************************************************************************/
9
-
10
- void
11
- frt_ti_free(void *p)
12
- {
13
- free(p);
14
- }
15
-
16
- static VALUE
17
- frt_ti_alloc(VALUE klass)
18
- {
19
- TermInfo *ti = (TermInfo *)ALLOC(TermInfo);
20
- VALUE rbuffer = Data_Wrap_Struct(klass, NULL, frt_ti_free, ti);
21
- return rbuffer;
22
- }
23
-
24
- #define GET_TI TermInfo *ti; Data_Get_Struct(self, TermInfo, ti)
25
- inline VALUE
26
- frt_ti_set(int argc, VALUE *argv, VALUE self)
27
- {
28
- VALUE df, fp, pp, so;
29
- GET_TI;
30
- MEMZERO(ti, TermInfo, 1);
31
- rb_scan_args(argc, argv, "04", &df, &fp, &pp, &so);
32
- switch (argc) {
33
- case 4:
34
- ti->skip_offset = FIX2INT(so);
35
- case 3:
36
- ti->prox_pointer = FIX2INT(pp);
37
- case 2:
38
- ti->freq_pointer = FIX2INT(fp);
39
- case 1:
40
- ti->doc_freq = FIX2INT(df);
41
- case 0:
42
- break;
43
- }
44
- return Qnil;
45
- }
46
-
47
- static VALUE
48
- frt_ti_init(int argc, VALUE *argv, VALUE self)
49
- {
50
- frt_ti_set(argc, argv, self);
51
- return self;
52
- }
53
-
54
- static VALUE
55
- frt_ti_init_copy(VALUE self, VALUE rother)
56
- {
57
- TermInfo *other_ti;
58
- GET_TI;
59
- Data_Get_Struct(rother, TermInfo, other_ti);
60
- MEMCPY(ti, other_ti, TermInfo, 1);
61
- return self;
62
- }
63
-
64
- static VALUE
65
- frt_ti_eql(VALUE self, VALUE rother)
66
- {
67
- TermInfo *other_ti;
68
- GET_TI;
69
- if (NIL_P(rother)) return Qfalse;
70
- Data_Get_Struct(rother, TermInfo, other_ti);
71
- return (MEMCMP(ti, other_ti, TermInfo, 1) == 0) ? Qtrue : Qfalse;
72
- }
73
-
74
- static VALUE
75
- frt_ti_get_df(VALUE self)
76
- {
77
- GET_TI;
78
- return INT2FIX(ti->doc_freq);
79
- }
80
-
81
- static VALUE
82
- frt_ti_get_fp(VALUE self)
83
- {
84
- GET_TI;
85
- return INT2FIX(ti->freq_pointer);
86
- }
87
-
88
- static VALUE
89
- frt_ti_get_pp(VALUE self)
90
- {
91
- GET_TI;
92
- return INT2FIX(ti->prox_pointer);
93
- }
94
-
95
- static VALUE
96
- frt_ti_get_so(VALUE self)
97
- {
98
- GET_TI;
99
- return INT2FIX(ti->skip_offset);
100
- }
101
-
102
- static VALUE
103
- frt_ti_set_df(VALUE self, VALUE val)
104
- {
105
- GET_TI;
106
- ti->doc_freq = FIX2INT(val);
107
- return Qnil;
108
- }
109
-
110
- static VALUE
111
- frt_ti_set_fp(VALUE self, VALUE val)
112
- {
113
- GET_TI;
114
- ti->freq_pointer = FIX2INT(val);
115
- return Qnil;
116
- }
117
-
118
- static VALUE
119
- frt_ti_set_pp(VALUE self, VALUE val)
120
- {
121
- GET_TI;
122
- ti->prox_pointer = FIX2INT(val);
123
- return Qnil;
124
- }
125
-
126
- static VALUE
127
- frt_ti_set_so(VALUE self, VALUE val)
128
- {
129
- GET_TI;
130
- ti->skip_offset = FIX2INT(val);
131
- return Qnil;
132
- }
133
-
134
- /****************************************************************************
135
- *
136
- * Init Function
137
- *
138
- ****************************************************************************/
139
-
140
- void
141
- Init_term_info(void)
142
- {
143
- /* TermInfo */
144
- cTermInfo = rb_define_class_under(mIndex, "TermInfo", rb_cObject);
145
- rb_define_alloc_func(cTermInfo, frt_ti_alloc);
146
-
147
- rb_define_method(cTermInfo, "initialize", frt_ti_init, -1);
148
- rb_define_method(cTermInfo, "set_values!", frt_ti_set, -1);
149
- rb_define_method(cTermInfo, "initialize_copy", frt_ti_init_copy, 1);
150
- rb_define_method(cTermInfo, "set!", frt_ti_init_copy, 1);
151
- rb_define_method(cTermInfo, "==", frt_ti_eql, 1);
152
- rb_define_method(cTermInfo, "doc_freq", frt_ti_get_df, 0);
153
- rb_define_method(cTermInfo, "doc_freq=", frt_ti_set_df, 1);
154
- rb_define_method(cTermInfo, "freq_pointer", frt_ti_get_fp, 0);
155
- rb_define_method(cTermInfo, "freq_pointer=", frt_ti_set_fp, 1);
156
- rb_define_method(cTermInfo, "prox_pointer", frt_ti_get_pp, 0);
157
- rb_define_method(cTermInfo, "prox_pointer=", frt_ti_set_pp, 1);
158
- rb_define_method(cTermInfo, "skip_offset", frt_ti_get_so, 0);
159
- rb_define_method(cTermInfo, "skip_offset=", frt_ti_set_so, 1);
160
- }
data/ext/token.c DELETED
@@ -1,93 +0,0 @@
1
- #include "ferret.h"
2
-
3
- /****************************************************************************
4
- *
5
- * Token Methods
6
- *
7
- ****************************************************************************/
8
-
9
- ID id_tk_text, id_tk_pos_inc, id_tk_start_offset, id_tk_end_offset, id_tk_type;
10
- ID id_tk_pos_inc_set;
11
-
12
- static VALUE
13
- frt_token_pos_inc (VALUE self, VALUE pI)
14
- {
15
- if(FIX2INT(pI) < 0)
16
- rb_raise(rb_eArgError, "position_increment < 0");
17
- rb_ivar_set(self, id_tk_pos_inc, pI);
18
- return self;
19
- }
20
-
21
- static VALUE
22
- frt_token_init(int argc, VALUE *argv, VALUE self)
23
- {
24
- VALUE text, start_offset, end_offset, type, pos_inc;
25
- rb_scan_args(argc, argv, "32", &text,
26
- &start_offset, &end_offset, &type, &pos_inc);
27
- rb_ivar_set(self, id_tk_text, text);
28
- rb_ivar_set(self, id_tk_start_offset, start_offset);
29
- rb_ivar_set(self, id_tk_end_offset, end_offset);
30
- if (argc < 4) {
31
- rb_ivar_set(self, id_tk_type, rb_str_new("word", 4));
32
- } else {
33
- rb_ivar_set(self, id_tk_type, type);
34
- }
35
- if (argc < 5) {
36
- rb_ivar_set(self, id_tk_pos_inc, INT2FIX(1));
37
- } else {
38
- rb_ivar_set(self, id_tk_pos_inc, pos_inc);
39
- }
40
- return self;
41
- }
42
-
43
- static VALUE
44
- frt_token_eql(VALUE self, VALUE other)
45
- {
46
- VALUE rself_text, rother_text;
47
- char *self_text, *other_text;
48
- if (!rb_respond_to(other, id_tk_pos_inc_set))
49
- return Qfalse;
50
- rself_text = rb_ivar_get(self, id_tk_text);
51
- rother_text = rb_ivar_get(other, id_tk_text);
52
- self_text = StringValuePtr(rself_text);
53
- other_text = StringValuePtr(rother_text);
54
- if (rb_ivar_get(self, id_tk_start_offset) == rb_ivar_get(other, id_tk_start_offset) &&
55
- rb_ivar_get(self, id_tk_end_offset) == rb_ivar_get(other, id_tk_end_offset) &&
56
- (strcmp(self_text, other_text) == 0))
57
- return Qtrue;
58
- else
59
- return Qfalse;
60
- }
61
-
62
- /****************************************************************************
63
- *
64
- * Init Function
65
- *
66
- ****************************************************************************/
67
-
68
- void
69
- Init_token(void)
70
- {
71
- /* IDs */
72
- id_tk_text = rb_intern("@term_text");
73
- id_tk_start_offset = rb_intern("@start_offset");
74
- id_tk_end_offset = rb_intern("@end_offset");
75
- id_tk_type = rb_intern("@type");
76
- id_tk_pos_inc = rb_intern("@position_increment");
77
- id_tk_pos_inc_set = rb_intern("position_increment=");
78
-
79
-
80
- /* IndexWriter */
81
- cToken = rb_define_class_under(mAnalysis, "Token", rb_cObject);
82
-
83
- rb_define_method(cToken, "initialize", frt_token_init, -1);
84
- rb_define_method(cToken, "position_increment=", frt_token_pos_inc, 1);
85
- rb_define_method(cToken, "==", frt_token_eql, 1);
86
- rb_define_method(cToken, "eql", frt_token_eql, 1);
87
-
88
- rb_define_attr(cToken, "term_text", 1, 1);
89
- rb_define_attr(cToken, "position_increment", 1, 0);
90
- rb_define_attr(cToken, "start_offset", 1, 0);
91
- rb_define_attr(cToken, "end_offset", 1, 0);
92
- rb_define_attr(cToken, "type", 1, 1);
93
- }