ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/index_io.c CHANGED
@@ -1,523 +1,377 @@
1
- #include "ferret.h"
1
+ #include <store.h>
2
+ #include <string.h>
2
3
 
3
- ID frt_length, frt_flush_buffer, frt_read_internal, frt_seek_internal;
4
-
5
- /****************************************************************************
6
- *
7
- * BufferIndexInput Methods
8
- *
9
- ****************************************************************************/
10
-
11
- void
12
- frt_indexbuffer_free(void *p)
4
+ Buffer *buf_create()
13
5
  {
14
- IndexBuffer *my_buf = (IndexBuffer *)p;
15
- free(my_buf->buffer);
16
- free(p);
6
+ Buffer *buf = ALLOC(Buffer);
7
+ buf->start = 0;
8
+ buf->pos = 0;
9
+ buf->len = 0;
10
+ return buf;
17
11
  }
18
12
 
19
- static VALUE
20
- frt_indexbuffer_alloc(VALUE klass)
13
+ void buf_destroy(Buffer *buf)
21
14
  {
22
- IndexBuffer *my_buf;
23
-
24
- my_buf = ALLOC(IndexBuffer);
25
- MEMZERO(my_buf, IndexBuffer, 1);
26
- my_buf->buffer = ALLOC_N(byte_t, BUFFER_SIZE);
27
-
28
- return Data_Wrap_Struct(klass, NULL, frt_indexbuffer_free, my_buf);
15
+ free(buf);
29
16
  }
30
17
 
31
- #define GET_MY_BUF IndexBuffer *my_buf; Data_Get_Struct(self, IndexBuffer, my_buf)
32
- static VALUE
33
- frt_indexin_init_copy(VALUE self, VALUE orig)
18
+ OutStream *os_create()
34
19
  {
35
- IndexBuffer *orig_buf;
36
- int len;
37
- GET_MY_BUF;
38
- if (self == orig)
39
- return self;
40
-
41
- Data_Get_Struct(orig, IndexBuffer, orig_buf);
42
-
43
- len = orig_buf->len;
44
- my_buf->len = len;
45
- my_buf->pos = orig_buf->pos;
46
- my_buf->start = orig_buf->start;
47
-
48
- MEMCPY(my_buf->buffer, orig_buf->buffer, byte_t, len);
49
-
50
- return self;
20
+ OutStream *os = ALLOC(OutStream);
21
+ os->buf.start = 0;
22
+ os->buf.pos = 0;
23
+ os->buf.len = 0;
24
+ return os;
51
25
  }
52
26
 
53
- static VALUE
54
- frt_indexin_refill(VALUE self)
27
+ inline void os_flush(OutStream *os)
55
28
  {
56
- long start;
57
- VALUE rStr;
58
- int stop, len_to_read;
59
- int input_len = FIX2INT(rb_funcall(self, frt_length, 0, NULL));
60
- GET_MY_BUF;
61
-
62
- start = my_buf->start + my_buf->pos;
63
- stop = start + BUFFER_SIZE;
64
- if (stop > input_len) {
65
- stop = input_len;
66
- }
67
-
68
- len_to_read = stop - start;
69
- if (len_to_read <= 0) {
70
- rb_raise(rb_eEOFError, "IndexInput: Read past End of File");
71
- }
72
-
73
- rStr = rb_str_new((char *)my_buf->buffer, BUFFER_SIZE);
74
- rb_funcall(self, frt_read_internal, 3,
75
- rStr, INT2FIX(0), INT2FIX(len_to_read));
76
-
77
- memcpy(my_buf->buffer, RSTRING(rStr)->ptr, BUFFER_SIZE);
78
- /* my_buf->buffer = StringValuePtr(rStr); */
79
-
80
- my_buf->len = len_to_read;
81
- my_buf->start = start;
82
- my_buf->pos = 0;
83
-
84
- return Qnil;
29
+ os->flush_internal(os, os->buf.buf, os->buf.pos);
30
+ os->buf.start += os->buf.pos;
31
+ os->buf.pos = 0;
85
32
  }
86
33
 
87
- static inline byte_t
88
- frt_read_byte(VALUE self, IndexBuffer *my_buf)
34
+ void os_close(OutStream *os)
89
35
  {
90
- if (my_buf->pos >= my_buf->len)
91
- frt_indexin_refill(self);
92
- return my_buf->buffer[my_buf->pos++];
36
+ os_flush(os);
37
+ os->close_internal(os);
38
+ free(os);
93
39
  }
94
40
 
95
- static VALUE
96
- frt_indexin_read_byte(VALUE self)
97
- {
98
- GET_MY_BUF;
99
- return INT2FIX(frt_read_byte(self, my_buf));
100
- }
101
-
102
- static VALUE
103
- frt_indexin_pos(VALUE self)
41
+ int os_pos(OutStream *os)
104
42
  {
105
- IndexBuffer *my_buf;
106
- Data_Get_Struct(self, IndexBuffer, my_buf);
107
- return INT2FIX(my_buf->start + my_buf->pos);
43
+ return os->buf.start + os->buf.pos;
108
44
  }
109
45
 
110
- static VALUE
111
- frt_read_bytes(VALUE self, VALUE rbuffer, int offset, int len)
46
+ void os_seek(OutStream *os, int new_pos)
112
47
  {
113
- int i;
114
- VALUE rbuf = StringValue(rbuffer);
115
-
116
- GET_MY_BUF;
117
-
118
- if (RSTRING(rbuf)->len < (offset + len)) {
119
- rb_str_resize(rbuf, offset + len);
120
- }
121
- if ((len + offset) < BUFFER_SIZE) {
122
- rb_str_modify(rbuf);
123
- for (i = offset; i < offset + len; i++) {
124
- RSTRING(rbuf)->ptr[i] = frt_read_byte(self, my_buf);
125
- }
126
- } else {
127
- VALUE start = frt_indexin_pos(self);
128
- rb_funcall(self, frt_seek_internal, 1, start);
129
- rb_funcall(self, frt_read_internal, 3,
130
- rbuf, INT2FIX(offset), INT2FIX(len));
131
-
132
- my_buf->start = my_buf->start + len;
133
- my_buf->pos = 0;
134
- my_buf->len = 0; /* trigger refill() on read() */
135
- }
136
-
137
- return rbuf;
48
+ os_flush(os);
49
+ os->buf.start = new_pos;
50
+ os->seek_internal(os, new_pos);
138
51
  }
139
52
 
140
- static VALUE
141
- frt_indexin_read_bytes(VALUE self, VALUE rbuf, VALUE roffset, VALUE rlen)
142
- {
143
- int len, offset;
53
+ #define write_byte(os, b) os->buf.buf[os->buf.pos++] = b
144
54
 
145
- len = FIX2INT(rlen);
146
- offset = FIX2INT(roffset);
147
-
148
- return frt_read_bytes(self, rbuf, offset, len);
55
+ inline void os_write_byte(OutStream *os, uchar b)
56
+ {
57
+ if (os->buf.pos >= BUFFER_SIZE)
58
+ os_flush(os);
59
+ write_byte(os, b);
149
60
  }
150
61
 
151
- VALUE
152
- frt_indexin_seek(VALUE self, VALUE rpos)
62
+ void os_write_bytes(OutStream *os, uchar *b, int len)
153
63
  {
154
- int pos = FIX2INT(rpos);
155
-
156
- GET_MY_BUF;
64
+ if (os->buf.pos > 0) // flush buffer
65
+ os_flush(os);
157
66
 
158
- if ((pos >= my_buf->start) && (pos < (my_buf->start + my_buf->len))) {
159
- my_buf->pos = pos - my_buf->start; /* seek within buffer */
67
+ if (len < BUFFER_SIZE) {
68
+ os->flush_internal(os, b, len);
69
+ os->buf.start += len;
160
70
  } else {
161
- my_buf->start = pos;
162
- my_buf->pos = 0;
163
- my_buf->len = 0; /* trigger refill() on read() */
164
- rb_funcall(self, frt_seek_internal, 1, rpos);
71
+ int pos = 0;
72
+ int size;
73
+ while (pos < len) {
74
+ if (len - pos < BUFFER_SIZE) {
75
+ size = len - pos;
76
+ } else {
77
+ size = BUFFER_SIZE;
78
+ }
79
+ os->flush_internal(os, b + pos, size);
80
+ pos += size;
81
+ os->buf.start += size;
82
+ }
165
83
  }
166
- return Qnil;
167
84
  }
168
85
 
169
- VALUE
170
- frt_indexin_read_int(VALUE self)
86
+ InStream *is_create()
171
87
  {
172
- GET_MY_BUF;
173
- return LONG2NUM(((int)frt_read_byte(self, my_buf) << 24) |
174
- ((int)frt_read_byte(self, my_buf) << 16) |
175
- ((int)frt_read_byte(self, my_buf) << 8) |
176
- (int)frt_read_byte(self, my_buf));
88
+ InStream *is = ALLOC(InStream);
89
+ is->buf.start = 0;
90
+ is->buf.pos = 0;
91
+ is->buf.len = 0;
92
+ return is;
177
93
  }
178
94
 
179
- VALUE
180
- frt_indexin_read_long(VALUE self)
95
+ void is_refill(InStream *is)
181
96
  {
182
- GET_MY_BUF;
183
- return LL2NUM(((long long)frt_read_byte(self, my_buf) << 56) |
184
- ((long long)frt_read_byte(self, my_buf) << 48) |
185
- ((long long)frt_read_byte(self, my_buf) << 40) |
186
- ((long long)frt_read_byte(self, my_buf) << 32) |
187
- ((long long)frt_read_byte(self, my_buf) << 24) |
188
- ((long long)frt_read_byte(self, my_buf) << 16) |
189
- ((long long)frt_read_byte(self, my_buf) << 8) |
190
- (long long)frt_read_byte(self, my_buf));
191
- }
97
+ int start = is->buf.start + is->buf.pos;
98
+ int last = start + BUFFER_SIZE;
99
+ int flen = is->length_internal(is);
100
+ if (last > flen) // don't read past EOF
101
+ last = flen;
192
102
 
193
- static VALUE
194
- frt_indexin_read_uint(VALUE self)
195
- {
196
- GET_MY_BUF;
197
- return ULONG2NUM(((unsigned int)frt_read_byte(self, my_buf) << 24) |
198
- ((unsigned int)frt_read_byte(self, my_buf) << 16) |
199
- ((unsigned int)frt_read_byte(self, my_buf) << 8) |
200
- (unsigned int)frt_read_byte(self, my_buf));
201
- }
103
+ is->buf.len = last - start;
104
+ if (is->buf.len <= 0) {
105
+ eprintf(IO_ERROR, "EOF Error when trying to refill. flen was %d\n", flen);
106
+ }
202
107
 
203
- static VALUE
204
- frt_indexin_read_ulong(VALUE self)
205
- {
206
- GET_MY_BUF;
207
- return ULL2NUM(((unsigned long long)frt_read_byte(self, my_buf) << 56) |
208
- ((unsigned long long)frt_read_byte(self, my_buf) << 48) |
209
- ((unsigned long long)frt_read_byte(self, my_buf) << 40) |
210
- ((unsigned long long)frt_read_byte(self, my_buf) << 32) |
211
- ((unsigned long long)frt_read_byte(self, my_buf) << 24) |
212
- ((unsigned long long)frt_read_byte(self, my_buf) << 16) |
213
- ((unsigned long long)frt_read_byte(self, my_buf) << 8) |
214
- (unsigned long long)frt_read_byte(self, my_buf));
108
+ is->read_internal(is, is->buf.buf, 0, is->buf.len);
109
+
110
+ is->buf.start = start;
111
+ is->buf.pos = 0;
215
112
  }
216
113
 
217
- unsigned long long
218
- frt_read_vint(VALUE self, IndexBuffer *my_buf)
114
+ #define read_byte(is) is->buf.buf[is->buf.pos++]
115
+ inline uchar is_read_byte(InStream *is)
219
116
  {
220
- register unsigned long long i, b;
221
- register int shift = 7;
117
+ if (is->buf.pos >= is->buf.len)
118
+ is_refill(is);
222
119
 
223
- b = frt_read_byte(self, my_buf);
224
- i = b & 0x7F; /* 0x7F = 0b01111111 */
225
-
226
- while ((b & 0x80) != 0) {/* 0x80 = 0b10000000 */
227
- b = frt_read_byte(self, my_buf);
228
- i |= (b & 0x7F) << shift;
229
- shift += 7;
230
- }
231
-
232
- return i;
120
+ return read_byte(is);
233
121
  }
234
122
 
235
- static VALUE
236
- frt_indexin_read_vint(VALUE self)
123
+ int is_pos(InStream *is)
237
124
  {
238
- GET_MY_BUF;
239
- return ULL2NUM(frt_read_vint(self, my_buf));
125
+ return is->buf.start + is->buf.pos;
240
126
  }
241
127
 
242
- void
243
- frt_read_chars(VALUE self, char* buffer, int off, int len)
128
+ uchar *is_read_bytes(InStream *is, uchar *b, int offset, int len)
244
129
  {
245
- /* byte_t b, b1, b2; */
246
- int end, i;
247
-
248
- GET_MY_BUF;
249
-
250
- end = off + len;
251
-
252
-
253
- for(i = off; i < end; i++) {
254
- buffer[i] = frt_read_byte(self, my_buf);
130
+ int i, start;
131
+ if ((offset + len) < BUFFER_SIZE) {
132
+ for (i = offset; i < offset + len; i++) {
133
+ b[i] = is_read_byte(is);
134
+ }
135
+ } else { // read all-at-once
136
+ start = is_pos(is);
137
+ is->seek_internal(is, start);
138
+ is->read_internal(is, b, offset, len);
139
+
140
+ is->buf.start = start + len; // adjust stream variables
141
+ is->buf.pos = 0;
142
+ is->buf.len = 0; // trigger refill on read
255
143
  }
144
+ return b;
256
145
  }
257
146
 
258
- static VALUE
259
- frt_indexin_read_string(VALUE self)
147
+ void is_seek(InStream *is, int pos)
260
148
  {
261
- int length;
262
- char *str;
263
- GET_MY_BUF;
264
- length = (int)frt_read_vint(self, my_buf);
265
- str = ALLOC_N(char, length);
266
-
267
- frt_read_chars(self, str, 0, length);
268
-
269
- return rb_str_new(str, length);
149
+ if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
150
+ is->buf.pos = pos - is->buf.start; // seek within buffer
151
+ } else {
152
+ is->buf.start = pos;
153
+ is->buf.pos = 0;
154
+ is->buf.len = 0; // trigger refill() on read()
155
+ is->seek_internal(is, pos);
156
+ }
270
157
  }
271
158
 
272
- /****************************************************************************
273
- *
274
- * BufferIndexInput Methods
275
- *
276
- ****************************************************************************/
277
-
278
- static VALUE
279
- frt_indexout_flush(VALUE self)
159
+ void is_close(InStream *is)
280
160
  {
281
- GET_MY_BUF;
282
-
283
- rb_funcall(self, frt_flush_buffer, 2,
284
- rb_str_new((char *)my_buf->buffer, BUFFER_SIZE), INT2FIX(my_buf->pos));
285
-
286
- my_buf->start += my_buf->pos;
287
- my_buf->pos = 0;
288
-
289
- return Qnil;
161
+ is->close_internal(is);
162
+ free(is);
290
163
  }
291
164
 
292
- static VALUE
293
- frt_write_byte(VALUE self, byte_t b)
165
+ InStream *is_clone(InStream *is)
294
166
  {
295
- GET_MY_BUF;
296
-
297
- my_buf->buffer[my_buf->pos++] = b;
298
-
299
- if (my_buf->pos >= BUFFER_SIZE)
300
- frt_indexout_flush(self);
301
- return Qnil;
167
+ InStream *new_index_i = ALLOC(InStream);
168
+ memcpy(new_index_i, is, sizeof(InStream));
169
+ //new_index_i->buf.start = is->buf.start;
170
+ //new_index_i->buf.pos = is->buf.pos;
171
+ //new_index_i->buf.len = is->buf.len;
172
+ //new_index_i->file = is->file;
173
+ //new_index_i->d = is->d;
174
+ //new_index_i->read_internal = is->read_internal;
175
+ //new_index_i->seek_internal = is->seek_internal;
176
+ //new_index_i->length_internal = is->length_internal;
177
+ //new_index_i->clone_internal = is->clone_internal;
178
+ //new_index_i->close_internal = is->close_internal;
179
+ new_index_i->is_clone = true;
180
+ is->clone_internal(is, new_index_i);
181
+ return new_index_i;
302
182
  }
303
183
 
304
- static VALUE
305
- frt_indexout_write_byte(VALUE self, VALUE rbyte)
184
+ int
185
+ is_read_int(InStream *is)
306
186
  {
307
- byte_t b = (byte_t)FIX2INT(rbyte);
308
- frt_write_byte(self, b);
309
- return Qnil;
187
+ return ((int)is_read_byte(is) << 24) |
188
+ ((int)is_read_byte(is) << 16) |
189
+ ((int)is_read_byte(is) << 8) |
190
+ (int)is_read_byte(is);
310
191
  }
311
192
 
312
- void
313
- frt_write_bytes(VALUE self, byte_t *buf, int len)
193
+ long long
194
+ is_read_long(InStream *is)
314
195
  {
315
- int i;
316
- for (i = 0; i < len; i++)
317
- frt_write_byte(self, buf[i]);
196
+ return ((long long)is_read_byte(is) << 56) |
197
+ ((long long)is_read_byte(is) << 48) |
198
+ ((long long)is_read_byte(is) << 40) |
199
+ ((long long)is_read_byte(is) << 32) |
200
+ ((long long)is_read_byte(is) << 24) |
201
+ ((long long)is_read_byte(is) << 16) |
202
+ ((long long)is_read_byte(is) << 8) |
203
+ (long long)is_read_byte(is);
318
204
  }
319
205
 
320
- static VALUE
321
- frt_indexout_write_bytes(VALUE self, VALUE rbuffer, VALUE rlen)
206
+ unsigned int
207
+ is_read_uint(InStream *is)
322
208
  {
323
- int len = FIX2INT(rlen);
324
- int i;
325
- VALUE rbuf = StringValue(rbuffer);
326
-
327
- for (i = 0; i < len; i++)
328
- frt_write_byte(self, RSTRING(rbuf)->ptr[i]);
329
-
330
- return Qnil;
209
+ return ((unsigned int)is_read_byte(is) << 24) |
210
+ ((unsigned int)is_read_byte(is) << 16) |
211
+ ((unsigned int)is_read_byte(is) << 8) |
212
+ (unsigned int)is_read_byte(is);
331
213
  }
332
214
 
333
- static VALUE
334
- frt_indexout_pos(VALUE self)
215
+ unsigned long long
216
+ is_read_ulong(InStream *is)
335
217
  {
336
- GET_MY_BUF;
337
- return INT2FIX(my_buf->start + my_buf->pos);
218
+ return ((unsigned long long)is_read_byte(is) << 56) |
219
+ ((unsigned long long)is_read_byte(is) << 48) |
220
+ ((unsigned long long)is_read_byte(is) << 40) |
221
+ ((unsigned long long)is_read_byte(is) << 32) |
222
+ ((unsigned long long)is_read_byte(is) << 24) |
223
+ ((unsigned long long)is_read_byte(is) << 16) |
224
+ ((unsigned long long)is_read_byte(is) << 8) |
225
+ (unsigned long long)is_read_byte(is);
338
226
  }
339
227
 
340
- static VALUE
341
- frt_indexout_seek(VALUE self, VALUE pos)
228
+ /* optimized to use unchecked read_byte if there is definitely space */
229
+ inline unsigned long long
230
+ is_read_vint(InStream *is)
342
231
  {
343
- GET_MY_BUF;
344
-
345
- frt_indexout_flush(self);
346
- my_buf->start = FIX2INT(pos);
232
+ register unsigned long long res, b;
233
+ register int shift = 7;
347
234
 
348
- return Qnil;
235
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
236
+ b = is_read_byte(is);
237
+ res = b & 0x7F; // 0x7F = 0b01111111
238
+
239
+ while ((b & 0x80) != 0) {// 0x80 = 0b10000000
240
+ b = is_read_byte(is);
241
+ res |= (b & 0x7F) << shift;
242
+ shift += 7;
243
+ }
244
+ } else { // unchecked
245
+ b = read_byte(is);
246
+ res = b & 0x7F; // 0x7F = 0b01111111
247
+
248
+ while ((b & 0x80) != 0) {// 0x80 = 0b10000000
249
+ b = read_byte(is);
250
+ res |= (b & 0x7F) << shift;
251
+ shift += 7;
252
+ }
253
+ }
254
+
255
+ return res;
349
256
  }
350
257
 
351
- static VALUE
352
- frt_indexout_write_int(VALUE self, VALUE rint)
258
+ inline void
259
+ is_read_chars(InStream *is, char* buffer, int off, int len)
353
260
  {
354
- long l = NUM2LONG(rint);
355
- frt_write_byte(self, (l >> 24) & 0xFF);
356
- frt_write_byte(self, (l >> 16) & 0xFF);
357
- frt_write_byte(self, (l >> 8) & 0xFF);
358
- frt_write_byte(self, l & 0xFF);
359
-
360
- return Qnil;
361
- }
261
+ int end, i;
362
262
 
363
- static VALUE
364
- frt_indexout_write_long(VALUE self, VALUE rlong)
365
- {
366
- long long l = NUM2LL(rlong);
367
- frt_write_byte(self, (l >> 56) & 0xFF);
368
- frt_write_byte(self, (l >> 48) & 0xFF);
369
- frt_write_byte(self, (l >> 40) & 0xFF);
370
- frt_write_byte(self, (l >> 32) & 0xFF);
371
- frt_write_byte(self, (l >> 24) & 0xFF);
372
- frt_write_byte(self, (l >> 16) & 0xFF);
373
- frt_write_byte(self, (l >> 8) & 0xFF);
374
- frt_write_byte(self, l & 0xFF);
263
+ end = off + len;
375
264
 
376
- return Qnil;
265
+ for(i = off; i < end; i++) {
266
+ buffer[i] = is_read_byte(is);
267
+ }
377
268
  }
378
269
 
379
- static VALUE
380
- frt_indexout_write_uint(VALUE self, VALUE ruint)
270
+ char *
271
+ is_read_string(InStream *is)
381
272
  {
382
- unsigned long l = NUM2ULONG(ruint);
383
- frt_write_byte(self, (l >> 24) & 0xFF);
384
- frt_write_byte(self, (l >> 16) & 0xFF);
385
- frt_write_byte(self, (l >> 8) & 0xFF);
386
- frt_write_byte(self, l & 0xFF);
273
+ register int length = (int)is_read_vint(is);
274
+ char *str = ALLOC_N(char, length + 1);
275
+ str[length] = '\0';
387
276
 
388
- return Qnil;
277
+ if (is->buf.pos > (is->buf.len - length)) {
278
+ register int i;
279
+ for(i = 0; i < length; i++) {
280
+ str[i] = is_read_byte(is);
281
+ }
282
+ } else { // unchecked
283
+ memcpy(str, is->buf.buf + is->buf.pos, length);
284
+ is->buf.pos += length;
285
+ }
286
+ //is_read_chars(is, str, 0, length);
287
+
288
+ return str;
389
289
  }
390
290
 
391
- static VALUE
392
- frt_indexout_write_ulong(VALUE self, VALUE rulong)
291
+ void
292
+ os_write_int(OutStream *os, int l)
393
293
  {
394
- unsigned long long l;
395
- l = rb_num2ull(rulong); /* ruby 1.8 doesn't have NUM2ULL. Added in 1.9 */
396
- frt_write_byte(self, (l >> 56) & 0xFF);
397
- frt_write_byte(self, (l >> 48) & 0xFF);
398
- frt_write_byte(self, (l >> 40) & 0xFF);
399
- frt_write_byte(self, (l >> 32) & 0xFF);
400
- frt_write_byte(self, (l >> 24) & 0xFF);
401
- frt_write_byte(self, (l >> 16) & 0xFF);
402
- frt_write_byte(self, (l >> 8) & 0xFF);
403
- frt_write_byte(self, l & 0xFF);
404
-
405
- return Qnil;
294
+ os_write_byte(os, (l >> 24) & 0xFF);
295
+ os_write_byte(os, (l >> 16) & 0xFF);
296
+ os_write_byte(os, (l >> 8) & 0xFF);
297
+ os_write_byte(os, l & 0xFF);
406
298
  }
407
299
 
408
- static VALUE
409
- frt_write_vint(VALUE self, register unsigned long long i)
300
+ void
301
+ os_write_long(OutStream *os, long long l)
410
302
  {
411
- while (i > 127) {
412
- frt_write_byte(self, (i & 0x7f) | 0x80);
413
- i >>= 7;
414
- }
415
- frt_write_byte(self, i);
416
-
417
- return Qnil;
303
+ os_write_byte(os, (l >> 56) & 0xFF);
304
+ os_write_byte(os, (l >> 48) & 0xFF);
305
+ os_write_byte(os, (l >> 40) & 0xFF);
306
+ os_write_byte(os, (l >> 32) & 0xFF);
307
+ os_write_byte(os, (l >> 24) & 0xFF);
308
+ os_write_byte(os, (l >> 16) & 0xFF);
309
+ os_write_byte(os, (l >> 8) & 0xFF);
310
+ os_write_byte(os, l & 0xFF);
418
311
  }
419
312
 
420
- static VALUE
421
- frt_indexout_write_vint(VALUE self, VALUE rulong)
313
+ void
314
+ os_write_uint(OutStream *os, unsigned int l)
422
315
  {
423
- register unsigned long long i = rb_num2ull(rulong);
316
+ os_write_byte(os, (l >> 24) & 0xFF);
317
+ os_write_byte(os, (l >> 16) & 0xFF);
318
+ os_write_byte(os, (l >> 8) & 0xFF);
319
+ os_write_byte(os, l & 0xFF);
320
+ }
424
321
 
425
- while (i > 127) {
426
- frt_write_byte(self, (i & 0x7f) | 0x80);
427
- i >>= 7;
322
+ void
323
+ os_write_ulong(OutStream *os, unsigned long long l)
324
+ {
325
+ os_write_byte(os, (l >> 56) & 0xFF);
326
+ os_write_byte(os, (l >> 48) & 0xFF);
327
+ os_write_byte(os, (l >> 40) & 0xFF);
328
+ os_write_byte(os, (l >> 32) & 0xFF);
329
+ os_write_byte(os, (l >> 24) & 0xFF);
330
+ os_write_byte(os, (l >> 16) & 0xFF);
331
+ os_write_byte(os, (l >> 8) & 0xFF);
332
+ os_write_byte(os, l & 0xFF);
333
+ }
334
+
335
+ /* optimized to use an unchecked write if there is space */
336
+ inline void
337
+ os_write_vint(OutStream *os, register unsigned long long i)
338
+ {
339
+ if (os->buf.pos > VINT_END) {
340
+ while (i > 127) {
341
+ os_write_byte(os, (i & 0x7f) | 0x80);
342
+ i >>= 7;
343
+ }
344
+ os_write_byte(os, i);
345
+ } else {
346
+ while (i > 127) {
347
+ write_byte(os, (i & 0x7f) | 0x80);
348
+ i >>= 7;
349
+ }
350
+ write_byte(os, i);
428
351
  }
429
- frt_write_byte(self, i);
430
-
431
- return Qnil;
432
352
  }
433
353
 
434
- static VALUE
435
- frt_write_chars(VALUE self, VALUE rbuf, int start, int length)
354
+ void
355
+ os_write_chars(OutStream *os, char *buf, int start, int length)
436
356
  {
437
357
  int i;
438
- VALUE rstr = StringValue(rbuf);
439
358
 
440
359
  for (i = start; i < start + length; i++) {
441
- frt_write_byte(self, RSTRING(rstr)->ptr[i]);
360
+ os_write_byte(os, buf[i]);
442
361
  }
443
-
444
- return Qnil;
445
362
  }
446
363
 
447
- static VALUE
448
- frt_indexout_write_chars(VALUE self, VALUE rstr, VALUE rstart, VALUE rlength)
364
+ void
365
+ os_write_string(OutStream *os, char *str)
449
366
  {
450
- int start = FIX2INT(rstart);
451
- int length = FIX2INT(rlength);
367
+ int len = strlen(str);
368
+ os_write_vint(os, len);
452
369
 
453
- return frt_write_chars(self, rstr, start, length);
370
+ os_write_chars(os, str, 0, len);
454
371
  }
455
372
 
456
- static VALUE
457
- frt_indexout_write_string(VALUE self, VALUE rstr)
373
+ int file_is_lock(char *filename)
458
374
  {
459
- int len = RSTRING(StringValue(rstr))->len;
460
- frt_write_vint(self, len);
461
-
462
- frt_write_chars(self, rstr, 0, len);
463
- return Qnil;
464
- }
465
-
466
- /****************************************************************************
467
- *
468
- * Init Function
469
- *
470
- ****************************************************************************/
471
-
472
- void
473
- Init_indexio(void)
474
- {
475
- /* IDs */
476
- frt_length = rb_intern("length");
477
- frt_flush_buffer = rb_intern("flush_buffer");
478
- frt_read_internal = rb_intern("read_internal");
479
- frt_seek_internal = rb_intern("seek_internal");
480
-
481
- /* IndexInput */
482
- cIndexIn = rb_define_class_under(mStore, "IndexInput", rb_cObject);
483
- cBufferedIndexIn = rb_define_class_under(mStore, "BufferedIndexInput", cIndexIn);
484
- rb_define_alloc_func(cBufferedIndexIn, frt_indexbuffer_alloc);
485
-
486
- rb_define_method(cBufferedIndexIn, "initialize_copy", frt_indexin_init_copy, 1);
487
- rb_define_method(cBufferedIndexIn, "refill", frt_indexin_refill, 0);
488
- rb_define_method(cBufferedIndexIn, "read_byte", frt_indexin_read_byte, 0);
489
- rb_define_method(cBufferedIndexIn, "read_bytes", frt_indexin_read_bytes, 3);
490
- rb_define_method(cBufferedIndexIn, "pos", frt_indexin_pos, 0);
491
- rb_define_method(cBufferedIndexIn, "seek", frt_indexin_seek, 1);
492
- rb_define_method(cBufferedIndexIn, "read_int", frt_indexin_read_int, 0);
493
- rb_define_method(cBufferedIndexIn, "read_long", frt_indexin_read_long, 0);
494
- rb_define_method(cBufferedIndexIn, "read_uint", frt_indexin_read_uint, 0);
495
- rb_define_method(cBufferedIndexIn, "read_ulong", frt_indexin_read_ulong, 0);
496
- rb_define_method(cBufferedIndexIn, "read_vint", frt_indexin_read_vint, 0);
497
- rb_define_method(cBufferedIndexIn, "read_vlong", frt_indexin_read_vint, 0);
498
- rb_define_method(cBufferedIndexIn, "read_string", frt_indexin_read_string, 0);
499
- rb_define_method(cBufferedIndexIn, "read_chars", frt_indexin_read_bytes, 3);
500
-
501
- /* IndexOutput */
502
- cIndexOut = rb_define_class_under(mStore, "IndexOutput", rb_cObject);
503
- cBufferedIndexOut = rb_define_class_under(mStore, "BufferedIndexOutput", cIndexOut);
504
- rb_define_alloc_func(cBufferedIndexOut, frt_indexbuffer_alloc);
505
-
506
- rb_define_method(cBufferedIndexOut, "write_byte", frt_indexout_write_byte, 1);
507
- rb_define_method(cBufferedIndexOut, "write_bytes", frt_indexout_write_bytes, 2);
508
- rb_define_method(cBufferedIndexOut, "flush", frt_indexout_flush, 0);
509
- rb_define_method(cBufferedIndexOut, "close", frt_indexout_flush, 0);
510
- rb_define_method(cBufferedIndexOut, "pos", frt_indexout_pos, 0);
511
- rb_define_method(cBufferedIndexOut, "seek", frt_indexout_seek, 1);
512
- rb_define_method(cBufferedIndexOut, "write_int", frt_indexout_write_int, 1);
513
- rb_define_method(cBufferedIndexOut, "write_long", frt_indexout_write_long, 1);
514
- rb_define_method(cBufferedIndexOut, "write_uint", frt_indexout_write_uint, 1);
515
- rb_define_method(cBufferedIndexOut, "write_ulong", frt_indexout_write_ulong, 1);
516
- rb_define_method(cBufferedIndexOut, "write_vint", frt_indexout_write_vint, 1);
517
- rb_define_method(cBufferedIndexOut, "write_vlong", frt_indexout_write_vint, 1);
518
- rb_define_method(cBufferedIndexOut, "write_chars", frt_indexout_write_chars, 3);
519
- rb_define_method(cBufferedIndexOut, "write_string", frt_indexout_write_string, 1);
520
-
521
- /* FSIndexInput */
522
- /*cFSIndexIn = rb_define_class_under(mStore, "FSIndexInput", cBufferedIndexIn); */
375
+ int start = strlen(filename) - 4;
376
+ return ((start > 0) && (strcmp(".lck", &filename[start]) == 0));
523
377
  }