ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/index_io.c CHANGED
@@ -1,523 +1,377 @@
1
- #include "ferret.h"
1
+ #include <store.h>
2
+ #include <string.h>
2
3
 
3
- ID frt_length, frt_flush_buffer, frt_read_internal, frt_seek_internal;
4
-
5
- /****************************************************************************
6
- *
7
- * BufferIndexInput Methods
8
- *
9
- ****************************************************************************/
10
-
11
- void
12
- frt_indexbuffer_free(void *p)
4
+ Buffer *buf_create()
13
5
  {
14
- IndexBuffer *my_buf = (IndexBuffer *)p;
15
- free(my_buf->buffer);
16
- free(p);
6
+ Buffer *buf = ALLOC(Buffer);
7
+ buf->start = 0;
8
+ buf->pos = 0;
9
+ buf->len = 0;
10
+ return buf;
17
11
  }
18
12
 
19
- static VALUE
20
- frt_indexbuffer_alloc(VALUE klass)
13
+ void buf_destroy(Buffer *buf)
21
14
  {
22
- IndexBuffer *my_buf;
23
-
24
- my_buf = ALLOC(IndexBuffer);
25
- MEMZERO(my_buf, IndexBuffer, 1);
26
- my_buf->buffer = ALLOC_N(byte_t, BUFFER_SIZE);
27
-
28
- return Data_Wrap_Struct(klass, NULL, frt_indexbuffer_free, my_buf);
15
+ free(buf);
29
16
  }
30
17
 
31
- #define GET_MY_BUF IndexBuffer *my_buf; Data_Get_Struct(self, IndexBuffer, my_buf)
32
- static VALUE
33
- frt_indexin_init_copy(VALUE self, VALUE orig)
18
+ OutStream *os_create()
34
19
  {
35
- IndexBuffer *orig_buf;
36
- int len;
37
- GET_MY_BUF;
38
- if (self == orig)
39
- return self;
40
-
41
- Data_Get_Struct(orig, IndexBuffer, orig_buf);
42
-
43
- len = orig_buf->len;
44
- my_buf->len = len;
45
- my_buf->pos = orig_buf->pos;
46
- my_buf->start = orig_buf->start;
47
-
48
- MEMCPY(my_buf->buffer, orig_buf->buffer, byte_t, len);
49
-
50
- return self;
20
+ OutStream *os = ALLOC(OutStream);
21
+ os->buf.start = 0;
22
+ os->buf.pos = 0;
23
+ os->buf.len = 0;
24
+ return os;
51
25
  }
52
26
 
53
- static VALUE
54
- frt_indexin_refill(VALUE self)
27
+ inline void os_flush(OutStream *os)
55
28
  {
56
- long start;
57
- VALUE rStr;
58
- int stop, len_to_read;
59
- int input_len = FIX2INT(rb_funcall(self, frt_length, 0, NULL));
60
- GET_MY_BUF;
61
-
62
- start = my_buf->start + my_buf->pos;
63
- stop = start + BUFFER_SIZE;
64
- if (stop > input_len) {
65
- stop = input_len;
66
- }
67
-
68
- len_to_read = stop - start;
69
- if (len_to_read <= 0) {
70
- rb_raise(rb_eEOFError, "IndexInput: Read past End of File");
71
- }
72
-
73
- rStr = rb_str_new((char *)my_buf->buffer, BUFFER_SIZE);
74
- rb_funcall(self, frt_read_internal, 3,
75
- rStr, INT2FIX(0), INT2FIX(len_to_read));
76
-
77
- memcpy(my_buf->buffer, RSTRING(rStr)->ptr, BUFFER_SIZE);
78
- /* my_buf->buffer = StringValuePtr(rStr); */
79
-
80
- my_buf->len = len_to_read;
81
- my_buf->start = start;
82
- my_buf->pos = 0;
83
-
84
- return Qnil;
29
+ os->flush_internal(os, os->buf.buf, os->buf.pos);
30
+ os->buf.start += os->buf.pos;
31
+ os->buf.pos = 0;
85
32
  }
86
33
 
87
- static inline byte_t
88
- frt_read_byte(VALUE self, IndexBuffer *my_buf)
34
+ void os_close(OutStream *os)
89
35
  {
90
- if (my_buf->pos >= my_buf->len)
91
- frt_indexin_refill(self);
92
- return my_buf->buffer[my_buf->pos++];
36
+ os_flush(os);
37
+ os->close_internal(os);
38
+ free(os);
93
39
  }
94
40
 
95
- static VALUE
96
- frt_indexin_read_byte(VALUE self)
97
- {
98
- GET_MY_BUF;
99
- return INT2FIX(frt_read_byte(self, my_buf));
100
- }
101
-
102
- static VALUE
103
- frt_indexin_pos(VALUE self)
41
+ int os_pos(OutStream *os)
104
42
  {
105
- IndexBuffer *my_buf;
106
- Data_Get_Struct(self, IndexBuffer, my_buf);
107
- return INT2FIX(my_buf->start + my_buf->pos);
43
+ return os->buf.start + os->buf.pos;
108
44
  }
109
45
 
110
- static VALUE
111
- frt_read_bytes(VALUE self, VALUE rbuffer, int offset, int len)
46
+ void os_seek(OutStream *os, int new_pos)
112
47
  {
113
- int i;
114
- VALUE rbuf = StringValue(rbuffer);
115
-
116
- GET_MY_BUF;
117
-
118
- if (RSTRING(rbuf)->len < (offset + len)) {
119
- rb_str_resize(rbuf, offset + len);
120
- }
121
- if ((len + offset) < BUFFER_SIZE) {
122
- rb_str_modify(rbuf);
123
- for (i = offset; i < offset + len; i++) {
124
- RSTRING(rbuf)->ptr[i] = frt_read_byte(self, my_buf);
125
- }
126
- } else {
127
- VALUE start = frt_indexin_pos(self);
128
- rb_funcall(self, frt_seek_internal, 1, start);
129
- rb_funcall(self, frt_read_internal, 3,
130
- rbuf, INT2FIX(offset), INT2FIX(len));
131
-
132
- my_buf->start = my_buf->start + len;
133
- my_buf->pos = 0;
134
- my_buf->len = 0; /* trigger refill() on read() */
135
- }
136
-
137
- return rbuf;
48
+ os_flush(os);
49
+ os->buf.start = new_pos;
50
+ os->seek_internal(os, new_pos);
138
51
  }
139
52
 
140
- static VALUE
141
- frt_indexin_read_bytes(VALUE self, VALUE rbuf, VALUE roffset, VALUE rlen)
142
- {
143
- int len, offset;
53
+ #define write_byte(os, b) os->buf.buf[os->buf.pos++] = b
144
54
 
145
- len = FIX2INT(rlen);
146
- offset = FIX2INT(roffset);
147
-
148
- return frt_read_bytes(self, rbuf, offset, len);
55
+ inline void os_write_byte(OutStream *os, uchar b)
56
+ {
57
+ if (os->buf.pos >= BUFFER_SIZE)
58
+ os_flush(os);
59
+ write_byte(os, b);
149
60
  }
150
61
 
151
- VALUE
152
- frt_indexin_seek(VALUE self, VALUE rpos)
62
+ void os_write_bytes(OutStream *os, uchar *b, int len)
153
63
  {
154
- int pos = FIX2INT(rpos);
155
-
156
- GET_MY_BUF;
64
+ if (os->buf.pos > 0) // flush buffer
65
+ os_flush(os);
157
66
 
158
- if ((pos >= my_buf->start) && (pos < (my_buf->start + my_buf->len))) {
159
- my_buf->pos = pos - my_buf->start; /* seek within buffer */
67
+ if (len < BUFFER_SIZE) {
68
+ os->flush_internal(os, b, len);
69
+ os->buf.start += len;
160
70
  } else {
161
- my_buf->start = pos;
162
- my_buf->pos = 0;
163
- my_buf->len = 0; /* trigger refill() on read() */
164
- rb_funcall(self, frt_seek_internal, 1, rpos);
71
+ int pos = 0;
72
+ int size;
73
+ while (pos < len) {
74
+ if (len - pos < BUFFER_SIZE) {
75
+ size = len - pos;
76
+ } else {
77
+ size = BUFFER_SIZE;
78
+ }
79
+ os->flush_internal(os, b + pos, size);
80
+ pos += size;
81
+ os->buf.start += size;
82
+ }
165
83
  }
166
- return Qnil;
167
84
  }
168
85
 
169
- VALUE
170
- frt_indexin_read_int(VALUE self)
86
+ InStream *is_create()
171
87
  {
172
- GET_MY_BUF;
173
- return LONG2NUM(((int)frt_read_byte(self, my_buf) << 24) |
174
- ((int)frt_read_byte(self, my_buf) << 16) |
175
- ((int)frt_read_byte(self, my_buf) << 8) |
176
- (int)frt_read_byte(self, my_buf));
88
+ InStream *is = ALLOC(InStream);
89
+ is->buf.start = 0;
90
+ is->buf.pos = 0;
91
+ is->buf.len = 0;
92
+ return is;
177
93
  }
178
94
 
179
- VALUE
180
- frt_indexin_read_long(VALUE self)
95
+ void is_refill(InStream *is)
181
96
  {
182
- GET_MY_BUF;
183
- return LL2NUM(((long long)frt_read_byte(self, my_buf) << 56) |
184
- ((long long)frt_read_byte(self, my_buf) << 48) |
185
- ((long long)frt_read_byte(self, my_buf) << 40) |
186
- ((long long)frt_read_byte(self, my_buf) << 32) |
187
- ((long long)frt_read_byte(self, my_buf) << 24) |
188
- ((long long)frt_read_byte(self, my_buf) << 16) |
189
- ((long long)frt_read_byte(self, my_buf) << 8) |
190
- (long long)frt_read_byte(self, my_buf));
191
- }
97
+ int start = is->buf.start + is->buf.pos;
98
+ int last = start + BUFFER_SIZE;
99
+ int flen = is->length_internal(is);
100
+ if (last > flen) // don't read past EOF
101
+ last = flen;
192
102
 
193
- static VALUE
194
- frt_indexin_read_uint(VALUE self)
195
- {
196
- GET_MY_BUF;
197
- return ULONG2NUM(((unsigned int)frt_read_byte(self, my_buf) << 24) |
198
- ((unsigned int)frt_read_byte(self, my_buf) << 16) |
199
- ((unsigned int)frt_read_byte(self, my_buf) << 8) |
200
- (unsigned int)frt_read_byte(self, my_buf));
201
- }
103
+ is->buf.len = last - start;
104
+ if (is->buf.len <= 0) {
105
+ eprintf(IO_ERROR, "EOF Error when trying to refill. flen was %d\n", flen);
106
+ }
202
107
 
203
- static VALUE
204
- frt_indexin_read_ulong(VALUE self)
205
- {
206
- GET_MY_BUF;
207
- return ULL2NUM(((unsigned long long)frt_read_byte(self, my_buf) << 56) |
208
- ((unsigned long long)frt_read_byte(self, my_buf) << 48) |
209
- ((unsigned long long)frt_read_byte(self, my_buf) << 40) |
210
- ((unsigned long long)frt_read_byte(self, my_buf) << 32) |
211
- ((unsigned long long)frt_read_byte(self, my_buf) << 24) |
212
- ((unsigned long long)frt_read_byte(self, my_buf) << 16) |
213
- ((unsigned long long)frt_read_byte(self, my_buf) << 8) |
214
- (unsigned long long)frt_read_byte(self, my_buf));
108
+ is->read_internal(is, is->buf.buf, 0, is->buf.len);
109
+
110
+ is->buf.start = start;
111
+ is->buf.pos = 0;
215
112
  }
216
113
 
217
- unsigned long long
218
- frt_read_vint(VALUE self, IndexBuffer *my_buf)
114
+ #define read_byte(is) is->buf.buf[is->buf.pos++]
115
+ inline uchar is_read_byte(InStream *is)
219
116
  {
220
- register unsigned long long i, b;
221
- register int shift = 7;
117
+ if (is->buf.pos >= is->buf.len)
118
+ is_refill(is);
222
119
 
223
- b = frt_read_byte(self, my_buf);
224
- i = b & 0x7F; /* 0x7F = 0b01111111 */
225
-
226
- while ((b & 0x80) != 0) {/* 0x80 = 0b10000000 */
227
- b = frt_read_byte(self, my_buf);
228
- i |= (b & 0x7F) << shift;
229
- shift += 7;
230
- }
231
-
232
- return i;
120
+ return read_byte(is);
233
121
  }
234
122
 
235
- static VALUE
236
- frt_indexin_read_vint(VALUE self)
123
+ int is_pos(InStream *is)
237
124
  {
238
- GET_MY_BUF;
239
- return ULL2NUM(frt_read_vint(self, my_buf));
125
+ return is->buf.start + is->buf.pos;
240
126
  }
241
127
 
242
- void
243
- frt_read_chars(VALUE self, char* buffer, int off, int len)
128
+ uchar *is_read_bytes(InStream *is, uchar *b, int offset, int len)
244
129
  {
245
- /* byte_t b, b1, b2; */
246
- int end, i;
247
-
248
- GET_MY_BUF;
249
-
250
- end = off + len;
251
-
252
-
253
- for(i = off; i < end; i++) {
254
- buffer[i] = frt_read_byte(self, my_buf);
130
+ int i, start;
131
+ if ((offset + len) < BUFFER_SIZE) {
132
+ for (i = offset; i < offset + len; i++) {
133
+ b[i] = is_read_byte(is);
134
+ }
135
+ } else { // read all-at-once
136
+ start = is_pos(is);
137
+ is->seek_internal(is, start);
138
+ is->read_internal(is, b, offset, len);
139
+
140
+ is->buf.start = start + len; // adjust stream variables
141
+ is->buf.pos = 0;
142
+ is->buf.len = 0; // trigger refill on read
255
143
  }
144
+ return b;
256
145
  }
257
146
 
258
- static VALUE
259
- frt_indexin_read_string(VALUE self)
147
+ void is_seek(InStream *is, int pos)
260
148
  {
261
- int length;
262
- char *str;
263
- GET_MY_BUF;
264
- length = (int)frt_read_vint(self, my_buf);
265
- str = ALLOC_N(char, length);
266
-
267
- frt_read_chars(self, str, 0, length);
268
-
269
- return rb_str_new(str, length);
149
+ if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
150
+ is->buf.pos = pos - is->buf.start; // seek within buffer
151
+ } else {
152
+ is->buf.start = pos;
153
+ is->buf.pos = 0;
154
+ is->buf.len = 0; // trigger refill() on read()
155
+ is->seek_internal(is, pos);
156
+ }
270
157
  }
271
158
 
272
- /****************************************************************************
273
- *
274
- * BufferIndexInput Methods
275
- *
276
- ****************************************************************************/
277
-
278
- static VALUE
279
- frt_indexout_flush(VALUE self)
159
+ void is_close(InStream *is)
280
160
  {
281
- GET_MY_BUF;
282
-
283
- rb_funcall(self, frt_flush_buffer, 2,
284
- rb_str_new((char *)my_buf->buffer, BUFFER_SIZE), INT2FIX(my_buf->pos));
285
-
286
- my_buf->start += my_buf->pos;
287
- my_buf->pos = 0;
288
-
289
- return Qnil;
161
+ is->close_internal(is);
162
+ free(is);
290
163
  }
291
164
 
292
- static VALUE
293
- frt_write_byte(VALUE self, byte_t b)
165
+ InStream *is_clone(InStream *is)
294
166
  {
295
- GET_MY_BUF;
296
-
297
- my_buf->buffer[my_buf->pos++] = b;
298
-
299
- if (my_buf->pos >= BUFFER_SIZE)
300
- frt_indexout_flush(self);
301
- return Qnil;
167
+ InStream *new_index_i = ALLOC(InStream);
168
+ memcpy(new_index_i, is, sizeof(InStream));
169
+ //new_index_i->buf.start = is->buf.start;
170
+ //new_index_i->buf.pos = is->buf.pos;
171
+ //new_index_i->buf.len = is->buf.len;
172
+ //new_index_i->file = is->file;
173
+ //new_index_i->d = is->d;
174
+ //new_index_i->read_internal = is->read_internal;
175
+ //new_index_i->seek_internal = is->seek_internal;
176
+ //new_index_i->length_internal = is->length_internal;
177
+ //new_index_i->clone_internal = is->clone_internal;
178
+ //new_index_i->close_internal = is->close_internal;
179
+ new_index_i->is_clone = true;
180
+ is->clone_internal(is, new_index_i);
181
+ return new_index_i;
302
182
  }
303
183
 
304
- static VALUE
305
- frt_indexout_write_byte(VALUE self, VALUE rbyte)
184
+ int
185
+ is_read_int(InStream *is)
306
186
  {
307
- byte_t b = (byte_t)FIX2INT(rbyte);
308
- frt_write_byte(self, b);
309
- return Qnil;
187
+ return ((int)is_read_byte(is) << 24) |
188
+ ((int)is_read_byte(is) << 16) |
189
+ ((int)is_read_byte(is) << 8) |
190
+ (int)is_read_byte(is);
310
191
  }
311
192
 
312
- void
313
- frt_write_bytes(VALUE self, byte_t *buf, int len)
193
+ long long
194
+ is_read_long(InStream *is)
314
195
  {
315
- int i;
316
- for (i = 0; i < len; i++)
317
- frt_write_byte(self, buf[i]);
196
+ return ((long long)is_read_byte(is) << 56) |
197
+ ((long long)is_read_byte(is) << 48) |
198
+ ((long long)is_read_byte(is) << 40) |
199
+ ((long long)is_read_byte(is) << 32) |
200
+ ((long long)is_read_byte(is) << 24) |
201
+ ((long long)is_read_byte(is) << 16) |
202
+ ((long long)is_read_byte(is) << 8) |
203
+ (long long)is_read_byte(is);
318
204
  }
319
205
 
320
- static VALUE
321
- frt_indexout_write_bytes(VALUE self, VALUE rbuffer, VALUE rlen)
206
+ unsigned int
207
+ is_read_uint(InStream *is)
322
208
  {
323
- int len = FIX2INT(rlen);
324
- int i;
325
- VALUE rbuf = StringValue(rbuffer);
326
-
327
- for (i = 0; i < len; i++)
328
- frt_write_byte(self, RSTRING(rbuf)->ptr[i]);
329
-
330
- return Qnil;
209
+ return ((unsigned int)is_read_byte(is) << 24) |
210
+ ((unsigned int)is_read_byte(is) << 16) |
211
+ ((unsigned int)is_read_byte(is) << 8) |
212
+ (unsigned int)is_read_byte(is);
331
213
  }
332
214
 
333
- static VALUE
334
- frt_indexout_pos(VALUE self)
215
+ unsigned long long
216
+ is_read_ulong(InStream *is)
335
217
  {
336
- GET_MY_BUF;
337
- return INT2FIX(my_buf->start + my_buf->pos);
218
+ return ((unsigned long long)is_read_byte(is) << 56) |
219
+ ((unsigned long long)is_read_byte(is) << 48) |
220
+ ((unsigned long long)is_read_byte(is) << 40) |
221
+ ((unsigned long long)is_read_byte(is) << 32) |
222
+ ((unsigned long long)is_read_byte(is) << 24) |
223
+ ((unsigned long long)is_read_byte(is) << 16) |
224
+ ((unsigned long long)is_read_byte(is) << 8) |
225
+ (unsigned long long)is_read_byte(is);
338
226
  }
339
227
 
340
- static VALUE
341
- frt_indexout_seek(VALUE self, VALUE pos)
228
+ /* optimized to use unchecked read_byte if there is definitely space */
229
+ inline unsigned long long
230
+ is_read_vint(InStream *is)
342
231
  {
343
- GET_MY_BUF;
344
-
345
- frt_indexout_flush(self);
346
- my_buf->start = FIX2INT(pos);
232
+ register unsigned long long res, b;
233
+ register int shift = 7;
347
234
 
348
- return Qnil;
235
+ if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
236
+ b = is_read_byte(is);
237
+ res = b & 0x7F; // 0x7F = 0b01111111
238
+
239
+ while ((b & 0x80) != 0) {// 0x80 = 0b10000000
240
+ b = is_read_byte(is);
241
+ res |= (b & 0x7F) << shift;
242
+ shift += 7;
243
+ }
244
+ } else { // unchecked
245
+ b = read_byte(is);
246
+ res = b & 0x7F; // 0x7F = 0b01111111
247
+
248
+ while ((b & 0x80) != 0) {// 0x80 = 0b10000000
249
+ b = read_byte(is);
250
+ res |= (b & 0x7F) << shift;
251
+ shift += 7;
252
+ }
253
+ }
254
+
255
+ return res;
349
256
  }
350
257
 
351
- static VALUE
352
- frt_indexout_write_int(VALUE self, VALUE rint)
258
+ inline void
259
+ is_read_chars(InStream *is, char* buffer, int off, int len)
353
260
  {
354
- long l = NUM2LONG(rint);
355
- frt_write_byte(self, (l >> 24) & 0xFF);
356
- frt_write_byte(self, (l >> 16) & 0xFF);
357
- frt_write_byte(self, (l >> 8) & 0xFF);
358
- frt_write_byte(self, l & 0xFF);
359
-
360
- return Qnil;
361
- }
261
+ int end, i;
362
262
 
363
- static VALUE
364
- frt_indexout_write_long(VALUE self, VALUE rlong)
365
- {
366
- long long l = NUM2LL(rlong);
367
- frt_write_byte(self, (l >> 56) & 0xFF);
368
- frt_write_byte(self, (l >> 48) & 0xFF);
369
- frt_write_byte(self, (l >> 40) & 0xFF);
370
- frt_write_byte(self, (l >> 32) & 0xFF);
371
- frt_write_byte(self, (l >> 24) & 0xFF);
372
- frt_write_byte(self, (l >> 16) & 0xFF);
373
- frt_write_byte(self, (l >> 8) & 0xFF);
374
- frt_write_byte(self, l & 0xFF);
263
+ end = off + len;
375
264
 
376
- return Qnil;
265
+ for(i = off; i < end; i++) {
266
+ buffer[i] = is_read_byte(is);
267
+ }
377
268
  }
378
269
 
379
- static VALUE
380
- frt_indexout_write_uint(VALUE self, VALUE ruint)
270
+ char *
271
+ is_read_string(InStream *is)
381
272
  {
382
- unsigned long l = NUM2ULONG(ruint);
383
- frt_write_byte(self, (l >> 24) & 0xFF);
384
- frt_write_byte(self, (l >> 16) & 0xFF);
385
- frt_write_byte(self, (l >> 8) & 0xFF);
386
- frt_write_byte(self, l & 0xFF);
273
+ register int length = (int)is_read_vint(is);
274
+ char *str = ALLOC_N(char, length + 1);
275
+ str[length] = '\0';
387
276
 
388
- return Qnil;
277
+ if (is->buf.pos > (is->buf.len - length)) {
278
+ register int i;
279
+ for(i = 0; i < length; i++) {
280
+ str[i] = is_read_byte(is);
281
+ }
282
+ } else { // unchecked
283
+ memcpy(str, is->buf.buf + is->buf.pos, length);
284
+ is->buf.pos += length;
285
+ }
286
+ //is_read_chars(is, str, 0, length);
287
+
288
+ return str;
389
289
  }
390
290
 
391
- static VALUE
392
- frt_indexout_write_ulong(VALUE self, VALUE rulong)
291
+ void
292
+ os_write_int(OutStream *os, int l)
393
293
  {
394
- unsigned long long l;
395
- l = rb_num2ull(rulong); /* ruby 1.8 doesn't have NUM2ULL. Added in 1.9 */
396
- frt_write_byte(self, (l >> 56) & 0xFF);
397
- frt_write_byte(self, (l >> 48) & 0xFF);
398
- frt_write_byte(self, (l >> 40) & 0xFF);
399
- frt_write_byte(self, (l >> 32) & 0xFF);
400
- frt_write_byte(self, (l >> 24) & 0xFF);
401
- frt_write_byte(self, (l >> 16) & 0xFF);
402
- frt_write_byte(self, (l >> 8) & 0xFF);
403
- frt_write_byte(self, l & 0xFF);
404
-
405
- return Qnil;
294
+ os_write_byte(os, (l >> 24) & 0xFF);
295
+ os_write_byte(os, (l >> 16) & 0xFF);
296
+ os_write_byte(os, (l >> 8) & 0xFF);
297
+ os_write_byte(os, l & 0xFF);
406
298
  }
407
299
 
408
- static VALUE
409
- frt_write_vint(VALUE self, register unsigned long long i)
300
+ void
301
+ os_write_long(OutStream *os, long long l)
410
302
  {
411
- while (i > 127) {
412
- frt_write_byte(self, (i & 0x7f) | 0x80);
413
- i >>= 7;
414
- }
415
- frt_write_byte(self, i);
416
-
417
- return Qnil;
303
+ os_write_byte(os, (l >> 56) & 0xFF);
304
+ os_write_byte(os, (l >> 48) & 0xFF);
305
+ os_write_byte(os, (l >> 40) & 0xFF);
306
+ os_write_byte(os, (l >> 32) & 0xFF);
307
+ os_write_byte(os, (l >> 24) & 0xFF);
308
+ os_write_byte(os, (l >> 16) & 0xFF);
309
+ os_write_byte(os, (l >> 8) & 0xFF);
310
+ os_write_byte(os, l & 0xFF);
418
311
  }
419
312
 
420
- static VALUE
421
- frt_indexout_write_vint(VALUE self, VALUE rulong)
313
+ void
314
+ os_write_uint(OutStream *os, unsigned int l)
422
315
  {
423
- register unsigned long long i = rb_num2ull(rulong);
316
+ os_write_byte(os, (l >> 24) & 0xFF);
317
+ os_write_byte(os, (l >> 16) & 0xFF);
318
+ os_write_byte(os, (l >> 8) & 0xFF);
319
+ os_write_byte(os, l & 0xFF);
320
+ }
424
321
 
425
- while (i > 127) {
426
- frt_write_byte(self, (i & 0x7f) | 0x80);
427
- i >>= 7;
322
+ void
323
+ os_write_ulong(OutStream *os, unsigned long long l)
324
+ {
325
+ os_write_byte(os, (l >> 56) & 0xFF);
326
+ os_write_byte(os, (l >> 48) & 0xFF);
327
+ os_write_byte(os, (l >> 40) & 0xFF);
328
+ os_write_byte(os, (l >> 32) & 0xFF);
329
+ os_write_byte(os, (l >> 24) & 0xFF);
330
+ os_write_byte(os, (l >> 16) & 0xFF);
331
+ os_write_byte(os, (l >> 8) & 0xFF);
332
+ os_write_byte(os, l & 0xFF);
333
+ }
334
+
335
+ /* optimized to use an unchecked write if there is space */
336
+ inline void
337
+ os_write_vint(OutStream *os, register unsigned long long i)
338
+ {
339
+ if (os->buf.pos > VINT_END) {
340
+ while (i > 127) {
341
+ os_write_byte(os, (i & 0x7f) | 0x80);
342
+ i >>= 7;
343
+ }
344
+ os_write_byte(os, i);
345
+ } else {
346
+ while (i > 127) {
347
+ write_byte(os, (i & 0x7f) | 0x80);
348
+ i >>= 7;
349
+ }
350
+ write_byte(os, i);
428
351
  }
429
- frt_write_byte(self, i);
430
-
431
- return Qnil;
432
352
  }
433
353
 
434
- static VALUE
435
- frt_write_chars(VALUE self, VALUE rbuf, int start, int length)
354
+ void
355
+ os_write_chars(OutStream *os, char *buf, int start, int length)
436
356
  {
437
357
  int i;
438
- VALUE rstr = StringValue(rbuf);
439
358
 
440
359
  for (i = start; i < start + length; i++) {
441
- frt_write_byte(self, RSTRING(rstr)->ptr[i]);
360
+ os_write_byte(os, buf[i]);
442
361
  }
443
-
444
- return Qnil;
445
362
  }
446
363
 
447
- static VALUE
448
- frt_indexout_write_chars(VALUE self, VALUE rstr, VALUE rstart, VALUE rlength)
364
+ void
365
+ os_write_string(OutStream *os, char *str)
449
366
  {
450
- int start = FIX2INT(rstart);
451
- int length = FIX2INT(rlength);
367
+ int len = strlen(str);
368
+ os_write_vint(os, len);
452
369
 
453
- return frt_write_chars(self, rstr, start, length);
370
+ os_write_chars(os, str, 0, len);
454
371
  }
455
372
 
456
- static VALUE
457
- frt_indexout_write_string(VALUE self, VALUE rstr)
373
+ int file_is_lock(char *filename)
458
374
  {
459
- int len = RSTRING(StringValue(rstr))->len;
460
- frt_write_vint(self, len);
461
-
462
- frt_write_chars(self, rstr, 0, len);
463
- return Qnil;
464
- }
465
-
466
- /****************************************************************************
467
- *
468
- * Init Function
469
- *
470
- ****************************************************************************/
471
-
472
- void
473
- Init_indexio(void)
474
- {
475
- /* IDs */
476
- frt_length = rb_intern("length");
477
- frt_flush_buffer = rb_intern("flush_buffer");
478
- frt_read_internal = rb_intern("read_internal");
479
- frt_seek_internal = rb_intern("seek_internal");
480
-
481
- /* IndexInput */
482
- cIndexIn = rb_define_class_under(mStore, "IndexInput", rb_cObject);
483
- cBufferedIndexIn = rb_define_class_under(mStore, "BufferedIndexInput", cIndexIn);
484
- rb_define_alloc_func(cBufferedIndexIn, frt_indexbuffer_alloc);
485
-
486
- rb_define_method(cBufferedIndexIn, "initialize_copy", frt_indexin_init_copy, 1);
487
- rb_define_method(cBufferedIndexIn, "refill", frt_indexin_refill, 0);
488
- rb_define_method(cBufferedIndexIn, "read_byte", frt_indexin_read_byte, 0);
489
- rb_define_method(cBufferedIndexIn, "read_bytes", frt_indexin_read_bytes, 3);
490
- rb_define_method(cBufferedIndexIn, "pos", frt_indexin_pos, 0);
491
- rb_define_method(cBufferedIndexIn, "seek", frt_indexin_seek, 1);
492
- rb_define_method(cBufferedIndexIn, "read_int", frt_indexin_read_int, 0);
493
- rb_define_method(cBufferedIndexIn, "read_long", frt_indexin_read_long, 0);
494
- rb_define_method(cBufferedIndexIn, "read_uint", frt_indexin_read_uint, 0);
495
- rb_define_method(cBufferedIndexIn, "read_ulong", frt_indexin_read_ulong, 0);
496
- rb_define_method(cBufferedIndexIn, "read_vint", frt_indexin_read_vint, 0);
497
- rb_define_method(cBufferedIndexIn, "read_vlong", frt_indexin_read_vint, 0);
498
- rb_define_method(cBufferedIndexIn, "read_string", frt_indexin_read_string, 0);
499
- rb_define_method(cBufferedIndexIn, "read_chars", frt_indexin_read_bytes, 3);
500
-
501
- /* IndexOutput */
502
- cIndexOut = rb_define_class_under(mStore, "IndexOutput", rb_cObject);
503
- cBufferedIndexOut = rb_define_class_under(mStore, "BufferedIndexOutput", cIndexOut);
504
- rb_define_alloc_func(cBufferedIndexOut, frt_indexbuffer_alloc);
505
-
506
- rb_define_method(cBufferedIndexOut, "write_byte", frt_indexout_write_byte, 1);
507
- rb_define_method(cBufferedIndexOut, "write_bytes", frt_indexout_write_bytes, 2);
508
- rb_define_method(cBufferedIndexOut, "flush", frt_indexout_flush, 0);
509
- rb_define_method(cBufferedIndexOut, "close", frt_indexout_flush, 0);
510
- rb_define_method(cBufferedIndexOut, "pos", frt_indexout_pos, 0);
511
- rb_define_method(cBufferedIndexOut, "seek", frt_indexout_seek, 1);
512
- rb_define_method(cBufferedIndexOut, "write_int", frt_indexout_write_int, 1);
513
- rb_define_method(cBufferedIndexOut, "write_long", frt_indexout_write_long, 1);
514
- rb_define_method(cBufferedIndexOut, "write_uint", frt_indexout_write_uint, 1);
515
- rb_define_method(cBufferedIndexOut, "write_ulong", frt_indexout_write_ulong, 1);
516
- rb_define_method(cBufferedIndexOut, "write_vint", frt_indexout_write_vint, 1);
517
- rb_define_method(cBufferedIndexOut, "write_vlong", frt_indexout_write_vint, 1);
518
- rb_define_method(cBufferedIndexOut, "write_chars", frt_indexout_write_chars, 3);
519
- rb_define_method(cBufferedIndexOut, "write_string", frt_indexout_write_string, 1);
520
-
521
- /* FSIndexInput */
522
- /*cFSIndexIn = rb_define_class_under(mStore, "FSIndexInput", cBufferedIndexIn); */
375
+ int start = strlen(filename) - 4;
376
+ return ((start > 0) && (strcmp(".lck", &filename[start]) == 0));
523
377
  }