ferret 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,3 @@
1
+ # extconf.rb for Ferret extensions
2
+ require 'mkmf'
3
+ create_makefile("ferret_ext")
@@ -0,0 +1,23 @@
1
+ #include "ferret.h"
2
+
3
+ void
4
+ Init_ferret_ext(void)
5
+ {
6
+ // IDs
7
+ frt_newobj = rb_intern("new");
8
+
9
+ // Modules
10
+ mFerret = rb_define_module("Ferret");
11
+ mStore = rb_define_module_under(mFerret, "Store");
12
+ mIndex = rb_define_module_under(mFerret, "Index");
13
+ mUtils = rb_define_module_under(mFerret, "Utils");
14
+
15
+ // Inits
16
+ Init_indexio();
17
+ Init_term();
18
+ Init_term_buffer();
19
+ Init_priority_queue();
20
+ Init_segment_merge_queue();
21
+ Init_ram_directory();
22
+ Init_string_helper();
23
+ }
@@ -0,0 +1,85 @@
1
+ #ifndef __FERRET_H_
2
+ #define __FERRET_H_
3
+
4
+ #include <ruby.h>
5
+
6
+ #define BUFFER_SIZE 1024
7
+
8
+ typedef unsigned char byte_t;
9
+
10
+ typedef struct IndexBuffer {
11
+ long start;
12
+ int len;
13
+ int pos;
14
+ byte_t *buffer;
15
+ } IndexBuffer;
16
+
17
+ typedef struct Term {
18
+ char *field;
19
+ char *text;
20
+ int flen;
21
+ int tlen;
22
+ } Term;
23
+
24
+ typedef struct PriorityQueue {
25
+ VALUE *heap;
26
+ int len;
27
+ int size;
28
+ } PriorityQueue;
29
+
30
+ typedef struct TermBuffer {
31
+ char *field;
32
+ char *text;
33
+ int flen;
34
+ int tlen;
35
+ } TermBuffer;
36
+
37
+ typedef struct RAMFile {
38
+ void **buffers;
39
+ int bufcnt;
40
+ VALUE mtime;
41
+ char *name;
42
+ int length;
43
+ } RAMFile;
44
+
45
+ // IDs
46
+ ID frt_newobj;
47
+
48
+ // Modules
49
+ VALUE mFerret;
50
+ VALUE mStore;
51
+ VALUE mIndex;
52
+ VALUE mUtils;
53
+ VALUE mStringHelper;
54
+
55
+ // Classes
56
+ VALUE cRAMDirectory;
57
+ VALUE cIndexIn;
58
+ VALUE cBufferedIndexIn;
59
+ VALUE cFSIndexIn;
60
+ VALUE cIndexOut;
61
+ VALUE cBufferedIndexOut;
62
+ VALUE cFSIndexOut;
63
+ VALUE cRAMIndexOut;
64
+ VALUE cRAMIndexIn;
65
+ VALUE cTerm;
66
+ VALUE cTermBuffer;
67
+ VALUE cPriorityQueue;
68
+ VALUE cSegmentMergeQueue;
69
+
70
+ // Ferret Inits
71
+ extern void Init_indexio();
72
+ extern void Init_term();
73
+ extern void Init_priority_queue();
74
+ extern void Init_term_buffer();
75
+ extern void Init_segment_merge_queue();
76
+ extern void Init_ram_directory();
77
+ extern void Init_string_helper();
78
+
79
+ // External functions
80
+ extern int frt_hash(register char *p, register int len);
81
+ extern unsigned long long frt_read_vint(VALUE self);
82
+ extern void frt_read_chars(VALUE self, char *buf, int offset, int len);
83
+ extern void frt_write_bytes(VALUE self, byte_t *buf, int len);
84
+ extern int frt_term_compare_to_int(VALUE self, VALUE rother);
85
+ #endif
@@ -0,0 +1,543 @@
1
+ #include "ferret.h"
2
+
3
+ ID frt_length, frt_flush_buffer, frt_read_internal, frt_seek_internal;
4
+
5
+ /****************************************************************************
6
+ *
7
+ * BufferIndexInput Methods
8
+ *
9
+ ****************************************************************************/
10
+
11
+ void
12
+ frt_indexbuffer_free(void *p)
13
+ {
14
+ IndexBuffer *my_buf = (IndexBuffer *)p;
15
+ free((void *)my_buf->buffer);
16
+ free(p);
17
+ }
18
+
19
+ static VALUE
20
+ frt_indexbuffer_alloc(VALUE klass)
21
+ {
22
+ byte_t *buffer;
23
+ IndexBuffer *my_buf;
24
+
25
+ my_buf = (IndexBuffer *)ALLOC(IndexBuffer);
26
+ buffer = (byte_t *)ALLOC_N(byte_t, BUFFER_SIZE);
27
+
28
+ my_buf->start = 0;
29
+ my_buf->pos = 0;
30
+ my_buf->len = 0;
31
+ my_buf->buffer = buffer;
32
+
33
+ return Data_Wrap_Struct(klass, NULL, frt_indexbuffer_free, my_buf);
34
+ }
35
+
36
+ static VALUE
37
+ frt_indexin_init_copy(VALUE self, VALUE orig)
38
+ {
39
+ IndexBuffer *orig_buf;
40
+ IndexBuffer *my_buf;
41
+ int len;
42
+ if (self == orig)
43
+ return self;
44
+
45
+ Data_Get_Struct(self, IndexBuffer, my_buf);
46
+ Data_Get_Struct(orig, IndexBuffer, orig_buf);
47
+
48
+ len = orig_buf->len;
49
+ my_buf->len = len;
50
+ my_buf->pos = orig_buf->pos;
51
+ my_buf->len = orig_buf->len;
52
+ my_buf->start = orig_buf->start;
53
+
54
+ MEMCPY(my_buf->buffer, orig_buf->buffer, byte_t, len);
55
+
56
+ return self;
57
+ }
58
+
59
+ static VALUE
60
+ frt_indexin_refill(VALUE self)
61
+ {
62
+ IndexBuffer *my_buf;
63
+ long start;
64
+ int stop, len_to_read;
65
+ int input_len = FIX2INT(rb_funcall(self, frt_length, 0, NULL));
66
+
67
+ Data_Get_Struct(self, IndexBuffer, my_buf);
68
+
69
+ start = my_buf->start + my_buf->pos;
70
+ stop = start + BUFFER_SIZE;
71
+ if (stop > input_len) {
72
+ stop = input_len;
73
+ }
74
+
75
+ len_to_read = stop - start;
76
+ if (len_to_read <= 0) {
77
+ rb_raise(rb_eEOFError, "IndexInput: Read past End of File");
78
+ }
79
+
80
+ VALUE rStr = rb_str_new((char *)my_buf->buffer, BUFFER_SIZE);
81
+ rb_funcall(self, frt_read_internal, 3,
82
+ rStr, INT2FIX(0), INT2FIX(len_to_read));
83
+
84
+ memcpy(my_buf->buffer, RSTRING(rStr)->ptr, BUFFER_SIZE);
85
+ //my_buf->buffer = StringValuePtr(rStr);
86
+
87
+ my_buf->len = len_to_read;
88
+ my_buf->start = start;
89
+ my_buf->pos = 0;
90
+
91
+ return Qnil;
92
+ }
93
+
94
+ byte_t
95
+ frt_read_byte(VALUE self)
96
+ {
97
+ IndexBuffer *my_buf;
98
+ Data_Get_Struct(self, IndexBuffer, my_buf);
99
+
100
+ if (my_buf->pos >= my_buf->len)
101
+ frt_indexin_refill(self);
102
+
103
+ byte_t res = my_buf->buffer[my_buf->pos++];
104
+ return res;
105
+ }
106
+
107
+ static VALUE
108
+ frt_indexin_read_byte(VALUE self)
109
+ {
110
+ return INT2FIX(frt_read_byte(self));
111
+ }
112
+
113
+ static VALUE
114
+ frt_indexin_pos(VALUE self)
115
+ {
116
+ IndexBuffer *my_buf;
117
+ Data_Get_Struct(self, IndexBuffer, my_buf);
118
+ return INT2FIX(my_buf->start + my_buf->pos);
119
+ }
120
+
121
+ static VALUE
122
+ frt_read_bytes(VALUE self, VALUE rbuffer, int offset, int len)
123
+ {
124
+ int i;
125
+ IndexBuffer *my_buf;
126
+ VALUE rbuf = StringValue(rbuffer);
127
+
128
+ if (RSTRING(rbuf)->len < (offset + len)) {
129
+ rb_str_resize(rbuf, offset + len);
130
+ }
131
+ if ((len + offset) < BUFFER_SIZE) {
132
+ rb_str_modify(rbuf);
133
+ for (i = offset; i < offset + len; i++) {
134
+ RSTRING(rbuf)->ptr[i] = frt_read_byte(self);
135
+ }
136
+ } else {
137
+ VALUE start = frt_indexin_pos(self);
138
+ rb_funcall(self, frt_seek_internal, 1, start);
139
+ rb_funcall(self, frt_read_internal, 3,
140
+ rbuf, INT2FIX(offset), INT2FIX(len));
141
+
142
+ Data_Get_Struct(self, IndexBuffer, my_buf);
143
+
144
+ my_buf->start = my_buf->start + len;
145
+ my_buf->pos = 0;
146
+ my_buf->len = 0; // trigger refill() on read()
147
+ }
148
+
149
+ return rbuf;
150
+ }
151
+
152
+ static VALUE
153
+ frt_indexin_read_bytes(VALUE self, VALUE rbuf, VALUE roffset, VALUE rlen)
154
+ {
155
+ int len, offset;
156
+
157
+ len = FIX2INT(rlen);
158
+ offset = FIX2INT(roffset);
159
+
160
+ return frt_read_bytes(self, rbuf, offset, len);
161
+ }
162
+
163
+ static VALUE
164
+ frt_indexin_seek(VALUE self, VALUE rpos)
165
+ {
166
+ int pos = FIX2INT(rpos);
167
+ IndexBuffer *my_buf;
168
+ Data_Get_Struct(self, IndexBuffer, my_buf);
169
+
170
+ if ((pos >= my_buf->start) && (pos < (my_buf->start + my_buf->len))) {
171
+ my_buf->pos = pos - my_buf->start; // seek within buffer
172
+ } else {
173
+ my_buf->start = pos;
174
+ my_buf->pos = 0;
175
+ my_buf->len = 0; // trigger refill() on read()
176
+ rb_funcall(self, frt_seek_internal, 1, rpos);
177
+ }
178
+ return Qnil;
179
+ }
180
+
181
+ static VALUE
182
+ frt_indexin_read_int(VALUE self)
183
+ {
184
+ return LONG2NUM(((long)frt_read_byte(self) << 24) |
185
+ ((long)frt_read_byte(self) << 16) |
186
+ ((long)frt_read_byte(self) << 8) |
187
+ (long)frt_read_byte(self));
188
+ }
189
+
190
+ static VALUE
191
+ frt_indexin_read_long(VALUE self)
192
+ {
193
+ return LL2NUM(((long long)frt_read_byte(self) << 56) |
194
+ ((long long)frt_read_byte(self) << 48) |
195
+ ((long long)frt_read_byte(self) << 40) |
196
+ ((long long)frt_read_byte(self) << 32) |
197
+ ((long long)frt_read_byte(self) << 24) |
198
+ ((long long)frt_read_byte(self) << 16) |
199
+ ((long long)frt_read_byte(self) << 8) |
200
+ (long long)frt_read_byte(self));
201
+ }
202
+
203
+ static VALUE
204
+ frt_indexin_read_uint(VALUE self)
205
+ {
206
+ return ULONG2NUM(((unsigned long)frt_read_byte(self) << 24) |
207
+ ((unsigned long)frt_read_byte(self) << 16) |
208
+ ((unsigned long)frt_read_byte(self) << 8) |
209
+ (unsigned long)frt_read_byte(self));
210
+ }
211
+
212
+ static VALUE
213
+ frt_indexin_read_ulong(VALUE self)
214
+ {
215
+ return ULL2NUM(((unsigned long long)frt_read_byte(self) << 56) |
216
+ ((unsigned long long)frt_read_byte(self) << 48) |
217
+ ((unsigned long long)frt_read_byte(self) << 40) |
218
+ ((unsigned long long)frt_read_byte(self) << 32) |
219
+ ((unsigned long long)frt_read_byte(self) << 24) |
220
+ ((unsigned long long)frt_read_byte(self) << 16) |
221
+ ((unsigned long long)frt_read_byte(self) << 8) |
222
+ (unsigned long long)frt_read_byte(self));
223
+ }
224
+
225
+ unsigned long long
226
+ frt_read_vint(VALUE self)
227
+ {
228
+ register unsigned long long i, b;
229
+ register int shift = 7;
230
+
231
+ b = frt_read_byte(self);
232
+ i = b & 0x7F; // 0x7F = 0b01111111
233
+
234
+ while ((b & 0x80) != 0) {// 0x80 = 0b10000000
235
+ b = frt_read_byte(self);
236
+ i |= (b & 0x7F) << shift;
237
+ shift += 7;
238
+ }
239
+
240
+ return i;
241
+ }
242
+
243
+ static VALUE
244
+ frt_indexin_read_vint(VALUE self)
245
+ {
246
+ return ULL2NUM(frt_read_vint(self));
247
+ }
248
+
249
+ void
250
+ frt_read_chars(VALUE self, char* buffer, int off, int len)
251
+ {
252
+ //byte_t b, b1, b2;
253
+ int end, i;
254
+
255
+ end = off + len;
256
+
257
+ for(i = off; i < end; i++) {
258
+ buffer[i] = frt_read_byte(self);
259
+ }
260
+ // for(i = off; i < end; i++){
261
+ // b = frt_read_byte(self);
262
+ // if((b & 0x80) == 0){
263
+ // buffer[i] = (char)(b & 0x7F);
264
+ // } else {
265
+ // if((b & 0xE0) != 0xE0){
266
+ // b1 = frt_read_byte(self);
267
+ // buffer[i] = (char)(((b & 0x1F) << 6) | (b1 & 0x3F));
268
+ // } else{
269
+ // b1 = frt_read_byte(self);
270
+ // b2 = frt_read_byte(self);
271
+ // buffer[i] = (char)(((b & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
272
+ // }
273
+ // }
274
+ // }
275
+ }
276
+
277
+ static VALUE
278
+ frt_indexin_read_string(VALUE self)
279
+ {
280
+ int length = (int)frt_read_vint(self);
281
+ char *str = (char *)ALLOC_N(char, length);
282
+
283
+ frt_read_chars(self, str, 0, length);
284
+
285
+ return rb_str_new(str, length);
286
+ }
287
+
288
+ /****************************************************************************
289
+ *
290
+ * BufferedIndexOutput Methods
291
+ *
292
+ ****************************************************************************/
293
+
294
+ static VALUE
295
+ frt_indexout_flush(VALUE self)
296
+ {
297
+ IndexBuffer *my_buf;
298
+ Data_Get_Struct(self, IndexBuffer, my_buf);
299
+
300
+ rb_funcall(self, frt_flush_buffer, 2,
301
+ rb_str_new((char *)my_buf->buffer, BUFFER_SIZE), INT2FIX(my_buf->pos));
302
+
303
+ my_buf->start += my_buf->pos;
304
+ my_buf->pos = 0;
305
+
306
+ return Qnil;
307
+ }
308
+
309
+ static VALUE
310
+ frt_write_byte(VALUE self, byte_t b)
311
+ {
312
+ IndexBuffer *my_buf;
313
+ Data_Get_Struct(self, IndexBuffer, my_buf);
314
+
315
+ my_buf->buffer[my_buf->pos++] = b;
316
+
317
+ if (my_buf->pos >= BUFFER_SIZE)
318
+ frt_indexout_flush(self);
319
+ return Qnil;
320
+ }
321
+
322
+ static VALUE
323
+ frt_indexout_write_byte(VALUE self, VALUE rbyte)
324
+ {
325
+ byte_t b = (byte_t)FIX2INT(rbyte);
326
+ frt_write_byte(self, b);
327
+ return Qnil;
328
+ }
329
+
330
+ void
331
+ frt_write_bytes(VALUE self, byte_t *buf, int len)
332
+ {
333
+ int i;
334
+ for (i = 0; i < len; i++)
335
+ frt_write_byte(self, buf[i]);
336
+ }
337
+
338
+ static VALUE
339
+ frt_indexout_write_bytes(VALUE self, VALUE rbuffer, VALUE rlen)
340
+ {
341
+ int len = FIX2INT(rlen);
342
+ int i;
343
+ VALUE rbuf = StringValue(rbuffer);
344
+
345
+ for (i = 0; i < len; i++)
346
+ frt_write_byte(self, RSTRING(rbuf)->ptr[i]);
347
+
348
+ return Qnil;
349
+ }
350
+
351
+ static VALUE
352
+ frt_indexout_pos(VALUE self)
353
+ {
354
+ IndexBuffer *my_buf;
355
+ Data_Get_Struct(self, IndexBuffer, my_buf);
356
+ return INT2FIX(my_buf->start + my_buf->pos);
357
+ }
358
+
359
+ static VALUE
360
+ frt_indexout_seek(VALUE self, VALUE pos)
361
+ {
362
+ IndexBuffer *my_buf;
363
+ Data_Get_Struct(self, IndexBuffer, my_buf);
364
+
365
+ frt_indexout_flush(self);
366
+ my_buf->start = FIX2INT(pos);
367
+
368
+ return Qnil;
369
+ }
370
+
371
+ static VALUE
372
+ frt_indexout_write_int(VALUE self, VALUE rint)
373
+ {
374
+ long l = NUM2LONG(rint);
375
+ frt_write_byte(self, (l >> 24) & 0xFF);
376
+ frt_write_byte(self, (l >> 16) & 0xFF);
377
+ frt_write_byte(self, (l >> 8) & 0xFF);
378
+ frt_write_byte(self, l & 0xFF);
379
+
380
+ return Qnil;
381
+ }
382
+
383
+ static VALUE
384
+ frt_indexout_write_long(VALUE self, VALUE rlong)
385
+ {
386
+ long long l = NUM2LL(rlong);
387
+ frt_write_byte(self, (l >> 56) & 0xFF);
388
+ frt_write_byte(self, (l >> 48) & 0xFF);
389
+ frt_write_byte(self, (l >> 40) & 0xFF);
390
+ frt_write_byte(self, (l >> 32) & 0xFF);
391
+ frt_write_byte(self, (l >> 24) & 0xFF);
392
+ frt_write_byte(self, (l >> 16) & 0xFF);
393
+ frt_write_byte(self, (l >> 8) & 0xFF);
394
+ frt_write_byte(self, l & 0xFF);
395
+
396
+ return Qnil;
397
+ }
398
+
399
+ static VALUE
400
+ frt_indexout_write_uint(VALUE self, VALUE ruint)
401
+ {
402
+ unsigned long l = NUM2ULONG(ruint);
403
+ frt_write_byte(self, (l >> 24) & 0xFF);
404
+ frt_write_byte(self, (l >> 16) & 0xFF);
405
+ frt_write_byte(self, (l >> 8) & 0xFF);
406
+ frt_write_byte(self, l & 0xFF);
407
+
408
+ return Qnil;
409
+ }
410
+
411
+ static VALUE
412
+ frt_indexout_write_ulong(VALUE self, VALUE rulong)
413
+ {
414
+ unsigned long long l;
415
+ l = rb_num2ull(rulong); // ruby 1.8 doesn't have NUM2ULL. Added in 1.9
416
+ frt_write_byte(self, (l >> 56) & 0xFF);
417
+ frt_write_byte(self, (l >> 48) & 0xFF);
418
+ frt_write_byte(self, (l >> 40) & 0xFF);
419
+ frt_write_byte(self, (l >> 32) & 0xFF);
420
+ frt_write_byte(self, (l >> 24) & 0xFF);
421
+ frt_write_byte(self, (l >> 16) & 0xFF);
422
+ frt_write_byte(self, (l >> 8) & 0xFF);
423
+ frt_write_byte(self, l & 0xFF);
424
+
425
+ return Qnil;
426
+ }
427
+
428
+ static VALUE
429
+ frt_write_vint(VALUE self, register unsigned long long i)
430
+ {
431
+ while (i > 127) {
432
+ frt_write_byte(self, (i & 0x7f) | 0x80);
433
+ i >>= 7;
434
+ }
435
+ frt_write_byte(self, i);
436
+
437
+ return Qnil;
438
+ }
439
+
440
+ static VALUE
441
+ frt_indexout_write_vint(VALUE self, VALUE rulong)
442
+ {
443
+ register unsigned long long i = rb_num2ull(rulong);
444
+
445
+ while (i > 127) {
446
+ frt_write_byte(self, (i & 0x7f) | 0x80);
447
+ i >>= 7;
448
+ }
449
+ frt_write_byte(self, i);
450
+
451
+ return Qnil;
452
+ }
453
+
454
+ static VALUE
455
+ frt_write_chars(VALUE self, VALUE rbuf, int start, int length)
456
+ {
457
+ int i;
458
+ VALUE rstr = StringValue(rbuf);
459
+
460
+ for (i = start; i < start + length; i++) {
461
+ frt_write_byte(self, RSTRING(rstr)->ptr[i]);
462
+ }
463
+
464
+ return Qnil;
465
+ }
466
+
467
+ static VALUE
468
+ frt_indexout_write_chars(VALUE self, VALUE rstr, VALUE rstart, VALUE rlength)
469
+ {
470
+ int start = FIX2INT(rstart);
471
+ int length = FIX2INT(rlength);
472
+
473
+ return frt_write_chars(self, rstr, start, length);
474
+ }
475
+
476
+ static VALUE
477
+ frt_indexout_write_string(VALUE self, VALUE rstr)
478
+ {
479
+ int len = RSTRING(StringValue(rstr))->len;
480
+ frt_write_vint(self, len);
481
+
482
+ frt_write_chars(self, rstr, 0, len);
483
+ return Qnil;
484
+ }
485
+
486
+ /****************************************************************************
487
+ *
488
+ * Init Function
489
+ *
490
+ ****************************************************************************/
491
+
492
+ void
493
+ Init_indexio(void)
494
+ {
495
+ // IDs
496
+ frt_length = rb_intern("length");
497
+ frt_flush_buffer = rb_intern("flush_buffer");
498
+ frt_read_internal = rb_intern("read_internal");
499
+ frt_seek_internal = rb_intern("seek_internal");
500
+
501
+ // IndexInput
502
+ cIndexIn = rb_define_class_under(mStore, "IndexInput", rb_cObject);
503
+ cBufferedIndexIn = rb_define_class_under(mStore, "BufferedIndexInput", cIndexIn);
504
+ rb_define_alloc_func(cBufferedIndexIn, frt_indexbuffer_alloc);
505
+
506
+ rb_define_method(cBufferedIndexIn, "initialize_copy", frt_indexin_init_copy, 1);
507
+ rb_define_method(cBufferedIndexIn, "refill", frt_indexin_refill, 0);
508
+ rb_define_method(cBufferedIndexIn, "read_byte", frt_indexin_read_byte, 0);
509
+ rb_define_method(cBufferedIndexIn, "read_bytes", frt_indexin_read_bytes, 3);
510
+ rb_define_method(cBufferedIndexIn, "pos", frt_indexin_pos, 0);
511
+ rb_define_method(cBufferedIndexIn, "seek", frt_indexin_seek, 1);
512
+ rb_define_method(cBufferedIndexIn, "read_int", frt_indexin_read_int, 0);
513
+ rb_define_method(cBufferedIndexIn, "read_long", frt_indexin_read_long, 0);
514
+ rb_define_method(cBufferedIndexIn, "read_uint", frt_indexin_read_uint, 0);
515
+ rb_define_method(cBufferedIndexIn, "read_ulong", frt_indexin_read_ulong, 0);
516
+ rb_define_method(cBufferedIndexIn, "read_vint", frt_indexin_read_vint, 0);
517
+ rb_define_method(cBufferedIndexIn, "read_vlong", frt_indexin_read_vint, 0);
518
+ rb_define_method(cBufferedIndexIn, "read_string", frt_indexin_read_string, 0);
519
+ rb_define_method(cBufferedIndexIn, "read_chars", frt_indexin_read_bytes, 3);
520
+
521
+ // IndexOutput
522
+ cIndexOut = rb_define_class_under(mStore, "IndexOutput", rb_cObject);
523
+ cBufferedIndexOut = rb_define_class_under(mStore, "BufferedIndexOutput", cIndexOut);
524
+ rb_define_alloc_func(cBufferedIndexOut, frt_indexbuffer_alloc);
525
+
526
+ rb_define_method(cBufferedIndexOut, "write_byte", frt_indexout_write_byte, 1);
527
+ rb_define_method(cBufferedIndexOut, "write_bytes", frt_indexout_write_bytes, 2);
528
+ rb_define_method(cBufferedIndexOut, "flush", frt_indexout_flush, 0);
529
+ rb_define_method(cBufferedIndexOut, "close", frt_indexout_flush, 0);
530
+ rb_define_method(cBufferedIndexOut, "pos", frt_indexout_pos, 0);
531
+ rb_define_method(cBufferedIndexOut, "seek", frt_indexout_seek, 1);
532
+ rb_define_method(cBufferedIndexOut, "write_int", frt_indexout_write_int, 1);
533
+ rb_define_method(cBufferedIndexOut, "write_long", frt_indexout_write_long, 1);
534
+ rb_define_method(cBufferedIndexOut, "write_uint", frt_indexout_write_uint, 1);
535
+ rb_define_method(cBufferedIndexOut, "write_ulong", frt_indexout_write_ulong, 1);
536
+ rb_define_method(cBufferedIndexOut, "write_vint", frt_indexout_write_vint, 1);
537
+ rb_define_method(cBufferedIndexOut, "write_vlong", frt_indexout_write_vint, 1);
538
+ rb_define_method(cBufferedIndexOut, "write_chars", frt_indexout_write_chars, 3);
539
+ rb_define_method(cBufferedIndexOut, "write_string", frt_indexout_write_string, 1);
540
+
541
+ // FSIndexInput
542
+ //cFSIndexIn = rb_define_class_under(mStore, "FSIndexInput", cBufferedIndexIn);
543
+ }