ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,3 @@
1
# Build configuration for the Ferret C extension.
# mkmf supplies create_makefile, which is asked here for the "ferret_ext" target.
require "mkmf"

create_makefile('ferret_ext')
@@ -0,0 +1,23 @@
1
+ #include "ferret.h"
2
+
3
+ void
4
+ Init_ferret_ext(void)
5
+ {
6
+ // IDs
7
+ frt_newobj = rb_intern("new");
8
+
9
+ // Modules
10
+ mFerret = rb_define_module("Ferret");
11
+ mStore = rb_define_module_under(mFerret, "Store");
12
+ mIndex = rb_define_module_under(mFerret, "Index");
13
+ mUtils = rb_define_module_under(mFerret, "Utils");
14
+
15
+ // Inits
16
+ Init_indexio();
17
+ Init_term();
18
+ Init_term_buffer();
19
+ Init_priority_queue();
20
+ Init_segment_merge_queue();
21
+ Init_ram_directory();
22
+ Init_string_helper();
23
+ }
@@ -0,0 +1,85 @@
1
+ #ifndef __FERRET_H_
2
+ #define __FERRET_H_
3
+
4
+ #include <ruby.h>
5
+
6
+ #define BUFFER_SIZE 1024
7
+
8
+ typedef unsigned char byte_t;
9
+
10
+ typedef struct IndexBuffer {
11
+ long start;
12
+ int len;
13
+ int pos;
14
+ byte_t *buffer;
15
+ } IndexBuffer;
16
+
17
+ typedef struct Term {
18
+ char *field;
19
+ char *text;
20
+ int flen;
21
+ int tlen;
22
+ } Term;
23
+
24
+ typedef struct PriorityQueue {
25
+ VALUE *heap;
26
+ int len;
27
+ int size;
28
+ } PriorityQueue;
29
+
30
+ typedef struct TermBuffer {
31
+ char *field;
32
+ char *text;
33
+ int flen;
34
+ int tlen;
35
+ } TermBuffer;
36
+
37
+ typedef struct RAMFile {
38
+ void **buffers;
39
+ int bufcnt;
40
+ VALUE mtime;
41
+ char *name;
42
+ int length;
43
+ } RAMFile;
44
+
45
+ // IDs
46
+ ID frt_newobj;
47
+
48
+ // Modules
49
+ VALUE mFerret;
50
+ VALUE mStore;
51
+ VALUE mIndex;
52
+ VALUE mUtils;
53
+ VALUE mStringHelper;
54
+
55
+ // Classes
56
+ VALUE cRAMDirectory;
57
+ VALUE cIndexIn;
58
+ VALUE cBufferedIndexIn;
59
+ VALUE cFSIndexIn;
60
+ VALUE cIndexOut;
61
+ VALUE cBufferedIndexOut;
62
+ VALUE cFSIndexOut;
63
+ VALUE cRAMIndexOut;
64
+ VALUE cRAMIndexIn;
65
+ VALUE cTerm;
66
+ VALUE cTermBuffer;
67
+ VALUE cPriorityQueue;
68
+ VALUE cSegmentMergeQueue;
69
+
70
+ // Ferret Inits
71
+ extern void Init_indexio();
72
+ extern void Init_term();
73
+ extern void Init_priority_queue();
74
+ extern void Init_term_buffer();
75
+ extern void Init_segment_merge_queue();
76
+ extern void Init_ram_directory();
77
+ extern void Init_string_helper();
78
+
79
+ // External functions
80
+ extern int frt_hash(register char *p, register int len);
81
+ extern unsigned long long frt_read_vint(VALUE self);
82
+ extern void frt_read_chars(VALUE self, char *buf, int offset, int len);
83
+ extern void frt_write_bytes(VALUE self, byte_t *buf, int len);
84
+ extern int frt_term_compare_to_int(VALUE self, VALUE rother);
85
+ #endif
@@ -0,0 +1,543 @@
1
+ #include "ferret.h"
2
+
3
+ ID frt_length, frt_flush_buffer, frt_read_internal, frt_seek_internal;
4
+
5
+ /****************************************************************************
6
+ *
7
+ * BufferedIndexInput Methods
8
+ *
9
+ ****************************************************************************/
10
+
11
+ void
12
+ frt_indexbuffer_free(void *p)
13
+ {
14
+ IndexBuffer *my_buf = (IndexBuffer *)p;
15
+ free((void *)my_buf->buffer);
16
+ free(p);
17
+ }
18
+
19
+ static VALUE
20
+ frt_indexbuffer_alloc(VALUE klass)
21
+ {
22
+ byte_t *buffer;
23
+ IndexBuffer *my_buf;
24
+
25
+ my_buf = (IndexBuffer *)ALLOC(IndexBuffer);
26
+ buffer = (byte_t *)ALLOC_N(byte_t, BUFFER_SIZE);
27
+
28
+ my_buf->start = 0;
29
+ my_buf->pos = 0;
30
+ my_buf->len = 0;
31
+ my_buf->buffer = buffer;
32
+
33
+ return Data_Wrap_Struct(klass, NULL, frt_indexbuffer_free, my_buf);
34
+ }
35
+
36
+ static VALUE
37
+ frt_indexin_init_copy(VALUE self, VALUE orig)
38
+ {
39
+ IndexBuffer *orig_buf;
40
+ IndexBuffer *my_buf;
41
+ int len;
42
+ if (self == orig)
43
+ return self;
44
+
45
+ Data_Get_Struct(self, IndexBuffer, my_buf);
46
+ Data_Get_Struct(orig, IndexBuffer, orig_buf);
47
+
48
+ len = orig_buf->len;
49
+ my_buf->len = len;
50
+ my_buf->pos = orig_buf->pos;
51
+ my_buf->len = orig_buf->len;
52
+ my_buf->start = orig_buf->start;
53
+
54
+ MEMCPY(my_buf->buffer, orig_buf->buffer, byte_t, len);
55
+
56
+ return self;
57
+ }
58
+
59
+ static VALUE
60
+ frt_indexin_refill(VALUE self)
61
+ {
62
+ IndexBuffer *my_buf;
63
+ long start;
64
+ int stop, len_to_read;
65
+ int input_len = FIX2INT(rb_funcall(self, frt_length, 0, NULL));
66
+
67
+ Data_Get_Struct(self, IndexBuffer, my_buf);
68
+
69
+ start = my_buf->start + my_buf->pos;
70
+ stop = start + BUFFER_SIZE;
71
+ if (stop > input_len) {
72
+ stop = input_len;
73
+ }
74
+
75
+ len_to_read = stop - start;
76
+ if (len_to_read <= 0) {
77
+ rb_raise(rb_eEOFError, "IndexInput: Read past End of File");
78
+ }
79
+
80
+ VALUE rStr = rb_str_new((char *)my_buf->buffer, BUFFER_SIZE);
81
+ rb_funcall(self, frt_read_internal, 3,
82
+ rStr, INT2FIX(0), INT2FIX(len_to_read));
83
+
84
+ memcpy(my_buf->buffer, RSTRING(rStr)->ptr, BUFFER_SIZE);
85
+ //my_buf->buffer = StringValuePtr(rStr);
86
+
87
+ my_buf->len = len_to_read;
88
+ my_buf->start = start;
89
+ my_buf->pos = 0;
90
+
91
+ return Qnil;
92
+ }
93
+
94
+ byte_t
95
+ frt_read_byte(VALUE self)
96
+ {
97
+ IndexBuffer *my_buf;
98
+ Data_Get_Struct(self, IndexBuffer, my_buf);
99
+
100
+ if (my_buf->pos >= my_buf->len)
101
+ frt_indexin_refill(self);
102
+
103
+ byte_t res = my_buf->buffer[my_buf->pos++];
104
+ return res;
105
+ }
106
+
107
+ static VALUE
108
+ frt_indexin_read_byte(VALUE self)
109
+ {
110
+ return INT2FIX(frt_read_byte(self));
111
+ }
112
+
113
+ static VALUE
114
+ frt_indexin_pos(VALUE self)
115
+ {
116
+ IndexBuffer *my_buf;
117
+ Data_Get_Struct(self, IndexBuffer, my_buf);
118
+ return INT2FIX(my_buf->start + my_buf->pos);
119
+ }
120
+
121
+ static VALUE
122
+ frt_read_bytes(VALUE self, VALUE rbuffer, int offset, int len)
123
+ {
124
+ int i;
125
+ IndexBuffer *my_buf;
126
+ VALUE rbuf = StringValue(rbuffer);
127
+
128
+ if (RSTRING(rbuf)->len < (offset + len)) {
129
+ rb_str_resize(rbuf, offset + len);
130
+ }
131
+ if ((len + offset) < BUFFER_SIZE) {
132
+ rb_str_modify(rbuf);
133
+ for (i = offset; i < offset + len; i++) {
134
+ RSTRING(rbuf)->ptr[i] = frt_read_byte(self);
135
+ }
136
+ } else {
137
+ VALUE start = frt_indexin_pos(self);
138
+ rb_funcall(self, frt_seek_internal, 1, start);
139
+ rb_funcall(self, frt_read_internal, 3,
140
+ rbuf, INT2FIX(offset), INT2FIX(len));
141
+
142
+ Data_Get_Struct(self, IndexBuffer, my_buf);
143
+
144
+ my_buf->start = my_buf->start + len;
145
+ my_buf->pos = 0;
146
+ my_buf->len = 0; // trigger refill() on read()
147
+ }
148
+
149
+ return rbuf;
150
+ }
151
+
152
+ static VALUE
153
+ frt_indexin_read_bytes(VALUE self, VALUE rbuf, VALUE roffset, VALUE rlen)
154
+ {
155
+ int len, offset;
156
+
157
+ len = FIX2INT(rlen);
158
+ offset = FIX2INT(roffset);
159
+
160
+ return frt_read_bytes(self, rbuf, offset, len);
161
+ }
162
+
163
+ static VALUE
164
+ frt_indexin_seek(VALUE self, VALUE rpos)
165
+ {
166
+ int pos = FIX2INT(rpos);
167
+ IndexBuffer *my_buf;
168
+ Data_Get_Struct(self, IndexBuffer, my_buf);
169
+
170
+ if ((pos >= my_buf->start) && (pos < (my_buf->start + my_buf->len))) {
171
+ my_buf->pos = pos - my_buf->start; // seek within buffer
172
+ } else {
173
+ my_buf->start = pos;
174
+ my_buf->pos = 0;
175
+ my_buf->len = 0; // trigger refill() on read()
176
+ rb_funcall(self, frt_seek_internal, 1, rpos);
177
+ }
178
+ return Qnil;
179
+ }
180
+
181
+ static VALUE
182
+ frt_indexin_read_int(VALUE self)
183
+ {
184
+ return LONG2NUM(((long)frt_read_byte(self) << 24) |
185
+ ((long)frt_read_byte(self) << 16) |
186
+ ((long)frt_read_byte(self) << 8) |
187
+ (long)frt_read_byte(self));
188
+ }
189
+
190
+ static VALUE
191
+ frt_indexin_read_long(VALUE self)
192
+ {
193
+ return LL2NUM(((long long)frt_read_byte(self) << 56) |
194
+ ((long long)frt_read_byte(self) << 48) |
195
+ ((long long)frt_read_byte(self) << 40) |
196
+ ((long long)frt_read_byte(self) << 32) |
197
+ ((long long)frt_read_byte(self) << 24) |
198
+ ((long long)frt_read_byte(self) << 16) |
199
+ ((long long)frt_read_byte(self) << 8) |
200
+ (long long)frt_read_byte(self));
201
+ }
202
+
203
+ static VALUE
204
+ frt_indexin_read_uint(VALUE self)
205
+ {
206
+ return ULONG2NUM(((unsigned long)frt_read_byte(self) << 24) |
207
+ ((unsigned long)frt_read_byte(self) << 16) |
208
+ ((unsigned long)frt_read_byte(self) << 8) |
209
+ (unsigned long)frt_read_byte(self));
210
+ }
211
+
212
+ static VALUE
213
+ frt_indexin_read_ulong(VALUE self)
214
+ {
215
+ return ULL2NUM(((unsigned long long)frt_read_byte(self) << 56) |
216
+ ((unsigned long long)frt_read_byte(self) << 48) |
217
+ ((unsigned long long)frt_read_byte(self) << 40) |
218
+ ((unsigned long long)frt_read_byte(self) << 32) |
219
+ ((unsigned long long)frt_read_byte(self) << 24) |
220
+ ((unsigned long long)frt_read_byte(self) << 16) |
221
+ ((unsigned long long)frt_read_byte(self) << 8) |
222
+ (unsigned long long)frt_read_byte(self));
223
+ }
224
+
225
+ unsigned long long
226
+ frt_read_vint(VALUE self)
227
+ {
228
+ register unsigned long long i, b;
229
+ register int shift = 7;
230
+
231
+ b = frt_read_byte(self);
232
+ i = b & 0x7F; // 0x7F = 0b01111111
233
+
234
+ while ((b & 0x80) != 0) {// 0x80 = 0b10000000
235
+ b = frt_read_byte(self);
236
+ i |= (b & 0x7F) << shift;
237
+ shift += 7;
238
+ }
239
+
240
+ return i;
241
+ }
242
+
243
+ static VALUE
244
+ frt_indexin_read_vint(VALUE self)
245
+ {
246
+ return ULL2NUM(frt_read_vint(self));
247
+ }
248
+
249
+ void
250
+ frt_read_chars(VALUE self, char* buffer, int off, int len)
251
+ {
252
+ //byte_t b, b1, b2;
253
+ int end, i;
254
+
255
+ end = off + len;
256
+
257
+ for(i = off; i < end; i++) {
258
+ buffer[i] = frt_read_byte(self);
259
+ }
260
+ // for(i = off; i < end; i++){
261
+ // b = frt_read_byte(self);
262
+ // if((b & 0x80) == 0){
263
+ // buffer[i] = (char)(b & 0x7F);
264
+ // } else {
265
+ // if((b & 0xE0) != 0xE0){
266
+ // b1 = frt_read_byte(self);
267
+ // buffer[i] = (char)(((b & 0x1F) << 6) | (b1 & 0x3F));
268
+ // } else{
269
+ // b1 = frt_read_byte(self);
270
+ // b2 = frt_read_byte(self);
271
+ // buffer[i] = (char)(((b & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
272
+ // }
273
+ // }
274
+ // }
275
+ }
276
+
277
+ static VALUE
278
+ frt_indexin_read_string(VALUE self)
279
+ {
280
+ int length = (int)frt_read_vint(self);
281
+ char *str = (char *)ALLOC_N(char, length);
282
+
283
+ frt_read_chars(self, str, 0, length);
284
+
285
+ return rb_str_new(str, length);
286
+ }
287
+
288
+ /****************************************************************************
289
+ *
290
+ * BufferedIndexOutput Methods
291
+ *
292
+ ****************************************************************************/
293
+
294
+ static VALUE
295
+ frt_indexout_flush(VALUE self)
296
+ {
297
+ IndexBuffer *my_buf;
298
+ Data_Get_Struct(self, IndexBuffer, my_buf);
299
+
300
+ rb_funcall(self, frt_flush_buffer, 2,
301
+ rb_str_new((char *)my_buf->buffer, BUFFER_SIZE), INT2FIX(my_buf->pos));
302
+
303
+ my_buf->start += my_buf->pos;
304
+ my_buf->pos = 0;
305
+
306
+ return Qnil;
307
+ }
308
+
309
+ static VALUE
310
+ frt_write_byte(VALUE self, byte_t b)
311
+ {
312
+ IndexBuffer *my_buf;
313
+ Data_Get_Struct(self, IndexBuffer, my_buf);
314
+
315
+ my_buf->buffer[my_buf->pos++] = b;
316
+
317
+ if (my_buf->pos >= BUFFER_SIZE)
318
+ frt_indexout_flush(self);
319
+ return Qnil;
320
+ }
321
+
322
+ static VALUE
323
+ frt_indexout_write_byte(VALUE self, VALUE rbyte)
324
+ {
325
+ byte_t b = (byte_t)FIX2INT(rbyte);
326
+ frt_write_byte(self, b);
327
+ return Qnil;
328
+ }
329
+
330
+ void
331
+ frt_write_bytes(VALUE self, byte_t *buf, int len)
332
+ {
333
+ int i;
334
+ for (i = 0; i < len; i++)
335
+ frt_write_byte(self, buf[i]);
336
+ }
337
+
338
+ static VALUE
339
+ frt_indexout_write_bytes(VALUE self, VALUE rbuffer, VALUE rlen)
340
+ {
341
+ int len = FIX2INT(rlen);
342
+ int i;
343
+ VALUE rbuf = StringValue(rbuffer);
344
+
345
+ for (i = 0; i < len; i++)
346
+ frt_write_byte(self, RSTRING(rbuf)->ptr[i]);
347
+
348
+ return Qnil;
349
+ }
350
+
351
+ static VALUE
352
+ frt_indexout_pos(VALUE self)
353
+ {
354
+ IndexBuffer *my_buf;
355
+ Data_Get_Struct(self, IndexBuffer, my_buf);
356
+ return INT2FIX(my_buf->start + my_buf->pos);
357
+ }
358
+
359
+ static VALUE
360
+ frt_indexout_seek(VALUE self, VALUE pos)
361
+ {
362
+ IndexBuffer *my_buf;
363
+ Data_Get_Struct(self, IndexBuffer, my_buf);
364
+
365
+ frt_indexout_flush(self);
366
+ my_buf->start = FIX2INT(pos);
367
+
368
+ return Qnil;
369
+ }
370
+
371
+ static VALUE
372
+ frt_indexout_write_int(VALUE self, VALUE rint)
373
+ {
374
+ long l = NUM2LONG(rint);
375
+ frt_write_byte(self, (l >> 24) & 0xFF);
376
+ frt_write_byte(self, (l >> 16) & 0xFF);
377
+ frt_write_byte(self, (l >> 8) & 0xFF);
378
+ frt_write_byte(self, l & 0xFF);
379
+
380
+ return Qnil;
381
+ }
382
+
383
+ static VALUE
384
+ frt_indexout_write_long(VALUE self, VALUE rlong)
385
+ {
386
+ long long l = NUM2LL(rlong);
387
+ frt_write_byte(self, (l >> 56) & 0xFF);
388
+ frt_write_byte(self, (l >> 48) & 0xFF);
389
+ frt_write_byte(self, (l >> 40) & 0xFF);
390
+ frt_write_byte(self, (l >> 32) & 0xFF);
391
+ frt_write_byte(self, (l >> 24) & 0xFF);
392
+ frt_write_byte(self, (l >> 16) & 0xFF);
393
+ frt_write_byte(self, (l >> 8) & 0xFF);
394
+ frt_write_byte(self, l & 0xFF);
395
+
396
+ return Qnil;
397
+ }
398
+
399
+ static VALUE
400
+ frt_indexout_write_uint(VALUE self, VALUE ruint)
401
+ {
402
+ unsigned long l = NUM2ULONG(ruint);
403
+ frt_write_byte(self, (l >> 24) & 0xFF);
404
+ frt_write_byte(self, (l >> 16) & 0xFF);
405
+ frt_write_byte(self, (l >> 8) & 0xFF);
406
+ frt_write_byte(self, l & 0xFF);
407
+
408
+ return Qnil;
409
+ }
410
+
411
+ static VALUE
412
+ frt_indexout_write_ulong(VALUE self, VALUE rulong)
413
+ {
414
+ unsigned long long l;
415
+ l = rb_num2ull(rulong); // ruby 1.8 doesn't have NUM2ULL. Added in 1.9
416
+ frt_write_byte(self, (l >> 56) & 0xFF);
417
+ frt_write_byte(self, (l >> 48) & 0xFF);
418
+ frt_write_byte(self, (l >> 40) & 0xFF);
419
+ frt_write_byte(self, (l >> 32) & 0xFF);
420
+ frt_write_byte(self, (l >> 24) & 0xFF);
421
+ frt_write_byte(self, (l >> 16) & 0xFF);
422
+ frt_write_byte(self, (l >> 8) & 0xFF);
423
+ frt_write_byte(self, l & 0xFF);
424
+
425
+ return Qnil;
426
+ }
427
+
428
+ static VALUE
429
+ frt_write_vint(VALUE self, register unsigned long long i)
430
+ {
431
+ while (i > 127) {
432
+ frt_write_byte(self, (i & 0x7f) | 0x80);
433
+ i >>= 7;
434
+ }
435
+ frt_write_byte(self, i);
436
+
437
+ return Qnil;
438
+ }
439
+
440
+ static VALUE
441
+ frt_indexout_write_vint(VALUE self, VALUE rulong)
442
+ {
443
+ register unsigned long long i = rb_num2ull(rulong);
444
+
445
+ while (i > 127) {
446
+ frt_write_byte(self, (i & 0x7f) | 0x80);
447
+ i >>= 7;
448
+ }
449
+ frt_write_byte(self, i);
450
+
451
+ return Qnil;
452
+ }
453
+
454
+ static VALUE
455
+ frt_write_chars(VALUE self, VALUE rbuf, int start, int length)
456
+ {
457
+ int i;
458
+ VALUE rstr = StringValue(rbuf);
459
+
460
+ for (i = start; i < start + length; i++) {
461
+ frt_write_byte(self, RSTRING(rstr)->ptr[i]);
462
+ }
463
+
464
+ return Qnil;
465
+ }
466
+
467
+ static VALUE
468
+ frt_indexout_write_chars(VALUE self, VALUE rstr, VALUE rstart, VALUE rlength)
469
+ {
470
+ int start = FIX2INT(rstart);
471
+ int length = FIX2INT(rlength);
472
+
473
+ return frt_write_chars(self, rstr, start, length);
474
+ }
475
+
476
+ static VALUE
477
+ frt_indexout_write_string(VALUE self, VALUE rstr)
478
+ {
479
+ int len = RSTRING(StringValue(rstr))->len;
480
+ frt_write_vint(self, len);
481
+
482
+ frt_write_chars(self, rstr, 0, len);
483
+ return Qnil;
484
+ }
485
+
486
+ /****************************************************************************
487
+ *
488
+ * Init Function
489
+ *
490
+ ****************************************************************************/
491
+
492
+ void
493
+ Init_indexio(void)
494
+ {
495
+ // IDs
496
+ frt_length = rb_intern("length");
497
+ frt_flush_buffer = rb_intern("flush_buffer");
498
+ frt_read_internal = rb_intern("read_internal");
499
+ frt_seek_internal = rb_intern("seek_internal");
500
+
501
+ // IndexInput
502
+ cIndexIn = rb_define_class_under(mStore, "IndexInput", rb_cObject);
503
+ cBufferedIndexIn = rb_define_class_under(mStore, "BufferedIndexInput", cIndexIn);
504
+ rb_define_alloc_func(cBufferedIndexIn, frt_indexbuffer_alloc);
505
+
506
+ rb_define_method(cBufferedIndexIn, "initialize_copy", frt_indexin_init_copy, 1);
507
+ rb_define_method(cBufferedIndexIn, "refill", frt_indexin_refill, 0);
508
+ rb_define_method(cBufferedIndexIn, "read_byte", frt_indexin_read_byte, 0);
509
+ rb_define_method(cBufferedIndexIn, "read_bytes", frt_indexin_read_bytes, 3);
510
+ rb_define_method(cBufferedIndexIn, "pos", frt_indexin_pos, 0);
511
+ rb_define_method(cBufferedIndexIn, "seek", frt_indexin_seek, 1);
512
+ rb_define_method(cBufferedIndexIn, "read_int", frt_indexin_read_int, 0);
513
+ rb_define_method(cBufferedIndexIn, "read_long", frt_indexin_read_long, 0);
514
+ rb_define_method(cBufferedIndexIn, "read_uint", frt_indexin_read_uint, 0);
515
+ rb_define_method(cBufferedIndexIn, "read_ulong", frt_indexin_read_ulong, 0);
516
+ rb_define_method(cBufferedIndexIn, "read_vint", frt_indexin_read_vint, 0);
517
+ rb_define_method(cBufferedIndexIn, "read_vlong", frt_indexin_read_vint, 0);
518
+ rb_define_method(cBufferedIndexIn, "read_string", frt_indexin_read_string, 0);
519
+ rb_define_method(cBufferedIndexIn, "read_chars", frt_indexin_read_bytes, 3);
520
+
521
+ // IndexOutput
522
+ cIndexOut = rb_define_class_under(mStore, "IndexOutput", rb_cObject);
523
+ cBufferedIndexOut = rb_define_class_under(mStore, "BufferedIndexOutput", cIndexOut);
524
+ rb_define_alloc_func(cBufferedIndexOut, frt_indexbuffer_alloc);
525
+
526
+ rb_define_method(cBufferedIndexOut, "write_byte", frt_indexout_write_byte, 1);
527
+ rb_define_method(cBufferedIndexOut, "write_bytes", frt_indexout_write_bytes, 2);
528
+ rb_define_method(cBufferedIndexOut, "flush", frt_indexout_flush, 0);
529
+ rb_define_method(cBufferedIndexOut, "close", frt_indexout_flush, 0);
530
+ rb_define_method(cBufferedIndexOut, "pos", frt_indexout_pos, 0);
531
+ rb_define_method(cBufferedIndexOut, "seek", frt_indexout_seek, 1);
532
+ rb_define_method(cBufferedIndexOut, "write_int", frt_indexout_write_int, 1);
533
+ rb_define_method(cBufferedIndexOut, "write_long", frt_indexout_write_long, 1);
534
+ rb_define_method(cBufferedIndexOut, "write_uint", frt_indexout_write_uint, 1);
535
+ rb_define_method(cBufferedIndexOut, "write_ulong", frt_indexout_write_ulong, 1);
536
+ rb_define_method(cBufferedIndexOut, "write_vint", frt_indexout_write_vint, 1);
537
+ rb_define_method(cBufferedIndexOut, "write_vlong", frt_indexout_write_vint, 1);
538
+ rb_define_method(cBufferedIndexOut, "write_chars", frt_indexout_write_chars, 3);
539
+ rb_define_method(cBufferedIndexOut, "write_string", frt_indexout_write_string, 1);
540
+
541
+ // FSIndexInput
542
+ //cFSIndexIn = rb_define_class_under(mStore, "FSIndexInput", cBufferedIndexIn);
543
+ }