ferret 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +109 -0
- data/Rakefile +275 -0
- data/TODO +9 -0
- data/TUTORIAL +197 -0
- data/ext/extconf.rb +3 -0
- data/ext/ferret.c +23 -0
- data/ext/ferret.h +85 -0
- data/ext/index_io.c +543 -0
- data/ext/priority_queue.c +227 -0
- data/ext/ram_directory.c +316 -0
- data/ext/segment_merge_queue.c +41 -0
- data/ext/string_helper.c +42 -0
- data/ext/tags +240 -0
- data/ext/term.c +261 -0
- data/ext/term_buffer.c +299 -0
- data/ext/util.c +12 -0
- data/lib/ferret.rb +41 -0
- data/lib/ferret/analysis.rb +11 -0
- data/lib/ferret/analysis/analyzers.rb +93 -0
- data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
- data/lib/ferret/analysis/token.rb +79 -0
- data/lib/ferret/analysis/token_filters.rb +86 -0
- data/lib/ferret/analysis/token_stream.rb +26 -0
- data/lib/ferret/analysis/tokenizers.rb +107 -0
- data/lib/ferret/analysis/word_list_loader.rb +27 -0
- data/lib/ferret/document.rb +2 -0
- data/lib/ferret/document/document.rb +152 -0
- data/lib/ferret/document/field.rb +304 -0
- data/lib/ferret/index.rb +26 -0
- data/lib/ferret/index/compound_file_io.rb +343 -0
- data/lib/ferret/index/document_writer.rb +288 -0
- data/lib/ferret/index/field_infos.rb +259 -0
- data/lib/ferret/index/fields_io.rb +175 -0
- data/lib/ferret/index/index.rb +228 -0
- data/lib/ferret/index/index_file_names.rb +33 -0
- data/lib/ferret/index/index_reader.rb +462 -0
- data/lib/ferret/index/index_writer.rb +488 -0
- data/lib/ferret/index/multi_reader.rb +363 -0
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
- data/lib/ferret/index/segment_infos.rb +130 -0
- data/lib/ferret/index/segment_merge_info.rb +47 -0
- data/lib/ferret/index/segment_merge_queue.rb +16 -0
- data/lib/ferret/index/segment_merger.rb +337 -0
- data/lib/ferret/index/segment_reader.rb +380 -0
- data/lib/ferret/index/segment_term_enum.rb +178 -0
- data/lib/ferret/index/segment_term_vector.rb +58 -0
- data/lib/ferret/index/term.rb +49 -0
- data/lib/ferret/index/term_buffer.rb +88 -0
- data/lib/ferret/index/term_doc_enum.rb +283 -0
- data/lib/ferret/index/term_enum.rb +52 -0
- data/lib/ferret/index/term_info.rb +41 -0
- data/lib/ferret/index/term_infos_io.rb +312 -0
- data/lib/ferret/index/term_vector_offset_info.rb +20 -0
- data/lib/ferret/index/term_vectors_io.rb +552 -0
- data/lib/ferret/query_parser.rb +274 -0
- data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
- data/lib/ferret/search.rb +49 -0
- data/lib/ferret/search/boolean_clause.rb +100 -0
- data/lib/ferret/search/boolean_query.rb +303 -0
- data/lib/ferret/search/boolean_scorer.rb +294 -0
- data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
- data/lib/ferret/search/conjunction_scorer.rb +99 -0
- data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
- data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
- data/lib/ferret/search/explanation.rb +41 -0
- data/lib/ferret/search/field_cache.rb +216 -0
- data/lib/ferret/search/field_doc.rb +31 -0
- data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
- data/lib/ferret/search/filter.rb +11 -0
- data/lib/ferret/search/filtered_query.rb +130 -0
- data/lib/ferret/search/filtered_term_enum.rb +79 -0
- data/lib/ferret/search/fuzzy_query.rb +153 -0
- data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
- data/lib/ferret/search/hit_collector.rb +34 -0
- data/lib/ferret/search/hit_queue.rb +11 -0
- data/lib/ferret/search/index_searcher.rb +173 -0
- data/lib/ferret/search/match_all_docs_query.rb +104 -0
- data/lib/ferret/search/multi_phrase_query.rb +204 -0
- data/lib/ferret/search/multi_term_query.rb +65 -0
- data/lib/ferret/search/non_matching_scorer.rb +22 -0
- data/lib/ferret/search/phrase_positions.rb +55 -0
- data/lib/ferret/search/phrase_query.rb +217 -0
- data/lib/ferret/search/phrase_scorer.rb +153 -0
- data/lib/ferret/search/prefix_query.rb +47 -0
- data/lib/ferret/search/query.rb +111 -0
- data/lib/ferret/search/query_filter.rb +51 -0
- data/lib/ferret/search/range_filter.rb +103 -0
- data/lib/ferret/search/range_query.rb +139 -0
- data/lib/ferret/search/req_excl_scorer.rb +125 -0
- data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
- data/lib/ferret/search/score_doc.rb +38 -0
- data/lib/ferret/search/score_doc_comparator.rb +114 -0
- data/lib/ferret/search/scorer.rb +91 -0
- data/lib/ferret/search/similarity.rb +278 -0
- data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
- data/lib/ferret/search/sort.rb +105 -0
- data/lib/ferret/search/sort_comparator.rb +60 -0
- data/lib/ferret/search/sort_field.rb +87 -0
- data/lib/ferret/search/spans.rb +12 -0
- data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
- data/lib/ferret/search/spans/span_first_query.rb +79 -0
- data/lib/ferret/search/spans/span_near_query.rb +108 -0
- data/lib/ferret/search/spans/span_not_query.rb +130 -0
- data/lib/ferret/search/spans/span_or_query.rb +176 -0
- data/lib/ferret/search/spans/span_query.rb +25 -0
- data/lib/ferret/search/spans/span_scorer.rb +74 -0
- data/lib/ferret/search/spans/span_term_query.rb +105 -0
- data/lib/ferret/search/spans/span_weight.rb +84 -0
- data/lib/ferret/search/spans/spans_enum.rb +44 -0
- data/lib/ferret/search/term_query.rb +128 -0
- data/lib/ferret/search/term_scorer.rb +181 -0
- data/lib/ferret/search/top_docs.rb +24 -0
- data/lib/ferret/search/top_field_docs.rb +17 -0
- data/lib/ferret/search/weight.rb +54 -0
- data/lib/ferret/search/wildcard_query.rb +26 -0
- data/lib/ferret/search/wildcard_term_enum.rb +61 -0
- data/lib/ferret/stemmers.rb +1 -0
- data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
- data/lib/ferret/store.rb +5 -0
- data/lib/ferret/store/buffered_index_io.rb +191 -0
- data/lib/ferret/store/directory.rb +139 -0
- data/lib/ferret/store/fs_store.rb +338 -0
- data/lib/ferret/store/index_io.rb +259 -0
- data/lib/ferret/store/ram_store.rb +282 -0
- data/lib/ferret/utils.rb +7 -0
- data/lib/ferret/utils/bit_vector.rb +105 -0
- data/lib/ferret/utils/date_tools.rb +138 -0
- data/lib/ferret/utils/number_tools.rb +91 -0
- data/lib/ferret/utils/parameter.rb +41 -0
- data/lib/ferret/utils/priority_queue.rb +120 -0
- data/lib/ferret/utils/string_helper.rb +47 -0
- data/lib/ferret/utils/weak_key_hash.rb +51 -0
- data/rake_utils/code_statistics.rb +106 -0
- data/setup.rb +1551 -0
- data/test/benchmark/tb_ram_store.rb +76 -0
- data/test/benchmark/tb_rw_vint.rb +26 -0
- data/test/longrunning/tc_numbertools.rb +60 -0
- data/test/longrunning/tm_store.rb +19 -0
- data/test/test_all.rb +9 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/analysis/tc_analyzer.rb +21 -0
- data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
- data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
- data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
- data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
- data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
- data/test/unit/analysis/tc_stop_filter.rb +14 -0
- data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
- data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_word_list_loader.rb +32 -0
- data/test/unit/document/tc_document.rb +47 -0
- data/test/unit/document/tc_field.rb +80 -0
- data/test/unit/index/tc_compound_file_io.rb +107 -0
- data/test/unit/index/tc_field_infos.rb +119 -0
- data/test/unit/index/tc_fields_io.rb +167 -0
- data/test/unit/index/tc_index.rb +140 -0
- data/test/unit/index/tc_index_reader.rb +622 -0
- data/test/unit/index/tc_index_writer.rb +57 -0
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
- data/test/unit/index/tc_segment_infos.rb +74 -0
- data/test/unit/index/tc_segment_term_docs.rb +17 -0
- data/test/unit/index/tc_segment_term_enum.rb +60 -0
- data/test/unit/index/tc_segment_term_vector.rb +71 -0
- data/test/unit/index/tc_term.rb +22 -0
- data/test/unit/index/tc_term_buffer.rb +57 -0
- data/test/unit/index/tc_term_info.rb +19 -0
- data/test/unit/index/tc_term_infos_io.rb +192 -0
- data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
- data/test/unit/index/tc_term_vectors_io.rb +108 -0
- data/test/unit/index/th_doc.rb +244 -0
- data/test/unit/query_parser/tc_query_parser.rb +84 -0
- data/test/unit/search/tc_filter.rb +113 -0
- data/test/unit/search/tc_fuzzy_query.rb +136 -0
- data/test/unit/search/tc_index_searcher.rb +188 -0
- data/test/unit/search/tc_search_and_sort.rb +98 -0
- data/test/unit/search/tc_similarity.rb +37 -0
- data/test/unit/search/tc_sort.rb +48 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +153 -0
- data/test/unit/store/tc_fs_store.rb +84 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +180 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/ts_analysis.rb +16 -0
- data/test/unit/ts_document.rb +4 -0
- data/test/unit/ts_index.rb +18 -0
- data/test/unit/ts_query_parser.rb +3 -0
- data/test/unit/ts_search.rb +10 -0
- data/test/unit/ts_store.rb +6 -0
- data/test/unit/ts_utils.rb +10 -0
- data/test/unit/utils/tc_bit_vector.rb +65 -0
- data/test/unit/utils/tc_date_tools.rb +50 -0
- data/test/unit/utils/tc_number_tools.rb +59 -0
- data/test/unit/utils/tc_parameter.rb +40 -0
- data/test/unit/utils/tc_priority_queue.rb +62 -0
- data/test/unit/utils/tc_string_helper.rb +21 -0
- data/test/unit/utils/tc_weak_key_hash.rb +25 -0
- metadata +251 -0
data/ext/extconf.rb
ADDED
data/ext/ferret.c
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#include "ferret.h"
|
2
|
+
|
3
|
+
void
|
4
|
+
Init_ferret_ext(void)
|
5
|
+
{
|
6
|
+
// IDs
|
7
|
+
frt_newobj = rb_intern("new");
|
8
|
+
|
9
|
+
// Modules
|
10
|
+
mFerret = rb_define_module("Ferret");
|
11
|
+
mStore = rb_define_module_under(mFerret, "Store");
|
12
|
+
mIndex = rb_define_module_under(mFerret, "Index");
|
13
|
+
mUtils = rb_define_module_under(mFerret, "Utils");
|
14
|
+
|
15
|
+
// Inits
|
16
|
+
Init_indexio();
|
17
|
+
Init_term();
|
18
|
+
Init_term_buffer();
|
19
|
+
Init_priority_queue();
|
20
|
+
Init_segment_merge_queue();
|
21
|
+
Init_ram_directory();
|
22
|
+
Init_string_helper();
|
23
|
+
}
|
data/ext/ferret.h
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#ifndef __FERRET_H_
|
2
|
+
#define __FERRET_H_
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
|
6
|
+
#define BUFFER_SIZE 1024
|
7
|
+
|
8
|
+
typedef unsigned char byte_t;
|
9
|
+
|
10
|
+
typedef struct IndexBuffer {
|
11
|
+
long start;
|
12
|
+
int len;
|
13
|
+
int pos;
|
14
|
+
byte_t *buffer;
|
15
|
+
} IndexBuffer;
|
16
|
+
|
17
|
+
/*
 * A term: a field name paired with a text value.
 * NOTE(review): flen/tlen are presumably the byte lengths of field/text --
 * confirm against term.c.
 */
typedef struct Term {
  char *field;  /* field name */
  char *text;   /* term text */
  int flen;     /* length associated with field */
  int tlen;     /* length associated with text */
} Term;
|
23
|
+
|
24
|
+
typedef struct PriorityQueue {
|
25
|
+
VALUE *heap;
|
26
|
+
int len;
|
27
|
+
int size;
|
28
|
+
} PriorityQueue;
|
29
|
+
|
30
|
+
/*
 * Term buffer with the same field layout as Term: a field name and a text
 * value plus a length for each.
 * NOTE(review): how the buffers are owned and resized is defined in
 * term_buffer.c -- not visible from this header.
 */
typedef struct TermBuffer {
  char *field;  /* field name */
  char *text;   /* term text */
  int flen;     /* length associated with field */
  int tlen;     /* length associated with text */
} TermBuffer;
|
36
|
+
|
37
|
+
typedef struct RAMFile {
|
38
|
+
void **buffers;
|
39
|
+
int bufcnt;
|
40
|
+
VALUE mtime;
|
41
|
+
char *name;
|
42
|
+
int length;
|
43
|
+
} RAMFile;
|
44
|
+
|
45
|
+
// IDs
|
46
|
+
ID frt_newobj;
|
47
|
+
|
48
|
+
// Modules
|
49
|
+
VALUE mFerret;
|
50
|
+
VALUE mStore;
|
51
|
+
VALUE mIndex;
|
52
|
+
VALUE mUtils;
|
53
|
+
VALUE mStringHelper;
|
54
|
+
|
55
|
+
// Classes
|
56
|
+
VALUE cRAMDirectory;
|
57
|
+
VALUE cIndexIn;
|
58
|
+
VALUE cBufferedIndexIn;
|
59
|
+
VALUE cFSIndexIn;
|
60
|
+
VALUE cIndexOut;
|
61
|
+
VALUE cBufferedIndexOut;
|
62
|
+
VALUE cFSIndexOut;
|
63
|
+
VALUE cRAMIndexOut;
|
64
|
+
VALUE cRAMIndexIn;
|
65
|
+
VALUE cTerm;
|
66
|
+
VALUE cTermBuffer;
|
67
|
+
VALUE cPriorityQueue;
|
68
|
+
VALUE cSegmentMergeQueue;
|
69
|
+
|
70
|
+
// Ferret Inits
|
71
|
+
extern void Init_indexio();
|
72
|
+
extern void Init_term();
|
73
|
+
extern void Init_priority_queue();
|
74
|
+
extern void Init_term_buffer();
|
75
|
+
extern void Init_segment_merge_queue();
|
76
|
+
extern void Init_ram_directory();
|
77
|
+
extern void Init_string_helper();
|
78
|
+
|
79
|
+
// External functions
|
80
|
+
extern int frt_hash(register char *p, register int len);
|
81
|
+
extern unsigned long long frt_read_vint(VALUE self);
|
82
|
+
extern void frt_read_chars(VALUE self, char *buf, int offset, int len);
|
83
|
+
extern void frt_write_bytes(VALUE self, byte_t *buf, int len);
|
84
|
+
extern int frt_term_compare_to_int(VALUE self, VALUE rother);
|
85
|
+
#endif
|
data/ext/index_io.c
ADDED
@@ -0,0 +1,543 @@
|
|
1
|
+
#include "ferret.h"
|
2
|
+
|
3
|
+
ID frt_length, frt_flush_buffer, frt_read_internal, frt_seek_internal;
|
4
|
+
|
5
|
+
/****************************************************************************
|
6
|
+
*
|
7
|
+
* BufferedIndexInput Methods
|
8
|
+
*
|
9
|
+
****************************************************************************/
|
10
|
+
|
11
|
+
void
|
12
|
+
frt_indexbuffer_free(void *p)
|
13
|
+
{
|
14
|
+
IndexBuffer *my_buf = (IndexBuffer *)p;
|
15
|
+
free((void *)my_buf->buffer);
|
16
|
+
free(p);
|
17
|
+
}
|
18
|
+
|
19
|
+
static VALUE
|
20
|
+
frt_indexbuffer_alloc(VALUE klass)
|
21
|
+
{
|
22
|
+
byte_t *buffer;
|
23
|
+
IndexBuffer *my_buf;
|
24
|
+
|
25
|
+
my_buf = (IndexBuffer *)ALLOC(IndexBuffer);
|
26
|
+
buffer = (byte_t *)ALLOC_N(byte_t, BUFFER_SIZE);
|
27
|
+
|
28
|
+
my_buf->start = 0;
|
29
|
+
my_buf->pos = 0;
|
30
|
+
my_buf->len = 0;
|
31
|
+
my_buf->buffer = buffer;
|
32
|
+
|
33
|
+
return Data_Wrap_Struct(klass, NULL, frt_indexbuffer_free, my_buf);
|
34
|
+
}
|
35
|
+
|
36
|
+
static VALUE
|
37
|
+
frt_indexin_init_copy(VALUE self, VALUE orig)
|
38
|
+
{
|
39
|
+
IndexBuffer *orig_buf;
|
40
|
+
IndexBuffer *my_buf;
|
41
|
+
int len;
|
42
|
+
if (self == orig)
|
43
|
+
return self;
|
44
|
+
|
45
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
46
|
+
Data_Get_Struct(orig, IndexBuffer, orig_buf);
|
47
|
+
|
48
|
+
len = orig_buf->len;
|
49
|
+
my_buf->len = len;
|
50
|
+
my_buf->pos = orig_buf->pos;
|
51
|
+
my_buf->len = orig_buf->len;
|
52
|
+
my_buf->start = orig_buf->start;
|
53
|
+
|
54
|
+
MEMCPY(my_buf->buffer, orig_buf->buffer, byte_t, len);
|
55
|
+
|
56
|
+
return self;
|
57
|
+
}
|
58
|
+
|
59
|
+
static VALUE
|
60
|
+
frt_indexin_refill(VALUE self)
|
61
|
+
{
|
62
|
+
IndexBuffer *my_buf;
|
63
|
+
long start;
|
64
|
+
int stop, len_to_read;
|
65
|
+
int input_len = FIX2INT(rb_funcall(self, frt_length, 0, NULL));
|
66
|
+
|
67
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
68
|
+
|
69
|
+
start = my_buf->start + my_buf->pos;
|
70
|
+
stop = start + BUFFER_SIZE;
|
71
|
+
if (stop > input_len) {
|
72
|
+
stop = input_len;
|
73
|
+
}
|
74
|
+
|
75
|
+
len_to_read = stop - start;
|
76
|
+
if (len_to_read <= 0) {
|
77
|
+
rb_raise(rb_eEOFError, "IndexInput: Read past End of File");
|
78
|
+
}
|
79
|
+
|
80
|
+
VALUE rStr = rb_str_new((char *)my_buf->buffer, BUFFER_SIZE);
|
81
|
+
rb_funcall(self, frt_read_internal, 3,
|
82
|
+
rStr, INT2FIX(0), INT2FIX(len_to_read));
|
83
|
+
|
84
|
+
memcpy(my_buf->buffer, RSTRING(rStr)->ptr, BUFFER_SIZE);
|
85
|
+
//my_buf->buffer = StringValuePtr(rStr);
|
86
|
+
|
87
|
+
my_buf->len = len_to_read;
|
88
|
+
my_buf->start = start;
|
89
|
+
my_buf->pos = 0;
|
90
|
+
|
91
|
+
return Qnil;
|
92
|
+
}
|
93
|
+
|
94
|
+
byte_t
|
95
|
+
frt_read_byte(VALUE self)
|
96
|
+
{
|
97
|
+
IndexBuffer *my_buf;
|
98
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
99
|
+
|
100
|
+
if (my_buf->pos >= my_buf->len)
|
101
|
+
frt_indexin_refill(self);
|
102
|
+
|
103
|
+
byte_t res = my_buf->buffer[my_buf->pos++];
|
104
|
+
return res;
|
105
|
+
}
|
106
|
+
|
107
|
+
static VALUE
|
108
|
+
frt_indexin_read_byte(VALUE self)
|
109
|
+
{
|
110
|
+
return INT2FIX(frt_read_byte(self));
|
111
|
+
}
|
112
|
+
|
113
|
+
static VALUE
|
114
|
+
frt_indexin_pos(VALUE self)
|
115
|
+
{
|
116
|
+
IndexBuffer *my_buf;
|
117
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
118
|
+
return INT2FIX(my_buf->start + my_buf->pos);
|
119
|
+
}
|
120
|
+
|
121
|
+
static VALUE
|
122
|
+
frt_read_bytes(VALUE self, VALUE rbuffer, int offset, int len)
|
123
|
+
{
|
124
|
+
int i;
|
125
|
+
IndexBuffer *my_buf;
|
126
|
+
VALUE rbuf = StringValue(rbuffer);
|
127
|
+
|
128
|
+
if (RSTRING(rbuf)->len < (offset + len)) {
|
129
|
+
rb_str_resize(rbuf, offset + len);
|
130
|
+
}
|
131
|
+
if ((len + offset) < BUFFER_SIZE) {
|
132
|
+
rb_str_modify(rbuf);
|
133
|
+
for (i = offset; i < offset + len; i++) {
|
134
|
+
RSTRING(rbuf)->ptr[i] = frt_read_byte(self);
|
135
|
+
}
|
136
|
+
} else {
|
137
|
+
VALUE start = frt_indexin_pos(self);
|
138
|
+
rb_funcall(self, frt_seek_internal, 1, start);
|
139
|
+
rb_funcall(self, frt_read_internal, 3,
|
140
|
+
rbuf, INT2FIX(offset), INT2FIX(len));
|
141
|
+
|
142
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
143
|
+
|
144
|
+
my_buf->start = my_buf->start + len;
|
145
|
+
my_buf->pos = 0;
|
146
|
+
my_buf->len = 0; // trigger refill() on read()
|
147
|
+
}
|
148
|
+
|
149
|
+
return rbuf;
|
150
|
+
}
|
151
|
+
|
152
|
+
static VALUE
|
153
|
+
frt_indexin_read_bytes(VALUE self, VALUE rbuf, VALUE roffset, VALUE rlen)
|
154
|
+
{
|
155
|
+
int len, offset;
|
156
|
+
|
157
|
+
len = FIX2INT(rlen);
|
158
|
+
offset = FIX2INT(roffset);
|
159
|
+
|
160
|
+
return frt_read_bytes(self, rbuf, offset, len);
|
161
|
+
}
|
162
|
+
|
163
|
+
static VALUE
|
164
|
+
frt_indexin_seek(VALUE self, VALUE rpos)
|
165
|
+
{
|
166
|
+
int pos = FIX2INT(rpos);
|
167
|
+
IndexBuffer *my_buf;
|
168
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
169
|
+
|
170
|
+
if ((pos >= my_buf->start) && (pos < (my_buf->start + my_buf->len))) {
|
171
|
+
my_buf->pos = pos - my_buf->start; // seek within buffer
|
172
|
+
} else {
|
173
|
+
my_buf->start = pos;
|
174
|
+
my_buf->pos = 0;
|
175
|
+
my_buf->len = 0; // trigger refill() on read()
|
176
|
+
rb_funcall(self, frt_seek_internal, 1, rpos);
|
177
|
+
}
|
178
|
+
return Qnil;
|
179
|
+
}
|
180
|
+
|
181
|
+
static VALUE
|
182
|
+
frt_indexin_read_int(VALUE self)
|
183
|
+
{
|
184
|
+
return LONG2NUM(((long)frt_read_byte(self) << 24) |
|
185
|
+
((long)frt_read_byte(self) << 16) |
|
186
|
+
((long)frt_read_byte(self) << 8) |
|
187
|
+
(long)frt_read_byte(self));
|
188
|
+
}
|
189
|
+
|
190
|
+
static VALUE
|
191
|
+
frt_indexin_read_long(VALUE self)
|
192
|
+
{
|
193
|
+
return LL2NUM(((long long)frt_read_byte(self) << 56) |
|
194
|
+
((long long)frt_read_byte(self) << 48) |
|
195
|
+
((long long)frt_read_byte(self) << 40) |
|
196
|
+
((long long)frt_read_byte(self) << 32) |
|
197
|
+
((long long)frt_read_byte(self) << 24) |
|
198
|
+
((long long)frt_read_byte(self) << 16) |
|
199
|
+
((long long)frt_read_byte(self) << 8) |
|
200
|
+
(long long)frt_read_byte(self));
|
201
|
+
}
|
202
|
+
|
203
|
+
static VALUE
|
204
|
+
frt_indexin_read_uint(VALUE self)
|
205
|
+
{
|
206
|
+
return ULONG2NUM(((unsigned long)frt_read_byte(self) << 24) |
|
207
|
+
((unsigned long)frt_read_byte(self) << 16) |
|
208
|
+
((unsigned long)frt_read_byte(self) << 8) |
|
209
|
+
(unsigned long)frt_read_byte(self));
|
210
|
+
}
|
211
|
+
|
212
|
+
static VALUE
|
213
|
+
frt_indexin_read_ulong(VALUE self)
|
214
|
+
{
|
215
|
+
return ULL2NUM(((unsigned long long)frt_read_byte(self) << 56) |
|
216
|
+
((unsigned long long)frt_read_byte(self) << 48) |
|
217
|
+
((unsigned long long)frt_read_byte(self) << 40) |
|
218
|
+
((unsigned long long)frt_read_byte(self) << 32) |
|
219
|
+
((unsigned long long)frt_read_byte(self) << 24) |
|
220
|
+
((unsigned long long)frt_read_byte(self) << 16) |
|
221
|
+
((unsigned long long)frt_read_byte(self) << 8) |
|
222
|
+
(unsigned long long)frt_read_byte(self));
|
223
|
+
}
|
224
|
+
|
225
|
+
unsigned long long
|
226
|
+
frt_read_vint(VALUE self)
|
227
|
+
{
|
228
|
+
register unsigned long long i, b;
|
229
|
+
register int shift = 7;
|
230
|
+
|
231
|
+
b = frt_read_byte(self);
|
232
|
+
i = b & 0x7F; // 0x7F = 0b01111111
|
233
|
+
|
234
|
+
while ((b & 0x80) != 0) {// 0x80 = 0b10000000
|
235
|
+
b = frt_read_byte(self);
|
236
|
+
i |= (b & 0x7F) << shift;
|
237
|
+
shift += 7;
|
238
|
+
}
|
239
|
+
|
240
|
+
return i;
|
241
|
+
}
|
242
|
+
|
243
|
+
static VALUE
|
244
|
+
frt_indexin_read_vint(VALUE self)
|
245
|
+
{
|
246
|
+
return ULL2NUM(frt_read_vint(self));
|
247
|
+
}
|
248
|
+
|
249
|
+
void
|
250
|
+
frt_read_chars(VALUE self, char* buffer, int off, int len)
|
251
|
+
{
|
252
|
+
//byte_t b, b1, b2;
|
253
|
+
int end, i;
|
254
|
+
|
255
|
+
end = off + len;
|
256
|
+
|
257
|
+
for(i = off; i < end; i++) {
|
258
|
+
buffer[i] = frt_read_byte(self);
|
259
|
+
}
|
260
|
+
// for(i = off; i < end; i++){
|
261
|
+
// b = frt_read_byte(self);
|
262
|
+
// if((b & 0x80) == 0){
|
263
|
+
// buffer[i] = (char)(b & 0x7F);
|
264
|
+
// } else {
|
265
|
+
// if((b & 0xE0) != 0xE0){
|
266
|
+
// b1 = frt_read_byte(self);
|
267
|
+
// buffer[i] = (char)(((b & 0x1F) << 6) | (b1 & 0x3F));
|
268
|
+
// } else{
|
269
|
+
// b1 = frt_read_byte(self);
|
270
|
+
// b2 = frt_read_byte(self);
|
271
|
+
// buffer[i] = (char)(((b & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
|
272
|
+
// }
|
273
|
+
// }
|
274
|
+
// }
|
275
|
+
}
|
276
|
+
|
277
|
+
static VALUE
|
278
|
+
frt_indexin_read_string(VALUE self)
|
279
|
+
{
|
280
|
+
int length = (int)frt_read_vint(self);
|
281
|
+
char *str = (char *)ALLOC_N(char, length);
|
282
|
+
|
283
|
+
frt_read_chars(self, str, 0, length);
|
284
|
+
|
285
|
+
return rb_str_new(str, length);
|
286
|
+
}
|
287
|
+
|
288
|
+
/****************************************************************************
|
289
|
+
*
|
290
|
+
* BufferedIndexOutput Methods
|
291
|
+
*
|
292
|
+
****************************************************************************/
|
293
|
+
|
294
|
+
static VALUE
|
295
|
+
frt_indexout_flush(VALUE self)
|
296
|
+
{
|
297
|
+
IndexBuffer *my_buf;
|
298
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
299
|
+
|
300
|
+
rb_funcall(self, frt_flush_buffer, 2,
|
301
|
+
rb_str_new((char *)my_buf->buffer, BUFFER_SIZE), INT2FIX(my_buf->pos));
|
302
|
+
|
303
|
+
my_buf->start += my_buf->pos;
|
304
|
+
my_buf->pos = 0;
|
305
|
+
|
306
|
+
return Qnil;
|
307
|
+
}
|
308
|
+
|
309
|
+
static VALUE
|
310
|
+
frt_write_byte(VALUE self, byte_t b)
|
311
|
+
{
|
312
|
+
IndexBuffer *my_buf;
|
313
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
314
|
+
|
315
|
+
my_buf->buffer[my_buf->pos++] = b;
|
316
|
+
|
317
|
+
if (my_buf->pos >= BUFFER_SIZE)
|
318
|
+
frt_indexout_flush(self);
|
319
|
+
return Qnil;
|
320
|
+
}
|
321
|
+
|
322
|
+
static VALUE
|
323
|
+
frt_indexout_write_byte(VALUE self, VALUE rbyte)
|
324
|
+
{
|
325
|
+
byte_t b = (byte_t)FIX2INT(rbyte);
|
326
|
+
frt_write_byte(self, b);
|
327
|
+
return Qnil;
|
328
|
+
}
|
329
|
+
|
330
|
+
void
|
331
|
+
frt_write_bytes(VALUE self, byte_t *buf, int len)
|
332
|
+
{
|
333
|
+
int i;
|
334
|
+
for (i = 0; i < len; i++)
|
335
|
+
frt_write_byte(self, buf[i]);
|
336
|
+
}
|
337
|
+
|
338
|
+
static VALUE
|
339
|
+
frt_indexout_write_bytes(VALUE self, VALUE rbuffer, VALUE rlen)
|
340
|
+
{
|
341
|
+
int len = FIX2INT(rlen);
|
342
|
+
int i;
|
343
|
+
VALUE rbuf = StringValue(rbuffer);
|
344
|
+
|
345
|
+
for (i = 0; i < len; i++)
|
346
|
+
frt_write_byte(self, RSTRING(rbuf)->ptr[i]);
|
347
|
+
|
348
|
+
return Qnil;
|
349
|
+
}
|
350
|
+
|
351
|
+
static VALUE
|
352
|
+
frt_indexout_pos(VALUE self)
|
353
|
+
{
|
354
|
+
IndexBuffer *my_buf;
|
355
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
356
|
+
return INT2FIX(my_buf->start + my_buf->pos);
|
357
|
+
}
|
358
|
+
|
359
|
+
static VALUE
|
360
|
+
frt_indexout_seek(VALUE self, VALUE pos)
|
361
|
+
{
|
362
|
+
IndexBuffer *my_buf;
|
363
|
+
Data_Get_Struct(self, IndexBuffer, my_buf);
|
364
|
+
|
365
|
+
frt_indexout_flush(self);
|
366
|
+
my_buf->start = FIX2INT(pos);
|
367
|
+
|
368
|
+
return Qnil;
|
369
|
+
}
|
370
|
+
|
371
|
+
static VALUE
|
372
|
+
frt_indexout_write_int(VALUE self, VALUE rint)
|
373
|
+
{
|
374
|
+
long l = NUM2LONG(rint);
|
375
|
+
frt_write_byte(self, (l >> 24) & 0xFF);
|
376
|
+
frt_write_byte(self, (l >> 16) & 0xFF);
|
377
|
+
frt_write_byte(self, (l >> 8) & 0xFF);
|
378
|
+
frt_write_byte(self, l & 0xFF);
|
379
|
+
|
380
|
+
return Qnil;
|
381
|
+
}
|
382
|
+
|
383
|
+
static VALUE
|
384
|
+
frt_indexout_write_long(VALUE self, VALUE rlong)
|
385
|
+
{
|
386
|
+
long long l = NUM2LL(rlong);
|
387
|
+
frt_write_byte(self, (l >> 56) & 0xFF);
|
388
|
+
frt_write_byte(self, (l >> 48) & 0xFF);
|
389
|
+
frt_write_byte(self, (l >> 40) & 0xFF);
|
390
|
+
frt_write_byte(self, (l >> 32) & 0xFF);
|
391
|
+
frt_write_byte(self, (l >> 24) & 0xFF);
|
392
|
+
frt_write_byte(self, (l >> 16) & 0xFF);
|
393
|
+
frt_write_byte(self, (l >> 8) & 0xFF);
|
394
|
+
frt_write_byte(self, l & 0xFF);
|
395
|
+
|
396
|
+
return Qnil;
|
397
|
+
}
|
398
|
+
|
399
|
+
static VALUE
|
400
|
+
frt_indexout_write_uint(VALUE self, VALUE ruint)
|
401
|
+
{
|
402
|
+
unsigned long l = NUM2ULONG(ruint);
|
403
|
+
frt_write_byte(self, (l >> 24) & 0xFF);
|
404
|
+
frt_write_byte(self, (l >> 16) & 0xFF);
|
405
|
+
frt_write_byte(self, (l >> 8) & 0xFF);
|
406
|
+
frt_write_byte(self, l & 0xFF);
|
407
|
+
|
408
|
+
return Qnil;
|
409
|
+
}
|
410
|
+
|
411
|
+
static VALUE
|
412
|
+
frt_indexout_write_ulong(VALUE self, VALUE rulong)
|
413
|
+
{
|
414
|
+
unsigned long long l;
|
415
|
+
l = rb_num2ull(rulong); // ruby 1.8 doesn't have NUM2ULL. Added in 1.9
|
416
|
+
frt_write_byte(self, (l >> 56) & 0xFF);
|
417
|
+
frt_write_byte(self, (l >> 48) & 0xFF);
|
418
|
+
frt_write_byte(self, (l >> 40) & 0xFF);
|
419
|
+
frt_write_byte(self, (l >> 32) & 0xFF);
|
420
|
+
frt_write_byte(self, (l >> 24) & 0xFF);
|
421
|
+
frt_write_byte(self, (l >> 16) & 0xFF);
|
422
|
+
frt_write_byte(self, (l >> 8) & 0xFF);
|
423
|
+
frt_write_byte(self, l & 0xFF);
|
424
|
+
|
425
|
+
return Qnil;
|
426
|
+
}
|
427
|
+
|
428
|
+
static VALUE
|
429
|
+
frt_write_vint(VALUE self, register unsigned long long i)
|
430
|
+
{
|
431
|
+
while (i > 127) {
|
432
|
+
frt_write_byte(self, (i & 0x7f) | 0x80);
|
433
|
+
i >>= 7;
|
434
|
+
}
|
435
|
+
frt_write_byte(self, i);
|
436
|
+
|
437
|
+
return Qnil;
|
438
|
+
}
|
439
|
+
|
440
|
+
static VALUE
|
441
|
+
frt_indexout_write_vint(VALUE self, VALUE rulong)
|
442
|
+
{
|
443
|
+
register unsigned long long i = rb_num2ull(rulong);
|
444
|
+
|
445
|
+
while (i > 127) {
|
446
|
+
frt_write_byte(self, (i & 0x7f) | 0x80);
|
447
|
+
i >>= 7;
|
448
|
+
}
|
449
|
+
frt_write_byte(self, i);
|
450
|
+
|
451
|
+
return Qnil;
|
452
|
+
}
|
453
|
+
|
454
|
+
static VALUE
|
455
|
+
frt_write_chars(VALUE self, VALUE rbuf, int start, int length)
|
456
|
+
{
|
457
|
+
int i;
|
458
|
+
VALUE rstr = StringValue(rbuf);
|
459
|
+
|
460
|
+
for (i = start; i < start + length; i++) {
|
461
|
+
frt_write_byte(self, RSTRING(rstr)->ptr[i]);
|
462
|
+
}
|
463
|
+
|
464
|
+
return Qnil;
|
465
|
+
}
|
466
|
+
|
467
|
+
static VALUE
|
468
|
+
frt_indexout_write_chars(VALUE self, VALUE rstr, VALUE rstart, VALUE rlength)
|
469
|
+
{
|
470
|
+
int start = FIX2INT(rstart);
|
471
|
+
int length = FIX2INT(rlength);
|
472
|
+
|
473
|
+
return frt_write_chars(self, rstr, start, length);
|
474
|
+
}
|
475
|
+
|
476
|
+
static VALUE
|
477
|
+
frt_indexout_write_string(VALUE self, VALUE rstr)
|
478
|
+
{
|
479
|
+
int len = RSTRING(StringValue(rstr))->len;
|
480
|
+
frt_write_vint(self, len);
|
481
|
+
|
482
|
+
frt_write_chars(self, rstr, 0, len);
|
483
|
+
return Qnil;
|
484
|
+
}
|
485
|
+
|
486
|
+
/****************************************************************************
|
487
|
+
*
|
488
|
+
* Init Function
|
489
|
+
*
|
490
|
+
****************************************************************************/
|
491
|
+
|
492
|
+
void
|
493
|
+
Init_indexio(void)
|
494
|
+
{
|
495
|
+
// IDs
|
496
|
+
frt_length = rb_intern("length");
|
497
|
+
frt_flush_buffer = rb_intern("flush_buffer");
|
498
|
+
frt_read_internal = rb_intern("read_internal");
|
499
|
+
frt_seek_internal = rb_intern("seek_internal");
|
500
|
+
|
501
|
+
// IndexInput
|
502
|
+
cIndexIn = rb_define_class_under(mStore, "IndexInput", rb_cObject);
|
503
|
+
cBufferedIndexIn = rb_define_class_under(mStore, "BufferedIndexInput", cIndexIn);
|
504
|
+
rb_define_alloc_func(cBufferedIndexIn, frt_indexbuffer_alloc);
|
505
|
+
|
506
|
+
rb_define_method(cBufferedIndexIn, "initialize_copy", frt_indexin_init_copy, 1);
|
507
|
+
rb_define_method(cBufferedIndexIn, "refill", frt_indexin_refill, 0);
|
508
|
+
rb_define_method(cBufferedIndexIn, "read_byte", frt_indexin_read_byte, 0);
|
509
|
+
rb_define_method(cBufferedIndexIn, "read_bytes", frt_indexin_read_bytes, 3);
|
510
|
+
rb_define_method(cBufferedIndexIn, "pos", frt_indexin_pos, 0);
|
511
|
+
rb_define_method(cBufferedIndexIn, "seek", frt_indexin_seek, 1);
|
512
|
+
rb_define_method(cBufferedIndexIn, "read_int", frt_indexin_read_int, 0);
|
513
|
+
rb_define_method(cBufferedIndexIn, "read_long", frt_indexin_read_long, 0);
|
514
|
+
rb_define_method(cBufferedIndexIn, "read_uint", frt_indexin_read_uint, 0);
|
515
|
+
rb_define_method(cBufferedIndexIn, "read_ulong", frt_indexin_read_ulong, 0);
|
516
|
+
rb_define_method(cBufferedIndexIn, "read_vint", frt_indexin_read_vint, 0);
|
517
|
+
rb_define_method(cBufferedIndexIn, "read_vlong", frt_indexin_read_vint, 0);
|
518
|
+
rb_define_method(cBufferedIndexIn, "read_string", frt_indexin_read_string, 0);
|
519
|
+
rb_define_method(cBufferedIndexIn, "read_chars", frt_indexin_read_bytes, 3);
|
520
|
+
|
521
|
+
// IndexOutput
|
522
|
+
cIndexOut = rb_define_class_under(mStore, "IndexOutput", rb_cObject);
|
523
|
+
cBufferedIndexOut = rb_define_class_under(mStore, "BufferedIndexOutput", cIndexOut);
|
524
|
+
rb_define_alloc_func(cBufferedIndexOut, frt_indexbuffer_alloc);
|
525
|
+
|
526
|
+
rb_define_method(cBufferedIndexOut, "write_byte", frt_indexout_write_byte, 1);
|
527
|
+
rb_define_method(cBufferedIndexOut, "write_bytes", frt_indexout_write_bytes, 2);
|
528
|
+
rb_define_method(cBufferedIndexOut, "flush", frt_indexout_flush, 0);
|
529
|
+
rb_define_method(cBufferedIndexOut, "close", frt_indexout_flush, 0);
|
530
|
+
rb_define_method(cBufferedIndexOut, "pos", frt_indexout_pos, 0);
|
531
|
+
rb_define_method(cBufferedIndexOut, "seek", frt_indexout_seek, 1);
|
532
|
+
rb_define_method(cBufferedIndexOut, "write_int", frt_indexout_write_int, 1);
|
533
|
+
rb_define_method(cBufferedIndexOut, "write_long", frt_indexout_write_long, 1);
|
534
|
+
rb_define_method(cBufferedIndexOut, "write_uint", frt_indexout_write_uint, 1);
|
535
|
+
rb_define_method(cBufferedIndexOut, "write_ulong", frt_indexout_write_ulong, 1);
|
536
|
+
rb_define_method(cBufferedIndexOut, "write_vint", frt_indexout_write_vint, 1);
|
537
|
+
rb_define_method(cBufferedIndexOut, "write_vlong", frt_indexout_write_vint, 1);
|
538
|
+
rb_define_method(cBufferedIndexOut, "write_chars", frt_indexout_write_chars, 3);
|
539
|
+
rb_define_method(cBufferedIndexOut, "write_string", frt_indexout_write_string, 1);
|
540
|
+
|
541
|
+
// FSIndexInput
|
542
|
+
//cFSIndexIn = rb_define_class_under(mStore, "FSIndexInput", cBufferedIndexIn);
|
543
|
+
}
|