ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/filter.c ADDED
@@ -0,0 +1,103 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /***************************************************************************
5
+ *
6
+ * Filter
7
+ *
8
+ ***************************************************************************/
9
+
10
+ void filt_destroy(void *p)
11
+ {
12
+ Filter *self = (Filter *)p;
13
+ h_destroy(self->cache);
14
+ free(self->name);
15
+ free(self);
16
+ }
17
+
18
+ BitVector *filt_get_bv(Filter *self, IndexReader *ir)
19
+ {
20
+ CacheObject *co = h_get(self->cache, ir);
21
+ if (!co) {
22
+ BitVector *bv;
23
+ if (!ir->cache) {
24
+ ir_add_cache(ir);
25
+ }
26
+ bv = self->get_bv(self, ir);
27
+ co = co_create(self->cache, ir->cache, self, ir,
28
+ &bv_destroy, (void *)bv);
29
+ }
30
+ return (BitVector *)co->obj;
31
+ }
32
+
33
+ char *filt_to_s(Filter *self)
34
+ {
35
+ return estrdup(self->name);
36
+ }
37
+
38
+ Filter *filt_create(char *name)
39
+ {
40
+ Filter *self = ALLOC(Filter);
41
+ self->cache = co_hsh_create();
42
+ self->name = estrdup(name);
43
+ self->to_s = &filt_to_s;
44
+ self->destroy = &filt_destroy;
45
+ return self;
46
+ }
47
+
48
+ /***************************************************************************
49
+ *
50
+ * QueryFilter
51
+ *
52
+ ***************************************************************************/
53
+
54
+ char *qfilt_to_s(Filter *self)
55
+ {
56
+ QueryFilter *qf = (QueryFilter *)self->data;
57
+ Query *query = (Query *)qf->query;
58
+ char *query_str = query->to_s(query, "");
59
+ char *filter_str = epstrdup("QueryFilter(%s)", strlen(query_str), query_str);
60
+ free(query_str);
61
+ return filter_str;
62
+ }
63
+
64
+ BitVector *qfilt_get_bv(Filter *filt, IndexReader *ir)
65
+ {
66
+ BitVector *bv = bv_create_size(ir->max_doc(ir));
67
+ QueryFilter *qfilt = (QueryFilter *)filt->data;
68
+ Searcher *sea = sea_create(ir);
69
+ Weight *weight = q_weight(qfilt->query, sea);
70
+ Scorer *scorer = weight->scorer(weight, ir);
71
+ if (scorer) {
72
+ while (scorer->next(scorer)) {
73
+ bv_set(bv, scorer->doc);
74
+ }
75
+ scorer->destroy(scorer);
76
+ }
77
+ free(sea);
78
+ return bv;
79
+ }
80
+
81
+ void qfilt_destroy(void *p)
82
+ {
83
+ Filter *filt = (Filter *)p;
84
+ QueryFilter *qfilt = filt->data;
85
+ /* caller is responsible for closing the query */
86
+ /* if (qfilt->query) qfilt->query->destroy(qfilt->query); */
87
+ free(qfilt);
88
+ filt_destroy(filt);
89
+ }
90
+
91
+ Filter *qfilt_create(Query *query)
92
+ {
93
+ Filter *self;
94
+ QueryFilter *qfilt = ALLOC(QueryFilter);
95
+ qfilt->query = query;
96
+
97
+ self = filt_create("QueryFilter");
98
+ self->data = qfilt;
99
+ self->get_bv = &qfilt_get_bv;
100
+ self->to_s = &qfilt_to_s;
101
+ self->destroy = &qfilt_destroy;
102
+ return self;
103
+ }
data/ext/fs_store.c ADDED
@@ -0,0 +1,352 @@
1
+ #include <sys/types.h>
2
+ #include <sys/dir.h>
3
+ #include <sys/stat.h>
4
+ #include <dirent.h>
5
+ #include <fcntl.h>
6
+ #include <unistd.h>
7
+
8
+ #include <errno.h>
9
+ #include <string.h>
10
+ #include <stdio.h>
11
+ #include "store.h"
12
+
13
/*
 * Write "<base>/<filename>" into buf and return buf.
 *
 * No bounds checking is performed: buf must be large enough to hold both
 * strings, the separator and the terminating NUL. buf must not overlap
 * either input string.
 */
static char *join_path(char *buf, const char *base, const char *filename)
{
    size_t base_len = strlen(base);
    size_t name_len = strlen(filename);

    memcpy(buf, base, base_len);
    buf[base_len] = '/';
    /* name_len + 1 copies the terminating NUL as well */
    memcpy(buf + base_len + 1, filename, name_len + 1);

    return buf;
}
20
+
21
+ void fs_touch(Store *store, char *filename)
22
+ {
23
+ char buf[MAX_PATH];
24
+ int fd = creat(join_path(buf, store->dir.path, filename), S_IRUSR|S_IWUSR);
25
+ close(fd);
26
+ }
27
+
28
+ int fs_exists(Store *store, char *filename)
29
+ {
30
+ char buf[MAX_PATH];
31
+ int fd = open(join_path(buf, store->dir.path, filename), 0);
32
+ if (fd < 0) {
33
+ assert(errno == ENOENT);
34
+ return false;
35
+ }
36
+ close(fd);
37
+ return true;
38
+ }
39
+
40
+ int fs_remove(Store *store, char *filename)
41
+ {
42
+ char buf[MAX_PATH];
43
+ return remove(join_path(buf, store->dir.path, filename));
44
+ }
45
+
46
+ int fs_rename(Store *store, char *from, char *to)
47
+ {
48
+ char buf1[MAX_PATH], buf2[MAX_PATH];
49
+ int ret;
50
+
51
+ ret = rename(join_path(buf1, store->dir.path, from),
52
+ join_path(buf2, store->dir.path, to));
53
+
54
+ return ret;
55
+ }
56
+
57
+ int fs_count(Store *store)
58
+ {
59
+ int cnt = 0;
60
+ DIR *d = opendir(store->dir.path);
61
+ struct dirent *de;
62
+
63
+ while ((de = readdir(d)) != NULL) {
64
+ if (de->d_name[0] != '.') {
65
+ cnt++;
66
+ }
67
+ }
68
+ closedir(d);
69
+
70
+ return cnt;
71
+ }
72
+
73
+ void fs_each(Store *store, void (*func)(char *fname, void *arg), void *arg)
74
+ {
75
+ DIR *d;
76
+ struct dirent *de;
77
+
78
+ d = opendir(store->dir.path);
79
+ while ((de = readdir(d)) != NULL) {
80
+ if ((strncmp(de->d_name, LOCK_PREFIX, strlen(LOCK_PREFIX)) == 0) ||
81
+ (de->d_name[0] == '.'))
82
+ continue;
83
+ func(de->d_name, arg);
84
+ }
85
+ closedir(d);
86
+ }
87
+
88
+ void fs_clear_locks(Store *store)
89
+ {
90
+ DIR *d;
91
+ struct dirent *de;
92
+
93
+ d = opendir(store->dir.path);
94
+ while ((de = readdir(d)) != NULL) {
95
+ if (file_is_lock(de->d_name)) {
96
+ fs_remove(store, de->d_name);
97
+ }
98
+ }
99
+ closedir(d);
100
+ }
101
+
102
+ void fs_destroy(void *p)
103
+ {
104
+ Store *store = (Store *)p;
105
+ fs_clear_locks(store);
106
+ free(store->dir.path);
107
+ store_destroy(store);
108
+ }
109
+
110
+ void fs_clear(Store *store)
111
+ {
112
+ DIR *d;
113
+ struct dirent *de;
114
+
115
+ d = opendir(store->dir.path);
116
+ while ((de = readdir(d)) != NULL) {
117
+ if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
118
+ fs_remove(store, de->d_name);
119
+ }
120
+ }
121
+ closedir(d);
122
+ }
123
+
124
+ void fs_clear_all(Store *store)
125
+ {
126
+ DIR *d;
127
+ struct dirent *de;
128
+
129
+ d = opendir(store->dir.path);
130
+ while ((de = readdir(d)) != NULL) {
131
+ if (de->d_name[0] != '.') {
132
+ fs_remove(store, de->d_name);
133
+ }
134
+ }
135
+ closedir(d);
136
+ }
137
+
138
+ int fs_length(Store *store, char *filename)
139
+ {
140
+ char buf[MAX_PATH];
141
+ struct stat stt;
142
+ stat(join_path(buf, store->dir.path, filename), &stt);
143
+ return stt.st_size;
144
+ }
145
+
146
+ void fso_flush_internal(OutStream *os, uchar *src, int len)
147
+ {
148
+ fwrite(src, sizeof(uchar), len, (FILE *)os->file);
149
+ }
150
+
151
+ void fso_seek_internal(OutStream *os, int pos)
152
+ {
153
+ fseek((FILE *)os->file, pos, SEEK_SET);
154
+ }
155
+
156
+ void fso_close_internal(OutStream *os)
157
+ {
158
+ fclose((FILE *)os->file);
159
+ }
160
+
161
+ OutStream *fs_create_output(Store *store, const char *filename)
162
+ {
163
+ char buf[MAX_PATH];
164
+ FILE *f = fopen(join_path(buf, store->dir.path, filename), "wb");
165
+ if (!f) {
166
+ eprintf(IO_ERROR, "Couldn't open the file %s to write. Error was %s",
167
+ buf, strerror(errno));
168
+ }
169
+ OutStream *os = os_create();
170
+ os->file = f;
171
+ os->flush_internal = &fso_flush_internal;
172
+ os->seek_internal = &fso_seek_internal;
173
+ os->close_internal = &fso_close_internal;
174
+ return os;
175
+ }
176
+
177
+ void fsi_read_internal(InStream *is, uchar *b, int offset, int len)
178
+ {
179
+ int fd = (int)is->file;
180
+ int pos = is_pos(is);
181
+ if (pos != lseek(fd, 0, SEEK_CUR)) {
182
+ lseek(fd, pos, SEEK_SET);
183
+ }
184
+ read(fd, b, len);
185
+ }
186
+
187
+ void fsi_seek_internal(InStream *is, int pos)
188
+ {
189
+ lseek((int)is->file, pos, SEEK_SET);
190
+ }
191
+
192
+ void fsi_close_internal(InStream *is)
193
+ {
194
+ if (!is->is_clone) {
195
+ close((int)is->file);
196
+ free(is->d.path);
197
+ }
198
+ }
199
+
200
+ void fsi_clone_internal(InStream *is, InStream *new_is)
201
+ { }
202
+
203
+ int fsi_length(InStream *is)
204
+ {
205
+ struct stat stt;
206
+ fstat((int)is->file, &stt);
207
+ return stt.st_size;
208
+ }
209
+
210
+ InStream *fs_open_input(Store *store, const char *filename)
211
+ {
212
+ char buf[MAX_PATH];
213
+ int fd = open(join_path(buf, store->dir.path, filename), O_RDONLY);
214
+ if (fd < 0) {
215
+ eprintf(IO_ERROR, "Couldn't open the file %s to read", buf);
216
+ }
217
+ InStream *is = is_create();
218
+ is->file = (void *)fd;
219
+ is->d.path = estrdup(buf);
220
+ is->is_clone = false;
221
+ is->read_internal = &fsi_read_internal;
222
+ is->seek_internal = &fsi_seek_internal;
223
+ is->close_internal = &fsi_close_internal;
224
+ is->clone_internal = &fsi_clone_internal;
225
+ is->length_internal= &fsi_length;
226
+ return is;
227
+ }
228
+
229
+ #define LOCK_OBTAIN_TIMEOUT 5
230
+
231
+ int fs_lock_obtain(Lock *lock)
232
+ {
233
+ int f;
234
+ int trys = LOCK_OBTAIN_TIMEOUT;
235
+ while (((f = open(lock->name, O_CREAT|O_EXCL|O_WRONLY, S_IRUSR|S_IWUSR)) < 0) &&
236
+ (trys > 0))
237
+ trys--;
238
+ if (f >= 0) {
239
+ close(f);
240
+ return true;
241
+ } else {
242
+ return false;
243
+ }
244
+ }
245
+
246
+ int fs_lock_is_locked(Lock *lock)
247
+ {
248
+ int f = open(lock->name, O_CREAT|O_EXCL|O_WRONLY, S_IRUSR|S_IWUSR);
249
+ if (f >= 0) {
250
+ close(f);
251
+ remove(lock->name);
252
+ return false;
253
+ } else {
254
+ return true;
255
+ }
256
+ }
257
+
258
+ void fs_lock_release(Lock *lock)
259
+ {
260
+ remove(lock->name);
261
+ }
262
+
263
+ Lock *fs_open_lock(Store *store, char *lockname)
264
+ {
265
+ Lock *lock = ALLOC(Lock);
266
+ char lname[100];
267
+ char buf[MAX_PATH];
268
+ sprintf(lname, "%s%s.lck", LOCK_PREFIX, lockname);
269
+ lock->name = estrdup(join_path(buf, store->dir.path, lname));
270
+ lock->obtain = &fs_lock_obtain;
271
+ lock->release = &fs_lock_release;
272
+ lock->is_locked = &fs_lock_is_locked;
273
+ return lock;
274
+ }
275
+
276
+ void fs_close_lock(Lock *lock)
277
+ {
278
+ remove(lock->name);
279
+ free(lock->name);
280
+ free(lock);
281
+ }
282
+
283
+ static HshTable stores = {
284
+ fill:0,
285
+ used:0,
286
+ mask:Hsh_MINSIZE - 1,
287
+ table:stores.smalltable,
288
+ lookup:&h_lookup_str,
289
+ hash:NULL,
290
+ eq:NULL,
291
+ free_key:dummy_free,
292
+ free_value:&fs_destroy
293
+ };
294
+
295
+ #ifndef FERRET_EXT
296
+ static mutex_t stores_mutex = MUTEX_INITIALIZER;
297
+ #endif
298
+
299
+ void fs_close(Store *store)
300
+ {
301
+ mutex_lock(&store->mutex);
302
+ if (--(store->ref_cnt) == 0) {
303
+ mutex_lock(&stores_mutex);
304
+ h_del(&stores, store->dir.path);
305
+ mutex_unlock(&stores_mutex);
306
+ } else {
307
+ mutex_unlock(&store->mutex);
308
+ }
309
+ }
310
+
311
+ Store *fs_store_create(const char *pathname)
312
+ {
313
+ Store *new_store = store_create();
314
+
315
+ new_store->ref_cnt = 0;
316
+ new_store->dir.path = estrdup(pathname);
317
+ new_store->touch = &fs_touch;
318
+ new_store->exists = &fs_exists;
319
+ new_store->remove = &fs_remove;
320
+ new_store->rename = &fs_rename;
321
+ new_store->count = &fs_count;
322
+ new_store->close = &fs_close;
323
+ new_store->clear = &fs_clear;
324
+ new_store->clear_all = &fs_clear_all;
325
+ new_store->clear_locks = &fs_clear_locks;
326
+ new_store->length = &fs_length;
327
+ new_store->each = &fs_each;
328
+ new_store->create_output = &fs_create_output;
329
+ new_store->open_input = &fs_open_input;
330
+ new_store->open_lock = &fs_open_lock;
331
+ new_store->close_lock = &fs_close_lock;
332
+ return new_store;
333
+ }
334
+
335
+ Store *open_fs_store(const char *pathname)
336
+ {
337
+ Store *store = NULL;
338
+
339
+ mutex_lock(&stores_mutex);
340
+ store = h_get(&stores, pathname);
341
+ if (!store) {
342
+ store = fs_store_create(pathname);
343
+ h_set(&stores, pathname, store);
344
+ }
345
+ mutex_unlock(&stores_mutex);
346
+
347
+ mutex_lock(&store->mutex);
348
+ store->ref_cnt++;
349
+ mutex_unlock(&store->mutex);
350
+
351
+ return store;
352
+ }