ferret 0.11.4 → 0.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/TUTORIAL +3 -3
- data/ext/analysis.c +12 -9
- data/ext/array.c +10 -10
- data/ext/array.h +8 -1
- data/ext/bitvector.c +2 -2
- data/ext/except.c +1 -1
- data/ext/ferret.c +2 -2
- data/ext/ferret.h +1 -1
- data/ext/fs_store.c +13 -2
- data/ext/global.c +4 -4
- data/ext/global.h +6 -0
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/index.c +48 -22
- data/ext/index.h +17 -16
- data/ext/mempool.c +4 -1
- data/ext/mempool.h +1 -1
- data/ext/multimapper.c +2 -2
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +39 -8
- data/ext/q_range.c +32 -1
- data/ext/r_analysis.c +66 -28
- data/ext/r_index.c +18 -19
- data/ext/r_qparser.c +21 -6
- data/ext/r_search.c +74 -49
- data/ext/r_store.c +1 -1
- data/ext/r_utils.c +17 -17
- data/ext/search.c +10 -5
- data/ext/search.h +3 -1
- data/ext/sort.c +2 -2
- data/ext/stopwords.c +23 -34
- data/ext/store.c +9 -9
- data/ext/store.h +5 -4
- data/lib/ferret/document.rb +2 -2
- data/lib/ferret/field_infos.rb +37 -35
- data/lib/ferret/index.rb +16 -6
- data/lib/ferret/number_tools.rb +2 -2
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +40 -0
- data/test/unit/index/tc_index.rb +64 -101
- data/test/unit/index/tc_index_reader.rb +13 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +17 -1
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tm_searcher.rb +27 -1
- data/test/unit/ts_largefile.rb +4 -0
- metadata +147 -144
data/ext/r_store.c
CHANGED
@@ -213,7 +213,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
|
|
213
213
|
* call-seq:
|
214
214
|
* dir.delete(file_name) -> nil
|
215
215
|
*
|
216
|
-
* Remove file +file_name+ from the directory. Returns true if
|
216
|
+
* Remove file +file_name+ from the directory. Returns true if successful.
|
217
217
|
*/
|
218
218
|
static VALUE
|
219
219
|
frt_dir_delete(VALUE self, VALUE rfname)
|
data/ext/r_utils.c
CHANGED
@@ -40,7 +40,7 @@ frt_get_bv(BitVector *bv)
|
|
40
40
|
* call-seq:
|
41
41
|
* BitVector.new() -> new_bv
|
42
42
|
*
|
43
|
-
* Returns a new empty bit
|
43
|
+
* Returns a new empty bit vector object
|
44
44
|
*/
|
45
45
|
static VALUE
|
46
46
|
frt_bv_init(VALUE self)
|
@@ -124,7 +124,7 @@ frt_bv_get(VALUE self, VALUE rindex)
|
|
124
124
|
* call-seq:
|
125
125
|
* bv.count -> bit_count
|
126
126
|
*
|
127
|
-
* Count the number of bits set in the bit
|
127
|
+
* Count the number of bits set in the bit vector. If the bit vector has been
|
128
128
|
* negated using +#not+ then count the number of unset bits
|
129
129
|
* instead.
|
130
130
|
*/
|
@@ -140,7 +140,7 @@ frt_bv_count(VALUE self)
|
|
140
140
|
* call-seq:
|
141
141
|
* bv.clear -> self
|
142
142
|
*
|
143
|
-
* Clears all set bits in the bit
|
143
|
+
* Clears all set bits in the bit vector. Negated bit vectors will still have
|
144
144
|
* all bits set to *off*.
|
145
145
|
*/
|
146
146
|
VALUE
|
@@ -159,7 +159,7 @@ frt_bv_clear(VALUE self)
|
|
159
159
|
* bv1 != bv2 -> bool
|
160
160
|
* bv1.eql(bv2) -> bool
|
161
161
|
*
|
162
|
-
* Compares two bit vectors and returns true if both
|
162
|
+
* Compares two bit vectors and returns true if both bit vectors have the same
|
163
163
|
* bits set.
|
164
164
|
*/
|
165
165
|
VALUE
|
@@ -339,10 +339,10 @@ frt_bv_reset_scan(VALUE self)
|
|
339
339
|
* call-seq:
|
340
340
|
* bv.next -> bit_num
|
341
341
|
*
|
342
|
-
* Returns the next set bit in the bit
|
342
|
+
* Returns the next set bit in the bit vector scanning from low order to high
|
343
343
|
* order. You should call +#reset_scan+ before calling this method
|
344
344
|
* if you want to scan from the beginning. It is automatically reset when you
|
345
|
-
* first create the bit
|
345
|
+
* first create the bit vector.
|
346
346
|
*/
|
347
347
|
VALUE
|
348
348
|
frt_bv_next(VALUE self)
|
@@ -356,11 +356,11 @@ frt_bv_next(VALUE self)
|
|
356
356
|
* call-seq:
|
357
357
|
* bv.next_unset -> bit_num
|
358
358
|
*
|
359
|
-
* Returns the next unset bit in the bit
|
360
|
-
* high order. This method should only be called on bit
|
359
|
+
* Returns the next unset bit in the bit vector scanning from low order to
|
360
|
+
* high order. This method should only be called on bit vectors which have
|
361
361
|
* been flipped (negated). You should call +#reset_scan+ before
|
362
362
|
* calling this method if you want to scan from the beginning. It is
|
363
|
-
* automatically reset when you first create the bit
|
363
|
+
* automatically reset when you first create the bit vector.
|
364
364
|
*/
|
365
365
|
VALUE
|
366
366
|
frt_bv_next_unset(VALUE self)
|
@@ -374,10 +374,10 @@ frt_bv_next_unset(VALUE self)
|
|
374
374
|
* call-seq:
|
375
375
|
* bv.next_from(from) -> bit_num
|
376
376
|
*
|
377
|
-
* Returns the next set bit in the bit
|
377
|
+
* Returns the next set bit in the bit vector scanning from low order to
|
378
378
|
* high order and starting at +from+. The scan is inclusive so if
|
379
379
|
* +from+ is equal to 10 and +bv[10]+ is set it will
|
380
|
-
* return the number 10. If the bit
|
380
|
+
* return the number 10. If the bit vector has been negated then you should
|
381
381
|
* use the +#next_unset_from+ method.
|
382
382
|
*/
|
383
383
|
VALUE
|
@@ -396,10 +396,10 @@ frt_bv_next_from(VALUE self, VALUE rfrom)
|
|
396
396
|
* call-seq:
|
397
397
|
* bv.next_unset_from(from) -> bit_num
|
398
398
|
*
|
399
|
-
* Returns the next unset bit in the bit
|
399
|
+
* Returns the next unset bit in the bit vector scanning from low order to
|
400
400
|
* high order and starting at +from+. The scan is inclusive so if
|
401
401
|
* +from+ is equal to 10 and +bv[10]+ is unset it will
|
402
|
-
* return the number 10. If the bit
|
402
|
+
* return the number 10. If the bit vector has not been negated then you
|
403
403
|
* should use the +#next_from+ method.
|
404
404
|
*/
|
405
405
|
VALUE
|
@@ -418,7 +418,7 @@ frt_bv_next_unset_from(VALUE self, VALUE rfrom)
|
|
418
418
|
* call-seq:
|
419
419
|
* bv.each { |bit_num| }
|
420
420
|
*
|
421
|
-
* Iterate through all the set bits in the bit
|
421
|
+
* Iterate through all the set bits in the bit vector yielding each one in
|
422
422
|
* order
|
423
423
|
*/
|
424
424
|
VALUE
|
@@ -445,9 +445,9 @@ frt_bv_each(VALUE self)
|
|
445
445
|
* call-seq:
|
446
446
|
* bv.to_a
|
447
447
|
*
|
448
|
-
* Iterate through all the set bits in the bit
|
448
|
+
* Iterate through all the set bits in the bit vector adding the index of
|
449
449
|
* each set bit to an array. This is useful if you want to perform array
|
450
|
-
* methods on the bit
|
450
|
+
* methods on the bit vector. If you want to convert an array to a bit_vector
|
451
451
|
* simply do this;
|
452
452
|
*
|
453
453
|
* bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
|
@@ -896,7 +896,7 @@ frt_pq_capa(VALUE self)
|
|
896
896
|
*
|
897
897
|
* == Example
|
898
898
|
*
|
899
|
-
* Here is a toy example that sorts strings by their
|
899
|
+
* Here is a toy example that sorts strings by their length and has a capacity
|
900
900
|
* of 5;
|
901
901
|
*
|
902
902
|
* q = PriorityQueue.new(5) {|a, b| a.size < b.size}
|
data/ext/search.c
CHANGED
@@ -623,13 +623,18 @@ MatchVector *searcher_get_match_vector(Searcher *self,
|
|
623
623
|
const char *field)
|
624
624
|
{
|
625
625
|
MatchVector *mv = matchv_new();
|
626
|
-
|
626
|
+
bool rewrite = query->get_matchv_i == q_get_matchv_i;
|
627
627
|
TermVector *tv = self->get_term_vector(self, doc_num, field);
|
628
|
+
if (rewrite) {
|
629
|
+
query = self->rewrite(self, query);
|
630
|
+
}
|
628
631
|
if (tv && tv->term_cnt > 0 && tv->terms[0].positions != NULL) {
|
629
|
-
mv =
|
632
|
+
mv = query->get_matchv_i(query, mv, tv);
|
630
633
|
tv_destroy(tv);
|
631
634
|
}
|
632
|
-
|
635
|
+
if (rewrite) {
|
636
|
+
q_deref(query);
|
637
|
+
}
|
633
638
|
return mv;
|
634
639
|
}
|
635
640
|
|
@@ -846,6 +851,7 @@ char **searcher_highlight(Searcher *self,
|
|
846
851
|
MatchVector *mv;
|
847
852
|
query = self->rewrite(self, query);
|
848
853
|
mv = query->get_matchv_i(query, matchv_new(), tv);
|
854
|
+
q_deref(query);
|
849
855
|
if (lazy_df->len < (excerpt_len * num_excerpts)) {
|
850
856
|
excerpt_strs = ary_new_type_capa(char *, 1);
|
851
857
|
ary_push(excerpt_strs,
|
@@ -947,7 +953,6 @@ char **searcher_highlight(Searcher *self,
|
|
947
953
|
pq_destroy(excerpt_pq);
|
948
954
|
}
|
949
955
|
matchv_destroy(mv);
|
950
|
-
q_deref(query);
|
951
956
|
}
|
952
957
|
if (tv) tv_destroy(tv);
|
953
958
|
if (lazy_doc) lazy_doc_close(lazy_doc);
|
@@ -1371,7 +1376,7 @@ static Searcher *cdfsea_new(HashTable *df_map, int max_doc)
|
|
1371
1376
|
***************************************************************************/
|
1372
1377
|
|
1373
1378
|
#define MSEA(searcher) ((MultiSearcher *)(searcher))
|
1374
|
-
static
|
1379
|
+
static INLINE int msea_get_searcher_index(Searcher *self, int n)
|
1375
1380
|
{
|
1376
1381
|
MultiSearcher *msea = MSEA(self);
|
1377
1382
|
int lo = 0; /* search starts array */
|
data/ext/search.h
CHANGED
@@ -408,6 +408,7 @@ typedef struct ConstantScoreQuery
|
|
408
408
|
{
|
409
409
|
Query super;
|
410
410
|
Filter *filter;
|
411
|
+
Query *original;
|
411
412
|
} ConstantScoreQuery;
|
412
413
|
|
413
414
|
extern Query *csq_new(Filter *filter);
|
@@ -713,7 +714,7 @@ typedef struct FieldDoc
|
|
713
714
|
{
|
714
715
|
Hit hit;
|
715
716
|
int size;
|
716
|
-
Comparable comparables[];
|
717
|
+
Comparable comparables[1];
|
717
718
|
} FieldDoc;
|
718
719
|
|
719
720
|
extern void fd_destroy(FieldDoc *fd);
|
@@ -872,6 +873,7 @@ typedef struct QParser
|
|
872
873
|
bool close_def_fields : 1;
|
873
874
|
bool destruct : 1;
|
874
875
|
bool recovering : 1;
|
876
|
+
bool use_keywords : 1;
|
875
877
|
} QParser;
|
876
878
|
|
877
879
|
extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
data/ext/sort.c
CHANGED
@@ -675,7 +675,7 @@ bool fshq_less_than(const void *hit1, const void *hit2)
|
|
675
675
|
}
|
676
676
|
}
|
677
677
|
|
678
|
-
|
678
|
+
INLINE bool fshq_lt(Sorter *sorter, Hit *hit1, Hit *hit2)
|
679
679
|
{
|
680
680
|
Comparator *comp;
|
681
681
|
int diff = 0, i;
|
@@ -734,7 +734,7 @@ Hit *fshq_pq_pop(PriorityQueue *pq)
|
|
734
734
|
}
|
735
735
|
}
|
736
736
|
|
737
|
-
|
737
|
+
INLINE void fshq_pq_up(PriorityQueue *pq)
|
738
738
|
{
|
739
739
|
Hit **heap = (Hit **)pq->heap;
|
740
740
|
Hit *node;
|
data/ext/stopwords.c
CHANGED
@@ -371,40 +371,29 @@ const char *FULL_DANISH_STOP_WORDS[] = {
|
|
371
371
|
};
|
372
372
|
|
373
373
|
const char *FULL_RUSSIAN_STOP_WORDS[] = {
|
374
|
-
"
|
375
|
-
|
376
|
-
"
|
377
|
-
|
378
|
-
"
|
379
|
-
|
380
|
-
"
|
381
|
-
|
382
|
-
"
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
"
|
389
|
-
|
390
|
-
"
|
391
|
-
|
392
|
-
"
|
393
|
-
|
394
|
-
"
|
395
|
-
|
396
|
-
"
|
397
|
-
"×ÓÅÈ", "ÎÉËÏÇÄÁ", "ÓÅÇÏÄÎÑ",
|
398
|
-
"ÍÏÖÎÏ", "ÐÒÉ", "ÎÁËÏÎÅÃ", "Ä×Á", "ÏÂ",
|
399
|
-
"ÄÒÕÇÏÊ", "ÈÏÔØ", "ÐÏÓÌÅ", "ÎÁÄ",
|
400
|
-
"ÂÏÌØÛÅ", "ÔÏÔ", "ÞÅÒÅÚ", "ÜÔÉ", "ÎÁÓ", "ÐÒÏ",
|
401
|
-
"×ÓÅÇÏ", "ÎÉÈ", "ËÁËÁÑ",
|
402
|
-
"ÍÎÏÇÏ", "ÒÁÚ×Å", "ÓËÁÚÁÌÁ", "ÔÒÉ", "ÜÔÕ",
|
403
|
-
"ÍÏÑ", "×ÐÒÏÞÅÍ", "ÈÏÒÏÛÏ",
|
404
|
-
"Ó×ÏÀ", "ÜÔÏÊ", "ÐÅÒÅÄ", "ÉÎÏÇÄÁ", "ÌÕÞÛÅ",
|
405
|
-
"ÞÕÔØ", "ÔÏÍ", "ÎÅÌØÚÑ",
|
406
|
-
"ÔÁËÏÊ", "ÉÍ", "ÂÏÌÅÅ", "×ÓÅÇÄÁ", "ËÏÎÅÞÎÏ",
|
407
|
-
"×ÓÀ", "ÍÅÖÄÕ", NULL
|
374
|
+
"а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
|
375
|
+
"вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где",
|
376
|
+
"да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть", "еще",
|
377
|
+
"же", "за", "здесь", "и", "из", "или", "им", "их", "к", "как", "ко",
|
378
|
+
"когда", "кто", "ли", "либо", "мне", "может", "мы", "на", "надо", "наш",
|
379
|
+
"не", "него", "нее", "нет", "ни", "них", "но", "ну", "о", "об", "однако",
|
380
|
+
"он", "она", "они", "оно", "от", "очень", "по", "под", "при", "с", "со",
|
381
|
+
"так", "также", "такой", "там", "те", "тем", "то", "того", "тоже", "той",
|
382
|
+
"только", "том", "ты", "у", "уже", "хотя", "чего", "чей", "чем", "что",
|
383
|
+
"чтобы", "чье", "чья", "эта", "эти", "это", "я", NULL
|
384
|
+
};
|
385
|
+
|
386
|
+
const char *FULL_RUSSIAN_STOP_WORDS_KOI8_R[] = {
|
387
|
+
"Á", "ÂÅÚ", "ÂÏÌÅÅ", "ÂÙ", "ÂÙÌ", "ÂÙÌÁ", "ÂÙÌÉ", "ÂÙÌÏ", "ÂÙÔØ", "×",
|
388
|
+
"×ÁÍ", "×ÁÓ", "×ÅÓØ", "×Ï", "×ÏÔ", "×ÓÅ", "×ÓÅÇÏ", "×ÓÅÈ", "×Ù", "ÇÄÅ",
|
389
|
+
"ÄÁ", "ÄÁÖÅ", "ÄÌÑ", "ÄÏ", "ÅÇÏ", "ÅÅ", "ÅÊ", "ÅÀ", "ÅÓÌÉ", "ÅÓÔØ", "ÅÝÅ",
|
390
|
+
"ÖÅ", "ÚÁ", "ÚÄÅÓØ", "É", "ÉÚ", "ÉÌÉ", "ÉÍ", "ÉÈ", "Ë", "ËÁË", "ËÏ",
|
391
|
+
"ËÏÇÄÁ", "ËÔÏ", "ÌÉ", "ÌÉÂÏ", "ÍÎÅ", "ÍÏÖÅÔ", "ÍÙ", "ÎÁ", "ÎÁÄÏ", "ÎÁÛ",
|
392
|
+
"ÎÅ", "ÎÅÇÏ", "ÎÅÅ", "ÎÅÔ", "ÎÉ", "ÎÉÈ", "ÎÏ", "ÎÕ", "Ï", "ÏÂ", "ÏÄÎÁËÏ",
|
393
|
+
"ÏÎ", "ÏÎÁ", "ÏÎÉ", "ÏÎÏ", "ÏÔ", "ÏÞÅÎØ", "ÐÏ", "ÐÏÄ", "ÐÒÉ", "Ó", "ÓÏ",
|
394
|
+
"ÔÁË", "ÔÁËÖÅ", "ÔÁËÏÊ", "ÔÁÍ", "ÔÅ", "ÔÅÍ", "ÔÏ", "ÔÏÇÏ", "ÔÏÖÅ", "ÔÏÊ",
|
395
|
+
"ÔÏÌØËÏ", "ÔÏÍ", "ÔÙ", "Õ", "ÕÖÅ", "ÈÏÔÑ", "ÞÅÇÏ", "ÞÅÊ", "ÞÅÍ", "ÞÔÏ",
|
396
|
+
"ÞÔÏÂÙ", "ÞØÅ", "ÞØÑ", "ÜÔÁ", "ÜÔÉ", "ÜÔÏ", "Ñ", NULL
|
408
397
|
};
|
409
398
|
|
410
399
|
const char *FULL_FINNISH_STOP_WORDS[] = {
|
data/ext/store.c
CHANGED
@@ -104,7 +104,7 @@ OutStream *os_new()
|
|
104
104
|
*
|
105
105
|
* @param the OutStream to flush
|
106
106
|
*/
|
107
|
-
|
107
|
+
INLINE void os_flush(OutStream *os)
|
108
108
|
{
|
109
109
|
os->m->flush_i(os, os->buf.buf, os->buf.pos);
|
110
110
|
os->buf.start += os->buf.pos;
|
@@ -143,7 +143,7 @@ void os_seek(OutStream *os, off_t new_pos)
|
|
143
143
|
* @param b the byte to write
|
144
144
|
* @raise IO_ERROR if there is an IO error writing to the filesystem
|
145
145
|
*/
|
146
|
-
|
146
|
+
INLINE void os_write_byte(OutStream *os, uchar b)
|
147
147
|
{
|
148
148
|
if (os->buf.pos >= BUFFER_SIZE) {
|
149
149
|
os_flush(os);
|
@@ -237,7 +237,7 @@ void is_refill(InStream *is)
|
|
237
237
|
* @raise IO_ERROR if there is a error reading from the filesystem
|
238
238
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
239
239
|
*/
|
240
|
-
|
240
|
+
INLINE uchar is_read_byte(InStream *is)
|
241
241
|
{
|
242
242
|
if (is->buf.pos >= is->buf.len) {
|
243
243
|
is_refill(is);
|
@@ -344,7 +344,7 @@ f_u64 is_read_u64(InStream *is)
|
|
344
344
|
}
|
345
345
|
|
346
346
|
/* optimized to use unchecked read_byte if there is definitely space */
|
347
|
-
|
347
|
+
INLINE unsigned int is_read_vint(InStream *is)
|
348
348
|
{
|
349
349
|
register unsigned int res, b;
|
350
350
|
register int shift = 7;
|
@@ -374,7 +374,7 @@ __inline unsigned int is_read_vint(InStream *is)
|
|
374
374
|
}
|
375
375
|
|
376
376
|
/* optimized to use unchecked read_byte if there is definitely space */
|
377
|
-
|
377
|
+
INLINE off_t is_read_voff_t(InStream *is)
|
378
378
|
{
|
379
379
|
register off_t res, b;
|
380
380
|
register int shift = 7;
|
@@ -403,7 +403,7 @@ __inline off_t is_read_voff_t(InStream *is)
|
|
403
403
|
return res;
|
404
404
|
}
|
405
405
|
|
406
|
-
|
406
|
+
INLINE void is_skip_vints(InStream *is, register int cnt)
|
407
407
|
{
|
408
408
|
for (; cnt > 0; cnt--) {
|
409
409
|
while ((is_read_byte(is) & 0x80) != 0) {
|
@@ -411,7 +411,7 @@ __inline void is_skip_vints(InStream *is, register int cnt)
|
|
411
411
|
}
|
412
412
|
}
|
413
413
|
|
414
|
-
|
414
|
+
INLINE void is_read_chars(InStream *is, char *buffer,
|
415
415
|
int off, int len)
|
416
416
|
{
|
417
417
|
int end, i;
|
@@ -508,7 +508,7 @@ void os_write_u64(OutStream *os, f_u64 num)
|
|
508
508
|
}
|
509
509
|
|
510
510
|
/* optimized to use an unchecked write if there is space */
|
511
|
-
|
511
|
+
INLINE void os_write_vint(OutStream *os, register unsigned int num)
|
512
512
|
{
|
513
513
|
if (os->buf.pos > VINT_END) {
|
514
514
|
while (num > 127) {
|
@@ -527,7 +527,7 @@ __inline void os_write_vint(OutStream *os, register unsigned int num)
|
|
527
527
|
}
|
528
528
|
|
529
529
|
/* optimized to use an unchecked write if there is space */
|
530
|
-
|
530
|
+
INLINE void os_write_voff_t(OutStream *os, register off_t num)
|
531
531
|
{
|
532
532
|
if (os->buf.pos > VINT_END) {
|
533
533
|
while (num > 127) {
|
data/ext/store.h
CHANGED
@@ -176,6 +176,7 @@ struct Store
|
|
176
176
|
CompoundStore *cmpd; /* for compound_store only */
|
177
177
|
} dir;
|
178
178
|
|
179
|
+
mode_t file_mode;
|
179
180
|
HashSet *locks;
|
180
181
|
|
181
182
|
/**
|
@@ -605,7 +606,7 @@ extern InStream *is_clone(InStream *is);
|
|
605
606
|
* @raise IO_ERROR if there is a error reading from the file-system
|
606
607
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
607
608
|
*/
|
608
|
-
extern
|
609
|
+
extern INLINE uchar is_read_byte(InStream *is);
|
609
610
|
|
610
611
|
/**
|
611
612
|
* Read +len+ bytes from InStream +is+ and write them to buffer +buf+
|
@@ -668,7 +669,7 @@ extern f_u64 is_read_u64(InStream *is);
|
|
668
669
|
* @raise IO_ERROR if there is a error reading from the file-system
|
669
670
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
670
671
|
*/
|
671
|
-
extern
|
672
|
+
extern INLINE unsigned int is_read_vint(InStream *is);
|
672
673
|
|
673
674
|
/**
|
674
675
|
* Skip _cnt_ vints. This is a convenience method used for performance reasons
|
@@ -680,7 +681,7 @@ extern __inline unsigned int is_read_vint(InStream *is);
|
|
680
681
|
* @raise IO_ERROR if there is a error reading from the file-system
|
681
682
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
682
683
|
*/
|
683
|
-
extern
|
684
|
+
extern INLINE void is_skip_vints(InStream *is, register int cnt);
|
684
685
|
|
685
686
|
/**
|
686
687
|
* Read a compressed (VINT) unsigned off_t from the InStream.
|
@@ -691,7 +692,7 @@ extern __inline void is_skip_vints(InStream *is, register int cnt);
|
|
691
692
|
* @raise IO_ERROR if there is a error reading from the file-system
|
692
693
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
693
694
|
*/
|
694
|
-
extern
|
695
|
+
extern INLINE off_t is_read_voff_t(InStream *is);
|
695
696
|
|
696
697
|
/**
|
697
698
|
* Read a string from the InStream. A string is an integer +length+ in vint
|
data/lib/ferret/document.rb
CHANGED
@@ -25,7 +25,7 @@ module Ferret
|
|
25
25
|
# textual values. If you are coming from a Lucene background you should note
|
26
26
|
# that Fields don't have any properties except for the boost property. You
|
27
27
|
# should use the Ferret::Index::FieldInfos class to set field properties
|
28
|
-
#
|
28
|
+
# across the whole index instead.
|
29
29
|
#
|
30
30
|
# === Boost
|
31
31
|
#
|
@@ -57,7 +57,7 @@ module Ferret
|
|
57
57
|
end
|
58
58
|
alias :== :eql?
|
59
59
|
|
60
|
-
# Create a string
|
60
|
+
# Create a string representation of the document
|
61
61
|
def to_s
|
62
62
|
buf = ["Document {"]
|
63
63
|
self.keys.sort_by {|key| key.to_s}.each do |key|
|
data/lib/ferret/field_infos.rb
CHANGED
@@ -1,41 +1,43 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
3
|
+
module Ferret::Index
|
4
|
+
class FieldInfos
|
5
|
+
# Load FieldInfos from a YAML file. The YAML file should look something like
|
6
|
+
# this:
|
7
|
+
# default:
|
8
|
+
# store: :yes
|
9
|
+
# index: :yes
|
10
|
+
# term_vector: :no
|
11
|
+
#
|
12
|
+
# fields:
|
13
|
+
# id:
|
14
|
+
# index: :untokenized
|
15
|
+
# term_vector: :no
|
16
|
+
#
|
17
|
+
# title:
|
18
|
+
# boost: 20.0
|
19
|
+
# term_vector: :no
|
20
|
+
#
|
21
|
+
# content:
|
22
|
+
# term_vector: :with_positions_offsets
|
23
|
+
#
|
24
|
+
def self.load(yaml_str)
|
25
|
+
info = YAML.load(yaml_str)
|
26
|
+
convert_strings_to_symbols(info)
|
27
|
+
fis = FieldInfos.new(info[:default])
|
28
|
+
fields = info[:fields]
|
29
|
+
fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
|
30
|
+
fis
|
31
|
+
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
private
|
34
|
+
def self.convert_strings_to_symbols(hash)
|
35
|
+
hash.keys.each do |key|
|
36
|
+
convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
|
37
|
+
if key.is_a?(String)
|
38
|
+
hash[key.intern] = hash[key]
|
39
|
+
hash.delete(key)
|
40
|
+
end
|
39
41
|
end
|
40
42
|
end
|
41
43
|
end
|