ferret 0.11.4 → 0.11.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/TUTORIAL +3 -3
- data/ext/analysis.c +12 -9
- data/ext/array.c +10 -10
- data/ext/array.h +8 -1
- data/ext/bitvector.c +2 -2
- data/ext/except.c +1 -1
- data/ext/ferret.c +2 -2
- data/ext/ferret.h +1 -1
- data/ext/fs_store.c +13 -2
- data/ext/global.c +4 -4
- data/ext/global.h +6 -0
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/index.c +48 -22
- data/ext/index.h +17 -16
- data/ext/mempool.c +4 -1
- data/ext/mempool.h +1 -1
- data/ext/multimapper.c +2 -2
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +39 -8
- data/ext/q_range.c +32 -1
- data/ext/r_analysis.c +66 -28
- data/ext/r_index.c +18 -19
- data/ext/r_qparser.c +21 -6
- data/ext/r_search.c +74 -49
- data/ext/r_store.c +1 -1
- data/ext/r_utils.c +17 -17
- data/ext/search.c +10 -5
- data/ext/search.h +3 -1
- data/ext/sort.c +2 -2
- data/ext/stopwords.c +23 -34
- data/ext/store.c +9 -9
- data/ext/store.h +5 -4
- data/lib/ferret/document.rb +2 -2
- data/lib/ferret/field_infos.rb +37 -35
- data/lib/ferret/index.rb +16 -6
- data/lib/ferret/number_tools.rb +2 -2
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +40 -0
- data/test/unit/index/tc_index.rb +64 -101
- data/test/unit/index/tc_index_reader.rb +13 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +17 -1
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tm_searcher.rb +27 -1
- data/test/unit/ts_largefile.rb +4 -0
- metadata +147 -144
data/ext/r_store.c
CHANGED
@@ -213,7 +213,7 @@ frt_dir_touch(VALUE self, VALUE rfname)
|
|
213
213
|
* call-seq:
|
214
214
|
* dir.delete(file_name) -> nil
|
215
215
|
*
|
216
|
-
* Remove file +file_name+ from the directory. Returns true if
|
216
|
+
* Remove file +file_name+ from the directory. Returns true if successful.
|
217
217
|
*/
|
218
218
|
static VALUE
|
219
219
|
frt_dir_delete(VALUE self, VALUE rfname)
|
data/ext/r_utils.c
CHANGED
@@ -40,7 +40,7 @@ frt_get_bv(BitVector *bv)
|
|
40
40
|
* call-seq:
|
41
41
|
* BitVector.new() -> new_bv
|
42
42
|
*
|
43
|
-
* Returns a new empty bit
|
43
|
+
* Returns a new empty bit vector object
|
44
44
|
*/
|
45
45
|
static VALUE
|
46
46
|
frt_bv_init(VALUE self)
|
@@ -124,7 +124,7 @@ frt_bv_get(VALUE self, VALUE rindex)
|
|
124
124
|
* call-seq:
|
125
125
|
* bv.count -> bit_count
|
126
126
|
*
|
127
|
-
* Count the number of bits set in the bit
|
127
|
+
* Count the number of bits set in the bit vector. If the bit vector has been
|
128
128
|
* negated using +#not+ then count the number of unset bits
|
129
129
|
* instead.
|
130
130
|
*/
|
@@ -140,7 +140,7 @@ frt_bv_count(VALUE self)
|
|
140
140
|
* call-seq:
|
141
141
|
* bv.clear -> self
|
142
142
|
*
|
143
|
-
* Clears all set bits in the bit
|
143
|
+
* Clears all set bits in the bit vector. Negated bit vectors will still have
|
144
144
|
* all bits set to *off*.
|
145
145
|
*/
|
146
146
|
VALUE
|
@@ -159,7 +159,7 @@ frt_bv_clear(VALUE self)
|
|
159
159
|
* bv1 != bv2 -> bool
|
160
160
|
* bv1.eql(bv2) -> bool
|
161
161
|
*
|
162
|
-
* Compares two bit vectors and returns true if both
|
162
|
+
* Compares two bit vectors and returns true if both bit vectors have the same
|
163
163
|
* bits set.
|
164
164
|
*/
|
165
165
|
VALUE
|
@@ -339,10 +339,10 @@ frt_bv_reset_scan(VALUE self)
|
|
339
339
|
* call-seq:
|
340
340
|
* bv.next -> bit_num
|
341
341
|
*
|
342
|
-
* Returns the next set bit in the bit
|
342
|
+
* Returns the next set bit in the bit vector scanning from low order to high
|
343
343
|
* order. You should call +#reset_scan+ before calling this method
|
344
344
|
* if you want to scan from the beginning. It is automatically reset when you
|
345
|
-
* first create the bit
|
345
|
+
* first create the bit vector.
|
346
346
|
*/
|
347
347
|
VALUE
|
348
348
|
frt_bv_next(VALUE self)
|
@@ -356,11 +356,11 @@ frt_bv_next(VALUE self)
|
|
356
356
|
* call-seq:
|
357
357
|
* bv.next_unset -> bit_num
|
358
358
|
*
|
359
|
-
* Returns the next unset bit in the bit
|
360
|
-
* high order. This method should only be called on bit
|
359
|
+
* Returns the next unset bit in the bit vector scanning from low order to
|
360
|
+
* high order. This method should only be called on bit vectors which have
|
361
361
|
* been flipped (negated). You should call +#reset_scan+ before
|
362
362
|
* calling this method if you want to scan from the beginning. It is
|
363
|
-
* automatically reset when you first create the bit
|
363
|
+
* automatically reset when you first create the bit vector.
|
364
364
|
*/
|
365
365
|
VALUE
|
366
366
|
frt_bv_next_unset(VALUE self)
|
@@ -374,10 +374,10 @@ frt_bv_next_unset(VALUE self)
|
|
374
374
|
* call-seq:
|
375
375
|
* bv.next_from(from) -> bit_num
|
376
376
|
*
|
377
|
-
* Returns the next set bit in the bit
|
377
|
+
* Returns the next set bit in the bit vector scanning from low order to
|
378
378
|
* high order and starting at +from+. The scan is inclusive so if
|
379
379
|
* +from+ is equal to 10 and +bv[10]+ is set it will
|
380
|
-
* return the number 10. If the bit
|
380
|
+
* return the number 10. If the bit vector has been negated then you should
|
381
381
|
* use the +#next_unset_from+ method.
|
382
382
|
*/
|
383
383
|
VALUE
|
@@ -396,10 +396,10 @@ frt_bv_next_from(VALUE self, VALUE rfrom)
|
|
396
396
|
* call-seq:
|
397
397
|
* bv.next_unset_from(from) -> bit_num
|
398
398
|
*
|
399
|
-
* Returns the next unset bit in the bit
|
399
|
+
* Returns the next unset bit in the bit vector scanning from low order to
|
400
400
|
* high order and starting at +from+. The scan is inclusive so if
|
401
401
|
* +from+ is equal to 10 and +bv[10]+ is unset it will
|
402
|
-
* return the number 10. If the bit
|
402
|
+
* return the number 10. If the bit vector has not been negated then you
|
403
403
|
* should use the +#next_from+ method.
|
404
404
|
*/
|
405
405
|
VALUE
|
@@ -418,7 +418,7 @@ frt_bv_next_unset_from(VALUE self, VALUE rfrom)
|
|
418
418
|
* call-seq:
|
419
419
|
* bv.each { |bit_num| }
|
420
420
|
*
|
421
|
-
* Iterate through all the set bits in the bit
|
421
|
+
* Iterate through all the set bits in the bit vector yielding each one in
|
422
422
|
* order
|
423
423
|
*/
|
424
424
|
VALUE
|
@@ -445,9 +445,9 @@ frt_bv_each(VALUE self)
|
|
445
445
|
* call-seq:
|
446
446
|
* bv.to_a
|
447
447
|
*
|
448
|
-
* Iterate through all the set bits in the bit
|
448
|
+
* Iterate through all the set bits in the bit vector adding the index of
|
449
449
|
* each set bit to an array. This is useful if you want to perform array
|
450
|
-
* methods on the bit
|
450
|
+
* methods on the bit vector. If you want to convert an array to a bit_vector
|
451
451
|
* simply do this;
|
452
452
|
*
|
453
453
|
* bv = [1, 12, 45, 367, 455].inject(BitVector.new) {|bv, i| bv.set(i)}
|
@@ -896,7 +896,7 @@ frt_pq_capa(VALUE self)
|
|
896
896
|
*
|
897
897
|
* == Example
|
898
898
|
*
|
899
|
-
* Here is a toy example that sorts strings by their
|
899
|
+
* Here is a toy example that sorts strings by their length and has a capacity
|
900
900
|
* of 5;
|
901
901
|
*
|
902
902
|
* q = PriorityQueue.new(5) {|a, b| a.size < b.size}
|
data/ext/search.c
CHANGED
@@ -623,13 +623,18 @@ MatchVector *searcher_get_match_vector(Searcher *self,
|
|
623
623
|
const char *field)
|
624
624
|
{
|
625
625
|
MatchVector *mv = matchv_new();
|
626
|
-
|
626
|
+
bool rewrite = query->get_matchv_i == q_get_matchv_i;
|
627
627
|
TermVector *tv = self->get_term_vector(self, doc_num, field);
|
628
|
+
if (rewrite) {
|
629
|
+
query = self->rewrite(self, query);
|
630
|
+
}
|
628
631
|
if (tv && tv->term_cnt > 0 && tv->terms[0].positions != NULL) {
|
629
|
-
mv =
|
632
|
+
mv = query->get_matchv_i(query, mv, tv);
|
630
633
|
tv_destroy(tv);
|
631
634
|
}
|
632
|
-
|
635
|
+
if (rewrite) {
|
636
|
+
q_deref(query);
|
637
|
+
}
|
633
638
|
return mv;
|
634
639
|
}
|
635
640
|
|
@@ -846,6 +851,7 @@ char **searcher_highlight(Searcher *self,
|
|
846
851
|
MatchVector *mv;
|
847
852
|
query = self->rewrite(self, query);
|
848
853
|
mv = query->get_matchv_i(query, matchv_new(), tv);
|
854
|
+
q_deref(query);
|
849
855
|
if (lazy_df->len < (excerpt_len * num_excerpts)) {
|
850
856
|
excerpt_strs = ary_new_type_capa(char *, 1);
|
851
857
|
ary_push(excerpt_strs,
|
@@ -947,7 +953,6 @@ char **searcher_highlight(Searcher *self,
|
|
947
953
|
pq_destroy(excerpt_pq);
|
948
954
|
}
|
949
955
|
matchv_destroy(mv);
|
950
|
-
q_deref(query);
|
951
956
|
}
|
952
957
|
if (tv) tv_destroy(tv);
|
953
958
|
if (lazy_doc) lazy_doc_close(lazy_doc);
|
@@ -1371,7 +1376,7 @@ static Searcher *cdfsea_new(HashTable *df_map, int max_doc)
|
|
1371
1376
|
***************************************************************************/
|
1372
1377
|
|
1373
1378
|
#define MSEA(searcher) ((MultiSearcher *)(searcher))
|
1374
|
-
static
|
1379
|
+
static INLINE int msea_get_searcher_index(Searcher *self, int n)
|
1375
1380
|
{
|
1376
1381
|
MultiSearcher *msea = MSEA(self);
|
1377
1382
|
int lo = 0; /* search starts array */
|
data/ext/search.h
CHANGED
@@ -408,6 +408,7 @@ typedef struct ConstantScoreQuery
|
|
408
408
|
{
|
409
409
|
Query super;
|
410
410
|
Filter *filter;
|
411
|
+
Query *original;
|
411
412
|
} ConstantScoreQuery;
|
412
413
|
|
413
414
|
extern Query *csq_new(Filter *filter);
|
@@ -713,7 +714,7 @@ typedef struct FieldDoc
|
|
713
714
|
{
|
714
715
|
Hit hit;
|
715
716
|
int size;
|
716
|
-
Comparable comparables[];
|
717
|
+
Comparable comparables[1];
|
717
718
|
} FieldDoc;
|
718
719
|
|
719
720
|
extern void fd_destroy(FieldDoc *fd);
|
@@ -872,6 +873,7 @@ typedef struct QParser
|
|
872
873
|
bool close_def_fields : 1;
|
873
874
|
bool destruct : 1;
|
874
875
|
bool recovering : 1;
|
876
|
+
bool use_keywords : 1;
|
875
877
|
} QParser;
|
876
878
|
|
877
879
|
extern QParser *qp_new(HashSet *all_fields, HashSet *def_fields,
|
data/ext/sort.c
CHANGED
@@ -675,7 +675,7 @@ bool fshq_less_than(const void *hit1, const void *hit2)
|
|
675
675
|
}
|
676
676
|
}
|
677
677
|
|
678
|
-
|
678
|
+
INLINE bool fshq_lt(Sorter *sorter, Hit *hit1, Hit *hit2)
|
679
679
|
{
|
680
680
|
Comparator *comp;
|
681
681
|
int diff = 0, i;
|
@@ -734,7 +734,7 @@ Hit *fshq_pq_pop(PriorityQueue *pq)
|
|
734
734
|
}
|
735
735
|
}
|
736
736
|
|
737
|
-
|
737
|
+
INLINE void fshq_pq_up(PriorityQueue *pq)
|
738
738
|
{
|
739
739
|
Hit **heap = (Hit **)pq->heap;
|
740
740
|
Hit *node;
|
data/ext/stopwords.c
CHANGED
@@ -371,40 +371,29 @@ const char *FULL_DANISH_STOP_WORDS[] = {
|
|
371
371
|
};
|
372
372
|
|
373
373
|
const char *FULL_RUSSIAN_STOP_WORDS[] = {
|
374
|
-
"
|
375
|
-
|
376
|
-
"
|
377
|
-
|
378
|
-
"
|
379
|
-
|
380
|
-
"
|
381
|
-
|
382
|
-
"
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
"
|
389
|
-
|
390
|
-
"
|
391
|
-
|
392
|
-
"
|
393
|
-
|
394
|
-
"
|
395
|
-
|
396
|
-
"
|
397
|
-
"×ÓÅÈ", "ÎÉËÏÇÄÁ", "ÓÅÇÏÄÎÑ",
|
398
|
-
"ÍÏÖÎÏ", "ÐÒÉ", "ÎÁËÏÎÅÃ", "Ä×Á", "ÏÂ",
|
399
|
-
"ÄÒÕÇÏÊ", "ÈÏÔØ", "ÐÏÓÌÅ", "ÎÁÄ",
|
400
|
-
"ÂÏÌØÛÅ", "ÔÏÔ", "ÞÅÒÅÚ", "ÜÔÉ", "ÎÁÓ", "ÐÒÏ",
|
401
|
-
"×ÓÅÇÏ", "ÎÉÈ", "ËÁËÁÑ",
|
402
|
-
"ÍÎÏÇÏ", "ÒÁÚ×Å", "ÓËÁÚÁÌÁ", "ÔÒÉ", "ÜÔÕ",
|
403
|
-
"ÍÏÑ", "×ÐÒÏÞÅÍ", "ÈÏÒÏÛÏ",
|
404
|
-
"Ó×ÏÀ", "ÜÔÏÊ", "ÐÅÒÅÄ", "ÉÎÏÇÄÁ", "ÌÕÞÛÅ",
|
405
|
-
"ÞÕÔØ", "ÔÏÍ", "ÎÅÌØÚÑ",
|
406
|
-
"ÔÁËÏÊ", "ÉÍ", "ÂÏÌÅÅ", "×ÓÅÇÄÁ", "ËÏÎÅÞÎÏ",
|
407
|
-
"×ÓÀ", "ÍÅÖÄÕ", NULL
|
374
|
+
"а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
|
375
|
+
"вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где",
|
376
|
+
"да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть", "еще",
|
377
|
+
"же", "за", "здесь", "и", "из", "или", "им", "их", "к", "как", "ко",
|
378
|
+
"когда", "кто", "ли", "либо", "мне", "может", "мы", "на", "надо", "наш",
|
379
|
+
"не", "него", "нее", "нет", "ни", "них", "но", "ну", "о", "об", "однако",
|
380
|
+
"он", "она", "они", "оно", "от", "очень", "по", "под", "при", "с", "со",
|
381
|
+
"так", "также", "такой", "там", "те", "тем", "то", "того", "тоже", "той",
|
382
|
+
"только", "том", "ты", "у", "уже", "хотя", "чего", "чей", "чем", "что",
|
383
|
+
"чтобы", "чье", "чья", "эта", "эти", "это", "я", NULL
|
384
|
+
};
|
385
|
+
|
386
|
+
const char *FULL_RUSSIAN_STOP_WORDS_KOI8_R[] = {
|
387
|
+
"Á", "ÂÅÚ", "ÂÏÌÅÅ", "ÂÙ", "ÂÙÌ", "ÂÙÌÁ", "ÂÙÌÉ", "ÂÙÌÏ", "ÂÙÔØ", "×",
|
388
|
+
"×ÁÍ", "×ÁÓ", "×ÅÓØ", "×Ï", "×ÏÔ", "×ÓÅ", "×ÓÅÇÏ", "×ÓÅÈ", "×Ù", "ÇÄÅ",
|
389
|
+
"ÄÁ", "ÄÁÖÅ", "ÄÌÑ", "ÄÏ", "ÅÇÏ", "ÅÅ", "ÅÊ", "ÅÀ", "ÅÓÌÉ", "ÅÓÔØ", "ÅÝÅ",
|
390
|
+
"ÖÅ", "ÚÁ", "ÚÄÅÓØ", "É", "ÉÚ", "ÉÌÉ", "ÉÍ", "ÉÈ", "Ë", "ËÁË", "ËÏ",
|
391
|
+
"ËÏÇÄÁ", "ËÔÏ", "ÌÉ", "ÌÉÂÏ", "ÍÎÅ", "ÍÏÖÅÔ", "ÍÙ", "ÎÁ", "ÎÁÄÏ", "ÎÁÛ",
|
392
|
+
"ÎÅ", "ÎÅÇÏ", "ÎÅÅ", "ÎÅÔ", "ÎÉ", "ÎÉÈ", "ÎÏ", "ÎÕ", "Ï", "ÏÂ", "ÏÄÎÁËÏ",
|
393
|
+
"ÏÎ", "ÏÎÁ", "ÏÎÉ", "ÏÎÏ", "ÏÔ", "ÏÞÅÎØ", "ÐÏ", "ÐÏÄ", "ÐÒÉ", "Ó", "ÓÏ",
|
394
|
+
"ÔÁË", "ÔÁËÖÅ", "ÔÁËÏÊ", "ÔÁÍ", "ÔÅ", "ÔÅÍ", "ÔÏ", "ÔÏÇÏ", "ÔÏÖÅ", "ÔÏÊ",
|
395
|
+
"ÔÏÌØËÏ", "ÔÏÍ", "ÔÙ", "Õ", "ÕÖÅ", "ÈÏÔÑ", "ÞÅÇÏ", "ÞÅÊ", "ÞÅÍ", "ÞÔÏ",
|
396
|
+
"ÞÔÏÂÙ", "ÞØÅ", "ÞØÑ", "ÜÔÁ", "ÜÔÉ", "ÜÔÏ", "Ñ", NULL
|
408
397
|
};
|
409
398
|
|
410
399
|
const char *FULL_FINNISH_STOP_WORDS[] = {
|
data/ext/store.c
CHANGED
@@ -104,7 +104,7 @@ OutStream *os_new()
|
|
104
104
|
*
|
105
105
|
* @param the OutStream to flush
|
106
106
|
*/
|
107
|
-
|
107
|
+
INLINE void os_flush(OutStream *os)
|
108
108
|
{
|
109
109
|
os->m->flush_i(os, os->buf.buf, os->buf.pos);
|
110
110
|
os->buf.start += os->buf.pos;
|
@@ -143,7 +143,7 @@ void os_seek(OutStream *os, off_t new_pos)
|
|
143
143
|
* @param b the byte to write
|
144
144
|
* @raise IO_ERROR if there is an IO error writing to the filesystem
|
145
145
|
*/
|
146
|
-
|
146
|
+
INLINE void os_write_byte(OutStream *os, uchar b)
|
147
147
|
{
|
148
148
|
if (os->buf.pos >= BUFFER_SIZE) {
|
149
149
|
os_flush(os);
|
@@ -237,7 +237,7 @@ void is_refill(InStream *is)
|
|
237
237
|
* @raise IO_ERROR if there is an error reading from the filesystem
|
238
238
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
239
239
|
*/
|
240
|
-
|
240
|
+
INLINE uchar is_read_byte(InStream *is)
|
241
241
|
{
|
242
242
|
if (is->buf.pos >= is->buf.len) {
|
243
243
|
is_refill(is);
|
@@ -344,7 +344,7 @@ f_u64 is_read_u64(InStream *is)
|
|
344
344
|
}
|
345
345
|
|
346
346
|
/* optimized to use unchecked read_byte if there is definitely space */
|
347
|
-
|
347
|
+
INLINE unsigned int is_read_vint(InStream *is)
|
348
348
|
{
|
349
349
|
register unsigned int res, b;
|
350
350
|
register int shift = 7;
|
@@ -374,7 +374,7 @@ __inline unsigned int is_read_vint(InStream *is)
|
|
374
374
|
}
|
375
375
|
|
376
376
|
/* optimized to use unchecked read_byte if there is definitely space */
|
377
|
-
|
377
|
+
INLINE off_t is_read_voff_t(InStream *is)
|
378
378
|
{
|
379
379
|
register off_t res, b;
|
380
380
|
register int shift = 7;
|
@@ -403,7 +403,7 @@ __inline off_t is_read_voff_t(InStream *is)
|
|
403
403
|
return res;
|
404
404
|
}
|
405
405
|
|
406
|
-
|
406
|
+
INLINE void is_skip_vints(InStream *is, register int cnt)
|
407
407
|
{
|
408
408
|
for (; cnt > 0; cnt--) {
|
409
409
|
while ((is_read_byte(is) & 0x80) != 0) {
|
@@ -411,7 +411,7 @@ __inline void is_skip_vints(InStream *is, register int cnt)
|
|
411
411
|
}
|
412
412
|
}
|
413
413
|
|
414
|
-
|
414
|
+
INLINE void is_read_chars(InStream *is, char *buffer,
|
415
415
|
int off, int len)
|
416
416
|
{
|
417
417
|
int end, i;
|
@@ -508,7 +508,7 @@ void os_write_u64(OutStream *os, f_u64 num)
|
|
508
508
|
}
|
509
509
|
|
510
510
|
/* optimized to use an unchecked write if there is space */
|
511
|
-
|
511
|
+
INLINE void os_write_vint(OutStream *os, register unsigned int num)
|
512
512
|
{
|
513
513
|
if (os->buf.pos > VINT_END) {
|
514
514
|
while (num > 127) {
|
@@ -527,7 +527,7 @@ __inline void os_write_vint(OutStream *os, register unsigned int num)
|
|
527
527
|
}
|
528
528
|
|
529
529
|
/* optimized to use an unchecked write if there is space */
|
530
|
-
|
530
|
+
INLINE void os_write_voff_t(OutStream *os, register off_t num)
|
531
531
|
{
|
532
532
|
if (os->buf.pos > VINT_END) {
|
533
533
|
while (num > 127) {
|
data/ext/store.h
CHANGED
@@ -176,6 +176,7 @@ struct Store
|
|
176
176
|
CompoundStore *cmpd; /* for compound_store only */
|
177
177
|
} dir;
|
178
178
|
|
179
|
+
mode_t file_mode;
|
179
180
|
HashSet *locks;
|
180
181
|
|
181
182
|
/**
|
@@ -605,7 +606,7 @@ extern InStream *is_clone(InStream *is);
|
|
605
606
|
* @raise IO_ERROR if there is an error reading from the file-system
|
606
607
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
607
608
|
*/
|
608
|
-
extern
|
609
|
+
extern INLINE uchar is_read_byte(InStream *is);
|
609
610
|
|
610
611
|
/**
|
611
612
|
* Read +len+ bytes from InStream +is+ and write them to buffer +buf+
|
@@ -668,7 +669,7 @@ extern f_u64 is_read_u64(InStream *is);
|
|
668
669
|
* @raise IO_ERROR if there is an error reading from the file-system
|
669
670
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
670
671
|
*/
|
671
|
-
extern
|
672
|
+
extern INLINE unsigned int is_read_vint(InStream *is);
|
672
673
|
|
673
674
|
/**
|
674
675
|
* Skip _cnt_ vints. This is a convenience method used for performance reasons
|
@@ -680,7 +681,7 @@ extern __inline unsigned int is_read_vint(InStream *is);
|
|
680
681
|
* @raise IO_ERROR if there is an error reading from the file-system
|
681
682
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
682
683
|
*/
|
683
|
-
extern
|
684
|
+
extern INLINE void is_skip_vints(InStream *is, register int cnt);
|
684
685
|
|
685
686
|
/**
|
686
687
|
* Read a compressed (VINT) unsigned off_t from the InStream.
|
@@ -691,7 +692,7 @@ extern __inline void is_skip_vints(InStream *is, register int cnt);
|
|
691
692
|
* @raise IO_ERROR if there is an error reading from the file-system
|
692
693
|
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
693
694
|
*/
|
694
|
-
extern
|
695
|
+
extern INLINE off_t is_read_voff_t(InStream *is);
|
695
696
|
|
696
697
|
/**
|
697
698
|
* Read a string from the InStream. A string is an integer +length+ in vint
|
data/lib/ferret/document.rb
CHANGED
@@ -25,7 +25,7 @@ module Ferret
|
|
25
25
|
# textual values. If you are coming from a Lucene background you should note
|
26
26
|
# that Fields don't have any properties except for the boost property. You
|
27
27
|
# should use the Ferret::Index::FieldInfos class to set field properties
|
28
|
-
#
|
28
|
+
# across the whole index instead.
|
29
29
|
#
|
30
30
|
# === Boost
|
31
31
|
#
|
@@ -57,7 +57,7 @@ module Ferret
|
|
57
57
|
end
|
58
58
|
alias :== :eql?
|
59
59
|
|
60
|
-
# Create a string
|
60
|
+
# Create a string representation of the document
|
61
61
|
def to_s
|
62
62
|
buf = ["Document {"]
|
63
63
|
self.keys.sort_by {|key| key.to_s}.each do |key|
|
data/lib/ferret/field_infos.rb
CHANGED
@@ -1,41 +1,43 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
3
|
+
module Ferret::Index
|
4
|
+
class FieldInfos
|
5
|
+
# Load FieldInfos from a YAML file. The YAML file should look something like
|
6
|
+
# this:
|
7
|
+
# default:
|
8
|
+
# store: :yes
|
9
|
+
# index: :yes
|
10
|
+
# term_vector: :no
|
11
|
+
#
|
12
|
+
# fields:
|
13
|
+
# id:
|
14
|
+
# index: :untokenized
|
15
|
+
# term_vector: :no
|
16
|
+
#
|
17
|
+
# title:
|
18
|
+
# boost: 20.0
|
19
|
+
# term_vector: :no
|
20
|
+
#
|
21
|
+
# content:
|
22
|
+
# term_vector: :with_positions_offsets
|
23
|
+
#
|
24
|
+
def self.load(yaml_str)
|
25
|
+
info = YAML.load(yaml_str)
|
26
|
+
convert_strings_to_symbols(info)
|
27
|
+
fis = FieldInfos.new(info[:default])
|
28
|
+
fields = info[:fields]
|
29
|
+
fields.keys.each {|key| fis.add_field(key, fields[key])} if fields
|
30
|
+
fis
|
31
|
+
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
private
|
34
|
+
def self.convert_strings_to_symbols(hash)
|
35
|
+
hash.keys.each do |key|
|
36
|
+
convert_strings_to_symbols(hash[key]) if hash[key].is_a?(Hash)
|
37
|
+
if key.is_a?(String)
|
38
|
+
hash[key.intern] = hash[key]
|
39
|
+
hash.delete(key)
|
40
|
+
end
|
39
41
|
end
|
40
42
|
end
|
41
43
|
end
|