isomorfeus-ferret 0.17.2 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +161 -187
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +77 -69
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +5 -33
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +14 -33
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +21 -39
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +334 -848
- data/ext/isomorfeus_ferret_ext/frt_index.h +4 -105
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +131 -217
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +18 -26
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +27 -28
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +64 -116
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +8 -14
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +251 -365
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +109 -191
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +12 -23
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +41 -88
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +16 -25
- data/ext/isomorfeus_ferret_ext/test_filter.c +22 -33
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +307 -519
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +66 -115
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +15 -21
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +8 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +32 -6
@@ -8,8 +8,10 @@ static ID id_ref_cnt;
|
|
8
8
|
VALUE cLock;
|
9
9
|
VALUE cLockError;
|
10
10
|
VALUE cDirectory;
|
11
|
-
VALUE cRAMDirectory;
|
12
11
|
VALUE cMDBXDirectory;
|
12
|
+
VALUE cRAMDirectory;
|
13
|
+
VALUE cObjectStore;
|
14
|
+
VALUE mStore;
|
13
15
|
|
14
16
|
/****************************************************************************
|
15
17
|
* Lock Methods
|
@@ -307,7 +309,7 @@ static VALUE frb_dir_close(VALUE self) {
|
|
307
309
|
|
308
310
|
/*
|
309
311
|
* call-seq:
|
310
|
-
* dir.exists?(file_name) -> nil
|
312
|
+
* dir.exist?(file_name) -> true or false
|
311
313
|
*
|
312
314
|
* Return true if a file with the name +file_name+ exists in the directory.
|
313
315
|
*/
|
@@ -319,7 +321,7 @@ static VALUE frb_dir_exists(VALUE self, VALUE rfname) {
|
|
319
321
|
bool res;
|
320
322
|
|
321
323
|
FRT_TRY
|
322
|
-
res = store->exists(store, rs2s(rfname));
|
324
|
+
res = store->exists(store, segm_idx_name, rs2s(rfname));
|
323
325
|
FRT_XCATCHALL
|
324
326
|
ex_code = xcontext.excode;
|
325
327
|
msg = xcontext.msg;
|
@@ -346,7 +348,7 @@ static VALUE frb_dir_touch(VALUE self, VALUE rfname) {
|
|
346
348
|
StringValue(rfname);
|
347
349
|
|
348
350
|
FRT_TRY
|
349
|
-
store->touch(store, rs2s(rfname));
|
351
|
+
store->touch(store, segm_idx_name, rs2s(rfname));
|
350
352
|
FRT_XCATCHALL
|
351
353
|
ex_code = xcontext.excode;
|
352
354
|
msg = xcontext.msg;
|
@@ -373,7 +375,7 @@ static VALUE frb_dir_delete(VALUE self, VALUE rfname) {
|
|
373
375
|
StringValue(rfname);
|
374
376
|
bool res;
|
375
377
|
FRT_TRY
|
376
|
-
res = (store->remove(store, rs2s(rfname)) == 0);
|
378
|
+
res = (store->remove(store, segm_idx_name, rs2s(rfname)) == 0);
|
377
379
|
FRT_XCATCHALL
|
378
380
|
ex_code = xcontext.excode;
|
379
381
|
msg = xcontext.msg;
|
@@ -399,7 +401,7 @@ static VALUE frb_dir_file_count(VALUE self) {
|
|
399
401
|
FrtStore *store = DATA_PTR(self);
|
400
402
|
int cnt = 0;
|
401
403
|
FRT_TRY
|
402
|
-
cnt = INT2FIX(store->count(store));
|
404
|
+
cnt = INT2FIX(store->count(store, segm_idx_name));
|
403
405
|
FRT_XCATCHALL
|
404
406
|
ex_code = xcontext.excode;
|
405
407
|
msg = xcontext.msg;
|
@@ -425,7 +427,7 @@ static VALUE frb_dir_refresh(VALUE self) {
|
|
425
427
|
FrtStore *store = DATA_PTR(self);
|
426
428
|
|
427
429
|
FRT_TRY
|
428
|
-
store->clear_all(store);
|
430
|
+
store->clear_all(store, segm_idx_name);
|
429
431
|
FRT_XCATCHALL
|
430
432
|
ex_code = xcontext.excode;
|
431
433
|
msg = xcontext.msg;
|
@@ -454,7 +456,7 @@ static VALUE frb_dir_rename(VALUE self, VALUE rfrom, VALUE rto) {
|
|
454
456
|
StringValue(rfrom);
|
455
457
|
StringValue(rto);
|
456
458
|
FRT_TRY
|
457
|
-
store->rename(store, rs2s(rfrom), rs2s(rto));
|
459
|
+
store->rename(store, segm_idx_name, rs2s(rfrom), rs2s(rto));
|
458
460
|
FRT_XCATCHALL
|
459
461
|
ex_code = xcontext.excode;
|
460
462
|
msg = xcontext.msg;
|
@@ -486,7 +488,7 @@ static VALUE frb_dir_make_lock(VALUE self, VALUE rlock_name) {
|
|
486
488
|
FrtStore *store = DATA_PTR(self);
|
487
489
|
StringValue(rlock_name);
|
488
490
|
FRT_TRY
|
489
|
-
lock = frt_open_lock(store, rs2s(rlock_name));
|
491
|
+
lock = frt_open_lock(store, segm_idx_name, rs2s(rlock_name));
|
490
492
|
FRT_XCATCHALL
|
491
493
|
ex_code = xcontext.excode;
|
492
494
|
msg = xcontext.msg;
|
@@ -529,10 +531,13 @@ static VALUE frb_ramdir_init(int argc, VALUE *argv, VALUE self) {
|
|
529
531
|
case 1: {
|
530
532
|
FrtStore *ostore;
|
531
533
|
TypedData_Get_Struct(rdir, FrtStore, &frb_store_t, ostore);
|
532
|
-
frt_open_ram_store_and_copy(store, ostore, false);
|
534
|
+
frt_open_ram_store_and_copy(store, ostore, segm_idx_name, false);
|
533
535
|
break;
|
534
536
|
}
|
535
|
-
default:
|
537
|
+
default: {
|
538
|
+
frt_open_ram_store(store);
|
539
|
+
store->create_folder(store, segm_idx_name);
|
540
|
+
}
|
536
541
|
}
|
537
542
|
store->rstore = self;
|
538
543
|
rb_ivar_set(self, id_ref_cnt, INT2FIX(0));
|
@@ -583,7 +588,8 @@ static VALUE frb_mdbxdir_new(int argc, VALUE *argv, VALUE klass) {
|
|
583
588
|
|
584
589
|
FRT_TRY
|
585
590
|
store = frt_open_mdbx_store(rs2s(rpath));
|
586
|
-
|
591
|
+
store->create_folder(store, segm_idx_name);
|
592
|
+
if (create) store->clear_all(store, segm_idx_name);
|
587
593
|
self = store->rstore;
|
588
594
|
if (self == Qnil || DATA_PTR(self) == NULL) {
|
589
595
|
self = TypedData_Wrap_Struct(klass, &frb_store_t, store);
|
@@ -634,7 +640,6 @@ void Init_Directory(void) {
|
|
634
640
|
cDirectory = rb_define_class_under(mStore, "Directory", rb_cObject);
|
635
641
|
rb_define_const(cDirectory, "LOCK_PREFIX", rb_str_new2(FRT_LOCK_PREFIX));
|
636
642
|
rb_define_method(cDirectory, "close", frb_dir_close, 0);
|
637
|
-
rb_define_method(cDirectory, "exists?", frb_dir_exists, 1);
|
638
643
|
rb_define_method(cDirectory, "exist?", frb_dir_exists, 1);
|
639
644
|
rb_define_method(cDirectory, "touch", frb_dir_touch, 1);
|
640
645
|
rb_define_method(cDirectory, "delete", frb_dir_delete, 1);
|
@@ -707,6 +712,14 @@ void Init_MDBXDirectory(void) {
|
|
707
712
|
rb_define_singleton_method(cMDBXDirectory, "new", frb_mdbxdir_new, -1);
|
708
713
|
}
|
709
714
|
|
715
|
+
void Init_ObjectStore(void) {
|
716
|
+
cObjectStore = rb_define_class_under(mStore, "ObjectStore", rb_cObject);
|
717
|
+
// rb_define_alloc_func(cObjectStore, frb_obst_alloc);
|
718
|
+
// rb_define_method(cObjectStore, "initialize", frb_obst_init, -1);
|
719
|
+
// rb_define_method(cObjectStore, "fetch", frb_obst_fetch, -1);
|
720
|
+
// rb_define_method(cObjectStore, "key?", frb_obst_key?, -1);
|
721
|
+
// rb_define_method(cObjectStore, "store", frb_obst_store, -1);
|
722
|
+
}
|
710
723
|
/*
|
711
724
|
* Document-module: Ferret::Store
|
712
725
|
*
|
@@ -724,4 +737,5 @@ void Init_Store(void) {
|
|
724
737
|
Init_Lock();
|
725
738
|
Init_RAMDirectory();
|
726
739
|
Init_MDBXDirectory();
|
740
|
+
Init_ObjectStore();
|
727
741
|
}
|
@@ -599,8 +599,7 @@ static int frb_mulmap_add_mappings_i(VALUE key, VALUE value, VALUE arg) {
|
|
599
599
|
for (i = RARRAY_LEN(key) - 1; i >= 0; i--) {
|
600
600
|
frb_mulmap_add_mapping_i(mulmap, RARRAY_PTR(key)[i], to);
|
601
601
|
}
|
602
|
-
}
|
603
|
-
else {
|
602
|
+
} else {
|
604
603
|
frb_mulmap_add_mapping_i(mulmap, key, to);
|
605
604
|
}
|
606
605
|
}
|
@@ -904,8 +903,7 @@ static VALUE frb_pq_insert(VALUE self, VALUE elem) {
|
|
904
903
|
GET_PQ(pq, self);
|
905
904
|
if (pq->size < pq->capa) {
|
906
905
|
frb_pq_push(pq, elem);
|
907
|
-
}
|
908
|
-
else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
|
906
|
+
} else if (pq->size > 0 && frb_pq_lt(pq->proc, pq->heap[1], elem)) {
|
909
907
|
pq->heap[1] = elem;
|
910
908
|
frb_pq_down(pq);
|
911
909
|
}
|
@@ -957,8 +955,7 @@ static VALUE frb_pq_pop(VALUE self) {
|
|
957
955
|
pq->size--;
|
958
956
|
frb_pq_down(pq); /* adjust heap */
|
959
957
|
return result;
|
960
|
-
}
|
961
|
-
else {
|
958
|
+
} else {
|
962
959
|
return Qnil;
|
963
960
|
}
|
964
961
|
}
|
@@ -52,7 +52,7 @@ static bool cp_enc_istok(OnigCodePoint cp, rb_encoding *enc) {
|
|
52
52
|
return false;
|
53
53
|
}
|
54
54
|
|
55
|
-
static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc) {
|
55
|
+
static inline int get_cp(const char *start, const char *end, int *cp_len, rb_encoding *enc) {
|
56
56
|
if (start >= end) {
|
57
57
|
*cp_len = 0;
|
58
58
|
return 0;
|
@@ -64,7 +64,7 @@ static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc)
|
|
64
64
|
/*** FrtToken ****************************************************************/
|
65
65
|
/*****************************************************************************/
|
66
66
|
|
67
|
-
FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
67
|
+
FrtToken *frt_tk_set(FrtToken *tk, const char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
68
68
|
if (tlen >= FRT_MAX_WORD_SIZE) {
|
69
69
|
char *head_last = rb_enc_left_char_head(text, text + FRT_MAX_WORD_SIZE - 1, text + tlen, encoding);
|
70
70
|
tlen = head_last - text;
|
@@ -89,11 +89,11 @@ FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_of
|
|
89
89
|
return tk;
|
90
90
|
}
|
91
91
|
|
92
|
-
|
92
|
+
FrtToken *frt_tk_set_ts(FrtToken *tk, const char *start, const char *end, const char *text, int pos_inc, rb_encoding *encoding) {
|
93
93
|
return frt_tk_set(tk, start, (int)(end - start), (off_t)(start - text), (off_t)(end - text), pos_inc, encoding);
|
94
94
|
}
|
95
95
|
|
96
|
-
FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
96
|
+
FrtToken *frt_tk_set_no_len(FrtToken *tk, const char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
97
97
|
return frt_tk_set(tk, text, (int)strlen(text), start, end, pos_inc, encoding);
|
98
98
|
}
|
99
99
|
|
@@ -138,7 +138,7 @@ void frt_ts_deref(FrtTokenStream *ts) {
|
|
138
138
|
ts->destroy_i(ts);
|
139
139
|
}
|
140
140
|
|
141
|
-
FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding) {
|
141
|
+
FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding) {
|
142
142
|
ts->t = ts->text = text;
|
143
143
|
ts->length = strlen(text);
|
144
144
|
ts->encoding = encoding;
|
@@ -224,14 +224,13 @@ FrtTokenStream *frt_non_tokenizer_new(void) {
|
|
224
224
|
/*** FrtWhiteSpaceTokenizer **************************************************/
|
225
225
|
/*****************************************************************************/
|
226
226
|
|
227
|
-
static FrtToken *wst_next(FrtTokenStream *ts)
|
228
|
-
{
|
227
|
+
static FrtToken *wst_next(FrtTokenStream *ts) {
|
229
228
|
int cp_len = 0;
|
230
229
|
OnigCodePoint cp;
|
231
230
|
rb_encoding *enc = ts->encoding;
|
232
|
-
char *end = ts->text + ts->length;
|
233
|
-
char *start;
|
234
|
-
char *t = ts->t;
|
231
|
+
const char *end = ts->text + ts->length;
|
232
|
+
const char *start;
|
233
|
+
const char *t = ts->t;
|
235
234
|
|
236
235
|
cp = get_cp(t, end, &cp_len, enc);
|
237
236
|
if (cp < 1)
|
@@ -278,9 +277,9 @@ static FrtToken *lt_next(FrtTokenStream *ts) {
|
|
278
277
|
int cp_len = 0;
|
279
278
|
OnigCodePoint cp;
|
280
279
|
rb_encoding *enc = ts->encoding;
|
281
|
-
char *end = ts->text + ts->length;
|
282
|
-
char *start;
|
283
|
-
char *t = ts->t;
|
280
|
+
const char *end = ts->text + ts->length;
|
281
|
+
const char *start;
|
282
|
+
const char *t = ts->t;
|
284
283
|
|
285
284
|
cp = get_cp(t, end, &cp_len, enc);
|
286
285
|
if (cp < 1)
|
@@ -324,9 +323,9 @@ FrtTokenStream *frt_letter_tokenizer_new(void) {
|
|
324
323
|
/*****************************************************************************/
|
325
324
|
|
326
325
|
static int std_get_alnum(FrtTokenStream *ts, char *token, OnigCodePoint cp, int *cp_len_p, OnigCodePoint *cp_out_p, rb_encoding *enc) {
|
327
|
-
char *end = ts->text + ts->length;
|
328
|
-
char *t = ts->t;
|
329
|
-
char *tt = ts->t;
|
326
|
+
const char *end = ts->text + ts->length;
|
327
|
+
const char *t = ts->t;
|
328
|
+
const char *tt = ts->t;
|
330
329
|
int cp_len = *cp_len_p;
|
331
330
|
|
332
331
|
while (cp > 0 && rb_enc_isalnum(cp, enc)) {
|
@@ -349,10 +348,9 @@ static int std_get_alnum(FrtTokenStream *ts, char *token, OnigCodePoint cp, int
|
|
349
348
|
* (alnum) = [a-zA-Z0-9]
|
350
349
|
* (punc) = [_\/.,-]
|
351
350
|
*/
|
352
|
-
static int std_get_number(FrtTokenStream *ts, char *start, char *end, OnigCodePoint cp, int cp_len_a, rb_encoding *enc) {
|
353
|
-
|
351
|
+
static int std_get_number(FrtTokenStream *ts, const char *start, const char *end, OnigCodePoint cp, int cp_len_a, rb_encoding *enc) {
|
354
352
|
OnigCodePoint cp_1 = 0;
|
355
|
-
char *t = start;
|
353
|
+
const char *t = start;
|
356
354
|
int cp_len = cp_len_a;
|
357
355
|
int cp_1_len = 0;
|
358
356
|
int last_seen_digit = 2;
|
@@ -385,10 +383,10 @@ static int std_get_number(FrtTokenStream *ts, char *start, char *end, OnigCodePo
|
|
385
383
|
}
|
386
384
|
}
|
387
385
|
|
388
|
-
static int std_get_apostrophe(FrtTokenStream *ts, char *input, OnigCodePoint cp, int *cp_len_p, rb_encoding *enc) {
|
386
|
+
static int std_get_apostrophe(FrtTokenStream *ts, const char *input, OnigCodePoint cp, int *cp_len_p, rb_encoding *enc) {
|
389
387
|
int cp_len = *cp_len_p;
|
390
|
-
char *end = ts->text + ts->length;
|
391
|
-
char *t = input;
|
388
|
+
const char *end = ts->text + ts->length;
|
389
|
+
const char *t = input;
|
392
390
|
|
393
391
|
while (cp_len > 0 && (rb_enc_isalpha(cp, enc) || cp == cp_apostrophe)) {
|
394
392
|
t += cp_len;
|
@@ -397,14 +395,14 @@ static int std_get_apostrophe(FrtTokenStream *ts, char *input, OnigCodePoint cp,
|
|
397
395
|
return (int)(t - input);
|
398
396
|
}
|
399
397
|
|
400
|
-
static char *std_get_url(FrtTokenStream *ts, char *start, char *end, char *token, int *len, int bufred) {
|
398
|
+
static const char *std_get_url(FrtTokenStream *ts, const char *start, const char *end, char *token, int *len, int bufred) {
|
401
399
|
rb_encoding *enc = ts->encoding;
|
402
400
|
OnigCodePoint cp;
|
403
401
|
OnigCodePoint prev_cp = 0;
|
404
402
|
int cp_len = 0;
|
405
403
|
int prev_cp_len = 0;
|
406
|
-
char *t = start;
|
407
|
-
char *tt = start;
|
404
|
+
const char *t = start;
|
405
|
+
const char *tt = start;
|
408
406
|
|
409
407
|
cp = get_cp(t, end, &cp_len, enc);
|
410
408
|
while (cp > 0 && cp_enc_isurlc(cp, enc)) {
|
@@ -432,9 +430,9 @@ static char *std_get_url(FrtTokenStream *ts, char *start, char *end, char *token
|
|
432
430
|
}
|
433
431
|
|
434
432
|
/* Company names can contain '@' and '&' like AT&T and Excite@Home. */
|
435
|
-
static int std_get_company_name(FrtTokenStream *ts, char *start, char* end) {
|
433
|
+
static int std_get_company_name(FrtTokenStream *ts, const char *start, const char* end) {
|
436
434
|
rb_encoding *enc = ts->encoding;
|
437
|
-
char * t = start;
|
435
|
+
const char * t = start;
|
438
436
|
OnigCodePoint cp;
|
439
437
|
int cp_len = 0;
|
440
438
|
|
@@ -452,8 +450,8 @@ static int std_advance_to_start(FrtTokenStream *ts, int *cp_len_p, OnigCodePoint
|
|
452
450
|
int cp_next = 0;
|
453
451
|
int cp_len_next = 0;
|
454
452
|
OnigCodePoint cp;
|
455
|
-
char *end = ts->text + ts->length;
|
456
|
-
char *t = ts->t;
|
453
|
+
const char *end = ts->text + ts->length;
|
454
|
+
const char *t = ts->t;
|
457
455
|
|
458
456
|
cp = get_cp(t, end, &cp_len, enc);
|
459
457
|
while (cp > 0 && !rb_enc_isalnum(cp, enc)) {
|
@@ -472,11 +470,9 @@ static int std_advance_to_start(FrtTokenStream *ts, int *cp_len_p, OnigCodePoint
|
|
472
470
|
}
|
473
471
|
|
474
472
|
static FrtToken *std_next(FrtTokenStream *ts) {
|
475
|
-
char *s;
|
476
|
-
char *t;
|
477
|
-
char *start = NULL;
|
478
|
-
char *end;
|
479
|
-
char *num_end = NULL;
|
473
|
+
const char *s, *t, *end;
|
474
|
+
const char *start = NULL;
|
475
|
+
const char *num_end = NULL;
|
480
476
|
char token[FRT_MAX_WORD_SIZE + 1];
|
481
477
|
OnigCodePoint cp = 0;
|
482
478
|
OnigCodePoint cp_1 = 0;
|
@@ -528,13 +524,11 @@ static FrtToken *std_next(FrtTokenStream *ts) {
|
|
528
524
|
t -= 2;
|
529
525
|
frt_tk_set_ts(&(ts->token), start, t, ts->text, 1, enc);
|
530
526
|
ts->token.end += 2;
|
531
|
-
}
|
532
|
-
else if (t[-1] == '\'') {
|
527
|
+
} else if (t[-1] == '\'') {
|
533
528
|
t -= 1;
|
534
529
|
frt_tk_set_ts(&(ts->token), start, t, ts->text, 1, enc);
|
535
530
|
ts->token.end += 1;
|
536
|
-
}
|
537
|
-
else {
|
531
|
+
} else {
|
538
532
|
frt_tk_set_ts(&(ts->token), start, t, ts->text, 1, enc);
|
539
533
|
}
|
540
534
|
return &(ts->token);
|
@@ -606,8 +600,7 @@ static FrtToken *std_next(FrtTokenStream *ts) {
|
|
606
600
|
if (cp == cp_at) {
|
607
601
|
if (seen_at_symbol) {
|
608
602
|
break; /* we can only have one @ symbol */
|
609
|
-
}
|
610
|
-
else {
|
603
|
+
} else {
|
611
604
|
seen_at_symbol = true;
|
612
605
|
}
|
613
606
|
}
|
@@ -693,7 +686,7 @@ static FrtTokenStream *filter_clone_i(FrtTokenStream *ts) {
|
|
693
686
|
return frt_filter_clone_size(ts, sizeof(FrtTokenFilter));
|
694
687
|
}
|
695
688
|
|
696
|
-
static FrtTokenStream *filter_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding) {
|
689
|
+
static FrtTokenStream *filter_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding) {
|
697
690
|
TkFilt(ts)->sub_ts->reset(TkFilt(ts)->sub_ts, text, encoding);
|
698
691
|
return ts;
|
699
692
|
}
|
@@ -837,7 +830,7 @@ static FrtToken *mf_next(FrtTokenStream *ts) {
|
|
837
830
|
return tk;
|
838
831
|
}
|
839
832
|
|
840
|
-
static FrtTokenStream *mf_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding) {
|
833
|
+
static FrtTokenStream *mf_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding) {
|
841
834
|
FrtMultiMapper *mm = MFilt(ts)->mapper;
|
842
835
|
if (mm->d_size == 0)
|
843
836
|
frt_mulmap_compile(MFilt(ts)->mapper);
|
@@ -1100,7 +1093,7 @@ static void frt_a_standard_destroy_i(FrtAnalyzer *a) {
|
|
1100
1093
|
free(a);
|
1101
1094
|
}
|
1102
1095
|
|
1103
|
-
static FrtTokenStream *a_standard_get_ts(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding) {
|
1096
|
+
static FrtTokenStream *a_standard_get_ts(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding) {
|
1104
1097
|
FrtTokenStream *ts;
|
1105
1098
|
(void)field;
|
1106
1099
|
ts = frt_ts_clone(a->current_ts);
|
@@ -1112,7 +1105,7 @@ FrtAnalyzer *frt_analyzer_alloc(void) {
|
|
1112
1105
|
}
|
1113
1106
|
|
1114
1107
|
void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy_i)(FrtAnalyzer *a),
|
1115
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding)) {
|
1108
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding)) {
|
1116
1109
|
a->current_ts = ts;
|
1117
1110
|
a->destroy_i = (destroy_i ? destroy_i : &frt_a_standard_destroy_i);
|
1118
1111
|
a->get_ts = (get_ts ? get_ts : &a_standard_get_ts);
|
@@ -1121,7 +1114,7 @@ void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy_i)(Frt
|
|
1121
1114
|
}
|
1122
1115
|
|
1123
1116
|
FrtAnalyzer *frt_analyzer_new(FrtTokenStream *ts, void (*destroy_i)(FrtAnalyzer *a),
|
1124
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding)) {
|
1117
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding)) {
|
1125
1118
|
FrtAnalyzer *a = frt_analyzer_alloc();
|
1126
1119
|
frt_analyzer_init(a, ts, destroy_i, get_ts);
|
1127
1120
|
return a;
|
@@ -1214,7 +1207,7 @@ static void pfa_destroy_i(FrtAnalyzer *self) {
|
|
1214
1207
|
free(self);
|
1215
1208
|
}
|
1216
1209
|
|
1217
|
-
static FrtTokenStream *pfa_get_ts(FrtAnalyzer *self, ID field, char *text, rb_encoding *encoding) {
|
1210
|
+
static FrtTokenStream *pfa_get_ts(FrtAnalyzer *self, ID field, const char *text, rb_encoding *encoding) {
|
1218
1211
|
FrtAnalyzer *a = (FrtAnalyzer *)frt_h_get(PFA(self)->dict, (void *)field);
|
1219
1212
|
if (a == NULL)
|
1220
1213
|
a = PFA(self)->default_a;
|
@@ -20,8 +20,8 @@ typedef struct FrtToken {
|
|
20
20
|
|
21
21
|
extern FrtToken *frt_tk_new();
|
22
22
|
extern void frt_tk_destroy(void *p);
|
23
|
-
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
-
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
23
|
+
extern FrtToken *frt_tk_set(FrtToken *tk, const char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
+
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, const char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
25
25
|
extern int frt_tk_eq(FrtToken *tk1, FrtToken *tk2);
|
26
26
|
extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
27
27
|
|
@@ -31,12 +31,12 @@ extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
|
31
31
|
|
32
32
|
typedef struct FrtTokenStream FrtTokenStream;
|
33
33
|
struct FrtTokenStream {
|
34
|
-
char *t; /* ptr used to scan text */
|
35
|
-
char *text;
|
34
|
+
const char *t; /* ptr used to scan text */
|
35
|
+
const char *text;
|
36
36
|
int length;
|
37
37
|
rb_encoding *encoding;
|
38
38
|
FrtToken *(*next)(FrtTokenStream *ts);
|
39
|
-
FrtTokenStream *(*reset)(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
39
|
+
FrtTokenStream *(*reset)(FrtTokenStream *ts, const char *text, rb_encoding *encoding);
|
40
40
|
FrtTokenStream *(*clone_i)(FrtTokenStream *ts);
|
41
41
|
void (*destroy_i)(FrtTokenStream *ts);
|
42
42
|
_Atomic unsigned int ref_cnt;
|
@@ -46,7 +46,7 @@ struct FrtTokenStream {
|
|
46
46
|
|
47
47
|
extern FrtTokenStream *frt_ts_new_i(size_t size);
|
48
48
|
extern FrtTokenStream *frt_ts_init(FrtTokenStream *ts);
|
49
|
-
extern FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
49
|
+
extern FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding);
|
50
50
|
extern FrtTokenStream *frt_ts_clone_size(FrtTokenStream *orig_ts, size_t size);
|
51
51
|
|
52
52
|
typedef struct FrtTokenFilter {
|
@@ -182,7 +182,7 @@ extern FrtTokenStream *frt_mapping_filter_add(FrtTokenStream *ts, const char *pa
|
|
182
182
|
|
183
183
|
typedef struct FrtAnalyzer {
|
184
184
|
FrtTokenStream *current_ts;
|
185
|
-
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding);
|
185
|
+
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding);
|
186
186
|
void (*destroy_i)(struct FrtAnalyzer *a);
|
187
187
|
_Atomic unsigned int ref_cnt;
|
188
188
|
VALUE ranalyzer;
|
@@ -194,9 +194,9 @@ extern void frt_a_deref(FrtAnalyzer *a);
|
|
194
194
|
|
195
195
|
extern FrtAnalyzer *frt_analyzer_alloc(void);
|
196
196
|
extern void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy)(FrtAnalyzer *a),
|
197
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding));
|
197
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding));
|
198
198
|
extern FrtAnalyzer *frt_analyzer_new(FrtTokenStream *ts, void (*destroy)(FrtAnalyzer *a),
|
199
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding));
|
199
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding));
|
200
200
|
|
201
201
|
/*****************************************************************************/
|
202
202
|
/*** FrtNonAnalyzer **********************************************************/
|
@@ -4,8 +4,7 @@
|
|
4
4
|
#define META_CNT FRT_ARY_META_CNT
|
5
5
|
#define DATA_SZ sizeof(int) * META_CNT
|
6
6
|
|
7
|
-
void **frt_ary_new_i(int type_size, int init_capa)
|
8
|
-
{
|
7
|
+
void **frt_ary_new_i(int type_size, int init_capa) {
|
9
8
|
void **ary;
|
10
9
|
if (init_capa <= 0) {
|
11
10
|
init_capa = FRT_ARY_INIT_CAPA;
|
@@ -17,8 +16,7 @@ void **frt_ary_new_i(int type_size, int init_capa)
|
|
17
16
|
return ary;
|
18
17
|
}
|
19
18
|
|
20
|
-
void frt_ary_resize_i(void ***ary, int size)
|
21
|
-
{
|
19
|
+
void frt_ary_resize_i(void ***ary, int size) {
|
22
20
|
size++;
|
23
21
|
if (size > frt_ary_sz(*ary)) {
|
24
22
|
int capa = frt_ary_capa(*ary);
|
@@ -39,8 +37,7 @@ void frt_ary_resize_i(void ***ary, int size)
|
|
39
37
|
}
|
40
38
|
}
|
41
39
|
|
42
|
-
void frt_ary_set_i(void ***ary, int index, void *value)
|
43
|
-
{
|
40
|
+
void frt_ary_set_i(void ***ary, int index, void *value) {
|
44
41
|
if (index < 0) {
|
45
42
|
index += frt_ary_sz(*ary);
|
46
43
|
if (index < 0) {
|
@@ -51,43 +48,37 @@ void frt_ary_set_i(void ***ary, int index, void *value)
|
|
51
48
|
(*ary)[index] = value;
|
52
49
|
}
|
53
50
|
|
54
|
-
void *frt_ary_get_i(void **ary, int index)
|
55
|
-
{
|
51
|
+
void *frt_ary_get_i(void **ary, int index) {
|
56
52
|
if (index < 0) {
|
57
53
|
index += frt_ary_sz(ary);
|
58
54
|
}
|
59
55
|
if (index >= 0 && index < frt_ary_sz(ary)) {
|
60
56
|
return ary[index];
|
61
|
-
}
|
62
|
-
else {
|
57
|
+
} else {
|
63
58
|
return NULL;
|
64
59
|
}
|
65
60
|
}
|
66
61
|
|
67
|
-
void frt_ary_push_i(void ***ary, void *value)
|
68
|
-
{
|
62
|
+
void frt_ary_push_i(void ***ary, void *value) {
|
69
63
|
int size = frt_ary_sz(*ary);
|
70
64
|
frt_ary_resize_i(ary, size);
|
71
65
|
(*ary)[size] = value;
|
72
66
|
}
|
73
67
|
|
74
|
-
void *frt_ary_pop_i(void **ary)
|
75
|
-
{
|
68
|
+
void *frt_ary_pop_i(void **ary) {
|
76
69
|
void *val = ary[--frt_ary_sz(ary)];
|
77
70
|
ary[frt_ary_sz(ary)] = NULL;
|
78
71
|
return val;
|
79
72
|
}
|
80
73
|
|
81
|
-
/*
 * Insert +value+ at the front of the array.
 *
 * The array is first resized for one more element, then the existing
 * elements are moved up by one slot (memmove, since the ranges overlap) and
 * +value+ is written to slot 0.
 */
void frt_ary_unshift_i(void ***ary, void *value) {
    const int count = frt_ary_sz(*ary);
    void **items;

    frt_ary_resize_i(ary, count);
    items = *ary;
    memmove(&items[1], &items[0], count * sizeof(void *));
    items[0] = value;
}
|
88
80
|
|
89
|
-
void *frt_ary_shift_i(void **ary)
|
90
|
-
{
|
81
|
+
void *frt_ary_shift_i(void **ary) {
|
91
82
|
void *val = ary[0];
|
92
83
|
int size = --frt_ary_sz(ary);
|
93
84
|
memmove(ary, ary + 1, size * sizeof(void *));
|
@@ -95,16 +86,14 @@ void *frt_ary_shift_i(void **ary)
|
|
95
86
|
return val;
|
96
87
|
}
|
97
88
|
|
98
|
-
void *frt_ary_remove_i(void **ary, int index)
|
99
|
-
{
|
89
|
+
void *frt_ary_remove_i(void **ary, int index) {
|
100
90
|
if (index >= 0 && index < frt_ary_sz(ary)) {
|
101
91
|
void *val = ary[index];
|
102
92
|
memmove(ary + index, ary + index + 1,
|
103
93
|
(frt_ary_sz(ary) - index + 1) * sizeof(void *));
|
104
94
|
frt_ary_sz(ary)--;
|
105
95
|
return val;
|
106
|
-
}
|
107
|
-
else {
|
96
|
+
} else {
|
108
97
|
return NULL;
|
109
98
|
}
|
110
99
|
}
|
@@ -100,8 +100,7 @@ static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_value(FrtBitVector *bv, int bit, b
|
|
100
100
|
if (value) {
|
101
101
|
bv->count++;
|
102
102
|
*word_p |= bitmask;
|
103
|
-
}
|
104
|
-
else {
|
103
|
+
} else {
|
105
104
|
bv->count--;
|
106
105
|
*word_p &= ~bitmask;
|
107
106
|
}
|
@@ -284,8 +283,7 @@ done:
|
|
284
283
|
* @return the next set bits index or -1 if no more bits are set
|
285
284
|
*/
|
286
285
|
static FRT_ATTR_ALWAYS_INLINE
|
287
|
-
int frt_bv_scan_next(FrtBitVector *bv)
|
288
|
-
{
|
286
|
+
int frt_bv_scan_next(FrtBitVector *bv) {
|
289
287
|
return frt_bv_scan_next_from(bv, bv->curr_bit + 1);
|
290
288
|
}
|
291
289
|
|
@@ -331,8 +329,7 @@ done:
|
|
331
329
|
* @return the next unset bits index or -1 if no more bits are unset
|
332
330
|
*/
|
333
331
|
static FRT_ATTR_ALWAYS_INLINE
|
334
|
-
int frt_bv_scan_next_unset(FrtBitVector *bv)
|
335
|
-
{
|
332
|
+
int frt_bv_scan_next_unset(FrtBitVector *bv) {
|
336
333
|
return frt_bv_scan_next_unset_from(bv, bv->curr_bit + 1);
|
337
334
|
}
|
338
335
|
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#include "frt_doc_field.h"
|
2
|
+
#include "frt_document.h"
|
3
|
+
|
4
|
+
/*
 * Allocate a document field identified by +name+.
 *
 * The field starts with no values (size 0), a boost of 1.0 and room for
 * FRT_DF_INIT_CAPA entries in each of its three parallel arrays (data,
 * lengths, encodings).
 */
FrtDocField *frt_df_new(ID name) {
    FrtDocField *field = FRT_ALLOC(FrtDocField);

    field->name = name;
    field->boost = 1.0f;
    field->size = 0;
    field->capa = FRT_DF_INIT_CAPA;

    /* the three arrays are always sized together */
    field->data = FRT_ALLOC_N(const char *, field->capa);
    field->lengths = FRT_ALLOC_N(int, field->capa);
    field->encodings = FRT_ALLOC_N(rb_encoding *, field->capa);
    return field;
}
|
15
|
+
|
16
|
+
/*
 * Append one value to +df+ without copying it: the +data+ pointer itself is
 * stored, together with its +len+ and +encoding+.
 *
 * Note that frt_df_destroy calls free() on every stored data pointer.
 * Returns +df+.
 */
FrtDocField *frt_df_add_data_len_nc(FrtDocField *df, const char *data, int len, rb_encoding *encoding) {
    int slot;

    if (df->capa <= df->size) {
        /* shifting by two bits multiplies the capacity by four */
        df->capa <<= 2;
        FRT_REALLOC_N(df->data, const char *, df->capa);
        FRT_REALLOC_N(df->lengths, int, df->capa);
        FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
    }

    slot = df->size++;
    df->data[slot] = data;
    df->lengths[slot] = len;
    df->encodings[slot] = encoding;
    return df;
}
|
29
|
+
|
30
|
+
/*
 * Append a private copy of +length+ bytes of +data+ to +self+.
 *
 * The bytes are copied with memcpy, so embedded zero bytes survive (binary
 * data), and a terminating '\0' is added after them so the copy can also be
 * used as a C string. The copy is handed to frt_df_add_data_len_nc.
 * Returns +self+.
 */
FrtDocField *frt_df_add_data_len(FrtDocField *self, const char *data, int length, rb_encoding *encoding) {
    char *copy = FRT_ALLOC_N(char, length + 1);

    memcpy(copy, data, length);
    copy[length] = '\0';
    return frt_df_add_data_len_nc(self, copy, length, encoding);
}
|
36
|
+
|
37
|
+
/*
 * Append a copy of the NUL-terminated string +data+ to +df+; the length is
 * taken from strlen. Returns +df+.
 */
FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding) {
    const int length = (int)strlen(data);

    return frt_df_add_data_len(df, data, length, encoding);
}
|
40
|
+
|
41
|
+
/*
 * Release +df+: every stored data pointer is freed first, then the three
 * parallel arrays, then the field structure itself.
 */
void frt_df_destroy(FrtDocField *df) {
    int idx = 0;

    while (idx < df->size) {
        /* data is held as const char *, hence the cast for free() */
        free((void *)df->data[idx]);
        idx++;
    }
    free(df->data);
    free(df->lengths);
    free(df->encodings);
    free(df);
}
|
51
|
+
|
52
|
+
/*
|
53
|
+
* Format for one item is: name: "data"
|
54
|
+
* for more items : name: ["data", "data", "data"]
|
55
|
+
* internally used for testing, thus encoding can be ignored
|
56
|
+
*/
|
57
|
+
char *frt_df_to_s(FrtDocField *df) {
|
58
|
+
const char *df_name = rb_id2name(df->name);
|
59
|
+
int i, len = 0, namelen = strlen(df_name);
|
60
|
+
char *str, *s;
|
61
|
+
for (i = 0; i < df->size; i++) {
|
62
|
+
len += df->lengths[i] + 4;
|
63
|
+
}
|
64
|
+
s = str = FRT_ALLOC_N(char, namelen + len + 5);
|
65
|
+
memcpy(s, df_name, namelen);
|
66
|
+
s += namelen;
|
67
|
+
s = frt_strapp(s, ": ");
|
68
|
+
|
69
|
+
if (df->size > 1) {
|
70
|
+
s = frt_strapp(s, "[");
|
71
|
+
}
|
72
|
+
for (i = 0; i < df->size; i++) {
|
73
|
+
if (i != 0) {
|
74
|
+
s = frt_strapp(s, ", ");
|
75
|
+
}
|
76
|
+
s = frt_strapp(s, "\"");
|
77
|
+
memcpy(s, df->data[i], df->lengths[i]);
|
78
|
+
s += df->lengths[i];
|
79
|
+
s = frt_strapp(s, "\"");
|
80
|
+
}
|
81
|
+
|
82
|
+
if (df->size > 1) {
|
83
|
+
s = frt_strapp(s, "]");
|
84
|
+
}
|
85
|
+
*s = 0;
|
86
|
+
return str;
|
87
|
+
}
|