isomorfeus-ferret 0.17.2 → 0.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -20,8 +20,8 @@ typedef struct FrtToken {
|
|
20
20
|
|
21
21
|
extern FrtToken *frt_tk_new();
|
22
22
|
extern void frt_tk_destroy(void *p);
|
23
|
-
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
-
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
23
|
+
extern FrtToken *frt_tk_set(FrtToken *tk, const char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
+
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, const char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
25
25
|
extern int frt_tk_eq(FrtToken *tk1, FrtToken *tk2);
|
26
26
|
extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
27
27
|
|
@@ -31,12 +31,12 @@ extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
|
31
31
|
|
32
32
|
typedef struct FrtTokenStream FrtTokenStream;
|
33
33
|
struct FrtTokenStream {
|
34
|
-
char *t; /* ptr used to scan text */
|
35
|
-
char *text;
|
34
|
+
const char *t; /* ptr used to scan text */
|
35
|
+
const char *text;
|
36
36
|
int length;
|
37
37
|
rb_encoding *encoding;
|
38
38
|
FrtToken *(*next)(FrtTokenStream *ts);
|
39
|
-
FrtTokenStream *(*reset)(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
39
|
+
FrtTokenStream *(*reset)(FrtTokenStream *ts, const char *text, rb_encoding *encoding);
|
40
40
|
FrtTokenStream *(*clone_i)(FrtTokenStream *ts);
|
41
41
|
void (*destroy_i)(FrtTokenStream *ts);
|
42
42
|
_Atomic unsigned int ref_cnt;
|
@@ -46,7 +46,7 @@ struct FrtTokenStream {
|
|
46
46
|
|
47
47
|
extern FrtTokenStream *frt_ts_new_i(size_t size);
|
48
48
|
extern FrtTokenStream *frt_ts_init(FrtTokenStream *ts);
|
49
|
-
extern FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
49
|
+
extern FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding);
|
50
50
|
extern FrtTokenStream *frt_ts_clone_size(FrtTokenStream *orig_ts, size_t size);
|
51
51
|
|
52
52
|
typedef struct FrtTokenFilter {
|
@@ -182,7 +182,7 @@ extern FrtTokenStream *frt_mapping_filter_add(FrtTokenStream *ts, const char *pa
|
|
182
182
|
|
183
183
|
typedef struct FrtAnalyzer {
|
184
184
|
FrtTokenStream *current_ts;
|
185
|
-
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding);
|
185
|
+
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding);
|
186
186
|
void (*destroy_i)(struct FrtAnalyzer *a);
|
187
187
|
_Atomic unsigned int ref_cnt;
|
188
188
|
VALUE ranalyzer;
|
@@ -194,9 +194,9 @@ extern void frt_a_deref(FrtAnalyzer *a);
|
|
194
194
|
|
195
195
|
extern FrtAnalyzer *frt_analyzer_alloc(void);
|
196
196
|
extern void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy)(FrtAnalyzer *a),
|
197
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding));
|
197
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding));
|
198
198
|
extern FrtAnalyzer *frt_analyzer_new(FrtTokenStream *ts, void (*destroy)(FrtAnalyzer *a),
|
199
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding));
|
199
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding));
|
200
200
|
|
201
201
|
/*****************************************************************************/
|
202
202
|
/*** FrtNonAnalyzer **********************************************************/
|
@@ -4,8 +4,7 @@
|
|
4
4
|
#define META_CNT FRT_ARY_META_CNT
|
5
5
|
#define DATA_SZ sizeof(int) * META_CNT
|
6
6
|
|
7
|
-
void **frt_ary_new_i(int type_size, int init_capa)
|
8
|
-
{
|
7
|
+
void **frt_ary_new_i(int type_size, int init_capa) {
|
9
8
|
void **ary;
|
10
9
|
if (init_capa <= 0) {
|
11
10
|
init_capa = FRT_ARY_INIT_CAPA;
|
@@ -17,8 +16,7 @@ void **frt_ary_new_i(int type_size, int init_capa)
|
|
17
16
|
return ary;
|
18
17
|
}
|
19
18
|
|
20
|
-
void frt_ary_resize_i(void ***ary, int size)
|
21
|
-
{
|
19
|
+
void frt_ary_resize_i(void ***ary, int size) {
|
22
20
|
size++;
|
23
21
|
if (size > frt_ary_sz(*ary)) {
|
24
22
|
int capa = frt_ary_capa(*ary);
|
@@ -39,8 +37,7 @@ void frt_ary_resize_i(void ***ary, int size)
|
|
39
37
|
}
|
40
38
|
}
|
41
39
|
|
42
|
-
void frt_ary_set_i(void ***ary, int index, void *value)
|
43
|
-
{
|
40
|
+
void frt_ary_set_i(void ***ary, int index, void *value) {
|
44
41
|
if (index < 0) {
|
45
42
|
index += frt_ary_sz(*ary);
|
46
43
|
if (index < 0) {
|
@@ -51,43 +48,37 @@ void frt_ary_set_i(void ***ary, int index, void *value)
|
|
51
48
|
(*ary)[index] = value;
|
52
49
|
}
|
53
50
|
|
54
|
-
void *frt_ary_get_i(void **ary, int index)
|
55
|
-
{
|
51
|
+
void *frt_ary_get_i(void **ary, int index) {
|
56
52
|
if (index < 0) {
|
57
53
|
index += frt_ary_sz(ary);
|
58
54
|
}
|
59
55
|
if (index >= 0 && index < frt_ary_sz(ary)) {
|
60
56
|
return ary[index];
|
61
|
-
}
|
62
|
-
else {
|
57
|
+
} else {
|
63
58
|
return NULL;
|
64
59
|
}
|
65
60
|
}
|
66
61
|
|
67
|
-
void frt_ary_push_i(void ***ary, void *value)
|
68
|
-
{
|
62
|
+
void frt_ary_push_i(void ***ary, void *value) {
|
69
63
|
int size = frt_ary_sz(*ary);
|
70
64
|
frt_ary_resize_i(ary, size);
|
71
65
|
(*ary)[size] = value;
|
72
66
|
}
|
73
67
|
|
74
|
-
void *frt_ary_pop_i(void **ary)
|
75
|
-
{
|
68
|
+
void *frt_ary_pop_i(void **ary) {
|
76
69
|
void *val = ary[--frt_ary_sz(ary)];
|
77
70
|
ary[frt_ary_sz(ary)] = NULL;
|
78
71
|
return val;
|
79
72
|
}
|
80
73
|
|
81
|
-
void frt_ary_unshift_i(void ***ary, void *value)
|
82
|
-
{
|
74
|
+
void frt_ary_unshift_i(void ***ary, void *value) {
|
83
75
|
int size = frt_ary_sz(*ary);
|
84
76
|
frt_ary_resize_i(ary, size);
|
85
77
|
memmove(*ary + 1, *ary, size * sizeof(void *));
|
86
78
|
(*ary)[0] = value;
|
87
79
|
}
|
88
80
|
|
89
|
-
void *frt_ary_shift_i(void **ary)
|
90
|
-
{
|
81
|
+
void *frt_ary_shift_i(void **ary) {
|
91
82
|
void *val = ary[0];
|
92
83
|
int size = --frt_ary_sz(ary);
|
93
84
|
memmove(ary, ary + 1, size * sizeof(void *));
|
@@ -95,16 +86,14 @@ void *frt_ary_shift_i(void **ary)
|
|
95
86
|
return val;
|
96
87
|
}
|
97
88
|
|
98
|
-
void *frt_ary_remove_i(void **ary, int index)
|
99
|
-
{
|
89
|
+
void *frt_ary_remove_i(void **ary, int index) {
|
100
90
|
if (index >= 0 && index < frt_ary_sz(ary)) {
|
101
91
|
void *val = ary[index];
|
102
92
|
memmove(ary + index, ary + index + 1,
|
103
93
|
(frt_ary_sz(ary) - index + 1) * sizeof(void *));
|
104
94
|
frt_ary_sz(ary)--;
|
105
95
|
return val;
|
106
|
-
}
|
107
|
-
else {
|
96
|
+
} else {
|
108
97
|
return NULL;
|
109
98
|
}
|
110
99
|
}
|
@@ -100,8 +100,7 @@ static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_value(FrtBitVector *bv, int bit, b
|
|
100
100
|
if (value) {
|
101
101
|
bv->count++;
|
102
102
|
*word_p |= bitmask;
|
103
|
-
}
|
104
|
-
else {
|
103
|
+
} else {
|
105
104
|
bv->count--;
|
106
105
|
*word_p &= ~bitmask;
|
107
106
|
}
|
@@ -284,8 +283,7 @@ done:
|
|
284
283
|
* @return the next set bits index or -1 if no more bits are set
|
285
284
|
*/
|
286
285
|
static FRT_ATTR_ALWAYS_INLINE
|
287
|
-
int frt_bv_scan_next(FrtBitVector *bv)
|
288
|
-
{
|
286
|
+
int frt_bv_scan_next(FrtBitVector *bv) {
|
289
287
|
return frt_bv_scan_next_from(bv, bv->curr_bit + 1);
|
290
288
|
}
|
291
289
|
|
@@ -331,8 +329,7 @@ done:
|
|
331
329
|
* @return the next unset bits index or -1 if no more bits are unset
|
332
330
|
*/
|
333
331
|
static FRT_ATTR_ALWAYS_INLINE
|
334
|
-
int frt_bv_scan_next_unset(FrtBitVector *bv)
|
335
|
-
{
|
332
|
+
int frt_bv_scan_next_unset(FrtBitVector *bv) {
|
336
333
|
return frt_bv_scan_next_unset_from(bv, bv->curr_bit + 1);
|
337
334
|
}
|
338
335
|
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#include "frt_doc_field.h"
|
2
|
+
#include "frt_document.h"
|
3
|
+
|
4
|
+
FrtDocField *frt_df_new(ID name) {
|
5
|
+
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
6
|
+
df->name = name;
|
7
|
+
df->size = 0;
|
8
|
+
df->capa = FRT_DF_INIT_CAPA;
|
9
|
+
df->data = FRT_ALLOC_N(const char *, df->capa);
|
10
|
+
df->lengths = FRT_ALLOC_N(int, df->capa);
|
11
|
+
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
12
|
+
df->boost = 1.0f;
|
13
|
+
return df;
|
14
|
+
}
|
15
|
+
|
16
|
+
FrtDocField *frt_df_add_data_len_nc(FrtDocField *df, const char *data, int len, rb_encoding *encoding) {
|
17
|
+
if (df->size >= df->capa) {
|
18
|
+
df->capa <<= 2;
|
19
|
+
FRT_REALLOC_N(df->data, const char *, df->capa);
|
20
|
+
FRT_REALLOC_N(df->lengths, int, df->capa);
|
21
|
+
FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
|
22
|
+
}
|
23
|
+
df->data[df->size] = data;
|
24
|
+
df->lengths[df->size] = len;
|
25
|
+
df->encodings[df->size] = encoding;
|
26
|
+
df->size++;
|
27
|
+
return df;
|
28
|
+
}
|
29
|
+
|
30
|
+
FrtDocField *frt_df_add_data_len(FrtDocField *self, const char *data, int length, rb_encoding *encoding) {
|
31
|
+
char *d = FRT_ALLOC_N(char, length + 1);
|
32
|
+
memcpy(d, data, length); /* must handle binary data ... */
|
33
|
+
d[length] = '\0'; /* ... and strings */
|
34
|
+
return frt_df_add_data_len_nc(self, d, length, encoding);
|
35
|
+
}
|
36
|
+
|
37
|
+
FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding) {
|
38
|
+
return frt_df_add_data_len(df, data, strlen(data), encoding);
|
39
|
+
}
|
40
|
+
|
41
|
+
void frt_df_destroy(FrtDocField *df) {
|
42
|
+
int i;
|
43
|
+
for (i = 0; i < df->size; i++) {
|
44
|
+
free((void *)df->data[i]);
|
45
|
+
}
|
46
|
+
free(df->data);
|
47
|
+
free(df->lengths);
|
48
|
+
free(df->encodings);
|
49
|
+
free(df);
|
50
|
+
}
|
51
|
+
|
52
|
+
/*
|
53
|
+
* Format for one item is: name: "data"
|
54
|
+
* for more items : name: ["data", "data", "data"]
|
55
|
+
* internally used for testing, thus encoding can be ignored
|
56
|
+
*/
|
57
|
+
char *frt_df_to_s(FrtDocField *df) {
|
58
|
+
const char *df_name = rb_id2name(df->name);
|
59
|
+
int i, len = 0, namelen = strlen(df_name);
|
60
|
+
char *str, *s;
|
61
|
+
for (i = 0; i < df->size; i++) {
|
62
|
+
len += df->lengths[i] + 4;
|
63
|
+
}
|
64
|
+
s = str = FRT_ALLOC_N(char, namelen + len + 5);
|
65
|
+
memcpy(s, df_name, namelen);
|
66
|
+
s += namelen;
|
67
|
+
s = frt_strapp(s, ": ");
|
68
|
+
|
69
|
+
if (df->size > 1) {
|
70
|
+
s = frt_strapp(s, "[");
|
71
|
+
}
|
72
|
+
for (i = 0; i < df->size; i++) {
|
73
|
+
if (i != 0) {
|
74
|
+
s = frt_strapp(s, ", ");
|
75
|
+
}
|
76
|
+
s = frt_strapp(s, "\"");
|
77
|
+
memcpy(s, df->data[i], df->lengths[i]);
|
78
|
+
s += df->lengths[i];
|
79
|
+
s = frt_strapp(s, "\"");
|
80
|
+
}
|
81
|
+
|
82
|
+
if (df->size > 1) {
|
83
|
+
s = frt_strapp(s, "]");
|
84
|
+
}
|
85
|
+
*s = 0;
|
86
|
+
return str;
|
87
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#ifndef FRT_DOC_FIELD_H
|
2
|
+
#define FRT_DOC_FIELD_H
|
3
|
+
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
#include "frt_hash.h"
|
6
|
+
|
7
|
+
#define FRT_DF_INIT_CAPA 1
|
8
|
+
|
9
|
+
typedef struct FrtDocField {
|
10
|
+
ID name;
|
11
|
+
int size;
|
12
|
+
int capa;
|
13
|
+
int *lengths;
|
14
|
+
rb_encoding **encodings; /* used for processing */
|
15
|
+
const char **data;
|
16
|
+
float boost;
|
17
|
+
FrtCompressionType compression_type;
|
18
|
+
} FrtDocField;
|
19
|
+
|
20
|
+
extern FrtDocField *frt_df_new(ID name);
|
21
|
+
extern FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding);
|
22
|
+
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, const char *data, int len, rb_encoding *encoding);
|
23
|
+
extern void frt_df_destroy(FrtDocField *df);
|
24
|
+
extern char *frt_df_to_s(FrtDocField *df);
|
25
|
+
|
26
|
+
#endif
|
@@ -1,103 +1,10 @@
|
|
1
1
|
#include "frt_document.h"
|
2
2
|
#include <string.h>
|
3
3
|
|
4
|
-
/****************************************************************************
|
5
|
-
*
|
6
|
-
* FrtDocField
|
7
|
-
*
|
8
|
-
****************************************************************************/
|
9
|
-
|
10
|
-
FrtDocField *frt_df_new(ID name) {
|
11
|
-
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
12
|
-
df->name = name;
|
13
|
-
df->size = 0;
|
14
|
-
df->capa = FRT_DF_INIT_CAPA;
|
15
|
-
df->data = FRT_ALLOC_N(char *, df->capa);
|
16
|
-
df->lengths = FRT_ALLOC_N(int, df->capa);
|
17
|
-
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
18
|
-
df->destroy_data = false;
|
19
|
-
df->boost = 1.0f;
|
20
|
-
return df;
|
21
|
-
}
|
22
|
-
|
23
|
-
FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding) {
|
24
|
-
if (df->size >= df->capa) {
|
25
|
-
df->capa <<= 2;
|
26
|
-
FRT_REALLOC_N(df->data, char *, df->capa);
|
27
|
-
FRT_REALLOC_N(df->lengths, int, df->capa);
|
28
|
-
FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
|
29
|
-
}
|
30
|
-
df->data[df->size] = data;
|
31
|
-
df->lengths[df->size] = len;
|
32
|
-
df->encodings[df->size] = encoding;
|
33
|
-
df->size++;
|
34
|
-
return df;
|
35
|
-
}
|
36
|
-
|
37
|
-
FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding) {
|
38
|
-
return frt_df_add_data_len(df, data, strlen(data), encoding);
|
39
|
-
}
|
40
|
-
|
41
|
-
void frt_df_destroy(FrtDocField *df) {
|
42
|
-
if (df->destroy_data) {
|
43
|
-
int i;
|
44
|
-
for (i = 0; i < df->size; i++) {
|
45
|
-
free(df->data[i]);
|
46
|
-
}
|
47
|
-
}
|
48
|
-
free(df->data);
|
49
|
-
free(df->lengths);
|
50
|
-
free(df->encodings);
|
51
|
-
free(df);
|
52
|
-
}
|
53
|
-
|
54
|
-
/*
|
55
|
-
* Format for one item is: name: "data"
|
56
|
-
* for more items : name: ["data", "data", "data"]
|
57
|
-
* internally used for testing, thus encoding can be ignored
|
58
|
-
*/
|
59
|
-
char *frt_df_to_s(FrtDocField *df) {
|
60
|
-
const char *df_name = rb_id2name(df->name);
|
61
|
-
int i, len = 0, namelen = strlen(df_name);
|
62
|
-
char *str, *s;
|
63
|
-
for (i = 0; i < df->size; i++) {
|
64
|
-
len += df->lengths[i] + 4;
|
65
|
-
}
|
66
|
-
s = str = FRT_ALLOC_N(char, namelen + len + 5);
|
67
|
-
memcpy(s, df_name, namelen);
|
68
|
-
s += namelen;
|
69
|
-
s = frt_strapp(s, ": ");
|
70
|
-
|
71
|
-
if (df->size > 1) {
|
72
|
-
s = frt_strapp(s, "[");
|
73
|
-
}
|
74
|
-
for (i = 0; i < df->size; i++) {
|
75
|
-
if (i != 0) {
|
76
|
-
s = frt_strapp(s, ", ");
|
77
|
-
}
|
78
|
-
s = frt_strapp(s, "\"");
|
79
|
-
memcpy(s, df->data[i], df->lengths[i]);
|
80
|
-
s += df->lengths[i];
|
81
|
-
s = frt_strapp(s, "\"");
|
82
|
-
}
|
83
|
-
|
84
|
-
if (df->size > 1) {
|
85
|
-
s = frt_strapp(s, "]");
|
86
|
-
}
|
87
|
-
*s = 0;
|
88
|
-
return str;
|
89
|
-
}
|
90
|
-
|
91
|
-
/****************************************************************************
|
92
|
-
*
|
93
|
-
* FrtDocument
|
94
|
-
*
|
95
|
-
****************************************************************************/
|
96
|
-
|
97
4
|
FrtDocument *frt_doc_new(void) {
|
98
5
|
FrtDocument *doc = FRT_ALLOC(FrtDocument);
|
99
6
|
doc->field_dict = frt_h_new_ptr((frt_free_ft)&frt_df_destroy);
|
100
|
-
doc->size = 0;
|
7
|
+
doc->field_count = 0;
|
101
8
|
doc->capa = FRT_DOC_INIT_CAPA;
|
102
9
|
doc->fields = FRT_ALLOC_N(FrtDocField *, doc->capa);
|
103
10
|
doc->boost = 1.0f;
|
@@ -109,12 +16,12 @@ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df) {
|
|
109
16
|
FRT_RAISE(FRT_EXCEPTION, "tried to add %s field which alread existed\n",
|
110
17
|
rb_id2name(df->name));
|
111
18
|
}
|
112
|
-
if (doc->size >= doc->capa) {
|
19
|
+
if (doc->field_count >= doc->capa) {
|
113
20
|
doc->capa <<= 1;
|
114
21
|
FRT_REALLOC_N(doc->fields, FrtDocField *, doc->capa);
|
115
22
|
}
|
116
|
-
doc->fields[doc->size] = df;
|
117
|
-
doc->size++;
|
23
|
+
doc->fields[doc->field_count] = df;
|
24
|
+
doc->field_count++;
|
118
25
|
return df;
|
119
26
|
}
|
120
27
|
|
@@ -2,34 +2,9 @@
|
|
2
2
|
#define FRT_DOCUMENT_H
|
3
3
|
|
4
4
|
#include "frt_global.h"
|
5
|
-
#include "frt_hash.h"
|
5
|
+
#include "frt_doc_field.h"
|
6
6
|
#include <ruby/encoding.h>
|
7
7
|
|
8
|
-
/****************************************************************************
|
9
|
-
*
|
10
|
-
* FrtDocField
|
11
|
-
*
|
12
|
-
****************************************************************************/
|
13
|
-
|
14
|
-
#define FRT_DF_INIT_CAPA 1
|
15
|
-
typedef struct FrtDocField {
|
16
|
-
ID name;
|
17
|
-
int size;
|
18
|
-
int capa;
|
19
|
-
int *lengths;
|
20
|
-
rb_encoding **encodings; /* used for processing */
|
21
|
-
char **data;
|
22
|
-
float boost;
|
23
|
-
FrtCompressionType compression;
|
24
|
-
bool destroy_data : 1;
|
25
|
-
} FrtDocField;
|
26
|
-
|
27
|
-
extern FrtDocField *frt_df_new(ID name);
|
28
|
-
extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding);
|
29
|
-
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding);
|
30
|
-
extern void frt_df_destroy(FrtDocField *df);
|
31
|
-
extern char *frt_df_to_s(FrtDocField *df);
|
32
|
-
|
33
8
|
/****************************************************************************
|
34
9
|
*
|
35
10
|
* FrtDocument
|
@@ -39,7 +14,7 @@ extern char *frt_df_to_s(FrtDocField *df);
|
|
39
14
|
#define FRT_DOC_INIT_CAPA 8
|
40
15
|
typedef struct FrtDocument {
|
41
16
|
FrtHash *field_dict;
|
42
|
-
int size;
|
17
|
+
int field_count;
|
43
18
|
int capa;
|
44
19
|
FrtDocField **fields;
|
45
20
|
float boost;
|
@@ -1,3 +1,8 @@
|
|
1
|
+
/* prevent warning: #warning Please include winsock2.h before windows.h [-Wcpp] */
|
2
|
+
#ifdef _WIN32
|
3
|
+
# include <winsock2.h>
|
4
|
+
#endif
|
5
|
+
|
1
6
|
#include <stdarg.h>
|
2
7
|
#include "bzlib.h"
|
3
8
|
#include "frt_global.h"
|
@@ -61,11 +66,9 @@ void frt_xraise(int excode, const char *const msg) {
|
|
61
66
|
|
62
67
|
if (!top_context) {
|
63
68
|
FRT_XEXIT(ERROR_TYPES[excode], msg);
|
64
|
-
}
|
65
|
-
else if (!top_context->in_finally) {
|
69
|
+
} else if (!top_context->in_finally) {
|
66
70
|
frt_xraise_context(top_context, excode, msg);
|
67
|
-
}
|
68
|
-
else if (top_context->handled) {
|
71
|
+
} else if (top_context->handled) {
|
69
72
|
top_context->msg = msg;
|
70
73
|
top_context->excode = excode;
|
71
74
|
top_context->handled = false;
|
@@ -122,8 +125,7 @@ void frt_xpop_context(void) {
|
|
122
125
|
if (!top_cxt->handled) {
|
123
126
|
if (context) {
|
124
127
|
frt_xraise_context(context, top_cxt->excode, top_cxt->msg);
|
125
|
-
}
|
126
|
-
else {
|
128
|
+
} else {
|
127
129
|
FRT_XEXIT(ERROR_TYPES[top_cxt->excode], top_cxt->msg);
|
128
130
|
}
|
129
131
|
}
|
@@ -112,8 +112,7 @@ extern const char *frt_err_code_to_type(const int err_code);
|
|
112
112
|
|
113
113
|
extern void frb_rb_raise(const char *file, int line_num, const char *func, const char *err_type, const char *fmt, ...);
|
114
114
|
|
115
|
-
typedef struct frt_xcontext_t
|
116
|
-
{
|
115
|
+
typedef struct frt_xcontext_t {
|
117
116
|
jmp_buf jbuf;
|
118
117
|
struct frt_xcontext_t *next;
|
119
118
|
const char *msg;
|
@@ -9,22 +9,19 @@
|
|
9
9
|
*
|
10
10
|
***************************************************************************/
|
11
11
|
|
12
|
-
static unsigned long field_index_hash(const void *p)
|
13
|
-
{
|
12
|
+
static unsigned long field_index_hash(const void *p) {
|
14
13
|
FrtFieldIndex *self = (FrtFieldIndex *)p;
|
15
14
|
return frt_str_hash(rb_id2name(self->field)) ^ (unsigned long long)(self->klass);
|
16
15
|
}
|
17
16
|
|
18
|
-
static int field_index_eq(const void *p1, const void *p2)
|
19
|
-
{
|
17
|
+
static int field_index_eq(const void *p1, const void *p2) {
|
20
18
|
FrtFieldIndex *fi1 = (FrtFieldIndex *)p1;
|
21
19
|
FrtFieldIndex *fi2 = (FrtFieldIndex *)p2;
|
22
20
|
return (fi1->field == fi2->field) &&
|
23
21
|
(fi1->klass->type == fi2->klass->type);
|
24
22
|
}
|
25
23
|
|
26
|
-
static void field_index_destroy(void *p)
|
27
|
-
{
|
24
|
+
static void field_index_destroy(void *p) {
|
28
25
|
FrtFieldIndex *self = (FrtFieldIndex *)p;
|
29
26
|
if (self->index) {
|
30
27
|
self->klass->destroy_index(self->index);
|
@@ -95,10 +92,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
|
|
95
92
|
* index should only be used for sorting and not as a field cache of the
|
96
93
|
* column's value.
|
97
94
|
******************************************************************************/
|
98
|
-
static void byte_handle_term(void *index_ptr,
|
99
|
-
FrtTermDocEnum *tde,
|
100
|
-
const char *text)
|
101
|
-
{
|
95
|
+
static void byte_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
102
96
|
long *index = (long *)index_ptr;
|
103
97
|
long val = index[-1]++;
|
104
98
|
(void)text;
|
@@ -107,15 +101,13 @@ static void byte_handle_term(void *index_ptr,
|
|
107
101
|
}
|
108
102
|
}
|
109
103
|
|
110
|
-
static void *byte_create_index(int size)
|
111
|
-
{
|
104
|
+
static void *byte_create_index(int size) {
|
112
105
|
long *index = FRT_ALLOC_AND_ZERO_N(long, size + 1);
|
113
106
|
index[0] = 1;
|
114
107
|
return &index[1];
|
115
108
|
}
|
116
109
|
|
117
|
-
static void byte_destroy_index(void *p)
|
118
|
-
{
|
110
|
+
static void byte_destroy_index(void *p) {
|
119
111
|
long *index = (long *)p;
|
120
112
|
free(&index[-1]);
|
121
113
|
}
|
@@ -130,15 +122,11 @@ const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS = {
|
|
130
122
|
/******************************************************************************
|
131
123
|
* IntegerFieldIndex < FieldIndex
|
132
124
|
******************************************************************************/
|
133
|
-
static void *integer_create_index(int size)
|
134
|
-
{
|
125
|
+
static void *integer_create_index(int size) {
|
135
126
|
return FRT_ALLOC_AND_ZERO_N(long, size);
|
136
127
|
}
|
137
128
|
|
138
|
-
static void integer_handle_term(void *index_ptr,
|
139
|
-
FrtTermDocEnum *tde,
|
140
|
-
const char *text)
|
141
|
-
{
|
129
|
+
static void integer_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
142
130
|
long *index = (long *)index_ptr;
|
143
131
|
long val;
|
144
132
|
sscanf(text, "%ld", &val);
|
@@ -158,15 +146,11 @@ const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS = {
|
|
158
146
|
* FloatFieldIndex < FieldIndex
|
159
147
|
******************************************************************************/
|
160
148
|
#define VALUES_ARRAY_START_SIZE 8
|
161
|
-
static void *float_create_index(int size)
|
162
|
-
{
|
149
|
+
static void *float_create_index(int size) {
|
163
150
|
return FRT_ALLOC_AND_ZERO_N(float, size);
|
164
151
|
}
|
165
152
|
|
166
|
-
static void float_handle_term(void *index_ptr,
|
167
|
-
FrtTermDocEnum *tde,
|
168
|
-
const char *text)
|
169
|
-
{
|
153
|
+
static void float_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
170
154
|
float *index = (float *)index_ptr;
|
171
155
|
float val;
|
172
156
|
sscanf(text, "%g", &val);
|
@@ -186,8 +170,7 @@ const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS = {
|
|
186
170
|
* StringFieldIndex < FieldIndex
|
187
171
|
******************************************************************************/
|
188
172
|
|
189
|
-
static void *string_create_index(int size)
|
190
|
-
{
|
173
|
+
static void *string_create_index(int size) {
|
191
174
|
FrtStringIndex *self = FRT_ALLOC_AND_ZERO(FrtStringIndex);
|
192
175
|
self->size = size;
|
193
176
|
self->index = FRT_ALLOC_AND_ZERO_N(long, size);
|
@@ -197,8 +180,7 @@ static void *string_create_index(int size)
|
|
197
180
|
return self;
|
198
181
|
}
|
199
182
|
|
200
|
-
static void string_destroy_index(void *p)
|
201
|
-
{
|
183
|
+
static void string_destroy_index(void *p) {
|
202
184
|
FrtStringIndex *self = (FrtStringIndex *)p;
|
203
185
|
int i;
|
204
186
|
free(self->index);
|
@@ -211,8 +193,7 @@ static void string_destroy_index(void *p)
|
|
211
193
|
|
212
194
|
static void string_handle_term(void *index_ptr,
|
213
195
|
FrtTermDocEnum *tde,
|
214
|
-
const char *text)
|
215
|
-
{
|
196
|
+
const char *text) {
|
216
197
|
FrtStringIndex *index = (FrtStringIndex *)index_ptr;
|
217
198
|
if (index->v_size >= index->v_capa) {
|
218
199
|
index->v_capa *= 2;
|
@@ -3,12 +3,6 @@
|
|
3
3
|
|
4
4
|
#include "frt_index.h"
|
5
5
|
|
6
|
-
/***************************************************************************
|
7
|
-
*
|
8
|
-
* FrtFieldIndex
|
9
|
-
*
|
10
|
-
***************************************************************************/
|
11
|
-
|
12
6
|
typedef struct FrtStringIndex {
|
13
7
|
int size;
|
14
8
|
long *index;
|