isomorfeus-ferret 0.17.2 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -20,8 +20,8 @@ typedef struct FrtToken {
|
|
20
20
|
|
21
21
|
extern FrtToken *frt_tk_new();
|
22
22
|
extern void frt_tk_destroy(void *p);
|
23
|
-
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
-
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
23
|
+
extern FrtToken *frt_tk_set(FrtToken *tk, const char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
+
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, const char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
25
25
|
extern int frt_tk_eq(FrtToken *tk1, FrtToken *tk2);
|
26
26
|
extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
27
27
|
|
@@ -31,12 +31,12 @@ extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
|
31
31
|
|
32
32
|
typedef struct FrtTokenStream FrtTokenStream;
|
33
33
|
struct FrtTokenStream {
|
34
|
-
char *t; /* ptr used to scan text */
|
35
|
-
char *text;
|
34
|
+
const char *t; /* ptr used to scan text */
|
35
|
+
const char *text;
|
36
36
|
int length;
|
37
37
|
rb_encoding *encoding;
|
38
38
|
FrtToken *(*next)(FrtTokenStream *ts);
|
39
|
-
FrtTokenStream *(*reset)(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
39
|
+
FrtTokenStream *(*reset)(FrtTokenStream *ts, const char *text, rb_encoding *encoding);
|
40
40
|
FrtTokenStream *(*clone_i)(FrtTokenStream *ts);
|
41
41
|
void (*destroy_i)(FrtTokenStream *ts);
|
42
42
|
_Atomic unsigned int ref_cnt;
|
@@ -46,7 +46,7 @@ struct FrtTokenStream {
|
|
46
46
|
|
47
47
|
extern FrtTokenStream *frt_ts_new_i(size_t size);
|
48
48
|
extern FrtTokenStream *frt_ts_init(FrtTokenStream *ts);
|
49
|
-
extern FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, char *text, rb_encoding *encoding);
|
49
|
+
extern FrtTokenStream *frt_ts_reset(FrtTokenStream *ts, const char *text, rb_encoding *encoding);
|
50
50
|
extern FrtTokenStream *frt_ts_clone_size(FrtTokenStream *orig_ts, size_t size);
|
51
51
|
|
52
52
|
typedef struct FrtTokenFilter {
|
@@ -182,7 +182,7 @@ extern FrtTokenStream *frt_mapping_filter_add(FrtTokenStream *ts, const char *pa
|
|
182
182
|
|
183
183
|
typedef struct FrtAnalyzer {
|
184
184
|
FrtTokenStream *current_ts;
|
185
|
-
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding);
|
185
|
+
FrtTokenStream *(*get_ts)(struct FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding);
|
186
186
|
void (*destroy_i)(struct FrtAnalyzer *a);
|
187
187
|
_Atomic unsigned int ref_cnt;
|
188
188
|
VALUE ranalyzer;
|
@@ -194,9 +194,9 @@ extern void frt_a_deref(FrtAnalyzer *a);
|
|
194
194
|
|
195
195
|
extern FrtAnalyzer *frt_analyzer_alloc(void);
|
196
196
|
extern void frt_analyzer_init(FrtAnalyzer *a, FrtTokenStream *ts, void (*destroy)(FrtAnalyzer *a),
|
197
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding));
|
197
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding));
|
198
198
|
extern FrtAnalyzer *frt_analyzer_new(FrtTokenStream *ts, void (*destroy)(FrtAnalyzer *a),
|
199
|
-
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, char *text, rb_encoding *encoding));
|
199
|
+
FrtTokenStream *(*get_ts)(FrtAnalyzer *a, ID field, const char *text, rb_encoding *encoding));
|
200
200
|
|
201
201
|
/*****************************************************************************/
|
202
202
|
/*** FrtNonAnalyzer **********************************************************/
|
@@ -4,8 +4,7 @@
|
|
4
4
|
#define META_CNT FRT_ARY_META_CNT
|
5
5
|
#define DATA_SZ sizeof(int) * META_CNT
|
6
6
|
|
7
|
-
void **frt_ary_new_i(int type_size, int init_capa)
|
8
|
-
{
|
7
|
+
void **frt_ary_new_i(int type_size, int init_capa) {
|
9
8
|
void **ary;
|
10
9
|
if (init_capa <= 0) {
|
11
10
|
init_capa = FRT_ARY_INIT_CAPA;
|
@@ -17,8 +16,7 @@ void **frt_ary_new_i(int type_size, int init_capa)
|
|
17
16
|
return ary;
|
18
17
|
}
|
19
18
|
|
20
|
-
void frt_ary_resize_i(void ***ary, int size)
|
21
|
-
{
|
19
|
+
void frt_ary_resize_i(void ***ary, int size) {
|
22
20
|
size++;
|
23
21
|
if (size > frt_ary_sz(*ary)) {
|
24
22
|
int capa = frt_ary_capa(*ary);
|
@@ -39,8 +37,7 @@ void frt_ary_resize_i(void ***ary, int size)
|
|
39
37
|
}
|
40
38
|
}
|
41
39
|
|
42
|
-
void frt_ary_set_i(void ***ary, int index, void *value)
|
43
|
-
{
|
40
|
+
void frt_ary_set_i(void ***ary, int index, void *value) {
|
44
41
|
if (index < 0) {
|
45
42
|
index += frt_ary_sz(*ary);
|
46
43
|
if (index < 0) {
|
@@ -51,43 +48,37 @@ void frt_ary_set_i(void ***ary, int index, void *value)
|
|
51
48
|
(*ary)[index] = value;
|
52
49
|
}
|
53
50
|
|
54
|
-
void *frt_ary_get_i(void **ary, int index)
|
55
|
-
{
|
51
|
+
void *frt_ary_get_i(void **ary, int index) {
|
56
52
|
if (index < 0) {
|
57
53
|
index += frt_ary_sz(ary);
|
58
54
|
}
|
59
55
|
if (index >= 0 && index < frt_ary_sz(ary)) {
|
60
56
|
return ary[index];
|
61
|
-
}
|
62
|
-
else {
|
57
|
+
} else {
|
63
58
|
return NULL;
|
64
59
|
}
|
65
60
|
}
|
66
61
|
|
67
|
-
void frt_ary_push_i(void ***ary, void *value)
|
68
|
-
{
|
62
|
+
void frt_ary_push_i(void ***ary, void *value) {
|
69
63
|
int size = frt_ary_sz(*ary);
|
70
64
|
frt_ary_resize_i(ary, size);
|
71
65
|
(*ary)[size] = value;
|
72
66
|
}
|
73
67
|
|
74
|
-
void *frt_ary_pop_i(void **ary)
|
75
|
-
{
|
68
|
+
void *frt_ary_pop_i(void **ary) {
|
76
69
|
void *val = ary[--frt_ary_sz(ary)];
|
77
70
|
ary[frt_ary_sz(ary)] = NULL;
|
78
71
|
return val;
|
79
72
|
}
|
80
73
|
|
81
|
-
void frt_ary_unshift_i(void ***ary, void *value)
|
82
|
-
{
|
74
|
+
void frt_ary_unshift_i(void ***ary, void *value) {
|
83
75
|
int size = frt_ary_sz(*ary);
|
84
76
|
frt_ary_resize_i(ary, size);
|
85
77
|
memmove(*ary + 1, *ary, size * sizeof(void *));
|
86
78
|
(*ary)[0] = value;
|
87
79
|
}
|
88
80
|
|
89
|
-
void *frt_ary_shift_i(void **ary)
|
90
|
-
{
|
81
|
+
void *frt_ary_shift_i(void **ary) {
|
91
82
|
void *val = ary[0];
|
92
83
|
int size = --frt_ary_sz(ary);
|
93
84
|
memmove(ary, ary + 1, size * sizeof(void *));
|
@@ -95,16 +86,14 @@ void *frt_ary_shift_i(void **ary)
|
|
95
86
|
return val;
|
96
87
|
}
|
97
88
|
|
98
|
-
void *frt_ary_remove_i(void **ary, int index)
|
99
|
-
{
|
89
|
+
void *frt_ary_remove_i(void **ary, int index) {
|
100
90
|
if (index >= 0 && index < frt_ary_sz(ary)) {
|
101
91
|
void *val = ary[index];
|
102
92
|
memmove(ary + index, ary + index + 1,
|
103
93
|
(frt_ary_sz(ary) - index + 1) * sizeof(void *));
|
104
94
|
frt_ary_sz(ary)--;
|
105
95
|
return val;
|
106
|
-
}
|
107
|
-
else {
|
96
|
+
} else {
|
108
97
|
return NULL;
|
109
98
|
}
|
110
99
|
}
|
@@ -100,8 +100,7 @@ static FRT_ATTR_ALWAYS_INLINE void frt_bv_set_value(FrtBitVector *bv, int bit, b
|
|
100
100
|
if (value) {
|
101
101
|
bv->count++;
|
102
102
|
*word_p |= bitmask;
|
103
|
-
}
|
104
|
-
else {
|
103
|
+
} else {
|
105
104
|
bv->count--;
|
106
105
|
*word_p &= ~bitmask;
|
107
106
|
}
|
@@ -284,8 +283,7 @@ done:
|
|
284
283
|
* @return the next set bits index or -1 if no more bits are set
|
285
284
|
*/
|
286
285
|
static FRT_ATTR_ALWAYS_INLINE
|
287
|
-
int frt_bv_scan_next(FrtBitVector *bv)
|
288
|
-
{
|
286
|
+
int frt_bv_scan_next(FrtBitVector *bv) {
|
289
287
|
return frt_bv_scan_next_from(bv, bv->curr_bit + 1);
|
290
288
|
}
|
291
289
|
|
@@ -331,8 +329,7 @@ done:
|
|
331
329
|
* @return the next unset bits index or -1 if no more bits are unset
|
332
330
|
*/
|
333
331
|
static FRT_ATTR_ALWAYS_INLINE
|
334
|
-
int frt_bv_scan_next_unset(FrtBitVector *bv)
|
335
|
-
{
|
332
|
+
int frt_bv_scan_next_unset(FrtBitVector *bv) {
|
336
333
|
return frt_bv_scan_next_unset_from(bv, bv->curr_bit + 1);
|
337
334
|
}
|
338
335
|
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#include "frt_doc_field.h"
|
2
|
+
#include "frt_document.h"
|
3
|
+
|
4
|
+
FrtDocField *frt_df_new(ID name) {
|
5
|
+
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
6
|
+
df->name = name;
|
7
|
+
df->size = 0;
|
8
|
+
df->capa = FRT_DF_INIT_CAPA;
|
9
|
+
df->data = FRT_ALLOC_N(const char *, df->capa);
|
10
|
+
df->lengths = FRT_ALLOC_N(int, df->capa);
|
11
|
+
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
12
|
+
df->boost = 1.0f;
|
13
|
+
return df;
|
14
|
+
}
|
15
|
+
|
16
|
+
FrtDocField *frt_df_add_data_len_nc(FrtDocField *df, const char *data, int len, rb_encoding *encoding) {
|
17
|
+
if (df->size >= df->capa) {
|
18
|
+
df->capa <<= 2;
|
19
|
+
FRT_REALLOC_N(df->data, const char *, df->capa);
|
20
|
+
FRT_REALLOC_N(df->lengths, int, df->capa);
|
21
|
+
FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
|
22
|
+
}
|
23
|
+
df->data[df->size] = data;
|
24
|
+
df->lengths[df->size] = len;
|
25
|
+
df->encodings[df->size] = encoding;
|
26
|
+
df->size++;
|
27
|
+
return df;
|
28
|
+
}
|
29
|
+
|
30
|
+
FrtDocField *frt_df_add_data_len(FrtDocField *self, const char *data, int length, rb_encoding *encoding) {
|
31
|
+
char *d = FRT_ALLOC_N(char, length + 1);
|
32
|
+
memcpy(d, data, length); /* must handle binary data ... */
|
33
|
+
d[length] = '\0'; /* ... and strings */
|
34
|
+
return frt_df_add_data_len_nc(self, d, length, encoding);
|
35
|
+
}
|
36
|
+
|
37
|
+
FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding) {
|
38
|
+
return frt_df_add_data_len(df, data, strlen(data), encoding);
|
39
|
+
}
|
40
|
+
|
41
|
+
void frt_df_destroy(FrtDocField *df) {
|
42
|
+
int i;
|
43
|
+
for (i = 0; i < df->size; i++) {
|
44
|
+
free((void *)df->data[i]);
|
45
|
+
}
|
46
|
+
free(df->data);
|
47
|
+
free(df->lengths);
|
48
|
+
free(df->encodings);
|
49
|
+
free(df);
|
50
|
+
}
|
51
|
+
|
52
|
+
/*
|
53
|
+
* Format for one item is: name: "data"
|
54
|
+
* for more items : name: ["data", "data", "data"]
|
55
|
+
* internally used for testing, thus encoding can be ignored
|
56
|
+
*/
|
57
|
+
char *frt_df_to_s(FrtDocField *df) {
|
58
|
+
const char *df_name = rb_id2name(df->name);
|
59
|
+
int i, len = 0, namelen = strlen(df_name);
|
60
|
+
char *str, *s;
|
61
|
+
for (i = 0; i < df->size; i++) {
|
62
|
+
len += df->lengths[i] + 4;
|
63
|
+
}
|
64
|
+
s = str = FRT_ALLOC_N(char, namelen + len + 5);
|
65
|
+
memcpy(s, df_name, namelen);
|
66
|
+
s += namelen;
|
67
|
+
s = frt_strapp(s, ": ");
|
68
|
+
|
69
|
+
if (df->size > 1) {
|
70
|
+
s = frt_strapp(s, "[");
|
71
|
+
}
|
72
|
+
for (i = 0; i < df->size; i++) {
|
73
|
+
if (i != 0) {
|
74
|
+
s = frt_strapp(s, ", ");
|
75
|
+
}
|
76
|
+
s = frt_strapp(s, "\"");
|
77
|
+
memcpy(s, df->data[i], df->lengths[i]);
|
78
|
+
s += df->lengths[i];
|
79
|
+
s = frt_strapp(s, "\"");
|
80
|
+
}
|
81
|
+
|
82
|
+
if (df->size > 1) {
|
83
|
+
s = frt_strapp(s, "]");
|
84
|
+
}
|
85
|
+
*s = 0;
|
86
|
+
return str;
|
87
|
+
}
|
@@ -0,0 +1,26 @@
|
|
1
|
+
#ifndef FRT_DOC_FIELD_H
|
2
|
+
#define FRT_DOC_FIELD_H
|
3
|
+
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
#include "frt_hash.h"
|
6
|
+
|
7
|
+
#define FRT_DF_INIT_CAPA 1
|
8
|
+
|
9
|
+
typedef struct FrtDocField {
|
10
|
+
ID name;
|
11
|
+
int size;
|
12
|
+
int capa;
|
13
|
+
int *lengths;
|
14
|
+
rb_encoding **encodings; /* used for processing */
|
15
|
+
const char **data;
|
16
|
+
float boost;
|
17
|
+
FrtCompressionType compression_type;
|
18
|
+
} FrtDocField;
|
19
|
+
|
20
|
+
extern FrtDocField *frt_df_new(ID name);
|
21
|
+
extern FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding);
|
22
|
+
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, const char *data, int len, rb_encoding *encoding);
|
23
|
+
extern void frt_df_destroy(FrtDocField *df);
|
24
|
+
extern char *frt_df_to_s(FrtDocField *df);
|
25
|
+
|
26
|
+
#endif
|
@@ -1,103 +1,10 @@
|
|
1
1
|
#include "frt_document.h"
|
2
2
|
#include <string.h>
|
3
3
|
|
4
|
-
/****************************************************************************
|
5
|
-
*
|
6
|
-
* FrtDocField
|
7
|
-
*
|
8
|
-
****************************************************************************/
|
9
|
-
|
10
|
-
FrtDocField *frt_df_new(ID name) {
|
11
|
-
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
12
|
-
df->name = name;
|
13
|
-
df->size = 0;
|
14
|
-
df->capa = FRT_DF_INIT_CAPA;
|
15
|
-
df->data = FRT_ALLOC_N(char *, df->capa);
|
16
|
-
df->lengths = FRT_ALLOC_N(int, df->capa);
|
17
|
-
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
18
|
-
df->destroy_data = false;
|
19
|
-
df->boost = 1.0f;
|
20
|
-
return df;
|
21
|
-
}
|
22
|
-
|
23
|
-
FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding) {
|
24
|
-
if (df->size >= df->capa) {
|
25
|
-
df->capa <<= 2;
|
26
|
-
FRT_REALLOC_N(df->data, char *, df->capa);
|
27
|
-
FRT_REALLOC_N(df->lengths, int, df->capa);
|
28
|
-
FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
|
29
|
-
}
|
30
|
-
df->data[df->size] = data;
|
31
|
-
df->lengths[df->size] = len;
|
32
|
-
df->encodings[df->size] = encoding;
|
33
|
-
df->size++;
|
34
|
-
return df;
|
35
|
-
}
|
36
|
-
|
37
|
-
FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding) {
|
38
|
-
return frt_df_add_data_len(df, data, strlen(data), encoding);
|
39
|
-
}
|
40
|
-
|
41
|
-
void frt_df_destroy(FrtDocField *df) {
|
42
|
-
if (df->destroy_data) {
|
43
|
-
int i;
|
44
|
-
for (i = 0; i < df->size; i++) {
|
45
|
-
free(df->data[i]);
|
46
|
-
}
|
47
|
-
}
|
48
|
-
free(df->data);
|
49
|
-
free(df->lengths);
|
50
|
-
free(df->encodings);
|
51
|
-
free(df);
|
52
|
-
}
|
53
|
-
|
54
|
-
/*
|
55
|
-
* Format for one item is: name: "data"
|
56
|
-
* for more items : name: ["data", "data", "data"]
|
57
|
-
* internally used for testing, thus encoding can be ignored
|
58
|
-
*/
|
59
|
-
char *frt_df_to_s(FrtDocField *df) {
|
60
|
-
const char *df_name = rb_id2name(df->name);
|
61
|
-
int i, len = 0, namelen = strlen(df_name);
|
62
|
-
char *str, *s;
|
63
|
-
for (i = 0; i < df->size; i++) {
|
64
|
-
len += df->lengths[i] + 4;
|
65
|
-
}
|
66
|
-
s = str = FRT_ALLOC_N(char, namelen + len + 5);
|
67
|
-
memcpy(s, df_name, namelen);
|
68
|
-
s += namelen;
|
69
|
-
s = frt_strapp(s, ": ");
|
70
|
-
|
71
|
-
if (df->size > 1) {
|
72
|
-
s = frt_strapp(s, "[");
|
73
|
-
}
|
74
|
-
for (i = 0; i < df->size; i++) {
|
75
|
-
if (i != 0) {
|
76
|
-
s = frt_strapp(s, ", ");
|
77
|
-
}
|
78
|
-
s = frt_strapp(s, "\"");
|
79
|
-
memcpy(s, df->data[i], df->lengths[i]);
|
80
|
-
s += df->lengths[i];
|
81
|
-
s = frt_strapp(s, "\"");
|
82
|
-
}
|
83
|
-
|
84
|
-
if (df->size > 1) {
|
85
|
-
s = frt_strapp(s, "]");
|
86
|
-
}
|
87
|
-
*s = 0;
|
88
|
-
return str;
|
89
|
-
}
|
90
|
-
|
91
|
-
/****************************************************************************
|
92
|
-
*
|
93
|
-
* FrtDocument
|
94
|
-
*
|
95
|
-
****************************************************************************/
|
96
|
-
|
97
4
|
FrtDocument *frt_doc_new(void) {
|
98
5
|
FrtDocument *doc = FRT_ALLOC(FrtDocument);
|
99
6
|
doc->field_dict = frt_h_new_ptr((frt_free_ft)&frt_df_destroy);
|
100
|
-
doc->
|
7
|
+
doc->field_count = 0;
|
101
8
|
doc->capa = FRT_DOC_INIT_CAPA;
|
102
9
|
doc->fields = FRT_ALLOC_N(FrtDocField *, doc->capa);
|
103
10
|
doc->boost = 1.0f;
|
@@ -109,12 +16,12 @@ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df) {
|
|
109
16
|
FRT_RAISE(FRT_EXCEPTION, "tried to add %s field which alread existed\n",
|
110
17
|
rb_id2name(df->name));
|
111
18
|
}
|
112
|
-
if (doc->
|
19
|
+
if (doc->field_count >= doc->capa) {
|
113
20
|
doc->capa <<= 1;
|
114
21
|
FRT_REALLOC_N(doc->fields, FrtDocField *, doc->capa);
|
115
22
|
}
|
116
|
-
doc->fields[doc->
|
117
|
-
doc->
|
23
|
+
doc->fields[doc->field_count] = df;
|
24
|
+
doc->field_count++;
|
118
25
|
return df;
|
119
26
|
}
|
120
27
|
|
@@ -2,34 +2,9 @@
|
|
2
2
|
#define FRT_DOCUMENT_H
|
3
3
|
|
4
4
|
#include "frt_global.h"
|
5
|
-
#include "
|
5
|
+
#include "frt_doc_field.h"
|
6
6
|
#include <ruby/encoding.h>
|
7
7
|
|
8
|
-
/****************************************************************************
|
9
|
-
*
|
10
|
-
* FrtDocField
|
11
|
-
*
|
12
|
-
****************************************************************************/
|
13
|
-
|
14
|
-
#define FRT_DF_INIT_CAPA 1
|
15
|
-
typedef struct FrtDocField {
|
16
|
-
ID name;
|
17
|
-
int size;
|
18
|
-
int capa;
|
19
|
-
int *lengths;
|
20
|
-
rb_encoding **encodings; /* used for processing */
|
21
|
-
char **data;
|
22
|
-
float boost;
|
23
|
-
FrtCompressionType compression;
|
24
|
-
bool destroy_data : 1;
|
25
|
-
} FrtDocField;
|
26
|
-
|
27
|
-
extern FrtDocField *frt_df_new(ID name);
|
28
|
-
extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding);
|
29
|
-
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding);
|
30
|
-
extern void frt_df_destroy(FrtDocField *df);
|
31
|
-
extern char *frt_df_to_s(FrtDocField *df);
|
32
|
-
|
33
8
|
/****************************************************************************
|
34
9
|
*
|
35
10
|
* FrtDocument
|
@@ -39,7 +14,7 @@ extern char *frt_df_to_s(FrtDocField *df);
|
|
39
14
|
#define FRT_DOC_INIT_CAPA 8
|
40
15
|
typedef struct FrtDocument {
|
41
16
|
FrtHash *field_dict;
|
42
|
-
int
|
17
|
+
int field_count;
|
43
18
|
int capa;
|
44
19
|
FrtDocField **fields;
|
45
20
|
float boost;
|
@@ -1,3 +1,8 @@
|
|
1
|
+
/* prevent warning: #warning Please include winsock2.h before windows.h [-Wcpp] */
|
2
|
+
#ifdef _WIN32
|
3
|
+
# include <winsock2.h>
|
4
|
+
#endif
|
5
|
+
|
1
6
|
#include <stdarg.h>
|
2
7
|
#include "bzlib.h"
|
3
8
|
#include "frt_global.h"
|
@@ -61,11 +66,9 @@ void frt_xraise(int excode, const char *const msg) {
|
|
61
66
|
|
62
67
|
if (!top_context) {
|
63
68
|
FRT_XEXIT(ERROR_TYPES[excode], msg);
|
64
|
-
}
|
65
|
-
else if (!top_context->in_finally) {
|
69
|
+
} else if (!top_context->in_finally) {
|
66
70
|
frt_xraise_context(top_context, excode, msg);
|
67
|
-
}
|
68
|
-
else if (top_context->handled) {
|
71
|
+
} else if (top_context->handled) {
|
69
72
|
top_context->msg = msg;
|
70
73
|
top_context->excode = excode;
|
71
74
|
top_context->handled = false;
|
@@ -122,8 +125,7 @@ void frt_xpop_context(void) {
|
|
122
125
|
if (!top_cxt->handled) {
|
123
126
|
if (context) {
|
124
127
|
frt_xraise_context(context, top_cxt->excode, top_cxt->msg);
|
125
|
-
}
|
126
|
-
else {
|
128
|
+
} else {
|
127
129
|
FRT_XEXIT(ERROR_TYPES[top_cxt->excode], top_cxt->msg);
|
128
130
|
}
|
129
131
|
}
|
@@ -112,8 +112,7 @@ extern const char *frt_err_code_to_type(const int err_code);
|
|
112
112
|
|
113
113
|
extern void frb_rb_raise(const char *file, int line_num, const char *func, const char *err_type, const char *fmt, ...);
|
114
114
|
|
115
|
-
typedef struct frt_xcontext_t
|
116
|
-
{
|
115
|
+
typedef struct frt_xcontext_t {
|
117
116
|
jmp_buf jbuf;
|
118
117
|
struct frt_xcontext_t *next;
|
119
118
|
const char *msg;
|
@@ -9,22 +9,19 @@
|
|
9
9
|
*
|
10
10
|
***************************************************************************/
|
11
11
|
|
12
|
-
static unsigned long field_index_hash(const void *p)
|
13
|
-
{
|
12
|
+
static unsigned long field_index_hash(const void *p) {
|
14
13
|
FrtFieldIndex *self = (FrtFieldIndex *)p;
|
15
14
|
return frt_str_hash(rb_id2name(self->field)) ^ (unsigned long long)(self->klass);
|
16
15
|
}
|
17
16
|
|
18
|
-
static int field_index_eq(const void *p1, const void *p2)
|
19
|
-
{
|
17
|
+
static int field_index_eq(const void *p1, const void *p2) {
|
20
18
|
FrtFieldIndex *fi1 = (FrtFieldIndex *)p1;
|
21
19
|
FrtFieldIndex *fi2 = (FrtFieldIndex *)p2;
|
22
20
|
return (fi1->field == fi2->field) &&
|
23
21
|
(fi1->klass->type == fi2->klass->type);
|
24
22
|
}
|
25
23
|
|
26
|
-
static void field_index_destroy(void *p)
|
27
|
-
{
|
24
|
+
static void field_index_destroy(void *p) {
|
28
25
|
FrtFieldIndex *self = (FrtFieldIndex *)p;
|
29
26
|
if (self->index) {
|
30
27
|
self->klass->destroy_index(self->index);
|
@@ -95,10 +92,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
|
|
95
92
|
* index should only be used for sorting and not as a field cache of the
|
96
93
|
* column's value.
|
97
94
|
******************************************************************************/
|
98
|
-
static void byte_handle_term(void *index_ptr,
|
99
|
-
FrtTermDocEnum *tde,
|
100
|
-
const char *text)
|
101
|
-
{
|
95
|
+
static void byte_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
102
96
|
long *index = (long *)index_ptr;
|
103
97
|
long val = index[-1]++;
|
104
98
|
(void)text;
|
@@ -107,15 +101,13 @@ static void byte_handle_term(void *index_ptr,
|
|
107
101
|
}
|
108
102
|
}
|
109
103
|
|
110
|
-
static void *byte_create_index(int size)
|
111
|
-
{
|
104
|
+
static void *byte_create_index(int size) {
|
112
105
|
long *index = FRT_ALLOC_AND_ZERO_N(long, size + 1);
|
113
106
|
index[0] = 1;
|
114
107
|
return &index[1];
|
115
108
|
}
|
116
109
|
|
117
|
-
static void byte_destroy_index(void *p)
|
118
|
-
{
|
110
|
+
static void byte_destroy_index(void *p) {
|
119
111
|
long *index = (long *)p;
|
120
112
|
free(&index[-1]);
|
121
113
|
}
|
@@ -130,15 +122,11 @@ const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS = {
|
|
130
122
|
/******************************************************************************
|
131
123
|
* IntegerFieldIndex < FieldIndex
|
132
124
|
******************************************************************************/
|
133
|
-
static void *integer_create_index(int size)
|
134
|
-
{
|
125
|
+
static void *integer_create_index(int size) {
|
135
126
|
return FRT_ALLOC_AND_ZERO_N(long, size);
|
136
127
|
}
|
137
128
|
|
138
|
-
static void integer_handle_term(void *index_ptr,
|
139
|
-
FrtTermDocEnum *tde,
|
140
|
-
const char *text)
|
141
|
-
{
|
129
|
+
static void integer_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
142
130
|
long *index = (long *)index_ptr;
|
143
131
|
long val;
|
144
132
|
sscanf(text, "%ld", &val);
|
@@ -158,15 +146,11 @@ const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS = {
|
|
158
146
|
* FloatFieldIndex < FieldIndex
|
159
147
|
******************************************************************************/
|
160
148
|
#define VALUES_ARRAY_START_SIZE 8
|
161
|
-
static void *float_create_index(int size)
|
162
|
-
{
|
149
|
+
static void *float_create_index(int size) {
|
163
150
|
return FRT_ALLOC_AND_ZERO_N(float, size);
|
164
151
|
}
|
165
152
|
|
166
|
-
static void float_handle_term(void *index_ptr,
|
167
|
-
FrtTermDocEnum *tde,
|
168
|
-
const char *text)
|
169
|
-
{
|
153
|
+
static void float_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
170
154
|
float *index = (float *)index_ptr;
|
171
155
|
float val;
|
172
156
|
sscanf(text, "%g", &val);
|
@@ -186,8 +170,7 @@ const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS = {
|
|
186
170
|
* StringFieldIndex < FieldIndex
|
187
171
|
******************************************************************************/
|
188
172
|
|
189
|
-
static void *string_create_index(int size)
|
190
|
-
{
|
173
|
+
static void *string_create_index(int size) {
|
191
174
|
FrtStringIndex *self = FRT_ALLOC_AND_ZERO(FrtStringIndex);
|
192
175
|
self->size = size;
|
193
176
|
self->index = FRT_ALLOC_AND_ZERO_N(long, size);
|
@@ -197,8 +180,7 @@ static void *string_create_index(int size)
|
|
197
180
|
return self;
|
198
181
|
}
|
199
182
|
|
200
|
-
static void string_destroy_index(void *p)
|
201
|
-
{
|
183
|
+
static void string_destroy_index(void *p) {
|
202
184
|
FrtStringIndex *self = (FrtStringIndex *)p;
|
203
185
|
int i;
|
204
186
|
free(self->index);
|
@@ -211,8 +193,7 @@ static void string_destroy_index(void *p)
|
|
211
193
|
|
212
194
|
static void string_handle_term(void *index_ptr,
|
213
195
|
FrtTermDocEnum *tde,
|
214
|
-
const char *text)
|
215
|
-
{
|
196
|
+
const char *text) {
|
216
197
|
FrtStringIndex *index = (FrtStringIndex *)index_ptr;
|
217
198
|
if (index->v_size >= index->v_capa) {
|
218
199
|
index->v_capa *= 2;
|
@@ -3,12 +3,6 @@
|
|
3
3
|
|
4
4
|
#include "frt_index.h"
|
5
5
|
|
6
|
-
/***************************************************************************
|
7
|
-
*
|
8
|
-
* FrtFieldIndex
|
9
|
-
*
|
10
|
-
***************************************************************************/
|
11
|
-
|
12
6
|
typedef struct FrtStringIndex {
|
13
7
|
int size;
|
14
8
|
long *index;
|