isomorfeus-ferret 0.17.2 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +161 -187
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +77 -69
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +5 -33
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +14 -33
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +21 -39
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +334 -848
- data/ext/isomorfeus_ferret_ext/frt_index.h +4 -105
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +131 -217
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +18 -26
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +27 -28
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +64 -116
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +8 -14
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +251 -365
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +109 -191
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +12 -23
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +41 -88
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +16 -25
- data/ext/isomorfeus_ferret_ext/test_filter.c +22 -33
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +307 -519
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +66 -115
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +15 -21
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +8 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +32 -6
@@ -0,0 +1,26 @@
|
|
1
|
+
#ifndef FRT_DOC_FIELD_H
|
2
|
+
#define FRT_DOC_FIELD_H
|
3
|
+
|
4
|
+
#include <ruby/encoding.h>
|
5
|
+
#include "frt_hash.h"
|
6
|
+
|
7
|
+
#define FRT_DF_INIT_CAPA 1
|
8
|
+
|
9
|
+
/*
 * FrtDocField - one named field of a document, holding zero or more values.
 *
 *   name             - field name as a Ruby symbol ID
 *   size             - number of values currently stored
 *   capa             - number of slots allocated
 *   lengths          - byte length of each value
 *   encodings        - Ruby encoding of each value
 *   data             - value pointers; const since this version
 *   boost            - per-field boost factor
 *   compression_type - compression type recorded for this field
 *
 * NOTE(review): the size/capa meaning and the reading of lengths/encodings/data
 * as parallel per-value arrays are taken from the previous frt_df_new /
 * frt_df_add_data_len implementation - confirm against frt_doc_field.c.
 * NOTE(review): the former destroy_data flag is gone, so this struct presumably
 * no longer owns the value buffers - confirm against frt_df_destroy.
 */
typedef struct FrtDocField {
|
10
|
+
ID name;
|
11
|
+
int size;
|
12
|
+
int capa;
|
13
|
+
int *lengths;
|
14
|
+
rb_encoding **encodings; /* used for processing */
|
15
|
+
const char **data;
|
16
|
+
float boost;
|
17
|
+
FrtCompressionType compression_type;
|
18
|
+
} FrtDocField;
|
19
|
+
|
20
|
+
extern FrtDocField *frt_df_new(ID name);
|
21
|
+
extern FrtDocField *frt_df_add_data(FrtDocField *df, const char *data, rb_encoding *encoding);
|
22
|
+
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, const char *data, int len, rb_encoding *encoding);
|
23
|
+
extern void frt_df_destroy(FrtDocField *df);
|
24
|
+
extern char *frt_df_to_s(FrtDocField *df);
|
25
|
+
|
26
|
+
#endif
|
@@ -1,103 +1,10 @@
|
|
1
1
|
#include "frt_document.h"
|
2
2
|
#include <string.h>
|
3
3
|
|
4
|
-
/****************************************************************************
|
5
|
-
*
|
6
|
-
* FrtDocField
|
7
|
-
*
|
8
|
-
****************************************************************************/
|
9
|
-
|
10
|
-
FrtDocField *frt_df_new(ID name) {
|
11
|
-
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
12
|
-
df->name = name;
|
13
|
-
df->size = 0;
|
14
|
-
df->capa = FRT_DF_INIT_CAPA;
|
15
|
-
df->data = FRT_ALLOC_N(char *, df->capa);
|
16
|
-
df->lengths = FRT_ALLOC_N(int, df->capa);
|
17
|
-
df->encodings = FRT_ALLOC_N(rb_encoding *, df->capa);
|
18
|
-
df->destroy_data = false;
|
19
|
-
df->boost = 1.0f;
|
20
|
-
return df;
|
21
|
-
}
|
22
|
-
|
23
|
-
FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding) {
|
24
|
-
if (df->size >= df->capa) {
|
25
|
-
df->capa <<= 2;
|
26
|
-
FRT_REALLOC_N(df->data, char *, df->capa);
|
27
|
-
FRT_REALLOC_N(df->lengths, int, df->capa);
|
28
|
-
FRT_REALLOC_N(df->encodings, rb_encoding *, df->capa);
|
29
|
-
}
|
30
|
-
df->data[df->size] = data;
|
31
|
-
df->lengths[df->size] = len;
|
32
|
-
df->encodings[df->size] = encoding;
|
33
|
-
df->size++;
|
34
|
-
return df;
|
35
|
-
}
|
36
|
-
|
37
|
-
FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding) {
|
38
|
-
return frt_df_add_data_len(df, data, strlen(data), encoding);
|
39
|
-
}
|
40
|
-
|
41
|
-
void frt_df_destroy(FrtDocField *df) {
|
42
|
-
if (df->destroy_data) {
|
43
|
-
int i;
|
44
|
-
for (i = 0; i < df->size; i++) {
|
45
|
-
free(df->data[i]);
|
46
|
-
}
|
47
|
-
}
|
48
|
-
free(df->data);
|
49
|
-
free(df->lengths);
|
50
|
-
free(df->encodings);
|
51
|
-
free(df);
|
52
|
-
}
|
53
|
-
|
54
|
-
/*
|
55
|
-
* Format for one item is: name: "data"
|
56
|
-
* for more items : name: ["data", "data", "data"]
|
57
|
-
* internally used for testing, thus encoding can be ignored
|
58
|
-
*/
|
59
|
-
char *frt_df_to_s(FrtDocField *df) {
|
60
|
-
const char *df_name = rb_id2name(df->name);
|
61
|
-
int i, len = 0, namelen = strlen(df_name);
|
62
|
-
char *str, *s;
|
63
|
-
for (i = 0; i < df->size; i++) {
|
64
|
-
len += df->lengths[i] + 4;
|
65
|
-
}
|
66
|
-
s = str = FRT_ALLOC_N(char, namelen + len + 5);
|
67
|
-
memcpy(s, df_name, namelen);
|
68
|
-
s += namelen;
|
69
|
-
s = frt_strapp(s, ": ");
|
70
|
-
|
71
|
-
if (df->size > 1) {
|
72
|
-
s = frt_strapp(s, "[");
|
73
|
-
}
|
74
|
-
for (i = 0; i < df->size; i++) {
|
75
|
-
if (i != 0) {
|
76
|
-
s = frt_strapp(s, ", ");
|
77
|
-
}
|
78
|
-
s = frt_strapp(s, "\"");
|
79
|
-
memcpy(s, df->data[i], df->lengths[i]);
|
80
|
-
s += df->lengths[i];
|
81
|
-
s = frt_strapp(s, "\"");
|
82
|
-
}
|
83
|
-
|
84
|
-
if (df->size > 1) {
|
85
|
-
s = frt_strapp(s, "]");
|
86
|
-
}
|
87
|
-
*s = 0;
|
88
|
-
return str;
|
89
|
-
}
|
90
|
-
|
91
|
-
/****************************************************************************
|
92
|
-
*
|
93
|
-
* FrtDocument
|
94
|
-
*
|
95
|
-
****************************************************************************/
|
96
|
-
|
97
4
|
FrtDocument *frt_doc_new(void) {
|
98
5
|
FrtDocument *doc = FRT_ALLOC(FrtDocument);
|
99
6
|
doc->field_dict = frt_h_new_ptr((frt_free_ft)&frt_df_destroy);
|
100
|
-
doc->
|
7
|
+
doc->field_count = 0;
|
101
8
|
doc->capa = FRT_DOC_INIT_CAPA;
|
102
9
|
doc->fields = FRT_ALLOC_N(FrtDocField *, doc->capa);
|
103
10
|
doc->boost = 1.0f;
|
@@ -109,12 +16,12 @@ FrtDocField *frt_doc_add_field(FrtDocument *doc, FrtDocField *df) {
|
|
109
16
|
FRT_RAISE(FRT_EXCEPTION, "tried to add %s field which alread existed\n",
|
110
17
|
rb_id2name(df->name));
|
111
18
|
}
|
112
|
-
if (doc->
|
19
|
+
if (doc->field_count >= doc->capa) {
|
113
20
|
doc->capa <<= 1;
|
114
21
|
FRT_REALLOC_N(doc->fields, FrtDocField *, doc->capa);
|
115
22
|
}
|
116
|
-
doc->fields[doc->
|
117
|
-
doc->
|
23
|
+
doc->fields[doc->field_count] = df;
|
24
|
+
doc->field_count++;
|
118
25
|
return df;
|
119
26
|
}
|
120
27
|
|
@@ -2,44 +2,16 @@
|
|
2
2
|
#define FRT_DOCUMENT_H
|
3
3
|
|
4
4
|
#include "frt_global.h"
|
5
|
-
#include "
|
5
|
+
#include "frt_doc_field.h"
|
6
6
|
#include <ruby/encoding.h>
|
7
7
|
|
8
|
-
/****************************************************************************
|
9
|
-
*
|
10
|
-
* FrtDocField
|
11
|
-
*
|
12
|
-
****************************************************************************/
|
13
|
-
|
14
|
-
#define FRT_DF_INIT_CAPA 1
|
15
|
-
typedef struct FrtDocField {
|
16
|
-
ID name;
|
17
|
-
int size;
|
18
|
-
int capa;
|
19
|
-
int *lengths;
|
20
|
-
rb_encoding **encodings; /* used for processing */
|
21
|
-
char **data;
|
22
|
-
float boost;
|
23
|
-
FrtCompressionType compression;
|
24
|
-
bool destroy_data : 1;
|
25
|
-
} FrtDocField;
|
26
|
-
|
27
|
-
extern FrtDocField *frt_df_new(ID name);
|
28
|
-
extern FrtDocField *frt_df_add_data(FrtDocField *df, char *data, rb_encoding *encoding);
|
29
|
-
extern FrtDocField *frt_df_add_data_len(FrtDocField *df, char *data, int len, rb_encoding *encoding);
|
30
|
-
extern void frt_df_destroy(FrtDocField *df);
|
31
|
-
extern char *frt_df_to_s(FrtDocField *df);
|
32
|
-
|
33
|
-
/****************************************************************************
|
34
|
-
*
|
35
|
-
* FrtDocument
|
36
|
-
*
|
37
|
-
****************************************************************************/
|
38
|
-
|
39
8
|
#define FRT_DOC_INIT_CAPA 8
|
9
|
+
|
40
10
|
typedef struct FrtDocument {
|
11
|
+
// frt_uchar ulid[16];
|
12
|
+
// char *ulid_c;
|
41
13
|
FrtHash *field_dict;
|
42
|
-
int
|
14
|
+
int field_count;
|
43
15
|
int capa;
|
44
16
|
FrtDocField **fields;
|
45
17
|
float boost;
|
@@ -1,3 +1,8 @@
|
|
1
|
+
/* prevent warning: #warning Please include winsock2.h before windows.h [-Wcpp] */
|
2
|
+
#ifdef _WIN32
|
3
|
+
# include <winsock2.h>
|
4
|
+
#endif
|
5
|
+
|
1
6
|
#include <stdarg.h>
|
2
7
|
#include "bzlib.h"
|
3
8
|
#include "frt_global.h"
|
@@ -61,11 +66,9 @@ void frt_xraise(int excode, const char *const msg) {
|
|
61
66
|
|
62
67
|
if (!top_context) {
|
63
68
|
FRT_XEXIT(ERROR_TYPES[excode], msg);
|
64
|
-
}
|
65
|
-
else if (!top_context->in_finally) {
|
69
|
+
} else if (!top_context->in_finally) {
|
66
70
|
frt_xraise_context(top_context, excode, msg);
|
67
|
-
}
|
68
|
-
else if (top_context->handled) {
|
71
|
+
} else if (top_context->handled) {
|
69
72
|
top_context->msg = msg;
|
70
73
|
top_context->excode = excode;
|
71
74
|
top_context->handled = false;
|
@@ -122,8 +125,7 @@ void frt_xpop_context(void) {
|
|
122
125
|
if (!top_cxt->handled) {
|
123
126
|
if (context) {
|
124
127
|
frt_xraise_context(context, top_cxt->excode, top_cxt->msg);
|
125
|
-
}
|
126
|
-
else {
|
128
|
+
} else {
|
127
129
|
FRT_XEXIT(ERROR_TYPES[top_cxt->excode], top_cxt->msg);
|
128
130
|
}
|
129
131
|
}
|
@@ -112,8 +112,7 @@ extern const char *frt_err_code_to_type(const int err_code);
|
|
112
112
|
|
113
113
|
extern void frb_rb_raise(const char *file, int line_num, const char *func, const char *err_type, const char *fmt, ...);
|
114
114
|
|
115
|
-
typedef struct frt_xcontext_t
|
116
|
-
{
|
115
|
+
typedef struct frt_xcontext_t {
|
117
116
|
jmp_buf jbuf;
|
118
117
|
struct frt_xcontext_t *next;
|
119
118
|
const char *msg;
|
@@ -9,22 +9,19 @@
|
|
9
9
|
*
|
10
10
|
***************************************************************************/
|
11
11
|
|
12
|
-
static unsigned long
|
13
|
-
{
|
12
|
+
static unsigned long field_index_hash(const void *p) {
|
14
13
|
FrtFieldIndex *self = (FrtFieldIndex *)p;
|
15
14
|
return frt_str_hash(rb_id2name(self->field)) ^ (unsigned long long)(self->klass);
|
16
15
|
}
|
17
16
|
|
18
|
-
static int field_index_eq(const void *p1, const void *p2)
|
19
|
-
{
|
17
|
+
static int field_index_eq(const void *p1, const void *p2) {
|
20
18
|
FrtFieldIndex *fi1 = (FrtFieldIndex *)p1;
|
21
19
|
FrtFieldIndex *fi2 = (FrtFieldIndex *)p2;
|
22
20
|
return (fi1->field == fi2->field) &&
|
23
21
|
(fi1->klass->type == fi2->klass->type);
|
24
22
|
}
|
25
23
|
|
26
|
-
static void field_index_destroy(void *p)
|
27
|
-
{
|
24
|
+
static void field_index_destroy(void *p) {
|
28
25
|
FrtFieldIndex *self = (FrtFieldIndex *)p;
|
29
26
|
if (self->index) {
|
30
27
|
self->klass->destroy_index(self->index);
|
@@ -63,7 +60,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
|
|
63
60
|
* just use the field_infos field symbol */
|
64
61
|
self->field = fi->name;
|
65
62
|
|
66
|
-
length = ir->
|
63
|
+
length = ir->max_doc_num(ir);
|
67
64
|
if (length > 0) {
|
68
65
|
FRT_TRY
|
69
66
|
{
|
@@ -95,10 +92,7 @@ FrtFieldIndex *frt_field_index_get(FrtIndexReader *ir, ID field, const FrtFieldI
|
|
95
92
|
* index should only be used for sorting and not as a field cache of the
|
96
93
|
* column's value.
|
97
94
|
******************************************************************************/
|
98
|
-
static void byte_handle_term(void *index_ptr,
|
99
|
-
FrtTermDocEnum *tde,
|
100
|
-
const char *text)
|
101
|
-
{
|
95
|
+
static void byte_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
102
96
|
long *index = (long *)index_ptr;
|
103
97
|
long val = index[-1]++;
|
104
98
|
(void)text;
|
@@ -107,15 +101,13 @@ static void byte_handle_term(void *index_ptr,
|
|
107
101
|
}
|
108
102
|
}
|
109
103
|
|
110
|
-
static void *byte_create_index(int size)
|
111
|
-
{
|
104
|
+
static void *byte_create_index(int size) {
|
112
105
|
long *index = FRT_ALLOC_AND_ZERO_N(long, size + 1);
|
113
106
|
index[0] = 1;
|
114
107
|
return &index[1];
|
115
108
|
}
|
116
109
|
|
117
|
-
static void byte_destroy_index(void *p)
|
118
|
-
{
|
110
|
+
/*
 * Release an index created by byte_create_index(). The pointer handed out
 * sits one element past the real allocation, so step back before freeing.
 */
static void byte_destroy_index(void *p) {
    long *visible = (long *)p;
    long *block = visible - 1;

    free(block);
}
|
@@ -130,15 +122,11 @@ const FrtFieldIndexClass FRT_BYTE_FIELD_INDEX_CLASS = {
|
|
130
122
|
/******************************************************************************
|
131
123
|
* IntegerFieldIndex < FieldIndex
|
132
124
|
******************************************************************************/
|
133
|
-
static void *integer_create_index(int size)
|
134
|
-
{
|
125
|
+
static void *integer_create_index(int size) {
|
135
126
|
return FRT_ALLOC_AND_ZERO_N(long, size);
|
136
127
|
}
|
137
128
|
|
138
|
-
static void integer_handle_term(void *index_ptr,
|
139
|
-
FrtTermDocEnum *tde,
|
140
|
-
const char *text)
|
141
|
-
{
|
129
|
+
static void integer_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
142
130
|
long *index = (long *)index_ptr;
|
143
131
|
long val;
|
144
132
|
sscanf(text, "%ld", &val);
|
@@ -158,15 +146,11 @@ const FrtFieldIndexClass FRT_INTEGER_FIELD_INDEX_CLASS = {
|
|
158
146
|
* FloatFieldIndex < FieldIndex
|
159
147
|
******************************************************************************/
|
160
148
|
#define VALUES_ARRAY_START_SIZE 8
|
161
|
-
static void *float_create_index(int size)
|
162
|
-
{
|
149
|
+
static void *float_create_index(int size) {
|
163
150
|
return FRT_ALLOC_AND_ZERO_N(float, size);
|
164
151
|
}
|
165
152
|
|
166
|
-
static void float_handle_term(void *index_ptr,
|
167
|
-
FrtTermDocEnum *tde,
|
168
|
-
const char *text)
|
169
|
-
{
|
153
|
+
static void float_handle_term(void *index_ptr, FrtTermDocEnum *tde, const char *text) {
|
170
154
|
float *index = (float *)index_ptr;
|
171
155
|
float val;
|
172
156
|
sscanf(text, "%g", &val);
|
@@ -186,8 +170,7 @@ const FrtFieldIndexClass FRT_FLOAT_FIELD_INDEX_CLASS = {
|
|
186
170
|
* StringFieldIndex < FieldIndex
|
187
171
|
******************************************************************************/
|
188
172
|
|
189
|
-
static void *string_create_index(int size)
|
190
|
-
{
|
173
|
+
static void *string_create_index(int size) {
|
191
174
|
FrtStringIndex *self = FRT_ALLOC_AND_ZERO(FrtStringIndex);
|
192
175
|
self->size = size;
|
193
176
|
self->index = FRT_ALLOC_AND_ZERO_N(long, size);
|
@@ -197,8 +180,7 @@ static void *string_create_index(int size)
|
|
197
180
|
return self;
|
198
181
|
}
|
199
182
|
|
200
|
-
static void string_destroy_index(void *p)
|
201
|
-
{
|
183
|
+
static void string_destroy_index(void *p) {
|
202
184
|
FrtStringIndex *self = (FrtStringIndex *)p;
|
203
185
|
int i;
|
204
186
|
free(self->index);
|
@@ -211,8 +193,7 @@ static void string_destroy_index(void *p)
|
|
211
193
|
|
212
194
|
static void string_handle_term(void *index_ptr,
|
213
195
|
FrtTermDocEnum *tde,
|
214
|
-
const char *text)
|
215
|
-
{
|
196
|
+
const char *text) {
|
216
197
|
FrtStringIndex *index = (FrtStringIndex *)index_ptr;
|
217
198
|
if (index->v_size >= index->v_capa) {
|
218
199
|
index->v_capa *= 2;
|
@@ -3,12 +3,6 @@
|
|
3
3
|
|
4
4
|
#include "frt_index.h"
|
5
5
|
|
6
|
-
/***************************************************************************
|
7
|
-
*
|
8
|
-
* FrtFieldIndex
|
9
|
-
*
|
10
|
-
***************************************************************************/
|
11
|
-
|
12
6
|
typedef struct FrtStringIndex {
|
13
7
|
int size;
|
14
8
|
long *index;
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#include "frt_field_info.h"
|
2
|
+
|
3
|
+
FrtFieldInfo *frt_fi_alloc(void) {
|
4
|
+
return FRT_ALLOC(FrtFieldInfo);
|
5
|
+
}
|
6
|
+
|
7
|
+
/*
 * Initialise an already-allocated FrtFieldInfo.
 *
 * fi   - storage to fill in (e.g. from frt_fi_alloc())
 * name - Ruby symbol ID of the field; must be non-zero
 * bits - FRT_FI_*_BM flag set; validated with bits_check(), which raises
 *        FRT_ARG_ERROR for inconsistent combinations
 *
 * Returns fi with boost 1.0, number 0, a reference count of 1 and no Ruby
 * wrapper object (rfi == Qnil).
 */
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
    /* ID is an integer type, so compare against integer zero rather than the
     * NULL pointer constant (pointer/integer comparison is not valid C). */
    assert((ID)0 != name);
    bits_check(bits);
    fi->name = name;
    fi->boost = 1.0f;
    fi->bits = bits;
    fi->number = 0;
    fi->ref_cnt = 1;
    fi->rfi = Qnil;
    return fi;
}
|
18
|
+
|
19
|
+
/*
 * Allocate and initialise a FrtFieldInfo for `name` with the given flag set.
 *
 * The flags are validated before anything is allocated: bits_check() raises
 * on an invalid combination, and raising from inside frt_fi_init() would
 * leave the freshly allocated struct unreachable. frt_fi_init() repeats the
 * check, which is harmless.
 *
 * Returns the new field info holding one reference; release it with
 * frt_fi_deref().
 */
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
    FrtFieldInfo *fi;

    bits_check(bits);
    fi = frt_fi_alloc();
    return frt_fi_init(fi, name, bits);
}
|
23
|
+
|
24
|
+
/*
 * Drop one reference to fi; the struct is freed once FRT_DEREF reports that
 * the count has reached zero.
 */
void frt_fi_deref(FrtFieldInfo *fi) {
    if (FRT_DEREF(fi) != 0) {
        return;
    }
    free(fi);
}
|
27
|
+
|
28
|
+
void bits_check(unsigned int bits) {
|
29
|
+
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
30
|
+
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
31
|
+
}
|
32
|
+
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
33
|
+
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
FrtCompressionType bits_get_compression_type(unsigned int bits) {
|
38
|
+
if (bits_is_compressed_brotli(bits)) {
|
39
|
+
return FRT_COMPRESSION_BROTLI;
|
40
|
+
} else if (bits_is_compressed_bz2(bits)) {
|
41
|
+
return FRT_COMPRESSION_BZ2;
|
42
|
+
} else if (bits_is_compressed_lz4(bits)) {
|
43
|
+
return FRT_COMPRESSION_LZ4;
|
44
|
+
} else {
|
45
|
+
return FRT_COMPRESSION_NONE;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
/*
 * Build a string of the form ["name":(flag, flag, ...)] listing the flags
 * set on fi. The result is allocated with FRT_ALLOC_N; the caller releases it.
 */
char *frt_fi_to_s(FrtFieldInfo *fi) {
    const unsigned int flags = fi->bits;
    const char *fi_name = rb_id2name(fi->name);
    char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
    char *tail = str;

    tail += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
                    bits_is_stored(flags) ? "is_stored, " : "",
                    bits_is_compressed(flags) ? "is_compressed, " : "",
                    bits_is_indexed(flags) ? "is_indexed, " : "",
                    bits_is_tokenized(flags) ? "is_tokenized, " : "",
                    bits_omit_norms(flags) ? "omit_norms, " : "",
                    bits_store_term_vector(flags) ? "store_term_vector, " : "",
                    bits_store_positions(flags) ? "store_positions, " : "",
                    bits_store_offsets(flags) ? "store_offsets, " : "");

    /* When at least one flag was printed the text ends in ", "; back up over
     * that separator so the closing brackets overwrite it. */
    if (tail[-2] == ',') {
        tail -= 2;
    }
    strcpy(tail, ")]");
    return str;
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#ifndef FRT_FIELD_INFO_H
|
2
|
+
#define FRT_FIELD_INFO_H
|
3
|
+
|
4
|
+
#include "frt_global.h"
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
/*
 * Default flag set for a field: stored, indexed, tokenized, with term
 * vectors including positions and offsets. The expansion is parenthesized
 * so that it stays a single operand inside larger expressions such as
 * `x & FRT_FI_DEFAULTS_BM` or `~FRT_FI_DEFAULTS_BM`.
 */
#define FRT_FI_DEFAULTS_BM (FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM)

/* Individual bit masks for FrtFieldInfo.bits / FrtFieldInfos.bits. */
#define FRT_FI_IS_STORED_BM 0x001
#define FRT_FI_IS_INDEXED_BM 0x002
#define FRT_FI_IS_TOKENIZED_BM 0x004
#define FRT_FI_OMIT_NORMS_BM 0x008
#define FRT_FI_STORE_TERM_VECTOR_BM 0x010
#define FRT_FI_STORE_POSITIONS_BM 0x020
#define FRT_FI_STORE_OFFSETS_BM 0x040
#define FRT_FI_COMPRESSION_BROTLI_BM 0x080
#define FRT_FI_COMPRESSION_BZ2_BM 0x100
#define FRT_FI_COMPRESSION_LZ4_BM 0x200
|
18
|
+
|
19
|
+
/*
 * FrtFieldInfo - per-field configuration shared through reference counting.
 *
 *   name    - field name as a Ruby symbol ID
 *   boost   - boost factor; frt_fi_init() sets it to 1.0
 *   bits    - combination of the FRT_FI_*_BM flags, queried via the bits_*() macros
 *   number  - position of this field inside its FrtFieldInfos, assigned by
 *             frt_fis_add_field(); 0 until then
 *   ref_cnt - reference count; starts at 1 and is released through frt_fi_deref()
 *   rfi     - initialised to Qnil
 *             NOTE(review): presumably the Ruby wrapper object - confirm
 */
typedef struct FrtFieldInfo {
|
20
|
+
ID name;
|
21
|
+
float boost;
|
22
|
+
unsigned int bits;
|
23
|
+
int number;
|
24
|
+
_Atomic unsigned int ref_cnt;
|
25
|
+
VALUE rfi;
|
26
|
+
} FrtFieldInfo;
|
27
|
+
|
28
|
+
/* Takes no arguments; `(void)` makes this a real prototype that matches the
 * definition, so the compiler rejects calls that pass arguments. */
extern FrtFieldInfo *frt_fi_alloc(void);
|
29
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
|
30
|
+
extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
|
31
|
+
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
32
|
+
extern void frt_fi_deref(FrtFieldInfo *fi);
|
33
|
+
|
34
|
+
extern void bits_check(unsigned int bits);
|
35
|
+
extern FrtCompressionType bits_get_compression_type(unsigned int bits);
|
36
|
+
/*
 * Flag predicates over a field's `bits` value. Each yields 1 or 0. The
 * argument is parenthesized so that expressions such as `a | b` are tested
 * as a whole rather than being split by operator precedence.
 */
#define bits_is_stored(bits) (((bits) & FRT_FI_IS_STORED_BM) != 0)
#define bits_is_indexed(bits) (((bits) & FRT_FI_IS_INDEXED_BM) != 0)
#define bits_is_tokenized(bits) (((bits) & FRT_FI_IS_TOKENIZED_BM) != 0)
#define bits_omit_norms(bits) (((bits) & FRT_FI_OMIT_NORMS_BM) != 0)
#define bits_store_term_vector(bits) (((bits) & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
#define bits_store_positions(bits) (((bits) & FRT_FI_STORE_POSITIONS_BM) != 0)
#define bits_store_offsets(bits) (((bits) & FRT_FI_STORE_OFFSETS_BM) != 0)
/* Norms exist only for fields that are indexed and do not omit norms. */
#define bits_has_norms(bits) (((bits) & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
#define bits_is_compressed_brotli(bits) (((bits) & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
#define bits_is_compressed_bz2(bits) (((bits) & FRT_FI_COMPRESSION_BZ2_BM) != 0)
#define bits_is_compressed_lz4(bits) (((bits) & FRT_FI_COMPRESSION_LZ4_BM) != 0)
/* True when any of the three compression flags is set. */
#define bits_is_compressed(bits) (bits_is_compressed_brotli(bits) || bits_is_compressed_bz2(bits) || bits_is_compressed_lz4(bits))
|
48
|
+
|
49
|
+
#endif
|
@@ -0,0 +1,196 @@
|
|
1
|
+
#include "frt_field_infos.h"
|
2
|
+
#include "frt_except.h"
|
3
|
+
|
4
|
+
FrtFieldInfos *frt_fis_alloc(void) {
|
5
|
+
return FRT_ALLOC(FrtFieldInfos);
|
6
|
+
}
|
7
|
+
|
8
|
+
/*
 * Initialise an already-allocated FrtFieldInfos as an empty collection.
 *
 * bits is the default flag set for fields created on demand by
 * frt_fis_get_or_add_field(); it is validated with bits_check() before
 * anything else happens. The name dictionary releases its entries through
 * frt_fi_deref(). Returns fis holding one reference.
 */
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
    bits_check(bits);

    /* name -> field info lookup table */
    fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);

    /* field array, addressed by field number */
    fis->size = 0;
    fis->capa = FIELD_INFOS_INIT_CAPA;
    fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);

    fis->bits = bits;
    fis->ref_cnt = 1;
    fis->rfis = Qnil;
    return fis;
}
|
19
|
+
|
20
|
+
/*
 * Allocate and initialise an empty FrtFieldInfos whose default field flags
 * are `bits`.
 *
 * The flags are validated before anything is allocated: bits_check() raises
 * on an invalid combination, and raising from inside frt_fis_init() would
 * leave the freshly allocated struct unreachable. frt_fis_init() repeats
 * the check, which is harmless.
 *
 * Returns the new collection holding one reference; release it with
 * frt_fis_deref().
 */
FrtFieldInfos *frt_fis_new(unsigned int bits) {
    FrtFieldInfos *fis;

    bits_check(bits);
    fis = frt_fis_alloc();
    return frt_fis_init(fis, bits);
}
|
24
|
+
|
25
|
+
/*
 * Register fi in fis under its name and give it the next field number.
 *
 * The field array is doubled when full. Raises FRT_ARG_ERROR if a field
 * with the same name is already registered. On success the collection takes
 * its own reference on fi. Returns fi.
 */
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
    int slot;

    if (fis->capa == fis->size) {
        fis->capa <<= 1;
        FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
    }
    if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
        FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
    }

    slot = fis->size;
    FRT_REF(fi);
    fi->number = slot;
    fis->fields[slot] = fi;
    fis->size = slot + 1;
    return fi;
}
|
39
|
+
|
40
|
+
/* Look up the field registered under `name`; returns whatever frt_h_get()
 * yields for that key, cast to FrtFieldInfo *. */
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
    void *found = frt_h_get(fis->field_dict, (void *)name);

    return (FrtFieldInfo *)found;
}
|
43
|
+
|
44
|
+
/* Return the field number registered for `name`, or -1 when the lookup
 * finds no such field. */
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
    const FrtFieldInfo *found = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);

    return found ? found->number : -1;
}
|
49
|
+
|
50
|
+
/*
 * Return the field registered under `name`. If there is none, create one
 * with the collection's default flags, register it and return it.
 */
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
    FrtFieldInfo *existing = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
    FrtFieldInfo *created;

    if (existing) {
        return existing;
    }
    created = frt_fi_new(name, fis->bits);
    frt_fis_add_field(fis, created);
    return created;
}
|
58
|
+
|
59
|
+
/* Report whether at least one registered field has the store-term-vector
 * flag set. */
bool frt_fis_has_vectors(FrtFieldInfos *fis) {
    int remaining = fis->size;
    FrtFieldInfo **cursor = fis->fields;

    while (remaining-- > 0) {
        if (bits_store_term_vector((*cursor)->bits)) {
            return true;
        }
        cursor++;
    }
    return false;
}
|
70
|
+
|
71
|
+
/*
 * Read a FrtFieldInfos from `is`, in the layout produced by frt_fis_write():
 * default bits (vint), field count (vint), then per field its name (string),
 * boost (raw 32 bits of the float) and bits (vint).
 *
 * Returns the new collection holding one reference. If anything raises while
 * reading, all partially built state is released before the error
 * propagates:
 *   - the field being read is freed by the inner handler; registration now
 *     happens inside that handler's scope, so a field rejected by
 *     frt_fis_add_field() (e.g. a duplicate name) is freed too instead of
 *     leaking;
 *   - the collection is dereferenced by the outer handler, but only if it
 *     was actually created. Reading the default bits or frt_fis_new() may
 *     raise while fis is still NULL.
 */
FrtFieldInfos *frt_fis_read(FrtInStream *is) {
    FrtFieldInfos *volatile fis = NULL;
    char *field_name;
    FRT_TRY
        do {
            volatile int i;
            union { frt_u32 i; float f; } tmp;
            FrtFieldInfo *volatile fi;
            fis = frt_fis_new(frt_is_read_vint(is));
            for (i = frt_is_read_vint(is); i > 0; i--) {
                fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
                FRT_TRY
                    field_name = frt_is_read_string_safe(is);
                    fi->name = rb_intern(field_name);
                    free(field_name);
                    /* boost is stored as the raw bit pattern of the float */
                    tmp.i = frt_is_read_u32(is);
                    fi->boost = tmp.f;
                    fi->bits = frt_is_read_vint(is);
                    /* On failure nothing has been inserted, so fi is still
                     * exclusively ours and the handler below may free it. */
                    frt_fis_add_field(fis, fi);
                    /* the collection is the only owner of this field */
                    fi->ref_cnt = 1;
                FRT_XCATCHALL
                    free(fi);
                FRT_XENDTRY
            }
        } while (0);
    FRT_XCATCHALL
        if (fis) {
            frt_fis_deref(fis);
        }
    FRT_XENDTRY
    return fis;
}
|
101
|
+
|
102
|
+
/*
 * Serialise fis to `os`: default bits (vint), field count (vint), then for
 * each field in number order its name (string), boost (raw 32 bits of the
 * float) and bits (vint).
 */
void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os) {
    const int field_total = fis->size;
    int idx = 0;

    frt_os_write_vint(os, fis->bits);
    frt_os_write_vint(os, fis->size);

    while (idx < field_total) {
        FrtFieldInfo *field = fis->fields[idx++];
        union { frt_u32 i; float f; } boost_bits;

        frt_os_write_string(os, rb_id2name(field->name));
        /* emit the float's bit pattern unchanged */
        boost_bits.f = field->boost;
        frt_os_write_u32(os, boost_bits.i);
        frt_os_write_vint(os, field->bits);
    }
}
|
120
|
+
|
121
|
+
static const char *store_str[] = {
|
122
|
+
":no",
|
123
|
+
":yes",
|
124
|
+
":compressed"
|
125
|
+
};
|
126
|
+
|
127
|
+
static const char *fi_store_str(FrtFieldInfo *fi) {
|
128
|
+
return store_str[bits_is_compressed(fi->bits) ? 2 : fi->bits & 0x1];
|
129
|
+
}
|
130
|
+
|
131
|
+
static const char *index_str[] = {
|
132
|
+
":no",
|
133
|
+
":untokenized",
|
134
|
+
"",
|
135
|
+
":yes",
|
136
|
+
"",
|
137
|
+
":untokenized_omit_norms",
|
138
|
+
"",
|
139
|
+
":omit_norms"
|
140
|
+
};
|
141
|
+
|
142
|
+
static const char *fi_index_str(FrtFieldInfo *fi) {
|
143
|
+
return index_str[(fi->bits >> 1) & 0x7];
|
144
|
+
}
|
145
|
+
|
146
|
+
static const char *term_vector_str[] = {
|
147
|
+
":no",
|
148
|
+
":yes",
|
149
|
+
"",
|
150
|
+
":with_positions",
|
151
|
+
"",
|
152
|
+
":with_offsets",
|
153
|
+
"",
|
154
|
+
":with_positions_offsets"
|
155
|
+
};
|
156
|
+
|
157
|
+
static const char *fi_term_vector_str(FrtFieldInfo *fi) {
|
158
|
+
return term_vector_str[(fi->bits >> 4) & 0x7];
|
159
|
+
}
|
160
|
+
|
161
|
+
/*
 * Render the textual description of fis into buf, writing at most capa bytes
 * (NUL-terminated whenever capa > 0). Returns the number of characters the
 * complete rendering needs, excluding the terminator, so a first call with
 * (NULL, 0) measures the output.
 */
static size_t fis_render(char *buf, size_t capa, FrtFieldInfos *fis) {
    const int fis_size = fis->size;
    size_t pos = 0;
    int i, n;

    n = snprintf(buf, capa,
                 "default:\n"
                 "  store: %s\n"
                 "  index: %s\n"
                 "  term_vector: %s\n"
                 "fields:\n",
                 store_str[bits_is_compressed(fis->bits) ? 2 : fis->bits & 0x1],
                 index_str[(fis->bits >> 1) & 0x7],
                 term_vector_str[(fis->bits >> 4) & 0x7]);
    if (n > 0) {
        pos += (size_t)n;
    }
    for (i = 0; i < fis_size; i++) {
        FrtFieldInfo *fi = fis->fields[i];
        /* never form buf + pos when measuring (buf == NULL) or when full */
        const int has_room = pos < capa;

        n = snprintf(has_room ? buf + pos : NULL, has_room ? capa - pos : 0,
                     "  %s:\n"
                     "    boost: %f\n"
                     "    store: %s\n"
                     "    index: %s\n"
                     "    term_vector: %s\n",
                     rb_id2name(fi->name), fi->boost, fi_store_str(fi),
                     fi_index_str(fi), fi_term_vector_str(fi));
        if (n > 0) {
            pos += (size_t)n;
        }
    }
    return pos;
}

/*
 * Describe fis as text: the default store/index/term_vector settings
 * followed by one entry per field with its boost and settings.
 *
 * The buffer is sized from a measuring pass instead of a per-field estimate,
 * so long field names or the longest option labels cannot overflow it.
 *
 * Returns a string allocated with FRT_ALLOC_N; the caller releases it.
 */
char *frt_fis_to_s(FrtFieldInfos *fis) {
    const size_t needed = fis_render(NULL, 0, fis) + 1;
    char *buf = FRT_ALLOC_N(char, needed);

    fis_render(buf, needed, fis);
    return buf;
}
|
189
|
+
|
190
|
+
/*
 * Drop one reference to fis. Once FRT_DEREF reports zero, destroy the name
 * dictionary, then free the field array and the collection itself.
 */
void frt_fis_deref(FrtFieldInfos *fis) {
    if (FRT_DEREF(fis) != 0) {
        return;
    }
    frt_h_destroy(fis->field_dict);
    free(fis->fields);
    free(fis);
}
|