isomorfeus-ferret 0.17.2 → 0.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -0,0 +1,69 @@
|
|
1
|
+
#include "frt_field_info.h"
|
2
|
+
|
3
|
+
FrtFieldInfo *frt_fi_alloc(void) {
|
4
|
+
return FRT_ALLOC(FrtFieldInfo);
|
5
|
+
}
|
6
|
+
|
7
|
+
/*
 * Fill in an already allocated FrtFieldInfo.
 *
 * fi   - storage to initialise
 * name - Ruby ID naming the field; must be non-zero
 * bits - combination of FRT_FI_*_BM flags, validated by bits_check (which
 *        raises FRT_ARG_ERROR for inconsistent combinations)
 *
 * Sets boost to 1.0, number to 0, the reference count to 1 and the rfi slot
 * to Qnil. Returns fi.
 */
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
    /* ID is an integer type, so compare against 0 rather than the pointer NULL */
    assert(0 != name);
    bits_check(bits);
    fi->name = name;
    fi->boost = 1.0f;
    fi->bits = bits;
    fi->number = 0;
    fi->ref_cnt = 1;
    fi->rfi = Qnil;
    return fi;
}
|
18
|
+
|
19
|
+
/*
 * Allocate and initialise a FrtFieldInfo in one step: frt_fi_alloc followed
 * by frt_fi_init with the given name and flag bits.
 */
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
    return frt_fi_init(frt_fi_alloc(), name, bits);
}
|
23
|
+
|
24
|
+
/*
 * Drop one reference via FRT_DEREF; the struct is freed once that yields 0.
 */
void frt_fi_deref(FrtFieldInfo *fi) {
    if (FRT_DEREF(fi) != 0) {
        return; /* still referenced elsewhere */
    }
    free(fi);
}
|
27
|
+
|
28
|
+
void bits_check(unsigned int bits) {
|
29
|
+
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
30
|
+
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
31
|
+
}
|
32
|
+
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
33
|
+
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
FrtCompressionType bits_get_compression_type(unsigned int bits) {
|
38
|
+
if (bits_is_compressed_brotli(bits)) {
|
39
|
+
return FRT_COMPRESSION_BROTLI;
|
40
|
+
} else if (bits_is_compressed_bz2(bits)) {
|
41
|
+
return FRT_COMPRESSION_BZ2;
|
42
|
+
} else if (bits_is_compressed_lz4(bits)) {
|
43
|
+
return FRT_COMPRESSION_LZ4;
|
44
|
+
} else {
|
45
|
+
return FRT_COMPRESSION_NONE;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
/*
 * Render a field info as `["name":(flag, flag, ...)]` into a buffer obtained
 * from FRT_ALLOC_N and return that buffer.
 *
 * The buffer is sized as the name length plus 200 bytes of headroom for the
 * flag names.
 */
char *frt_fi_to_s(FrtFieldInfo *fi) {
    const unsigned int bits = fi->bits;
    const char *fi_name = rb_id2name(fi->name);
    char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
    char *end = str + sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
                              bits_is_stored(bits) ? "is_stored, " : "",
                              bits_is_compressed(bits) ? "is_compressed, " : "",
                              bits_is_indexed(bits) ? "is_indexed, " : "",
                              bits_is_tokenized(bits) ? "is_tokenized, " : "",
                              bits_omit_norms(bits) ? "omit_norms, " : "",
                              bits_store_term_vector(bits) ? "store_term_vector, " : "",
                              bits_store_positions(bits) ? "store_positions, " : "",
                              bits_store_offsets(bits) ? "store_offsets, " : "");

    /* when at least one flag was printed the text ends in ", "; back up over it */
    if (end[-2] == ',') {
        end -= 2;
    }

    sprintf(end, ")]");
    return str;
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#ifndef FRT_FIELD_INFO_H
|
2
|
+
#define FRT_FIELD_INFO_H
|
3
|
+
|
4
|
+
#include "frt_global.h"
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
/* Bit flags describing how a field is stored, indexed and compressed. */
#define FRT_FI_IS_STORED_BM 0x001
#define FRT_FI_IS_INDEXED_BM 0x002
#define FRT_FI_IS_TOKENIZED_BM 0x004
#define FRT_FI_OMIT_NORMS_BM 0x008
#define FRT_FI_STORE_TERM_VECTOR_BM 0x010
#define FRT_FI_STORE_POSITIONS_BM 0x020
#define FRT_FI_STORE_OFFSETS_BM 0x040
#define FRT_FI_COMPRESSION_BROTLI_BM 0x080
#define FRT_FI_COMPRESSION_BZ2_BM 0x100
#define FRT_FI_COMPRESSION_LZ4_BM 0x200

/*
 * Default flag set: stored, indexed, tokenized, with term vectors including
 * positions and offsets. Parenthesized so the expansion stays one operand
 * when it is combined with `&`, `==` or other operators.
 */
#define FRT_FI_DEFAULTS_BM (FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM)
|
18
|
+
|
19
|
+
typedef struct FrtFieldInfo {
|
20
|
+
ID name;
|
21
|
+
float boost;
|
22
|
+
unsigned int bits;
|
23
|
+
int number;
|
24
|
+
_Atomic unsigned int ref_cnt;
|
25
|
+
VALUE rfi;
|
26
|
+
} FrtFieldInfo;
|
27
|
+
|
28
|
+
extern FrtFieldInfo *frt_fi_alloc();
|
29
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
|
30
|
+
extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
|
31
|
+
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
32
|
+
extern void frt_fi_deref(FrtFieldInfo *fi);
|
33
|
+
|
34
|
+
extern void bits_check(unsigned int bits);
|
35
|
+
extern FrtCompressionType bits_get_compression_type(unsigned int bits);
|
36
|
+
/*
 * Flag predicates over a FRT_FI_*_BM bit set. The argument is parenthesized
 * so that expressions such as `a | b` are tested as a whole.
 */
#define bits_is_stored(bits) (((bits) & FRT_FI_IS_STORED_BM) != 0)
#define bits_is_indexed(bits) (((bits) & FRT_FI_IS_INDEXED_BM) != 0)
#define bits_is_tokenized(bits) (((bits) & FRT_FI_IS_TOKENIZED_BM) != 0)
#define bits_omit_norms(bits) (((bits) & FRT_FI_OMIT_NORMS_BM) != 0)
#define bits_store_term_vector(bits) (((bits) & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
#define bits_store_positions(bits) (((bits) & FRT_FI_STORE_POSITIONS_BM) != 0)
#define bits_store_offsets(bits) (((bits) & FRT_FI_STORE_OFFSETS_BM) != 0)
/* true when the field is indexed and norms are not omitted */
#define bits_has_norms(bits) (((bits) & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
#define bits_is_compressed_brotli(bits) (((bits) & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
#define bits_is_compressed_bz2(bits) (((bits) & FRT_FI_COMPRESSION_BZ2_BM) != 0)
#define bits_is_compressed_lz4(bits) (((bits) & FRT_FI_COMPRESSION_LZ4_BM) != 0)
/* true when any of the three compression flags is set */
#define bits_is_compressed(bits) (bits_is_compressed_brotli(bits) || bits_is_compressed_bz2(bits) || bits_is_compressed_lz4(bits))
|
48
|
+
|
49
|
+
#endif
|
@@ -0,0 +1,196 @@
|
|
1
|
+
#include "frt_field_infos.h"
|
2
|
+
#include "frt_except.h"
|
3
|
+
|
4
|
+
FrtFieldInfos *frt_fis_alloc(void) {
|
5
|
+
return FRT_ALLOC(FrtFieldInfos);
|
6
|
+
}
|
7
|
+
|
8
|
+
/*
 * Set up an empty field collection.
 *
 * bits holds the default FRT_FI_*_BM flags for fields created through
 * frt_fis_get_or_add_field; it is validated with bits_check first.
 * The name dictionary releases its values with frt_fi_deref.
 * Returns fis.
 */
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
    bits_check(bits);

    fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
    fis->capa = FIELD_INFOS_INIT_CAPA;
    fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
    fis->size = 0;

    fis->bits = bits;
    fis->rfis = Qnil;
    fis->ref_cnt = 1;
    return fis;
}
|
19
|
+
|
20
|
+
/*
 * Allocate and initialise a field collection whose default flags are bits.
 */
FrtFieldInfos *frt_fis_new(unsigned int bits) {
    return frt_fis_init(frt_fis_alloc(), bits);
}
|
24
|
+
|
25
|
+
/*
 * Append fi to the collection and register it under its name.
 *
 * The fields array is doubled when full. Raises FRT_ARG_ERROR if a field of
 * the same name is already registered. On success a reference is taken with
 * FRT_REF and fi->number is set to its index in fis->fields.
 * Returns fi.
 */
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
    int slot;

    if (fis->capa == fis->size) {
        fis->capa <<= 1;
        FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
    }

    if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
        FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
    }

    FRT_REF(fi);
    slot = fis->size;
    fi->number = slot;
    fis->fields[slot] = fi;
    fis->size = slot + 1;
    return fi;
}
|
39
|
+
|
40
|
+
/*
 * Look a field up by name in the dictionary; returns whatever frt_h_get
 * yields for that key (a null result is treated as "not found" by the other
 * lookups in this file).
 */
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
    void *found = frt_h_get(fis->field_dict, (void *)name);
    return (FrtFieldInfo *)found;
}
|
43
|
+
|
44
|
+
/*
 * Return the number of the field called name, or -1 when the dictionary has
 * no entry for it.
 */
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
    FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);
    return fi ? fi->number : -1;
}
|
49
|
+
|
50
|
+
/*
 * Return the field called name, creating it with the collection's default
 * flags (fis->bits) and adding it when it does not exist yet.
 *
 * NOTE(review): frt_fi_new starts the reference count at 1 and
 * frt_fis_add_field takes a further reference with FRT_REF, so a field
 * created here presumably ends up with a count of 2 - confirm that the
 * extra reference is released somewhere.
 */
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
    FrtFieldInfo *fi = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);

    if (fi) {
        return fi;
    }

    fi = (FrtFieldInfo*)frt_fi_new(name, fis->bits);
    frt_fis_add_field(fis, fi);
    return fi;
}
|
58
|
+
|
59
|
+
/*
 * Report whether at least one field in the collection has the
 * store-term-vector flag set.
 */
bool frt_fis_has_vectors(FrtFieldInfos *fis) {
    const int count = fis->size;
    int idx = 0;

    while (idx < count) {
        if (bits_store_term_vector(fis->fields[idx]->bits)) {
            return true;
        }
        idx++;
    }
    return false;
}
|
70
|
+
|
71
|
+
/*
 * Read a field collection from is.
 *
 * Layout consumed, mirroring frt_fis_write:
 *   vint    default flag bits of the collection
 *   vint    number of fields
 *   per field:
 *     string  field name (interned with rb_intern)
 *     u32     bit pattern of the float boost
 *     vint    flag bits of the field
 *
 * Cleanup runs inside the FRT_XCATCHALL sections: a half-read field is freed
 * by the inner section and the collection is dereferenced by the outer one.
 * NOTE(review): presumably the exception keeps propagating after those
 * sections run - confirm against the FRT_XCATCHALL/FRT_XENDTRY definitions.
 */
FrtFieldInfos *frt_fis_read(FrtInStream *is) {
    FrtFieldInfos *volatile fis = NULL;
    char *field_name;
    FRT_TRY
        do {
            volatile int remaining;
            union { frt_u32 i; float f; } boost_bits;
            FrtFieldInfo *volatile fi;
            fis = frt_fis_new(frt_is_read_vint(is));
            remaining = frt_is_read_vint(is);
            while (remaining > 0) {
                fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
                FRT_TRY
                    field_name = frt_is_read_string_safe(is);
                    fi->name = rb_intern(field_name);
                    free(field_name);
                    /* the boost travels as the raw 32-bit pattern of a float */
                    boost_bits.i = frt_is_read_u32(is);
                    fi->boost = boost_bits.f;
                    fi->bits = frt_is_read_vint(is);
                FRT_XCATCHALL
                    free(fi);
                FRT_XENDTRY
                frt_fis_add_field(fis, fi);
                /* set after the add, which itself takes a reference */
                fi->ref_cnt = 1;
                remaining--;
            }
        } while (0);
    FRT_XCATCHALL
        frt_fis_deref(fis);
    FRT_XENDTRY
    return fis;
}
|
101
|
+
|
102
|
+
/*
 * Write the collection to os: the default flag bits and the field count as
 * vints, then for each field its name as a string, the bit pattern of its
 * boost as a u32 and its flag bits as a vint.
 */
void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os) {
    const int fis_size = fis->size;
    union { frt_u32 i; float f; } boost_bits;
    int idx = 0;

    frt_os_write_vint(os, fis->bits);
    frt_os_write_vint(os, fis->size);

    while (idx < fis_size) {
        FrtFieldInfo *cur = fis->fields[idx];

        frt_os_write_string(os, rb_id2name(cur->name));
        /* reinterpret the float boost as 32 raw bits for storage */
        boost_bits.f = cur->boost;
        frt_os_write_u32(os, boost_bits.i);
        frt_os_write_vint(os, cur->bits);
        idx++;
    }
}
|
120
|
+
|
121
|
+
static const char *store_str[] = {
|
122
|
+
":no",
|
123
|
+
":yes",
|
124
|
+
":compressed"
|
125
|
+
};
|
126
|
+
|
127
|
+
static const char *fi_store_str(FrtFieldInfo *fi) {
|
128
|
+
return store_str[bits_is_compressed(fi->bits) ? 2 : fi->bits & 0x1];
|
129
|
+
}
|
130
|
+
|
131
|
+
static const char *index_str[] = {
|
132
|
+
":no",
|
133
|
+
":untokenized",
|
134
|
+
"",
|
135
|
+
":yes",
|
136
|
+
"",
|
137
|
+
":untokenized_omit_norms",
|
138
|
+
"",
|
139
|
+
":omit_norms"
|
140
|
+
};
|
141
|
+
|
142
|
+
static const char *fi_index_str(FrtFieldInfo *fi) {
|
143
|
+
return index_str[(fi->bits >> 1) & 0x7];
|
144
|
+
}
|
145
|
+
|
146
|
+
static const char *term_vector_str[] = {
|
147
|
+
":no",
|
148
|
+
":yes",
|
149
|
+
"",
|
150
|
+
":with_positions",
|
151
|
+
"",
|
152
|
+
":with_offsets",
|
153
|
+
"",
|
154
|
+
":with_positions_offsets"
|
155
|
+
};
|
156
|
+
|
157
|
+
static const char *fi_term_vector_str(FrtFieldInfo *fi) {
|
158
|
+
return term_vector_str[(fi->bits >> 4) & 0x7];
|
159
|
+
}
|
160
|
+
|
161
|
+
/*
 * Format the description of fis into buf, writing at most capa bytes
 * including the terminating NUL, and return the number of characters the
 * complete text needs (excluding the NUL). Passing buf == NULL with
 * capa == 0 only measures.
 */
static size_t fis_to_s_format(char *buf, size_t capa, FrtFieldInfos *fis) {
    const int fis_size = fis->size;
    size_t total = 0;
    int i, n;

    n = snprintf(buf, capa,
                 "default:\n"
                 "  store: %s\n"
                 "  index: %s\n"
                 "  term_vector: %s\n"
                 "fields:\n",
                 store_str[bits_is_compressed(fis->bits) ? 2 : fis->bits & 0x1],
                 index_str[(fis->bits >> 1) & 0x7],
                 term_vector_str[(fis->bits >> 4) & 0x7]);
    if (n > 0) {
        total += (size_t)n;
    }

    for (i = 0; i < fis_size; i++) {
        FrtFieldInfo *fi = fis->fields[i];
        /* never hand snprintf a position past the end of the buffer */
        const size_t used = total < capa ? total : capa;

        n = snprintf(buf ? buf + used : NULL, capa - used,
                     "  %s:\n"
                     "    boost: %f\n"
                     "    store: %s\n"
                     "    index: %s\n"
                     "    term_vector: %s\n",
                     rb_id2name(fi->name), fi->boost, fi_store_str(fi),
                     fi_index_str(fi), fi_term_vector_str(fi));
        if (n > 0) {
            total += (size_t)n;
        }
    }
    return total;
}

/*
 * Return a textual description of the collection: the default store, index
 * and term_vector settings followed by one entry per field with its boost
 * and settings. The buffer comes from FRT_ALLOC_N.
 *
 * The text is measured first and the buffer sized to fit, so field names of
 * any length cannot overrun it (a fixed per-field estimate is not enough,
 * because names are unbounded).
 */
char *frt_fis_to_s(FrtFieldInfos *fis) {
    const size_t capa = fis_to_s_format(NULL, 0, fis) + 1;
    char *buf = FRT_ALLOC_N(char, capa);

    fis_to_s_format(buf, capa, fis);
    return buf;
}
|
189
|
+
|
190
|
+
/*
 * Drop one reference via FRT_DEREF. Once that yields 0 the name dictionary
 * is destroyed (its values are released through frt_fi_deref, as configured
 * in frt_fis_init), then the fields array and the struct itself are freed.
 */
void frt_fis_deref(FrtFieldInfos *fis) {
    if (FRT_DEREF(fis) != 0) {
        return; /* still referenced elsewhere */
    }
    frt_h_destroy(fis->field_dict);
    free(fis->fields);
    free(fis);
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#ifndef FRT_FIELD_INFOS_H
|
2
|
+
#define FRT_FIELD_INFOS_H
|
3
|
+
|
4
|
+
#include "frt_field_info.h"
|
5
|
+
#include "frt_hash.h"
|
6
|
+
#include "frt_in_stream.h"
|
7
|
+
#include "frt_out_stream.h"
|
8
|
+
|
9
|
+
#define FIELD_INFOS_INIT_CAPA 4
|
10
|
+
|
11
|
+
/* carry changes over to dummy_fis in test/test_segments.c */
|
12
|
+
typedef struct FrtFieldInfos {
|
13
|
+
unsigned int bits;
|
14
|
+
int size;
|
15
|
+
int capa;
|
16
|
+
FrtFieldInfo **fields;
|
17
|
+
FrtHash *field_dict;
|
18
|
+
_Atomic unsigned int ref_cnt;
|
19
|
+
VALUE rfis;
|
20
|
+
} FrtFieldInfos;
|
21
|
+
|
22
|
+
FrtFieldInfos *frt_fis_alloc();
|
23
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits);
|
24
|
+
FrtFieldInfos *frt_fis_new(unsigned int bits);
|
25
|
+
extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
|
26
|
+
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
|
27
|
+
extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
|
28
|
+
extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name);
|
29
|
+
extern bool frt_fis_has_vectors(FrtFieldInfos *fis);
|
30
|
+
extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
|
31
|
+
extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
|
32
|
+
extern char *frt_fis_to_s(FrtFieldInfos *fis);
|
33
|
+
extern void frt_fis_deref(FrtFieldInfos *fis);
|
34
|
+
|
35
|
+
#endif
|
@@ -44,8 +44,7 @@ int frt_icmp(const void *p1, const void *p2) {
|
|
44
44
|
|
45
45
|
if (i1 > i2) {
|
46
46
|
return 1;
|
47
|
-
}
|
48
|
-
else if (i1 < i2) {
|
47
|
+
} else if (i1 < i2) {
|
49
48
|
return -1;
|
50
49
|
}
|
51
50
|
return 0;
|
@@ -110,6 +109,14 @@ char *frt_estrdup(const char *s) {
|
|
110
109
|
return t;
|
111
110
|
}
|
112
111
|
|
112
|
+
/* frt_estrndup: duplicate a string with length given, report if error */
|
113
|
+
char *frt_estrndup(const char *s, int len) {
|
114
|
+
char *t = FRT_ALLOC_N(char, len + 1);
|
115
|
+
strncpy(t, s, len);
|
116
|
+
t[len] = '\0';
|
117
|
+
return t;
|
118
|
+
}
|
119
|
+
|
113
120
|
/* Pretty print a float to the buffer. The buffer should have at least 32
|
114
121
|
* bytes available.
|
115
122
|
*/
|
@@ -248,8 +255,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
|
|
248
255
|
if (free_mes_capa == 0) {
|
249
256
|
free_mes_capa = 16;
|
250
257
|
free_mes = FRT_ALLOC_N(FreeMe, free_mes_capa);
|
251
|
-
}
|
252
|
-
else if (free_mes_capa <= free_mes_size) {
|
258
|
+
} else if (free_mes_capa <= free_mes_size) {
|
253
259
|
free_mes_capa *= 2;
|
254
260
|
FRT_REALLOC_N(free_mes, FreeMe, free_mes_capa);
|
255
261
|
}
|
@@ -83,12 +83,15 @@ typedef void (*frt_free_ft)(void *key);
|
|
83
83
|
# define Xj fprintf(stdout,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
84
84
|
#endif
|
85
85
|
|
86
|
+
extern const char *segm_idx_name;
|
87
|
+
|
86
88
|
extern unsigned int *frt_imalloc(unsigned int value);
|
87
89
|
extern unsigned long *frt_lmalloc(unsigned long value);
|
88
90
|
extern frt_u32 *frt_u32malloc(frt_u32 value);
|
89
91
|
extern frt_u64 *frt_u64malloc(frt_u64 value);
|
90
92
|
|
91
93
|
extern char *frt_estrdup(const char *s);
|
94
|
+
extern char *frt_estrndup(const char *s, int len);
|
92
95
|
extern char *frt_estrcat(char *str, char *str_cat);
|
93
96
|
extern char *frt_epstrdup(const char *fmt, int len, ...);
|
94
97
|
|
@@ -120,8 +123,7 @@ extern void frt_dummy_free(void *p);
|
|
120
123
|
* Returns the count of leading [MSB] 0 bits in +word+.
|
121
124
|
*/
|
122
125
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
123
|
-
int frt_count_leading_zeros(frt_u32 word)
|
124
|
-
{
|
126
|
+
int frt_count_leading_zeros(frt_u32 word) {
|
125
127
|
#ifdef __GNUC__
|
126
128
|
if (word)
|
127
129
|
return __builtin_clz(word);
|
@@ -153,8 +155,7 @@ int frt_count_leading_zeros(frt_u32 word)
|
|
153
155
|
}
|
154
156
|
|
155
157
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
156
|
-
int frt_count_leading_ones(frt_u32 word)
|
157
|
-
{
|
158
|
+
int frt_count_leading_ones(frt_u32 word) {
|
158
159
|
return frt_count_leading_zeros(~word);
|
159
160
|
}
|
160
161
|
|
@@ -163,8 +164,7 @@ int frt_count_leading_ones(frt_u32 word)
|
|
163
164
|
*/
|
164
165
|
|
165
166
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
166
|
-
int frt_count_trailing_zeros(frt_u32 word)
|
167
|
-
{
|
167
|
+
int frt_count_trailing_zeros(frt_u32 word) {
|
168
168
|
#ifdef __GNUC__
|
169
169
|
if (word)
|
170
170
|
return __builtin_ctz(word);
|
@@ -196,14 +196,12 @@ int frt_count_trailing_zeros(frt_u32 word)
|
|
196
196
|
}
|
197
197
|
|
198
198
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
199
|
-
int frt_count_trailing_ones(frt_u32 word)
|
200
|
-
{
|
199
|
+
int frt_count_trailing_ones(frt_u32 word) {
|
201
200
|
return frt_count_trailing_zeros(~word);
|
202
201
|
}
|
203
202
|
|
204
203
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
205
|
-
int frt_count_ones(frt_u32 word)
|
206
|
-
{
|
204
|
+
int frt_count_ones(frt_u32 word) {
|
207
205
|
#ifdef __GNUC__
|
208
206
|
return __builtin_popcount(word);
|
209
207
|
#else
|
@@ -233,8 +231,7 @@ int frt_count_ones(frt_u32 word)
|
|
233
231
|
}
|
234
232
|
|
235
233
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
236
|
-
int frt_count_zeros(frt_u32 word)
|
237
|
-
{
|
234
|
+
int frt_count_zeros(frt_u32 word) {
|
238
235
|
return frt_count_ones(~word);
|
239
236
|
}
|
240
237
|
|
@@ -242,8 +239,7 @@ int frt_count_zeros(frt_u32 word)
|
|
242
239
|
* Round up to the next power of 2
|
243
240
|
*/
|
244
241
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
245
|
-
int frt_round2(frt_u32 word)
|
246
|
-
{
|
242
|
+
int frt_round2(frt_u32 word) {
|
247
243
|
return 1 << (32 - frt_count_leading_zeros(word));
|
248
244
|
}
|
249
245
|
|
@@ -261,7 +257,7 @@ extern FILE *frt_x_exception_stream;
|
|
261
257
|
|
262
258
|
/**
|
263
259
|
* The convenience macro +EXCEPTION_STREAM+ returns stderr when
|
264
|
-
* +frt_x_exception_stream+ isn't
|
260
|
+
* +frt_x_exception_stream+ isn't explicitly set.
|
265
261
|
*/
|
266
262
|
#define EXCEPTION 2
|
267
263
|
#define EXCEPTION_STREAM (frt_x_exception_stream ? frt_x_exception_stream : stderr)
|
@@ -30,7 +30,7 @@ unsigned long frt_str_hash(const char *const str) {
|
|
30
30
|
}
|
31
31
|
|
32
32
|
unsigned long frt_ptr_hash(const void *const ptr) {
|
33
|
-
return (unsigned long)ptr;
|
33
|
+
return (unsigned long)(uintptr_t)ptr;
|
34
34
|
}
|
35
35
|
|
36
36
|
int frt_ptr_eq(const void *q1, const void *q2) {
|
@@ -73,7 +73,7 @@ static FrtHashEntry *frt_h_resize_lookup(FrtHash *self, register const unsigned
|
|
73
73
|
}
|
74
74
|
|
75
75
|
static FrtHashEntry *frt_h_lookup_ptr(FrtHash *self, const void *key) {
|
76
|
-
register const unsigned long hash = (unsigned long)key;
|
76
|
+
register const unsigned long hash = (unsigned long)(uintptr_t)key;
|
77
77
|
register unsigned long perturb;
|
78
78
|
register int mask = self->mask;
|
79
79
|
register FrtHashEntry *he0 = self->table;
|
@@ -375,21 +375,21 @@ FrtHashKeyStatus frt_h_has_key(FrtHash *self, const void *key) {
|
|
375
375
|
}
|
376
376
|
|
377
377
|
void *frt_h_get_int(FrtHash *self, const unsigned long key) {
|
378
|
-
return frt_h_get(self, (const void *)key);
|
378
|
+
return frt_h_get(self, (const void *)(uintptr_t)key);
|
379
379
|
}
|
380
380
|
|
381
381
|
int frt_h_del_int(FrtHash *self, const unsigned long key) {
|
382
|
-
return frt_h_del(self, (const void *)key);
|
382
|
+
return frt_h_del(self, (const void *)(uintptr_t)key);
|
383
383
|
}
|
384
384
|
|
385
385
|
void *frt_h_rem_int(FrtHash *self, const unsigned long key) {
|
386
|
-
return frt_h_rem(self, (const void *)key, false);
|
386
|
+
return frt_h_rem(self, (const void *)(uintptr_t)key, false);
|
387
387
|
}
|
388
388
|
|
389
389
|
FrtHashKeyStatus frt_h_set_int(FrtHash *self, const unsigned long key, void *value) {
|
390
390
|
FrtHashKeyStatus ret_val = FRT_HASH_KEY_DOES_NOT_EXIST;
|
391
391
|
FrtHashEntry *he;
|
392
|
-
if (!frt_h_set_ext(self, (const void *)key, &he)) {
|
392
|
+
if (!frt_h_set_ext(self, (const void *)(uintptr_t)key, &he)) {
|
393
393
|
/* Only free old value if it isn't the new value */
|
394
394
|
if (he->value != value) {
|
395
395
|
self->free_value_i(he->value);
|
@@ -404,7 +404,7 @@ FrtHashKeyStatus frt_h_set_int(FrtHash *self, const unsigned long key, void *val
|
|
404
404
|
|
405
405
|
int frt_h_set_safe_int(FrtHash *self, const unsigned long key, void *value) {
|
406
406
|
FrtHashEntry *he;
|
407
|
-
if (frt_h_set_ext(self, (const void *)key, &he)) {
|
407
|
+
if (frt_h_set_ext(self, (const void *)(uintptr_t)key, &he)) {
|
408
408
|
he->key = (char *)dummy_int_key;
|
409
409
|
he->value = value;
|
410
410
|
return true;
|
@@ -413,7 +413,7 @@ int frt_h_set_safe_int(FrtHash *self, const unsigned long key, void *value) {
|
|
413
413
|
}
|
414
414
|
|
415
415
|
int frt_h_has_key_int(FrtHash *self, const unsigned long key) {
|
416
|
-
return frt_h_has_key(self, (const void *)key);
|
416
|
+
return frt_h_has_key(self, (const void *)(uintptr_t)key);
|
417
417
|
}
|
418
418
|
|
419
419
|
void frt_h_each(FrtHash *self, void (*each_kv) (void *key, void *value, void *arg), void *arg) {
|