isomorfeus-ferret 0.17.2 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +118 -125
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +31 -23
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +2 -27
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +13 -32
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +20 -38
- data/ext/isomorfeus_ferret_ext/frt_index.c +292 -790
- data/ext/isomorfeus_ferret_ext/frt_index.h +1 -102
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +85 -171
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +52 -104
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +113 -226
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +82 -164
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +40 -87
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +15 -23
- data/ext/isomorfeus_ferret_ext/test_filter.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +277 -487
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -109
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +14 -24
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +14 -20
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +1 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +24 -4
@@ -0,0 +1,69 @@
|
|
1
|
+
#include "frt_field_info.h"
|
2
|
+
|
3
|
+
FrtFieldInfo *frt_fi_alloc(void) {
|
4
|
+
return FRT_ALLOC(FrtFieldInfo);
|
5
|
+
}
|
6
|
+
|
7
|
+
/*
 * Initialises an already allocated FrtFieldInfo.
 *
 * fi   - record to fill in
 * name - field name as a Ruby ID; must be non-zero
 * bits - FRT_FI_*_BM option flags, validated by bits_check()
 *
 * Sets boost to 1.0, number to 0, the reference count to 1 and rfi to Qnil.
 * Returns fi. bits_check() raises FRT_ARG_ERROR for an invalid flag
 * combination, in which case fi is left untouched.
 */
FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits) {
    /* ID is an integer handle, so compare against 0 rather than the
     * pointer constant NULL (pointer/integer comparison otherwise). */
    assert(0 != name);
    bits_check(bits);
    fi->name = name;
    fi->boost = 1.0f;
    fi->bits = bits;
    fi->number = 0;
    fi->ref_cnt = 1;
    fi->rfi = Qnil;
    return fi;
}
|
18
|
+
|
19
|
+
/*
 * Allocates and initialises a FrtFieldInfo for the field +name+ with the
 * option flags +bits+. The returned record starts with a reference count
 * of 1; release it with frt_fi_deref().
 *
 * Raises FRT_ARG_ERROR (via bits_check) for an invalid flag combination.
 */
FrtFieldInfo *frt_fi_new(ID name, unsigned int bits) {
    FrtFieldInfo *fi;

    /* Validate before allocating: if the check raised from inside
     * frt_fi_init() the fresh allocation would be orphaned. */
    bits_check(bits);
    fi = frt_fi_alloc();
    return frt_fi_init(fi, name, bits);
}
|
23
|
+
|
24
|
+
/*
 * Drops one reference to +fi+ through FRT_DEREF and frees the record once
 * that macro reports a count of zero.
 */
void frt_fi_deref(FrtFieldInfo *fi) {
    if (FRT_DEREF(fi) != 0) {
        return;
    }
    free(fi);
}
|
27
|
+
|
28
|
+
void bits_check(unsigned int bits) {
|
29
|
+
if (!bits_is_indexed(bits) && bits_store_term_vector(bits)) {
|
30
|
+
FRT_RAISE(FRT_ARG_ERROR, "You can't store the term vectors of an unindexed field.");
|
31
|
+
}
|
32
|
+
if (bits_is_compressed(bits) && !bits_is_stored(bits)) {
|
33
|
+
FRT_RAISE(FRT_ARG_ERROR, "Field must be stored for compression to be useful.");
|
34
|
+
}
|
35
|
+
}
|
36
|
+
|
37
|
+
FrtCompressionType bits_get_compression_type(unsigned int bits) {
|
38
|
+
if (bits_is_compressed_brotli(bits)) {
|
39
|
+
return FRT_COMPRESSION_BROTLI;
|
40
|
+
} else if (bits_is_compressed_bz2(bits)) {
|
41
|
+
return FRT_COMPRESSION_BZ2;
|
42
|
+
} else if (bits_is_compressed_lz4(bits)) {
|
43
|
+
return FRT_COMPRESSION_LZ4;
|
44
|
+
} else {
|
45
|
+
return FRT_COMPRESSION_NONE;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
|
49
|
+
/*
 * Renders +fi+ as a string of the form ["name":(flag, flag, ...)] listing
 * every option flag that is set. The result is allocated with FRT_ALLOC_N
 * and is handed to the caller.
 */
char *frt_fi_to_s(FrtFieldInfo *fi) {
    const char *fi_name = rb_id2name(fi->name);
    const unsigned int flags = fi->bits;
    /* 200 spare bytes cover the fixed punctuation plus all flag labels */
    char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
    int len = sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
                      bits_is_stored(flags) ? "is_stored, " : "",
                      bits_is_compressed(flags) ? "is_compressed, " : "",
                      bits_is_indexed(flags) ? "is_indexed, " : "",
                      bits_is_tokenized(flags) ? "is_tokenized, " : "",
                      bits_omit_norms(flags) ? "omit_norms, " : "",
                      bits_store_term_vector(flags) ? "store_term_vector, " : "",
                      bits_store_positions(flags) ? "store_positions, " : "",
                      bits_store_offsets(flags) ? "store_offsets, " : "");

    /* Drop the trailing ", " left behind by the last flag, if any */
    if (str[len - 2] == ',') {
        len -= 2;
    }
    strcpy(str + len, ")]");
    return str;
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#ifndef FRT_FIELD_INFO_H
|
2
|
+
#define FRT_FIELD_INFO_H
|
3
|
+
|
4
|
+
#include "frt_global.h"
|
5
|
+
#include <ruby.h>
|
6
|
+
|
7
|
+
/* Option flags for a field; combined into FrtFieldInfo.bits. */
#define FRT_FI_IS_STORED_BM 0x001
#define FRT_FI_IS_INDEXED_BM 0x002
#define FRT_FI_IS_TOKENIZED_BM 0x004
#define FRT_FI_OMIT_NORMS_BM 0x008
#define FRT_FI_STORE_TERM_VECTOR_BM 0x010
#define FRT_FI_STORE_POSITIONS_BM 0x020
#define FRT_FI_STORE_OFFSETS_BM 0x040
#define FRT_FI_COMPRESSION_BROTLI_BM 0x080
#define FRT_FI_COMPRESSION_BZ2_BM 0x100
#define FRT_FI_COMPRESSION_LZ4_BM 0x200

/*
 * Default flag set: stored, indexed, tokenized, with term vectors including
 * positions and offsets. Parenthesized so the expansion behaves as a single
 * operand, e.g. in `bits & FRT_FI_DEFAULTS_BM`.
 */
#define FRT_FI_DEFAULTS_BM (FRT_FI_IS_STORED_BM | FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM)
|
18
|
+
|
19
|
+
/*
 * Description of a single field: its name, boost and the FRT_FI_*_BM option
 * flags that control storing, indexing and term vectors.
 */
typedef struct FrtFieldInfo {
|
20
|
+
ID name; /* field name as a Ruby ID */
|
21
|
+
float boost; /* frt_fi_init() sets this to 1.0 */
|
22
|
+
unsigned int bits; /* combination of FRT_FI_*_BM flags */
|
23
|
+
int number; /* slot index assigned by frt_fis_add_field() */
|
24
|
+
_Atomic unsigned int ref_cnt; /* record is freed when FRT_DEREF reaches 0 */
|
25
|
+
VALUE rfi; /* Ruby VALUE, Qnil after frt_fi_init(); presumably the Ruby wrapper object - TODO confirm */
|
26
|
+
} FrtFieldInfo;
|
27
|
+
|
28
|
+
extern FrtFieldInfo *frt_fi_alloc();
|
29
|
+
extern FrtFieldInfo *frt_fi_init(FrtFieldInfo *fi, ID name, unsigned int bits);
|
30
|
+
extern FrtFieldInfo *frt_fi_new(ID name, unsigned int bits);
|
31
|
+
extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
32
|
+
extern void frt_fi_deref(FrtFieldInfo *fi);
|
33
|
+
|
34
|
+
extern void bits_check(unsigned int bits);
|
35
|
+
extern FrtCompressionType bits_get_compression_type(unsigned int bits);
|
36
|
+
/*
 * Predicates over a field option bitmask. The argument is parenthesized in
 * every expansion so that expressions such as `a | b` can be passed safely.
 */
#define bits_is_stored(bits) (((bits) & FRT_FI_IS_STORED_BM) != 0)
#define bits_is_indexed(bits) (((bits) & FRT_FI_IS_INDEXED_BM) != 0)
#define bits_is_tokenized(bits) (((bits) & FRT_FI_IS_TOKENIZED_BM) != 0)
#define bits_omit_norms(bits) (((bits) & FRT_FI_OMIT_NORMS_BM) != 0)
#define bits_store_term_vector(bits) (((bits) & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
#define bits_store_positions(bits) (((bits) & FRT_FI_STORE_POSITIONS_BM) != 0)
#define bits_store_offsets(bits) (((bits) & FRT_FI_STORE_OFFSETS_BM) != 0)
/* Norms exist only for fields that are indexed and do not omit norms. */
#define bits_has_norms(bits) (((bits) & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM)
#define bits_is_compressed_brotli(bits) (((bits) & FRT_FI_COMPRESSION_BROTLI_BM) != 0)
#define bits_is_compressed_bz2(bits) (((bits) & FRT_FI_COMPRESSION_BZ2_BM) != 0)
#define bits_is_compressed_lz4(bits) (((bits) & FRT_FI_COMPRESSION_LZ4_BM) != 0)
/* True when any of the three compression flags is set. */
#define bits_is_compressed(bits) (bits_is_compressed_brotli(bits) || bits_is_compressed_bz2(bits) || bits_is_compressed_lz4(bits))
|
48
|
+
|
49
|
+
#endif
|
@@ -0,0 +1,196 @@
|
|
1
|
+
#include "frt_field_infos.h"
|
2
|
+
#include "frt_except.h"
|
3
|
+
|
4
|
+
FrtFieldInfos *frt_fis_alloc(void) {
|
5
|
+
return FRT_ALLOC(FrtFieldInfos);
|
6
|
+
}
|
7
|
+
|
8
|
+
/*
 * Initialises an allocated FrtFieldInfos as an empty collection.
 *
 * +bits+ are the default option flags, validated by bits_check() before
 * anything is touched. The collection starts with room for
 * FIELD_INFOS_INIT_CAPA fields, a reference count of 1 and rfis set to Qnil.
 * Returns fis.
 */
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits) {
    bits_check(bits);

    /* plain bookkeeping first */
    fis->bits = bits;
    fis->ref_cnt = 1;
    fis->rfis = Qnil;
    fis->size = 0;
    fis->capa = FIELD_INFOS_INIT_CAPA;

    /* the name lookup table releases its entries through frt_fi_deref */
    fis->field_dict = frt_h_new_ptr((frt_free_ft)&frt_fi_deref);
    fis->fields = FRT_ALLOC_N(FrtFieldInfo *, fis->capa);
    return fis;
}
|
19
|
+
|
20
|
+
/*
 * Allocates and initialises an empty FrtFieldInfos whose default field
 * options are +bits+. Release it with frt_fis_deref().
 *
 * Raises FRT_ARG_ERROR (via bits_check) for an invalid flag combination.
 */
FrtFieldInfos *frt_fis_new(unsigned int bits) {
    FrtFieldInfos *fis;

    /* Validate before allocating: if the check raised from inside
     * frt_fis_init() the fresh allocation would be orphaned. */
    bits_check(bits);
    fis = frt_fis_alloc();
    return frt_fis_init(fis, bits);
}
|
24
|
+
|
25
|
+
/*
 * Appends +fi+ to the collection and registers it under its name.
 *
 * The fields array is doubled when full. Raises FRT_ARG_ERROR if a field
 * with the same name is already registered. On success the field is
 * referenced via FRT_REF, receives its slot index in fi->number, and is
 * returned.
 */
FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi) {
    int slot;

    /* make room first so the array can always take the new entry */
    if (fis->capa == fis->size) {
        fis->capa = fis->capa << 1;
        FRT_REALLOC_N(fis->fields, FrtFieldInfo *, fis->capa);
    }

    if (!frt_h_set_safe(fis->field_dict, (void *)fi->name, fi)) {
        FRT_RAISE(FRT_ARG_ERROR, "Field :%s already exists", rb_id2name(fi->name));
    }

    FRT_REF(fi);
    slot = fis->size;
    fis->fields[slot] = fi;
    fi->number = slot;
    fis->size = slot + 1;
    return fi;
}
|
39
|
+
|
40
|
+
/*
 * Looks up the field registered under +name+ in the collection's name
 * table and returns whatever frt_h_get() yields for that key.
 */
FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name) {
    void *entry = frt_h_get(fis->field_dict, (void *)name);

    return (FrtFieldInfo *)entry;
}
|
43
|
+
|
44
|
+
/*
 * Returns the slot number of the field registered under +name+,
 * or -1 when the lookup finds nothing.
 */
int frt_fis_get_field_num(FrtFieldInfos *fis, ID name) {
    const FrtFieldInfo *found = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);

    return found ? found->number : -1;
}
|
49
|
+
|
50
|
+
/*
 * Returns the field registered under +name+, creating it with the
 * collection's default option flags and adding it when it does not exist.
 *
 * NOTE(review): a created field starts with ref_cnt 1 from frt_fi_new() and
 * is referenced again by frt_fis_add_field(); frt_fis_read() resets the
 * count to 1 after adding, this function does not - confirm whether the
 * extra reference is intended.
 */
FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name) {
    FrtFieldInfo *existing = (FrtFieldInfo *)frt_h_get(fis->field_dict, (void *)name);

    if (existing) {
        return existing;
    }
    return frt_fis_add_field(fis, frt_fi_new(name, fis->bits));
}
|
58
|
+
|
59
|
+
/*
 * Reports whether at least one field in the collection has the
 * store-term-vector flag set.
 */
bool frt_fis_has_vectors(FrtFieldInfos *fis) {
    const int total = fis->size;
    int idx = 0;

    while (idx < total) {
        const unsigned int flags = fis->fields[idx]->bits;

        if (bits_store_term_vector(flags)) {
            return true;
        }
        idx++;
    }
    return false;
}
|
70
|
+
|
71
|
+
/*
 * Reads a FrtFieldInfos collection from +is+.
 *
 * Layout consumed (mirrors frt_fis_write): default bits (vint), field count
 * (vint), then per field: name (string), boost (raw u32 bit pattern of a
 * float) and bits (vint).
 *
 * If anything raises while reading, the partially built collection is
 * released before the exception continues to propagate.
 */
FrtFieldInfos *frt_fis_read(FrtInStream *is) {
    FrtFieldInfos *volatile fis = NULL;
    char *field_name;
    FRT_TRY
        do {
            volatile int i;
            union { frt_u32 i; float f; } tmp;
            FrtFieldInfo *volatile fi;
            fis = frt_fis_new(frt_is_read_vint(is));
            for (i = frt_is_read_vint(is); i > 0; i--) {
                fi = FRT_ALLOC_AND_ZERO(FrtFieldInfo);
                FRT_TRY
                    field_name = frt_is_read_string_safe(is);
                    fi->name = rb_intern(field_name);
                    free(field_name);
                    /* boost is stored as the raw 32 bit pattern of the float */
                    tmp.i = frt_is_read_u32(is);
                    fi->boost = tmp.f;
                    fi->bits = frt_is_read_vint(is);
                    /* Added inside the guarded region: add_field only raises
                     * when fi was NOT registered, so freeing it below is safe
                     * and avoids leaking the record on a duplicate name. */
                    frt_fis_add_field(fis, fi);
                FRT_XCATCHALL
                    free(fi);
                FRT_XENDTRY
                /* the collection is the sole owner of a field read from disk */
                fi->ref_cnt = 1;
            }
        } while (0);
    FRT_XCATCHALL
        /* fis is still NULL when reading the header or creating the
         * collection raised; there is nothing to release in that case. */
        if (fis) {
            frt_fis_deref(fis);
        }
    FRT_XENDTRY
    return fis;
}
|
101
|
+
|
102
|
+
/*
 * Serialises the collection to +os+: default bits and field count as vints,
 * followed for each field by its name, the raw 32 bit pattern of its boost
 * and its option bits.
 */
void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os) {
    const int field_count = fis->size;
    int idx;

    frt_os_write_vint(os, fis->bits);
    frt_os_write_vint(os, fis->size);

    for (idx = 0; idx < field_count; idx++) {
        FrtFieldInfo *field = fis->fields[idx];
        union { frt_u32 i; float f; } raw;

        /* reinterpret the float boost as a u32 for writing */
        raw.f = field->boost;

        frt_os_write_string(os, rb_id2name(field->name));
        frt_os_write_u32(os, raw.i);
        frt_os_write_vint(os, field->bits);
    }
}
|
120
|
+
|
121
|
+
static const char *store_str[] = {
|
122
|
+
":no",
|
123
|
+
":yes",
|
124
|
+
":compressed"
|
125
|
+
};
|
126
|
+
|
127
|
+
static const char *fi_store_str(FrtFieldInfo *fi) {
|
128
|
+
return store_str[bits_is_compressed(fi->bits) ? 2 : fi->bits & 0x1];
|
129
|
+
}
|
130
|
+
|
131
|
+
static const char *index_str[] = {
|
132
|
+
":no",
|
133
|
+
":untokenized",
|
134
|
+
"",
|
135
|
+
":yes",
|
136
|
+
"",
|
137
|
+
":untokenized_omit_norms",
|
138
|
+
"",
|
139
|
+
":omit_norms"
|
140
|
+
};
|
141
|
+
|
142
|
+
static const char *fi_index_str(FrtFieldInfo *fi) {
|
143
|
+
return index_str[(fi->bits >> 1) & 0x7];
|
144
|
+
}
|
145
|
+
|
146
|
+
static const char *term_vector_str[] = {
|
147
|
+
":no",
|
148
|
+
":yes",
|
149
|
+
"",
|
150
|
+
":with_positions",
|
151
|
+
"",
|
152
|
+
":with_offsets",
|
153
|
+
"",
|
154
|
+
":with_positions_offsets"
|
155
|
+
};
|
156
|
+
|
157
|
+
static const char *fi_term_vector_str(FrtFieldInfo *fi) {
|
158
|
+
return term_vector_str[(fi->bits >> 4) & 0x7];
|
159
|
+
}
|
160
|
+
|
161
|
+
char *frt_fis_to_s(FrtFieldInfos *fis) {
|
162
|
+
int i, pos, capa = 200 + fis->size * 120;
|
163
|
+
char *buf = FRT_ALLOC_N(char, capa);
|
164
|
+
FrtFieldInfo *fi;
|
165
|
+
const int fis_size = fis->size;
|
166
|
+
|
167
|
+
pos = sprintf(buf,
|
168
|
+
"default:\n"
|
169
|
+
" store: %s\n"
|
170
|
+
" index: %s\n"
|
171
|
+
" term_vector: %s\n"
|
172
|
+
"fields:\n",
|
173
|
+
store_str[bits_is_compressed(fis->bits) ? 2 : fis->bits & 0x1],
|
174
|
+
index_str[(fis->bits >> 1) & 0x7],
|
175
|
+
term_vector_str[(fis->bits >> 4) & 0x7]);
|
176
|
+
for (i = 0; i < fis_size; i++) {
|
177
|
+
fi = fis->fields[i];
|
178
|
+
pos += sprintf(buf + pos,
|
179
|
+
" %s:\n"
|
180
|
+
" boost: %f\n"
|
181
|
+
" store: %s\n"
|
182
|
+
" index: %s\n"
|
183
|
+
" term_vector: %s\n",
|
184
|
+
rb_id2name(fi->name), fi->boost, fi_store_str(fi),
|
185
|
+
fi_index_str(fi), fi_term_vector_str(fi));
|
186
|
+
}
|
187
|
+
return buf;
|
188
|
+
}
|
189
|
+
|
190
|
+
/*
 * Drops one reference to the collection. When FRT_DEREF reports zero the
 * name table is destroyed (which releases each field through the table's
 * value destructor), then the fields array and the collection are freed.
 */
void frt_fis_deref(FrtFieldInfos *fis) {
    if (FRT_DEREF(fis) != 0) {
        return;
    }
    frt_h_destroy(fis->field_dict);
    free(fis->fields);
    free(fis);
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#ifndef FRT_FIELD_INFOS_H
|
2
|
+
#define FRT_FIELD_INFOS_H
|
3
|
+
|
4
|
+
#include "frt_field_info.h"
|
5
|
+
#include "frt_hash.h"
|
6
|
+
#include "frt_in_stream.h"
|
7
|
+
#include "frt_out_stream.h"
|
8
|
+
|
9
|
+
#define FIELD_INFOS_INIT_CAPA 4
|
10
|
+
|
11
|
+
/* carry changes over to dummy_fis in test/test_segments.c */
|
12
|
+
/*
 * Ordered collection of FrtFieldInfo records with lookup by field name.
 */
typedef struct FrtFieldInfos {
|
13
|
+
unsigned int bits; /* default FRT_FI_*_BM flags used by frt_fis_get_or_add_field() */
|
14
|
+
int size; /* number of used slots in fields */
|
15
|
+
int capa; /* allocated slots in fields; doubled when full */
|
16
|
+
FrtFieldInfo **fields; /* fields in insertion order, indexed by FrtFieldInfo.number */
|
17
|
+
FrtHash *field_dict; /* lookup table keyed by the field name ID */
|
18
|
+
_Atomic unsigned int ref_cnt; /* collection is freed when FRT_DEREF reaches 0 */
|
19
|
+
VALUE rfis; /* Ruby VALUE, Qnil after frt_fis_init(); presumably the Ruby wrapper object - TODO confirm */
|
20
|
+
} FrtFieldInfos;
|
21
|
+
|
22
|
+
FrtFieldInfos *frt_fis_alloc();
|
23
|
+
FrtFieldInfos *frt_fis_init(FrtFieldInfos *fis, unsigned int bits);
|
24
|
+
FrtFieldInfos *frt_fis_new(unsigned int bits);
|
25
|
+
extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
|
26
|
+
extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, ID name);
|
27
|
+
extern int frt_fis_get_field_num(FrtFieldInfos *fis, ID name);
|
28
|
+
extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis, ID name);
|
29
|
+
extern bool frt_fis_has_vectors(FrtFieldInfos *fis);
|
30
|
+
extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
|
31
|
+
extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
|
32
|
+
extern char *frt_fis_to_s(FrtFieldInfos *fis);
|
33
|
+
extern void frt_fis_deref(FrtFieldInfos *fis);
|
34
|
+
|
35
|
+
#endif
|
@@ -44,8 +44,7 @@ int frt_icmp(const void *p1, const void *p2) {
|
|
44
44
|
|
45
45
|
if (i1 > i2) {
|
46
46
|
return 1;
|
47
|
-
}
|
48
|
-
else if (i1 < i2) {
|
47
|
+
} else if (i1 < i2) {
|
49
48
|
return -1;
|
50
49
|
}
|
51
50
|
return 0;
|
@@ -110,6 +109,14 @@ char *frt_estrdup(const char *s) {
|
|
110
109
|
return t;
|
111
110
|
}
|
112
111
|
|
112
|
+
/* frt_estrndup: duplicate a string with length given, report if error */
|
113
|
+
char *frt_estrndup(const char *s, int len) {
|
114
|
+
char *t = FRT_ALLOC_N(char, len + 1);
|
115
|
+
strncpy(t, s, len);
|
116
|
+
t[len] = '\0';
|
117
|
+
return t;
|
118
|
+
}
|
119
|
+
|
113
120
|
/* Pretty print a float to the buffer. The buffer should have at least 32
|
114
121
|
* bytes available.
|
115
122
|
*/
|
@@ -248,8 +255,7 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
|
|
248
255
|
if (free_mes_capa == 0) {
|
249
256
|
free_mes_capa = 16;
|
250
257
|
free_mes = FRT_ALLOC_N(FreeMe, free_mes_capa);
|
251
|
-
}
|
252
|
-
else if (free_mes_capa <= free_mes_size) {
|
258
|
+
} else if (free_mes_capa <= free_mes_size) {
|
253
259
|
free_mes_capa *= 2;
|
254
260
|
FRT_REALLOC_N(free_mes, FreeMe, free_mes_capa);
|
255
261
|
}
|
@@ -83,12 +83,15 @@ typedef void (*frt_free_ft)(void *key);
|
|
83
83
|
# define Xj fprintf(stdout,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
84
84
|
#endif
|
85
85
|
|
86
|
+
extern const char *segm_idx_name;
|
87
|
+
|
86
88
|
extern unsigned int *frt_imalloc(unsigned int value);
|
87
89
|
extern unsigned long *frt_lmalloc(unsigned long value);
|
88
90
|
extern frt_u32 *frt_u32malloc(frt_u32 value);
|
89
91
|
extern frt_u64 *frt_u64malloc(frt_u64 value);
|
90
92
|
|
91
93
|
extern char *frt_estrdup(const char *s);
|
94
|
+
extern char *frt_estrndup(const char *s, int len);
|
92
95
|
extern char *frt_estrcat(char *str, char *str_cat);
|
93
96
|
extern char *frt_epstrdup(const char *fmt, int len, ...);
|
94
97
|
|
@@ -120,8 +123,7 @@ extern void frt_dummy_free(void *p);
|
|
120
123
|
* Returns the count of leading [MSB] 0 bits in +word+.
|
121
124
|
*/
|
122
125
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
123
|
-
int frt_count_leading_zeros(frt_u32 word)
|
124
|
-
{
|
126
|
+
int frt_count_leading_zeros(frt_u32 word) {
|
125
127
|
#ifdef __GNUC__
|
126
128
|
if (word)
|
127
129
|
return __builtin_clz(word);
|
@@ -153,8 +155,7 @@ int frt_count_leading_zeros(frt_u32 word)
|
|
153
155
|
}
|
154
156
|
|
155
157
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
156
|
-
int frt_count_leading_ones(frt_u32 word)
|
157
|
-
{
|
158
|
+
int frt_count_leading_ones(frt_u32 word) {
|
158
159
|
return frt_count_leading_zeros(~word);
|
159
160
|
}
|
160
161
|
|
@@ -163,8 +164,7 @@ int frt_count_leading_ones(frt_u32 word)
|
|
163
164
|
*/
|
164
165
|
|
165
166
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
166
|
-
int frt_count_trailing_zeros(frt_u32 word)
|
167
|
-
{
|
167
|
+
int frt_count_trailing_zeros(frt_u32 word) {
|
168
168
|
#ifdef __GNUC__
|
169
169
|
if (word)
|
170
170
|
return __builtin_ctz(word);
|
@@ -196,14 +196,12 @@ int frt_count_trailing_zeros(frt_u32 word)
|
|
196
196
|
}
|
197
197
|
|
198
198
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
199
|
-
int frt_count_trailing_ones(frt_u32 word)
|
200
|
-
{
|
199
|
+
int frt_count_trailing_ones(frt_u32 word) {
|
201
200
|
return frt_count_trailing_zeros(~word);
|
202
201
|
}
|
203
202
|
|
204
203
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
205
|
-
int frt_count_ones(frt_u32 word)
|
206
|
-
{
|
204
|
+
int frt_count_ones(frt_u32 word) {
|
207
205
|
#ifdef __GNUC__
|
208
206
|
return __builtin_popcount(word);
|
209
207
|
#else
|
@@ -233,8 +231,7 @@ int frt_count_ones(frt_u32 word)
|
|
233
231
|
}
|
234
232
|
|
235
233
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
236
|
-
int frt_count_zeros(frt_u32 word)
|
237
|
-
{
|
234
|
+
int frt_count_zeros(frt_u32 word) {
|
238
235
|
return frt_count_ones(~word);
|
239
236
|
}
|
240
237
|
|
@@ -242,8 +239,7 @@ int frt_count_zeros(frt_u32 word)
|
|
242
239
|
* Round up to the next power of 2
|
243
240
|
*/
|
244
241
|
static FRT_ATTR_ALWAYS_INLINE FRT_ATTR_CONST
|
245
|
-
int frt_round2(frt_u32 word)
|
246
|
-
{
|
242
|
+
int frt_round2(frt_u32 word) {
|
247
243
|
return 1 << (32 - frt_count_leading_zeros(word));
|
248
244
|
}
|
249
245
|
|
@@ -261,7 +257,7 @@ extern FILE *frt_x_exception_stream;
|
|
261
257
|
|
262
258
|
/**
|
263
259
|
* The convenience macro +EXCEPTION_STREAM+ returns stderr when
|
264
|
-
* +frt_x_exception_stream+ isn't
|
260
|
+
* +frt_x_exception_stream+ isn't explicitly set.
|
265
261
|
*/
|
266
262
|
#define EXCEPTION 2
|
267
263
|
#define EXCEPTION_STREAM (frt_x_exception_stream ? frt_x_exception_stream : stderr)
|
@@ -30,7 +30,7 @@ unsigned long frt_str_hash(const char *const str) {
|
|
30
30
|
}
|
31
31
|
|
32
32
|
unsigned long frt_ptr_hash(const void *const ptr) {
|
33
|
-
return (unsigned long)ptr;
|
33
|
+
return (unsigned long)(uintptr_t)ptr;
|
34
34
|
}
|
35
35
|
|
36
36
|
int frt_ptr_eq(const void *q1, const void *q2) {
|
@@ -73,7 +73,7 @@ static FrtHashEntry *frt_h_resize_lookup(FrtHash *self, register const unsigned
|
|
73
73
|
}
|
74
74
|
|
75
75
|
static FrtHashEntry *frt_h_lookup_ptr(FrtHash *self, const void *key) {
|
76
|
-
register const unsigned long hash = (unsigned long)key;
|
76
|
+
register const unsigned long hash = (unsigned long)(uintptr_t)key;
|
77
77
|
register unsigned long perturb;
|
78
78
|
register int mask = self->mask;
|
79
79
|
register FrtHashEntry *he0 = self->table;
|
@@ -375,21 +375,21 @@ FrtHashKeyStatus frt_h_has_key(FrtHash *self, const void *key) {
|
|
375
375
|
}
|
376
376
|
|
377
377
|
void *frt_h_get_int(FrtHash *self, const unsigned long key) {
|
378
|
-
return frt_h_get(self, (const void *)key);
|
378
|
+
return frt_h_get(self, (const void *)(uintptr_t)key);
|
379
379
|
}
|
380
380
|
|
381
381
|
int frt_h_del_int(FrtHash *self, const unsigned long key) {
|
382
|
-
return frt_h_del(self, (const void *)key);
|
382
|
+
return frt_h_del(self, (const void *)(uintptr_t)key);
|
383
383
|
}
|
384
384
|
|
385
385
|
void *frt_h_rem_int(FrtHash *self, const unsigned long key) {
|
386
|
-
return frt_h_rem(self, (const void *)key, false);
|
386
|
+
return frt_h_rem(self, (const void *)(uintptr_t)key, false);
|
387
387
|
}
|
388
388
|
|
389
389
|
FrtHashKeyStatus frt_h_set_int(FrtHash *self, const unsigned long key, void *value) {
|
390
390
|
FrtHashKeyStatus ret_val = FRT_HASH_KEY_DOES_NOT_EXIST;
|
391
391
|
FrtHashEntry *he;
|
392
|
-
if (!frt_h_set_ext(self, (const void *)key, &he)) {
|
392
|
+
if (!frt_h_set_ext(self, (const void *)(uintptr_t)key, &he)) {
|
393
393
|
/* Only free old value if it isn't the new value */
|
394
394
|
if (he->value != value) {
|
395
395
|
self->free_value_i(he->value);
|
@@ -404,7 +404,7 @@ FrtHashKeyStatus frt_h_set_int(FrtHash *self, const unsigned long key, void *val
|
|
404
404
|
|
405
405
|
int frt_h_set_safe_int(FrtHash *self, const unsigned long key, void *value) {
|
406
406
|
FrtHashEntry *he;
|
407
|
-
if (frt_h_set_ext(self, (const void *)key, &he)) {
|
407
|
+
if (frt_h_set_ext(self, (const void *)(uintptr_t)key, &he)) {
|
408
408
|
he->key = (char *)dummy_int_key;
|
409
409
|
he->value = value;
|
410
410
|
return true;
|
@@ -413,7 +413,7 @@ int frt_h_set_safe_int(FrtHash *self, const unsigned long key, void *value) {
|
|
413
413
|
}
|
414
414
|
|
415
415
|
int frt_h_has_key_int(FrtHash *self, const unsigned long key) {
|
416
|
-
return frt_h_has_key(self, (const void *)key);
|
416
|
+
return frt_h_has_key(self, (const void *)(uintptr_t)key);
|
417
417
|
}
|
418
418
|
|
419
419
|
void frt_h_each(FrtHash *self, void (*each_kv) (void *key, void *value, void *arg), void *arg) {
|