isomorfeus-ferret 0.12.6 → 0.12.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -4
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.c +15 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_constants.h +200 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.c +156 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_context.h +113 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +5914 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.h +64 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.c +22 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_platform.h +594 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.c +291 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_transform.h +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_common_version.h +26 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.c +76 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_bit_reader.h +351 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +2608 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.c +339 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_huffman.h +121 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_prefix.h +732 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.c +159 -0
- data/ext/isomorfeus_ferret_ext/brotli_dec_state.h +365 -0
- data/ext/isomorfeus_ferret_ext/brotli_decode.h +344 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.c +145 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references.h +39 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.c +843 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_hq.h +95 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_backward_references_inc.h +163 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.c +35 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_bit_cost_inc.h +127 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_encoder_inc.h +34 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.c +194 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_block_splitter_inc.h +440 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.c +1314 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_brotli_bit_stream.h +84 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.c +56 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster.h +48 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_cluster_inc.h +320 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.c +28 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_command.h +190 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.c +790 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment.h +61 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.c +645 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_compress_fragment_two_pass.h +54 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.c +1846 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_dictionary_hash.h +25 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +1927 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.c +33 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_encoder_dict.h +43 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.c +503 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode.h +122 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_entropy_encode_static.h +539 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.c +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_fast_log.h +66 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_find_match_length.h +79 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash.h +488 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_composite_inc.h +125 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_forgetful_chain_inc.h +293 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match64_inc.h +267 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_inc.h +262 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_longest_match_quickly_inc.h +266 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_rolling_inc.h +212 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_hash_to_binary_tree_inc.h +329 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.c +100 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram.h +63 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_histogram_inc.h +51 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.c +175 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_literal_cost.h +30 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.c +170 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_memory.h +114 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.c +663 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock.h +105 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_metablock_inc.h +183 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_params.h +46 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_prefix.h +53 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_quality.h +165 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_ringbuffer.h +167 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.c +486 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict.h +40 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_static_dict_lut.h +5864 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.c +85 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_utf8_util.h +32 -0
- data/ext/isomorfeus_ferret_ext/brotli_enc_write_bits.h +87 -0
- data/ext/isomorfeus_ferret_ext/brotli_encode.h +448 -0
- data/ext/isomorfeus_ferret_ext/brotli_port.h +288 -0
- data/ext/isomorfeus_ferret_ext/brotli_types.h +83 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +35 -4
- data/ext/isomorfeus_ferret_ext/frt_document.h +1 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +174 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +6 -3
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +57 -45
- data/ext/isomorfeus_ferret_ext/test_index.c +4 -1
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +88 -3
@@ -6,6 +6,8 @@
|
|
6
6
|
#include <string.h>
|
7
7
|
#include <limits.h>
|
8
8
|
#include <ctype.h>
|
9
|
+
#include "brotli_decode.h"
|
10
|
+
#include "brotli_encode.h"
|
9
11
|
|
10
12
|
extern void frt_micro_sleep(const int micro_seconds);
|
11
13
|
|
@@ -39,8 +41,8 @@ static char *ste_next(FrtTermEnum *te);
|
|
39
41
|
#define FORMAT 0
|
40
42
|
#define SEGMENTS_GEN_FILE_NAME "segments"
|
41
43
|
#define MAX_EXT_LEN 10
|
42
|
-
#define
|
43
|
-
#define
|
44
|
+
#define COMPRESSION_BUFFER_SIZE 16348
|
45
|
+
#define COMPRESSION_LEVEL 9
|
44
46
|
|
45
47
|
/* *** Must be three characters *** */
|
46
48
|
static const char *INDEX_EXTENSIONS[] = {
|
@@ -220,6 +222,9 @@ static void fi_set_store(FrtFieldInfo *fi, int store)
|
|
220
222
|
case FRT_STORE_YES:
|
221
223
|
fi->bits |= FRT_FI_IS_STORED_BM;
|
222
224
|
break;
|
225
|
+
case FRT_STORE_COMPRESS:
|
226
|
+
fi->bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_IS_STORED_BM;
|
227
|
+
break;
|
223
228
|
}
|
224
229
|
}
|
225
230
|
|
@@ -304,8 +309,9 @@ char *frt_fi_to_s(FrtFieldInfo *fi)
|
|
304
309
|
const char *fi_name = rb_id2name(fi->name);
|
305
310
|
char *str = FRT_ALLOC_N(char, strlen(fi_name) + 200);
|
306
311
|
char *s = str;
|
307
|
-
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s", fi_name,
|
312
|
+
s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi_name,
|
308
313
|
fi_is_stored(fi) ? "is_stored, " : "",
|
314
|
+
fi_is_compressed(fi) ? "is_compressed, " : "",
|
309
315
|
fi_is_indexed(fi) ? "is_indexed, " : "",
|
310
316
|
fi_is_tokenized(fi) ? "is_tokenized, " : "",
|
311
317
|
fi_omit_norms(fi) ? "omit_norms, " : "",
|
@@ -443,7 +449,8 @@ void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os)
|
|
443
449
|
static const char *store_str[] = {
|
444
450
|
":no",
|
445
451
|
":yes",
|
446
|
-
""
|
452
|
+
"",
|
453
|
+
":compressed"
|
447
454
|
};
|
448
455
|
|
449
456
|
static const char *fi_store_str(FrtFieldInfo *fi)
|
@@ -1145,12 +1152,13 @@ frt_u64 frt_sis_read_current_version(FrtStore *store)
|
|
1145
1152
|
*
|
1146
1153
|
****************************************************************************/
|
1147
1154
|
|
1148
|
-
static FrtLazyDocField *lazy_df_new(FrtSymbol name, const int size)
|
1155
|
+
/*
 * Allocate a lazy document field called +name+ with room for +size+ data
 * entries. The entry array is zero-filled; +is_compressed+ is stored on the
 * field as given. The doc and len members are not set here.
 */
static FrtLazyDocField *lazy_df_new(FrtSymbol name, const int size, bool is_compressed)
{
    FrtLazyDocField *lazy_df = FRT_ALLOC(FrtLazyDocField);

    lazy_df->name          = name;
    lazy_df->size          = size;
    lazy_df->data          = FRT_ALLOC_AND_ZERO_N(FrtLazyDocFieldData, size);
    lazy_df->is_compressed = is_compressed;

    return lazy_df;
}
|
1156
1164
|
|
@@ -1166,6 +1174,52 @@ static void lazy_df_destroy(FrtLazyDocField *self)
|
|
1166
1174
|
free(self);
|
1167
1175
|
}
|
1168
1176
|
|
1177
|
+
static void comp_raise()
|
1178
|
+
{
|
1179
|
+
FRT_RAISE(EXCEPTION, "Compression error");
|
1180
|
+
}
|
1181
|
+
|
1182
|
+
/*
 * Read +compressed_len+ bytes of Brotli-compressed data from +is+ and return
 * the decompressed bytes.
 *
 * is             - input stream positioned at the start of the compressed data
 * compressed_len - number of compressed bytes to consume from the stream
 * len            - out parameter, receives the decompressed length (excluding
 *                  the terminating NUL)
 *
 * Returns a heap buffer holding the decompressed data followed by a '\0'.
 * Raises a "Compression error" (via comp_raise) if the decoder cannot be
 * created or reports an error.
 */
static char *is_read_compressed_bytes(FrtInStream *is, int compressed_len, int *len)
{
    int buf_out_idx = 0;
    int read_len;
    frt_uchar buf_in[COMPRESSION_BUFFER_SIZE];
    const frt_uchar *next_in;
    size_t available_in;
    frt_uchar *buf_out = NULL;
    frt_uchar *next_out;
    size_t available_out;

    BrotliDecoderState *b_state = BrotliDecoderCreateInstance(NULL, NULL, NULL);
    BrotliDecoderResult b_result = BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT;
    if (!b_state) { comp_raise(); return NULL; }

    do {
        /* feed the decoder one input chunk at a time */
        read_len = compressed_len > COMPRESSION_BUFFER_SIZE ? COMPRESSION_BUFFER_SIZE : compressed_len;
        frt_is_read_bytes(is, buf_in, read_len);
        compressed_len -= read_len;
        available_in = read_len;
        next_in = buf_in;
        do {
            /* Always offer a full, fresh window of output space. The window
             * size must be reset on every pass: after NEEDS_MORE_OUTPUT the
             * decoder has left available_out at 0, and re-using that value
             * would make no progress while still advancing buf_out_idx. */
            FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + COMPRESSION_BUFFER_SIZE);
            available_out = COMPRESSION_BUFFER_SIZE;
            next_out = buf_out + buf_out_idx;
            b_result = BrotliDecoderDecompressStream(b_state,
                                                     &available_in, &next_in,
                                                     &available_out, &next_out, NULL);
            if (b_result == BROTLI_DECODER_RESULT_ERROR) {
                /* release everything before raising; the raise presumably
                 * does not return here - TODO confirm against FRT_RAISE */
                BrotliDecoderDestroyInstance(b_state);
                free(buf_out);
                comp_raise();
                return NULL;
            }
            /* account for the bytes the decoder actually produced */
            buf_out_idx += (int)(COMPRESSION_BUFFER_SIZE - available_out);
        } while (b_result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
    } while (b_result != BROTLI_DECODER_RESULT_SUCCESS && compressed_len > 0);

    BrotliDecoderDestroyInstance(b_state);

    /* shrink to fit and NUL-terminate so the result can be used as a C string */
    FRT_REALLOC_N(buf_out, frt_uchar, buf_out_idx + 1);
    buf_out[buf_out_idx] = '\0';
    *len = buf_out_idx;
    return (char *)buf_out;
}
|
1222
|
+
|
1169
1223
|
char *frt_lazy_df_get_data(FrtLazyDocField *self, int i)
|
1170
1224
|
{
|
1171
1225
|
char *text = NULL;
|
@@ -1174,9 +1228,13 @@ char *frt_lazy_df_get_data(FrtLazyDocField *self, int i)
|
|
1174
1228
|
if (NULL == text) {
|
1175
1229
|
const int read_len = self->data[i].length + 1;
|
1176
1230
|
frt_is_seek(self->doc->fields_in, self->data[i].start);
|
1177
|
-
self->
|
1178
|
-
|
1179
|
-
|
1231
|
+
if (self->is_compressed) {
|
1232
|
+
self->data[i].text = text = is_read_compressed_bytes(self->doc->fields_in, read_len, &(self->data[i].length));
|
1233
|
+
} else {
|
1234
|
+
self->data[i].text = text = FRT_ALLOC_N(char, read_len);
|
1235
|
+
frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)text, read_len);
|
1236
|
+
text[read_len - 1] = '\0';
|
1237
|
+
}
|
1180
1238
|
}
|
1181
1239
|
}
|
1182
1240
|
|
@@ -1185,6 +1243,16 @@ char *frt_lazy_df_get_data(FrtLazyDocField *self, int i)
|
|
1185
1243
|
|
1186
1244
|
void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
|
1187
1245
|
{
|
1246
|
+
if (self->is_compressed == 1) {
|
1247
|
+
int i;
|
1248
|
+
self->len = 0;
|
1249
|
+
for (i = self->size-1; i >= 0; i--) {
|
1250
|
+
(void)frt_lazy_df_get_data(self, i);
|
1251
|
+
self->len += self->data[i].length + 1;
|
1252
|
+
}
|
1253
|
+
self->len--; /* each field separated by ' ' but no need to add to end */
|
1254
|
+
self->is_compressed = 2;
|
1255
|
+
}
|
1188
1256
|
if (start < 0 || start >= self->len) {
|
1189
1257
|
FRT_RAISE(FRT_IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
|
1190
1258
|
"is not between 0 and %d", start, self->len);
|
@@ -1196,7 +1264,33 @@ void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len)
|
|
1196
1264
|
FRT_RAISE(FRT_IO_ERROR, "Tried to read past end of field. Field is only %d "
|
1197
1265
|
"bytes long but tried to read to %d", self->len, start + len);
|
1198
1266
|
}
|
1199
|
-
|
1267
|
+
if (self->is_compressed) {
|
1268
|
+
int cur_start = 0, buf_start = 0, cur_end, i, copy_start, copy_len;
|
1269
|
+
for (i = 0; i < self->size; i++) {
|
1270
|
+
cur_end = cur_start + self->data[i].length;
|
1271
|
+
if (start < cur_end) {
|
1272
|
+
copy_start = start > cur_start ? start - cur_start : 0;
|
1273
|
+
copy_len = cur_end - cur_start - copy_start;
|
1274
|
+
if (copy_len >= len) {
|
1275
|
+
copy_len = len;
|
1276
|
+
len = 0;
|
1277
|
+
}
|
1278
|
+
else {
|
1279
|
+
len -= copy_len;
|
1280
|
+
}
|
1281
|
+
memcpy(buf + buf_start,
|
1282
|
+
self->data[i].text + copy_start,
|
1283
|
+
copy_len);
|
1284
|
+
buf_start += copy_len;
|
1285
|
+
if (len > 0) {
|
1286
|
+
buf[buf_start++] = ' ';
|
1287
|
+
len--;
|
1288
|
+
}
|
1289
|
+
if (len == 0) break;
|
1290
|
+
}
|
1291
|
+
cur_start = cur_end + 1;
|
1292
|
+
}
|
1293
|
+
} else {
|
1200
1294
|
frt_is_seek(self->doc->fields_in, self->data[0].start + start);
|
1201
1295
|
frt_is_read_bytes(self->doc->fields_in, (frt_uchar *)buf, len);
|
1202
1296
|
}
|
@@ -1286,7 +1380,7 @@ void frt_fr_close(FrtFieldsReader *fr)
|
|
1286
1380
|
free(fr);
|
1287
1381
|
}
|
1288
1382
|
|
1289
|
-
static FrtDocField *frt_fr_df_new(FrtSymbol name, int size)
|
1383
|
+
static FrtDocField *frt_fr_df_new(FrtSymbol name, int size, bool is_compressed)
|
1290
1384
|
{
|
1291
1385
|
FrtDocField *df = FRT_ALLOC(FrtDocField);
|
1292
1386
|
df->name = name;
|
@@ -1295,9 +1389,22 @@ static FrtDocField *frt_fr_df_new(FrtSymbol name, int size)
|
|
1295
1389
|
df->lengths = FRT_ALLOC_N(int, df->capa);
|
1296
1390
|
df->destroy_data = true;
|
1297
1391
|
df->boost = 1.0f;
|
1392
|
+
df->is_compressed = is_compressed;
|
1298
1393
|
return df;
|
1299
1394
|
}
|
1300
1395
|
|
1396
|
+
/*
 * Decompress every data entry of +df+ from the reader's fdt_in stream.
 * On entry df->lengths[n] holds the stored length value for entry n; the
 * number of compressed bytes read is that value plus one. Afterwards
 * df->data[n] points at the decompressed bytes and df->lengths[n] holds the
 * decompressed length.
 */
static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df)
{
    FrtInStream *in = fr->fdt_in;
    const int entry_cnt = df->size;
    int n = 0;

    while (n < entry_cnt) {
        const int stored_len = df->lengths[n] + 1;
        int *out_len = &(df->lengths[n]);

        df->data[n] = is_read_compressed_bytes(in, stored_len, out_len);
        n++;
    }
}
|
1407
|
+
|
1301
1408
|
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
1302
1409
|
{
|
1303
1410
|
int i, j;
|
@@ -1316,7 +1423,7 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1316
1423
|
const int field_num = frt_is_read_vint(fdt_in);
|
1317
1424
|
FrtFieldInfo *fi = fr->fis->fields[field_num];
|
1318
1425
|
const int df_size = frt_is_read_vint(fdt_in);
|
1319
|
-
FrtDocField *df = frt_fr_df_new(fi->name, df_size);
|
1426
|
+
FrtDocField *df = frt_fr_df_new(fi->name, df_size, fi_is_compressed(fi));
|
1320
1427
|
|
1321
1428
|
for (j = 0; j < df_size; j++) {
|
1322
1429
|
df->lengths[j] = frt_is_read_vint(fdt_in);
|
@@ -1326,12 +1433,16 @@ FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
|
1326
1433
|
}
|
1327
1434
|
for (i = 0; i < stored_cnt; i++) {
|
1328
1435
|
FrtDocField *df = doc->fields[i];
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1436
|
+
if (df->is_compressed) {
|
1437
|
+
frt_fr_read_compressed_fields(fr, df);
|
1438
|
+
} else {
|
1439
|
+
const int df_size = df->size;
|
1440
|
+
for (j = 0; j < df_size; j++) {
|
1441
|
+
const int read_len = df->lengths[j] + 1;
|
1442
|
+
df->data[j] = FRT_ALLOC_N(char, read_len);
|
1443
|
+
frt_is_read_bytes(fdt_in, (frt_uchar *)df->data[j], read_len);
|
1444
|
+
df->data[j][read_len - 1] = '\0';
|
1445
|
+
}
|
1335
1446
|
}
|
1336
1447
|
}
|
1337
1448
|
|
@@ -1355,7 +1466,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1355
1466
|
for (i = 0; i < stored_cnt; i++) {
|
1356
1467
|
FrtFieldInfo *fi = fr->fis->fields[frt_is_read_vint(fdt_in)];
|
1357
1468
|
const int data_cnt = frt_is_read_vint(fdt_in);
|
1358
|
-
FrtLazyDocField *lazy_df = lazy_df_new(fi->name, data_cnt);
|
1469
|
+
FrtLazyDocField *lazy_df = lazy_df_new(fi->name, data_cnt, fi_is_compressed(fi));
|
1359
1470
|
const int field_start = start;
|
1360
1471
|
/* get the starts relative positions this time around */
|
1361
1472
|
for (j = 0; j < data_cnt; j++) {
|
@@ -1549,6 +1660,37 @@ void frt_fw_close(FrtFieldsWriter *fw)
|
|
1549
1660
|
free(fw);
|
1550
1661
|
}
|
1551
1662
|
|
1663
|
+
/*
 * Brotli-compress +length+ bytes starting at +data+ and write the compressed
 * stream to +out_stream+ in chunks of at most COMPRESSION_BUFFER_SIZE bytes.
 *
 * Returns the total number of compressed bytes produced, as reported by the
 * encoder. Raises a "Compression error" (via comp_raise) if the encoder
 * cannot be created or a compression step fails; -1 is returned after the
 * raise call in either case.
 */
static int frt_os_write_compressed_bytes(FrtOutStream* out_stream, frt_uchar *data, int length)
{
    frt_uchar chunk[COMPRESSION_BUFFER_SIZE];
    const frt_uchar *src = data;
    size_t src_left = length;
    size_t total_out = 0;
    BrotliEncoderState *encoder = BrotliEncoderCreateInstance(NULL, NULL, NULL);

    if (!encoder) {
        comp_raise();
        return -1;
    }

    BrotliEncoderSetParameter(encoder, BROTLI_PARAM_QUALITY, COMPRESSION_LEVEL);

    for (;;) {
        /* start each step with an empty chunk buffer */
        frt_uchar *dst = chunk;
        size_t dst_left = COMPRESSION_BUFFER_SIZE;

        if (!BrotliEncoderCompressStream(encoder, BROTLI_OPERATION_FINISH,
                                         &src_left, &src,
                                         &dst_left, &dst, &total_out)) {
            BrotliEncoderDestroyInstance(encoder);
            comp_raise();
            return -1;
        }

        /* flush whatever the encoder produced in this step */
        frt_os_write_bytes(out_stream, chunk, COMPRESSION_BUFFER_SIZE - dst_left);

        if (BrotliEncoderIsFinished(encoder)) {
            break;
        }
    }

    BrotliEncoderDestroyInstance(encoder);
    return (int)total_out;
}
|
1693
|
+
|
1552
1694
|
void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc)
|
1553
1695
|
{
|
1554
1696
|
int i, j, stored_cnt = 0;
|
@@ -1577,13 +1719,20 @@ void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc)
|
|
1577
1719
|
const int df_size = df->size;
|
1578
1720
|
frt_os_write_vint(fdt_out, fi->number);
|
1579
1721
|
frt_os_write_vint(fdt_out, df_size);
|
1580
|
-
|
1581
|
-
|
1582
|
-
|
1583
|
-
|
1584
|
-
|
1585
|
-
|
1586
|
-
|
1722
|
+
if (fi_is_compressed(fi)) {
|
1723
|
+
for (j = 0; j < df_size; j++) {
|
1724
|
+
const int length = df->lengths[j];
|
1725
|
+
int compressed_len = frt_os_write_compressed_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1726
|
+
frt_os_write_vint(fdt_out, compressed_len - 1);
|
1727
|
+
}
|
1728
|
+
} else {
|
1729
|
+
for (j = 0; j < df_size; j++) {
|
1730
|
+
const int length = df->lengths[j];
|
1731
|
+
frt_os_write_vint(fdt_out, length);
|
1732
|
+
frt_os_write_bytes(fw->buffer, (frt_uchar*)df->data[j], length);
|
1733
|
+
/* leave a space between fields as that is how they are analyzed */
|
1734
|
+
frt_os_write_byte(fw->buffer, ' ');
|
1735
|
+
}
|
1587
1736
|
}
|
1588
1737
|
}
|
1589
1738
|
}
|
@@ -66,7 +66,8 @@ extern FrtHash *frt_co_hash_create();
|
|
66
66
|
typedef enum
|
67
67
|
{
|
68
68
|
FRT_STORE_NO = 0,
|
69
|
-
FRT_STORE_YES = 1
|
69
|
+
FRT_STORE_YES = 1,
|
70
|
+
FRT_STORE_COMPRESS = 2
|
70
71
|
} FrtStoreValue;
|
71
72
|
|
72
73
|
typedef enum
|
@@ -88,6 +89,7 @@ typedef enum
|
|
88
89
|
} FrtTermVectorValue;
|
89
90
|
|
90
91
|
#define FRT_FI_IS_STORED_BM 0x001
|
92
|
+
#define FRT_FI_IS_COMPRESSED_BM 0x002
|
91
93
|
#define FRT_FI_IS_INDEXED_BM 0x004
|
92
94
|
#define FRT_FI_IS_TOKENIZED_BM 0x008
|
93
95
|
#define FRT_FI_OMIT_NORMS_BM 0x010
|
@@ -112,6 +114,7 @@ extern char *frt_fi_to_s(FrtFieldInfo *fi);
|
|
112
114
|
extern void frt_fi_deref(FrtFieldInfo *fi);
|
113
115
|
|
114
116
|
#define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0)
|
117
|
+
#define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
|
115
118
|
#define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
|
116
119
|
#define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
|
117
120
|
#define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
|
@@ -575,11 +578,11 @@ typedef struct FrtLazyDocField
|
|
575
578
|
FrtLazyDoc *doc;
|
576
579
|
int size; /* number of data elements */
|
577
580
|
int len; /* length of data elements concatenated */
|
581
|
+
int is_compressed : 2; /* set to 2 after all data is loaded */
|
578
582
|
} FrtLazyDocField;
|
579
583
|
|
580
584
|
extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
|
581
|
-
extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf,
|
582
|
-
int start, int len);
|
585
|
+
extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf, int start, int len);
|
583
586
|
|
584
587
|
/* * * FrtLazyDoc * * */
|
585
588
|
struct FrtLazyDoc
|
@@ -179,6 +179,7 @@ static off_t ram_length(FrtStore *store, const char *filename)
|
|
179
179
|
|
180
180
|
static void ramo_flush_i(FrtOutStream *os, const frt_uchar *src, int len)
|
181
181
|
{
|
182
|
+
if (len == 0) { return; }
|
182
183
|
frt_uchar *buffer;
|
183
184
|
FrtRAMFile *rf = os->file.rf;
|
184
185
|
int buffer_number, buffer_offset, bytes_in_buffer, bytes_to_copy;
|
@@ -2,10 +2,10 @@
|
|
2
2
|
#include "test.h"
|
3
3
|
|
4
4
|
#define do_field_prop_test(tc, fi, name, boost, is_stored,\
|
5
|
-
is_indexed, is_tokenized, omit_norms,\
|
5
|
+
is_compressed, is_indexed, is_tokenized, omit_norms,\
|
6
6
|
store_term_vector, store_positions, store_offsets)\
|
7
7
|
field_prop_test(tc, __LINE__, fi, name, boost, is_stored,\
|
8
|
-
is_indexed, is_tokenized, omit_norms,\
|
8
|
+
is_compressed, is_indexed, is_tokenized, omit_norms,\
|
9
9
|
store_term_vector, store_positions, store_offsets)
|
10
10
|
#define T 1
|
11
11
|
#define F 0
|
@@ -16,6 +16,7 @@ void field_prop_test(TestCase *tc,
|
|
16
16
|
FrtSymbol name,
|
17
17
|
float boost,
|
18
18
|
bool is_stored,
|
19
|
+
bool is_compressed,
|
19
20
|
bool is_indexed,
|
20
21
|
bool is_tokenized,
|
21
22
|
bool omit_norms,
|
@@ -26,6 +27,7 @@ void field_prop_test(TestCase *tc,
|
|
26
27
|
tst_ptr_equal(line_num, tc, (void *)name, (void *)fi->name);
|
27
28
|
tst_flt_equal(line_num, tc, boost, fi->boost);
|
28
29
|
tst_int_equal(line_num, tc, is_stored, fi_is_stored(fi));
|
30
|
+
tst_int_equal(line_num, tc, is_compressed, fi_is_compressed(fi));
|
29
31
|
tst_int_equal(line_num, tc, is_indexed, fi_is_indexed(fi));
|
30
32
|
tst_int_equal(line_num, tc, is_tokenized, fi_is_tokenized(fi));
|
31
33
|
tst_int_equal(line_num, tc, omit_norms, fi_omit_norms(fi));
|
@@ -45,17 +47,20 @@ static void test_fi_new(TestCase *tc, void *data)
|
|
45
47
|
FrtFieldInfo *fi;
|
46
48
|
(void)data; /* suppress unused argument warning */
|
47
49
|
fi = frt_fi_new(rb_intern("name"), FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
48
|
-
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, F, F, F, F, F, F, F);
|
50
|
+
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, F, F, F, F, F, F, F, F);
|
49
51
|
frt_fi_deref(fi);
|
50
52
|
fi = frt_fi_new(rb_intern("name"), FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
|
51
|
-
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, T, T, T, F, T, F, F);
|
53
|
+
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, T, F, T, T, F, T, F, F);
|
54
|
+
frt_fi_deref(fi);
|
55
|
+
fi = frt_fi_new(rb_intern("name"), FRT_STORE_COMPRESS, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_POSITIONS);
|
56
|
+
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, T, T, T, F, F, T, T, F);
|
52
57
|
frt_fi_deref(fi);
|
53
58
|
fi = frt_fi_new(rb_intern("name"), FRT_STORE_NO, FRT_INDEX_YES_OMIT_NORMS, FRT_TERM_VECTOR_WITH_OFFSETS);
|
54
|
-
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, F, T, T, T, T, F, T);
|
59
|
+
do_field_prop_test(tc, fi, rb_intern("name"), 1.0, F, F, T, T, T, T, F, T);
|
55
60
|
frt_fi_deref(fi);
|
56
61
|
fi = frt_fi_new(rb_intern("name"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED_OMIT_NORMS, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
|
57
62
|
fi->boost = 1000.0;
|
58
|
-
do_field_prop_test(tc, fi, rb_intern("name"), 1000.0, F, T, F, T, T, T, T);
|
63
|
+
do_field_prop_test(tc, fi, rb_intern("name"), 1000.0, F, F, T, F, T, T, T, T);
|
59
64
|
frt_fi_deref(fi);
|
60
65
|
}
|
61
66
|
|
@@ -75,7 +80,7 @@ static void test_fis_basic(TestCase *tc, void *data)
|
|
75
80
|
fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
76
81
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("FFFFFFFF"), FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO));
|
77
82
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("TFTTFTFF"), FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES));
|
78
|
-
frt_fis_add_field(fis, frt_fi_new(rb_intern("TTTFFTTF"),
|
83
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("TTTFFTTF"), FRT_STORE_COMPRESS, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_POSITIONS));
|
79
84
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("FFTTTTFT"), FRT_STORE_NO, FRT_INDEX_YES_OMIT_NORMS, FRT_TERM_VECTOR_WITH_OFFSETS));
|
80
85
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("FFTFTTTT"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED_OMIT_NORMS, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS));
|
81
86
|
|
@@ -113,11 +118,11 @@ static void test_fis_basic(TestCase *tc, void *data)
|
|
113
118
|
fis->fields[3]->boost = 4.0;
|
114
119
|
fis->fields[4]->boost = 5.0;
|
115
120
|
|
116
|
-
do_field_prop_test(tc, fis->fields[0], rb_intern("FFFFFFFF"), 1.0, F, F, F, F, F, F, F);
|
117
|
-
do_field_prop_test(tc, fis->fields[1], rb_intern("TFTTFTFF"), 2.0, T, T, T, F, T, F, F);
|
118
|
-
do_field_prop_test(tc, fis->fields[2], rb_intern("TTTFFTTF"), 3.0, T, T, F, F, T, T, F);
|
119
|
-
do_field_prop_test(tc, fis->fields[3], rb_intern("FFTTTTFT"), 4.0, F, T, T, T, T, F, T);
|
120
|
-
do_field_prop_test(tc, fis->fields[4], rb_intern("FFTFTTTT"), 5.0, F, T, F, T, T, T, T);
|
121
|
+
do_field_prop_test(tc, fis->fields[0], rb_intern("FFFFFFFF"), 1.0, F, F, F, F, F, F, F, F);
|
122
|
+
do_field_prop_test(tc, fis->fields[1], rb_intern("TFTTFTFF"), 2.0, T, F, T, T, F, T, F, F);
|
123
|
+
do_field_prop_test(tc, fis->fields[2], rb_intern("TTTFFTTF"), 3.0, T, T, T, F, F, T, T, F);
|
124
|
+
do_field_prop_test(tc, fis->fields[3], rb_intern("FFTTTTFT"), 4.0, F, F, T, T, T, T, F, T);
|
125
|
+
do_field_prop_test(tc, fis->fields[4], rb_intern("FFTFTTTT"), 5.0, F, F, T, F, T, T, T, T);
|
121
126
|
|
122
127
|
frt_fis_deref(fis);
|
123
128
|
}
|
@@ -128,26 +133,26 @@ static void test_fis_with_default(TestCase *tc, void *data)
|
|
128
133
|
(void)data; /* suppress unused argument warning */
|
129
134
|
|
130
135
|
fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
|
131
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, F, F, F, F, F, F, F);
|
132
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("dave")), rb_intern("dave"), 1.0, F, F, F, F, F, F, F);
|
133
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("wert")), rb_intern("wert"), 1.0, F, F, F, F, F, F, F);
|
134
|
-
do_field_prop_test(tc, fis->fields[0], rb_intern("name"), 1.0, F, F, F, F, F, F, F);
|
135
|
-
do_field_prop_test(tc, fis->fields[1], rb_intern("dave"), 1.0, F, F, F, F, F, F, F);
|
136
|
-
do_field_prop_test(tc, fis->fields[2], rb_intern("wert"), 1.0, F, F, F, F, F, F, F);
|
136
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, F, F, F, F, F, F, F, F);
|
137
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("dave")), rb_intern("dave"), 1.0, F, F, F, F, F, F, F, F);
|
138
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("wert")), rb_intern("wert"), 1.0, F, F, F, F, F, F, F, F);
|
139
|
+
do_field_prop_test(tc, fis->fields[0], rb_intern("name"), 1.0, F, F, F, F, F, F, F, F);
|
140
|
+
do_field_prop_test(tc, fis->fields[1], rb_intern("dave"), 1.0, F, F, F, F, F, F, F, F);
|
141
|
+
do_field_prop_test(tc, fis->fields[2], rb_intern("wert"), 1.0, F, F, F, F, F, F, F, F);
|
137
142
|
Apnull(frt_fis_get_field(fis, rb_intern("random")));
|
138
143
|
frt_fis_deref(fis);
|
139
144
|
|
140
145
|
fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
|
141
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, T, T, T, F, T, F, F);
|
146
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, T, F, T, T, F, T, F, F);
|
142
147
|
frt_fis_deref(fis);
|
143
|
-
fis = frt_fis_new(
|
144
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, T, T, F, F, T, T, F);
|
148
|
+
fis = frt_fis_new(FRT_STORE_COMPRESS, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_POSITIONS);
|
149
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, T, T, T, F, F, T, T, F);
|
145
150
|
frt_fis_deref(fis);
|
146
151
|
fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_YES_OMIT_NORMS, FRT_TERM_VECTOR_WITH_OFFSETS);
|
147
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, F, T, T, T, T, F, T);
|
152
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, F, F, T, T, T, T, F, T);
|
148
153
|
frt_fis_deref(fis);
|
149
154
|
fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_UNTOKENIZED_OMIT_NORMS, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
|
150
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, F, T, F, T, T, T, T);
|
155
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("name")), rb_intern("name"), 1.0, F, F, T, F, T, T, T, T);
|
151
156
|
frt_fis_deref(fis);
|
152
157
|
}
|
153
158
|
|
@@ -164,22 +169,23 @@ static void test_fis_rw(TestCase *tc, void *data)
|
|
164
169
|
FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
|
165
170
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("FFFFFFFF"), FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO));
|
166
171
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("TFTTFTFF"), FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES));
|
172
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("TTTFFTTF"), FRT_STORE_COMPRESS, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_POSITIONS));
|
167
173
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("FFTTTTFT"), FRT_STORE_NO, FRT_INDEX_YES_OMIT_NORMS, FRT_TERM_VECTOR_WITH_OFFSETS));
|
168
174
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("FFTFTTTT"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED_OMIT_NORMS, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS));
|
169
|
-
fis->fields[
|
170
|
-
fis->fields[
|
171
|
-
fis->fields[
|
172
|
-
fis->fields[
|
175
|
+
fis->fields[1]->boost = 2.0;
|
176
|
+
fis->fields[2]->boost = 3.0;
|
177
|
+
fis->fields[3]->boost = 4.0;
|
178
|
+
fis->fields[4]->boost = 5.0;
|
173
179
|
os = store->new_output(store, "fields");
|
174
180
|
frt_fis_write(fis, os);
|
175
181
|
frt_os_close(os);
|
176
182
|
|
177
183
|
/* these fields won't be saved but will be added again later */
|
178
|
-
Aiequal(4, fis->size);
|
179
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("new_field")), rb_intern("new_field"), 1.0, T, T, F, T, T, T, T);
|
180
184
|
Aiequal(5, fis->size);
|
181
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("
|
185
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("new_field")), rb_intern("new_field"), 1.0, T, F, T, F, T, T, T, T);
|
182
186
|
Aiequal(6, fis->size);
|
187
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("another")), rb_intern("another"), 1.0, T, F, T, F, T, T, T, T);
|
188
|
+
Aiequal(7, fis->size);
|
183
189
|
|
184
190
|
frt_fis_deref(fis);
|
185
191
|
|
@@ -190,15 +196,16 @@ static void test_fis_rw(TestCase *tc, void *data)
|
|
190
196
|
Aiequal(FRT_INDEX_UNTOKENIZED_OMIT_NORMS, fis->index);
|
191
197
|
Aiequal(FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS, fis->term_vector);
|
192
198
|
|
193
|
-
do_field_prop_test(tc, fis->fields[0], rb_intern("FFFFFFFF"),
|
194
|
-
do_field_prop_test(tc, fis->fields[1], rb_intern("TFTTFTFF"),
|
195
|
-
do_field_prop_test(tc, fis->fields[2], rb_intern("
|
196
|
-
do_field_prop_test(tc, fis->fields[3], rb_intern("
|
197
|
-
|
198
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("new_field")), rb_intern("new_field"), 1.0, T, T, F, T, T, T, T);
|
199
|
+
do_field_prop_test(tc, fis->fields[0], rb_intern("FFFFFFFF"), 1.0, F, F, F, F, F, F, F, F);
|
200
|
+
do_field_prop_test(tc, fis->fields[1], rb_intern("TFTTFTFF"), 2.0, T, F, T, T, F, T, F, F);
|
201
|
+
do_field_prop_test(tc, fis->fields[2], rb_intern("TTTFFTTF"), 3.0, T, T, T, F, F, T, T, F);
|
202
|
+
do_field_prop_test(tc, fis->fields[3], rb_intern("FFTTTTFT"), 4.0, F, F, T, T, T, T, F, T);
|
203
|
+
do_field_prop_test(tc, fis->fields[4], rb_intern("FFTFTTTT"), 5.0, F, F, T, F, T, T, T, T);
|
199
204
|
Aiequal(5, fis->size);
|
200
|
-
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("
|
205
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("new_field")), rb_intern("new_field"), 1.0, T, F, T, F, T, T, T, T);
|
201
206
|
Aiequal(6, fis->size);
|
207
|
+
do_field_prop_test(tc, frt_fis_get_or_add_field(fis, rb_intern("another")), rb_intern("another"), 1.0, T, F, T, F, T, T, T, T);
|
208
|
+
Aiequal(7, fis->size);
|
202
209
|
str = frt_fis_to_s(fis);
|
203
210
|
Asequal("default:\n"
|
204
211
|
" store: :yes\n"
|
@@ -206,15 +213,20 @@ static void test_fis_rw(TestCase *tc, void *data)
|
|
206
213
|
" term_vector: :with_positions_offsets\n"
|
207
214
|
"fields:\n"
|
208
215
|
" FFFFFFFF:\n"
|
209
|
-
" boost:
|
216
|
+
" boost: 1.000000\n"
|
210
217
|
" store: :no\n"
|
211
218
|
" index: :no\n"
|
212
219
|
" term_vector: :no\n"
|
213
220
|
" TFTTFTFF:\n"
|
214
|
-
" boost:
|
221
|
+
" boost: 2.000000\n"
|
215
222
|
" store: :yes\n"
|
216
223
|
" index: :yes\n"
|
217
224
|
" term_vector: :yes\n"
|
225
|
+
" TTTFFTTF:\n"
|
226
|
+
" boost: 3.000000\n"
|
227
|
+
" store: :compressed\n"
|
228
|
+
" index: :untokenized\n"
|
229
|
+
" term_vector: :with_positions\n"
|
218
230
|
" FFTTTTFT:\n"
|
219
231
|
" boost: 4.000000\n"
|
220
232
|
" store: :no\n"
|
@@ -302,7 +314,7 @@ static FrtFieldInfos *prepare_fis()
|
|
302
314
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("ignored"), FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO));
|
303
315
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("unstored"), FRT_STORE_NO, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS));
|
304
316
|
frt_fis_add_field(fis, frt_fi_new(rb_intern("stored"), FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES));
|
305
|
-
frt_fis_add_field(fis, frt_fi_new(rb_intern("stored_array"),
|
317
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("stored_array"), FRT_STORE_COMPRESS, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_NO));
|
306
318
|
return fis;
|
307
319
|
}
|
308
320
|
|
@@ -327,8 +339,8 @@ static void test_fields_rw_single(TestCase *tc, void *data)
|
|
327
339
|
frt_doc_destroy(doc);
|
328
340
|
|
329
341
|
Aiequal(6, fis->size);
|
330
|
-
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("binary")), rb_intern("binary"), 1.0, T, T, T, F, F, F, F);
|
331
|
-
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("array")), rb_intern("array"), 1.0, T, T, T, F, F, F, F);
|
342
|
+
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("binary")), rb_intern("binary"), 1.0, T, F, T, T, F, F, F, F);
|
343
|
+
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("array")), rb_intern("array"), 1.0, T, F, T, T, F, F, F, F);
|
332
344
|
|
333
345
|
fr = frt_fr_open(store, "_0", fis);
|
334
346
|
doc = frt_fr_get_doc(fr, 0);
|
@@ -401,12 +413,12 @@ static void test_fields_rw_multi(TestCase *tc, void *data)
|
|
401
413
|
frt_fw_close(fw);
|
402
414
|
|
403
415
|
Aiequal(106, fis->size);
|
404
|
-
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("binary")), rb_intern("binary"), 1.0, T, T, T, F, F, F, F);
|
405
|
-
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("array")), rb_intern("array"), 1.0, T, T, T, F, F, F, F);
|
416
|
+
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("binary")), rb_intern("binary"), 1.0, T, F, T, T, F, F, F, F);
|
417
|
+
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern("array")), rb_intern("array"), 1.0, T, F, T, T, F, F, F, F);
|
406
418
|
for (i = 0; i < 100; i++) {
|
407
419
|
char buf[100];
|
408
420
|
sprintf(buf, "<<%d>>", i);
|
409
|
-
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern(buf)), rb_intern(buf), 1.0, T, T, T, F, F, F, F);
|
421
|
+
do_field_prop_test(tc, frt_fis_get_field(fis, rb_intern(buf)), rb_intern(buf), 1.0, T, F, T, T, F, F, F, F);
|
410
422
|
}
|
411
423
|
|
412
424
|
fr = frt_fr_open(store, "_as3", fis);
|
@@ -1365,7 +1365,7 @@ static ReaderTestEnvironment *reader_test_env_new(int type)
|
|
1365
1365
|
FRT_TERM_VECTOR_NO));
|
1366
1366
|
} else if (compressed_field == df->name) {
|
1367
1367
|
frt_fis_add_field(fis, frt_fi_new(compressed_field,
|
1368
|
-
|
1368
|
+
FRT_STORE_COMPRESS,
|
1369
1369
|
FRT_INDEX_YES,
|
1370
1370
|
FRT_TERM_VECTOR_NO));
|
1371
1371
|
}
|
@@ -1428,6 +1428,8 @@ static void write_ir_test_docs(FrtStore *store)
|
|
1428
1428
|
FRT_TERM_VECTOR_NO));
|
1429
1429
|
frt_fis_add_field(fis, frt_fi_new(text, FRT_STORE_NO, FRT_INDEX_YES,
|
1430
1430
|
FRT_TERM_VECTOR_NO));
|
1431
|
+
frt_fis_add_field(fis, frt_fi_new(compressed_field, FRT_STORE_COMPRESS, FRT_INDEX_YES,
|
1432
|
+
FRT_TERM_VECTOR_NO));
|
1431
1433
|
frt_index_create(store, fis);
|
1432
1434
|
frt_fis_deref(fis);
|
1433
1435
|
config.max_buffered_docs = 5;
|
@@ -2123,6 +2125,7 @@ static void test_ir_multivalue_fields(TestCase *tc, void *data)
|
|
2123
2125
|
|
2124
2126
|
fi = frt_fis_get_field(iw->fis, tag);
|
2125
2127
|
Aiequal(true, fi_is_stored(fi));
|
2128
|
+
Aiequal(false, fi_is_compressed(fi));
|
2126
2129
|
Aiequal(true, fi_is_indexed(fi));
|
2127
2130
|
Aiequal(true, fi_is_tokenized(fi));
|
2128
2131
|
Aiequal(true, fi_has_norms(fi));
|