isomorfeus-ferret 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +612 -0
- data/README.md +44 -0
- data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
- data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
- data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
- data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
- data/ext/isomorfeus_ferret_ext/email.rl +21 -0
- data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
- data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
- data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
- data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
- data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
- data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
- data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
- data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
- data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
- data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
- data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
- data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
- data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
- data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
- data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
- data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
- data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
- data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
- data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
- data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
- data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
- data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
- data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
- data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
- data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
- data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
- data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
- data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
- data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
- data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
- data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
- data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
- data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
- data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
- data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
- data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
- data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
- data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
- data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
- data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
- data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
- data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
- data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
- data/ext/isomorfeus_ferret_ext/test.c +850 -0
- data/ext/isomorfeus_ferret_ext/test.h +416 -0
- data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
- data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
- data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
- data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
- data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
- data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
- data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
- data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
- data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
- data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
- data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
- data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
- data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
- data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
- data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
- data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
- data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
- data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
- data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
- data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
- data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
- data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
- data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
- data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
- data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
- data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
- data/ext/isomorfeus_ferret_ext/url.rl +27 -0
- data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
- data/lib/isomorfeus/ferret/document.rb +132 -0
- data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
- data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
- data/lib/isomorfeus/ferret/index/index.rb +970 -0
- data/lib/isomorfeus/ferret/monitor.rb +323 -0
- data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
- data/lib/isomorfeus/ferret/version.rb +5 -0
- data/lib/isomorfeus-ferret.rb +8 -0
- metadata +307 -0
@@ -0,0 +1,373 @@
|
|
1
|
+
#include "frt_index.h"
|
2
|
+
#include "test.h"
|
3
|
+
|
4
|
+
#define NUM_TERMS 100
|
5
|
+
#define TERM_LEN 10
|
6
|
+
|
7
|
+
|
8
|
+
/**
 * Exercise the posting primitives.
 *
 * The test expects a freshly created posting to carry exactly one occurrence,
 * a new posting list to point at that occurrence as its tail, and every
 * frt_pl_add_occ() call to append one occurrence while raising the posting's
 * frequency by one.
 *
 * @param tc   test case handle (used by the assertion macros)
 * @param data the FrtMemoryPool all postings are allocated from
 */
static void test_posting(TestCase *tc, void *data)
{
    FrtMemoryPool *pool = (FrtMemoryPool *)data;
    FrtPosting *posting = frt_p_new(pool, 0, 10);
    FrtPostingList *list;

    /* a new posting: document 0, a single occurrence at position 10 */
    Aiequal(0, posting->doc_num);
    Aiequal(1, posting->freq);
    Aiequal(10, posting->first_occ->pos);
    Apnull(posting->first_occ->next);

    /* wrapping it in a list keeps the first occurrence as the tail */
    list = frt_pl_new(pool, "seven", 5, posting);
    Aiequal(5, list->term_len);
    Asequal("seven", list->term);
    Apequal(posting->first_occ, list->last_occ);

    /* second occurrence is linked directly behind the first */
    frt_pl_add_occ(pool, list, 50);
    Apequal(list->last_occ, posting->first_occ->next);
    Aiequal(2, posting->freq);
    Aiequal(50, list->last_occ->pos);
    Apnull(list->last_occ->next);

    /* third occurrence becomes the new tail */
    frt_pl_add_occ(pool, list, 345);
    Apequal(list->last_occ, posting->first_occ->next->next);
    Aiequal(3, posting->freq);
    Aiequal(345, list->last_occ->pos);
    Apnull(list->last_occ->next);
}
|
35
|
+
|
36
|
+
static FrtFieldInfos *create_tv_fis()
|
37
|
+
{
|
38
|
+
FrtFieldInfos *fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_NO);
|
39
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("tv"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_YES));
|
40
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("tv2"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_YES));
|
41
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("tv_with_positions"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_POSITIONS));
|
42
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("tv_with_offsets"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_OFFSETS));
|
43
|
+
frt_fis_add_field(fis, frt_fi_new(rb_intern("tv_with_positions_offsets"), FRT_STORE_NO, FRT_INDEX_UNTOKENIZED, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS));
|
44
|
+
return fis;
|
45
|
+
}
|
46
|
+
|
47
|
+
/**
 * Create NUM_TERMS terms of the form "000000000", "000000001", ...
 *
 * Each term is the loop index printed as a zero-padded nine digit number.
 * Both the array and the strings are allocated from `mp`, so nothing has to
 * be freed individually.
 *
 * @param mp memory pool that owns the returned array and strings
 * @return array of NUM_TERMS NUL-terminated terms
 */
static char **create_tv_terms(FrtMemoryPool *mp)
{
    int i;
    char term_buf[10]; /* nine digits plus the terminating NUL */
    char **terms = FRT_MP_ALLOC_N(mp, char *, NUM_TERMS);

    for (i = 0; i < NUM_TERMS; i++) {
        /* bounded write: cannot run past term_buf whatever NUM_TERMS is */
        snprintf(term_buf, sizeof(term_buf), "%09d", i);
        terms[i] = frt_mp_strdup(mp, term_buf);
    }
    return terms;
}
|
58
|
+
|
59
|
+
/**
 * Create one posting list per term.
 *
 * The list for terms[i] starts with a posting for document 0 at position 0
 * and then receives the positions 1..i, so it ends up with i + 1 occurrences
 * at positions 0..i.
 *
 * @param mp    memory pool that owns the returned array and posting lists
 * @param terms NUM_TERMS NUL-terminated terms (see create_tv_terms())
 * @return array of NUM_TERMS posting lists
 */
static FrtPostingList **create_tv_plists(FrtMemoryPool *mp, char **terms)
{
    int i, j;
    FrtPostingList **plists, *pl;

    plists = FRT_MP_ALLOC_N(mp, FrtPostingList *, NUM_TERMS);
    for (i = 0; i < NUM_TERMS; i++) {
        /* take the length from the term itself instead of hard-coding the
         * width used by the term generator */
        pl = plists[i] =
            frt_pl_new(mp, terms[i], (int)strlen(terms[i]), frt_p_new(mp, 0, 0));
        for (j = 1; j <= i; j++) {
            frt_pl_add_occ(mp, pl, j);
        }
    }
    return plists;
}
|
73
|
+
|
74
|
+
/**
 * Create NUM_TERMS offsets where entry i spans [5 * i, 5 * i + 4].
 *
 * @param mp memory pool that owns the returned array
 * @return array of NUM_TERMS offsets
 */
static FrtOffset *create_tv_offsets(FrtMemoryPool *mp)
{
    FrtOffset *table = FRT_MP_ALLOC_N(mp, FrtOffset, NUM_TERMS);
    int idx = 0;

    while (idx < NUM_TERMS) {
        table[idx].start = 5 * idx;
        table[idx].end = 5 * idx + 4;
        idx++;
    }
    return table;
}
|
84
|
+
|
85
|
+
static void test_tv_single_doc(TestCase *tc, void *data)
|
86
|
+
{
|
87
|
+
int i, j;
|
88
|
+
FrtStore *store = frt_open_ram_store();
|
89
|
+
FrtMemoryPool *mp = (FrtMemoryPool *)data;
|
90
|
+
FrtFieldsReader *fr;
|
91
|
+
FrtFieldsWriter *fw;
|
92
|
+
FrtTermVector *tv;
|
93
|
+
FrtHash *tvs;
|
94
|
+
FrtFieldInfos *fis = create_tv_fis();
|
95
|
+
char **terms = create_tv_terms(mp);
|
96
|
+
FrtPostingList **plists = create_tv_plists(mp, terms);
|
97
|
+
FrtOffset *offsets = create_tv_offsets(mp);
|
98
|
+
FrtDocument *doc = frt_doc_new();
|
99
|
+
|
100
|
+
fw = frt_fw_open(store, "_0", fis);
|
101
|
+
frt_fw_close(fw);
|
102
|
+
|
103
|
+
fr = frt_fr_open(store, "_0", fis);
|
104
|
+
Aiequal(0, fr->size);
|
105
|
+
frt_fr_close(fr);
|
106
|
+
|
107
|
+
|
108
|
+
fw = frt_fw_open(store, "_0", fis);
|
109
|
+
frt_fw_add_doc(fw, doc);
|
110
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
111
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_positions"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
112
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_offsets"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
113
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
114
|
+
frt_fw_write_tv_index(fw);
|
115
|
+
frt_fw_close(fw);
|
116
|
+
frt_doc_destroy(doc);
|
117
|
+
|
118
|
+
fr = frt_fr_open(store, "_0", fis);
|
119
|
+
Aiequal(1, fr->size);
|
120
|
+
|
121
|
+
/* test individual field's term vectors */
|
122
|
+
tv = frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv"))->number);
|
123
|
+
if (Apnotnull(tv)) {
|
124
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv"))->number, tv->field_num);
|
125
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
126
|
+
Aiequal(0, tv->offset_cnt);
|
127
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
128
|
+
Asequal(terms[i], tv->terms[i].text);
|
129
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
130
|
+
Apnull(tv->terms[i].positions);
|
131
|
+
}
|
132
|
+
Apnull(tv->offsets);
|
133
|
+
}
|
134
|
+
if (tv) frt_tv_destroy(tv);
|
135
|
+
|
136
|
+
tv = frt_fr_get_field_tv(fr, 0,
|
137
|
+
frt_fis_get_field(fis, rb_intern("tv_with_positions"))->number);
|
138
|
+
if (Apnotnull(tv)) {
|
139
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv_with_positions"))->number,
|
140
|
+
tv->field_num);
|
141
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
142
|
+
Aiequal(0, tv->offset_cnt);
|
143
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
144
|
+
Asequal(terms[i], tv->terms[i].text);
|
145
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
146
|
+
for (j = 0; j <= i; j++) {
|
147
|
+
Aiequal(j, tv->terms[i].positions[j]);
|
148
|
+
}
|
149
|
+
}
|
150
|
+
Apnull(tv->offsets);
|
151
|
+
}
|
152
|
+
if (tv) frt_tv_destroy(tv);
|
153
|
+
|
154
|
+
tv = frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv_with_offsets"))->number);
|
155
|
+
if (Apnotnull(tv)) {
|
156
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv_with_offsets"))->number, tv->field_num);
|
157
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
158
|
+
Aiequal(NUM_TERMS, tv->offset_cnt);
|
159
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
160
|
+
Asequal(terms[i], tv->terms[i].text);
|
161
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
162
|
+
Apnull(tv->terms[i].positions);
|
163
|
+
}
|
164
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
165
|
+
Aiequal(i * 5, tv->offsets[i].start);
|
166
|
+
Aiequal(i * 5 + 4, tv->offsets[i].end);
|
167
|
+
}
|
168
|
+
}
|
169
|
+
if (tv) frt_tv_destroy(tv);
|
170
|
+
|
171
|
+
tv = frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number);
|
172
|
+
if (Apnotnull(tv)) {
|
173
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number, tv->field_num);
|
174
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
175
|
+
Aiequal(NUM_TERMS, tv->offset_cnt);
|
176
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
177
|
+
Asequal(terms[i], tv->terms[i].text);
|
178
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
179
|
+
for (j = 1; j <= i; j++) {
|
180
|
+
Aiequal(j, tv->terms[i].positions[j]);
|
181
|
+
}
|
182
|
+
}
|
183
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
184
|
+
Aiequal(i * 5, tv->offsets[i].start);
|
185
|
+
Aiequal(i * 5 + 4, tv->offsets[i].end);
|
186
|
+
}
|
187
|
+
}
|
188
|
+
if (tv) frt_tv_destroy(tv);
|
189
|
+
|
190
|
+
tv = frt_fr_get_field_tv(fr, 0, frt_fis_get_or_add_field(fis, rb_intern("tv2"))->number);
|
191
|
+
Apnull(tv);
|
192
|
+
tv = frt_fr_get_field_tv(fr, 0, frt_fis_get_or_add_field(fis, rb_intern("new"))->number);
|
193
|
+
Apnull(tv);
|
194
|
+
|
195
|
+
/* test document's term vectors */
|
196
|
+
tvs = frt_fr_get_tv(fr, 0);
|
197
|
+
Aiequal(4, tvs->size);
|
198
|
+
tv = (FrtTermVector*)frt_h_get(tvs, (void *)rb_intern("tv2"));
|
199
|
+
Apnull(tv);
|
200
|
+
tv = (FrtTermVector*)frt_h_get(tvs, (void *)rb_intern("other"));
|
201
|
+
Apnull(tv);
|
202
|
+
|
203
|
+
tv = (FrtTermVector*)frt_h_get(tvs, (void *)rb_intern("tv_with_positions_offsets"));
|
204
|
+
if (Apnotnull(tv)) {
|
205
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number, tv->field_num);
|
206
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
207
|
+
Aiequal(NUM_TERMS, tv->offset_cnt);
|
208
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
209
|
+
Asequal(terms[i], tv->terms[i].text);
|
210
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
211
|
+
for (j = 1; j <= i; j++) {
|
212
|
+
Aiequal(j, tv->terms[i].positions[j]);
|
213
|
+
}
|
214
|
+
}
|
215
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
216
|
+
Aiequal(i * 5, tv->offsets[i].start);
|
217
|
+
Aiequal(i * 5 + 4, tv->offsets[i].end);
|
218
|
+
}
|
219
|
+
}
|
220
|
+
frt_h_destroy(tvs);
|
221
|
+
|
222
|
+
frt_fr_close(fr);
|
223
|
+
frt_fis_deref(fis);
|
224
|
+
frt_store_deref(store);
|
225
|
+
}
|
226
|
+
|
227
|
+
static void test_tv_multi_doc(TestCase *tc, void *data)
|
228
|
+
{
|
229
|
+
int i, j;
|
230
|
+
FrtStore *store = frt_open_ram_store();
|
231
|
+
FrtMemoryPool *mp = (FrtMemoryPool *)data;
|
232
|
+
FrtFieldsReader *fr;
|
233
|
+
FrtFieldsWriter *fw;
|
234
|
+
FrtTermVector *tv;
|
235
|
+
FrtHash *tvs;
|
236
|
+
FrtFieldInfos *fis = create_tv_fis();
|
237
|
+
char **terms = create_tv_terms(mp);
|
238
|
+
FrtPostingList **plists = create_tv_plists(mp, terms);
|
239
|
+
FrtOffset *offsets = create_tv_offsets(mp);
|
240
|
+
FrtDocument *doc = frt_doc_new();
|
241
|
+
|
242
|
+
fw = frt_fw_open(store, "_0", fis);
|
243
|
+
frt_fw_add_doc(fw, doc);
|
244
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
245
|
+
frt_fw_write_tv_index(fw); frt_fw_add_doc(fw, doc);
|
246
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_positions"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
247
|
+
frt_fw_write_tv_index(fw); frt_fw_add_doc(fw, doc);
|
248
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_offsets"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
249
|
+
frt_fw_write_tv_index(fw); frt_fw_add_doc(fw, doc);
|
250
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
251
|
+
frt_fw_write_tv_index(fw); frt_fw_add_doc(fw, doc);
|
252
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
253
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_positions"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
254
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_offsets"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
255
|
+
frt_fw_add_postings(fw, frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number, plists, NUM_TERMS, offsets, NUM_TERMS);
|
256
|
+
|
257
|
+
frt_fw_write_tv_index(fw);
|
258
|
+
frt_fw_close(fw);
|
259
|
+
frt_doc_destroy(doc);
|
260
|
+
|
261
|
+
fr = frt_fr_open(store, "_0", fis);
|
262
|
+
Aiequal(5, fr->size);
|
263
|
+
|
264
|
+
tv = frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv"))->number);
|
265
|
+
if (Apnotnull(tv)) {
|
266
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv"))->number, tv->field_num);
|
267
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
268
|
+
Aiequal(0, tv->offset_cnt);
|
269
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
270
|
+
Asequal(terms[i], tv->terms[i].text);
|
271
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
272
|
+
Apnull(tv->terms[i].positions);
|
273
|
+
}
|
274
|
+
Apnull(tv->offsets);
|
275
|
+
}
|
276
|
+
Apnull(frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv_with_positions"))->number));
|
277
|
+
Apnull(frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv_with_offsets"))->number));
|
278
|
+
Apnull(frt_fr_get_field_tv(fr, 0, frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number));
|
279
|
+
frt_tv_destroy(tv);
|
280
|
+
|
281
|
+
tv = frt_fr_get_field_tv(fr, 3, frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number);
|
282
|
+
if (Apnotnull(tv)) {
|
283
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number,
|
284
|
+
tv->field_num);
|
285
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
286
|
+
Aiequal(NUM_TERMS, tv->offset_cnt);
|
287
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
288
|
+
Asequal(terms[i], tv->terms[i].text);
|
289
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
290
|
+
for (j = 1; j <= i; j++) {
|
291
|
+
Aiequal(j, tv->terms[i].positions[j]);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
295
|
+
Aiequal(i * 5, tv->offsets[i].start);
|
296
|
+
Aiequal(i * 5 + 4, tv->offsets[i].end);
|
297
|
+
}
|
298
|
+
}
|
299
|
+
frt_tv_destroy(tv);
|
300
|
+
|
301
|
+
/* test document's term vector */
|
302
|
+
tvs = frt_fr_get_tv(fr, 0);
|
303
|
+
Aiequal(1, tvs->size);
|
304
|
+
frt_h_destroy(tvs);
|
305
|
+
|
306
|
+
tvs = frt_fr_get_tv(fr, 4);
|
307
|
+
Aiequal(4, tvs->size);
|
308
|
+
tv = (FrtTermVector*)frt_h_get(tvs, (void *)rb_intern("tv2"));
|
309
|
+
Apnull(tv);
|
310
|
+
tv = (FrtTermVector*)frt_h_get(tvs, (void *)rb_intern("other"));
|
311
|
+
Apnull(tv);
|
312
|
+
|
313
|
+
tv = (FrtTermVector*)frt_h_get(tvs, (void *)rb_intern("tv_with_positions_offsets"));
|
314
|
+
if (Apnotnull(tv)) {
|
315
|
+
Aiequal(frt_fis_get_field(fis, rb_intern("tv_with_positions_offsets"))->number, tv->field_num);
|
316
|
+
Aiequal(NUM_TERMS, tv->term_cnt);
|
317
|
+
Aiequal(NUM_TERMS, tv->offset_cnt);
|
318
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
319
|
+
Asequal(terms[i], tv->terms[i].text);
|
320
|
+
Aiequal(i + 1, tv->terms[i].freq);
|
321
|
+
for (j = 1; j <= i; j++) {
|
322
|
+
Aiequal(j, tv->terms[i].positions[j]);
|
323
|
+
}
|
324
|
+
}
|
325
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
326
|
+
Aiequal(i * 5, tv->offsets[i].start);
|
327
|
+
Aiequal(i * 5 + 4, tv->offsets[i].end);
|
328
|
+
}
|
329
|
+
}
|
330
|
+
|
331
|
+
for (i = 0; i < NUM_TERMS; i++) {
|
332
|
+
char buf[100];
|
333
|
+
int len = sprintf(buf, "%s", tv->terms[i].text);
|
334
|
+
assert(strlen(tv->terms[i].text) < 100);
|
335
|
+
Aiequal(i, frt_tv_get_term_index(tv, buf));
|
336
|
+
|
337
|
+
/* make the word lexically less than it was but greater than any other
|
338
|
+
* word in the index that originally came before it. */
|
339
|
+
buf[len - 1]--;
|
340
|
+
buf[len ] = '~';
|
341
|
+
buf[len + 1] = '\0';
|
342
|
+
Aiequal(-1, frt_tv_get_term_index(tv, buf));
|
343
|
+
Aiequal(i, frt_tv_scan_to_term_index(tv, buf));
|
344
|
+
|
345
|
+
/* make the word lexically more than it was by less than any other
|
346
|
+
* word in the index that originally came after it. */
|
347
|
+
buf[len - 1]++;
|
348
|
+
buf[len ] = '.';
|
349
|
+
Aiequal(-1, frt_tv_get_term_index(tv, buf));
|
350
|
+
Aiequal(i + 1, frt_tv_scan_to_term_index(tv, buf));
|
351
|
+
}
|
352
|
+
Aiequal(-1, frt_tv_get_term_index(tv, "UnKnOwN TeRm"));
|
353
|
+
frt_h_destroy(tvs);
|
354
|
+
frt_fr_close(fr);
|
355
|
+
frt_fis_deref(fis);
|
356
|
+
frt_store_deref(store);
|
357
|
+
}
|
358
|
+
|
359
|
+
|
360
|
+
/**
 * Register and run the term vector tests.
 *
 * All three tests share one memory pool, which is reset between tests and
 * destroyed once the last test has run.
 *
 * @param suite suite to add to
 * @return the suite returned by ADD_SUITE()
 */
TestSuite *ts_term_vectors(TestSuite *suite)
{
    FrtMemoryPool *pool = frt_mp_new();

    suite = ADD_SUITE(suite);

    tst_run_test(suite, test_posting, pool);

    frt_mp_reset(pool);
    tst_run_test(suite, test_tv_single_doc, pool);

    frt_mp_reset(pool);
    tst_run_test(suite, test_tv_multi_doc, pool);

    frt_mp_destroy(pool);
    return suite;
}
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#include "frt_except.h"
|
2
|
+
#include "test.h"
|
3
|
+
|
4
|
+
static void raise_eof()
|
5
|
+
{
|
6
|
+
FRT_RAISE(FRT_EOF_ERROR, "Error, end of file");
|
7
|
+
}
|
8
|
+
|
9
|
+
/**
|
10
|
+
* Test the basic test functions
|
11
|
+
*/
|
12
|
+
static void test_asserts(TestCase *tc, void *data)
|
13
|
+
{
|
14
|
+
char *p[10];
|
15
|
+
|
16
|
+
int ia1[3] = { 1, 2, 3 };
|
17
|
+
int ia2[3] = { 1, 2, 3 };
|
18
|
+
|
19
|
+
static const char *sa1[10] = { "one", "two", "three" };
|
20
|
+
static const char *sa2[10] = { "one", "two", "three" };
|
21
|
+
(void)data; /* suppress unused argument warning */
|
22
|
+
|
23
|
+
Aaiequal(ia1, ia2, 3);
|
24
|
+
Aasequal(sa1, sa2, 3);
|
25
|
+
Aiequal(1, 1);
|
26
|
+
Asequal("String One", "String One");
|
27
|
+
Asnequal("String One", "String Two", 7);
|
28
|
+
Apnotnull(p);
|
29
|
+
Apequal(p, p);
|
30
|
+
Atrue(p != NULL);
|
31
|
+
Atrue(1);
|
32
|
+
Atrue(!0);
|
33
|
+
Assert(1 == 1, "%d != %d", 1, 1);
|
34
|
+
Araise(FRT_EOF_ERROR, &raise_eof, NULL);
|
35
|
+
}
|
36
|
+
|
37
|
+
/**
 * Callback for Araise() that returns without raising anything.
 */
static void raise_nothing(void *nothing)
{
    (void)nothing; /* suppress unused argument warning */
}
|
41
|
+
/**
|
42
|
+
* Test test failures. This method isn't called because we want 100% tests
|
43
|
+
* passing but if you want to check the tests work in case of failure, run
|
44
|
+
* this test.
|
45
|
+
*/
|
46
|
+
static void test_failures(TestCase *tc, void *data)
|
47
|
+
{
|
48
|
+
void *q = NULL;
|
49
|
+
void *p = frt_emalloc(10);
|
50
|
+
|
51
|
+
int ia1[3] = { 1, 2, 3 };
|
52
|
+
int ia2[3] = { 1, 2, 4 };
|
53
|
+
|
54
|
+
static const char *sa1[10] = { "one", "two", "three" };
|
55
|
+
static const char *sa2[10] = { "one", "two", "there" };
|
56
|
+
(void)data; /* suppress unused argument warning */
|
57
|
+
|
58
|
+
Aaiequal(ia1, ia2, 3);
|
59
|
+
Aasequal(sa1, sa2, 3);
|
60
|
+
Aiequal(1, 2);
|
61
|
+
Asequal("String One", "String Two");
|
62
|
+
Asnequal("String One", "String Two", 8);
|
63
|
+
Apnotnull(q);
|
64
|
+
Apequal(p, q);
|
65
|
+
Atrue(1 == 2);
|
66
|
+
Assert(1 == 2, "%d != %d", 1, 2);
|
67
|
+
Atrue(0);
|
68
|
+
free(p);
|
69
|
+
Araise(EXCEPTION, &raise_eof, NULL);
|
70
|
+
Araise(EXCEPTION, &raise_nothing, NULL);
|
71
|
+
}
|
72
|
+
|
73
|
+
/**
 * Register and run the self-test of the assertion macros.
 *
 * @param suite suite to add to
 * @return the suite returned by ADD_SUITE()
 */
TestSuite *ts_test(TestSuite *suite)
{
    suite = ADD_SUITE(suite);

    tst_run_test(suite, test_asserts, NULL);

    /* Deliberately disabled: test_failures only contains failing checks.
     * Change the condition to run it by hand. */
    if (false) {
        tst_run_test(suite, test_failures, NULL);
    }

    return suite;
}
|
@@ -0,0 +1,188 @@
|
|
1
|
+
#include "frt_global.h"
|
2
|
+
#include "frt_search.h"
|
3
|
+
#include "frt_ind.h"
|
4
|
+
#include "testhelper.h"
|
5
|
+
#include "test.h"
|
6
|
+
#include "pthread.h"
|
7
|
+
|
8
|
+
extern char *num_to_str(int num);
|
9
|
+
|
10
|
+
#define test_num(n, expected) num = num_to_str(n); Asequal(expected, num); free(num)
|
11
|
+
|
12
|
+
static void test_number_to_str(TestCase *tc, void *data)
|
13
|
+
{
|
14
|
+
char *num;
|
15
|
+
(void)data;
|
16
|
+
test_num(0, "zero");
|
17
|
+
test_num(9, "nine");
|
18
|
+
test_num(10, "ten");
|
19
|
+
test_num(13, "thirteen");
|
20
|
+
test_num(19, "nineteen");
|
21
|
+
test_num(20, "twenty");
|
22
|
+
test_num(21, "twenty one");
|
23
|
+
test_num(99, "ninety nine");
|
24
|
+
test_num(100, "one hundred");
|
25
|
+
test_num(101, "one hundred and one");
|
26
|
+
test_num(111, "one hundred and eleven");
|
27
|
+
test_num(1111, "one thousand one hundred and eleven");
|
28
|
+
test_num(22222, "twenty two thousand two hundred and twenty two");
|
29
|
+
test_num(333333, "three hundred and thirty three thousand three hundred and thirty three");
|
30
|
+
test_num(8712387, "eight million seven hundred and twelve thousand three hundred and eighty seven");
|
31
|
+
test_num(1000000000, "one billion");
|
32
|
+
test_num(-8712387, "negative eight million seven hundred and twelve thousand three hundred and eighty seven");
|
33
|
+
|
34
|
+
}
|
35
|
+
|
36
|
+
/*
 * Logging sink that ignores its format string and every variadic argument.
 * Used as the target of tlog when variadic macros are not available.
 */
void dummy_log(const void *fmt, ...)
{
    (void)fmt;
}

/* Number of random operations performed by each indexing_thread() call. */
#define ITERATIONS 10
/* Number of threads spawned by test_threading(). */
#define NTHREADS 10

/*
 * tlog: trace logging, silenced by default.
 * Without variadic macro support it forwards to dummy_log; with it, the
 * whole call (arguments included) expands to nothing.
 */
#ifndef FRT_HAS_VARARGS
#define tlog dummy_log
#else
#define tlog(...)
#endif
/* To see the trace output, use this definition instead: */
/*#define tlog printf */
|
45
|
+
|
46
|
+
/**
 * Logs the action through tlog and then calls frt_index_optimize() on the
 * given index.
 *
 * @param index index to optimize
 */
static void do_optimize(FrtIndex *index)
{
    tlog("Optimizing the index\n");

    frt_index_optimize(index);
}
|
51
|
+
|
52
|
+
/**
 * Picks a random document number below frt_index_size() and deletes it,
 * unless frt_index_is_deleted() reports it as already deleted.
 * Does nothing when the reported size is zero or negative.
 *
 * @param index index to delete from
 */
static void do_delete_doc(FrtIndex *index)
{
    int doc_num;
    int size = frt_index_size(index);

    if (size <= 0) {
        return; /* no document number to choose from */
    }

    doc_num = rand() % size;
    tlog("Deleting %d from index which has%s deletions\n",
         doc_num, (frt_index_has_del(index) ? "" : " no"));

    if (!frt_index_is_deleted(index, doc_num)) {
        frt_index_delete(index, doc_num);
    } else {
        tlog("document was already deleted\n");
    }
}
|
66
|
+
|
67
|
+
ID id;
|
68
|
+
ID contents;
|
69
|
+
|
70
|
+
/**
 * Builds a document from one random number and adds it to the index.
 *
 * The `id` field receives the number formatted with "%d", the `contents`
 * field receives the num_to_str() spelling of the same number. Both fields
 * have destroy_data set to true. The local document is destroyed after
 * frt_index_add_doc() returns.
 *
 * @param index index to add the document to
 */
static void do_add_doc(FrtIndex *index)
{
    FrtDocument *doc = frt_doc_new();
    int number = rand();

    frt_doc_add_field(
        doc,
        frt_df_add_data(frt_df_new(id), frt_strfmt("%d", number))
    )->destroy_data = true;

    frt_doc_add_field(
        doc,
        frt_df_add_data(frt_df_new(contents), num_to_str(number))
    )->destroy_data = true;

    tlog("Adding %d\n", number);
    frt_index_add_doc(index, doc);
    frt_doc_destroy(doc);
}
|
81
|
+
|
82
|
+
/**
 * Searches the index for the spelled-out form of a random number and
 * fetches the document behind every returned hit.
 *
 * index->mutex is held from before the search until all hits have been
 * read. The top-docs result is destroyed after the mutex is released.
 *
 * @param index index to search
 */
static void do_search(FrtIndex *index)
{
    int number = rand();
    int hit_idx;
    char *query = num_to_str(number);
    FrtTopDocs *top_docs;

    tlog("Searching for %d\n", number);

    frt_mutex_lock(&index->mutex);

    /* 0 and 3 are presumably the first hit and the hit limit -- confirm
     * against frt_index_search_str's declaration. */
    top_docs = frt_index_search_str(index, query, 0, 3, NULL, NULL, NULL);
    free(query);

    for (hit_idx = 0; hit_idx < top_docs->size; hit_idx++) {
        FrtHit *hit = top_docs->hits[hit_idx];
        FrtDocument *doc = frt_index_get_doc(index, hit->doc);

        tlog("Hit for %d: %s - %f\n", hit->doc, frt_doc_get_field(doc, id)->data[0], hit->score);
        frt_doc_destroy(doc);
    }
    tlog("Searched for %d: total = %d\n", number, top_docs->total_hits);

    frt_mutex_unlock(&index->mutex);

    frt_td_destroy(top_docs);
}
|
104
|
+
|
105
|
+
static void *indexing_thread(void *p)
|
106
|
+
{
|
107
|
+
int i, choice;
|
108
|
+
FrtIndex *index = (FrtIndex *)p;
|
109
|
+
|
110
|
+
for (i = 0; i < ITERATIONS; i++) {
|
111
|
+
choice = rand() % 1000;
|
112
|
+
|
113
|
+
if (choice > 999) {
|
114
|
+
do_optimize(index);
|
115
|
+
} else if (choice > 900) {
|
116
|
+
do_delete_doc(index);
|
117
|
+
} else if (choice > 700) {
|
118
|
+
do_search(index);
|
119
|
+
} else {
|
120
|
+
do_add_doc(index);
|
121
|
+
}
|
122
|
+
}
|
123
|
+
return NULL;
|
124
|
+
}
|
125
|
+
|
126
|
+
/**
 * Runs the indexing_thread() workload once, directly on the calling thread.
 *
 * @param tc   unused
 * @param data the FrtIndex to operate on
 */
static void test_threading_test(TestCase *tc, void *data)
{
    (void)tc;

    indexing_thread((FrtIndex *)data);
}
|
133
|
+
|
134
|
+
/**
 * Runs indexing_thread() on up to NTHREADS threads sharing one index and
 * waits for all of them to finish.
 *
 * The result of pthread_create() is checked: a thread that could not be
 * started is skipped, and only thread ids that were actually filled in are
 * passed to pthread_join(). Joining an id that pthread_create() never
 * initialised would be undefined behaviour.
 *
 * @param tc   unused
 * @param data the FrtIndex shared by all threads
 */
static void test_threading(TestCase *tc, void *data)
{
    int i;
    int started = 0; /* number of valid entries in thread_id */
    pthread_t thread_id[NTHREADS];
    FrtIndex *index = (FrtIndex *)data;
    (void)tc;

    for (i = 0; i < NTHREADS; i++) {
        if (pthread_create(&thread_id[started], NULL, &indexing_thread, index) == 0) {
            started++;
        }
    }

    for (i = 0; i < started; i++) {
        pthread_join(thread_id[i], NULL);
    }
}
|
150
|
+
|
151
|
+
/**
 * Sets up an index in "./test/testdir/store", runs the threading suite
 * against it and clears the store again afterwards.
 *
 * The statement order is kept as-is on purpose: the store, analyzer and
 * field infos are dereferenced at specific points after being handed over.
 * Only the single-threaded test_threading_test is run; the multi-threaded
 * test_threading call is commented out.
 *
 * @param suite value handed to ADD_SUITE; the result replaces it
 * @return the suite produced by ADD_SUITE
 */
TestSuite *ts_threading(TestSuite *suite)
{
    /* Field name symbols shared with do_add_doc() and do_search(). */
    id = rb_intern("id");
    contents = rb_intern("contents");

    FrtAnalyzer *analyzer = frt_letter_analyzer_new(true);
    FrtStore *store = frt_open_fs_store("./test/testdir/store");
    FrtIndex *index;
    FrtHashSet *default_fields = frt_hs_new_ptr(NULL);
    FrtFieldInfos *field_infos = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES,
                                             FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);

    frt_fis_add_field(field_infos,
                      frt_fi_new(id, FRT_STORE_YES, FRT_INDEX_UNTOKENIZED,
                                 FRT_TERM_VECTOR_YES));
    frt_index_create(store, field_infos);
    frt_fis_deref(field_infos);

    frt_hs_add(default_fields, (void *)contents);
    store->clear_all(store);
    index = frt_index_new(store, analyzer, default_fields, true);
    frt_hs_destroy(default_fields);

    suite = ADD_SUITE(suite);

    /* Drop the local references to the store and analyzer. */
    frt_store_deref(store);
    frt_a_deref(analyzer);

    tst_run_test(suite, test_number_to_str, NULL);
    tst_run_test(suite, test_threading_test, index);
    // tst_run_test(suite, test_threading, index);

    frt_index_destroy(index);

    /* Reopen the store only to wipe what the tests wrote. */
    store = frt_open_fs_store("./test/testdir/store");
    store->clear_all(store);
    frt_store_deref(store);

    return suite;
}
|