isomorfeus-ferret 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +612 -0
- data/README.md +44 -0
- data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
- data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
- data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
- data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
- data/ext/isomorfeus_ferret_ext/email.rl +21 -0
- data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
- data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
- data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
- data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
- data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
- data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
- data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
- data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
- data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
- data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
- data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
- data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
- data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
- data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
- data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
- data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
- data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
- data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
- data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
- data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
- data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
- data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
- data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
- data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
- data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
- data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
- data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
- data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
- data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
- data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
- data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
- data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
- data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
- data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
- data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
- data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
- data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
- data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
- data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
- data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
- data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
- data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
- data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
- data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
- data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
- data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
- data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
- data/ext/isomorfeus_ferret_ext/test.c +850 -0
- data/ext/isomorfeus_ferret_ext/test.h +416 -0
- data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
- data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
- data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
- data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
- data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
- data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
- data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
- data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
- data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
- data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
- data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
- data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
- data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
- data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
- data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
- data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
- data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
- data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
- data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
- data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
- data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
- data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
- data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
- data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
- data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
- data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
- data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
- data/ext/isomorfeus_ferret_ext/url.rl +27 -0
- data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
- data/lib/isomorfeus/ferret/document.rb +132 -0
- data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
- data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
- data/lib/isomorfeus/ferret/index/index.rb +970 -0
- data/lib/isomorfeus/ferret/monitor.rb +323 -0
- data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
- data/lib/isomorfeus/ferret/version.rb +5 -0
- data/lib/isomorfeus-ferret.rb +8 -0
- metadata +307 -0
@@ -0,0 +1,241 @@
|
|
1
|
+
#include "frt_search.h"
|
2
|
+
#include "test.h"
|
3
|
+
|
4
|
+
#define ARRAY_SIZE 20
|
5
|
+
|
6
|
+
/* Field symbol used by add_doc() and by the fuzzy queries built in this
 * file; it is assigned in ts_q_fuzzy() before any of the tests are run. */
static FrtSymbol field;
|
7
|
+
|
8
|
+
/*
 * Add a single document to the index behind `iw`. The document has exactly
 * one field, named by the file-level `field` symbol, whose data is `text`.
 * The temporary document object is destroyed after it has been handed to the
 * writer.
 */
static void add_doc(const char *text, FrtIndexWriter *iw)
{
    FrtDocument *document;

    document = frt_doc_new();
    frt_doc_add_field(document,
                      frt_df_add_data(frt_df_new(field), (char *)text));
    frt_iw_add_doc(iw, document);
    frt_doc_destroy(document);
}
|
15
|
+
|
16
|
+
/* Declared here without a definition in this file; called by
 * test_fuzzy_query_to_s() below. */
void check_to_s(TestCase *tc, FrtQuery *query, FrtSymbol field, const char *q_str);
|
17
|
+
|
18
|
+
/*
 * Build a fuzzy query for `qstr` on the file-level `field`, using the given
 * minimum similarity and prefix length, run it through tst_check_hits()
 * against `searcher` with `expected_hits`, and release the query again.
 *
 * The trailing 10 passed to frt_fuzq_new_conf() and the -1 passed to
 * tst_check_hits() are fixed for every call made through this helper.
 */
static void do_prefix_test(TestCase *tc, FrtSearcher *searcher,
                           const char *qstr, const char *expected_hits,
                           int pre_len, float min_sim)
{
    FrtQuery *fuzzy_query;

    fuzzy_query = frt_fuzq_new_conf(field, qstr, min_sim, pre_len, 10);
    tst_check_hits(tc, searcher, fuzzy_query, expected_hits, -1);
    frt_q_deref(fuzzy_query);
}
|
24
|
+
|
25
|
+
/*
 * Exercise fuzzy queries against a small index of five-letter terms (plus
 * two much longer ones), varying the required prefix length and the minimum
 * similarity and checking which documents are reported as hits.
 *
 * tc   - test case that receives the assertion results.
 * data - the FrtStore in which the index is created.
 */
static void test_fuzziness(TestCase *tc, void *data)
{
    /* Arguments for one do_prefix_test() call. */
    struct fuzzy_case {
        const char *qstr;
        const char *expected_hits;
        int pre_len;
        float min_sim;
    };

    /* Terms to index, one document per entry, terminated by NULL. The
     * expected-hit strings below presumably refer to these documents by
     * their zero-based insertion order. */
    static const char *const terms[] = {
        "aaaaa",
        "aaaab",
        "aaabb",
        "aabbb",
        "abbbb",
        "bbbbb",
        "ddddd",
        "ddddddddddddddddddddd",   /* test max_distances problem */
        "aaaaaaaaaaaaaaaaaaaaaaa", /* test max_distances problem */
        NULL
    };

    /* Queries run through do_prefix_test(), in order; a NULL qstr ends the
     * table. */
    static const struct fuzzy_case cases[] = {
        /* test prefix length */
        { "aaaaaaaaaaaaaaaaaaaaaa", "8", 1, 0.0 },
        { "aaaaa", "0,1,2", 0, 0.0 },
        { "aaaaa", "0,1,2", 1, 0.0 },
        { "aaaaa", "0,1,2", 2, 0.0 },
        { "aaaaa", "0,1,2", 3, 0.0 },
        { "aaaaa", "0,1", 4, 0.0 },
        { "aaaaa", "0", 5, 0.0 },
        { "aaaaa", "0", 6, 0.0 },
        /* test where term will equal prefix but not whole query string */
        { "aaaaaaa", "0", 5, 0.0 },

        /* test minimum similarity */
        { "aaaaa", "0,1,2,3", 0, 0.2 },
        { "aaaaa", "0,1,2", 1, 0.4 },
        { "aaaaa", "0,1", 1, 0.6 },
        { "aaaaa", "0", 1, 0.8 },

        /* test where no terms will have any similarity */
        { "xxxxx", "", 0, 0.0 },

        /* test where no terms will have enough similarity to match */
        { "aaccc", "", 0, 0.0 },

        /* test prefix length but with non-matching term (aaaac does not
         * exist in the index) */
        { "aaaac", "0,1,2", 0, 0.0 },
        { "aaaac", "0,1,2", 1, 0.0 },
        { "aaaac", "0,1,2", 2, 0.0 },
        { "aaaac", "0,1,2", 3, 0.0 },
        { "aaaac", "0,1", 4, 0.0 },
        { "aaaac", "", 5, 0.0 },

        /* test really long string never matches */
        { "ddddX", "6", 0, 0.0 },
        { "ddddX", "6", 1, 0.0 },
        { "ddddX", "6", 2, 0.0 },
        { "ddddX", "6", 3, 0.0 },
        { "ddddX", "6", 4, 0.0 },
        { "ddddX", "", 5, 0.0 },

        { NULL, NULL, 0, 0.0 }
    };

    const char *const *term;
    const struct fuzzy_case *fc;
    FrtStore *store = (FrtStore *)data;
    FrtFieldInfos *fis;
    FrtIndexWriter *iw;
    FrtSearcher *sea;
    FrtTopDocs *top_docs;
    FrtQuery *q;

    /* Create the index in the supplied store. */
    fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
    frt_index_create(store, fis);
    frt_fis_deref(fis);

    /* Index every term as its own document. */
    iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
    for (term = terms; *term != NULL; term++) {
        add_doc(*term, iw);
    }
    frt_iw_close(iw);

    sea = frt_isea_new(frt_ir_open(store));

    /* Prefix length equal to the term length: only document 0 is expected. */
    q = frt_fuzq_new_conf(field, "aaaaa", 0.0, 5, 10);
    tst_check_hits(tc, sea, q, "0", -1);
    frt_q_deref(q);

    /* A field that was never indexed yields no hits. */
    q = frt_fuzq_new(rb_intern("not a field"), "aaaaa");
    tst_check_hits(tc, sea, q, "", -1);
    frt_q_deref(q);

    for (fc = cases; fc->qstr != NULL; fc++) {
        do_prefix_test(tc, sea, fc->qstr, fc->expected_hits,
                       fc->pre_len, fc->min_sim);
    }

    /* test non-existing field doesn't break search */
    q = frt_fuzq_new_conf(rb_intern("anotherfield"), "ddddX", 0.0, 10, 100);
    top_docs = frt_searcher_search(sea, q, 0, 1, NULL, NULL, NULL);
    frt_q_deref(q);
    Aiequal(0, top_docs->total_hits);
    frt_td_destroy(top_docs);

    frt_searcher_close(sea);
}
|
111
|
+
|
112
|
+
/*
 * Exercise fuzzy queries against an index holding two seven-letter terms,
 * "aaaaaaa" and "segment", with and without a required prefix, and finally
 * with a raised minimum similarity.
 *
 * tc   - test case that receives the assertion results.
 * data - the FrtStore in which the index is created.
 */
static void test_fuzziness_long(TestCase *tc, void *data)
{
    /* Arguments for one do_prefix_test() call. */
    struct fuzzy_case {
        const char *qstr;
        const char *expected_hits;
        int pre_len;
        float min_sim;
    };

    /* Queries run through do_prefix_test(), in order; a NULL qstr ends the
     * table. */
    static const struct fuzzy_case cases[] = {
        /* not similar enough: */
        { "xxxxx", "", 0, 0.0 },

        /* edit distance to "aaaaaaa" = 3, this matches because the string
         * is longer than the five-letter strings used in test_fuzziness, so
         * a bigger difference is allowed: */
        { "aaaaccc", "0", 0, 0.0 },

        /* now with prefix */
        { "aaaaccc", "0", 1, 0.0 },
        { "aaaaccc", "0", 4, 0.0 },
        { "aaaaccc", "", 5, 0.0 },

        /* no match, more than half of the characters are wrong: */
        { "aaacccc", "", 0, 0.0 },

        /* now with prefix */
        { "aaacccc", "", 1, 0.0 },

        /* "student" and "stellent" are indeed similar to "segment" by
         * default: */
        { "student", "1", 0, 0.0 },

        /* now with prefix */
        { "student", "", 2, 0.0 },
        { "stellent", "", 2, 0.0 },

        { NULL, NULL, 0, 0.0 }
    };

    const struct fuzzy_case *fc;
    FrtStore *store = (FrtStore *)data;
    FrtFieldInfos *fis;
    FrtIndexWriter *iw;
    FrtSearcher *sea;
    FrtTopDocs *top_docs;
    FrtQuery *q;

    /* Create the index in the supplied store. */
    fis = frt_fis_new(FRT_STORE_YES, FRT_INDEX_YES, FRT_TERM_VECTOR_YES);
    frt_index_create(store, fis);
    frt_fis_deref(fis);

    iw = frt_iw_open(store, frt_whitespace_analyzer_new(false), NULL);
    add_doc("aaaaaaa", iw);
    add_doc("segment", iw);
    frt_iw_close(iw);

    sea = frt_isea_new(frt_ir_open(store));

    for (fc = cases; fc->qstr != NULL; fc++) {
        do_prefix_test(tc, sea, fc->qstr, fc->expected_hits,
                       fc->pre_len, fc->min_sim);
    }

    /* "student" doesn't match anymore thanks to increased min-similarity: */
    q = frt_fuzq_new_conf(field, "student", (float)0.6, 0, 100);
    top_docs = frt_searcher_search(sea, q, 0, 1, NULL, NULL, NULL);
    frt_q_deref(q);
    Aiequal(0, top_docs->total_hits);
    frt_td_destroy(top_docs);

    frt_searcher_close(sea);
}
|
166
|
+
|
167
|
+
/**
|
168
|
+
* Test query->to_s functionality
|
169
|
+
*/
|
170
|
+
static void test_fuzzy_query_to_s(TestCase *tc, void *data)
|
171
|
+
{
|
172
|
+
FrtQuery *q;
|
173
|
+
(void)data;
|
174
|
+
|
175
|
+
q = frt_fuzq_new_conf(rb_intern("A"), "a", 0.4f, 2, 100);
|
176
|
+
check_to_s(tc, q, rb_intern("A"), "a~0.4");
|
177
|
+
check_to_s(tc, q, rb_intern("B"), "A:a~0.4");
|
178
|
+
frt_q_deref(q);
|
179
|
+
|
180
|
+
q = frt_fuzq_new_conf(rb_intern("field"), "mispell", 0.5f, 2, 100);
|
181
|
+
check_to_s(tc, q, rb_intern("field"), "mispell~");
|
182
|
+
check_to_s(tc, q, rb_intern("notfield"), "field:mispell~");
|
183
|
+
frt_q_deref(q);
|
184
|
+
|
185
|
+
}
|
186
|
+
|
187
|
+
/**
|
188
|
+
* Test query hashing functionality
|
189
|
+
*/
|
190
|
+
static void test_fuzzy_query_hash(TestCase *tc, void *data)
|
191
|
+
{
|
192
|
+
FrtQuery *q1, *q2;
|
193
|
+
(void)data;
|
194
|
+
|
195
|
+
q1 = frt_fuzq_new_conf(rb_intern("A"), "a", 0.4f, 2, 100);
|
196
|
+
q2 = frt_fuzq_new_conf(rb_intern("A"), "a", 0.4f, 2, 100);
|
197
|
+
|
198
|
+
Assert(frt_q_eq(q1, q1), "Test same queries are equal");
|
199
|
+
Aiequal(frt_q_hash(q1), frt_q_hash(q2));
|
200
|
+
Assert(frt_q_eq(q1, q2), "Queries are equal");
|
201
|
+
frt_q_deref(q2);
|
202
|
+
|
203
|
+
q2 = frt_fuzq_new_conf(rb_intern("A"), "a", 0.4f, 0, 100);
|
204
|
+
Assert(frt_q_hash(q1) != frt_q_hash(q2), "prelen differs");
|
205
|
+
Assert(!frt_q_eq(q1, q2), "prelen differs");
|
206
|
+
frt_q_deref(q2);
|
207
|
+
|
208
|
+
q2 = frt_fuzq_new_conf(rb_intern("A"), "a", 0.5f, 2, 100);
|
209
|
+
Assert(frt_q_hash(q1) != frt_q_hash(q2), "similarity differs");
|
210
|
+
Assert(!frt_q_eq(q1, q2), "similarity differs");
|
211
|
+
frt_q_deref(q2);
|
212
|
+
|
213
|
+
q2 = frt_fuzq_new_conf(rb_intern("A"), "b", 0.4f, 2, 100);
|
214
|
+
Assert(frt_q_hash(q1) != frt_q_hash(q2), "term differs");
|
215
|
+
Assert(!frt_q_eq(q1, q2), "term differs");
|
216
|
+
frt_q_deref(q2);
|
217
|
+
|
218
|
+
q2 = frt_fuzq_new_conf(rb_intern("B"), "a", 0.4f, 2, 100);
|
219
|
+
Assert(frt_q_hash(q1) != frt_q_hash(q2), "field differs");
|
220
|
+
Assert(!frt_q_eq(q1, q2), "field differs");
|
221
|
+
frt_q_deref(q2);
|
222
|
+
|
223
|
+
frt_q_deref(q1);
|
224
|
+
}
|
225
|
+
|
226
|
+
/*
 * Register and run the fuzzy-query tests.
 *
 * Opens a RAM store that is handed to every test as its `data` argument,
 * sets the file-level `field` symbol to "field", adds a suite via ADD_SUITE
 * and runs the four tests in a fixed order. The store reference is dropped
 * afterwards and the suite returned by ADD_SUITE is passed back to the
 * caller.
 */
TestSuite *ts_q_fuzzy(TestSuite *suite)
{
    FrtStore *ram_store;

    ram_store = frt_open_ram_store();
    field = rb_intern("field");

    suite = ADD_SUITE(suite);

    /* The hash and to_s tests ignore the store, but all tests receive it. */
    tst_run_test(suite, test_fuzziness, (void *)ram_store);
    tst_run_test(suite, test_fuzziness_long, (void *)ram_store);
    tst_run_test(suite, test_fuzzy_query_hash, (void *)ram_store);
    tst_run_test(suite, test_fuzzy_query_to_s, (void *)ram_store);

    frt_store_deref(ram_store);

    return suite;
}
|