ferret 0.3.2 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/ext/hash.h
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#ifndef FRT_HASH_H
|
2
|
+
#define FRT_HASH_H
|
3
|
+
|
4
|
+
#include "global.h"
|
5
|
+
|
6
|
+
#define NUM_ENTRIES 256
|
7
|
+
#define MULTIPLIER 31
|
8
|
+
|
9
|
+
/*
 * Entry of the name -> value table handled by the ht_* functions below.
 * Entries are linked to one another through `next`.
 */
typedef struct HashEntry {
  char *name;              /* key of the entry */
  void *value;             /* payload stored under `name` */
  struct HashEntry *next;  /* following entry, if any */
} HashEntry;
|
14
|
+
|
15
|
+
/*
 * String-keyed table built from HashEntry nodes.
 *
 * ht_create is declared with an explicit (void) parameter list so that the
 * compiler checks calls against a real prototype; an empty "()" list leaves
 * the arguments unchecked before C23.
 *
 * NOTE(review): the implementations live in hash.c, which is not visible
 * here; ownership of `name`/`value` and the meaning of the `fn` callback
 * passed to ht_destroy_all should be confirmed against that file.
 */
HashEntry **ht_create(void);
int ht_count(HashEntry **ht);
void ht_destroy(HashEntry **ht);
void ht_destroy_all(HashEntry **ht, void (*fn)(void *));
void ht_set(HashEntry **ht, char *name, void *value);
void *ht_get(HashEntry **ht, char *name);
void *ht_delete(HashEntry **ht, char *name);
|
22
|
+
|
23
|
+
/****************************************************************************
|
24
|
+
*
|
25
|
+
* HshTable
|
26
|
+
*
|
27
|
+
****************************************************************************/
|
28
|
+
|
29
|
+
#define Hsh_MINSIZE 8
|
30
|
+
#define SLOW_DOWN 50000 // stop increasing the hash table so quickly to
|
31
|
+
// conserve memory
|
32
|
+
extern char *dummy_key;
|
33
|
+
/*
 * Key-lookup states. hs_add() in hashset.c compares the result of
 * h_has_key() against these: on HASH_KEY_EQUAL it frees the incoming
 * element as a duplicate, on HASH_KEY_SAME it does nothing.
 * NOTE(review): presumably SAME means "identical pointer" and EQUAL means
 * "distinct object that compares equal" -- confirm against hash.c.
 */
enum {
  HASH_KEY_DOES_NOT_EXIST = 0,
  HASH_KEY_SAME           = 1,
  HASH_KEY_EQUAL          = 2
};
|
38
|
+
|
39
|
+
/* One slot of a HshTable. */
typedef struct {
  int hash;     /* cached hash code of key */
  void *key;
  void *value;
} HshEntry;
|
44
|
+
|
45
|
+
typedef struct HshTable {
|
46
|
+
int fill; /* # Active + # Dummy */
|
47
|
+
int used; /* # Active */
|
48
|
+
int mask;
|
49
|
+
|
50
|
+
/* table points to smalltable for small tables, else to
|
51
|
+
* additional malloc'ed memory. */
|
52
|
+
HshEntry *table;
|
53
|
+
HshEntry smalltable[Hsh_MINSIZE];
|
54
|
+
HshEntry *(*lookup)(struct HshTable *ht, const void *key);
|
55
|
+
unsigned int (*hash)(const void *key);
|
56
|
+
int (*eq)(const void *key1, const void *key2);
|
57
|
+
void (*free_key)(void *key);
|
58
|
+
void (*free_value)(void *value);
|
59
|
+
} HshTable;
|
60
|
+
|
61
|
+
HshTable *h_new_str(void (*free_key)(void *key), void (*free_value)(void *value));
|
62
|
+
HshTable *h_new(unsigned int (*hash)(const void *key),
|
63
|
+
int (*eq)(const void *key1, const void *key2),
|
64
|
+
void (*free_key)(void *key),
|
65
|
+
void (*free_value)(void *value));
|
66
|
+
void h_destroy(HshTable *ht);
|
67
|
+
void h_clear(HshTable *ht);
|
68
|
+
|
69
|
+
void *h_get(HshTable *ht, const void *key);
|
70
|
+
int h_del(HshTable *ht, const void *key);
|
71
|
+
void *h_rem(HshTable *ht, const void *key, bool del_key);
|
72
|
+
int h_set(HshTable *ht, const void *key, void *value);
|
73
|
+
int h_set_safe(HshTable *ht, const void *key, void *value);
|
74
|
+
int h_has_key(HshTable *ht, const void *key);
|
75
|
+
unsigned int str_hash(const char *const str);
|
76
|
+
|
77
|
+
void dummy_free(void *p);
|
78
|
+
HshEntry *h_lookup_str(HshTable *ht, register const void *key_p);
|
79
|
+
|
80
|
+
#endif
|
data/ext/hashset.c
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
#include <hashset.h>
|
2
|
+
#include <string.h>
|
3
|
+
#define HS_MIN_SIZE 4
|
4
|
+
|
5
|
+
int *imalloc(int i)
|
6
|
+
{
|
7
|
+
int *ip = ALLOC(int);
|
8
|
+
*ip = i;
|
9
|
+
return ip;
|
10
|
+
}
|
11
|
+
|
12
|
+
/* Do-nothing destructor installed when a HashSet does not own its elements. */
void hs_dummy_free(void *p)
{
  (void)p;
}
|
13
|
+
|
14
|
+
HashSet *hs_create(unsigned int (*hash)(const void *p),
|
15
|
+
int (*eq)(const void *p1, const void *p2),
|
16
|
+
void (*free_elem)(void *p))
|
17
|
+
{
|
18
|
+
HashSet *hs = ALLOC(HashSet);
|
19
|
+
hs->ht = h_new(hash, eq, NULL, &efree);
|
20
|
+
hs->elems = NULL;
|
21
|
+
hs->capa = hs->size = 0;
|
22
|
+
if (free_elem == NULL)
|
23
|
+
hs->free_elem = &hs_dummy_free;
|
24
|
+
else
|
25
|
+
hs->free_elem = free_elem;
|
26
|
+
return hs;
|
27
|
+
}
|
28
|
+
|
29
|
+
HashSet *hs_str_create(void (*free_elem)(void *p))
|
30
|
+
{
|
31
|
+
HashSet *hs = ALLOC(HashSet);
|
32
|
+
hs->ht = h_new_str(NULL, &efree);
|
33
|
+
hs->elems = NULL;
|
34
|
+
hs->capa = hs->size = 0;
|
35
|
+
if (free_elem == NULL)
|
36
|
+
hs->free_elem = &hs_dummy_free;
|
37
|
+
else
|
38
|
+
hs->free_elem = free_elem;
|
39
|
+
return hs;
|
40
|
+
}
|
41
|
+
|
42
|
+
void hs_destroy(void *p)
|
43
|
+
{
|
44
|
+
HashSet *hs = (HashSet *)p;
|
45
|
+
h_destroy(hs->ht);
|
46
|
+
free(hs->elems);
|
47
|
+
free(hs);
|
48
|
+
}
|
49
|
+
|
50
|
+
/*
 * Remove every element through hs_del (which also runs free_elem on it).
 * Walks from the last slot down to the first, so each removal takes the
 * element currently at the end of the array.
 */
void hs_clear(HashSet *self)
{
  int pos = self->size;

  while (--pos >= 0) {
    hs_del(self, self->elems[pos]);
  }
}
|
56
|
+
|
57
|
+
void hs_destroy_all(void *p)
|
58
|
+
{
|
59
|
+
int i;
|
60
|
+
HashSet *hs = (HashSet *)p;
|
61
|
+
if (hs->free_elem != &dummy_free)
|
62
|
+
for (i = 0; i < hs->size; i++)
|
63
|
+
hs->free_elem(hs->elems[i]);
|
64
|
+
hs_destroy(p);
|
65
|
+
}
|
66
|
+
|
67
|
+
/*
 * Add `elem` to the set.
 *
 * Returns the h_has_key() state observed before the insertion:
 *  - HASH_KEY_EQUAL: an equal element is already present; `elem` is handed
 *    to free_elem so that two copies are never kept;
 *  - HASH_KEY_SAME:  nothing to do;
 *  - otherwise:      `elem` is appended to elems (growing the array from
 *    HS_MIN_SIZE by doubling) and its position is recorded in the table.
 */
int hs_add(HashSet *hs, void *elem)
{
  int state = h_has_key(hs->ht, elem);

  switch (state) {
    case HASH_KEY_EQUAL:
      /* We don't want to keep two of the same elem so free if necessary */
      hs->free_elem(elem);
      break;

    case HASH_KEY_SAME:
      /* No need to do anything */
      break;

    default:
      /* add the elem to the array, resizing if necessary */
      if (hs->size >= hs->capa) {
        hs->capa = (hs->capa == 0) ? HS_MIN_SIZE : hs->capa * 2;
        REALLOC_N(hs->elems, void *, hs->capa);
      }
      hs->elems[hs->size] = elem;
      h_set(hs->ht, elem, imalloc(hs->size));
      hs->size++;
      break;
  }
  return state;
}
|
91
|
+
|
92
|
+
/*
 * Remove `elem` from the set and run free_elem on the stored element.
 * Returns 1 when something was removed, 0 when hs_rem found nothing.
 */
int hs_del(HashSet *hs, void *elem)
{
  void *removed = hs_rem(hs, elem);

  if (removed == NULL) {
    return 0;
  }
  hs->free_elem(removed);
  return 1;
}
|
102
|
+
|
103
|
+
/*
 * Remove `elem` from the set WITHOUT freeing it.
 *
 * Returns the element that was stored in the set (which may be a different
 * pointer from `elem` when they merely compare equal), or NULL when the
 * set does not contain it.
 *
 * The table maps each element to a boxed int holding its position in
 * elems. Removing slot i shifts every later element down by one, so their
 * boxed positions must be rewritten; the previous code left them stale,
 * which made later hs_rem / hs_orig calls return the wrong element or read
 * past the end of the array.
 */
void *hs_rem(HashSet *hs, void *elem)
{
  void *ret_elem;
  int i, j;
  int *index = (int *)h_get(hs->ht, elem);

  if (index == NULL) {
    return NULL;
  }

  /* copy the position first: the box is owned by the table (its value
   * destructor is efree), so it is presumably gone after h_del */
  i = *index;
  ret_elem = hs->elems[i];
  h_del(hs->ht, elem);
  hs->size--;
  memmove(&hs->elems[i], &hs->elems[i+1], sizeof(void *) * (hs->size - i));

  /* re-number the elements that moved down */
  for (j = i; j < hs->size; j++) {
    int *moved = (int *)h_get(hs->ht, hs->elems[j]);
    if (moved != NULL) {
      *moved = j;
    }
  }
  return ret_elem;
}
|
118
|
+
|
119
|
+
/*
 * Report whether `elem` is in the set. The result is the raw h_has_key()
 * state, i.e. one of the HASH_KEY_* values (0 when absent).
 */
int hs_exists(HashSet *hs, void *elem)
{
  int state = h_has_key(hs->ht, elem);

  return state;
}
|
123
|
+
|
124
|
+
/*
 * Move every element of `other` into `hs` and dispose of `other`.
 *
 * `other` is released with hs_destroy, i.e. without freeing its elements,
 * because they now belong to `hs` (duplicates were already handed to
 * hs's free_elem by hs_add). Returns `hs`.
 */
HashSet *hs_merge(HashSet *hs, HashSet *other)
{
  int pos = 0;

  while (pos < other->size) {
    hs_add(hs, other->elems[pos]);
    pos++;
  }
  /* the container is no longer needed; its elements live on in hs */
  hs_destroy(other);
  return hs;
}
|
135
|
+
|
136
|
+
/*
 * Look up the element stored in the set that matches `elem`.
 * Returns the stored pointer, or NULL when there is no match.
 */
void *hs_orig(HashSet *hs, void *elem)
{
  int *pos = h_get(hs->ht, elem);

  if (!pos) {
    return NULL;
  }
  return hs->elems[*pos];
}
|
data/ext/hashset.h
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
#ifndef FRT_HASHSET_H
|
2
|
+
#define FRT_HASHSET_H
|
3
|
+
|
4
|
+
#include "hash.h"
|
5
|
+
#include "array.h"
|
6
|
+
#include "global.h"
|
7
|
+
|
8
|
+
typedef struct HashSet {
|
9
|
+
int capa;
|
10
|
+
int size;
|
11
|
+
void **elems;
|
12
|
+
HshTable *ht;
|
13
|
+
void (*free_elem)(void *p);
|
14
|
+
} HashSet;
|
15
|
+
|
16
|
+
/* Construction: a NULL free_elem means the set does not own its elements. */
HashSet *hs_create(unsigned int (*hash)(const void *p),
                   int (*eq)(const void *p1, const void *p2),
                   void (*free_elem)(void *p));
HashSet *hs_str_create(void (*free_elem)(void *p));

/* Destruction: hs_destroy keeps the elements, hs_destroy_all frees them. */
void hs_destroy(void *p);
void hs_destroy_all(void *p);

/* hs_add returns the HASH_KEY_* state found before inserting. */
int hs_add(HashSet *hs, void *elem);
/* hs_del removes and frees (1 if removed, 0 if absent); hs_rem removes and
 * returns the stored element without freeing it (NULL if absent). */
int hs_del(HashSet *hs, void *elem);
void *hs_rem(HashSet *hs, void *elem);
int hs_exists(HashSet *hs, void *elem);
/* hs_merge adds all of other's elements to hs, destroys other, returns hs. */
HashSet *hs_merge(HashSet *hs, HashSet *other);
/* hs_orig returns the stored element matching elem, or NULL. */
void *hs_orig(HashSet *hs, void *elem);
void hs_clear(HashSet *self);
|
29
|
+
|
30
|
+
// TODO: finish these functions.
|
31
|
+
//int hs_osf(HashSet *hs, void *elem);
|
32
|
+
//HashSet hs_or(HashSet *hs1, HashSet *h2);
|
33
|
+
//HashSet hs_excl_or(HashSet *hs1, HashSet *h2);
|
34
|
+
//HashSet hs_and(HashSet *hs1, HashSet *h2);
|
35
|
+
//HashSet hs_mask(HashSet *hs1, HashSet *h2);
|
36
|
+
|
37
|
+
#endif
|
data/ext/helper.c
ADDED
data/ext/helper.h
ADDED
data/ext/inc/lang.h
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#ifndef FRT_LANG_H
|
2
|
+
#define FRT_LANG_H
|
3
|
+
|
4
|
+
#include <ruby.h>
|
5
|
+
|
6
|
+
#define FERRET_EXT
|
7
|
+
|
8
|
+
#define MAX_ERROR_LEN 2048
|
9
|
+
#define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
|
10
|
+
extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
|
11
|
+
extern void weprintf(const char *fmt, ...);
|
12
|
+
extern char *progname(void);
|
13
|
+
extern void setprogname(const char *str);
|
14
|
+
|
15
|
+
extern VALUE cQueryParseException;
|
16
|
+
|
17
|
+
/* Error kinds used with eprintf, mapped onto Ruby exception classes. */
#define ERROR             rb_eException
#define IO_ERROR          rb_eIOError
#define ARG_ERROR         rb_eArgError
#define EOF_ERROR         rb_eEOFError
#define UNSUPPORTED_ERROR rb_eNotImpError
#define STATE_ERROR       rb_eException
#define PARSE_ERROR       cQueryParseException
#define MEM_ERROR         rb_eNoMemError
|
25
|
+
|
26
|
+
/*
 * Threading stubs: in this build the mutex and thread-key types are plain
 * pointers and every operation expands to nothing (thread_getspecific
 * yields NULL).
 * NOTE(review): mutex_trylock expands to an empty token sequence, so it can
 * only be used as a statement, never inside an expression.
 */
typedef void * mutex_t;
typedef void * thread_key_t;

#define MUTEX_INITIALIZER           NULL
#define MUTEX_RECURSIVE_INITIALIZER NULL

#define mutex_init(a, b)
#define mutex_lock(a)
#define mutex_trylock(a)
#define mutex_unlock(a)
#define mutex_destroy(a)

#define thread_key_create(a, b)
#define thread_key_delete(a)
#define thread_setspecific(a, b)
#define thread_getspecific(a) NULL
#define thread_exit(a)
|
40
|
+
|
41
|
+
#endif
|
data/ext/ind.c
ADDED
@@ -0,0 +1,389 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include "search.h"
|
3
|
+
|
4
|
+
static const char *ID_STRING = "id";
|
5
|
+
|
6
|
+
/*
 * Close whatever read-side object the index currently holds.
 * When a searcher exists it is closed and both `sea` and `ir` are reset
 * (the reader is not closed separately in that case); otherwise a bare
 * reader, if any, is closed and reset.
 * The argument is parenthesized so that any pointer expression, not only a
 * plain identifier, can be passed.
 */
#define INDEX_CLOSE_READER(self) do {\
  if ((self)->sea) {\
    sea_close((self)->sea);\
    (self)->sea = NULL;\
    (self)->ir = NULL;\
  } else if ((self)->ir) {\
    ir_close((self)->ir);\
    (self)->ir = NULL;\
  }\
} while (0)
|
16
|
+
|
17
|
+
/*
 * After a change made through the reader: commit immediately when
 * auto_flush is on, otherwise just remember that there are pending writes.
 * Expects a variable named `self` in scope. Wrapped in do/while(0) so the
 * expansion is always exactly one statement.
 */
#define AUTOFLUSH_IR do {\
  if (self->auto_flush) ir_commit(self->ir);\
  else self->has_writes = true;\
} while (0)
|
19
|
+
|
20
|
+
/*
 * After a change made through the writer: when auto_flush is on, close the
 * writer and reset the pointer; otherwise just remember that there are
 * pending writes. Expects a variable named `self` in scope. Wrapped in
 * do/while(0) so the expansion is always exactly one statement.
 */
#define AUTOFLUSH_IW do {\
  if (self->auto_flush) {\
    iw_close(self->iw);\
    self->iw = NULL;\
  } else self->has_writes = true;\
} while (0)
|
25
|
+
|
26
|
+
/*
 * Function form of AUTOFLUSH_IR: commit the reader when auto_flush is
 * set, otherwise flag the index as having pending writes.
 */
void index_auto_flush_ir(Index *self)
{
  if (!self->auto_flush) {
    self->has_writes = true;
    return;
  }
  ir_commit(self->ir);
}
|
30
|
+
|
31
|
+
/*
 * Function form of AUTOFLUSH_IW: close and reset the writer when
 * auto_flush is set, otherwise flag the index as having pending writes.
 */
void index_auto_flush_iw(Index *self)
{
  if (!self->auto_flush) {
    self->has_writes = true;
    return;
  }
  iw_close(self->iw);
  self->iw = NULL;
}
|
35
|
+
|
36
|
+
/*
 * Build an Index.
 *
 * store      - storage to use; NULL makes the index open its own RAM store
 *              (which it will close itself) and forces `create`
 * analyzer   - analyzer to use; NULL makes the index create and own a
 *              standard analyzer
 * def_fields - passed through to the query parser
 * create     - when true, a writer is opened in create mode and closed
 *              again straight away
 *
 * The query parser is configured with the convenience options
 * (allow_any_fields, clean_str, handle_parse_errors). id_field and
 * def_field both start out pointing at the shared "id" string.
 */
Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
                    bool create)
{
  HashSet *all_fields = hs_str_create(&free);
  Index *self = ALLOC(Index);

  ZEROSET(self, Index, 1);
  mutex_init(&self->mutex, NULL);
  self->has_writes = false;

  /* storage: borrowed when supplied, otherwise an owned RAM store */
  self->close_store = (store == NULL);
  if (store == NULL) {
    store = open_ram_store();
    create = true;
  }
  self->store = store;

  /* analyzer: borrowed when supplied, otherwise an owned standard one */
  self->close_analyzer = (analyzer == NULL);
  if (analyzer == NULL) {
    analyzer = standard_analyzer_create();
  }
  self->analyzer = analyzer;

  self->use_compound_file = true;

  if (create) {
    self->iw = iw_open(self->store, self->analyzer, create, false, false);
    iw_close(self->iw);
    self->iw = NULL;
  }

  /* options */
  self->key = NULL;
  self->id_field = (char *)ID_STRING;
  self->def_field = (char *)ID_STRING;
  self->auto_flush = false;

  /* Index is a convenience class so set qp convenience options */
  self->qp = qp_create(all_fields, def_fields, self->analyzer);
  self->qp->allow_any_fields = true;
  self->qp->clean_str = true;
  self->qp->handle_parse_errors = true;

  return self;
}
|
81
|
+
|
82
|
+
/*
 * Tear down an Index: close the reader/searcher and writer, close the
 * store and analyzer only when the index created them itself, destroy the
 * query parser, free id_field / def_field unless they still point at the
 * shared "id" string, destroy the key set with its elements, and finally
 * free the Index struct.
 */
void index_destroy(Index *self)
{
  char *shared_id = (char *)ID_STRING;

  mutex_destroy(&self->mutex);
  INDEX_CLOSE_READER(self);
  if (self->iw) {
    iw_close(self->iw);
  }
  if (self->close_store) {
    self->store->close(self->store);
  }
  if (self->close_analyzer) {
    a_destroy(self->analyzer);
  }
  if (self->qp) {
    qp_destroy(self->qp);
  }
  if (self->id_field != shared_id) {
    free(self->id_field);
  }
  if (self->def_field != shared_id) {
    free(self->def_field);
  }
  if (self->key) {
    hs_destroy_all(self->key);
  }
  free(self);
}
|
95
|
+
|
96
|
+
/*
 * Push pending changes out: commit the reader if one is open, otherwise
 * close (and reset) the writer if one is open. Clears has_writes in every
 * case.
 */
void index_flush(Index *self)
{
  self->has_writes = false;

  if (self->ir) {
    ir_commit(self->ir);
    return;
  }
  if (self->iw) {
    iw_close(self->iw);
    self->iw = NULL;
  }
}
|
106
|
+
/*
 * Make sure self->iw is usable.
 * With a writer already open, only its analyzer is refreshed. Otherwise
 * any open reader/searcher is closed first and a writer is opened on the
 * store (not in create mode) with the index's compound-file setting.
 */
inline void ensure_writer_open(Index *self)
{
  if (self->iw) {
    self->iw->analyzer = self->analyzer; /* in case it has changed */
    return;
  }

  INDEX_CLOSE_READER(self);
  self->iw = iw_open(self->store, self->analyzer, false, false, false);
  self->iw->use_compound_file = self->use_compound_file;
}
|
116
|
+
|
117
|
+
/*
 * Make sure self->ir is an up-to-date reader.
 * A reader that ir_is_latest() accepts is kept as is. A stale reader is
 * closed (together with any searcher) and reopened. With no reader at
 * all, an open writer is closed first and then a reader is opened.
 */
inline void ensure_reader_open(Index *self)
{
  if (self->ir) {
    if (ir_is_latest(self->ir)) {
      return;
    }
    INDEX_CLOSE_READER(self);
  } else if (self->iw) {
    iw_close(self->iw);
    self->iw = NULL;
  }
  self->ir = ir_open(self->store, false);
}
|
132
|
+
|
133
|
+
/*
 * Make sure a reader is open (see ensure_reader_open) and that a searcher
 * exists on top of it; an existing searcher is reused.
 */
inline void ensure_searcher_open(Index *self)
{
  ensure_reader_open(self);
  if (self->sea) {
    return;
  }
  self->sea = sea_create(self->ir);
}
|
140
|
+
|
141
|
+
/*
 * Number of documents reported by the reader's num_docs(). Runs under the
 * store's ext_mutex.
 */
int index_size(Index *self)
{
  int doc_count;

  mutex_lock(&self->store->ext_mutex);
  ensure_reader_open(self);
  doc_count = self->ir->num_docs(self->ir);
  mutex_unlock(&self->store->ext_mutex);

  return doc_count;
}
|
150
|
+
|
151
|
+
/*
 * Optimize the index through the writer, then apply the auto-flush policy
 * for writer changes. Runs under the store's ext_mutex.
 */
void index_optimize(Index *self)
{
  mutex_lock(&self->store->ext_mutex);

  ensure_writer_open(self);
  iw_optimize(self->iw);
  AUTOFLUSH_IW;

  mutex_unlock(&self->store->ext_mutex);
}
|
159
|
+
|
160
|
+
/*
 * Result of the reader's has_deletions(). Runs under the store's
 * ext_mutex.
 */
bool index_has_del(Index *self)
{
  bool any_deleted;

  mutex_lock(&self->store->ext_mutex);
  ensure_reader_open(self);
  any_deleted = self->ir->has_deletions(self->ir);
  mutex_unlock(&self->store->ext_mutex);

  return any_deleted;
}
|
169
|
+
|
170
|
+
/*
 * Result of the reader's is_deleted() for document `doc_num`. Runs under
 * the store's ext_mutex.
 */
bool index_is_deleted(Index *self, int doc_num)
{
  bool deleted;

  mutex_lock(&self->store->ext_mutex);
  ensure_reader_open(self);
  deleted = self->ir->is_deleted(self->ir, doc_num);
  mutex_unlock(&self->store->ext_mutex);

  return deleted;
}
|
179
|
+
|
180
|
+
/*
 * Add `doc` to the index; the caller is expected to hold the lock.
 *
 * When the index has a key (a set of field names), a boolean query that
 * requires a term match on every key field present in `doc` is run first:
 *  - exactly one hit: that document is deleted before the new one is added;
 *  - more than one hit: an ARG_ERROR is reported through eprintf.
 *
 * The query and the search result are released BEFORE the outcome is
 * acted upon. Previously the query leaked on the error path (eprintf is
 * not expected to return), and the result would have been destroyed twice
 * had eprintf returned.
 */
static inline void index_add_doc_i(Index *self, Document *doc)
{
  /* If there is a key specified delete the document with the same key */
  if (self->key) {
    int i;
    int total_hits;
    int dup_doc_num = -1;
    char *field;
    DocField *df;
    Query *q = bq_create(false);
    TopDocs *td;

    ensure_searcher_open(self);
    for (i = 0; i < self->key->size; i++) {
      field = self->key->elems[i];
      df = doc_get_field(doc, field);
      if (!df) continue;
      bq_add_query(q, tq_create(term_create(field, df->data)), BC_MUST);
    }
    td = sea_search(self->sea, q, 0, 1, NULL, NULL);

    /* copy what is needed, then free both objects on every path */
    total_hits = td->total_hits;
    if (total_hits == 1) {
      dup_doc_num = td->hits[0]->doc;
    }
    q->destroy(q);
    td_destroy(td);

    if (total_hits > 1) {
      eprintf(ARG_ERROR, "Tried to use a key that was not unique");
    } else if (total_hits == 1) {
      ir_delete_doc(self->ir, dup_doc_num);
    }
  }
  ensure_writer_open(self);
  iw_add_doc(self->iw, doc);
  AUTOFLUSH_IW;
}
|
210
|
+
|
211
|
+
/*
 * Add `doc` using `analyzer` instead of the index's own analyzer.
 *
 * The index's analyzer is swapped for the duration of the add and put
 * back afterwards; index_add_doc_i opens the writer (or refreshes its
 * analyzer) and performs the add. Runs under the store's ext_mutex.
 *
 * Fix: a stray iw_add_doc(self->iw, doc) used to run before
 * index_add_doc_i. It added the document a second time, bypassed the key
 * handling, and dereferenced self->iw without ensuring a writer was open.
 */
void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
{
  Analyzer *saved_analyzer;

  mutex_lock(&self->store->ext_mutex);
  saved_analyzer = self->analyzer;
  self->analyzer = analyzer;
  index_add_doc_i(self, doc);
  self->analyzer = saved_analyzer;
  mutex_unlock(&self->store->ext_mutex);
}
|
222
|
+
|
223
|
+
/*
 * Add `doc` with the index's current analyzer. Locking wrapper around
 * index_add_doc_i (store's ext_mutex).
 */
void index_add_doc(Index *self, Document *doc)
{
  mutex_lock(&self->store->ext_mutex);

  index_add_doc_i(self, doc);

  mutex_unlock(&self->store->ext_mutex);
}
|
229
|
+
|
230
|
+
/*
 * Index a single string: a temporary document is built with one stored,
 * tokenized field (named by self->id_field) holding a copy of `str`; it is
 * added with `analyzer` when one is given, else with the index's own, and
 * then destroyed.
 */
void index_add_string(Index *self, char *str, Analyzer *analyzer)
{
  Document *doc = doc_create();
  DocField *field = df_create(self->id_field, estrdup(str),
      DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO);

  doc_add_field(doc, field);
  if (analyzer) {
    index_add_doc_a(self, doc, analyzer);
  } else {
    index_add_doc(self, doc);
  }
  doc_destroy(doc);
}
|
239
|
+
|
240
|
+
/*
 * Index an array of strings as ONE document: each element becomes a
 * stored, tokenized field named by self->id_field, holding a copy of the
 * string. The document is added with `analyzer` when one is given, else
 * with the index's own, and then destroyed.
 */
void index_add_array(Index *self, Array *ary, Analyzer *analyzer)
{
  Document *doc = doc_create();
  int pos = 0;

  while (pos < ary->size) {
    doc_add_field(doc, df_create(self->id_field, estrdup(ary->elems[pos]),
          DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
    pos++;
  }
  if (analyzer) {
    index_add_doc_a(self, doc, analyzer);
  } else {
    index_add_doc(self, doc);
  }
  doc_destroy(doc);
}
|
252
|
+
|
253
|
+
/*
 * Parse `qstr` into a Query with the index's query parser.
 *
 * Before parsing, the parser's all_fields set is topped up with a copy of
 * every field name the reader currently reports (IR_ALL). The set
 * returned by the reader is released with hs_destroy, i.e. without
 * freeing its elements. Opens the searcher if necessary; does not take
 * the lock itself.
 */
Query *index_get_query(Index *self, char *qstr)
{
  HashSet *reader_fields;
  int pos = 0;

  ensure_searcher_open(self);
  reader_fields = self->ir->get_field_names(self->ir, IR_ALL);
  while (pos < reader_fields->size) {
    hs_add(self->qp->all_fields, estrdup(reader_fields->elems[pos]));
    pos++;
  }
  hs_destroy(reader_fields);

  return qp_parse(self->qp, qstr);
}
|
264
|
+
|
265
|
+
/*
 * Parse `qstr` and run it through the searcher.
 * first_doc / num_docs / filter / sort are passed straight to sea_search.
 * The parsed query is destroyed before returning; the TopDocs result is
 * returned to the caller. Does not take the lock itself.
 */
TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
                          int num_docs, Filter *filter, Sort *sort)
{
  /* index_get_query also makes sure the searcher is open */
  Query *parsed = index_get_query(self, qstr);
  TopDocs *result = sea_search(self->sea, parsed, first_doc, num_docs,
                               filter, sort);

  parsed->destroy(parsed);
  return result;
}
|
275
|
+
|
276
|
+
/*
 * Fetch document `doc_num` through the reader's get_doc(). No locking;
 * see index_get_doc_ts for the locked variant.
 */
Document *index_get_doc(Index *self, int doc_num)
{
  ensure_reader_open(self);
  return self->ir->get_doc(self->ir, doc_num);
}
|
283
|
+
|
284
|
+
/*
 * index_get_doc executed while holding the store's ext_mutex.
 */
Document *index_get_doc_ts(Index *self, int doc_num)
{
  Document *found;

  mutex_lock(&self->store->ext_mutex);
  found = index_get_doc(self, doc_num);
  mutex_unlock(&self->store->ext_mutex);

  return found;
}
|
292
|
+
|
293
|
+
/*
 * Document number of the first document the reader lists for `term`,
 * or -1 when there is none. No locking.
 */
int index_term_id(Index *self, Term *term)
{
  TermDocEnum *docs;
  int first_match;

  ensure_reader_open(self);
  docs = ir_term_docs_for(self->ir, term);
  first_match = docs->next(docs) ? docs->doc_num(docs) : -1;
  docs->close(docs);

  return first_match;
}
|
305
|
+
|
306
|
+
/*
 * Fetch the first document the reader lists for `term`, or NULL when no
 * document contains it. Runs under the store's ext_mutex.
 *
 * The lookup is delegated to index_term_id, which always closes its term
 * enumerator. The previous inline version closed the enumerator only when
 * a match was found and so leaked it on every miss.
 */
Document *index_get_doc_term(Index *self, Term *term)
{
  Document *doc = NULL;
  int doc_num;

  mutex_lock(&self->store->ext_mutex);
  doc_num = index_term_id(self, term); /* -1 when nothing matches */
  if (doc_num >= 0) {
    doc = index_get_doc(self, doc_num);
  }
  mutex_unlock(&self->store->ext_mutex);

  return doc;
}
|
320
|
+
|
321
|
+
/*
 * Fetch the document whose id field (self->id_field) contains `id`, via
 * index_get_doc_term. The Term lives on the stack and only borrows both
 * strings.
 */
Document *index_get_doc_id(Index *self, char *id)
{
  Term id_term;

  id_term.text = id;
  id_term.field = self->id_field;
  return index_get_doc_term(self, &id_term);
}
|
328
|
+
|
329
|
+
/*
 * Delete document `doc_num` through the reader, then apply the auto-flush
 * policy for reader changes. Runs under the store's ext_mutex.
 */
void index_delete(Index *self, int doc_num)
{
  mutex_lock(&self->store->ext_mutex);

  ensure_reader_open(self);
  ir_delete_doc(self->ir, doc_num);
  AUTOFLUSH_IR;

  mutex_unlock(&self->store->ext_mutex);
}
|
337
|
+
|
338
|
+
/*
 * Delete every document the reader lists for `term`. The auto-flush
 * policy for reader changes is applied after each individual deletion.
 * Runs under the store's ext_mutex.
 */
void index_delete_term(Index *self, Term *term)
{
  TermDocEnum *docs;

  mutex_lock(&self->store->ext_mutex);
  ensure_reader_open(self);

  for (docs = ir_term_docs_for(self->ir, term); docs->next(docs); ) {
    ir_delete_doc(self->ir, docs->doc_num(docs));
    AUTOFLUSH_IR;
  }
  docs->close(docs);

  mutex_unlock(&self->store->ext_mutex);
}
|
351
|
+
|
352
|
+
/*
 * Delete every document whose id field (self->id_field) contains `id`,
 * via index_delete_term. The Term lives on the stack and only borrows
 * both strings.
 */
void index_delete_id(Index *self, char *id)
{
  Term id_term;

  id_term.text = id;
  id_term.field = self->id_field;
  index_delete_term(self, &id_term);
}
|
359
|
+
|
360
|
+
/*
 * Per-hit callback handed to sea_search_each by index_delete_query:
 * deletes the hit through the searcher's reader. `arg` is not used.
 */
static void index_qdel_i(Searcher *sea, int doc_num, void *arg)
{
  (void)arg;
  ir_delete_doc(sea->ir, doc_num);
}
|
364
|
+
|
365
|
+
/*
 * Delete every document matched by query `q` (restricted by filter `f`),
 * then apply the auto-flush policy for reader changes once. The query is
 * not destroyed here. Runs under the store's ext_mutex.
 */
void index_delete_query(Index *self, Query *q, Filter *f)
{
  mutex_lock(&self->store->ext_mutex);

  ensure_searcher_open(self);
  sea_search_each(self->sea, q, f, &index_qdel_i, NULL);
  AUTOFLUSH_IR;

  mutex_unlock(&self->store->ext_mutex);
}
|
373
|
+
|
374
|
+
/*
 * Parse `qstr`, delete every document the resulting query matches
 * (restricted by filter `f`), and destroy the parsed query.
 */
void index_delete_query_str(Index *self, char *qstr, Filter *f)
{
  Query *parsed = index_get_query(self, qstr);

  index_delete_query(self, parsed, f);
  parsed->destroy(parsed);
}
|
380
|
+
|
381
|
+
/*
 * Ask the searcher to explain how query `q` relates to document
 * `doc_num`; the Explanation from sea_explain is returned to the caller.
 * Runs under the store's ext_mutex.
 */
Explanation *index_explain(Index *self, Query *q, int doc_num)
{
  Explanation *result;

  mutex_lock(&self->store->ext_mutex);
  ensure_searcher_open(self);
  result = sea_explain(self->sea, q, doc_num);
  mutex_unlock(&self->store->ext_mutex);

  return result;
}
|