ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/hash.h ADDED
@@ -0,0 +1,80 @@
1
+ #ifndef FRT_HASH_H
2
+ #define FRT_HASH_H
3
+
4
+ #include "global.h"
5
+
6
+ #define NUM_ENTRIES 256
7
+ #define MULTIPLIER 31
8
+
9
/*
 * Entry of the simple string-keyed hash table. Entries are singly linked
 * through `next` (presumably one chain per bucket, NUM_ENTRIES buckets --
 * confirm against hash.c).
 */
typedef struct HashEntry {
    char *name;             /* key string */
    void *value;            /* value stored under `name` */
    struct HashEntry *next; /* following entry, or NULL at the end */
} HashEntry;

/*
 * Simple string-keyed table API. The implementations are not part of this
 * header; `ht_create` is declared with an explicit `(void)` so that calls
 * passing arguments are rejected at compile time.
 */
HashEntry **ht_create(void);
int ht_count(HashEntry **ht);
void ht_destroy(HashEntry **ht);
/* `fn` is presumably applied to each stored value -- confirm in hash.c */
void ht_destroy_all(HashEntry **ht, void (*fn)(void *));
void ht_set(HashEntry **ht, char *name, void *value);
void *ht_get(HashEntry **ht, char *name);
void *ht_delete(HashEntry **ht, char *name);
22
+
23
+ /****************************************************************************
24
+ *
25
+ * HshTable
26
+ *
27
+ ****************************************************************************/
28
+
29
/* Number of entries embedded directly in every HshTable (see `smalltable`). */
#define Hsh_MINSIZE 8
/* Past this size, stop increasing the hash table so quickly to conserve
 * memory. */
#define SLOW_DOWN 50000

extern char *dummy_key;

/*
 * Result codes of a key lookup (returned by h_has_key). Judging by the way
 * hashset.c uses them, HASH_KEY_SAME means the very same pointer is already
 * stored while HASH_KEY_EQUAL means a distinct but equal key is stored --
 * confirm against hash.c.
 */
enum {
    HASH_KEY_DOES_NOT_EXIST = 0,
    HASH_KEY_SAME = 1,
    HASH_KEY_EQUAL = 2
};

/* A single slot of a HshTable. */
typedef struct {
    int hash;    /* cached hash code of key */
    void *key;
    void *value;
} HshEntry;

/* Hash table with pluggable hashing, equality and destructor callbacks. */
typedef struct HshTable {
    int fill;    /* # Active + # Dummy */
    int used;    /* # Active */
    int mask;    /* NOTE(review): presumably table size - 1; confirm */

    /* table points to smalltable for small tables, else to
     * additional malloc'ed memory. */
    HshEntry *table;
    HshEntry smalltable[Hsh_MINSIZE];

    /* callbacks */
    HshEntry *(*lookup)(struct HshTable *ht, const void *key);
    unsigned int (*hash)(const void *key);
    int (*eq)(const void *key1, const void *key2);
    void (*free_key)(void *key);
    void (*free_value)(void *value);
} HshTable;

/* Construction and destruction. */
HshTable *h_new_str(void (*free_key)(void *key),
                    void (*free_value)(void *value));
HshTable *h_new(unsigned int (*hash)(const void *key),
                int (*eq)(const void *key1, const void *key2),
                void (*free_key)(void *key),
                void (*free_value)(void *value));
void h_destroy(HshTable *ht);
void h_clear(HshTable *ht);

/* Access and modification. h_has_key reports one of the HASH_KEY_* codes. */
void *h_get(HshTable *ht, const void *key);
int h_del(HshTable *ht, const void *key);
void *h_rem(HshTable *ht, const void *key, bool del_key);
int h_set(HshTable *ht, const void *key, void *value);
int h_set_safe(HshTable *ht, const void *key, void *value);
int h_has_key(HshTable *ht, const void *key);
unsigned int str_hash(const char *const str);

/* Helpers shared with other modules. */
void dummy_free(void *p);
HshEntry *h_lookup_str(HshTable *ht, register const void *key_p);
79
+
80
+ #endif
data/ext/hashset.c ADDED
@@ -0,0 +1,141 @@
1
+ #include <hashset.h>
2
+ #include <string.h>
3
+ #define HS_MIN_SIZE 4
4
+
5
+ int *imalloc(int i)
6
+ {
7
+ int *ip = ALLOC(int);
8
+ *ip = i;
9
+ return ip;
10
+ }
11
+
12
/* Element destructor that deliberately does nothing. */
void hs_dummy_free(void *p)
{
    (void)p;
}
13
+
14
+ HashSet *hs_create(unsigned int (*hash)(const void *p),
15
+ int (*eq)(const void *p1, const void *p2),
16
+ void (*free_elem)(void *p))
17
+ {
18
+ HashSet *hs = ALLOC(HashSet);
19
+ hs->ht = h_new(hash, eq, NULL, &efree);
20
+ hs->elems = NULL;
21
+ hs->capa = hs->size = 0;
22
+ if (free_elem == NULL)
23
+ hs->free_elem = &hs_dummy_free;
24
+ else
25
+ hs->free_elem = free_elem;
26
+ return hs;
27
+ }
28
+
29
+ HashSet *hs_str_create(void (*free_elem)(void *p))
30
+ {
31
+ HashSet *hs = ALLOC(HashSet);
32
+ hs->ht = h_new_str(NULL, &efree);
33
+ hs->elems = NULL;
34
+ hs->capa = hs->size = 0;
35
+ if (free_elem == NULL)
36
+ hs->free_elem = &hs_dummy_free;
37
+ else
38
+ hs->free_elem = free_elem;
39
+ return hs;
40
+ }
41
+
42
+ void hs_destroy(void *p)
43
+ {
44
+ HashSet *hs = (HashSet *)p;
45
+ h_destroy(hs->ht);
46
+ free(hs->elems);
47
+ free(hs);
48
+ }
49
+
50
+ void hs_clear(HashSet *self)
51
+ {
52
+ int i;
53
+ for (i = self->size - 1; i >= 0; i--)
54
+ hs_del(self, self->elems[i]);
55
+ }
56
+
57
+ void hs_destroy_all(void *p)
58
+ {
59
+ int i;
60
+ HashSet *hs = (HashSet *)p;
61
+ if (hs->free_elem != &dummy_free)
62
+ for (i = 0; i < hs->size; i++)
63
+ hs->free_elem(hs->elems[i]);
64
+ hs_destroy(p);
65
+ }
66
+
67
+ int hs_add(HashSet *hs, void *elem)
68
+ {
69
+ int has_elem = h_has_key(hs->ht, elem);
70
+ //printf("has_elem = %d %d:%d\n", has_elem, HASH_KEY_EQUAL, HASH_KEY_SAME);
71
+ if (has_elem == HASH_KEY_EQUAL) {
72
+ // We don't want to keep two of the same elem so free if necessary
73
+ hs->free_elem(elem);
74
+ } else if (has_elem == HASH_KEY_SAME) {
75
+ // No need to do anything
76
+ } else {
77
+ // add the elem to the array, resizing if necessary
78
+ if (hs->size >= hs->capa) {
79
+ if (hs->capa == 0)
80
+ hs->capa = HS_MIN_SIZE;
81
+ else
82
+ hs->capa *= 2;
83
+ REALLOC_N(hs->elems, void *, hs->capa);
84
+ }
85
+ hs->elems[hs->size] = elem;
86
+ h_set(hs->ht, elem, imalloc(hs->size));
87
+ hs->size++;
88
+ }
89
+ return has_elem;
90
+ }
91
+
92
+ int hs_del(HashSet *hs, void *elem)
93
+ {
94
+ void *tmp_elem = hs_rem(hs, elem);
95
+ if (tmp_elem != NULL) {
96
+ hs->free_elem(tmp_elem);
97
+ return 1;
98
+ } else {
99
+ return 0;
100
+ }
101
+ }
102
+
103
+ void *hs_rem(HashSet *hs, void *elem)
104
+ {
105
+ void *ret_elem;
106
+ int *index = (int *)h_get(hs->ht, elem);
107
+ if (index == NULL) {
108
+ return NULL;
109
+ } else {
110
+ int i = *index;
111
+ ret_elem = hs->elems[i];
112
+ h_del(hs->ht, elem);
113
+ hs->size--;
114
+ memmove(&hs->elems[i], &hs->elems[i+1], sizeof(void *) * (hs->size - i));
115
+ return ret_elem;
116
+ }
117
+ }
118
+
119
+ int hs_exists(HashSet *hs, void *elem)
120
+ {
121
+ return h_has_key(hs->ht, elem);
122
+ }
123
+
124
+ HashSet *hs_merge(HashSet *hs, HashSet *other)
125
+ {
126
+ int i;
127
+ for (i = 0; i < other->size; i++) {
128
+ hs_add(hs, other->elems[i]);
129
+ }
130
+ // Now free the other hashset. It is no longer needed. No need, however, to
131
+ // delete the elements as they are in the new hash set
132
+ hs_destroy(other);
133
+ return hs;
134
+ }
135
+
136
+ void *hs_orig(HashSet *hs, void *elem)
137
+ {
138
+ int *i = h_get(hs->ht, elem);
139
+ if (i) return hs->elems[*i];
140
+ else return NULL;
141
+ }
data/ext/hashset.h ADDED
@@ -0,0 +1,37 @@
1
+ #ifndef FRT_HASHSET_H
2
+ #define FRT_HASHSET_H
3
+
4
+ #include "hash.h"
5
+ #include "array.h"
6
+ #include "global.h"
7
+
8
+ typedef struct HashSet {
9
+ int capa;
10
+ int size;
11
+ void **elems;
12
+ HshTable *ht;
13
+ void (*free_elem)(void *p);
14
+ } HashSet;
15
+
16
+ HashSet *hs_create(unsigned int (*hash)(const void *p),
17
+ int (*eq)(const void *p1, const void *p2),
18
+ void (*free_elem)(void *p));
19
+ HashSet *hs_str_create(void (*free_elem)(void *p));
20
+ void hs_destroy(void *p);
21
+ void hs_destroy_all(void *p);
22
+ int hs_add(HashSet *hs, void *elem);
23
+ int hs_del(HashSet *hs, void *elem);
24
+ void *hs_rem(HashSet *hs, void *elem);
25
+ int hs_exists(HashSet *hs, void *elem);
26
+ HashSet *hs_merge(HashSet *hs, HashSet *other);
27
+ void *hs_orig(HashSet *hs, void *elem);
28
+ void hs_clear(HashSet *self);
29
+
30
+ // TODO: finish these functions.
31
+ //int hs_osf(HashSet *hs, void *elem);
32
+ //HashSet hs_or(HashSet *hs1, HashSet *h2);
33
+ //HashSet hs_excl_or(HashSet *hs1, HashSet *h2);
34
+ //HashSet hs_and(HashSet *hs1, HashSet *h2);
35
+ //HashSet hs_mask(HashSet *hs1, HashSet *h2);
36
+
37
+ #endif
data/ext/helper.c ADDED
@@ -0,0 +1,11 @@
1
+ #include <string.h>
2
+
3
/*
 * Return the length of the common prefix of s1 and s2, i.e. the index of the
 * first position at which the two strings differ (or the length of s1 when
 * s1 is a prefix of s2).
 *
 * Both arguments must be NUL-terminated. The scan stops at the terminator of
 * s1, or earlier at the first mismatch, which also covers s2 ending first.
 *
 * Defined without `inline`: a non-static `inline` definition provides no
 * external definition under C99/C11 rules, so callers in other translation
 * units would fail to link.
 */
int hlp_string_diff(register const char *const s1,
                    register const char *const s2)
{
    int i = 0;

    while (s1[i] != '\0' && s1[i] == s2[i]) {
        i++;
    }
    return i;
}
data/ext/helper.h ADDED
@@ -0,0 +1,5 @@
1
+ #ifndef FRT_HELPER_H
2
+ #define FRT_HELPER_H
3
+
4
/*
 * Length of the common prefix of s1 and s2. The parameters are
 * const-qualified to match the definition in helper.c, which never writes
 * through them; existing callers passing `char *` are unaffected.
 */
int hlp_string_diff(const char *s1, const char *s2);
5
+ #endif
data/ext/inc/lang.h ADDED
@@ -0,0 +1,41 @@
1
+ #ifndef FRT_LANG_H
2
+ #define FRT_LANG_H
3
+
4
+ #include <ruby.h>
5
+
6
+ #define FERRET_EXT
7
+
8
+ #define MAX_ERROR_LEN 2048
9
+ #define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
10
+ extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
11
+ extern void weprintf(const char *fmt, ...);
12
+ extern char *progname(void);
13
+ extern void setprogname(const char *str);
14
+
15
+ extern VALUE cQueryParseException;
16
+
17
+ #define ERROR rb_eException
18
+ #define IO_ERROR rb_eIOError
19
+ #define ARG_ERROR rb_eArgError
20
+ #define EOF_ERROR rb_eEOFError
21
+ #define UNSUPPORTED_ERROR rb_eNotImpError
22
+ #define STATE_ERROR rb_eException
23
+ #define PARSE_ERROR cQueryParseException
24
+ #define MEM_ERROR rb_eNoMemError
25
+
26
+ typedef void * mutex_t;
27
+ typedef void * thread_key_t;
28
+ #define MUTEX_INITIALIZER NULL
29
+ #define MUTEX_RECURSIVE_INITIALIZER NULL
30
+ #define mutex_init(a, b)
31
+ #define mutex_lock(a)
32
+ #define mutex_trylock(a)
33
+ #define mutex_unlock(a)
34
+ #define mutex_destroy(a)
35
+ #define thread_key_create(a, b)
36
+ #define thread_key_delete(a)
37
+ #define thread_setspecific(a, b)
38
+ #define thread_getspecific(a) NULL
39
+ #define thread_exit(a)
40
+
41
+ #endif
data/ext/ind.c ADDED
@@ -0,0 +1,389 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
/* Default name used for both the id field and the default search field. */
static const char *ID_STRING = "id";

/*
 * Close whatever is reading the index. When a searcher exists it is closed
 * and both the searcher and reader pointers are cleared (only sea_close is
 * called in that case); otherwise the bare reader, if any, is closed.
 */
#define INDEX_CLOSE_READER(self) do {\
    if ((self)->sea) {\
        sea_close((self)->sea);\
        (self)->sea = NULL;\
        (self)->ir = NULL;\
    } else if ((self)->ir) {\
        ir_close((self)->ir);\
        (self)->ir = NULL;\
    }\
} while (0)

/*
 * Called after a change made through the reader: commit right away when
 * auto_flush is set, otherwise just remember that writes are pending.
 * Expects a variable named `self` in scope.
 */
#define AUTOFLUSH_IR do {\
    if (self->auto_flush) {\
        ir_commit(self->ir);\
    } else {\
        self->has_writes = true;\
    }\
} while (0)

/*
 * Called after a change made through the writer: close (and forget) the
 * writer right away when auto_flush is set, otherwise just remember that
 * writes are pending. Expects a variable named `self` in scope.
 */
#define AUTOFLUSH_IW do {\
    if (self->auto_flush) {\
        iw_close(self->iw);\
        self->iw = NULL;\
    } else {\
        self->has_writes = true;\
    }\
} while (0)
25
+
26
+ void index_auto_flush_ir(Index *self)
27
+ {
28
+ AUTOFLUSH_IR;
29
+ }
30
+
31
+ void index_auto_flush_iw(Index *self)
32
+ {
33
+ AUTOFLUSH_IW;
34
+ }
35
+
36
+ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
37
+ bool create)
38
+ {
39
+ HashSet *all_fields = hs_str_create(&free);
40
+ Index *self = ALLOC(Index);
41
+ ZEROSET(self, Index, 1);
42
+ mutex_init(&self->mutex, NULL);
43
+ self->has_writes = false;
44
+ if (store) {
45
+ self->store = store;
46
+ self->close_store = false;
47
+ } else {
48
+ self->store = open_ram_store();
49
+ create = true;
50
+ self->close_store = true;
51
+ }
52
+ if (analyzer) {
53
+ self->analyzer = analyzer;
54
+ self->close_analyzer = false;
55
+ } else {
56
+ self->analyzer = standard_analyzer_create();
57
+ self->close_analyzer = true;
58
+ }
59
+ self->use_compound_file = true;
60
+
61
+ if (create) {
62
+ self->iw = iw_open(self->store, self->analyzer, create, false, false);
63
+ iw_close(self->iw);
64
+ self->iw = NULL;
65
+ }
66
+
67
+ /* options */
68
+ self->key = NULL;
69
+ self->id_field = (char *)ID_STRING;
70
+ self->def_field = (char *)ID_STRING;
71
+ self->auto_flush = false;
72
+
73
+ self->qp = qp_create(all_fields, def_fields, self->analyzer);
74
+ /* Index is a convenience class so set qp convenience options */
75
+ self->qp->allow_any_fields = true;
76
+ self->qp->clean_str = true;
77
+ self->qp->handle_parse_errors = true;
78
+
79
+ return self;
80
+ }
81
+
82
+ void index_destroy(Index *self)
83
+ {
84
+ mutex_destroy(&self->mutex);
85
+ INDEX_CLOSE_READER(self);
86
+ if (self->iw) iw_close(self->iw);
87
+ if (self->close_store) self->store->close(self->store);
88
+ if (self->close_analyzer) a_destroy(self->analyzer);
89
+ if (self->qp) qp_destroy(self->qp);
90
+ if (self->id_field != ((char *)ID_STRING)) free(self->id_field);
91
+ if (self->def_field != ((char *)ID_STRING)) free(self->def_field);
92
+ if (self->key) hs_destroy_all(self->key);
93
+ free(self);
94
+ }
95
+
96
+ void index_flush(Index *self)
97
+ {
98
+ if (self->ir) {
99
+ ir_commit(self->ir);
100
+ } else if (self->iw) {
101
+ iw_close(self->iw);
102
+ self->iw = NULL;
103
+ }
104
+ self->has_writes = false;
105
+ }
106
+ inline void ensure_writer_open(Index *self)
107
+ {
108
+ if (!self->iw) {
109
+ INDEX_CLOSE_READER(self);
110
+ self->iw = iw_open(self->store, self->analyzer, false, false, false);
111
+ self->iw->use_compound_file = self->use_compound_file;
112
+ } else {
113
+ self->iw->analyzer = self->analyzer; /* in case it has changed */
114
+ }
115
+ }
116
+
117
+ inline void ensure_reader_open(Index *self)
118
+ {
119
+ if (self->ir) {
120
+ if (!ir_is_latest(self->ir)) {
121
+ INDEX_CLOSE_READER(self);
122
+ self->ir = ir_open(self->store, false);
123
+ }
124
+ } else {
125
+ if (self->iw) {
126
+ iw_close(self->iw);
127
+ self->iw = NULL;
128
+ }
129
+ self->ir = ir_open(self->store, false);
130
+ }
131
+ }
132
+
133
+ inline void ensure_searcher_open(Index *self)
134
+ {
135
+ ensure_reader_open(self);
136
+ if (!self->sea) {
137
+ self->sea = sea_create(self->ir);
138
+ }
139
+ }
140
+
141
+ int index_size(Index *self)
142
+ {
143
+ int size;
144
+ mutex_lock(&self->store->ext_mutex);
145
+ ensure_reader_open(self);
146
+ size = self->ir->num_docs(self->ir);
147
+ mutex_unlock(&self->store->ext_mutex);
148
+ return size;
149
+ }
150
+
151
+ void index_optimize(Index *self)
152
+ {
153
+ mutex_lock(&self->store->ext_mutex);
154
+ ensure_writer_open(self);
155
+ iw_optimize(self->iw);
156
+ AUTOFLUSH_IW;
157
+ mutex_unlock(&self->store->ext_mutex);
158
+ }
159
+
160
+ bool index_has_del(Index *self)
161
+ {
162
+ bool has_del;
163
+ mutex_lock(&self->store->ext_mutex);
164
+ ensure_reader_open(self);
165
+ has_del = self->ir->has_deletions(self->ir);
166
+ mutex_unlock(&self->store->ext_mutex);
167
+ return has_del;
168
+ }
169
+
170
+ bool index_is_deleted(Index *self, int doc_num)
171
+ {
172
+ bool is_del;
173
+ mutex_lock(&self->store->ext_mutex);
174
+ ensure_reader_open(self);
175
+ is_del = self->ir->is_deleted(self->ir, doc_num);
176
+ mutex_unlock(&self->store->ext_mutex);
177
+ return is_del;
178
+ }
179
+
180
+ static void inline index_add_doc_i(Index *self, Document *doc)
181
+ {
182
+ /* If there is a key specified delete the document with the same key */
183
+ if (self->key) {
184
+ int i;
185
+ char *field;
186
+ DocField *df;
187
+ Query *q = bq_create(false);
188
+ TopDocs *td;
189
+ ensure_searcher_open(self);
190
+ for (i = 0; i < self->key->size; i++) {
191
+ field = self->key->elems[i];
192
+ df = doc_get_field(doc, field);
193
+ if (!df) continue;
194
+ bq_add_query(q, tq_create(term_create(field, df->data)), BC_MUST);
195
+ }
196
+ td = sea_search(self->sea, q, 0, 1, NULL, NULL);
197
+ if (td->total_hits > 1) {
198
+ td_destroy(td);
199
+ eprintf(ARG_ERROR, "Tried to use a key that was not unique");
200
+ } else if (td->total_hits == 1) {
201
+ ir_delete_doc(self->ir, td->hits[0]->doc);
202
+ }
203
+ q->destroy(q);
204
+ td_destroy(td);
205
+ }
206
+ ensure_writer_open(self);
207
+ iw_add_doc(self->iw, doc);
208
+ AUTOFLUSH_IW;
209
+ }
210
+
211
+ void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
212
+ {
213
+ Analyzer *tmp_analyzer;
214
+ mutex_lock(&self->store->ext_mutex);
215
+ tmp_analyzer = self->analyzer;
216
+ self->analyzer = analyzer;
217
+ iw_add_doc(self->iw, doc);
218
+ index_add_doc_i(self, doc);
219
+ self->analyzer = tmp_analyzer;
220
+ mutex_unlock(&self->store->ext_mutex);
221
+ }
222
+
223
+ void index_add_doc(Index *self, Document *doc)
224
+ {
225
+ mutex_lock(&self->store->ext_mutex);
226
+ index_add_doc_i(self, doc);
227
+ mutex_unlock(&self->store->ext_mutex);
228
+ }
229
+
230
+ void index_add_string(Index *self, char *str, Analyzer *analyzer)
231
+ {
232
+ Document *doc = doc_create();
233
+ doc_add_field(doc, df_create(self->id_field, estrdup(str),
234
+ DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
235
+ if (analyzer) index_add_doc_a(self, doc, analyzer);
236
+ else index_add_doc(self, doc);
237
+ doc_destroy(doc);
238
+ }
239
+
240
+ void index_add_array(Index *self, Array *ary, Analyzer *analyzer)
241
+ {
242
+ int i;
243
+ Document *doc = doc_create();
244
+ for (i = 0; i < ary->size; i++) {
245
+ doc_add_field(doc, df_create(self->id_field, estrdup(ary->elems[i]),
246
+ DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
247
+ }
248
+ if (analyzer) index_add_doc_a(self, doc, analyzer);
249
+ else index_add_doc(self, doc);
250
+ doc_destroy(doc);
251
+ }
252
+
253
+ Query *index_get_query(Index *self, char *qstr)
254
+ {
255
+ int i;
256
+ HashSet *all_fields;
257
+ ensure_searcher_open(self);
258
+ all_fields = self->ir->get_field_names(self->ir, IR_ALL);
259
+ for (i = 0; i < all_fields->size; i++)
260
+ hs_add(self->qp->all_fields, estrdup(all_fields->elems[i]));
261
+ hs_destroy(all_fields);
262
+ return qp_parse(self->qp, qstr);
263
+ }
264
+
265
+ TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
266
+ int num_docs, Filter *filter, Sort *sort)
267
+ {
268
+ Query *query;
269
+ TopDocs *td;
270
+ query = index_get_query(self, qstr); /* will ensure_searcher is open */
271
+ td = sea_search(self->sea, query, first_doc, num_docs, filter, sort);
272
+ query->destroy(query);
273
+ return td;
274
+ }
275
+
276
+ Document *index_get_doc(Index *self, int doc_num)
277
+ {
278
+ Document *doc;
279
+ ensure_reader_open(self);
280
+ doc = self->ir->get_doc(self->ir, doc_num);
281
+ return doc;
282
+ }
283
+
284
+ Document *index_get_doc_ts(Index *self, int doc_num)
285
+ {
286
+ Document *doc;
287
+ mutex_lock(&self->store->ext_mutex);
288
+ doc = index_get_doc(self, doc_num);
289
+ mutex_unlock(&self->store->ext_mutex);
290
+ return doc;
291
+ }
292
+
293
+ int index_term_id(Index *self, Term *term)
294
+ {
295
+ TermDocEnum *tde;
296
+ int doc_num = -1;
297
+ ensure_reader_open(self);
298
+ tde = ir_term_docs_for(self->ir, term);
299
+ if (tde->next(tde)) {
300
+ doc_num = tde->doc_num(tde);
301
+ }
302
+ tde->close(tde);
303
+ return doc_num;
304
+ }
305
+
306
+ Document *index_get_doc_term(Index *self, Term *term)
307
+ {
308
+ Document *doc = NULL;
309
+ TermDocEnum *tde;
310
+ mutex_lock(&self->store->ext_mutex);
311
+ ensure_reader_open(self);
312
+ tde = ir_term_docs_for(self->ir, term);
313
+ if (tde->next(tde)) {
314
+ doc = index_get_doc(self, tde->doc_num(tde));
315
+ tde->close(tde);
316
+ }
317
+ mutex_unlock(&self->store->ext_mutex);
318
+ return doc;
319
+ }
320
+
321
+ Document *index_get_doc_id(Index *self, char *id)
322
+ {
323
+ Term t;
324
+ t.field = self->id_field;
325
+ t.text = id;
326
+ return index_get_doc_term(self, &t);
327
+ }
328
+
329
+ void index_delete(Index *self, int doc_num)
330
+ {
331
+ mutex_lock(&self->store->ext_mutex);
332
+ ensure_reader_open(self);
333
+ ir_delete_doc(self->ir, doc_num);
334
+ AUTOFLUSH_IR;
335
+ mutex_unlock(&self->store->ext_mutex);
336
+ }
337
+
338
+ void index_delete_term(Index *self, Term *term)
339
+ {
340
+ TermDocEnum *tde;
341
+ mutex_lock(&self->store->ext_mutex);
342
+ ensure_reader_open(self);
343
+ tde = ir_term_docs_for(self->ir, term);
344
+ while (tde->next(tde)) {
345
+ ir_delete_doc(self->ir, tde->doc_num(tde));
346
+ AUTOFLUSH_IR;
347
+ }
348
+ tde->close(tde);
349
+ mutex_unlock(&self->store->ext_mutex);
350
+ }
351
+
352
+ void index_delete_id(Index *self, char *id)
353
+ {
354
+ Term t;
355
+ t.field = self->id_field;
356
+ t.text = id;
357
+ index_delete_term(self, &t);
358
+ }
359
+
360
+ static void index_qdel_i(Searcher *sea, int doc_num, void *arg)
361
+ {
362
+ ir_delete_doc(sea->ir, doc_num);
363
+ }
364
+
365
+ void index_delete_query(Index *self, Query *q, Filter *f)
366
+ {
367
+ mutex_lock(&self->store->ext_mutex);
368
+ ensure_searcher_open(self);
369
+ sea_search_each(self->sea, q, f, &index_qdel_i, NULL);
370
+ AUTOFLUSH_IR;
371
+ mutex_unlock(&self->store->ext_mutex);
372
+ }
373
+
374
+ void index_delete_query_str(Index *self, char *qstr, Filter *f)
375
+ {
376
+ Query *q = index_get_query(self, qstr);
377
+ index_delete_query(self, q, f);
378
+ q->destroy(q);
379
+ }
380
+
381
+ Explanation *index_explain(Index *self, Query *q, int doc_num)
382
+ {
383
+ Explanation *expl;
384
+ mutex_lock(&self->store->ext_mutex);
385
+ ensure_searcher_open(self);
386
+ expl = sea_explain(self->sea, q, doc_num);
387
+ mutex_unlock(&self->store->ext_mutex);
388
+ return expl;
389
+ }