ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
data/ext/hash.h ADDED
@@ -0,0 +1,80 @@
1
+ #ifndef FRT_HASH_H
2
+ #define FRT_HASH_H
3
+
4
+ #include "global.h"
5
+
6
+ #define NUM_ENTRIES 256
7
+ #define MULTIPLIER 31
8
+
9
+ typedef struct HashEntry { /* node type used by the ht_* functions declared right after this struct */
10
+ char *name; /* presumably the entry's key: ht_set/ht_get/ht_delete all take a `char *name` -- confirm in hash.c */
11
+ void *value; /* opaque payload; ht_destroy_all() takes a callback for it, ownership rules not visible here */
12
+ struct HashEntry *next; /* link to another HashEntry (looks like a collision chain -- TODO confirm) */
13
+ } HashEntry;
14
+
15
+ HashEntry **ht_create();
16
+ int ht_count(HashEntry **ht);
17
+ void ht_destroy(HashEntry **ht);
18
+ void ht_destroy_all(HashEntry **ht, void (*fn)(void *));
19
+ void ht_set(HashEntry **ht, char *name, void *value);
20
+ void *ht_get(HashEntry **ht, char *name);
21
+ void *ht_delete(HashEntry **ht, char *name);
22
+
23
+ /****************************************************************************
24
+ *
25
+ * HshTable
26
+ *
27
+ ****************************************************************************/
28
+
29
+ #define Hsh_MINSIZE 8
30
+ #define SLOW_DOWN 50000 // stop increasing the hash table so quickly to
31
+ // conserve memory
32
+ extern char *dummy_key;
33
+ enum { /* key-presence status codes; hs_add()/hs_exists() in hashset.c pass on what h_has_key() returns */
34
+ HASH_KEY_DOES_NOT_EXIST = 0, /* zero, so the status can be used as a plain boolean */
35
+ HASH_KEY_SAME = 1, /* presumably: the identical key pointer is already stored (hs_add does nothing) -- confirm in hash.c */
36
+ HASH_KEY_EQUAL = 2 /* presumably: a different pointer that compares equal is stored (hs_add frees the new element) -- confirm */
37
+ };
38
+
39
+ typedef struct { /* one slot of a HshTable (HshTable.table and HshTable.smalltable are arrays of these) */
40
+ int hash; /* cached hash code of key */
41
+ void *key; /* opaque key pointer; NOTE(review): dummy_key declared above looks like a deleted-slot marker -- confirm */
42
+ void *value; /* opaque value pointer associated with key */
43
+ } HshEntry;
44
+
45
+ typedef struct HshTable { /* hash table whose key handling is supplied through the function pointers below */
46
+ int fill; /* # Active + # Dummy */
47
+ int used; /* # Active */
48
+ int mask; /* presumably (slot count - 1) for wrapping indexes -- TODO confirm in hash.c */
49
+
50
+ /* table points to smalltable for small tables, else to
51
+ * additional malloc'ed memory. */
52
+ HshEntry *table;
53
+ HshEntry smalltable[Hsh_MINSIZE]; /* embedded storage of Hsh_MINSIZE (8) slots */
54
+ HshEntry *(*lookup)(struct HshTable *ht, const void *key); /* slot lookup strategy; h_lookup_str below has this signature */
55
+ unsigned int (*hash)(const void *key); /* key hashing callback (second-to-first argument of h_new) */
56
+ int (*eq)(const void *key1, const void *key2); /* key equality callback */
57
+ void (*free_key)(void *key); /* key destructor callback supplied to h_new/h_new_str */
58
+ void (*free_value)(void *value); /* value destructor callback supplied to h_new/h_new_str */
59
+ } HshTable;
60
+
61
+ HshTable *h_new_str(void (*free_key)(void *key), void (*free_value)(void *value));
62
+ HshTable *h_new(unsigned int (*hash)(const void *key),
63
+ int (*eq)(const void *key1, const void *key2),
64
+ void (*free_key)(void *key),
65
+ void (*free_value)(void *value));
66
+ void h_destroy(HshTable *ht);
67
+ void h_clear(HshTable *ht);
68
+
69
+ void *h_get(HshTable *ht, const void *key);
70
+ int h_del(HshTable *ht, const void *key);
71
+ void *h_rem(HshTable *ht, const void *key, bool del_key);
72
+ int h_set(HshTable *ht, const void *key, void *value);
73
+ int h_set_safe(HshTable *ht, const void *key, void *value);
74
+ int h_has_key(HshTable *ht, const void *key);
75
+ unsigned int str_hash(const char *const str);
76
+
77
+ void dummy_free(void *p);
78
+ HshEntry *h_lookup_str(HshTable *ht, register const void *key_p);
79
+
80
+ #endif
data/ext/hashset.c ADDED
@@ -0,0 +1,141 @@
1
+ #include <hashset.h>
2
+ #include <string.h>
3
+ #define HS_MIN_SIZE 4
4
+
5
+ int *imalloc(int i)
6
+ {
7
+ int *ip = ALLOC(int);
8
+ *ip = i;
9
+ return ip;
10
+ }
11
+
12
/*
 * No-op element destructor. hs_create() and hs_str_create() install it when
 * the caller passes NULL, so hs->free_elem can always be called.
 */
void hs_dummy_free(void *p)
{
  (void)p; /* intentionally unused */
}
13
+
14
+ HashSet *hs_create(unsigned int (*hash)(const void *p),
15
+ int (*eq)(const void *p1, const void *p2),
16
+ void (*free_elem)(void *p))
17
+ {
18
+ HashSet *hs = ALLOC(HashSet);
19
+ hs->ht = h_new(hash, eq, NULL, &efree);
20
+ hs->elems = NULL;
21
+ hs->capa = hs->size = 0;
22
+ if (free_elem == NULL)
23
+ hs->free_elem = &hs_dummy_free;
24
+ else
25
+ hs->free_elem = free_elem;
26
+ return hs;
27
+ }
28
+
29
+ HashSet *hs_str_create(void (*free_elem)(void *p))
30
+ {
31
+ HashSet *hs = ALLOC(HashSet);
32
+ hs->ht = h_new_str(NULL, &efree);
33
+ hs->elems = NULL;
34
+ hs->capa = hs->size = 0;
35
+ if (free_elem == NULL)
36
+ hs->free_elem = &hs_dummy_free;
37
+ else
38
+ hs->free_elem = free_elem;
39
+ return hs;
40
+ }
41
+
42
+ void hs_destroy(void *p)
43
+ {
44
+ HashSet *hs = (HashSet *)p;
45
+ h_destroy(hs->ht);
46
+ free(hs->elems);
47
+ free(hs);
48
+ }
49
+
50
+ void hs_clear(HashSet *self)
51
+ {
52
+ int i;
53
+ for (i = self->size - 1; i >= 0; i--)
54
+ hs_del(self, self->elems[i]);
55
+ }
56
+
57
+ void hs_destroy_all(void *p)
58
+ {
59
+ int i;
60
+ HashSet *hs = (HashSet *)p;
61
+ if (hs->free_elem != &dummy_free)
62
+ for (i = 0; i < hs->size; i++)
63
+ hs->free_elem(hs->elems[i]);
64
+ hs_destroy(p);
65
+ }
66
+
67
+ int hs_add(HashSet *hs, void *elem)
68
+ {
69
+ int has_elem = h_has_key(hs->ht, elem);
70
+ //printf("has_elem = %d %d:%d\n", has_elem, HASH_KEY_EQUAL, HASH_KEY_SAME);
71
+ if (has_elem == HASH_KEY_EQUAL) {
72
+ // We don't want to keep two of the same elem so free if necessary
73
+ hs->free_elem(elem);
74
+ } else if (has_elem == HASH_KEY_SAME) {
75
+ // No need to do anything
76
+ } else {
77
+ // add the elem to the array, resizing if necessary
78
+ if (hs->size >= hs->capa) {
79
+ if (hs->capa == 0)
80
+ hs->capa = HS_MIN_SIZE;
81
+ else
82
+ hs->capa *= 2;
83
+ REALLOC_N(hs->elems, void *, hs->capa);
84
+ }
85
+ hs->elems[hs->size] = elem;
86
+ h_set(hs->ht, elem, imalloc(hs->size));
87
+ hs->size++;
88
+ }
89
+ return has_elem;
90
+ }
91
+
92
+ int hs_del(HashSet *hs, void *elem)
93
+ {
94
+ void *tmp_elem = hs_rem(hs, elem);
95
+ if (tmp_elem != NULL) {
96
+ hs->free_elem(tmp_elem);
97
+ return 1;
98
+ } else {
99
+ return 0;
100
+ }
101
+ }
102
+
103
+ void *hs_rem(HashSet *hs, void *elem)
104
+ {
105
+ void *ret_elem;
106
+ int *index = (int *)h_get(hs->ht, elem);
107
+ if (index == NULL) {
108
+ return NULL;
109
+ } else {
110
+ int i = *index;
111
+ ret_elem = hs->elems[i];
112
+ h_del(hs->ht, elem);
113
+ hs->size--;
114
+ memmove(&hs->elems[i], &hs->elems[i+1], sizeof(void *) * (hs->size - i));
115
+ return ret_elem;
116
+ }
117
+ }
118
+
119
+ int hs_exists(HashSet *hs, void *elem)
120
+ {
121
+ return h_has_key(hs->ht, elem);
122
+ }
123
+
124
+ HashSet *hs_merge(HashSet *hs, HashSet *other)
125
+ {
126
+ int i;
127
+ for (i = 0; i < other->size; i++) {
128
+ hs_add(hs, other->elems[i]);
129
+ }
130
+ // Now free the other hashset. It is no longer needed. No need, however, to
131
+ // delete the elements as they are in the new hash set
132
+ hs_destroy(other);
133
+ return hs;
134
+ }
135
+
136
+ void *hs_orig(HashSet *hs, void *elem)
137
+ {
138
+ int *i = h_get(hs->ht, elem);
139
+ if (i) return hs->elems[*i];
140
+ else return NULL;
141
+ }
data/ext/hashset.h ADDED
@@ -0,0 +1,37 @@
1
+ #ifndef FRT_HASHSET_H
2
+ #define FRT_HASHSET_H
3
+
4
+ #include "hash.h"
5
+ #include "array.h"
6
+ #include "global.h"
7
+
8
+ typedef struct HashSet { /* set of opaque elements: an array of the elements plus a hash table for lookup */
9
+ int capa; /* number of slots allocated in elems (hs_add grows it: HS_MIN_SIZE, then doubling) */
10
+ int size; /* number of elements currently stored in elems */
11
+ void **elems; /* the elements, appended in insertion order; NULL until the first hs_add */
12
+ HshTable *ht; /* maps an element to a heap-allocated int holding its index in elems */
13
+ void (*free_elem)(void *p); /* element destructor; the constructors substitute a no-op when given NULL */
14
+ } HashSet;
15
+
16
+ HashSet *hs_create(unsigned int (*hash)(const void *p),
17
+ int (*eq)(const void *p1, const void *p2),
18
+ void (*free_elem)(void *p));
19
+ HashSet *hs_str_create(void (*free_elem)(void *p));
20
+ void hs_destroy(void *p);
21
+ void hs_destroy_all(void *p);
22
+ int hs_add(HashSet *hs, void *elem);
23
+ int hs_del(HashSet *hs, void *elem);
24
+ void *hs_rem(HashSet *hs, void *elem);
25
+ int hs_exists(HashSet *hs, void *elem);
26
+ HashSet *hs_merge(HashSet *hs, HashSet *other);
27
+ void *hs_orig(HashSet *hs, void *elem);
28
+ void hs_clear(HashSet *self);
29
+
30
+ // TODO: finish these functions.
31
+ //int hs_osf(HashSet *hs, void *elem);
32
+ //HashSet hs_or(HashSet *hs1, HashSet *h2);
33
+ //HashSet hs_excl_or(HashSet *hs1, HashSet *h2);
34
+ //HashSet hs_and(HashSet *hs1, HashSet *h2);
35
+ //HashSet hs_mask(HashSet *hs1, HashSet *h2);
36
+
37
+ #endif
data/ext/helper.c ADDED
@@ -0,0 +1,11 @@
1
+ #include <string.h>
2
+
3
/*
 * Length of the common prefix of two NUL-terminated strings.
 *
 * s1, s2 - strings to compare; both must be non-NULL
 *
 * Returns the index of the first position at which the strings differ, or
 * the length of s1 when s1 is a prefix of s2 (or the strings are equal).
 *
 * The function is deliberately NOT declared `inline`: helper.h exports it to
 * other translation units, and a plain `inline` definition does not provide
 * an external definition under C99 and later, which breaks linking.
 */
int hlp_string_diff(const char *const s1, const char *const s2)
{
  int i = 0;

  while (s1[i] != '\0' && s1[i] == s2[i]) {
    i++;
  }
  return i;
}
data/ext/helper.h ADDED
@@ -0,0 +1,5 @@
1
+ #ifndef FRT_HELPER_H
2
+ #define FRT_HELPER_H
3
+
4
+ int hlp_string_diff(char *s1, char *s2);
5
+ #endif
data/ext/inc/lang.h ADDED
@@ -0,0 +1,41 @@
1
+ #ifndef FRT_LANG_H
2
+ #define FRT_LANG_H
3
+
4
+ #include <ruby.h>
5
+
6
+ #define FERRET_EXT
7
+
8
+ #define MAX_ERROR_LEN 2048
9
+ #define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
10
+ extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
11
+ extern void weprintf(const char *fmt, ...);
12
+ extern char *progname(void);
13
+ extern void setprogname(const char *str);
14
+
15
+ extern VALUE cQueryParseException;
16
+
17
+ #define ERROR rb_eException
18
+ #define IO_ERROR rb_eIOError
19
+ #define ARG_ERROR rb_eArgError
20
+ #define EOF_ERROR rb_eEOFError
21
+ #define UNSUPPORTED_ERROR rb_eNotImpError
22
+ #define STATE_ERROR rb_eException
23
+ #define PARSE_ERROR cQueryParseException
24
+ #define MEM_ERROR rb_eNoMemError
25
+
26
+ typedef void * mutex_t;
27
+ typedef void * thread_key_t;
28
+ #define MUTEX_INITIALIZER NULL
29
+ #define MUTEX_RECURSIVE_INITIALIZER NULL
30
+ #define mutex_init(a, b)
31
+ #define mutex_lock(a)
32
+ #define mutex_trylock(a)
33
+ #define mutex_unlock(a)
34
+ #define mutex_destroy(a)
35
+ #define thread_key_create(a, b)
36
+ #define thread_key_delete(a)
37
+ #define thread_setspecific(a, b)
38
+ #define thread_getspecific(a) NULL
39
+ #define thread_exit(a)
40
+
41
+ #endif
data/ext/ind.c ADDED
@@ -0,0 +1,389 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ static const char *ID_STRING = "id";
5
+
6
/*
 * Close the read side of an Index.
 *
 * If a searcher is open it is closed with sea_close() and both the searcher
 * and reader pointers are cleared (the reader is not closed separately here;
 * presumably sea_close() takes care of the reader it wraps -- TODO confirm).
 * Otherwise, if only a reader is open, it is closed with ir_close().
 *
 * The argument is parenthesised so any pointer expression can be passed.
 */
#define INDEX_CLOSE_READER(self) do {\
  if ((self)->sea) {\
    sea_close((self)->sea);\
    (self)->sea = NULL;\
    (self)->ir = NULL;\
  } else if ((self)->ir) {\
    ir_close((self)->ir);\
    (self)->ir = NULL;\
  }\
} while (0)
16
+
17
/*
 * After a change made through the reader: commit the reader immediately when
 * auto_flush is on, otherwise just remember that there are pending writes.
 * Expects an `Index *self` in scope. Wrapped in do/while(0) so that it acts
 * as a single statement (no dangling-else surprises) when followed by `;`.
 */
#define AUTOFLUSH_IR do {\
  if (self->auto_flush) ir_commit(self->ir);\
  else self->has_writes = true;\
} while (0)
19
+
20
/*
 * After a change made through the writer: close the writer (and forget it)
 * immediately when auto_flush is on, otherwise just remember that there are
 * pending writes. Expects an `Index *self` in scope. Wrapped in do/while(0)
 * so that it acts as a single statement when followed by `;`.
 */
#define AUTOFLUSH_IW do {\
  if (self->auto_flush) {\
    iw_close(self->iw);\
    self->iw = NULL;\
  } else self->has_writes = true;\
} while (0)
25
+
26
+ void index_auto_flush_ir(Index *self)
27
+ {
28
+ AUTOFLUSH_IR;
29
+ }
30
+
31
+ void index_auto_flush_iw(Index *self)
32
+ {
33
+ AUTOFLUSH_IW;
34
+ }
35
+
36
+ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
37
+ bool create)
38
+ {
39
+ HashSet *all_fields = hs_str_create(&free);
40
+ Index *self = ALLOC(Index);
41
+ ZEROSET(self, Index, 1);
42
+ mutex_init(&self->mutex, NULL);
43
+ self->has_writes = false;
44
+ if (store) {
45
+ self->store = store;
46
+ self->close_store = false;
47
+ } else {
48
+ self->store = open_ram_store();
49
+ create = true;
50
+ self->close_store = true;
51
+ }
52
+ if (analyzer) {
53
+ self->analyzer = analyzer;
54
+ self->close_analyzer = false;
55
+ } else {
56
+ self->analyzer = standard_analyzer_create();
57
+ self->close_analyzer = true;
58
+ }
59
+ self->use_compound_file = true;
60
+
61
+ if (create) {
62
+ self->iw = iw_open(self->store, self->analyzer, create, false, false);
63
+ iw_close(self->iw);
64
+ self->iw = NULL;
65
+ }
66
+
67
+ /* options */
68
+ self->key = NULL;
69
+ self->id_field = (char *)ID_STRING;
70
+ self->def_field = (char *)ID_STRING;
71
+ self->auto_flush = false;
72
+
73
+ self->qp = qp_create(all_fields, def_fields, self->analyzer);
74
+ /* Index is a convenience class so set qp convenience options */
75
+ self->qp->allow_any_fields = true;
76
+ self->qp->clean_str = true;
77
+ self->qp->handle_parse_errors = true;
78
+
79
+ return self;
80
+ }
81
+
82
+ void index_destroy(Index *self)
83
+ {
84
+ mutex_destroy(&self->mutex);
85
+ INDEX_CLOSE_READER(self);
86
+ if (self->iw) iw_close(self->iw);
87
+ if (self->close_store) self->store->close(self->store);
88
+ if (self->close_analyzer) a_destroy(self->analyzer);
89
+ if (self->qp) qp_destroy(self->qp);
90
+ if (self->id_field != ((char *)ID_STRING)) free(self->id_field);
91
+ if (self->def_field != ((char *)ID_STRING)) free(self->def_field);
92
+ if (self->key) hs_destroy_all(self->key);
93
+ free(self);
94
+ }
95
+
96
+ void index_flush(Index *self)
97
+ {
98
+ if (self->ir) {
99
+ ir_commit(self->ir);
100
+ } else if (self->iw) {
101
+ iw_close(self->iw);
102
+ self->iw = NULL;
103
+ }
104
+ self->has_writes = false;
105
+ }
106
+ inline void ensure_writer_open(Index *self)
107
+ {
108
+ if (!self->iw) {
109
+ INDEX_CLOSE_READER(self);
110
+ self->iw = iw_open(self->store, self->analyzer, false, false, false);
111
+ self->iw->use_compound_file = self->use_compound_file;
112
+ } else {
113
+ self->iw->analyzer = self->analyzer; /* in case it has changed */
114
+ }
115
+ }
116
+
117
+ inline void ensure_reader_open(Index *self)
118
+ {
119
+ if (self->ir) {
120
+ if (!ir_is_latest(self->ir)) {
121
+ INDEX_CLOSE_READER(self);
122
+ self->ir = ir_open(self->store, false);
123
+ }
124
+ } else {
125
+ if (self->iw) {
126
+ iw_close(self->iw);
127
+ self->iw = NULL;
128
+ }
129
+ self->ir = ir_open(self->store, false);
130
+ }
131
+ }
132
+
133
+ inline void ensure_searcher_open(Index *self)
134
+ {
135
+ ensure_reader_open(self);
136
+ if (!self->sea) {
137
+ self->sea = sea_create(self->ir);
138
+ }
139
+ }
140
+
141
+ int index_size(Index *self)
142
+ {
143
+ int size;
144
+ mutex_lock(&self->store->ext_mutex);
145
+ ensure_reader_open(self);
146
+ size = self->ir->num_docs(self->ir);
147
+ mutex_unlock(&self->store->ext_mutex);
148
+ return size;
149
+ }
150
+
151
+ void index_optimize(Index *self)
152
+ {
153
+ mutex_lock(&self->store->ext_mutex);
154
+ ensure_writer_open(self);
155
+ iw_optimize(self->iw);
156
+ AUTOFLUSH_IW;
157
+ mutex_unlock(&self->store->ext_mutex);
158
+ }
159
+
160
+ bool index_has_del(Index *self)
161
+ {
162
+ bool has_del;
163
+ mutex_lock(&self->store->ext_mutex);
164
+ ensure_reader_open(self);
165
+ has_del = self->ir->has_deletions(self->ir);
166
+ mutex_unlock(&self->store->ext_mutex);
167
+ return has_del;
168
+ }
169
+
170
+ bool index_is_deleted(Index *self, int doc_num)
171
+ {
172
+ bool is_del;
173
+ mutex_lock(&self->store->ext_mutex);
174
+ ensure_reader_open(self);
175
+ is_del = self->ir->is_deleted(self->ir, doc_num);
176
+ mutex_unlock(&self->store->ext_mutex);
177
+ return is_del;
178
+ }
179
+
180
+ static void inline index_add_doc_i(Index *self, Document *doc)
181
+ {
182
+ /* If there is a key specified delete the document with the same key */
183
+ if (self->key) {
184
+ int i;
185
+ char *field;
186
+ DocField *df;
187
+ Query *q = bq_create(false);
188
+ TopDocs *td;
189
+ ensure_searcher_open(self);
190
+ for (i = 0; i < self->key->size; i++) {
191
+ field = self->key->elems[i];
192
+ df = doc_get_field(doc, field);
193
+ if (!df) continue;
194
+ bq_add_query(q, tq_create(term_create(field, df->data)), BC_MUST);
195
+ }
196
+ td = sea_search(self->sea, q, 0, 1, NULL, NULL);
197
+ if (td->total_hits > 1) {
198
+ td_destroy(td);
199
+ eprintf(ARG_ERROR, "Tried to use a key that was not unique");
200
+ } else if (td->total_hits == 1) {
201
+ ir_delete_doc(self->ir, td->hits[0]->doc);
202
+ }
203
+ q->destroy(q);
204
+ td_destroy(td);
205
+ }
206
+ ensure_writer_open(self);
207
+ iw_add_doc(self->iw, doc);
208
+ AUTOFLUSH_IW;
209
+ }
210
+
211
+ void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
212
+ {
213
+ Analyzer *tmp_analyzer;
214
+ mutex_lock(&self->store->ext_mutex);
215
+ tmp_analyzer = self->analyzer;
216
+ self->analyzer = analyzer;
217
+ iw_add_doc(self->iw, doc);
218
+ index_add_doc_i(self, doc);
219
+ self->analyzer = tmp_analyzer;
220
+ mutex_unlock(&self->store->ext_mutex);
221
+ }
222
+
223
+ void index_add_doc(Index *self, Document *doc)
224
+ {
225
+ mutex_lock(&self->store->ext_mutex);
226
+ index_add_doc_i(self, doc);
227
+ mutex_unlock(&self->store->ext_mutex);
228
+ }
229
+
230
+ void index_add_string(Index *self, char *str, Analyzer *analyzer)
231
+ {
232
+ Document *doc = doc_create();
233
+ doc_add_field(doc, df_create(self->id_field, estrdup(str),
234
+ DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
235
+ if (analyzer) index_add_doc_a(self, doc, analyzer);
236
+ else index_add_doc(self, doc);
237
+ doc_destroy(doc);
238
+ }
239
+
240
+ void index_add_array(Index *self, Array *ary, Analyzer *analyzer)
241
+ {
242
+ int i;
243
+ Document *doc = doc_create();
244
+ for (i = 0; i < ary->size; i++) {
245
+ doc_add_field(doc, df_create(self->id_field, estrdup(ary->elems[i]),
246
+ DF_STORE_YES, DF_INDEX_TOKENIZED, DF_TERM_VECTOR_NO));
247
+ }
248
+ if (analyzer) index_add_doc_a(self, doc, analyzer);
249
+ else index_add_doc(self, doc);
250
+ doc_destroy(doc);
251
+ }
252
+
253
+ Query *index_get_query(Index *self, char *qstr)
254
+ {
255
+ int i;
256
+ HashSet *all_fields;
257
+ ensure_searcher_open(self);
258
+ all_fields = self->ir->get_field_names(self->ir, IR_ALL);
259
+ for (i = 0; i < all_fields->size; i++)
260
+ hs_add(self->qp->all_fields, estrdup(all_fields->elems[i]));
261
+ hs_destroy(all_fields);
262
+ return qp_parse(self->qp, qstr);
263
+ }
264
+
265
+ TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
266
+ int num_docs, Filter *filter, Sort *sort)
267
+ {
268
+ Query *query;
269
+ TopDocs *td;
270
+ query = index_get_query(self, qstr); /* will ensure_searcher is open */
271
+ td = sea_search(self->sea, query, first_doc, num_docs, filter, sort);
272
+ query->destroy(query);
273
+ return td;
274
+ }
275
+
276
+ Document *index_get_doc(Index *self, int doc_num)
277
+ {
278
+ Document *doc;
279
+ ensure_reader_open(self);
280
+ doc = self->ir->get_doc(self->ir, doc_num);
281
+ return doc;
282
+ }
283
+
284
+ Document *index_get_doc_ts(Index *self, int doc_num)
285
+ {
286
+ Document *doc;
287
+ mutex_lock(&self->store->ext_mutex);
288
+ doc = index_get_doc(self, doc_num);
289
+ mutex_unlock(&self->store->ext_mutex);
290
+ return doc;
291
+ }
292
+
293
+ int index_term_id(Index *self, Term *term)
294
+ {
295
+ TermDocEnum *tde;
296
+ int doc_num = -1;
297
+ ensure_reader_open(self);
298
+ tde = ir_term_docs_for(self->ir, term);
299
+ if (tde->next(tde)) {
300
+ doc_num = tde->doc_num(tde);
301
+ }
302
+ tde->close(tde);
303
+ return doc_num;
304
+ }
305
+
306
+ Document *index_get_doc_term(Index *self, Term *term)
307
+ {
308
+ Document *doc = NULL;
309
+ TermDocEnum *tde;
310
+ mutex_lock(&self->store->ext_mutex);
311
+ ensure_reader_open(self);
312
+ tde = ir_term_docs_for(self->ir, term);
313
+ if (tde->next(tde)) {
314
+ doc = index_get_doc(self, tde->doc_num(tde));
315
+ tde->close(tde);
316
+ }
317
+ mutex_unlock(&self->store->ext_mutex);
318
+ return doc;
319
+ }
320
+
321
+ Document *index_get_doc_id(Index *self, char *id)
322
+ {
323
+ Term t;
324
+ t.field = self->id_field;
325
+ t.text = id;
326
+ return index_get_doc_term(self, &t);
327
+ }
328
+
329
+ void index_delete(Index *self, int doc_num)
330
+ {
331
+ mutex_lock(&self->store->ext_mutex);
332
+ ensure_reader_open(self);
333
+ ir_delete_doc(self->ir, doc_num);
334
+ AUTOFLUSH_IR;
335
+ mutex_unlock(&self->store->ext_mutex);
336
+ }
337
+
338
+ void index_delete_term(Index *self, Term *term)
339
+ {
340
+ TermDocEnum *tde;
341
+ mutex_lock(&self->store->ext_mutex);
342
+ ensure_reader_open(self);
343
+ tde = ir_term_docs_for(self->ir, term);
344
+ while (tde->next(tde)) {
345
+ ir_delete_doc(self->ir, tde->doc_num(tde));
346
+ AUTOFLUSH_IR;
347
+ }
348
+ tde->close(tde);
349
+ mutex_unlock(&self->store->ext_mutex);
350
+ }
351
+
352
+ void index_delete_id(Index *self, char *id)
353
+ {
354
+ Term t;
355
+ t.field = self->id_field;
356
+ t.text = id;
357
+ index_delete_term(self, &t);
358
+ }
359
+
360
+ static void index_qdel_i(Searcher *sea, int doc_num, void *arg)
361
+ {
362
+ ir_delete_doc(sea->ir, doc_num);
363
+ }
364
+
365
+ void index_delete_query(Index *self, Query *q, Filter *f)
366
+ {
367
+ mutex_lock(&self->store->ext_mutex);
368
+ ensure_searcher_open(self);
369
+ sea_search_each(self->sea, q, f, &index_qdel_i, NULL);
370
+ AUTOFLUSH_IR;
371
+ mutex_unlock(&self->store->ext_mutex);
372
+ }
373
+
374
+ void index_delete_query_str(Index *self, char *qstr, Filter *f)
375
+ {
376
+ Query *q = index_get_query(self, qstr);
377
+ index_delete_query(self, q, f);
378
+ q->destroy(q);
379
+ }
380
+
381
+ Explanation *index_explain(Index *self, Query *q, int doc_num)
382
+ {
383
+ Explanation *expl;
384
+ mutex_lock(&self->store->ext_mutex);
385
+ ensure_searcher_open(self);
386
+ expl = sea_explain(self->sea, q, doc_num);
387
+ mutex_unlock(&self->store->ext_mutex);
388
+ return expl;
389
+ }