ferret 0.3.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
data/ext/global.c
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
#include "global.h"
|
2
|
+
#include <stdarg.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <errno.h>
|
6
|
+
#include <assert.h>
|
7
|
+
#include <math.h>
|
8
|
+
#include <ctype.h>
|
9
|
+
|
10
|
+
const char *EMPTY_STRING = "";
|
11
|
+
|
12
|
+
/* min3: return the smallest of the three ints a, b and c. */
int min3(int a, int b, int c)
{
    int lowest = (a < b) ? a : b;

    if (c < lowest) {
        lowest = c;
    }
    return lowest;
}
|
16
|
+
|
17
|
+
/* min: return the smaller of a and b (b when they are equal). */
int min(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}
|
21
|
+
|
22
|
+
/* max3: return the largest of the three ints a, b and c. */
int max3(int a, int b, int c)
{
    int highest = (a > b) ? a : b;

    if (c > highest) {
        highest = c;
    }
    return highest;
}
|
26
|
+
|
27
|
+
/* max: return the larger of a and b (b when they are equal). */
int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
|
31
|
+
|
32
|
+
/*
 * scmp: compare two C strings through pointers to them.
 *
 * Each argument points at a `char *`; the result is that of strcmp() on the
 * two strings (negative, zero or positive).
 */
int scmp(const void *p1, const void *p2)
{
    const char *const *lhs = (const char *const *)p1;
    const char *const *rhs = (const char *const *)p2;

    return strcmp(*lhs, *rhs);
}
|
36
|
+
|
37
|
+
/*
 * icmp_risky: compare two ints (passed by pointer) by plain subtraction.
 *
 * Returns *p1 - *p2. The subtraction can overflow when the operands are far
 * apart, which is why the function is named "risky"; icmp() is the
 * overflow-free alternative.
 */
int icmp_risky(const void *p1, const void *p2)
{
    const int *lhs = (const int *)p1;
    const int *rhs = (const int *)p2;

    return *lhs - *rhs;
}
|
41
|
+
|
42
|
+
/*
 * icmp: compare two ints passed by pointer.
 *
 * Returns 1 when *p1 > *p2, -1 when *p1 < *p2 and 0 when they are equal.
 */
int icmp(const void *p1, const void *p2)
{
    const int lhs = *(const int *)p1;
    const int rhs = *(const int *)p2;

    if (lhs == rhs) {
        return 0;
    }
    return (lhs < rhs) ? -1 : 1;
}
|
51
|
+
|
52
|
+
|
53
|
+
/*
 * eprintf_old: print an error message to stderr and exit with status 2.
 *
 * stdout is flushed first. If a program name has been set it is printed as a
 * "name: " prefix. The message is formatted printf-style from fmt and the
 * variadic arguments. When fmt ends with ':' the text of strerror() for the
 * errno value seen on entry is appended. A newline always terminates the
 * message.
 */
void eprintf_old(const char *fmt, ...)
{
    va_list args;
    /* Capture errno now: the stdio calls below are allowed to overwrite it. */
    const int saved_errno = errno;
    const char *prog = progname();
    const size_t fmt_len = strlen(fmt);

    fflush(stdout);
    /* progname() hands back a static buffer, so test for an empty name too. */
    if (prog != NULL && prog[0] != '\0')
        fprintf(stderr, "%s: ", prog);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    if (fmt_len > 0 && fmt[fmt_len - 1] == ':')
        fprintf(stderr, " %s", strerror(saved_errno));
    fprintf(stderr, "\n");
    exit(2); /* conventional value for failed execution */
}
|
71
|
+
|
72
|
+
/*
 * eprintf_inner: print a located error message to stderr and exit with
 * status 2.
 *
 * file / line_num  source location printed in the header line
 * etype            error-type label printed in the header line
 * fmt, ...         printf-style message
 *
 * stdout is flushed first. Output is a blank line, an optional "name: "
 * prefix when a program name has been set, "<etype> occured at
 * <file>:line", then the formatted message. When fmt ends with ':' the
 * strerror() text for the errno value seen on entry is appended.
 */
void eprintf_inner(char *file, int line_num, const char *etype, const char *fmt, ...)
{
    va_list args;
    /* Capture errno now: the stdio calls below are allowed to overwrite it. */
    const int saved_errno = errno;
    const char *prog = progname();
    const size_t fmt_len = strlen(fmt);

    fflush(stdout);
    fprintf(stderr, "\n");
    /* progname() hands back a static buffer, so test for an empty name too. */
    if (prog != NULL && prog[0] != '\0')
        fprintf(stderr, "%s: ", prog);

    fprintf(stderr, "%s occured at <%s>:%d\n", etype, file, line_num);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    if (fmt_len > 0 && fmt[fmt_len - 1] == ':')
        fprintf(stderr, " %s", strerror(saved_errno));
    fprintf(stderr, "\n");
    exit(2); /* conventional value for failed execution */
}
|
92
|
+
|
93
|
+
/*
 * weprintf: print a warning message to stderr and return (does not exit).
 *
 * stdout is flushed first. If a program name has been set it is printed as a
 * "name: " prefix. The message is formatted printf-style. When fmt ends with
 * ':' the strerror() text for the errno value seen on entry is appended. A
 * newline always terminates the message.
 */
void weprintf(const char *fmt, ...)
{
    va_list args;
    /* Capture errno now: the stdio calls below are allowed to overwrite it. */
    const int saved_errno = errno;
    const char *prog = progname();
    const size_t fmt_len = strlen(fmt);

    fflush(stdout);
    /* progname() hands back a static buffer, so test for an empty name too. */
    if (prog != NULL && prog[0] != '\0')
        fprintf(stderr, "%s: ", prog);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    if (fmt_len > 0 && fmt[fmt_len - 1] == ':')
        fprintf(stderr, " %s", strerror(saved_errno));
    fprintf(stderr, "\n");
}
|
110
|
+
|
111
|
+
static char name[200];          /* program name for error msgs */

/*
 * setprogname: store the program name used as a prefix in error messages.
 *
 * The name is copied into a fixed-size static buffer; names that do not fit
 * are truncated rather than overflowing the buffer. A NULL str clears the
 * stored name.
 */
void setprogname(const char *str)
{
    if (str == NULL) {
        name[0] = '\0';
        return;
    }
    /* snprintf always NUL-terminates and never writes past sizeof(name). */
    snprintf(name, sizeof(name), "%s", str);
}

/*
 * progname: return the stored program name.
 *
 * The returned pointer refers to the static buffer and is never NULL; it is
 * the empty string until setprogname() has stored a name.
 */
char *progname(void)
{
    return name;
}
|
123
|
+
|
124
|
+
/* concatenate two strings freeing the second */
|
125
|
+
char *estrcat(char *str1, char *str2)
|
126
|
+
{
|
127
|
+
int len1 = strlen(str1);
|
128
|
+
int len2 = strlen(str2);
|
129
|
+
REALLOC_N(str1, char, len1 + len2 + 3); // leave room for <CR>
|
130
|
+
memcpy(str1 + len1, str2, len2 + 1); // make sure '\0' copied too
|
131
|
+
free(str2);
|
132
|
+
return str1;
|
133
|
+
}
|
134
|
+
|
135
|
+
/* epstrdup: duplicate a string with a format, report if error */
|
136
|
+
char *epstrdup(const char *fmt, int len, ...)
|
137
|
+
{
|
138
|
+
va_list args;
|
139
|
+
len += strlen(fmt);
|
140
|
+
|
141
|
+
char *string = ALLOC_N(char, len + 1);
|
142
|
+
va_start(args, len);
|
143
|
+
vsprintf(string, fmt, args);
|
144
|
+
va_end(args);
|
145
|
+
|
146
|
+
return string;
|
147
|
+
}
|
148
|
+
|
149
|
+
/* estrdup: duplicate a string, report if error */
|
150
|
+
char *estrdup(const char *s)
|
151
|
+
{
|
152
|
+
char *t;
|
153
|
+
|
154
|
+
t = (char *)malloc(strlen(s) + 1);
|
155
|
+
if (t == NULL)
|
156
|
+
eprintf(MEM_ERROR, "estrdup(\"%.20s\") failed:", s);
|
157
|
+
|
158
|
+
strcpy(t, s);
|
159
|
+
return t;
|
160
|
+
}
|
161
|
+
|
162
|
+
/* emalloc: malloc and report if error */
|
163
|
+
void *emalloc(size_t size)
|
164
|
+
{
|
165
|
+
void *p;
|
166
|
+
p = malloc(size);
|
167
|
+
if (p == NULL)
|
168
|
+
eprintf(MEM_ERROR, "malloc of %u bytes failed:", size);
|
169
|
+
return p;
|
170
|
+
}
|
171
|
+
|
172
|
+
void *erealloc(void *ptr, size_t size)
|
173
|
+
{
|
174
|
+
void *p;
|
175
|
+
p = realloc(ptr, size);
|
176
|
+
if (p == NULL)
|
177
|
+
eprintf(MEM_ERROR, "malloc of %u bytes failed:", size);
|
178
|
+
return p;
|
179
|
+
}
|
180
|
+
|
181
|
+
/*
 * dbl_to_s: format a double into buf and return buf.
 *
 * The following method comes slightly modified from the ruby core flo_to_s
 * in numeric.c. We use it so that tests comparing floats as strings will
 * pass.
 *
 * buf  caller-supplied buffer; it must be large enough for the formatted
 *      number (the function does not check its size)
 * num  value to format
 *
 * Infinities are written as "Infinity" / "-Infinity" and NaN as "NaN".
 * Other values are printed with up to 7 significant digits, switching to
 * exponent form when the fixed form would end in a bare decimal point, and
 * trailing zeros of the mantissa are stripped while keeping one digit after
 * the decimal point. The result is always written into buf, so the return
 * value is never separately allocated memory.
 */
char *dbl_to_s(char *buf, double num)
{
    char *p, *e;

    if (isinf(num)) {
        return strcpy(buf, num < 0 ? "-Infinity" : "Infinity");
    } else if (isnan(num)) {
        return strcpy(buf, "NaN");
    }

    sprintf(buf, "%#.7g", num);
    if (!(e = strchr(buf, 'e'))) {
        e = buf + strlen(buf);
    }
    /* <ctype.h> functions require an unsigned char value */
    if (!isdigit((unsigned char)e[-1])) {
        /* reformat if ended with decimal point (ex 111111111111111.) */
        sprintf(buf, "%#.6e", num);
        if (!(e = strchr(buf, 'e'))) {
            e = buf + strlen(buf);
        }
    }
    /* e marks the end of the mantissa; back p up over its trailing zeros */
    p = e;
    while (p[-1] == '0' && isdigit((unsigned char)p[-2])) {
        p--;
    }
    /* close the gap, moving the exponent (or just the terminator) down */
    memmove(p, e, strlen(e) + 1);
    return buf;
}
|
212
|
+
|
213
|
+
/*
 * lower_str: convert the NUL-terminated string str to lower case in place,
 * using tolower() on each byte.
 */
void lower_str(char *str)
{
    for (; *str != '\0'; str++) {
        /* tolower() is only defined for unsigned char values (and EOF);
         * a plain char may be negative for bytes >= 0x80. */
        *str = (char)tolower((unsigned char)*str);
    }
}
|
data/ext/global.h
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
#ifndef FRT_GLOBAL_H
|
2
|
+
#define FRT_GLOBAL_H
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <stdio.h>
|
6
|
+
#include <assert.h>
|
7
|
+
#include <pthread.h>
|
8
|
+
#include "lang.h"
|
9
|
+
|
10
|
+
//#define DEBUG
|
11
|
+
#define VALGRIND
|
12
|
+
|
13
|
+
#define false 0
|
14
|
+
#define true 1
|
15
|
+
|
16
|
+
typedef unsigned char bool;
|
17
|
+
typedef unsigned char uchar;
|
18
|
+
typedef unsigned int uint;
|
19
|
+
|
20
|
+
typedef void (*destroy_func_t)(void *p);
|
21
|
+
|
22
|
+
#define MAX_WORD_SIZE 255
|
23
|
+
#define MAX_PATH 1024
|
24
|
+
#define MAX_BUFFER_SIZE 1024
|
25
|
+
#define ARRAY_INIT_SIZE 4
|
26
|
+
|
27
|
+
/* NELEMS: number of elements in a true array (not a pointer). Fully
 * parenthesised so it can be used safely inside larger expressions. */
#define NELEMS(array) (sizeof(array) / sizeof((array)[0]))
|
28
|
+
|
29
|
+
#define ZEROSET(ptr, type, n) memset(ptr, 0, sizeof(type)*(n))
|
30
|
+
#define ALLOC_AND_ZERO_N(type,n) (type*)ZEROSET(emalloc(sizeof(type)*(n)), type, n)
|
31
|
+
|
32
|
+
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
33
|
+
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
34
|
+
|
35
|
+
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
|
36
|
+
#define MAX3(a, b, c) ((a) > (b) ? ((a) > (c) ? (a) : (c)) : ((b) > (c) ? (b) : (c)))
|
37
|
+
|
38
|
+
/*
 * RECAPA: grow a dynamic array member when it is full.
 *
 * self  pointer to the owning struct
 * len   name of the member holding the current element count
 * capa  name of the member holding the current capacity
 * ptr   name of the member holding the array pointer
 * type  element type
 *
 * When len >= capa the capacity is doubled (or set to 4 when it was 0) and
 * the array is resized with REALLOC_N. `self` is parenthesised so that
 * non-trivial pointer expressions expand correctly.
 */
#define RECAPA(self, len, capa, ptr, type) \
  do {\
    if ((self)->len >= (self)->capa) {\
      if ((self)->capa) {\
        (self)->capa *= 2;\
      } else {\
        (self)->capa = 4;\
      }\
      REALLOC_N((self)->ptr, type, (self)->capa);\
    }\
  } while (0)
|
49
|
+
|
50
|
+
#define Jx fprintf(stderr,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
51
|
+
#define Xj fprintf(stdout,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
52
|
+
|
53
|
+
#define efree free
|
54
|
+
extern void *emalloc(size_t n);
|
55
|
+
extern void *erealloc(void *ptr, size_t n);
|
56
|
+
extern char *estrdup(const char *s);
|
57
|
+
extern char *epstrdup(const char *fmt, int len, ...);
|
58
|
+
extern char *estrcat(char *str, char *str_cat);
|
59
|
+
|
60
|
+
extern const char *EMPTY_STRING;
|
61
|
+
|
62
|
+
extern int scmp(const void *p1, const void *p2);
|
63
|
+
extern int icmp(const void *p1, const void *p2);
|
64
|
+
extern int icmp_risky(const void *p1, const void *p2);
|
65
|
+
|
66
|
+
int min(int a, int b);
|
67
|
+
int min3(int a, int b, int c);
|
68
|
+
int max(int a, int b);
|
69
|
+
int max3(int a, int b, int c);
|
70
|
+
|
71
|
+
char *dbl_to_s(char *buf, double num);
|
72
|
+
void lower_str(char *str);
|
73
|
+
#endif
|
data/ext/hash.c
ADDED
@@ -0,0 +1,446 @@
|
|
1
|
+
#include <hash.h>
|
2
|
+
#include <global.h>
|
3
|
+
#include <string.h>
|
4
|
+
|
5
|
+
unsigned int hash(const char *str)
|
6
|
+
{
|
7
|
+
register unsigned int h;
|
8
|
+
register unsigned char *p;
|
9
|
+
|
10
|
+
h = 0;
|
11
|
+
for (p = (unsigned char *) str; *p != '\0'; p++)
|
12
|
+
h = MULTIPLIER * h + *p;
|
13
|
+
|
14
|
+
return h % NUM_ENTRIES;
|
15
|
+
}
|
16
|
+
|
17
|
+
HashEntry **ht_create()
|
18
|
+
{
|
19
|
+
int i;
|
20
|
+
HashEntry **new_ht = ALLOC_N(HashEntry *, NUM_ENTRIES);
|
21
|
+
|
22
|
+
for (i = 0; i < NUM_ENTRIES; i++)
|
23
|
+
new_ht[i] = NULL;
|
24
|
+
|
25
|
+
return new_ht;
|
26
|
+
}
|
27
|
+
|
28
|
+
/* ht_count: return the number of entries stored in ht, found by walking
 * the chain of every one of the NUM_ENTRIES buckets. */
int ht_count(HashEntry **ht)
{
    int total = 0;
    int bucket = 0;

    while (bucket < NUM_ENTRIES) {
        const HashEntry *node = ht[bucket];

        while (node != NULL) {
            total++;
            node = node->next;
        }
        bucket++;
    }
    return total;
}
|
39
|
+
|
40
|
+
/*
 * ht_destroy_all: free the table, every entry, every entry name and -- via
 * fn -- every stored value.
 *
 * fn is called once per entry with that entry's value before the entry
 * itself is released.
 */
void ht_destroy_all(HashEntry **ht, void (*fn)(void *))
{
    int bucket;

    for (bucket = 0; bucket < NUM_ENTRIES; bucket++) {
        HashEntry *node = ht[bucket];

        while (node != NULL) {
            /* grab the successor before the node is freed */
            HashEntry *following = node->next;

            fn(node->value);
            free(node->name);
            free(node);
            node = following;
        }
    }
    free(ht);
}
|
55
|
+
|
56
|
+
/*
 * ht_destroy: free the table, every entry and every entry name. Stored
 * values are left untouched (see ht_destroy_all to release them as well).
 */
void ht_destroy(HashEntry **ht)
{
    int bucket;

    for (bucket = 0; bucket < NUM_ENTRIES; bucket++) {
        HashEntry *node = ht[bucket];

        while (node != NULL) {
            /* grab the successor before the node is freed */
            HashEntry *following = node->next;

            free(node->name);
            free(node);
            node = following;
        }
    }
    free(ht);
}
|
70
|
+
|
71
|
+
/*
 * ht_get: look up name in ht.
 *
 * Returns the value stored under name, or NULL when no entry with that name
 * exists in its bucket chain.
 */
void *ht_get(HashEntry **ht, char *name)
{
    HashEntry *node = ht[hash(name)];

    while (node != NULL) {
        if (strcmp(name, node->name) == 0) {
            return node->value;
        }
        node = node->next;
    }
    return NULL;
}
|
84
|
+
|
85
|
+
/*
 * ht_set: store value under name in ht.
 *
 * If an entry with the same name already exists its value pointer is
 * overwritten (the old value is not freed). Otherwise a new entry holding a
 * private copy of name is pushed onto the front of the bucket chain.
 */
void ht_set(HashEntry **ht, char *name, void *value)
{
    const int bucket = hash(name);
    HashEntry *node = ht[bucket];

    while (node != NULL) {
        if (strcmp(name, node->name) == 0) {
            node->value = value;
            return;
        }
        node = node->next;
    }

    node = ALLOC(HashEntry);
    node->name = estrdup(name);
    node->value = value;
    node->next = ht[bucket];
    ht[bucket] = node;
}
|
104
|
+
|
105
|
+
/*
 * ht_delete: remove the entry called name from ht.
 *
 * Returns the value that was stored under name (ownership passes back to
 * the caller), or NULL when there is no such entry. The entry and its copy
 * of the name are freed.
 */
void *ht_delete(HashEntry **ht, char *name)
{
    /* link always points at the pointer that refers to the current node, so
     * the head of the chain and interior nodes are unlinked the same way. */
    HashEntry **link = &ht[hash(name)];

    while (*link != NULL) {
        HashEntry *node = *link;

        if (strcmp(name, node->name) == 0) {
            void *stored = node->value;

            *link = node->next;
            free(node->name);
            free(node);
            return stored;
        }
        link = &node->next;
    }
    return NULL;
}
|
136
|
+
|
137
|
+
/****************************************************************************
|
138
|
+
*
|
139
|
+
* HshTable
|
140
|
+
*
|
141
|
+
****************************************************************************/
|
142
|
+
|
143
|
+
char *dummy_key = "";
|
144
|
+
|
145
|
+
#define PERTURB_SHIFT 5
|
146
|
+
|
147
|
+
#ifdef VALGRIND
|
148
|
+
#define MAX_FREE_HASH_TABLES 0
|
149
|
+
#else
|
150
|
+
#define MAX_FREE_HASH_TABLES 80
|
151
|
+
#endif
|
152
|
+
|
153
|
+
static HshTable *free_hts[MAX_FREE_HASH_TABLES];
|
154
|
+
static int num_free_hts = 0;
|
155
|
+
|
156
|
+
/*
 * str_hash: hash a NUL-terminated string as h = 37 * h + byte over every
 * byte, starting from 0. The full unsigned value is returned (callers mask
 * it to the table size).
 *
 * Defined as an ordinary external function: a non-static `inline`
 * definition without a matching extern declaration provides no external
 * definition under C99/C11 inline semantics and can fail to link.
 */
unsigned int str_hash(const char *const str)
{
    const unsigned char *p = (const unsigned char *)str;
    unsigned int h = 0;

    for (; *p; p++) {
        h = 37 * h + *p;
    }
    return h;
}
|
166
|
+
|
167
|
+
/*
 * h_lookup_str: find the slot for a string key in an open-addressed table.
 *
 * Returns either the slot that already holds a key equal to key_p (same
 * pointer, or same hash and strcmp() == 0), or the slot where such a key
 * should be inserted: the first dummy (deleted) slot met on the probe path
 * if there was one, otherwise the first empty slot. When an insertion slot
 * is returned its hash field is set to the key's hash.
 *
 * The probe index is unsigned so that the recurrence below wraps instead of
 * overflowing a signed int.
 */
HshEntry *h_lookup_str(HshTable *ht, register const void *key_p)
{
    char *key = (char *)key_p;
    const unsigned int hval = str_hash(key);
    const int mask = ht->mask;
    HshEntry *const he0 = ht->table;
    unsigned int perturb;
    unsigned int i = hval & mask;
    HshEntry *he = &he0[i];
    HshEntry *freeslot;

    /* fast path: first probe is empty or holds this very key pointer */
    if (he->key == NULL || he->key == key) {
        he->hash = hval;
        return he;
    }
    if (he->key == dummy_key) {
        freeslot = he;
    } else {
        if ((he->hash == hval) && (strcmp(he->key, key) == 0)) {
            return he;
        }
        freeslot = NULL;
    }

    /* the loop only leaves through one of its return statements */
    for (perturb = hval; ; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        he = &he0[i & mask];
        if (he->key == NULL) {
            /* key is absent: prefer recycling an earlier dummy slot */
            if (freeslot != NULL) he = freeslot;
            he->hash = hval;
            return he;
        }
        if (he->key == key
            || (he->hash == hval
                && he->key != dummy_key
                && strcmp(he->key, key) == 0)) {
            return he;
        }
        if (he->key == dummy_key && freeslot == NULL) {
            freeslot = he;
        }
    }
}
|
212
|
+
|
213
|
+
typedef int (*eq_func)(const void *key1, const void *key2);
|
214
|
+
|
215
|
+
/*
 * h_lookup: find the slot for key using the table's own hash and eq
 * callbacks.
 *
 * Returns either the slot that already holds a matching key (same pointer,
 * or same hash and ht->eq() non-zero), or the slot where the key should be
 * inserted: the first dummy (deleted) slot met on the probe path if there
 * was one, otherwise the first empty slot. When an insertion slot is
 * returned its hash field is set to the key's hash.
 *
 * The probe index is unsigned so that the recurrence below wraps instead of
 * overflowing a signed int.
 */
HshEntry *h_lookup(HshTable *ht, register const void *key)
{
    const unsigned int hval = ht->hash(key);
    const int mask = ht->mask;
    HshEntry *const he0 = ht->table;
    unsigned int perturb;
    unsigned int i = hval & mask;
    HshEntry *he = &he0[i];
    HshEntry *freeslot;
    eq_func eq = ht->eq;

    /* fast path: first probe is empty or holds this very key pointer */
    if (he->key == NULL || he->key == key) {
        he->hash = hval;
        return he;
    }
    if (he->key == dummy_key) {
        freeslot = he;
    } else {
        if ((he->hash == hval) && eq(he->key, key)) {
            return he;
        }
        freeslot = NULL;
    }

    /* the loop only leaves through one of its return statements */
    for (perturb = hval; ; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        he = &he0[i & mask];
        if (he->key == NULL) {
            /* key is absent: prefer recycling an earlier dummy slot */
            if (freeslot != NULL) he = freeslot;
            he->hash = hval;
            return he;
        }
        if (he->key == key
            || (he->hash == hval
                && he->key != dummy_key
                && eq(he->key, key))) {
            return he;
        }
        if (he->key == dummy_key && freeslot == NULL) {
            freeslot = he;
        }
    }
}
|
255
|
+
|
256
|
+
typedef void (*free_func)(void *);
|
257
|
+
/* dummy_free: no-op destructor, installed when a table is created without
 * a key or value free function. */
void dummy_free(void *p)
{
    (void)p;    /* deliberately left alone */
}
|
261
|
+
|
262
|
+
HshTable *h_new_str(void (*free_key)(void *key), void (*free_value)(void *value))
|
263
|
+
{
|
264
|
+
HshTable *ht;
|
265
|
+
if (num_free_hts > 0) {
|
266
|
+
ht = free_hts[--num_free_hts];
|
267
|
+
} else {
|
268
|
+
ht = ALLOC(HshTable);
|
269
|
+
}
|
270
|
+
ht->fill = 0;
|
271
|
+
ht->used = 0;
|
272
|
+
ht->mask = Hsh_MINSIZE - 1;
|
273
|
+
ht->table = ht->smalltable;
|
274
|
+
memset(ht->smalltable, 0, sizeof(ht->smalltable));
|
275
|
+
ht->lookup = &h_lookup_str;
|
276
|
+
ht->free_key = free_key != NULL ? free_key : &dummy_free;
|
277
|
+
ht->free_value = free_value != NULL ? free_value : &dummy_free;
|
278
|
+
return ht;
|
279
|
+
}
|
280
|
+
|
281
|
+
HshTable *h_new(unsigned int (*hash)(const void *key),
|
282
|
+
int (*eq)(const void *key1, const void *key2),
|
283
|
+
void (*free_key)(void *key),
|
284
|
+
void (*free_value)(void *value))
|
285
|
+
{
|
286
|
+
HshTable *ht = h_new_str(free_key, free_value);
|
287
|
+
|
288
|
+
ht->lookup = &h_lookup;
|
289
|
+
ht->eq = eq;
|
290
|
+
ht->hash = hash;
|
291
|
+
return ht;
|
292
|
+
}
|
293
|
+
|
294
|
+
/*
 * h_clear: remove every entry from ht, leaving its capacity unchanged.
 *
 * The table's free_value and free_key callbacks are called (in that order)
 * on each live entry. Every slot -- live, dummy or empty -- is then reset
 * to empty. The value pointer is cleared along with the key so that later
 * lookups and resizes never see a pointer to an already freed value.
 */
void h_clear(HshTable *ht)
{
    int i;
    HshEntry *he;
    free_func free_key = ht->free_key;
    free_func free_value = ht->free_value;

    /* Clear all the hash values and keys as necessary */
    for (i = 0; i <= ht->mask; i++) {
        he = &ht->table[i];
        if (he->key != NULL && he->key != dummy_key) {
            free_value(he->value);
            free_key(he->key);
        }
        he->key = NULL;
        he->value = NULL;
    }
    ht->used = 0;
    ht->fill = 0;
}
|
313
|
+
|
314
|
+
/*
 * h_destroy: release ht and everything it holds.
 *
 * All entries are cleared with h_clear and a separately allocated slot
 * array is freed. The table struct itself is parked on the free_hts recycle
 * list while there is room there, and freed otherwise.
 */
void h_destroy(HshTable *ht)
{
    h_clear(ht);

    /* if a new table was created, be sure to free it */
    if (ht->table != ht->smalltable) {
        free(ht->table);
    }

    if (num_free_hts >= MAX_FREE_HASH_TABLES) {
        free(ht);
        return;
    }
    free_hts[num_free_hts++] = ht;
}
|
327
|
+
|
328
|
+
/*
 * h_get: return the value stored under key, or NULL when key is absent.
 *
 * The lookup routine returns an insertion slot (empty or dummy) when the
 * key is not present. That case is reported as NULL explicitly rather than
 * trusting whatever value pointer the slot happens to contain.
 */
void *h_get(HshTable *ht, const void *key)
{
    HshEntry *he = ht->lookup(ht, key);

    if (he->key == NULL || he->key == dummy_key) {
        return NULL;
    }
    return he->value;
}
|
332
|
+
|
333
|
+
/*
 * h_del: delete the entry stored under key, destroying its key and value.
 *
 * Returns true when an entry was removed and false when key was not
 * present. The slot is marked with dummy_key so that probe sequences
 * passing through it keep working.
 */
int h_del(HshTable *ht, const void *key)
{
    HshEntry *slot = ht->lookup(ht, key);

    if (slot->key == NULL || slot->key == dummy_key) {
        return false;
    }

    ht->free_key(slot->key);
    ht->free_value(slot->value);
    slot->key = dummy_key;
    slot->value = NULL;
    ht->used--;
    return true;
}
|
348
|
+
|
349
|
+
/*
 * h_rem: remove the entry stored under key and hand its value back.
 *
 * destory_key  when true the stored key is released with the table's
 *              free_key callback; the value is never freed here
 *
 * Returns the removed value, or NULL when key was not present. The slot is
 * marked with dummy_key so that probe sequences passing through it keep
 * working.
 */
void *h_rem(HshTable *ht, const void *key, bool destory_key)
{
    HshEntry *slot = ht->lookup(ht, key);
    void *removed;

    if (slot->key == NULL || slot->key == dummy_key) {
        return NULL;
    }

    if (destory_key) {
        ht->free_key(slot->key);
    }
    removed = slot->value;
    slot->key = dummy_key;
    slot->value = NULL;
    ht->used--;
    return removed;
}
|
367
|
+
|
368
|
+
/*
 * h_resize: rebuild ht with at least min_newsize slots.
 *
 * The new slot count is the smallest power-of-two multiple of Hsh_MINSIZE
 * that is >= min_newsize. A table of Hsh_MINSIZE slots lives in the
 * embedded smalltable; larger tables are allocated. Live entries are
 * re-inserted through ht->lookup and dummy (deleted) slots are dropped, so
 * fill equals used afterwards. Always returns 0.
 *
 * Slots are classified by their key (NULL = empty, dummy_key = deleted,
 * anything else = live) rather than by their value, so entries that store a
 * NULL value survive the resize and the scan stops exactly when every
 * filled slot has been visited.
 */
int h_resize(HshTable *ht, int min_newsize)
{
    HshEntry smallcopy[Hsh_MINSIZE];
    HshEntry *oldtable;
    HshEntry *he_old, *he_new;
    int newsize, remaining;

    for (newsize = Hsh_MINSIZE; newsize < min_newsize; newsize <<= 1)
        ;

    oldtable = ht->table;
    if (newsize == Hsh_MINSIZE) {
        if (ht->table == ht->smalltable) {
            /* need to copy the data out so we can rebuild the table into
             * the same space */
            memcpy(smallcopy, ht->smalltable, sizeof(smallcopy));
            oldtable = smallcopy;
        } else {
            ht->table = ht->smalltable;
        }
    } else {
        ht->table = ALLOC_N(HshEntry, newsize);
    }
    memset(ht->table, 0, sizeof(HshEntry) * newsize);

    /* fill counts every non-empty slot (live + dummy) of the old table */
    remaining = ht->fill;
    ht->fill = ht->used;
    ht->mask = newsize - 1;

    for (he_old = oldtable; remaining > 0; he_old++) {
        if (he_old->key == NULL) {
            continue;                   /* empty slot, nothing to do */
        }
        remaining--;
        if (he_old->key != dummy_key) { /* active entry */
            he_new = ht->lookup(ht, he_old->key);
            he_new->key = he_old->key;
            he_new->value = he_old->value;
        }                               /* else dummy entry: dropped */
    }

    if (oldtable != smallcopy && oldtable != ht->smalltable) {
        free(oldtable);
    }
    return 0;
}
|
410
|
+
|
411
|
+
/*
 * h_set: store value under key, replacing any existing entry.
 *
 * Returns
 *   HASH_KEY_DOES_NOT_EXIST  key was not present; a new entry was added
 *   HASH_KEY_EQUAL           an equal key (different pointer) was present;
 *                            the old key was released with free_key
 *   HASH_KEY_SAME            the very same key pointer was present
 * In the two replacement cases the old value is released with free_value
 * unless it is the same pointer as the new value.
 *
 * After the insertion the table is resized when a previously empty slot was
 * consumed and more than two thirds of the slots are filled.
 */
int h_set(HshTable *ht, const void *key, void *value)
{
    HshEntry *slot = ht->lookup(ht, key);
    const int fill_before = ht->fill;
    int status = HASH_KEY_DOES_NOT_EXIST;

    if (slot->key == NULL) {
        /* brand new slot */
        ht->fill++;
        ht->used++;
    } else if (slot->key == dummy_key) {
        /* recycling a deleted slot: already counted in fill */
        ht->used++;
    } else {
        if (slot->key != key) {
            ht->free_key(slot->key);
            status = HASH_KEY_EQUAL;
        } else {
            status = HASH_KEY_SAME;
        }
        if (slot->value != value) {
            ht->free_value(slot->value);
        }
    }

    slot->key = (void *)key;
    slot->value = value;

    if ((ht->fill > fill_before) && (ht->fill * 3 > ht->mask * 2)) {
        h_resize(ht, ht->used * ((ht->used > SLOW_DOWN) ? 4 : 2));
    }
    return status;
}
|
435
|
+
|
436
|
+
/*
 * h_has_key: report whether key is present in ht.
 *
 * Returns HASH_KEY_DOES_NOT_EXIST when the key is absent, HASH_KEY_SAME
 * when the stored key is the same pointer as key, and HASH_KEY_EQUAL when a
 * different pointer that matched during lookup is stored.
 */
int h_has_key(HshTable *ht, const void *key)
{
    const HshEntry *slot = ht->lookup(ht, key);

    if (slot->key == NULL || slot->key == dummy_key) {
        return HASH_KEY_DOES_NOT_EXIST;
    }
    return (slot->key == key) ? HASH_KEY_SAME : HASH_KEY_EQUAL;
}
|