whistlepig 0.9.1 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ #ifndef wp_lock_h_
2
+ #define wp_lock_h_
3
+
4
+ // whistlepig locks
5
+ // (c) 2011 william morgan. see copying for license terms.
6
+
7
+ #include <pthread.h>
8
+
9
+ #include "error.h"
10
+
11
+ #define WP_LOCK_READLOCK 0
12
+ #define WP_LOCK_WRITELOCK 1
13
+
14
+ wp_error* wp_lock_setup(pthread_rwlock_t* lock) RAISES_ERROR;
15
+ wp_error* wp_lock_grab(pthread_rwlock_t* lock, int lock_type) RAISES_ERROR;
16
+ wp_error* wp_lock_release(pthread_rwlock_t* lock) RAISES_ERROR;
17
+
18
+ #endif
@@ -18,10 +18,10 @@ wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname,
18
18
  lseek(o->fd, size - 1, SEEK_SET);
19
19
  ssize_t num_bytes = write(o->fd, "", 1);
20
20
  if(num_bytes == -1) RAISE_SYSERROR("write");
21
- o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
22
- if(o->header == MAP_FAILED) RAISE_SYSERROR("mmap");
23
- strncpy(o->header->magic, magic, MMAP_OBJ_MAGIC_SIZE);
24
- o->header->size = initial_size;
21
+ o->content = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
22
+ if(o->content == MAP_FAILED) RAISE_SYSERROR("mmap");
23
+ strncpy(o->content->magic, magic, MMAP_OBJ_MAGIC_SIZE);
24
+ o->content->size = o->loaded_size = initial_size;
25
25
  DEBUG("created new %s object with %u bytes", magic, size);
26
26
 
27
27
  return NO_ERROR;
@@ -33,44 +33,60 @@ wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
33
33
  if(o->fd == -1) RAISE_SYSERROR("cannot open %s", pathname);
34
34
 
35
35
  // load header
36
- o->header = mmap(NULL, sizeof(mmap_obj_header), PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
37
- if(o->header == MAP_FAILED) RAISE_SYSERROR("header mmap");
36
+ o->content = mmap(NULL, sizeof(mmap_obj_header), PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
37
+ if(o->content == MAP_FAILED) RAISE_SYSERROR("header mmap");
38
38
  DEBUG("loaded header of %u bytes for %s object", sizeof(mmap_obj_header), magic);
39
39
 
40
- RELAY_ERROR(validate(o->header, magic));
40
+ RELAY_ERROR(validate(o->content, magic));
41
41
 
42
- uint32_t size = o->header->size + (uint32_t)sizeof(mmap_obj_header);
42
+ o->loaded_size = o->content->size;
43
+
44
+ uint32_t size = o->content->size + (uint32_t)sizeof(mmap_obj_header);
43
45
  DEBUG("full size is %u bytes (including %u-byte header)", size, sizeof(mmap_obj_header));
44
- if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");
46
+ if(munmap(o->content, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");
45
47
 
46
- o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
47
- if(o->header == MAP_FAILED) RAISE_SYSERROR("full mmap");
48
+ o->content = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
49
+ if(o->content == MAP_FAILED) RAISE_SYSERROR("full mmap");
48
50
  DEBUG("loaded full %s object of %u bytes", magic, size);
49
51
 
50
52
  return NO_ERROR;
51
53
  }
52
54
 
55
+ wp_error* mmap_obj_reload(mmap_obj* o) { // re-mmaps o only when the size stored in the mapped header differs from o->loaded_size
56
+ if(o->loaded_size != o->content->size) { // size in the mapped header no longer matches the size recorded when we last mapped
57
+ DEBUG("need to reload %s because size of %u is now %u", o->content->magic, o->loaded_size, o->content->size);
58
+ uint32_t new_size = o->content->size + (uint32_t)sizeof(mmap_obj_header); // payload size plus the header
59
+ if(munmap(o->content, sizeof(mmap_obj_header) + o->loaded_size) == -1) RAISE_SYSERROR("munmap"); // unmap using the old (loaded) size, not the new one
60
+ o->content = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
61
+ if(o->content == MAP_FAILED) RAISE_SYSERROR("mmap of %uk", new_size / 1024);
62
+ o->loaded_size = o->content->size; // record the size read back from the fresh mapping
63
+ DEBUG("loaded %u bytes for %s. header is at %p", o->content->size, o->content->magic, o->content);
64
+ }
65
+
66
+ return NO_ERROR;
67
+ }
68
+
53
69
  wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
54
- DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, o->header);
70
+ DEBUG("going to expand from %u to %u bytes. current header is at %p", o->content->size, data_size, o->content);
55
71
 
56
- if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
72
+ if(munmap(o->content, sizeof(mmap_obj_header) + o->content->size) == -1) RAISE_SYSERROR("munmap");
57
73
  uint32_t size = data_size + (uint32_t)sizeof(mmap_obj_header);
58
74
 
59
75
  lseek(o->fd, size - 1, SEEK_SET);
60
76
  ssize_t num_bytes = write(o->fd, "", 1);
61
77
  if(num_bytes == -1) RAISE_SYSERROR("write");
62
78
  //lseek(fd, 0, SEEK_SET); // not necessary!
63
- o->header = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
64
- if(o->header == MAP_FAILED) RAISE_SYSERROR("mmap");
65
- o->header->size = data_size;
66
- DEBUG("loaded %u bytes after resize. header is at %p", o->header->size, o->header);
79
+ o->content = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, o->fd, 0);
80
+ if(o->content == MAP_FAILED) RAISE_SYSERROR("mmap");
81
+ o->content->size = o->loaded_size = data_size;
82
+ DEBUG("loaded %u bytes after resize. header is at %p", o->content->size, o->content);
67
83
 
68
84
  return NO_ERROR;
69
85
  }
70
86
 
71
87
  wp_error* mmap_obj_unload(mmap_obj* o) {
72
- DEBUG("unloading %u bytes", sizeof(mmap_obj_header) + o->header->size);
73
- if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
74
- o->header = NULL;
88
+ DEBUG("unloading %u bytes", sizeof(mmap_obj_header) + o->content->size);
89
+ if(munmap(o->content, sizeof(mmap_obj_header) + o->content->size) == -1) RAISE_SYSERROR("munmap");
90
+ o->content = NULL;
75
91
  return NO_ERROR;
76
92
  }
@@ -7,7 +7,7 @@
7
7
  // wrappers around the logic of loading, unloading, and resizing
8
8
  // arbitrary-sized objects using mmap.
9
9
  //
10
- // note that aany of the mmap_obj_* functions may change the object pointer, so
10
+ // note that any of the mmap_obj_* functions may change the object pointer, so
11
11
  // use MMAP_OBJ or MMAP_OBJ_PTR to dereference (again) after calling them.
12
12
 
13
13
  #define MMAP_OBJ_MAGIC_SIZE 15
@@ -15,26 +15,27 @@
15
15
  #include <stdint.h>
16
16
  #include "error.h"
17
17
 
18
- // the header, with a magic string
18
+ // what's actually mmap'd
19
19
  typedef struct mmap_obj_header {
20
20
  char magic[MMAP_OBJ_MAGIC_SIZE];
21
- uint32_t size;
22
- char obj[];
21
+ uint32_t size; // size of payload, not including this header
22
+ char obj[]; // the payload itself
23
23
  } mmap_obj_header;
24
24
 
25
25
  // what we pass around at runtime
26
26
  typedef struct mmap_obj {
27
27
  int fd;
28
- mmap_obj_header* header;
28
+ uint32_t loaded_size; // compare against content->size
29
+ mmap_obj_header* content;
29
30
  } mmap_obj;
30
31
 
31
32
  // public API
32
33
 
33
34
  // public: get the actual object from an mmap_obj
34
- #define MMAP_OBJ(v, type) ((type*)&v.header->obj)
35
+ #define MMAP_OBJ(v, type) ((type*)&v.content->obj)
35
36
 
36
37
  // public: get the object from an mmap_obj*
37
- #define MMAP_OBJ_PTR(v, type) (type*)v->header->obj
38
+ #define MMAP_OBJ_PTR(v, type) (type*)v->content->obj
38
39
 
39
40
  // public: create an object with an initial size
40
41
  wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname, uint32_t initial_size) RAISES_ERROR;
@@ -43,6 +44,10 @@ wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname,
43
44
  // magic doesn't match)
44
45
  wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) RAISES_ERROR;
45
46
 
47
+ // public: re-mmap an object, but only if its size has changed since it
48
+ // was last created, loaded, resized, or reloaded.
49
+ wp_error* mmap_obj_reload(mmap_obj* o) RAISES_ERROR;
50
+
46
51
  // public: resize an object. note that the obj pointer might change after this call.
47
52
  wp_error* mmap_obj_resize(mmap_obj* o, uint32_t new_size) RAISES_ERROR;
48
53
 
@@ -184,15 +184,16 @@ static wp_error* term_init_search_state(wp_query* q, wp_segment* seg) {
184
184
  term t;
185
185
  stringmap* sh = MMAP_OBJ(seg->stringmap, stringmap);
186
186
  termhash* th = MMAP_OBJ(seg->termhash, termhash);
187
+ stringpool* sp = MMAP_OBJ(seg->stringpool, stringpool);
187
188
 
188
189
  term_search_state* state = q->search_data = malloc(sizeof(term_search_state));
189
190
  state->started = 0;
190
191
 
191
192
  state->label = q->type == WP_QUERY_LABEL ? 1 : 0;
192
193
  if(state->label) t.field_s = 0;
193
- else t.field_s = stringmap_string_to_int(sh, q->field); // will be -1 if not found
194
+ else t.field_s = stringmap_string_to_int(sh, sp, q->field); // will be -1 if not found
194
195
 
195
- t.word_s = stringmap_string_to_int(sh, q->word);
196
+ t.word_s = stringmap_string_to_int(sh, sp, q->word);
196
197
 
197
198
  uint32_t offset = termhash_get_val(th, t);
198
199
  if(offset == (uint32_t)-1) offset = OFFSET_NONE;
@@ -268,10 +269,10 @@ static wp_error* neg_init_search_state(wp_query* q, wp_segment* seg) {
268
269
 
269
270
  RELAY_ERROR(wp_search_init_search_state(q->children, seg));
270
271
 
271
- postings_region* pr = MMAP_OBJ(seg->postings, postings_region);
272
+ segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
272
273
  neg_search_state* state = q->search_data = malloc(sizeof(neg_search_state));
273
274
 
274
- state->cur = pr->num_docs + 1;
275
+ state->cur = si->num_docs + 1;
275
276
  search_result result;
276
277
  int done;
277
278
  RELAY_ERROR(query_next_doc(q->children, seg, &result, &done));
@@ -294,8 +295,8 @@ static wp_error* neg_release_search_state(wp_query* q) {
294
295
  static wp_error* every_init_search_state(wp_query* q, wp_segment* seg) {
295
296
  q->search_data = malloc(sizeof(docid_t));
296
297
 
297
- postings_region* pr = MMAP_OBJ(seg->postings, postings_region);
298
- *(docid_t*)q->search_data = pr->num_docs;
298
+ segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
299
+ *(docid_t*)q->search_data = si->num_docs;
299
300
 
300
301
  return NO_ERROR;
301
302
  }
@@ -6,18 +6,50 @@
6
6
  #define POSTINGS_REGION_TYPE_IMMUTABLE_VBE 1
7
7
  #define POSTINGS_REGION_TYPE_MUTABLE_NO_POSITIONS 2 // bigger, mutable
8
8
 
9
+ #define SEGMENT_VERSION 3
10
+
9
11
  #define wp_segment_label_posting_at(posting_region, offset) ((label_posting*)(posting_region->postings + offset))
10
12
 
11
- static void postings_region_init(postings_region* pr, uint32_t initial_size, uint32_t index_type_and_flags) {
12
- pr->index_type_and_flags = index_type_and_flags;
13
- pr->num_docs = 0;
13
+ wp_error* wp_segment_grab_readlock(wp_segment* seg) { // grabs the lock stored in the segment's mmap'd segment_info with WP_LOCK_READLOCK
14
+ segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
15
+ RELAY_ERROR(wp_lock_grab(&si->lock, WP_LOCK_READLOCK));
16
+ return NO_ERROR;
17
+ }
18
+
19
+ wp_error* wp_segment_grab_writelock(wp_segment* seg) { // grabs the lock stored in the segment's mmap'd segment_info with WP_LOCK_WRITELOCK
20
+ segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
21
+ RELAY_ERROR(wp_lock_grab(&si->lock, WP_LOCK_WRITELOCK));
22
+ return NO_ERROR;
23
+ }
24
+
25
+ wp_error* wp_segment_release_lock(wp_segment* seg) { // releases the lock stored in the segment's mmap'd segment_info via wp_lock_release
26
+ segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
27
+ RELAY_ERROR(wp_lock_release(&si->lock));
28
+ return NO_ERROR;
29
+ }
30
+
31
+ static void postings_region_init(postings_region* pr, uint32_t initial_size, uint32_t postings_type_and_flags) {
32
+ pr->postings_type_and_flags = postings_type_and_flags;
14
33
  pr->num_postings = 0;
15
34
  pr->postings_head = 1; // skip one byte, which is reserved as OFFSET_NONE
16
35
  pr->postings_tail = initial_size;
17
36
  }
18
37
 
19
- RAISING_STATIC(postings_region_validate(postings_region* pr, uint32_t index_type_and_flags)) {
20
- if(pr->index_type_and_flags != index_type_and_flags) RAISE_ERROR("segment has index type %u; expecting type %u", pr->index_type_and_flags, index_type_and_flags);
38
+ RAISING_STATIC(segment_info_init(segment_info* si, uint32_t segment_version)) {
39
+ si->segment_version = segment_version;
40
+ si->num_docs = 0;
41
+
42
+ RELAY_ERROR(wp_lock_setup(&si->lock));
43
+ return NO_ERROR;
44
+ }
45
+
46
+ RAISING_STATIC(segment_info_validate(segment_info* si, uint32_t segment_version)) {
47
+ if(si->segment_version != segment_version) RAISE_ERROR("segment has type %u; expecting type %u", si->segment_version, segment_version);
48
+ return NO_ERROR;
49
+ }
50
+
51
+ RAISING_STATIC(postings_region_validate(postings_region* pr, uint32_t postings_type_and_flags)) {
52
+ if(pr->postings_type_and_flags != postings_type_and_flags) RAISE_ERROR("postings region has type %u; expecting type %u", pr->postings_type_and_flags, postings_type_and_flags);
21
53
  return NO_ERROR;
22
54
  }
23
55
 
@@ -27,59 +59,78 @@ RAISING_STATIC(postings_region_validate(postings_region* pr, uint32_t index_type
27
59
  wp_error* wp_segment_load(wp_segment* segment, const char* pathname_base) {
28
60
  char fn[FN_SIZE];
29
61
 
62
+ // open the segment info
63
+ snprintf(fn, 128, "%s.si", pathname_base);
64
+ RELAY_ERROR(mmap_obj_load(&segment->seginfo, "wp/seginfo", fn));
65
+ RELAY_ERROR(segment_info_validate(MMAP_OBJ(segment->seginfo, segment_info), SEGMENT_VERSION));
66
+
30
67
  // open the string pool
31
68
  snprintf(fn, 128, "%s.sp", pathname_base);
32
- RELAY_ERROR(mmap_obj_load(&segment->stringpool, "ti/stringpool", fn));
69
+ RELAY_ERROR(mmap_obj_load(&segment->stringpool, "wp/stringpool", fn));
33
70
 
34
71
  // open the string hash
35
- snprintf(fn, 128, "%s.sh_", pathname_base);
36
- RELAY_ERROR(mmap_obj_load(&segment->stringmap, "ti/stringmap", fn));
37
- stringmap_setup(MMAP_OBJ(segment->stringmap, stringmap), MMAP_OBJ(segment->stringpool, stringpool));
72
+ snprintf(fn, 128, "%s.sh", pathname_base);
73
+ RELAY_ERROR(mmap_obj_load(&segment->stringmap, "wp/stringmap", fn));
38
74
 
39
75
  // open the term hash
40
76
  snprintf(fn, 128, "%s.th", pathname_base);
41
- RELAY_ERROR(mmap_obj_load(&segment->termhash, "ti/termhash", fn));
42
- termhash_setup(MMAP_OBJ(segment->termhash, termhash));
77
+ RELAY_ERROR(mmap_obj_load(&segment->termhash, "wp/termhash", fn));
43
78
 
44
79
  // open the postings region
45
80
  snprintf(fn, 128, "%s." WP_SEGMENT_POSTING_REGION_PATH_SUFFIX, pathname_base);
46
- RELAY_ERROR(mmap_obj_load(&segment->postings, "ti/postings", fn));
81
+ RELAY_ERROR(mmap_obj_load(&segment->postings, "wp/postings", fn));
47
82
  RELAY_ERROR(postings_region_validate(MMAP_OBJ(segment->postings, postings_region), POSTINGS_REGION_TYPE_IMMUTABLE_VBE));
48
83
 
49
84
  // open the labels postings region
50
85
  snprintf(fn, 128, "%s.lb", pathname_base);
51
- RELAY_ERROR(mmap_obj_load(&segment->labels, "ti/labels", fn));
86
+ RELAY_ERROR(mmap_obj_load(&segment->labels, "wp/labels", fn));
52
87
  RELAY_ERROR(postings_region_validate(MMAP_OBJ(segment->labels, postings_region), POSTINGS_REGION_TYPE_MUTABLE_NO_POSITIONS));
53
88
 
54
89
  return NO_ERROR;
55
90
  }
56
91
 
92
+ wp_error* wp_segment_reload(wp_segment* segment) { // calls mmap_obj_reload on each of the segment's mmap objects in turn
93
+ RELAY_ERROR(mmap_obj_reload(&segment->seginfo));
94
+ RELAY_ERROR(mmap_obj_reload(&segment->stringpool));
95
+ RELAY_ERROR(mmap_obj_reload(&segment->stringmap));
96
+ RELAY_ERROR(mmap_obj_reload(&segment->termhash));
97
+ RELAY_ERROR(mmap_obj_reload(&segment->postings));
98
+ RELAY_ERROR(mmap_obj_reload(&segment->labels));
99
+
100
+ return NO_ERROR;
101
+ }
102
+
57
103
  wp_error* wp_segment_create(wp_segment* segment, const char* pathname_base) {
58
104
  char fn[FN_SIZE];
59
105
 
106
+ // create the segment info
107
+ snprintf(fn, 128, "%s.si", pathname_base);
108
+ RELAY_ERROR(mmap_obj_create(&segment->seginfo, "wp/seginfo", fn, sizeof(segment_info)));
109
+ RELAY_ERROR(segment_info_init(MMAP_OBJ(segment->seginfo, segment_info), SEGMENT_VERSION));
110
+
60
111
  // create the string pool
61
112
  snprintf(fn, 128, "%s.sp", pathname_base);
62
- RELAY_ERROR(mmap_obj_create(&segment->stringpool, "ti/stringpool", fn, stringpool_initial_size()));
113
+ RELAY_ERROR(mmap_obj_create(&segment->stringpool, "wp/stringpool", fn, stringpool_initial_size()));
63
114
  stringpool_init(MMAP_OBJ(segment->stringpool, stringpool));
64
115
 
65
116
  // create the string hash
66
- snprintf(fn, 128, "%s.sh_", pathname_base);
67
- RELAY_ERROR(mmap_obj_create(&segment->stringmap, "ti/stringmap", fn, stringmap_initial_size()));
68
- stringmap_init(MMAP_OBJ(segment->stringmap, stringmap), MMAP_OBJ(segment->stringpool, stringpool));
117
+ snprintf(fn, 128, "%s.sh", pathname_base);
118
+ RELAY_ERROR(mmap_obj_create(&segment->stringmap, "wp/stringmap", fn, stringmap_initial_size()));
119
+ stringmap_init(MMAP_OBJ(segment->stringmap, stringmap));
69
120
 
70
121
  // create the term hash
71
122
  snprintf(fn, 128, "%s.th", pathname_base);
72
- RELAY_ERROR(mmap_obj_create(&segment->termhash, "ti/termhash", fn, termhash_initial_size()));
123
+ RELAY_ERROR(mmap_obj_create(&segment->termhash, "wp/termhash", fn, termhash_initial_size()));
73
124
  termhash_init(MMAP_OBJ(segment->termhash, termhash));
74
125
 
75
126
  // create the postings region
76
127
  snprintf(fn, 128, "%s." WP_SEGMENT_POSTING_REGION_PATH_SUFFIX, pathname_base);
77
- RELAY_ERROR(mmap_obj_create(&segment->postings, "ti/postings", fn, sizeof(postings_region) + INITIAL_POSTINGS_SIZE));
128
+ RELAY_ERROR(mmap_obj_create(&segment->postings, "wp/postings", fn, sizeof(postings_region) + INITIAL_POSTINGS_SIZE));
78
129
  postings_region_init(MMAP_OBJ(segment->postings, postings_region), INITIAL_POSTINGS_SIZE, POSTINGS_REGION_TYPE_IMMUTABLE_VBE);
79
130
 
80
131
  // create the labels postings region
81
132
  snprintf(fn, 128, "%s.lb", pathname_base);
82
- RELAY_ERROR(mmap_obj_create(&segment->labels, "ti/labels", fn, sizeof(postings_region) + INITIAL_POSTINGS_SIZE));
133
+ RELAY_ERROR(mmap_obj_create(&segment->labels, "wp/labels", fn, sizeof(postings_region) + INITIAL_POSTINGS_SIZE));
83
134
  postings_region_init(MMAP_OBJ(segment->labels, postings_region), INITIAL_POSTINGS_SIZE, POSTINGS_REGION_TYPE_MUTABLE_NO_POSITIONS);
84
135
 
85
136
  return NO_ERROR;
@@ -96,11 +147,13 @@ int wp_segment_exists(const char* pathname_base) {
96
147
  wp_error* wp_segment_delete(const char* pathname_base) {
97
148
  char fn[FN_SIZE];
98
149
 
150
+ snprintf(fn, 128, "%s.si", pathname_base);
151
+ unlink(fn);
99
152
  snprintf(fn, 128, "%s." WP_SEGMENT_POSTING_REGION_PATH_SUFFIX, pathname_base);
100
153
  unlink(fn);
101
154
  snprintf(fn, 128, "%s.sp", pathname_base);
102
155
  unlink(fn);
103
- snprintf(fn, 128, "%s.sh_", pathname_base);
156
+ snprintf(fn, 128, "%s.sh", pathname_base);
104
157
  unlink(fn);
105
158
  snprintf(fn, 128, "%s.th", pathname_base);
106
159
  unlink(fn);
@@ -132,9 +185,7 @@ RAISING_STATIC(bump_stringmap(wp_segment* s, int* success)) {
132
185
  }
133
186
  else {
134
187
  RELAY_ERROR(mmap_obj_resize(&s->stringmap, next_size));
135
- sh = MMAP_OBJ(s->stringmap, stringmap); // this could have changed!
136
- stringmap_setup(sh, MMAP_OBJ(s->stringpool, stringpool));
137
- RELAY_ERROR(stringmap_bump_size(sh));
188
+ RELAY_ERROR(stringmap_bump_size(MMAP_OBJ(s->stringmap, stringmap), MMAP_OBJ(s->stringpool, stringpool)));
138
189
  }
139
190
  }
140
191
 
@@ -154,10 +205,7 @@ RAISING_STATIC(bump_stringpool(wp_segment* s, int* success)) {
154
205
  }
155
206
  else {
156
207
  RELAY_ERROR(mmap_obj_resize(&s->stringpool, next_size));
157
- sp = MMAP_OBJ(s->stringpool, stringpool); // may have changed!
158
- stringmap* sh = MMAP_OBJ(s->stringmap, stringmap);
159
- sh->pool = sp; // need to update it here too
160
- stringpool_bump_size(sp);
208
+ stringpool_bump_size(MMAP_OBJ(s->stringpool, stringpool));
161
209
  }
162
210
  }
163
211
 
@@ -177,9 +225,7 @@ RAISING_STATIC(bump_termhash(wp_segment* s, int* success)) {
177
225
  }
178
226
  else {
179
227
  RELAY_ERROR(mmap_obj_resize(&s->termhash, next_size));
180
- th = MMAP_OBJ(s->termhash, termhash); // could have changed!
181
- termhash_setup(th);
182
- RELAY_ERROR(termhash_bump_size(th));
228
+ RELAY_ERROR(termhash_bump_size(MMAP_OBJ(s->termhash, termhash)));
183
229
  *success = 1;
184
230
  }
185
231
  }
@@ -196,7 +242,7 @@ RAISING_STATIC(postings_region_ensure_fit(mmap_obj* mmopr, uint32_t postings_byt
196
242
  uint32_t new_tail = pr->postings_tail;
197
243
  while(new_tail <= new_head) new_tail = new_tail * 2;
198
244
 
199
- if(new_tail > MAX_POSTINGS_REGION_SIZE) new_tail = MAX_POSTINGS_REGION_SIZE;
245
+ if(new_tail > MAX_POSTINGS_REGION_SIZE - sizeof(mmap_obj_header)) new_tail = MAX_POSTINGS_REGION_SIZE - sizeof(mmap_obj_header);
200
246
  DEBUG("new tail will be %u, current is %u, max is %u", new_tail, pr->postings_tail, MAX_POSTINGS_REGION_SIZE);
201
247
 
202
248
  if(new_tail <= new_head) { // can't increase enough
@@ -362,7 +408,7 @@ wp_error* wp_segment_read_posting(wp_segment* s, uint32_t offset, posting* po, i
362
408
 
363
409
  RELAY_ERROR(read_multibyte(&pr->postings[offset], &po->next_offset, &size));
364
410
  //DEBUG("read next_offset %u -> %u (%u bytes)", po->next_offset, orig_offset - po->next_offset, size);
365
- if((po->next_offset == 0) || (po->next_offset > orig_offset)) RAISE_ERROR("read invalid next_offset %u (must be > 0 and < %u", po->next_offset, orig_offset);
411
+ if((po->next_offset == 0) || (po->next_offset > orig_offset)) RAISE_ERROR("read invalid next_offset %u (must be > 0 and < %u)", po->next_offset, orig_offset);
366
412
  po->next_offset = orig_offset - po->next_offset;
367
413
  offset += size;
368
414
 
@@ -408,11 +454,12 @@ wp_error* wp_segment_add_posting(wp_segment* s, const char* field, const char* w
408
454
  postings_region* pr = MMAP_OBJ(s->postings, postings_region);
409
455
  stringmap* sh = MMAP_OBJ(s->stringmap, stringmap);
410
456
  termhash* th = MMAP_OBJ(s->termhash, termhash);
457
+ stringpool* sp = MMAP_OBJ(s->stringpool, stringpool);
411
458
 
412
459
  // construct the term object
413
460
  term t;
414
- RELAY_ERROR(stringmap_add(sh, field, &t.field_s));
415
- RELAY_ERROR(stringmap_add(sh, word, &t.word_s));
461
+ RELAY_ERROR(stringmap_add(sh, sp, field, &t.field_s));
462
+ RELAY_ERROR(stringmap_add(sh, sp, word, &t.word_s));
416
463
 
417
464
  // find the offset of the next posting
418
465
  posting po;
@@ -480,12 +527,13 @@ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id)
480
527
  postings_region* pr = MMAP_OBJ(s->labels, postings_region);
481
528
  stringmap* sh = MMAP_OBJ(s->stringmap, stringmap);
482
529
  termhash* th = MMAP_OBJ(s->termhash, termhash);
530
+ stringpool* sp = MMAP_OBJ(s->stringpool, stringpool);
483
531
 
484
532
  // construct the term object. term objects for labels have the special
485
533
  // sentinel field value 0
486
534
  term t;
487
535
  t.field_s = 0; // label sentinel value
488
- RELAY_ERROR(stringmap_add(sh, label, &t.word_s)); // get word key
536
+ RELAY_ERROR(stringmap_add(sh, sp, label, &t.word_s)); // get word key
489
537
 
490
538
  // find the previous and next label postings, between which we'll insert this
491
539
  // posting
@@ -558,12 +606,13 @@ wp_error* wp_segment_remove_label(wp_segment* s, const char* label, docid_t doc_
558
606
  postings_region* pr = MMAP_OBJ(s->labels, postings_region);
559
607
  stringmap* sh = MMAP_OBJ(s->stringmap, stringmap);
560
608
  termhash* th = MMAP_OBJ(s->termhash, termhash);
609
+ stringpool* sp = MMAP_OBJ(s->stringpool, stringpool);
561
610
 
562
611
  // construct the term object. term objects for labels have the special
563
612
  // sentinel field value 0
564
613
  term t;
565
614
  t.field_s = 0; // label sentinel value
566
- t.word_s = stringmap_string_to_int(sh, label); // will be -1 if not there
615
+ t.word_s = stringmap_string_to_int(sh, sp, label); // will be -1 if not there
567
616
 
568
617
  // find the posting and the previous posting in the list, if any
569
618
  uint32_t prev_offset = OFFSET_NONE;
@@ -613,12 +662,13 @@ wp_error* wp_segment_remove_label(wp_segment* s, const char* label, docid_t doc_
613
662
  }
614
663
 
615
664
  wp_error* wp_segment_grab_docid(wp_segment* segment, docid_t* doc_id) {
616
- postings_region* pr = MMAP_OBJ(segment->postings, postings_region);
617
- *doc_id = ++pr->num_docs;
665
+ segment_info* si = MMAP_OBJ(segment->seginfo, segment_info);
666
+ *doc_id = ++si->num_docs;
618
667
  return NO_ERROR;
619
668
  }
620
669
 
621
670
  wp_error* wp_segment_dumpinfo(wp_segment* segment, FILE* stream) {
671
+ segment_info* si = MMAP_OBJ(segment->seginfo, segment_info);
622
672
  postings_region* pr = MMAP_OBJ(segment->postings, postings_region);
623
673
  stringmap* sh = MMAP_OBJ(segment->stringmap, stringmap);
624
674
  stringpool* sp = MMAP_OBJ(segment->stringpool, stringpool);
@@ -626,17 +676,17 @@ wp_error* wp_segment_dumpinfo(wp_segment* segment, FILE* stream) {
626
676
 
627
677
  #define p(a, b) 100.0 * (float)a / (float)b
628
678
 
629
- fprintf(stream, "segment has type %u\n", pr->index_type_and_flags);
630
- fprintf(stream, "segment has %u docs and %u postings\n", pr->num_docs, pr->num_postings);
631
- fprintf(stream, "postings region is %6ukb at %3.1f%% saturation\n", segment->postings.header->size / 1024, p(pr->postings_head, pr->postings_tail));
632
- fprintf(stream, " string hash is %6ukb at %3.1f%% saturation\n", segment->stringmap.header->size / 1024, p(sh->n_occupied, sh->n_buckets));
633
- fprintf(stream, " stringpool is %6ukb at %3.1f%% saturation\n", segment->stringpool.header->size / 1024, p(sp->next, sp->size));
634
- fprintf(stream, " term hash has %6ukb at %3.1f%% saturation\n", segment->termhash.header->size / 1024, p(th->n_occupied, th->n_buckets));
679
+ fprintf(stream, "segment has type %u\n", pr->postings_type_and_flags);
680
+ fprintf(stream, "segment has %u docs and %u postings\n", si->num_docs, pr->num_postings);
681
+ fprintf(stream, "postings region is %6ukb at %3.1f%% saturation\n", segment->postings.content->size / 1024, p(pr->postings_head, pr->postings_tail));
682
+ fprintf(stream, " string hash is %6ukb at %3.1f%% saturation\n", segment->stringmap.content->size / 1024, p(sh->n_occupied, sh->n_buckets));
683
+ fprintf(stream, " stringpool is %6ukb at %3.1f%% saturation\n", segment->stringpool.content->size / 1024, p(sp->next, sp->size));
684
+ fprintf(stream, " term hash has %6ukb at %3.1f%% saturation\n", segment->termhash.content->size / 1024, p(th->n_occupied, th->n_buckets));
635
685
 
636
686
  return NO_ERROR;
637
687
  }
638
688
 
639
689
  uint64_t wp_segment_num_docs(wp_segment* seg) {
640
- postings_region* pr = MMAP_OBJ(seg->postings, postings_region);
641
- return pr->num_docs;
690
+ segment_info* si = MMAP_OBJ(seg->seginfo, segment_info);
691
+ return si->num_docs;
642
692
  }
@@ -15,6 +15,8 @@
15
15
  // different, mutable format. regular text is stored in a compressed format
16
16
  // that is not amenable to later changes.
17
17
 
18
+ #include <pthread.h>
19
+
18
20
  #include "defaults.h"
19
21
  #include "stringmap.h"
20
22
  #include "termhash.h"
@@ -60,21 +62,27 @@ typedef struct label_posting {
60
62
  // terms also; see termhash.h.)
61
63
 
62
64
  #define MAX_LOGICAL_DOCID 2147483646 // don't tweak me
63
- #define MAX_POSTINGS_REGION_SIZE (512*1024*1024) // tweak me
65
+ #define MAX_POSTINGS_REGION_SIZE (256*1024*1024) // tweak me
64
66
 
65
67
  #define WP_SEGMENT_POSTING_REGION_PATH_SUFFIX "pr"
66
68
 
67
69
  // the header for the postings region
68
70
  typedef struct postings_region {
69
- uint32_t index_type_and_flags;
70
- uint32_t num_docs;
71
+ uint32_t postings_type_and_flags;
71
72
  uint32_t num_postings;
72
73
  uint32_t postings_head, postings_tail;
73
74
  uint8_t postings[]; // where the postings go yo
74
75
  } postings_region;
75
76
 
77
+ typedef struct segment_info { // per-segment metadata, stored in its own mmap object (the ".si" file)
78
+ uint32_t segment_version; // set at creation; checked against SEGMENT_VERSION when the segment is loaded
79
+ uint32_t num_docs; // incremented by wp_segment_grab_docid to hand out the next doc id
80
+ pthread_rwlock_t lock; // set up via wp_lock_setup; used by wp_segment_grab_readlock/writelock/release_lock
81
+ } segment_info;
82
+
76
83
  // a segment is a bunch of all these things
77
84
  typedef struct wp_segment {
85
+ mmap_obj seginfo;
78
86
  mmap_obj stringmap;
79
87
  mmap_obj stringpool;
80
88
  mmap_obj termhash;
@@ -93,6 +101,9 @@ wp_error* wp_segment_create(wp_segment* segment, const char* pathname_base) RAIS
93
101
  // public: load a segment, raising an error unless it already exists
94
102
  wp_error* wp_segment_load(wp_segment* segment, const char* pathname_base) RAISES_ERROR;
95
103
 
104
+ // public: reload a segment as necessary, in case an external writer has changed the mmap object sizes
105
+ wp_error* wp_segment_reload(wp_segment* segment) RAISES_ERROR;
106
+
96
107
  // public: unload a segment
97
108
  wp_error* wp_segment_unload(wp_segment* s) RAISES_ERROR;
98
109
 
@@ -102,6 +113,11 @@ uint64_t wp_segment_num_docs(wp_segment* s);
102
113
  // public: delete a segment from disk
103
114
  wp_error* wp_segment_delete(const char* pathname_base) RAISES_ERROR;
104
115
 
116
+ // public: lock grabbing and releasing
117
+ wp_error* wp_segment_grab_readlock(wp_segment* seg) RAISES_ERROR;
118
+ wp_error* wp_segment_grab_writelock(wp_segment* seg) RAISES_ERROR;
119
+ wp_error* wp_segment_release_lock(wp_segment* seg) RAISES_ERROR;
120
+
105
121
  // private: read a posting from the postings region at a given offset
106
122
  wp_error* wp_segment_read_posting(wp_segment* s, uint32_t offset, posting* po, int include_positions) RAISES_ERROR;
107
123