sophia-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +22 -0
  8. data/ext/extconf.rb +13 -0
  9. data/ext/sophia.c +220 -0
  10. data/lib/sophia-ruby.rb +1 -0
  11. data/lib/sophia/version.rb +3 -0
  12. data/sophia-ruby.gemspec +47 -0
  13. data/test/test_sophia.rb +33 -0
  14. data/vendor/sophia/.gitignore +18 -0
  15. data/vendor/sophia/COPYRIGHT +29 -0
  16. data/vendor/sophia/README +5 -0
  17. data/vendor/sophia/db/a.h +58 -0
  18. data/vendor/sophia/db/cat.c +195 -0
  19. data/vendor/sophia/db/cat.h +32 -0
  20. data/vendor/sophia/db/core.h +129 -0
  21. data/vendor/sophia/db/crc.c +343 -0
  22. data/vendor/sophia/db/crc.h +14 -0
  23. data/vendor/sophia/db/cursor.c +551 -0
  24. data/vendor/sophia/db/cursor.h +47 -0
  25. data/vendor/sophia/db/e.c +49 -0
  26. data/vendor/sophia/db/e.h +49 -0
  27. data/vendor/sophia/db/file.c +355 -0
  28. data/vendor/sophia/db/file.h +106 -0
  29. data/vendor/sophia/db/gc.c +71 -0
  30. data/vendor/sophia/db/gc.h +14 -0
  31. data/vendor/sophia/db/i.c +368 -0
  32. data/vendor/sophia/db/i.h +155 -0
  33. data/vendor/sophia/db/list.h +91 -0
  34. data/vendor/sophia/db/lock.h +77 -0
  35. data/vendor/sophia/db/macro.h +20 -0
  36. data/vendor/sophia/db/makefile +44 -0
  37. data/vendor/sophia/db/merge.c +662 -0
  38. data/vendor/sophia/db/merge.h +14 -0
  39. data/vendor/sophia/db/meta.h +87 -0
  40. data/vendor/sophia/db/recover.c +433 -0
  41. data/vendor/sophia/db/recover.h +14 -0
  42. data/vendor/sophia/db/ref.h +111 -0
  43. data/vendor/sophia/db/rep.c +128 -0
  44. data/vendor/sophia/db/rep.h +120 -0
  45. data/vendor/sophia/db/sophia.h +84 -0
  46. data/vendor/sophia/db/sp.c +626 -0
  47. data/vendor/sophia/db/sp.h +50 -0
  48. data/vendor/sophia/db/task.h +70 -0
  49. data/vendor/sophia/db/track.h +99 -0
  50. data/vendor/sophia/db/util.c +105 -0
  51. data/vendor/sophia/db/util.h +25 -0
  52. data/vendor/sophia/makefile +7 -0
  53. data/vendor/sophia/sophia.gyp +30 -0
  54. data/vendor/sophia/test/common.c +870 -0
  55. data/vendor/sophia/test/crash.c +492 -0
  56. data/vendor/sophia/test/i.c +403 -0
  57. data/vendor/sophia/test/limit.c +65 -0
  58. data/vendor/sophia/test/makefile +30 -0
  59. data/vendor/sophia/test/merge.c +890 -0
  60. data/vendor/sophia/test/recover.c +1550 -0
  61. data/vendor/sophia/test/test.h +66 -0
  62. metadata +134 -0
@@ -0,0 +1,155 @@
1
+ #ifndef SP_I_H_
2
+ #define SP_I_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
+ typedef struct spipage spipage;
13
+ typedef struct spi spi;
14
+ typedef struct spii spii;
15
+
16
+ struct spipage {
17
+ uint16_t count;
18
+ spv *i[];
19
+ } sppacked;
20
+
21
+ struct spi {
22
+ spa *a;
23
+ int pagesize;
24
+ spipage **i;
25
+ uint32_t itop;
26
+ uint32_t icount;
27
+ uint32_t count;
28
+ spcmpf cmp;
29
+ void *cmparg;
30
+ };
31
+
32
+ struct spii {
33
+ spi *i;
34
+ long long p, n;
35
+ };
36
+
37
+ int sp_iinit(spi*, spa*, int, spcmpf, void*);
38
+ void sp_ifree(spi*);
39
+ int sp_itruncate(spi*);
40
+ int sp_isetorget(spi *i, spv*, spii*);
41
+ int sp_idelraw(spi*, char*, int, spv**);
42
+ spv *sp_igetraw(spi*, char*, int);
43
+
44
+ static inline int
45
+ sp_idel(spi *i, spv *v, spv **old) {
46
+ return sp_idelraw(i, v->key, v->size, old);
47
+ }
48
+
49
+ static inline spv*
50
+ sp_iget(spi *i, spv *v) {
51
+ return sp_igetraw(i, v->key, v->size);
52
+ }
53
+
54
+ static inline void*
55
+ sp_imax(spi *i) {
56
+ if (spunlikely(i->count == 0))
57
+ return NULL;
58
+ return i->i[i->icount-1]->i[i->i[i->icount-1]->count-1];
59
+ }
60
+
61
+ static inline void
62
+ sp_ifirst(spii *it) {
63
+ it->p = 0;
64
+ it->n = 0;
65
+ }
66
+
67
+ static inline void
68
+ sp_ilast(spii *it) {
69
+ it->p = it->i->icount - 1;
70
+ it->n = it->i->i[it->i->icount - 1]->count - 1;
71
+ }
72
+
73
+ static inline void
74
+ sp_iopen(spii *it, spi *i) {
75
+ it->i = i;
76
+ sp_ifirst(it);
77
+ }
78
+
79
+ static inline int
80
+ sp_ihas(spii *it) {
81
+ return (it->p >= 0 && it->n >= 0) &&
82
+ (it->p < it->i->icount) &&
83
+ (it->n < it->i->i[it->p]->count);
84
+ }
85
+
86
+ static inline void
87
+ sp_ivalset(spii *it, spv *v) {
88
+ it->i->i[it->p]->i[it->n] = v;
89
+ }
90
+
91
+ static inline spv*
92
+ sp_ival(spii *it) {
93
+ if (spunlikely(! sp_ihas(it)))
94
+ return NULL;
95
+ return it->i->i[it->p]->i[it->n];
96
+ }
97
+
98
+ static inline int
99
+ sp_inext(spii *it) {
100
+ if (spunlikely(! sp_ihas(it)))
101
+ return 0;
102
+ it->n++;
103
+ while (it->p < it->i->icount) {
104
+ spipage *p = it->i->i[it->p];
105
+ if (spunlikely(it->n >= p->count)) {
106
+ it->p++;
107
+ it->n = 0;
108
+ continue;
109
+ }
110
+ return 1;
111
+ }
112
+ return 0;
113
+ }
114
+
115
+ static inline int
116
+ sp_iprev(spii *it) {
117
+ if (spunlikely(! sp_ihas(it)))
118
+ return 0;
119
+ it->n--;
120
+ while (it->p >= 0) {
121
+ if (spunlikely(it->n < 0)) {
122
+ if (it->p == 0)
123
+ return 0;
124
+ it->p--;
125
+ it->n = it->i->i[it->p]->count-1;
126
+ continue;
127
+ }
128
+ return 1;
129
+ }
130
+ return 0;
131
+ }
132
+
133
+ static inline void
134
+ sp_iinv(spi *i, spii *ii) {
135
+ ii->i = i;
136
+ ii->p = -1;
137
+ ii->n = -1;
138
+ }
139
+
140
+ int sp_ilte(spi*, spii*, char*, int);
141
+ int sp_igte(spi*, spii*, char*, int);
142
+
143
+ static inline int
144
+ sp_iset(spi *i, spv *v, spv **old)
145
+ {
146
+ spii pos;
147
+ int rc = sp_isetorget(i, v, &pos);
148
+ if (splikely(rc <= 0))
149
+ return rc;
150
+ *old = sp_ival(&pos);
151
+ sp_ivalset(&pos, v);
152
+ return 1;
153
+ }
154
+
155
+ #endif
@@ -0,0 +1,91 @@
1
+ #ifndef SP_LIST_H_
2
+ #define SP_LIST_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
typedef struct splist splist;

/* Node of a circular doubly linked list; an empty list head points to
 * itself in both directions (see sp_listinit / sp_listempty). */
struct splist {
	splist *next;
	splist *prev;
};
17
+
18
+ static inline void
19
+ sp_listinit(splist *h) {
20
+ h->next = h->prev = h;
21
+ }
22
+
23
+ static inline void
24
+ sp_listappend(splist *h, splist *n) {
25
+ n->next = h;
26
+ n->prev = h->prev;
27
+ n->prev->next = n;
28
+ n->next->prev = n;
29
+ }
30
+
31
+ static inline void
32
+ sp_listunlink(splist *n) {
33
+ n->prev->next = n->next;
34
+ n->next->prev = n->prev;
35
+ }
36
+
37
+ static inline void
38
+ sp_listpush(splist *h, splist *n) {
39
+ n->next = h->next;
40
+ n->prev = h;
41
+ n->prev->next = n;
42
+ n->next->prev = n;
43
+ }
44
+
45
+ static inline splist*
46
+ sp_listpop(splist *h) {
47
+ register splist *pop = h->next;
48
+ sp_listunlink(pop);
49
+ return pop;
50
+ }
51
+
52
+ static inline int
53
+ sp_listempty(splist *l) {
54
+ return l->next == l && l->prev == l;
55
+ }
56
+
57
+ static inline void
58
+ sp_listmerge(splist *a, splist *b) {
59
+ if (spunlikely(sp_listempty(b)))
60
+ return;
61
+ register splist *first = b->next;
62
+ register splist *last = b->prev;
63
+ first->prev = a->prev;
64
+ a->prev->next = first;
65
+ last->next = a;
66
+ a->prev = last;
67
+ }
68
+
69
+ static inline void
70
+ sp_listreplace(splist *o, splist *n) {
71
+ n->next = o->next;
72
+ n->next->prev = n;
73
+ n->prev = o->prev;
74
+ n->prev->next = n;
75
+ }
76
+
77
/*
 * List iteration helpers. H is the list head, I the cursor variable and N
 * a scratch variable holding the successor.
 *
 * Every macro argument is parenthesised so that arbitrary expressions
 * (for example `cond ? &a : &b`) can be passed as the head. Arguments are
 * still evaluated on every iteration, so they must be free of side effects.
 */

/* true when node N is the list head H, i.e. iteration reached the end */
#define sp_listlast(H, N) ((H) == (N))

/* walk forward over every node of H */
#define sp_listforeach(H, I) \
	for ((I) = (H)->next; (I) != (H); (I) = (I)->next)

/* resume a forward walk from the current value of I */
#define sp_listforeach_continue(H, I) \
	for (; (I) != (H); (I) = (I)->next)

/* forward walk that tolerates unlinking/freeing I inside the body: the
 * successor is captured in N before the body runs */
#define sp_listforeach_safe(H, I, N) \
	for ((I) = (H)->next; (I) != (H) && ((N) = (I)->next); (I) = (N))

/* walk backward over every node of H */
#define sp_listforeach_reverse(H, I) \
	for ((I) = (H)->prev; (I) != (H); (I) = (I)->prev)
90
+
91
+ #endif
@@ -0,0 +1,77 @@
1
+ #ifndef SP_LOCK_H_
2
+ #define SP_LOCK_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
+ #include <unistd.h>
13
+
14
/* A spinlock is one byte: 0 = free, non-zero = held. */
typedef uint8_t spspinlock;

/* CPU_PAUSE is issued on every turn of the busy-wait loop in sp_lock():
 * the x86 `pause` instruction where available, otherwise a no-op. */
#if defined(__x86_64__) || defined(__i386) || defined(_X86_)
#define CPU_PAUSE __asm__ ("pause")
#else
#define CPU_PAUSE do { } while(0)
#endif
21
+
22
+ static inline void
23
+ sp_lockinit(volatile spspinlock *l) {
24
+ *l = 0;
25
+ }
26
+
27
+ static inline void
28
+ sp_lockfree(volatile spspinlock *l) {
29
+ *l = 0;
30
+ }
31
+
32
+ static inline void
33
+ sp_lock(volatile spspinlock *l) {
34
+ if (__sync_lock_test_and_set(l, 1) != 0) {
35
+ unsigned int spin_count = 0U;
36
+ for (;;) {
37
+ CPU_PAUSE;
38
+ if (*l == 0U && __sync_lock_test_and_set(l, 1) == 0)
39
+ break;
40
+ if (++spin_count > 100U)
41
+ usleep(0);
42
+ }
43
+ }
44
+ }
45
+
46
+ static inline void
47
+ sp_unlock(volatile spspinlock *l) {
48
+ __sync_lock_release(l);
49
+ }
50
+
51
+ #if 0 /* disabled by the preprocessor: same four lock functions expressed with pthread spinlocks */
52
+ #include <pthread.h>
53
+
54
+ typedef pthread_spinlock_t spspinlock;
55
+
56
+ static inline void
57
+ sp_lockinit(volatile spspinlock *l) {
58
+ pthread_spin_init(l, 0); /* second argument 0 is the pshared value */
59
+ }
60
+
61
+ static inline void
62
+ sp_lockfree(volatile spspinlock *l) {
63
+ pthread_spin_destroy(l);
64
+ }
65
+
66
+ static inline void
67
+ sp_lock(volatile spspinlock *l) {
68
+ pthread_spin_lock(l);
69
+ }
70
+
71
+ static inline void
72
+ sp_unlock(volatile spspinlock *l) {
73
+ pthread_spin_unlock(l);
74
+ }
75
+ #endif /* 0 */
76
+
77
+ #endif
@@ -0,0 +1,20 @@
1
+ #ifndef SP_MACRO_H_
2
+ #define SP_MACRO_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
/* lay the annotated struct out without padding */
#define sppacked __attribute__((packed))
/* silence "unused" warnings for the annotated entity */
#define spunused __attribute__((unused))
/* mark a function as a hot spot for the optimiser */
#define sphot __attribute__((hot))
/* branch prediction hints; both evaluate to the truth value (0/1) of EXPR */
#define splikely(EXPR) __builtin_expect(!! (EXPR), 1)
#define spunlikely(EXPR) __builtin_expect(!! (EXPR), 0)
/* integer division of a by b rounded up; fully parenthesised so the macro
 * can be used as an operand of /, % and similar operators */
#define spdiv(a, b) (((a) + (b) - 1) / (b))
/* recover a pointer to the enclosing T from a pointer N to its member F */
#define spcast(N, T, F) ((T*)((char*)(N) - __builtin_offsetof(T, F)))
19
+
20
+ #endif
@@ -0,0 +1,44 @@
1
+
2
+ #
3
+ # sophia makefile.
4
+ #
5
CC ?= gcc
RM ?= rm
LN ?= ln
VERMAJOR = 1
VERMINOR = 1
TARGET_STATIC = libsophia.a
TARGET_DSOLIB = libsophia.so.$(VERMAJOR).$(VERMINOR)
TARGET_DSO = libsophia.so
CFLAGS ?= -I. -std=c99 -pedantic -Wextra -Wall -pthread -O2 -DNDEBUG -fPIC
# the soname must match the libsophia.so.$(VERMAJOR) symlink created below,
# otherwise the run-time loader cannot resolve the library
LDFLAGS ?= -shared -soname $(TARGET_DSO).$(VERMAJOR)

OBJS = file.o \
       crc.o \
       e.o \
       i.o \
       cat.o \
       rep.o \
       util.o \
       sp.o \
       recover.o \
       merge.o \
       gc.o \
       cursor.o

# neither name is a file; never let a stray file called ALL/clean hide them
.PHONY: ALL clean

ALL: $(TARGET_STATIC) $(TARGET_DSO)

# there is no header dependency tracking, so every build starts from clean
$(TARGET_STATIC): clean $(OBJS)
	$(AR) cru $(TARGET_STATIC) $(OBJS)

$(TARGET_DSO): clean $(OBJS)
	$(LD) $(OBJS) $(LDFLAGS) -o $(TARGET_DSOLIB)
	$(LN) -sf $(TARGET_DSOLIB) $(TARGET_DSO).$(VERMAJOR)
	$(LN) -sf $(TARGET_DSOLIB) $(TARGET_DSO)

.c.o:
	$(CC) $(CFLAGS) -c $<

clean:
	$(RM) -f $(OBJS) $(TARGET_STATIC)
	$(RM) -f $(TARGET_DSOLIB) $(TARGET_DSO).$(VERMAJOR) $(TARGET_DSO)
@@ -0,0 +1,662 @@
1
+
2
+ /*
3
+ * sophia database
4
+ * sphia.org
5
+ *
6
+ * Copyright (c) Dmitry Simonenko
7
+ * BSD License
8
+ */
9
+
10
+ #include <sp.h>
11
+
12
/* Description of the next page to be written by sp_merge0(), filled in by
 * sp_mergeget0(). */
typedef struct {
	uint32_t count;   /* versions that go into the page */
	uint32_t psize;   /* page size in bytes, header included */
	uint32_t bsize;   /* size of one key block: largest key + sizeof(spvh) */
} spupdate0;
17
+
18
+ static inline void
19
+ sp_mergeget0(spii *pos, uint32_t n, spupdate0 *u)
20
+ {
21
+ memset(u, 0, sizeof(*u));
22
+ /*
23
+ * collect n or less versions for scheduled page write,
24
+ * not marked as delete, calculate page size and the
25
+ * block size.
26
+ */
27
+ spii i = *pos;
28
+ while (u->count < n && sp_ihas(&i)) {
29
+ spv *v = sp_ival(&i);
30
+ if (v->flags & SPDEL) {
31
+ sp_inext(&i);
32
+ continue;
33
+ }
34
+ if (v->size > u->bsize)
35
+ u->bsize = v->size;
36
+ sp_inext(&i);
37
+ u->count++;
38
+ u->psize += sp_vvsize(v);
39
+ }
40
+ u->bsize += sizeof(spvh);
41
+ u->psize += sizeof(sppageh) + u->bsize * u->count;
42
+ }
43
+
44
+ static inline int sp_merge0(sp *s, spepoch *x, spi *index)
45
+ {
46
+ spv *max = NULL;
47
+ spv *min = NULL;
48
+ int rc;
49
+ spii i;
50
+ sp_iopen(&i, index);
51
+
52
+ while (sp_active(s))
53
+ {
54
+ /* get the new page properties and a data */
55
+ spupdate0 u;
56
+ sp_mergeget0(&i, s->e->page, &u);
57
+ if (spunlikely(u.count == 0))
58
+ break;
59
+
60
+ /* ensure enough space for the page in the file */
61
+ sp_lock(&x->lock);
62
+ rc = sp_mapensure(&x->db, u.psize, s->e->dbgrow);
63
+ if (spunlikely(rc == -1)) {
64
+ sp_unlock(&x->lock);
65
+ sp_e(s, SPEIO, "failed to remap db file", x->epoch);
66
+ goto err;
67
+ }
68
+ sp_unlock(&x->lock);
69
+
70
+ /* write the page.
71
+ *
72
+ * [header] [keys (block sized)] [values]
73
+ *
74
+ * Use partly precalculated crc for a version.
75
+ */
76
+ sppageh *h = (sppageh*)(x->db.map + x->db.used);
77
+ h->id = ++s->psn;
78
+ h->count = u.count;
79
+ h->bsize = u.bsize;
80
+ h->size = u.psize - sizeof(sppageh);
81
+ h->crc = sp_crc32c(0, &h->id, sizeof(sppageh) - sizeof(h->crc));
82
+
83
+ char *ph = x->db.map + x->db.used + sizeof(sppageh);
84
+ char *pv = ph + u.count * u.bsize;
85
+
86
+ uint32_t current = 0;
87
+ spv *last = NULL;
88
+ while (sp_active(s) && current < u.count)
89
+ {
90
+ spv *v = sp_ival(&i);
91
+ if (v->flags & SPDEL) {
92
+ sp_inext(&i);
93
+ continue;
94
+ }
95
+ if (spunlikely(min == NULL)) {
96
+ min = sp_vdup(s, v);
97
+ if (spunlikely(min == NULL)) {
98
+ sp_e(s, SPEOOM, "failed to allocate key");
99
+ goto err;
100
+ }
101
+ }
102
+ assert(v->size <= u.bsize);
103
+ spvh *vh = (spvh*)(ph);
104
+ vh->size = v->size;
105
+ vh->flags = v->flags;
106
+ vh->vsize = sp_vvsize(v);
107
+ vh->voffset = pv - (char*)h;
108
+ vh->crc = sp_crc32c(v->crc, &vh->size, sizeof(spvh) - sizeof(vh->crc));
109
+ memcpy(vh->key, v->key, v->size);
110
+ memcpy(pv, sp_vv(v), vh->vsize);
111
+
112
+ ph += u.bsize;
113
+ pv += vh->vsize;
114
+ last = v;
115
+ current++;
116
+ sp_inext(&i);
117
+ }
118
+
119
+ /* cancellation point check */
120
+ if (! sp_active(s))
121
+ goto err;
122
+
123
+ /* create in-memory page */
124
+ sppage *page = sp_pagenew(s, x);
125
+ if (spunlikely(page == NULL)) {
126
+ sp_e(s, SPEOOM, "failed to allocate page");
127
+ goto err;
128
+ }
129
+ max = sp_vdup(s, last);
130
+ if (spunlikely(max == NULL)) {
131
+ sp_e(s, SPEOOM, "failed to allocate key");
132
+ goto err;
133
+ }
134
+ assert(min != NULL);
135
+ page->id = s->psn;
136
+ page->offset = x->db.used;
137
+ page->size = u.psize;
138
+ page->min = min;
139
+ page->max = max;
140
+
141
+ /* insert page to the index */
142
+ sp_lock(&s->locks);
143
+ sppage *o = NULL;
144
+ rc = sp_catset(&s->s, page, &o);
145
+ if (spunlikely(rc == -1)) {
146
+ sp_unlock(&s->locks);
147
+ sp_pagefree(s, page);
148
+ sp_e(s, SPEOOM, "failed to allocate page index page");
149
+ goto err;
150
+ }
151
+ sp_unlock(&s->locks);
152
+
153
+ /* attach page to the epoch list */
154
+ sp_pageattach(page);
155
+
156
+ /* advance file buffer */
157
+ sp_mapuse(&x->db, u.psize);
158
+
159
+ min = NULL;
160
+ max = NULL;
161
+ }
162
+ return 0;
163
+ err:
164
+ if (min)
165
+ sp_free(&s->a, min);
166
+ if (max)
167
+ sp_free(&s->a, max);
168
+ return -1;
169
+ }
170
+
171
+ typedef struct {
172
+ uint32_t pi;
173
+ sppage *p;
174
+ spepoch *s; /* p->epoch */
175
+ uint32_t count;
176
+ uint32_t bsize;
177
+ } spupdate;
178
+
179
+ typedef struct {
180
+ /* a is an original page version
181
+ b is in-memory version */
182
+ int a_bsize, b_bsize;
183
+ int a_count, b_count;
184
+ int A, B;
185
+ spvh *a;
186
+ spv *b;
187
+ spref last;
188
+ spii i;
189
+ spepoch *x;
190
+ } spmerge;
191
+
192
+ typedef struct {
193
+ splist split;
194
+ int count;
195
+ } spsplit;
196
+
197
+ static inline int
198
+ sp_mergeget(sp *s, spii *from, spupdate *u)
199
+ {
200
+ spii i = *from;
201
+ if (spunlikely(! sp_ihas(&i)))
202
+ return 0;
203
+ memset(u, 0, sizeof(spupdate));
204
+ /* match the origin page and a associated
205
+ * range of keys. */
206
+ sppage *origin = NULL;
207
+ uint32_t origin_idx = 0;
208
+ uint32_t n = 0;
209
+ while (sp_ihas(&i)) {
210
+ spv *v = sp_ival(&i);
211
+ if (splikely(origin)) {
212
+ if (! sp_catown(&s->s, origin_idx, v))
213
+ break;
214
+ } else {
215
+ origin = sp_catroute(&s->s, v->key, v->size, &origin_idx);
216
+ assert(((spepoch*)origin->epoch)->type == SPDB);
217
+ }
218
+ if (v->size > u->bsize)
219
+ u->bsize = v->size;
220
+ sp_inext(&i);
221
+ n++;
222
+ }
223
+ assert(n > 0);
224
+ u->count = n;
225
+ u->bsize += sizeof(spvh);
226
+ u->pi = origin_idx;
227
+ u->p = origin;
228
+ u->s = origin->epoch;
229
+ return 1;
230
+ }
231
+
232
+ static inline void
233
+ sp_mergeinit(spepoch *x, spmerge *m, spupdate *u, spii *from)
234
+ {
235
+ sppageh *h = (sppageh*)(u->s->db.map + u->p->offset);
236
+ uint32_t bsize = u->bsize;
237
+ if (h->bsize > bsize)
238
+ bsize = h->bsize;
239
+ m->a_bsize = h->bsize;
240
+ m->b_bsize = bsize;
241
+ memset(&m->last, 0, sizeof(m->last));
242
+ m->i = *from;
243
+ m->A = 0;
244
+ m->B = 0;
245
+ m->a_count = h->count;
246
+ m->b_count = u->count;
247
+ m->a = (spvh*)((char*)h + sizeof(sppageh));
248
+ m->b = sp_ival(from);
249
+ m->x = x;
250
+ }
251
+
252
+ static inline int sp_mergenext(sp *s, spmerge *m)
253
+ {
254
+ if (m->A < m->a_count && m->B < m->b_count)
255
+ {
256
+ register int cmp =
257
+ s->e->cmp(m->a->key, m->a->size,
258
+ m->b->key,
259
+ m->b->size, s->e->cmparg);
260
+ switch (cmp) {
261
+ case 0:
262
+ /* use updated key B */
263
+ m->last.type = SPREFM;
264
+ m->last.v.v = m->b;
265
+ m->A++;
266
+ m->a = (spvh*)((char*)m->a + m->a_bsize);
267
+ m->B++;
268
+ sp_inext(&m->i);
269
+ m->b = sp_ival(&m->i);
270
+ return 1;
271
+ case -1:
272
+ /* use A */
273
+ m->last.type = SPREFD;
274
+ m->last.v.vh = m->a;
275
+ m->A++;
276
+ m->a = (spvh*)((char*)m->a + m->a_bsize);
277
+ return 1;
278
+ case 1:
279
+ /* use B */
280
+ m->last.type = SPREFM;
281
+ m->last.v.v = m->b;
282
+ m->B++;
283
+ sp_inext(&m->i);
284
+ m->b = sp_ival(&m->i);
285
+ return 1;
286
+ }
287
+ }
288
+ if (m->A < m->a_count) {
289
+ /* use A */
290
+ m->last.type = SPREFD;
291
+ m->last.v.vh = m->a;
292
+ m->A++;
293
+ m->a = (spvh*)((char*)m->a + m->a_bsize);
294
+ return 1;
295
+ }
296
+ if (m->B < m->b_count) {
297
+ /* use B */
298
+ m->last.type = SPREFM;
299
+ m->last.v.v = m->b;
300
+ m->B++;
301
+ sp_inext(&m->i);
302
+ m->b = sp_ival(&m->i);
303
+ return 1;
304
+ }
305
+ return 0;
306
+ }
307
+
308
+ static inline void
309
+ sp_splitinit(spsplit *l) {
310
+ sp_listinit(&l->split);
311
+ l->count = 0;
312
+ }
313
+
314
+ static inline void
315
+ sp_splitfree(sp *s, spsplit *l) {
316
+ splist *i, *n;
317
+ sp_listforeach_safe(&l->split, i, n) {
318
+ sppage *p = spcast(i, sppage, link);
319
+ sp_pagefree(s, p);
320
+ }
321
+ }
322
+
323
+ static inline int sp_split(sp *s, spupdate *u, spmerge *m, spsplit *l)
324
+ {
325
+ int rc;
326
+ int bsize = m->b_bsize;
327
+ uint32_t pagesize = sizeof(sppageh);
328
+ uint32_t count = 0;
329
+ /*
330
+ * merge in-memory keys with the origin page keys,
331
+ * skip any deletes and calculate result
332
+ * page size.
333
+ */
334
+ sp_refsetreset(&s->refs);
335
+ while (count < s->e->page && sp_mergenext(s, m)) {
336
+ if (sp_refisdel(&m->last))
337
+ continue;
338
+ sp_refsetadd(&s->refs, &m->last);
339
+ pagesize += bsize + sp_refvsize(&m->last);
340
+ count++;
341
+ }
342
+ if (spunlikely(count == 0 && l->count > 0))
343
+ return 0;
344
+
345
+ /*
346
+ * set the origin page id for a first spitted page
347
+ */
348
+ uint32_t psn = (l->count == 0) ? u->p->id : ++s->psn;
349
+
350
+ /* ensure enough space for the page in the file */
351
+ sp_lock(&m->x->lock);
352
+ rc = sp_mapensure(&m->x->db, pagesize, s->e->dbgrow);
353
+ if (spunlikely(rc == -1)) {
354
+ sp_unlock(&m->x->lock);
355
+ return sp_e(s, SPEIO, "failed to remap db file",
356
+ m->x->epoch);
357
+ }
358
+ sp_unlock(&m->x->lock);
359
+
360
+ /* in case if all origin page keys are deleted.
361
+ *
362
+ * write special page header without any data, indicating
363
+ * that page should be skipped during recovery
364
+ * and not being added to the index.
365
+ */
366
+ if (spunlikely(count == 0 && l->count == 0)) {
367
+ sppageh *h = (sppageh*)(m->x->db.map + m->x->db.used);
368
+ h->id = psn;
369
+ h->count = 0;
370
+ h->bsize = 0;
371
+ h->size = 0;
372
+ h->crc = sp_crc32c(0, &h->id, sizeof(sppageh) - sizeof(h->crc));
373
+ sp_mapuse(&m->x->db, pagesize);
374
+ return 0;
375
+ }
376
+
377
+ spref *r = s->refs.r;
378
+ spref *min = r;
379
+ spref *max = r + (count - 1);
380
+
381
+ /*
382
+ * write the page
383
+ */
384
+ sppageh *h = (sppageh*)(m->x->db.map + m->x->db.used);
385
+ h->id = psn;
386
+ h->count = count;
387
+ h->bsize = bsize;
388
+ h->size = pagesize - sizeof(sppageh);
389
+ h->crc = sp_crc32c(0, &h->id, sizeof(sppageh) - sizeof(h->crc));
390
+
391
+ spvh *ptr = (spvh*)(m->x->db.map + m->x->db.used + sizeof(sppageh));
392
+ char *ptrv = (char*)ptr + count * bsize;
393
+
394
+ uint32_t i = 0;
395
+ while (i < count)
396
+ {
397
+ uint32_t voffset = ptrv - (char*)h;
398
+ switch (r->type) {
399
+ case SPREFD:
400
+ memcpy(ptr, r->v.vh, sizeof(spvh) + r->v.vh->size);
401
+ memcpy(ptrv, u->s->db.map + u->p->offset + r->v.vh->voffset,
402
+ r->v.vh->vsize);
403
+ ptr->voffset = voffset;
404
+ uint32_t crc;
405
+ crc = sp_crc32c(0, ptr->key, ptr->size);
406
+ crc = sp_crc32c(crc, ptrv, r->v.vh->vsize);
407
+ crc = sp_crc32c(crc, &ptr->size, sizeof(spvh) - sizeof(ptr->crc));
408
+ ptr->crc = crc;
409
+ ptrv += r->v.vh->vsize;
410
+ break;
411
+ case SPREFM:
412
+ ptr->size = r->v.v->size;
413
+ ptr->flags = r->v.v->flags;
414
+ ptr->voffset = voffset;
415
+ ptr->vsize = sp_vvsize(r->v.v);
416
+ ptr->crc = sp_crc32c(r->v.v->crc, &ptr->size, sizeof(spvh) -
417
+ sizeof(ptr->crc));
418
+ memcpy(ptr->key, r->v.v->key, r->v.v->size);
419
+ memcpy(ptrv, sp_vv(r->v.v), ptr->vsize);
420
+ ptrv += ptr->vsize;
421
+ break;
422
+ }
423
+ assert((uint32_t)(ptrv - (char*)h) <= pagesize);
424
+ ptr = (spvh*)((char*)ptr + bsize);
425
+ r++;
426
+ i++;
427
+ }
428
+
429
+ /* create in-memory page */
430
+ sppage *p = sp_pagenew(s, m->x);
431
+ if (spunlikely(p == NULL))
432
+ return sp_e(s, SPEOOM, "failed to allocate page");
433
+ p->id = psn;
434
+ p->offset = m->x->db.used;
435
+ p->size = pagesize;
436
+ p->min = sp_vdupref(s, min, m->x->epoch);
437
+ if (spunlikely(p->min == NULL)) {
438
+ sp_free(&s->a, p);
439
+ return sp_e(s, SPEOOM, "failed to allocate key");
440
+ }
441
+ p->max = sp_vdupref(s, max, m->x->epoch);
442
+ if (spunlikely(p->max == NULL)) {
443
+ sp_free(&s->a, p->min);
444
+ sp_free(&s->a, p);
445
+ return sp_e(s, SPEOOM, "failed to allocate key");
446
+ }
447
+
448
+ /* add page to split list */
449
+ sp_listappend(&l->split, &p->link);
450
+ l->count++;
451
+
452
+ /* advance buffer */
453
+ sp_mapuse(&m->x->db, pagesize);
454
+ return 1;
455
+ }
456
+
457
+ static inline int sp_splitcommit(sp *s, spupdate *u, spmerge *m, spsplit *l)
458
+ {
459
+ sp_lock(&s->locks);
460
+ /* remove origin page, if there were no page
461
+ * updates after split */
462
+ if (spunlikely(l->count == 0)) {
463
+ sp_pagefree(s, u->p);
464
+ u->s->ngc++;
465
+ u->p = NULL;
466
+ sp_catdel(&s->s, u->pi);
467
+ sp_unlock(&s->locks);
468
+ return 0;
469
+ }
470
+ splist *i, *n;
471
+ sp_listforeach_safe(&l->split, i, n)
472
+ {
473
+ sppage *p = spcast(i, sppage, link);
474
+ /* update origin page first */
475
+ if (spunlikely(p->id == u->p->id)) {
476
+ sp_listunlink(&p->link);
477
+ /* relink origin page to new epoch */
478
+ sppage *origin = u->p;
479
+ assert(origin->epoch != m->x);
480
+ sp_listunlink(&origin->link);
481
+ u->s->ngc++; /* origin db epoch */
482
+ m->x->n++; /* current db epoch */
483
+ sp_listappend(&m->x->pages, &origin->link);
484
+ /* update origin page */
485
+ origin->offset = p->offset;
486
+ assert(p->epoch == m->x);
487
+ origin->epoch = m->x;
488
+ origin->size = p->size;
489
+ sp_free(&s->a, origin->min);
490
+ sp_free(&s->a, origin->max);
491
+ origin->min = p->min;
492
+ origin->max = p->max;
493
+ sp_free(&s->a, p);
494
+ continue;
495
+ }
496
+ /* insert split page */
497
+ sppage *o = NULL;
498
+ int rc = sp_catset(&s->s, p, &o);
499
+ if (spunlikely(rc == -1)) {
500
+ sp_unlock(&s->locks);
501
+ return sp_e(s, SPEOOM, "failed to allocate page index page");
502
+ }
503
+ assert(o == NULL);
504
+ sp_pageattach(p);
505
+ m->x->n++;
506
+ }
507
+ sp_unlock(&s->locks);
508
+ return 0;
509
+ }
510
+
511
+ static inline int sp_mergeN(sp *s, spepoch *x, spi *index)
512
+ {
513
+ int rc;
514
+ spii i;
515
+ sp_iopen(&i, index);
516
+ spupdate u;
517
+ while (sp_mergeget(s, &i, &u))
518
+ {
519
+ spmerge m;
520
+ sp_mergeinit(x, &m, &u, &i);
521
+ spsplit l;
522
+ sp_splitinit(&l);
523
+ while (sp_active(s)) {
524
+ rc = sp_split(s, &u, &m, &l);
525
+ if (spunlikely(rc == 0))
526
+ break;
527
+ else
528
+ if (spunlikely(rc == -1)) {
529
+ sp_splitfree(s, &l);
530
+ return -1;
531
+ }
532
+ }
533
+ if (spunlikely(! sp_active(s)))
534
+ return 0;
535
+ rc = sp_splitcommit(s, &u, &m, &l);
536
+ if (spunlikely(rc == -1)) {
537
+ sp_splitfree(s, &l);
538
+ return -1;
539
+ }
540
+ i = m.i;
541
+ }
542
+ return 0;
543
+ }
544
+
545
+ int sp_merge(sp *s)
546
+ {
547
+ sp_lock(&s->lockr);
548
+ sp_lock(&s->locki);
549
+
550
+ spepoch *x = sp_replive(&s->rep);
551
+ /* rotate current live epoch */
552
+ sp_repset(&s->rep, x, SPXFER);
553
+ int rc = sp_rotate(s);
554
+ if (spunlikely(rc == -1)) {
555
+ sp_lock(&s->lockr);
556
+ sp_lock(&s->locki);
557
+ return -1;
558
+ }
559
+ /* swap index */
560
+ spi *index = sp_iswap(s);
561
+
562
+ sp_unlock(&s->lockr);
563
+ sp_unlock(&s->locki);
564
+
565
+ /* complete old live epoch log */
566
+ rc = sp_logeof(&x->log);
567
+ if (spunlikely(rc == -1))
568
+ return sp_e(s, SPEIO, "failed to write eof marker", x->epoch);
569
+ rc = sp_logcomplete(&x->log);
570
+ if (spunlikely(rc == -1))
571
+ return sp_e(s, SPEIO, "failed to complete log file", x->epoch);
572
+
573
+ /* create db file */
574
+ rc = sp_mapepochnew(&x->db, s->e->dbnewsize, s->e->dir, x->epoch, "db");
575
+ if (spunlikely(rc == -1))
576
+ return sp_e(s, SPEIO, "failed to create db file", x->epoch);
577
+
578
+ /* merge index */
579
+ if (splikely(s->s.count > 0))
580
+ rc = sp_mergeN(s, x, index);
581
+ else
582
+ rc = sp_merge0(s, x, index);
583
+
584
+ /* check cancellation point */
585
+ if (! sp_active(s)) {
586
+ sp_mapunlink(&x->db);
587
+ sp_mapclose(&x->db);
588
+ return rc;
589
+ }
590
+ if (spunlikely(rc == -1))
591
+ return -1;
592
+
593
+ /* gc */
594
+ if (s->e->gc) {
595
+ rc = sp_gc(s, x);
596
+ if (spunlikely(rc == -1))
597
+ return -1;
598
+ }
599
+
600
+ /* sync/truncate db file and remap read-only only if
601
+ * database file is not empty. */
602
+ if (splikely(x->db.used > 0)) {
603
+ sp_lock(&x->lock);
604
+ rc = sp_mapcomplete(&x->db);
605
+ if (spunlikely(rc == -1)) {
606
+ sp_unlock(&x->lock);
607
+ return sp_e(s, SPEIO, "failed to complete db file", x->epoch);
608
+ }
609
+ sp_unlock(&x->lock);
610
+ /* set epoch as db */
611
+ sp_lock(&s->lockr);
612
+ sp_repset(&s->rep, x, SPDB);
613
+ sp_unlock(&s->lockr);
614
+ /* remove log file */
615
+ rc = sp_logunlink(&x->log);
616
+ if (spunlikely(rc == -1))
617
+ return sp_e(s, SPEIO, "failed to unlink log file", x->epoch);
618
+ rc = sp_logclose(&x->log);
619
+ if (spunlikely(rc == -1))
620
+ return sp_e(s, SPEIO, "failed to close log file", x->epoch);
621
+ } else {
622
+ /* there are possible situation when all keys has
623
+ * been deleted. */
624
+ rc = sp_mapunlink(&x->db);
625
+ if (spunlikely(rc == -1))
626
+ return sp_e(s, SPEIO, "failed to unlink db file", x->epoch);
627
+ rc = sp_mapclose(&x->db);
628
+ if (spunlikely(rc == -1))
629
+ return sp_e(s, SPEIO, "failed to close db file", x->epoch);
630
+ }
631
+
632
+ /* remove all xfer epochs that took part in the merge
633
+ * including current, if it's database file
634
+ * is empty. */
635
+ while (sp_active(s)) {
636
+ sp_lock(&s->lockr);
637
+ spepoch *e = sp_repxfer(&s->rep);
638
+ sp_unlock(&s->lockr);
639
+ if (e == NULL)
640
+ break;
641
+ rc = sp_logunlink(&e->log);
642
+ if (spunlikely(rc == -1))
643
+ return sp_e(s, SPEIO, "failed to unlink log file", e->epoch);
644
+ rc = sp_logclose(&e->log);
645
+ if (spunlikely(rc == -1))
646
+ return sp_e(s, SPEIO, "failed to close log file", e->epoch);
647
+ sp_lock(&s->lockr);
648
+ sp_repdetach(&s->rep, e);
649
+ sp_free(&s->a, e);
650
+ sp_unlock(&s->lockr);
651
+ }
652
+
653
+ /* truncate the index (skip index on a read) */
654
+ sp_iskipset(s, 1);
655
+ rc = sp_itruncate(index);
656
+ if (spunlikely(rc == -1)) {
657
+ sp_iskipset(s, 0);
658
+ return sp_e(s, SPE, "failed create index");
659
+ }
660
+ sp_iskipset(s, 0);
661
+ return 0;
662
+ }