sophia-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +29 -0
  7. data/Rakefile +22 -0
  8. data/ext/extconf.rb +13 -0
  9. data/ext/sophia.c +220 -0
  10. data/lib/sophia-ruby.rb +1 -0
  11. data/lib/sophia/version.rb +3 -0
  12. data/sophia-ruby.gemspec +47 -0
  13. data/test/test_sophia.rb +33 -0
  14. data/vendor/sophia/.gitignore +18 -0
  15. data/vendor/sophia/COPYRIGHT +29 -0
  16. data/vendor/sophia/README +5 -0
  17. data/vendor/sophia/db/a.h +58 -0
  18. data/vendor/sophia/db/cat.c +195 -0
  19. data/vendor/sophia/db/cat.h +32 -0
  20. data/vendor/sophia/db/core.h +129 -0
  21. data/vendor/sophia/db/crc.c +343 -0
  22. data/vendor/sophia/db/crc.h +14 -0
  23. data/vendor/sophia/db/cursor.c +551 -0
  24. data/vendor/sophia/db/cursor.h +47 -0
  25. data/vendor/sophia/db/e.c +49 -0
  26. data/vendor/sophia/db/e.h +49 -0
  27. data/vendor/sophia/db/file.c +355 -0
  28. data/vendor/sophia/db/file.h +106 -0
  29. data/vendor/sophia/db/gc.c +71 -0
  30. data/vendor/sophia/db/gc.h +14 -0
  31. data/vendor/sophia/db/i.c +368 -0
  32. data/vendor/sophia/db/i.h +155 -0
  33. data/vendor/sophia/db/list.h +91 -0
  34. data/vendor/sophia/db/lock.h +77 -0
  35. data/vendor/sophia/db/macro.h +20 -0
  36. data/vendor/sophia/db/makefile +44 -0
  37. data/vendor/sophia/db/merge.c +662 -0
  38. data/vendor/sophia/db/merge.h +14 -0
  39. data/vendor/sophia/db/meta.h +87 -0
  40. data/vendor/sophia/db/recover.c +433 -0
  41. data/vendor/sophia/db/recover.h +14 -0
  42. data/vendor/sophia/db/ref.h +111 -0
  43. data/vendor/sophia/db/rep.c +128 -0
  44. data/vendor/sophia/db/rep.h +120 -0
  45. data/vendor/sophia/db/sophia.h +84 -0
  46. data/vendor/sophia/db/sp.c +626 -0
  47. data/vendor/sophia/db/sp.h +50 -0
  48. data/vendor/sophia/db/task.h +70 -0
  49. data/vendor/sophia/db/track.h +99 -0
  50. data/vendor/sophia/db/util.c +105 -0
  51. data/vendor/sophia/db/util.h +25 -0
  52. data/vendor/sophia/makefile +7 -0
  53. data/vendor/sophia/sophia.gyp +30 -0
  54. data/vendor/sophia/test/common.c +870 -0
  55. data/vendor/sophia/test/crash.c +492 -0
  56. data/vendor/sophia/test/i.c +403 -0
  57. data/vendor/sophia/test/limit.c +65 -0
  58. data/vendor/sophia/test/makefile +30 -0
  59. data/vendor/sophia/test/merge.c +890 -0
  60. data/vendor/sophia/test/recover.c +1550 -0
  61. data/vendor/sophia/test/test.h +66 -0
  62. metadata +134 -0
@@ -0,0 +1,155 @@
1
+ #ifndef SP_I_H_
2
+ #define SP_I_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
+ typedef struct spipage spipage;
13
+ typedef struct spi spi;
14
+ typedef struct spii spii;
15
+
16
+ struct spipage { /* one page of the in-memory index: a counted array of version pointers */
17
+ uint16_t count; /* number of used slots in i[]; iterators bound their in-page position by it */
18
+ spv *i[]; /* flexible array member holding the version pointers */
19
+ } sppacked; /* sppacked is an attribute macro supplied by another header */
20
+
21
+ struct spi { /* paged in-memory index over spv entries */
22
+ spa *a; /* NOTE(review): presumably the allocator used for page memory - confirm in i.c */
23
+ int pagesize; /* page capacity setting passed to sp_iinit(); exact unit is not visible in this header */
24
+ spipage **i; /* array of page pointers */
25
+ uint32_t itop; /* NOTE(review): looks like the allocated length of i[] - confirm in i.c */
26
+ uint32_t icount; /* number of pages in i[]; iterators bound their page position by it */
27
+ uint32_t count; /* entry counter; sp_imax() treats 0 as "index is empty" */
28
+ spcmpf cmp; /* key comparison callback */
29
+ void *cmparg; /* opaque argument stored alongside cmp */
30
+ };
31
+
32
+ struct spii { /* iterator (cursor) over an spi */
33
+ spi *i; /* index being iterated */
34
+ long long p, n; /* p: page position in i->i[], n: slot inside that page; signed so sp_iinv() can store -1 */
35
+ };
36
+
37
+ int sp_iinit(spi*, spa*, int, spcmpf, void*);
38
+ void sp_ifree(spi*);
39
+ int sp_itruncate(spi*);
40
+ int sp_isetorget(spi *i, spv*, spii*);
41
+ int sp_idelraw(spi*, char*, int, spv**);
42
+ spv *sp_igetraw(spi*, char*, int);
43
+
44
+ static inline int
45
+ sp_idel(spi *i, spv *v, spv **old) {
46
+ return sp_idelraw(i, v->key, v->size, old);
47
+ }
48
+
49
+ static inline spv*
50
+ sp_iget(spi *i, spv *v) {
51
+ return sp_igetraw(i, v->key, v->size);
52
+ }
53
+
54
+ static inline void*
55
+ sp_imax(spi *i) {
56
+ if (spunlikely(i->count == 0))
57
+ return NULL;
58
+ return i->i[i->icount-1]->i[i->i[i->icount-1]->count-1];
59
+ }
60
+
61
+ static inline void
62
+ sp_ifirst(spii *it) {
63
+ it->p = 0;
64
+ it->n = 0;
65
+ }
66
+
67
+ static inline void
68
+ sp_ilast(spii *it) {
69
+ it->p = it->i->icount - 1;
70
+ it->n = it->i->i[it->i->icount - 1]->count - 1;
71
+ }
72
+
73
+ static inline void
74
+ sp_iopen(spii *it, spi *i) {
75
+ it->i = i;
76
+ sp_ifirst(it);
77
+ }
78
+
79
+ static inline int
80
+ sp_ihas(spii *it) {
81
+ return (it->p >= 0 && it->n >= 0) &&
82
+ (it->p < it->i->icount) &&
83
+ (it->n < it->i->i[it->p]->count);
84
+ }
85
+
86
+ static inline void
87
+ sp_ivalset(spii *it, spv *v) {
88
+ it->i->i[it->p]->i[it->n] = v;
89
+ }
90
+
91
+ static inline spv*
92
+ sp_ival(spii *it) {
93
+ if (spunlikely(! sp_ihas(it)))
94
+ return NULL;
95
+ return it->i->i[it->p]->i[it->n];
96
+ }
97
+
98
+ static inline int
99
+ sp_inext(spii *it) {
100
+ if (spunlikely(! sp_ihas(it)))
101
+ return 0;
102
+ it->n++;
103
+ while (it->p < it->i->icount) {
104
+ spipage *p = it->i->i[it->p];
105
+ if (spunlikely(it->n >= p->count)) {
106
+ it->p++;
107
+ it->n = 0;
108
+ continue;
109
+ }
110
+ return 1;
111
+ }
112
+ return 0;
113
+ }
114
+
115
+ static inline int
116
+ sp_iprev(spii *it) {
117
+ if (spunlikely(! sp_ihas(it)))
118
+ return 0;
119
+ it->n--;
120
+ while (it->p >= 0) {
121
+ if (spunlikely(it->n < 0)) {
122
+ if (it->p == 0)
123
+ return 0;
124
+ it->p--;
125
+ it->n = it->i->i[it->p]->count-1;
126
+ continue;
127
+ }
128
+ return 1;
129
+ }
130
+ return 0;
131
+ }
132
+
133
+ static inline void
134
+ sp_iinv(spi *i, spii *ii) {
135
+ ii->i = i;
136
+ ii->p = -1;
137
+ ii->n = -1;
138
+ }
139
+
140
+ int sp_ilte(spi*, spii*, char*, int);
141
+ int sp_igte(spi*, spii*, char*, int);
142
+
143
+ static inline int
144
+ sp_iset(spi *i, spv *v, spv **old)
145
+ {
146
+ spii pos;
147
+ int rc = sp_isetorget(i, v, &pos);
148
+ if (splikely(rc <= 0))
149
+ return rc;
150
+ *old = sp_ival(&pos);
151
+ sp_ivalset(&pos, v);
152
+ return 1;
153
+ }
154
+
155
+ #endif
@@ -0,0 +1,91 @@
1
+ #ifndef SP_LIST_H_
2
+ #define SP_LIST_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
/*
 * Intrusive circular doubly-linked list.
 *
 * A list is represented by a head node; an empty list is a head that
 * points at itself in both directions.
 */
typedef struct splist splist;

struct splist {
	splist *next, *prev;
};

/* Make h an empty list. */
static inline void
sp_listinit(splist *h)
{
	h->prev = h;
	h->next = h;
}

/* Link n as the last element of the list headed by h. */
static inline void
sp_listappend(splist *h, splist *n)
{
	splist *tail = h->prev;

	n->next = h;
	n->prev = tail;
	tail->next = n;
	h->prev = n;
}

/* Detach n from its neighbours; n's own pointers are left unchanged. */
static inline void
sp_listunlink(splist *n)
{
	splist *before = n->prev;
	splist *after = n->next;

	before->next = after;
	after->prev = before;
}

/* Link n as the first element of the list headed by h. */
static inline void
sp_listpush(splist *h, splist *n)
{
	splist *first = h->next;

	n->next = first;
	n->prev = h;
	h->next = n;
	first->prev = n;
}

/*
 * Detach and return the first element.
 * On an empty list the head itself is returned.
 */
static inline splist*
sp_listpop(splist *h)
{
	splist *first = h->next;

	sp_listunlink(first);
	return first;
}

/* Return 1 when l links only to itself, 0 otherwise. */
static inline int
sp_listempty(splist *l)
{
	if (l->next != l)
		return 0;
	return l->prev == l;
}
56
+
57
+ static inline void
58
+ sp_listmerge(splist *a, splist *b) {
59
+ if (spunlikely(sp_listempty(b)))
60
+ return;
61
+ register splist *first = b->next;
62
+ register splist *last = b->prev;
63
+ first->prev = a->prev;
64
+ a->prev->next = first;
65
+ last->next = a;
66
+ a->prev = last;
67
+ }
68
+
69
+ static inline void /* put n into o's position in the list; o's own next/prev are not modified */
70
+ sp_listreplace(splist *o, splist *n) {
71
+ n->next = o->next;
72
+ n->next->prev = n; /* when o is self-linked this store changes o->prev, which is re-read below */
73
+ n->prev = o->prev;
74
+ n->prev->next = n; /* net effect for a self-linked o: n ends up self-linked */
75
+ }
76
+
77
/* True when node N is the list head H, i.e. a traversal has wrapped around. */
#define sp_listlast(H, N) ((H) == (N))

/*
 * Traversal helpers for splist-style circular lists (nodes with next/prev).
 *
 * H is the list head, I the cursor variable, N a scratch variable for the
 * "safe" form. Every argument is parenthesized in the expansion so that
 * expressions such as `cond ? &a : &b` can be passed as H. Arguments are
 * evaluated more than once, so they must be free of side effects.
 */

/* Visit each element from first to last. */
#define sp_listforeach(H, I) \
	for ((I) = (H)->next; (I) != (H); (I) = (I)->next)

/* Resume a forward traversal from the current value of I. */
#define sp_listforeach_continue(H, I) \
	for (; (I) != (H); (I) = (I)->next)

/* Forward traversal that allows the body to unlink or free I:
 * the successor is saved in N before the body runs. */
#define sp_listforeach_safe(H, I, N) \
	for ((I) = (H)->next; (I) != (H) && ((N) = (I)->next); (I) = (N))

/* Visit each element from last to first. */
#define sp_listforeach_reverse(H, I) \
	for ((I) = (H)->prev; (I) != (H); (I) = (I)->prev)
90
+
91
+ #endif
@@ -0,0 +1,77 @@
1
+ #ifndef SP_LOCK_H_
2
+ #define SP_LOCK_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
+ #include <unistd.h>
13
+
14
/* One-byte spinlock: 0 means free, 1 means held. */
typedef uint8_t spspinlock;

/* Spin-wait hint: the x86 "pause" instruction where available, a no-op elsewhere. */
#if defined(__x86_64__) || defined(__i386) || defined(_X86_)
# define CPU_PAUSE __asm__ ("pause")
#else
# define CPU_PAUSE do { } while(0)
#endif

/* Put the lock into the free state. */
static inline void
sp_lockinit(volatile spspinlock *l)
{
	*l = 0;
}

/* Dispose of the lock; there is nothing to release, the byte is just cleared. */
static inline void
sp_lockfree(volatile spspinlock *l)
{
	*l = 0;
}

/*
 * Acquire the lock, spinning until it becomes free.
 *
 * The fast path is a single atomic test-and-set. While contended, the lock
 * byte is read first and the atomic operation is retried only when it reads
 * as free. After more than 100 failed rounds every further round also
 * calls usleep(0).
 */
static inline void
sp_lock(volatile spspinlock *l)
{
	unsigned int spins = 0U;

	if (__sync_lock_test_and_set(l, 1) == 0)
		return;
	for (;;) {
		CPU_PAUSE;
		if (*l == 0U && __sync_lock_test_and_set(l, 1) == 0)
			return;
		spins++;
		if (spins > 100U)
			usleep(0);
	}
}

/* Release the lock. */
static inline void
sp_unlock(volatile spspinlock *l)
{
	__sync_lock_release(l);
}
50
+
51
+ #if 0
52
+ #include <pthread.h>
53
+
54
+ typedef pthread_spinlock_t spspinlock;
55
+
56
+ static inline void
57
+ sp_lockinit(volatile spspinlock *l) {
58
+ pthread_spin_init(l, 0);
59
+ }
60
+
61
+ static inline void
62
+ sp_lockfree(volatile spspinlock *l) {
63
+ pthread_spin_destroy(l);
64
+ }
65
+
66
+ static inline void
67
+ sp_lock(volatile spspinlock *l) {
68
+ pthread_spin_lock(l);
69
+ }
70
+
71
+ static inline void
72
+ sp_unlock(volatile spspinlock *l) {
73
+ pthread_spin_unlock(l);
74
+ }
75
+ #endif
76
+
77
+ #endif
@@ -0,0 +1,20 @@
1
+ #ifndef SP_MACRO_H_
2
+ #define SP_MACRO_H_
3
+
4
+ /*
5
+ * sophia database
6
+ * sphia.org
7
+ *
8
+ * Copyright (c) Dmitry Simonenko
9
+ * BSD License
10
+ */
11
+
12
/* GCC/Clang attribute shorthands. */
#define sppacked __attribute__((packed))
#define spunused __attribute__((unused))
#define sphot __attribute__((hot))

/* Branch prediction hints; both evaluate to the truth value (0 or 1) of EXPR. */
#define splikely(EXPR) __builtin_expect(!! (EXPR), 1)
#define spunlikely(EXPR) __builtin_expect(!! (EXPR), 0)

/* Integer division of a by b, rounded up. The whole expansion is
 * parenthesized so the macro can be used inside larger expressions
 * (e.g. `x / spdiv(a, b)`); b is evaluated twice. */
#define spdiv(a, b) (((a) + (b) - 1) / (b))

/* Recover a pointer to the enclosing T from a pointer N to its member F. */
#define spcast(N, T, F) ((T*)((char*)(N) - __builtin_offsetof(T, F)))
19
+
20
+ #endif
@@ -0,0 +1,44 @@
1
+
2
+ #
3
+ # sophia makefile.
4
+ #
5
CC ?= gcc
RM ?= rm
LN ?= ln
VERMAJOR = 1
VERMINOR = 1
TARGET_STATIC = libsophia.a
TARGET_DSOLIB = libsophia.so.$(VERMAJOR).$(VERMINOR)
TARGET_DSO = libsophia.so
CFLAGS ?= -I. -std=c99 -pedantic -Wextra -Wall -pthread -O2 -DNDEBUG -fPIC
# The soname has to be the name of the libsophia.so.$(VERMAJOR) link
# created by the $(TARGET_DSO) rule; the runtime loader searches for
# exactly this file name.
LDFLAGS ?= -shared -soname libsophia.so.$(VERMAJOR)

OBJS = file.o \
       crc.o \
       e.o \
       i.o \
       cat.o \
       rep.o \
       util.o \
       sp.o \
       recover.o \
       merge.o \
       gc.o \
       cursor.o

ALL: $(TARGET_STATIC) $(TARGET_DSO)

# Both library targets depend on clean, so every build starts from scratch.
$(TARGET_STATIC): clean $(OBJS)
	$(AR) cru $(TARGET_STATIC) $(OBJS)

$(TARGET_DSO): clean $(OBJS)
	$(LD) $(OBJS) $(LDFLAGS) -o $(TARGET_DSOLIB)
	$(LN) -s $(TARGET_DSOLIB) $(TARGET_DSO).$(VERMAJOR)
	$(LN) -s $(TARGET_DSOLIB) $(TARGET_DSO)

.c.o:
	$(CC) $(CFLAGS) -c $<

clean:
	$(RM) -f $(OBJS) $(TARGET_STATIC)
	$(RM) -f $(TARGET_DSOLIB) $(TARGET_DSO).$(VERMAJOR) $(TARGET_DSO)
@@ -0,0 +1,662 @@
1
+
2
+ /*
3
+ * sophia database
4
+ * sphia.org
5
+ *
6
+ * Copyright (c) Dmitry Simonenko
7
+ * BSD License
8
+ */
9
+
10
+ #include <sp.h>
11
+
12
+ typedef struct { /* size plan for one page written by sp_merge0(); filled in by sp_mergeget0() */
13
+ uint32_t count; /* number of non-deleted versions selected for the page */
14
+ uint32_t psize; /* total page size in bytes: page header + key blocks + values */
15
+ uint32_t bsize; /* key block size: largest selected key size plus sizeof(spvh) */
16
+ } spupdate0;
17
+
18
+ static inline void
19
+ sp_mergeget0(spii *pos, uint32_t n, spupdate0 *u)
20
+ {
21
+ memset(u, 0, sizeof(*u));
22
+ /*
23
+ * collect n or less versions for scheduled page write,
24
+ * not marked as delete, calculate page size and the
25
+ * block size.
26
+ */
27
+ spii i = *pos;
28
+ while (u->count < n && sp_ihas(&i)) {
29
+ spv *v = sp_ival(&i);
30
+ if (v->flags & SPDEL) {
31
+ sp_inext(&i);
32
+ continue;
33
+ }
34
+ if (v->size > u->bsize)
35
+ u->bsize = v->size;
36
+ sp_inext(&i);
37
+ u->count++;
38
+ u->psize += sp_vvsize(v);
39
+ }
40
+ u->bsize += sizeof(spvh);
41
+ u->psize += sizeof(sppageh) + u->bsize * u->count;
42
+ }
43
+
44
+ static inline int sp_merge0(sp *s, spepoch *x, spi *index)
45
+ {
46
+ spv *max = NULL;
47
+ spv *min = NULL;
48
+ int rc;
49
+ spii i;
50
+ sp_iopen(&i, index);
51
+
52
+ while (sp_active(s))
53
+ {
54
+ /* get the new page properties and a data */
55
+ spupdate0 u;
56
+ sp_mergeget0(&i, s->e->page, &u);
57
+ if (spunlikely(u.count == 0))
58
+ break;
59
+
60
+ /* ensure enough space for the page in the file */
61
+ sp_lock(&x->lock);
62
+ rc = sp_mapensure(&x->db, u.psize, s->e->dbgrow);
63
+ if (spunlikely(rc == -1)) {
64
+ sp_unlock(&x->lock);
65
+ sp_e(s, SPEIO, "failed to remap db file", x->epoch);
66
+ goto err;
67
+ }
68
+ sp_unlock(&x->lock);
69
+
70
+ /* write the page.
71
+ *
72
+ * [header] [keys (block sized)] [values]
73
+ *
74
+ * Use partly precalculated crc for a version.
75
+ */
76
+ sppageh *h = (sppageh*)(x->db.map + x->db.used);
77
+ h->id = ++s->psn;
78
+ h->count = u.count;
79
+ h->bsize = u.bsize;
80
+ h->size = u.psize - sizeof(sppageh);
81
+ h->crc = sp_crc32c(0, &h->id, sizeof(sppageh) - sizeof(h->crc));
82
+
83
+ char *ph = x->db.map + x->db.used + sizeof(sppageh);
84
+ char *pv = ph + u.count * u.bsize;
85
+
86
+ uint32_t current = 0;
87
+ spv *last = NULL;
88
+ while (sp_active(s) && current < u.count)
89
+ {
90
+ spv *v = sp_ival(&i);
91
+ if (v->flags & SPDEL) {
92
+ sp_inext(&i);
93
+ continue;
94
+ }
95
+ if (spunlikely(min == NULL)) {
96
+ min = sp_vdup(s, v);
97
+ if (spunlikely(min == NULL)) {
98
+ sp_e(s, SPEOOM, "failed to allocate key");
99
+ goto err;
100
+ }
101
+ }
102
+ assert(v->size <= u.bsize);
103
+ spvh *vh = (spvh*)(ph);
104
+ vh->size = v->size;
105
+ vh->flags = v->flags;
106
+ vh->vsize = sp_vvsize(v);
107
+ vh->voffset = pv - (char*)h;
108
+ vh->crc = sp_crc32c(v->crc, &vh->size, sizeof(spvh) - sizeof(vh->crc));
109
+ memcpy(vh->key, v->key, v->size);
110
+ memcpy(pv, sp_vv(v), vh->vsize);
111
+
112
+ ph += u.bsize;
113
+ pv += vh->vsize;
114
+ last = v;
115
+ current++;
116
+ sp_inext(&i);
117
+ }
118
+
119
+ /* cancellation point check */
120
+ if (! sp_active(s))
121
+ goto err;
122
+
123
+ /* create in-memory page */
124
+ sppage *page = sp_pagenew(s, x);
125
+ if (spunlikely(page == NULL)) {
126
+ sp_e(s, SPEOOM, "failed to allocate page");
127
+ goto err;
128
+ }
129
+ max = sp_vdup(s, last);
130
+ if (spunlikely(max == NULL)) {
131
+ sp_e(s, SPEOOM, "failed to allocate key");
132
+ goto err;
133
+ }
134
+ assert(min != NULL);
135
+ page->id = s->psn;
136
+ page->offset = x->db.used;
137
+ page->size = u.psize;
138
+ page->min = min;
139
+ page->max = max;
140
+
141
+ /* insert page to the index */
142
+ sp_lock(&s->locks);
143
+ sppage *o = NULL;
144
+ rc = sp_catset(&s->s, page, &o);
145
+ if (spunlikely(rc == -1)) {
146
+ sp_unlock(&s->locks);
147
+ sp_pagefree(s, page);
148
+ sp_e(s, SPEOOM, "failed to allocate page index page");
149
+ goto err;
150
+ }
151
+ sp_unlock(&s->locks);
152
+
153
+ /* attach page to the epoch list */
154
+ sp_pageattach(page);
155
+
156
+ /* advance file buffer */
157
+ sp_mapuse(&x->db, u.psize);
158
+
159
+ min = NULL;
160
+ max = NULL;
161
+ }
162
+ return 0;
163
+ err:
164
+ if (min)
165
+ sp_free(&s->a, min);
166
+ if (max)
167
+ sp_free(&s->a, max);
168
+ return -1;
169
+ }
170
+
171
+ typedef struct { /* one merge unit: an existing page plus the run of index versions routed to it (see sp_mergeget) */
172
+ uint32_t pi; /* catalogue position of the origin page, as reported by sp_catroute() */
173
+ sppage *p; /* origin page */
174
+ spepoch *s; /* p->epoch */
175
+ uint32_t count; /* number of in-memory versions that belong to the origin page */
176
+ uint32_t bsize; /* largest in-memory key size plus sizeof(spvh) */
177
+ } spupdate;
178
+
179
+ typedef struct { /* state of the two-way merge between an on-disk page and index versions */
180
+ /* a is an original page version
181
+ b is in-memory version */
182
+ int a_bsize, b_bsize; /* a_bsize: key block stride of the origin page; b_bsize: stride used for the output pages */
183
+ int a_count, b_count; /* number of entries available on each side */
184
+ int A, B; /* number of entries consumed so far from a and b */
185
+ spvh *a; /* current key header inside the origin page */
186
+ spv *b; /* current in-memory version */
187
+ spref last; /* entry selected by the most recent sp_mergenext() call */
188
+ spii i; /* index iterator that b is read from */
189
+ spepoch *x; /* epoch whose db file receives the merged pages */
190
+ } spmerge;
191
+
192
+ typedef struct { /* pages produced by sp_split() for one merge unit, waiting for sp_splitcommit() */
193
+ splist split; /* list head; pages are linked through their link member */
194
+ int count; /* number of pages on the list */
195
+ } spsplit;
196
+
197
+ static inline int
198
+ sp_mergeget(sp *s, spii *from, spupdate *u)
199
+ {
200
+ spii i = *from;
201
+ if (spunlikely(! sp_ihas(&i)))
202
+ return 0;
203
+ memset(u, 0, sizeof(spupdate));
204
+ /* match the origin page and a associated
205
+ * range of keys. */
206
+ sppage *origin = NULL;
207
+ uint32_t origin_idx = 0;
208
+ uint32_t n = 0;
209
+ while (sp_ihas(&i)) {
210
+ spv *v = sp_ival(&i);
211
+ if (splikely(origin)) {
212
+ if (! sp_catown(&s->s, origin_idx, v))
213
+ break;
214
+ } else {
215
+ origin = sp_catroute(&s->s, v->key, v->size, &origin_idx);
216
+ assert(((spepoch*)origin->epoch)->type == SPDB);
217
+ }
218
+ if (v->size > u->bsize)
219
+ u->bsize = v->size;
220
+ sp_inext(&i);
221
+ n++;
222
+ }
223
+ assert(n > 0);
224
+ u->count = n;
225
+ u->bsize += sizeof(spvh);
226
+ u->pi = origin_idx;
227
+ u->p = origin;
228
+ u->s = origin->epoch;
229
+ return 1;
230
+ }
231
+
232
+ static inline void
233
+ sp_mergeinit(spepoch *x, spmerge *m, spupdate *u, spii *from)
234
+ {
235
+ sppageh *h = (sppageh*)(u->s->db.map + u->p->offset);
236
+ uint32_t bsize = u->bsize;
237
+ if (h->bsize > bsize)
238
+ bsize = h->bsize;
239
+ m->a_bsize = h->bsize;
240
+ m->b_bsize = bsize;
241
+ memset(&m->last, 0, sizeof(m->last));
242
+ m->i = *from;
243
+ m->A = 0;
244
+ m->B = 0;
245
+ m->a_count = h->count;
246
+ m->b_count = u->count;
247
+ m->a = (spvh*)((char*)h + sizeof(sppageh));
248
+ m->b = sp_ival(from);
249
+ m->x = x;
250
+ }
251
+
252
+ static inline int sp_mergenext(sp *s, spmerge *m)
253
+ {
254
+ if (m->A < m->a_count && m->B < m->b_count)
255
+ {
256
+ register int cmp =
257
+ s->e->cmp(m->a->key, m->a->size,
258
+ m->b->key,
259
+ m->b->size, s->e->cmparg);
260
+ switch (cmp) {
261
+ case 0:
262
+ /* use updated key B */
263
+ m->last.type = SPREFM;
264
+ m->last.v.v = m->b;
265
+ m->A++;
266
+ m->a = (spvh*)((char*)m->a + m->a_bsize);
267
+ m->B++;
268
+ sp_inext(&m->i);
269
+ m->b = sp_ival(&m->i);
270
+ return 1;
271
+ case -1:
272
+ /* use A */
273
+ m->last.type = SPREFD;
274
+ m->last.v.vh = m->a;
275
+ m->A++;
276
+ m->a = (spvh*)((char*)m->a + m->a_bsize);
277
+ return 1;
278
+ case 1:
279
+ /* use B */
280
+ m->last.type = SPREFM;
281
+ m->last.v.v = m->b;
282
+ m->B++;
283
+ sp_inext(&m->i);
284
+ m->b = sp_ival(&m->i);
285
+ return 1;
286
+ }
287
+ }
288
+ if (m->A < m->a_count) {
289
+ /* use A */
290
+ m->last.type = SPREFD;
291
+ m->last.v.vh = m->a;
292
+ m->A++;
293
+ m->a = (spvh*)((char*)m->a + m->a_bsize);
294
+ return 1;
295
+ }
296
+ if (m->B < m->b_count) {
297
+ /* use B */
298
+ m->last.type = SPREFM;
299
+ m->last.v.v = m->b;
300
+ m->B++;
301
+ sp_inext(&m->i);
302
+ m->b = sp_ival(&m->i);
303
+ return 1;
304
+ }
305
+ return 0;
306
+ }
307
+
308
+ static inline void
309
+ sp_splitinit(spsplit *l) {
310
+ sp_listinit(&l->split);
311
+ l->count = 0;
312
+ }
313
+
314
+ static inline void
315
+ sp_splitfree(sp *s, spsplit *l) {
316
+ splist *i, *n;
317
+ sp_listforeach_safe(&l->split, i, n) {
318
+ sppage *p = spcast(i, sppage, link);
319
+ sp_pagefree(s, p);
320
+ }
321
+ }
322
+
323
+ static inline int sp_split(sp *s, spupdate *u, spmerge *m, spsplit *l)
324
+ {
325
+ int rc;
326
+ int bsize = m->b_bsize;
327
+ uint32_t pagesize = sizeof(sppageh);
328
+ uint32_t count = 0;
329
+ /*
330
+ * merge in-memory keys with the origin page keys,
331
+ * skip any deletes and calculate result
332
+ * page size.
333
+ */
334
+ sp_refsetreset(&s->refs);
335
+ while (count < s->e->page && sp_mergenext(s, m)) {
336
+ if (sp_refisdel(&m->last))
337
+ continue;
338
+ sp_refsetadd(&s->refs, &m->last);
339
+ pagesize += bsize + sp_refvsize(&m->last);
340
+ count++;
341
+ }
342
+ if (spunlikely(count == 0 && l->count > 0))
343
+ return 0;
344
+
345
+ /*
346
+ * set the origin page id for a first spitted page
347
+ */
348
+ uint32_t psn = (l->count == 0) ? u->p->id : ++s->psn;
349
+
350
+ /* ensure enough space for the page in the file */
351
+ sp_lock(&m->x->lock);
352
+ rc = sp_mapensure(&m->x->db, pagesize, s->e->dbgrow);
353
+ if (spunlikely(rc == -1)) {
354
+ sp_unlock(&m->x->lock);
355
+ return sp_e(s, SPEIO, "failed to remap db file",
356
+ m->x->epoch);
357
+ }
358
+ sp_unlock(&m->x->lock);
359
+
360
+ /* in case if all origin page keys are deleted.
361
+ *
362
+ * write special page header without any data, indicating
363
+ * that page should be skipped during recovery
364
+ * and not being added to the index.
365
+ */
366
+ if (spunlikely(count == 0 && l->count == 0)) {
367
+ sppageh *h = (sppageh*)(m->x->db.map + m->x->db.used);
368
+ h->id = psn;
369
+ h->count = 0;
370
+ h->bsize = 0;
371
+ h->size = 0;
372
+ h->crc = sp_crc32c(0, &h->id, sizeof(sppageh) - sizeof(h->crc));
373
+ sp_mapuse(&m->x->db, pagesize);
374
+ return 0;
375
+ }
376
+
377
+ spref *r = s->refs.r;
378
+ spref *min = r;
379
+ spref *max = r + (count - 1);
380
+
381
+ /*
382
+ * write the page
383
+ */
384
+ sppageh *h = (sppageh*)(m->x->db.map + m->x->db.used);
385
+ h->id = psn;
386
+ h->count = count;
387
+ h->bsize = bsize;
388
+ h->size = pagesize - sizeof(sppageh);
389
+ h->crc = sp_crc32c(0, &h->id, sizeof(sppageh) - sizeof(h->crc));
390
+
391
+ spvh *ptr = (spvh*)(m->x->db.map + m->x->db.used + sizeof(sppageh));
392
+ char *ptrv = (char*)ptr + count * bsize;
393
+
394
+ uint32_t i = 0;
395
+ while (i < count)
396
+ {
397
+ uint32_t voffset = ptrv - (char*)h;
398
+ switch (r->type) {
399
+ case SPREFD:
400
+ memcpy(ptr, r->v.vh, sizeof(spvh) + r->v.vh->size);
401
+ memcpy(ptrv, u->s->db.map + u->p->offset + r->v.vh->voffset,
402
+ r->v.vh->vsize);
403
+ ptr->voffset = voffset;
404
+ uint32_t crc;
405
+ crc = sp_crc32c(0, ptr->key, ptr->size);
406
+ crc = sp_crc32c(crc, ptrv, r->v.vh->vsize);
407
+ crc = sp_crc32c(crc, &ptr->size, sizeof(spvh) - sizeof(ptr->crc));
408
+ ptr->crc = crc;
409
+ ptrv += r->v.vh->vsize;
410
+ break;
411
+ case SPREFM:
412
+ ptr->size = r->v.v->size;
413
+ ptr->flags = r->v.v->flags;
414
+ ptr->voffset = voffset;
415
+ ptr->vsize = sp_vvsize(r->v.v);
416
+ ptr->crc = sp_crc32c(r->v.v->crc, &ptr->size, sizeof(spvh) -
417
+ sizeof(ptr->crc));
418
+ memcpy(ptr->key, r->v.v->key, r->v.v->size);
419
+ memcpy(ptrv, sp_vv(r->v.v), ptr->vsize);
420
+ ptrv += ptr->vsize;
421
+ break;
422
+ }
423
+ assert((uint32_t)(ptrv - (char*)h) <= pagesize);
424
+ ptr = (spvh*)((char*)ptr + bsize);
425
+ r++;
426
+ i++;
427
+ }
428
+
429
+ /* create in-memory page */
430
+ sppage *p = sp_pagenew(s, m->x);
431
+ if (spunlikely(p == NULL))
432
+ return sp_e(s, SPEOOM, "failed to allocate page");
433
+ p->id = psn;
434
+ p->offset = m->x->db.used;
435
+ p->size = pagesize;
436
+ p->min = sp_vdupref(s, min, m->x->epoch);
437
+ if (spunlikely(p->min == NULL)) {
438
+ sp_free(&s->a, p);
439
+ return sp_e(s, SPEOOM, "failed to allocate key");
440
+ }
441
+ p->max = sp_vdupref(s, max, m->x->epoch);
442
+ if (spunlikely(p->max == NULL)) {
443
+ sp_free(&s->a, p->min);
444
+ sp_free(&s->a, p);
445
+ return sp_e(s, SPEOOM, "failed to allocate key");
446
+ }
447
+
448
+ /* add page to split list */
449
+ sp_listappend(&l->split, &p->link);
450
+ l->count++;
451
+
452
+ /* advance buffer */
453
+ sp_mapuse(&m->x->db, pagesize);
454
+ return 1;
455
+ }
456
+
457
+ static inline int sp_splitcommit(sp *s, spupdate *u, spmerge *m, spsplit *l)
458
+ {
459
+ sp_lock(&s->locks);
460
+ /* remove origin page, if there were no page
461
+ * updates after split */
462
+ if (spunlikely(l->count == 0)) {
463
+ sp_pagefree(s, u->p);
464
+ u->s->ngc++;
465
+ u->p = NULL;
466
+ sp_catdel(&s->s, u->pi);
467
+ sp_unlock(&s->locks);
468
+ return 0;
469
+ }
470
+ splist *i, *n;
471
+ sp_listforeach_safe(&l->split, i, n)
472
+ {
473
+ sppage *p = spcast(i, sppage, link);
474
+ /* update origin page first */
475
+ if (spunlikely(p->id == u->p->id)) {
476
+ sp_listunlink(&p->link);
477
+ /* relink origin page to new epoch */
478
+ sppage *origin = u->p;
479
+ assert(origin->epoch != m->x);
480
+ sp_listunlink(&origin->link);
481
+ u->s->ngc++; /* origin db epoch */
482
+ m->x->n++; /* current db epoch */
483
+ sp_listappend(&m->x->pages, &origin->link);
484
+ /* update origin page */
485
+ origin->offset = p->offset;
486
+ assert(p->epoch == m->x);
487
+ origin->epoch = m->x;
488
+ origin->size = p->size;
489
+ sp_free(&s->a, origin->min);
490
+ sp_free(&s->a, origin->max);
491
+ origin->min = p->min;
492
+ origin->max = p->max;
493
+ sp_free(&s->a, p);
494
+ continue;
495
+ }
496
+ /* insert split page */
497
+ sppage *o = NULL;
498
+ int rc = sp_catset(&s->s, p, &o);
499
+ if (spunlikely(rc == -1)) {
500
+ sp_unlock(&s->locks);
501
+ return sp_e(s, SPEOOM, "failed to allocate page index page");
502
+ }
503
+ assert(o == NULL);
504
+ sp_pageattach(p);
505
+ m->x->n++;
506
+ }
507
+ sp_unlock(&s->locks);
508
+ return 0;
509
+ }
510
+
511
+ static inline int sp_mergeN(sp *s, spepoch *x, spi *index)
512
+ {
513
+ int rc;
514
+ spii i;
515
+ sp_iopen(&i, index);
516
+ spupdate u;
517
+ while (sp_mergeget(s, &i, &u))
518
+ {
519
+ spmerge m;
520
+ sp_mergeinit(x, &m, &u, &i);
521
+ spsplit l;
522
+ sp_splitinit(&l);
523
+ while (sp_active(s)) {
524
+ rc = sp_split(s, &u, &m, &l);
525
+ if (spunlikely(rc == 0))
526
+ break;
527
+ else
528
+ if (spunlikely(rc == -1)) {
529
+ sp_splitfree(s, &l);
530
+ return -1;
531
+ }
532
+ }
533
+ if (spunlikely(! sp_active(s)))
534
+ return 0;
535
+ rc = sp_splitcommit(s, &u, &m, &l);
536
+ if (spunlikely(rc == -1)) {
537
+ sp_splitfree(s, &l);
538
+ return -1;
539
+ }
540
+ i = m.i;
541
+ }
542
+ return 0;
543
+ }
544
+
545
+ int sp_merge(sp *s)
546
+ {
547
+ sp_lock(&s->lockr);
548
+ sp_lock(&s->locki);
549
+
550
+ spepoch *x = sp_replive(&s->rep);
551
+ /* rotate current live epoch */
552
+ sp_repset(&s->rep, x, SPXFER);
553
+ int rc = sp_rotate(s);
554
+ if (spunlikely(rc == -1)) {
555
+ sp_lock(&s->lockr);
556
+ sp_lock(&s->locki);
557
+ return -1;
558
+ }
559
+ /* swap index */
560
+ spi *index = sp_iswap(s);
561
+
562
+ sp_unlock(&s->lockr);
563
+ sp_unlock(&s->locki);
564
+
565
+ /* complete old live epoch log */
566
+ rc = sp_logeof(&x->log);
567
+ if (spunlikely(rc == -1))
568
+ return sp_e(s, SPEIO, "failed to write eof marker", x->epoch);
569
+ rc = sp_logcomplete(&x->log);
570
+ if (spunlikely(rc == -1))
571
+ return sp_e(s, SPEIO, "failed to complete log file", x->epoch);
572
+
573
+ /* create db file */
574
+ rc = sp_mapepochnew(&x->db, s->e->dbnewsize, s->e->dir, x->epoch, "db");
575
+ if (spunlikely(rc == -1))
576
+ return sp_e(s, SPEIO, "failed to create db file", x->epoch);
577
+
578
+ /* merge index */
579
+ if (splikely(s->s.count > 0))
580
+ rc = sp_mergeN(s, x, index);
581
+ else
582
+ rc = sp_merge0(s, x, index);
583
+
584
+ /* check cancellation point */
585
+ if (! sp_active(s)) {
586
+ sp_mapunlink(&x->db);
587
+ sp_mapclose(&x->db);
588
+ return rc;
589
+ }
590
+ if (spunlikely(rc == -1))
591
+ return -1;
592
+
593
+ /* gc */
594
+ if (s->e->gc) {
595
+ rc = sp_gc(s, x);
596
+ if (spunlikely(rc == -1))
597
+ return -1;
598
+ }
599
+
600
+ /* sync/truncate db file and remap read-only only if
601
+ * database file is not empty. */
602
+ if (splikely(x->db.used > 0)) {
603
+ sp_lock(&x->lock);
604
+ rc = sp_mapcomplete(&x->db);
605
+ if (spunlikely(rc == -1)) {
606
+ sp_unlock(&x->lock);
607
+ return sp_e(s, SPEIO, "failed to complete db file", x->epoch);
608
+ }
609
+ sp_unlock(&x->lock);
610
+ /* set epoch as db */
611
+ sp_lock(&s->lockr);
612
+ sp_repset(&s->rep, x, SPDB);
613
+ sp_unlock(&s->lockr);
614
+ /* remove log file */
615
+ rc = sp_logunlink(&x->log);
616
+ if (spunlikely(rc == -1))
617
+ return sp_e(s, SPEIO, "failed to unlink log file", x->epoch);
618
+ rc = sp_logclose(&x->log);
619
+ if (spunlikely(rc == -1))
620
+ return sp_e(s, SPEIO, "failed to close log file", x->epoch);
621
+ } else {
622
+ /* there are possible situation when all keys has
623
+ * been deleted. */
624
+ rc = sp_mapunlink(&x->db);
625
+ if (spunlikely(rc == -1))
626
+ return sp_e(s, SPEIO, "failed to unlink db file", x->epoch);
627
+ rc = sp_mapclose(&x->db);
628
+ if (spunlikely(rc == -1))
629
+ return sp_e(s, SPEIO, "failed to close db file", x->epoch);
630
+ }
631
+
632
+ /* remove all xfer epochs that took part in the merge
633
+ * including current, if it's database file
634
+ * is empty. */
635
+ while (sp_active(s)) {
636
+ sp_lock(&s->lockr);
637
+ spepoch *e = sp_repxfer(&s->rep);
638
+ sp_unlock(&s->lockr);
639
+ if (e == NULL)
640
+ break;
641
+ rc = sp_logunlink(&e->log);
642
+ if (spunlikely(rc == -1))
643
+ return sp_e(s, SPEIO, "failed to unlink log file", e->epoch);
644
+ rc = sp_logclose(&e->log);
645
+ if (spunlikely(rc == -1))
646
+ return sp_e(s, SPEIO, "failed to close log file", e->epoch);
647
+ sp_lock(&s->lockr);
648
+ sp_repdetach(&s->rep, e);
649
+ sp_free(&s->a, e);
650
+ sp_unlock(&s->lockr);
651
+ }
652
+
653
+ /* truncate the index (skip index on a read) */
654
+ sp_iskipset(s, 1);
655
+ rc = sp_itruncate(index);
656
+ if (spunlikely(rc == -1)) {
657
+ sp_iskipset(s, 0);
658
+ return sp_e(s, SPE, "failed create index");
659
+ }
660
+ sp_iskipset(s, 0);
661
+ return 0;
662
+ }