libsmatrix 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1 @@
1
+ Makefile.in
@@ -0,0 +1,21 @@
1
# This file is part of the "libsmatrix" project
#   (c) 2011-2013 Paul Asmuth <paul@paulasmuth.com>
#
# Licensed under the MIT License (the "License"); you may not use this
# file except in compliance with the License. You may obtain a copy of
# the License at: http://opensource.org/licenses/MIT

# Builds the Ruby binding as a loadable library.
# NOTE(review): the RUBY_* variables are presumably defined by the local
# Makefile.in written by extconf.rb, and CC/LIBEXT/LIBFLAGS by the parent
# Makefile.in -- confirm.
include ../Makefile.in
include Makefile.in

TARGET = smatrix_ruby.$(LIBEXT)

.PHONY: all install

all: $(TARGET)

# Use $(MAKE) so that flags, the jobserver and `make -n` propagate to the
# recursive invocation.
../smatrix.o:
	$(MAKE) -C ..

$(TARGET): ../smatrix.o ../smatrix_ruby.c ../smatrix_ruby.h
	$(CC) -L$(RUBY_LIB) -I$(RUBY_INCLUDE_ARCH) -I$(RUBY_INCLUDE) $(LIBFLAGS) -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress $(LDFLAGS) ../smatrix_ruby.c ../smatrix.o -o $(TARGET)

install: $(TARGET)
@@ -0,0 +1,18 @@
1
require "mkmf"

# Emits a Makefile.in next to this script containing the Ruby header/library
# locations, so that a hand-written Makefile can include it.
ruby_cfg = RbConfig::CONFIG

makefile_vars = <<EOF
RUBY_INCLUDE = #{ruby_cfg["rubyhdrdir"]}
RUBY_INCLUDE_ARCH = #{ruby_cfg["rubyhdrdir"]}/#{ruby_cfg["arch"]}
RUBY_LIB = #{ruby_cfg["libdir"]}
RUBY_SO_NAME = #{ruby_cfg['RUBY_SO_NAME']}
LIBRUBYARG_SHARED = #{$LIBRUBYARG_SHARED}
LIBRUBYARG_STATIC = #{$LIBRUBYARG_STATIC}
LIBRUBYARG = #{$LIBRUBYARG_SHARED.length > 0 ? $LIBRUBYARG_SHARED : $LIBRUBYARG_STATIC}
EOF

target_path = ::File.expand_path("../Makefile.in", __FILE__)
File.open(target_path, "w+") { |io| io.write(makefile_vars) }

# Tell mkmf that a Makefile has been produced.
$makefile_created = true
18
+
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-

# Date is not guaranteed to be loaded when RubyGems evaluates a gemspec, so
# require it explicitly before calling Date.today below.
require "date"

Gem::Specification.new do |s|
  s.name = "libsmatrix"
  s.version = "0.0.1"
  s.date = Date.today.to_s
  s.platform = Gem::Platform::RUBY
  s.authors = ["Paul Asmuth", "Amir Friedman"]
  s.email = ["paul@paulasmuth.com", "amirf@null.co.il"]
  s.homepage = "http://github.com/paulasmuth/libsmatrix"
  s.summary = %q{A thread-safe two dimensional sparse matrix data structure with C, Java and Ruby bindings.}
  s.description = %q{A thread-safe two dimensional sparse matrix data structure with C, Java and Ruby bindings. It was created to make loading and accessing medium sized (10GB+) matrices in boxed languages like Java/Scala or Ruby easier.}
  s.licenses = ["MIT"]
  # The native extension is configured by src/ruby/extconf.rb.
  s.extensions = ['src/ruby/extconf.rb']
  # Package everything tracked by git except repository housekeeping files.
  s.files = `git ls-files`.split("\n") - [".gitignore", ".rspec", ".travis.yml"]
  s.test_files = `git ls-files -- spec/*`.split("\n")
  s.require_paths = ["src/ruby/"]

  s.add_development_dependency "rspec", "~> 2.8.0"
end
@@ -0,0 +1,6 @@
1
# Load the compiled extension from this directory. Both candidate file names
# are tried, and each one that exists is required.
["../smatrix_ruby.bundle", "../smatrix_ruby.so"].each do |relative|
  candidate = ::File.expand_path(relative, __FILE__)
  require candidate if ::File.exist?(candidate)
end
6
+
@@ -0,0 +1,960 @@
1
+ // This file is part of the "libsmatrix" project
2
+ // (c) 2011-2013 Paul Asmuth <paul@paulasmuth.com>
3
+ //
4
+ // Licensed under the MIT License (the "License"); you may not use this
5
+ // file except in compliance with the License. You may obtain a copy of
6
+ // the License at: http://opensource.org/licenses/MIT
7
+
8
+ #include <stdio.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+ #include <sys/types.h>
12
+ #include <sys/stat.h>
13
+ #include <fcntl.h>
14
+ #include <unistd.h>
15
+ #include <assert.h>
16
+ #include <inttypes.h>
17
+
18
+ #include "smatrix.h"
19
+ #include "smatrix_private.h"
20
+
21
+ // TODO
22
+ // + make ioqueue fifo
23
+ // + smatrix_gc()
24
+ // + ftruncate in larger blocks
25
+ // + acquire lock on file to prevent concurrent access
26
+ // + check correct endianness on file open
27
+ // + proper error handling / return codes for smatrix_open
28
+ // + file free list
29
+
30
+ /*
31
+
32
+ libsmatrix file format (augmented BNF):
33
+ ---------------------------------------
34
+
35
+ FILE ::= FILE_HEADER ; header size is 512 bytes
36
+ FILE_BODY
37
+
38
+ FILE_HEADER ::= <8 Bytes 0x17> ; uint64_t, magic number
39
+ CMAP_HEAD_FPOS ; uint64_t
40
+ <496 Bytes 0x0> ; padding to 512 bytes
41
+
42
+ FILE_BODY ::= *( CMAP_BLOCK | RMAP_BLOCK )
43
+
44
+ CMAP_BLOCK ::= CMAP_BLOCK_SIZE ; uint64_t
45
+ CMAP_BLOCK_NEXT ; uint64_t, file offset
46
+ *( CMAP_ENTRY ) ; 12 bytes each
47
+
48
+ CMAP_ENTRY ::= CMAP_ENTRY_KEY ; uint32_t
49
+ CMAP_ENTRY_VALUE ; uint64_t
50
+
51
+ CMAP_ENTRY_KEY ::= <uint32_t> ; key / first dimension
52
+ CMAP_ENTRY_VALUE ::= <uint64_t> ; file offset of the RMAP_BLOCK
53
+ CMAP_HEAD_FPOS ::= <uint64_t> ; file offset of the first CMAP_BLOCK
54
+ CMAP_BLOCK_SIZE ::= <uint64_t> ; number of entries in this block
55
+ CMAP_BLOCK_NEXT ::= <uint64_t> ; file offset of the next block or 0
56
+
57
+ RMAP_BLOCK ::= <8 Bytes 0x23> ; uint64_t, magic number
58
+ RMAP_BLOCK_SIZE ; uint64_t
59
+ *( RMAP_SLOT ) ; 8 bytes each
60
+
61
+ RMAP_SLOT ::= RMAP_ENTRY ; used hashmap slot
62
+ | RMAP_SLOT_UNUSED ; unused hashmap slot
63
+
64
+ RMAP_ENTRY ::= RMAP_ENTRY_KEY ; uint32_t
65
+ RMAP_ENTRY_VALUE ; uint32_t
66
+
67
+ RMAP_SLOT_UNUSED ::= <8 Bytes 0x0> ; empty slot
68
+ RMAP_ENTRY_KEY ::= <uint32_t> ; key / second dimension
69
+ RMAP_ENTRY_VALUE ::= <uint32_t> ; value
70
+ RMAP_BLOCK_SIZE ::= <uint64_t> ; number of slots in this block
71
+
72
+ */
73
+
74
+ smatrix_t* smatrix_open(const char* fname) {
75
+ smatrix_t* self = calloc(1, sizeof(smatrix_t));
76
+
77
+ if (self == NULL)
78
+ return NULL;
79
+
80
+ self->ioqueue = NULL;
81
+ self->lock.count = 0;
82
+ self->lock.mutex = 0;
83
+ self->shutdown = 0;
84
+
85
+ if (!fname) {
86
+ smatrix_cmap_init(self);
87
+ return self;
88
+ }
89
+
90
+ self->fd = open(fname, O_RDWR | O_CREAT, 00600);
91
+
92
+ if (self->fd == -1) {
93
+ perror("cannot open file");
94
+ free(self);
95
+ return NULL;
96
+ }
97
+
98
+ self->fpos = lseek(self->fd, 0, SEEK_END);
99
+
100
+ if (self->fpos == 0) {
101
+ smatrix_fcreate(self);
102
+ } else {
103
+ smatrix_fload(self);
104
+ }
105
+
106
+ if (pthread_create(&self->iothread, NULL, &smatrix_io, self)) {
107
+ smatrix_error("can't start the IO thread");
108
+ }
109
+
110
+ return self;
111
+ }
112
+
113
+ void smatrix_close(smatrix_t* self) {
114
+ void* retval;
115
+ uint64_t pos;
116
+
117
+ self->shutdown = 1;
118
+ pthread_join(self->iothread, &retval);
119
+
120
+ for (pos = 0; pos < self->cmap.size; pos++) {
121
+ if (self->cmap.data[pos].flags & SMATRIX_CMAP_SLOT_USED) {
122
+ smatrix_rmap_free(self, self->cmap.data[pos].rmap);
123
+ }
124
+ }
125
+
126
+ smatrix_cmap_free(self, &self->cmap);
127
+
128
+ if (self->fd) {
129
+ close(self->fd);
130
+ }
131
+
132
+ free(self);
133
+ }
134
+
135
+ uint64_t smatrix_falloc(smatrix_t* self, uint64_t bytes) {
136
+ smatrix_lock_getmutex(&self->lock);
137
+
138
+ uint64_t old = self->fpos;
139
+ uint64_t new = old + bytes;
140
+
141
+ if (ftruncate(self->fd, new) == -1) {
142
+ smatrix_error("truncate() failed");
143
+ }
144
+
145
+ self->fpos = new;
146
+
147
+ smatrix_lock_release(&self->lock);
148
+ return old;
149
+ }
150
+
151
+ inline void* smatrix_malloc(smatrix_t* self, uint64_t bytes) {
152
+ __sync_add_and_fetch(&self->mem, bytes);
153
+
154
+ void* ptr = malloc(bytes);
155
+
156
+ if (ptr == NULL) {
157
+ smatrix_error("malloc() failed");
158
+ abort();
159
+ }
160
+
161
+ return ptr;
162
+ }
163
+
164
+ inline void smatrix_mfree(smatrix_t* self, uint64_t bytes) {
165
+ __sync_sub_and_fetch(&self->mem, bytes);
166
+ }
167
+
168
+ void smatrix_ffree(smatrix_t* self, uint64_t fpos, uint64_t bytes) {
169
+ (void) self;
170
+ (void) fpos;
171
+ (void) bytes;
172
+ }
173
+
174
+ uint32_t smatrix_get(smatrix_t* self, uint32_t x, uint32_t y) {
175
+ smatrix_ref_t ref;
176
+ uint32_t retval = 0;
177
+
178
+ smatrix_lookup(self, &ref, x, y, 0);
179
+
180
+ if (ref.slot)
181
+ retval = ref.slot->value;
182
+
183
+ smatrix_decref(self, &ref);
184
+ return retval;
185
+ }
186
+
187
+ // returns a whole row as an array of uint32_t's, odd slots contain indexes, even slots contain
188
+ // values. example: [index, value, index, value...]
189
+ uint32_t smatrix_getrow(smatrix_t* self, uint32_t x, uint32_t* ret, size_t ret_len) {
190
+ smatrix_ref_t ref;
191
+ uint32_t pos, num = 0;
192
+
193
+ smatrix_lookup(self, &ref, x, 0, 0);
194
+
195
+ if (ref.rmap) {
196
+ for (pos = 0; pos < ref.rmap->size; pos++) {
197
+ if (!ref.rmap->data[pos].key && !ref.rmap->data[pos].value)
198
+ continue;
199
+
200
+ ret[num * 2] = ref.rmap->data[pos].key;
201
+ ret[num * 2 + 1] = ref.rmap->data[pos].value;
202
+
203
+ if ((++num * 2 * sizeof(uint32_t)) >= ret_len)
204
+ break;
205
+ }
206
+ }
207
+
208
+ smatrix_decref(self, &ref);
209
+ return num;
210
+ }
211
+
212
+ uint32_t smatrix_rowlen(smatrix_t* self, uint32_t x) {
213
+ smatrix_ref_t ref;
214
+ uint32_t len = 0;
215
+
216
+ smatrix_lookup(self, &ref, x, 0, 0);
217
+
218
+ if (ref.rmap)
219
+ len = ref.rmap->used;
220
+
221
+ smatrix_decref(self, &ref);
222
+ return len;
223
+ }
224
+
225
+ uint32_t smatrix_set(smatrix_t* self, uint32_t x, uint32_t y, uint32_t value) {
226
+ smatrix_ref_t ref;
227
+ uint32_t retval;
228
+
229
+ smatrix_lookup(self, &ref, x, y, 1);
230
+ retval = (ref.slot->value = value);
231
+ smatrix_decref(self, &ref);
232
+
233
+ return retval;
234
+ }
235
+
236
+ uint32_t smatrix_incr(smatrix_t* self, uint32_t x, uint32_t y, uint32_t value) {
237
+ smatrix_ref_t ref;
238
+ uint32_t retval;
239
+
240
+ smatrix_lookup(self, &ref, x, y, 1);
241
+ retval = (ref.slot->value += value);
242
+ smatrix_decref(self, &ref);
243
+
244
+ return retval;
245
+ }
246
+
247
+ uint32_t smatrix_decr(smatrix_t* self, uint32_t x, uint32_t y, uint32_t value) {
248
+ smatrix_ref_t ref;
249
+ uint32_t retval;
250
+
251
+ smatrix_lookup(self, &ref, x, y, 1);
252
+ retval = (ref.slot->value -= value);
253
+ smatrix_decref(self, &ref);
254
+
255
+ return retval;
256
+ }
257
+
258
+ void smatrix_lookup(smatrix_t* self, smatrix_ref_t* ref, uint32_t x, uint32_t y, int write) {
259
+ int mutex = 0;
260
+ smatrix_rmap_t* rmap;
261
+ smatrix_rmap_slot_t* slot;
262
+
263
+ ref->rmap = NULL;
264
+ ref->slot = NULL;
265
+ ref->write = write;
266
+
267
+ rmap = smatrix_cmap_lookup(self, &self->cmap, x, write);
268
+
269
+ if (rmap == NULL) {
270
+ return;
271
+ }
272
+
273
+ if (write) {
274
+ smatrix_lock_decref(&rmap->lock);
275
+ smatrix_lock_getmutex(&rmap->lock);
276
+ mutex = 1;
277
+ }
278
+
279
+ if (rmap->size == 0) {
280
+ if (!mutex) {
281
+ smatrix_lock_decref(&rmap->lock);
282
+ smatrix_lock_getmutex(&rmap->lock);
283
+ mutex = 1;
284
+ }
285
+
286
+ if (rmap->size == 0) {
287
+ smatrix_rmap_load(self, rmap);
288
+ }
289
+ }
290
+
291
+ ref->rmap = rmap;
292
+
293
+ if (mutex && !write) {
294
+ smatrix_lock_dropmutex(&rmap->lock);
295
+ }
296
+
297
+ slot = smatrix_rmap_probe(rmap, y);
298
+
299
+ if (slot != NULL && slot->key == y) {
300
+ ref->slot = slot;
301
+ } else if (write) {
302
+ ref->slot = smatrix_rmap_insert(self, rmap, y);
303
+ }
304
+ }
305
+
306
+ void smatrix_decref(smatrix_t* self, smatrix_ref_t* ref) {
307
+ if (!ref->rmap) {
308
+ return;
309
+ }
310
+
311
+ if (ref->write) {
312
+ if (self->fd) {
313
+ // FIXPAUL: this will sync the whole rmap. if only one slot changed this is a lot of overhead...
314
+ smatrix_rmap_sync_defer(self, ref->rmap);
315
+ }
316
+
317
+ smatrix_lock_release(&ref->rmap->lock);
318
+ } else {
319
+ smatrix_lock_decref(&ref->rmap->lock);
320
+ }
321
+ }
322
+
323
+ void smatrix_rmap_init(smatrix_t* self, smatrix_rmap_t* rmap, uint32_t size) {
324
+ if (size > 0) {
325
+ size_t bytes = sizeof(smatrix_rmap_slot_t) * size;
326
+
327
+ rmap->data = smatrix_malloc(self, bytes);
328
+ memset(rmap->data, 0, bytes);
329
+ } else {
330
+ rmap->data = NULL;
331
+ }
332
+
333
+ rmap->size = size;
334
+ rmap->used = 0;
335
+ rmap->fpos = 0;
336
+ rmap->flags = 0;
337
+ rmap->lock.count = 0;
338
+ rmap->lock.mutex = 0;
339
+ }
340
+
341
+
342
+ // you need to hold a write lock on rmap to call this function safely
343
+ smatrix_rmap_slot_t* smatrix_rmap_insert(smatrix_t* self, smatrix_rmap_t* rmap, uint32_t key) {
344
+ smatrix_rmap_slot_t* slot;
345
+
346
+ if (rmap->used > rmap->size / 2) {
347
+ smatrix_rmap_resize(self, rmap);
348
+ }
349
+
350
+ slot = smatrix_rmap_probe(rmap, key);
351
+ assert(slot != NULL);
352
+
353
+ if (!slot->key || slot->key != key) {
354
+ rmap->used++;
355
+ slot->key = key;
356
+ slot->value = 0;
357
+ }
358
+
359
+ return slot;
360
+ }
361
+
362
+ // you need to hold a read or write lock on rmap to call this function safely
363
+ smatrix_rmap_slot_t* smatrix_rmap_probe(smatrix_rmap_t* rmap, uint32_t key) {
364
+ uint64_t n, pos;
365
+
366
+ pos = key % rmap->size;
367
+
368
+ // linear probing
369
+ for (n = 0; n < rmap->size; n++) {
370
+ if (rmap->data[pos].key == key)
371
+ break;
372
+
373
+ if (!rmap->data[pos].key && !rmap->data[pos].value)
374
+ break;
375
+
376
+ pos = (pos + 1) % rmap->size;
377
+ }
378
+
379
+ return &rmap->data[pos];
380
+ }
381
+
382
+ // you need to hold a write lock on rmap in order to call this function safely
383
+ void smatrix_rmap_resize(smatrix_t* self, smatrix_rmap_t* rmap) {
384
+ uint64_t pos, bytes, old_size, new_size;
385
+ smatrix_rmap_slot_t* slot;
386
+ smatrix_rmap_t new;
387
+
388
+ old_size = rmap->size;
389
+ new_size = rmap->size * 2;
390
+ bytes = sizeof(smatrix_rmap_slot_t) * new_size;
391
+
392
+ new.size = new_size;
393
+ new.used = 0;
394
+ new.data = smatrix_malloc(self, bytes);
395
+ memset(new.data, 0, bytes);
396
+
397
+ for (pos = 0; pos < rmap->size; pos++) {
398
+ if (!rmap->data[pos].key && !rmap->data[pos].value)
399
+ continue;
400
+
401
+ slot = smatrix_rmap_insert(self, &new, rmap->data[pos].key);
402
+ slot->value = rmap->data[pos].value;
403
+ }
404
+
405
+ smatrix_mfree(self, sizeof(smatrix_rmap_slot_t) * old_size);
406
+ free(rmap->data);
407
+
408
+ rmap->data = new.data;
409
+ rmap->size = new.size;
410
+ rmap->used = new.used;
411
+
412
+ if (self->fd) {
413
+ rmap->flags |= SMATRIX_RMAP_FLAG_RESIZED;
414
+ smatrix_rmap_sync_defer(self, rmap);
415
+ }
416
+ }
417
+
418
+ inline void smatrix_rmap_sync_defer(smatrix_t* self, smatrix_rmap_t* rmap) {
419
+ if ((rmap->flags & SMATRIX_RMAP_FLAG_DIRTY) > 0) {
420
+ return;
421
+ }
422
+
423
+ rmap->flags |= SMATRIX_RMAP_FLAG_DIRTY;
424
+ smatrix_ioqueue_add(self, rmap);
425
+ }
426
+
427
+ void smatrix_rmap_sync(smatrix_t* self, smatrix_rmap_t* rmap) {
428
+ uint64_t bytes;
429
+
430
+ if ((rmap->flags & SMATRIX_RMAP_FLAG_RESIZED) > 0) {
431
+ // FIXPAUL can't ffree without knowing the old size.. just dividing by 2 seems too hacky
432
+ //bytes = SMATRIX_RMAP_SLOT_SIZE * rmap->size + SMATRIX_RMAP_HEAD_SIZE;
433
+ //smatrix_ffree(self, rmap->fpos, bytes);
434
+
435
+ rmap->fpos = 0;
436
+ }
437
+
438
+ if (rmap->fpos == 0) {
439
+ bytes = SMATRIX_RMAP_SLOT_SIZE * rmap->size + SMATRIX_RMAP_HEAD_SIZE;
440
+ rmap->fpos = smatrix_falloc(self, bytes);
441
+
442
+ smatrix_rmap_write_batch(self, rmap, 1);
443
+ smatrix_cmap_write(self, rmap);
444
+ } else {
445
+ // FIXPAUL write only the actualy dirty slots!
446
+ smatrix_rmap_write_batch(self, rmap, 1);
447
+ }
448
+
449
+ rmap->flags &= ~SMATRIX_RMAP_FLAG_DIRTY;
450
+ rmap->flags &= ~SMATRIX_RMAP_FLAG_RESIZED;
451
+ }
452
+
453
+ // the caller of this must hold a read lock on rmap
454
+ void smatrix_rmap_write_batch(smatrix_t* self, smatrix_rmap_t* rmap, int full) {
455
+ uint64_t pos = 0, bytes, buf_pos, rmap_size = rmap->size;
456
+ char *buf;
457
+
458
+ if (full) {
459
+ bytes = rmap->size * SMATRIX_RMAP_SLOT_SIZE;
460
+ bytes += SMATRIX_RMAP_HEAD_SIZE;
461
+ } else {
462
+ bytes = SMATRIX_RMAP_HEAD_SIZE;
463
+ }
464
+
465
+ buf = smatrix_malloc(self, bytes);
466
+
467
+ memset(buf, 0, bytes);
468
+ memset(buf, 0x23, 8);
469
+ memcpy(buf + 8, &rmap_size, 8);
470
+
471
+ if (full) {
472
+ buf_pos = SMATRIX_RMAP_HEAD_SIZE;
473
+
474
+ for (pos = 0; pos < rmap->size; pos++) {
475
+ memcpy(buf + buf_pos, &rmap->data[pos].key, 4);
476
+ memcpy(buf + buf_pos + 4, &rmap->data[pos].value, 4);
477
+ buf_pos += SMATRIX_RMAP_SLOT_SIZE;
478
+ }
479
+ }
480
+
481
+ smatrix_write(self, rmap->fpos, buf, bytes);
482
+ }
483
+
484
+ void smatrix_rmap_write_slot(smatrix_t* self, smatrix_rmap_t* rmap, smatrix_rmap_slot_t* slot) {
485
+ uint64_t rmap_pos, fpos;
486
+ char* buf = smatrix_malloc(self, SMATRIX_RMAP_SLOT_SIZE);
487
+
488
+ rmap_pos = slot - rmap->data;
489
+ fpos = rmap_pos * SMATRIX_RMAP_SLOT_SIZE;
490
+ fpos += rmap->fpos + SMATRIX_RMAP_HEAD_SIZE;
491
+
492
+ memcpy(buf, &slot->key, 4);
493
+ memcpy(buf + 4, &slot->value, 4);
494
+
495
+ smatrix_write(self, fpos, buf, SMATRIX_RMAP_SLOT_SIZE);
496
+ }
497
+
498
+ // caller must hold writelock on rmap
499
+ void smatrix_rmap_load(smatrix_t* self, smatrix_rmap_t* rmap) {
500
+ uint64_t pos, read_bytes, mem_bytes, disk_bytes, rmap_size;
501
+ unsigned char meta_buf[SMATRIX_RMAP_HEAD_SIZE] = {0}, *buf;
502
+
503
+ if (rmap->flags & SMATRIX_RMAP_FLAG_LOADED)
504
+ return;
505
+
506
+ if (!rmap->size) {
507
+ if (pread(self->fd, &meta_buf, SMATRIX_RMAP_HEAD_SIZE, rmap->fpos) != SMATRIX_RMAP_HEAD_SIZE) {
508
+ smatrix_error("pread() failed (rmap_load). corrupt file?");
509
+ }
510
+
511
+ if (memcmp(&meta_buf, &SMATRIX_RMAP_MAGIC, SMATRIX_RMAP_MAGIC_SIZE)) {
512
+ smatrix_error("file is corrupt (rmap_load)");
513
+ }
514
+
515
+ rmap_size = *((uint64_t *) &meta_buf[8]);
516
+ rmap->size = rmap_size;
517
+ assert(rmap->size > 0);
518
+ }
519
+
520
+ mem_bytes = rmap->size * sizeof(smatrix_rmap_slot_t);
521
+ disk_bytes = rmap->size * SMATRIX_RMAP_SLOT_SIZE;
522
+ rmap->used = 0;
523
+ rmap->data = smatrix_malloc(self, mem_bytes);
524
+ buf = smatrix_malloc(self, disk_bytes);
525
+
526
+ memset(rmap->data, 0, mem_bytes);
527
+ read_bytes = pread(self->fd, buf, disk_bytes, rmap->fpos + SMATRIX_RMAP_HEAD_SIZE);
528
+
529
+ if (read_bytes != disk_bytes) {
530
+ smatrix_error("read() failed (rmap_load)");
531
+ }
532
+
533
+ for (pos = 0; pos < rmap->size; pos++) {
534
+ memcpy(&rmap->data[pos].value, buf + pos * SMATRIX_RMAP_SLOT_SIZE + 4, 4);
535
+
536
+ if (rmap->data[pos].value) {
537
+ memcpy(&rmap->data[pos].key, buf + pos * SMATRIX_RMAP_SLOT_SIZE, 4);
538
+ rmap->used++;
539
+ }
540
+ }
541
+
542
+ rmap->flags = SMATRIX_RMAP_FLAG_LOADED;
543
+ smatrix_mfree(self, disk_bytes);
544
+ free(buf);
545
+ }
546
+
547
+ // caller must hold a write lock on rmap
548
+ void smatrix_rmap_swap(smatrix_t* self, smatrix_rmap_t* rmap) {
549
+ rmap->flags &= ~SMATRIX_RMAP_FLAG_LOADED;
550
+ smatrix_mfree(self, sizeof(smatrix_rmap_slot_t) * rmap->size);
551
+ free(rmap->data);
552
+ }
553
+
554
+ void smatrix_rmap_free(smatrix_t* self, smatrix_rmap_t* rmap) {
555
+ if (rmap->data) {
556
+ smatrix_mfree(self, sizeof(smatrix_rmap_slot_t) * rmap->size);
557
+ free(rmap->data);
558
+ }
559
+
560
+ smatrix_mfree(self, sizeof(smatrix_rmap_t));
561
+ free(rmap);
562
+ }
563
+
564
+ void smatrix_fcreate(smatrix_t* self) {
565
+ char buf[SMATRIX_META_SIZE];
566
+ smatrix_falloc(self, SMATRIX_META_SIZE);
567
+
568
+ memset(&buf, 0, SMATRIX_META_SIZE);
569
+ memset(&buf, 0x17, 8);
570
+ pwrite(self->fd, &buf, SMATRIX_META_SIZE, 0);
571
+
572
+ smatrix_cmap_init(self);
573
+ smatrix_cmap_mkblock(self, &self->cmap);
574
+ }
575
+
576
+ void smatrix_fload(smatrix_t* self) {
577
+ char buf[SMATRIX_META_SIZE];
578
+ uint64_t read, cmap_head_fpos;
579
+
580
+ read = pread(self->fd, &buf, SMATRIX_META_SIZE, 0);
581
+
582
+ if (read != SMATRIX_META_SIZE) {
583
+ smatrix_error("invalid file header\n");
584
+ abort();
585
+ }
586
+
587
+ if (buf[0] != 0x17 || buf[1] != 0x17) {
588
+ smatrix_error("invalid file header\n");
589
+ abort();
590
+ }
591
+
592
+ memcpy(&cmap_head_fpos, &buf[8], 8);
593
+
594
+ smatrix_cmap_init(self);
595
+ smatrix_cmap_load(self, cmap_head_fpos);
596
+ }
597
+
598
+ void smatrix_cmap_init(smatrix_t* self) {
599
+ uint64_t bytes;
600
+
601
+ self->cmap.size = SMATRIX_CMAP_INITIAL_SIZE;
602
+ self->cmap.used = 0;
603
+ self->cmap.lock.count = 0;
604
+ self->cmap.lock.mutex = 0;
605
+ self->cmap.block_fpos = 0;
606
+ self->cmap.block_used = 0;
607
+ self->cmap.block_size = 0;
608
+
609
+ bytes = sizeof(smatrix_cmap_slot_t) * self->cmap.size;
610
+ self->cmap.data = smatrix_malloc(self, bytes);
611
+ memset(self->cmap.data, 0, bytes);
612
+ }
613
+
614
+ void smatrix_cmap_free(smatrix_t* self, smatrix_cmap_t* cmap) {
615
+ uint64_t bytes = sizeof(smatrix_cmap_slot_t) * cmap->size;
616
+ smatrix_mfree(self, bytes);
617
+ free(cmap->data);
618
+ }
619
+
620
+ // caller must hold no locks on cmap!
621
+ smatrix_rmap_t* smatrix_cmap_lookup(smatrix_t* self, smatrix_cmap_t* cmap, uint32_t key, int create) {
622
+ smatrix_cmap_slot_t* slot;
623
+ smatrix_rmap_t* rmap;
624
+
625
+ smatrix_lock_incref(&cmap->lock);
626
+ slot = smatrix_cmap_probe(cmap, key);
627
+
628
+ if (slot && slot->key == key && (slot->flags & SMATRIX_CMAP_SLOT_USED) != 0) {
629
+ rmap = slot->rmap;
630
+ smatrix_lock_incref(&rmap->lock);
631
+ smatrix_lock_decref(&cmap->lock);
632
+ return rmap;
633
+ }
634
+
635
+ smatrix_lock_decref(&cmap->lock);
636
+
637
+ if (!create) {
638
+ return NULL;
639
+ }
640
+
641
+ rmap = smatrix_malloc(self, sizeof(smatrix_rmap_t));
642
+ smatrix_rmap_init(self, rmap, SMATRIX_RMAP_INITIAL_SIZE);
643
+ rmap->key = key;
644
+
645
+ smatrix_lock_getmutex(&cmap->lock);
646
+ slot = smatrix_cmap_insert(self, cmap, key);
647
+
648
+ if (slot->rmap) {
649
+ smatrix_rmap_free(self, rmap);
650
+ rmap = slot->rmap;
651
+
652
+ smatrix_lock_incref(&rmap->lock);
653
+ smatrix_lock_release(&cmap->lock);
654
+ } else {
655
+ slot->rmap = rmap;
656
+
657
+ if (self->fd) {
658
+ rmap->meta_fpos = smatrix_cmap_falloc(self, &self->cmap);
659
+ }
660
+
661
+ smatrix_lock_incref(&rmap->lock);
662
+ smatrix_lock_release(&cmap->lock);
663
+
664
+ if (self->fd) {
665
+ smatrix_rmap_sync_defer(self, rmap);
666
+ }
667
+ }
668
+
669
+ return rmap;
670
+ }
671
+
672
+ // caller must hold a read lock on cmap!
673
+ smatrix_cmap_slot_t* smatrix_cmap_probe(smatrix_cmap_t* cmap, uint32_t key) {
674
+ unsigned pos = key;
675
+ smatrix_cmap_slot_t* slot;
676
+
677
+ slot = cmap->data + (key % cmap->size);
678
+
679
+ for (;;) {
680
+ if ((slot->flags & SMATRIX_CMAP_SLOT_USED) == 0) {
681
+ return slot;
682
+ }
683
+
684
+ if (slot->key == key) {
685
+ return slot;
686
+ }
687
+
688
+ pos++;
689
+ slot = cmap->data + (pos % cmap->size);
690
+ }
691
+
692
+ return slot;
693
+ }
694
+
695
+ smatrix_cmap_slot_t* smatrix_cmap_insert(smatrix_t* self, smatrix_cmap_t* cmap, uint32_t key) {
696
+ smatrix_cmap_slot_t* slot;
697
+
698
+ if (cmap->used * 4 >= cmap->size * 3) {
699
+ smatrix_cmap_resize(self, cmap);
700
+ }
701
+
702
+ slot = smatrix_cmap_probe(cmap, key);
703
+ assert(slot != NULL);
704
+
705
+ if ((slot->flags & SMATRIX_CMAP_SLOT_USED) == 0 || slot->key != key) {
706
+ cmap->used++;
707
+ slot->key = key;
708
+ slot->flags = SMATRIX_CMAP_SLOT_USED;
709
+ slot->rmap = NULL;
710
+ }
711
+
712
+ return slot;
713
+ }
714
+
715
+ void smatrix_cmap_resize(smatrix_t* self, smatrix_cmap_t* cmap) {
716
+ uint64_t new_bytes, pos;
717
+ smatrix_cmap_slot_t *slot;
718
+ smatrix_cmap_t new;
719
+
720
+ new.used = 0;
721
+ new.size = cmap->size * 2;
722
+ new_bytes = sizeof(smatrix_cmap_slot_t) * new.size;
723
+ new.data = smatrix_malloc(self, new_bytes);
724
+
725
+ smatrix_mfree(self, sizeof(smatrix_cmap_slot_t) * cmap->size);
726
+ memset(new.data, 0, new_bytes);
727
+
728
+ for (pos = 0; pos < cmap->size; pos++) {
729
+ if ((cmap->data[pos].flags & SMATRIX_CMAP_SLOT_USED) == 0)
730
+ continue;
731
+
732
+ slot = smatrix_cmap_insert(self, &new, cmap->data[pos].key);
733
+ slot->rmap = cmap->data[pos].rmap;
734
+ }
735
+
736
+ free(cmap->data);
737
+
738
+ cmap->data = new.data;
739
+ cmap->size = new.size;
740
+ cmap->used = new.used;
741
+ }
742
+
743
+ // caller must hold a write lock on cmap
744
+ uint64_t smatrix_cmap_falloc(smatrix_t* self, smatrix_cmap_t* cmap) {
745
+ uint64_t fpos;
746
+
747
+ if (cmap->block_used >= cmap->block_size) {
748
+ smatrix_cmap_mkblock(self, cmap);
749
+ }
750
+
751
+ fpos = cmap->block_fpos + SMATRIX_CMAP_HEAD_SIZE;
752
+ fpos += cmap->block_used * SMATRIX_CMAP_SLOT_SIZE;
753
+
754
+ cmap->block_used++;
755
+
756
+ return fpos;
757
+ }
758
+
759
+ void smatrix_cmap_mkblock(smatrix_t* self, smatrix_cmap_t* cmap) {
760
+ uint64_t bytes, meta_fpos;
761
+ char* buf = smatrix_malloc(self, SMATRIX_CMAP_HEAD_SIZE);
762
+ char* meta_buf = smatrix_malloc(self, 8);
763
+
764
+ meta_fpos = cmap->block_fpos + 8;
765
+
766
+ bytes = SMATRIX_CMAP_BLOCK_SIZE * SMATRIX_CMAP_SLOT_SIZE;
767
+ bytes += SMATRIX_CMAP_HEAD_SIZE;
768
+
769
+ cmap->block_fpos = smatrix_falloc(self, bytes);
770
+ cmap->block_used = 0;
771
+ cmap->block_size = SMATRIX_CMAP_BLOCK_SIZE;
772
+
773
+ memcpy(meta_buf, &cmap->block_fpos, 8);
774
+ memcpy(buf, &cmap->block_size, 8);
775
+ memset(buf + 8, 0, 8);
776
+
777
+ smatrix_write(self, cmap->block_fpos, buf, SMATRIX_CMAP_HEAD_SIZE);
778
+ smatrix_write(self, meta_fpos, meta_buf, 8);
779
+ }
780
+
781
+ void smatrix_cmap_write(smatrix_t* self, smatrix_rmap_t* rmap) {
782
+ char* buf = smatrix_malloc(self, SMATRIX_CMAP_SLOT_SIZE);
783
+
784
+ memcpy(buf, &rmap->key, 4);
785
+ memcpy(buf + 4, &rmap->fpos, 8);
786
+
787
+ smatrix_write(self, rmap->meta_fpos, buf, SMATRIX_CMAP_SLOT_SIZE);
788
+ }
789
+
790
+ void smatrix_cmap_load(smatrix_t* self, uint64_t head_fpos) {
791
+ smatrix_rmap_t* rmap;
792
+ unsigned char meta_buf[SMATRIX_CMAP_HEAD_SIZE], *buf;
793
+ ssize_t bytes, pos;
794
+ uint64_t fpos, value;
795
+
796
+ for (fpos = head_fpos; fpos;) {
797
+ self->cmap.block_fpos = fpos;
798
+
799
+ if (pread(self->fd, &meta_buf, SMATRIX_CMAP_HEAD_SIZE, fpos) != SMATRIX_CMAP_HEAD_SIZE) {
800
+ smatrix_error("pread() failed (cmap_load). corrupt file?");
801
+ }
802
+
803
+ fpos += SMATRIX_CMAP_HEAD_SIZE;
804
+ bytes = *((uint64_t *) &meta_buf) * SMATRIX_CMAP_SLOT_SIZE;
805
+ buf = smatrix_malloc(self, bytes);
806
+
807
+ if (pread(self->fd, buf, bytes, fpos) != bytes) {
808
+ smatrix_error("pread() failed (cmap_load). corrupt file?");
809
+ }
810
+
811
+ for (pos = 0; pos < bytes; pos += SMATRIX_CMAP_SLOT_SIZE) {
812
+ value = *((uint64_t *) &buf[pos + 4]);
813
+
814
+ if (!value)
815
+ break;
816
+
817
+ rmap = smatrix_malloc(self, sizeof(smatrix_rmap_t));
818
+ smatrix_rmap_init(self, rmap, 0);
819
+ rmap->key = *((uint32_t *) &buf[pos]);
820
+ rmap->meta_fpos = fpos + pos;
821
+ rmap->fpos = value;
822
+
823
+ smatrix_cmap_insert(self, &self->cmap, rmap->key)->rmap = rmap;
824
+ }
825
+
826
+ smatrix_mfree(self, bytes);
827
+ free(buf);
828
+ fpos = *((uint64_t *) &meta_buf[8]);
829
+ }
830
+ }
831
+
832
+ void smatrix_write(smatrix_t* self, uint64_t fpos, char* data, uint64_t bytes) {
833
+ if (pwrite(self->fd, data, bytes, fpos) != (ssize_t) bytes) {
834
+ smatrix_error("write() failed");
835
+ }
836
+
837
+ free(data);
838
+ smatrix_mfree(self, bytes);
839
+ }
840
+
841
+ // the caller of this function must have called smatrix_lock_incref before
842
+ // returns 0 for success, 1 for failure
843
+ void smatrix_lock_getmutex(smatrix_lock_t* lock) {
844
+ assert(lock->count > 0);
845
+
846
+ for (;;) {
847
+ if (__sync_bool_compare_and_swap(&lock->mutex, 0, 1)) {
848
+ break;
849
+ }
850
+
851
+ while (lock->mutex != 0) {
852
+ asm("pause");
853
+ }
854
+ }
855
+
856
+ while (lock->count > 0) {
857
+ asm("pause");
858
+ }
859
+ }
860
+
861
+ void smatrix_lock_dropmutex(smatrix_lock_t* lock) {
862
+ assert(lock->count == 0);
863
+ asm("lock incw (%0)" : : "c" (&lock->count));
864
+ lock->mutex = 0;
865
+ }
866
+
867
+ void smatrix_lock_release(smatrix_lock_t* lock) {
868
+ lock->mutex = 0;
869
+ }
870
+
871
+ inline void smatrix_lock_incref(smatrix_lock_t* lock) {
872
+ for (;;) {
873
+ asm("lock incw (%0)" : : "c" (&lock->count));
874
+
875
+ if (lock->mutex == 0) {
876
+ return;
877
+ }
878
+
879
+ asm("lock decw (%0)" : : "c" (&lock->count));
880
+
881
+ while (lock->mutex != 0) {
882
+ asm("pause");
883
+ }
884
+ }
885
+ }
886
+
887
+ inline void smatrix_lock_decref(smatrix_lock_t* lock) {
888
+ asm("lock decw (%0)" : : "c" (&lock->count));
889
+ }
890
+
891
// Reports a fatal error and aborts the process; never returns.
// The message goes to stderr and is newline-terminated: abort() is not
// required to flush stdio buffers, so a message left in a buffered stdout
// could be lost.
void smatrix_error(const char* msg) {
  fprintf(stderr, "libsmatrix error: %s\n", msg);
  abort();
}
895
+
896
+ void smatrix_ioqueue_add(smatrix_t* self, smatrix_rmap_t* rmap) {
897
+ smatrix_ref_t* ref;
898
+
899
+ ref = smatrix_malloc(self, sizeof(smatrix_ref_t));
900
+ ref->rmap = rmap;
901
+
902
+ smatrix_lock_getmutex(&self->lock);
903
+
904
+ ref->next = self->ioqueue;
905
+ self->ioqueue = ref;
906
+
907
+ smatrix_lock_release(&self->lock);
908
+ }
909
+
910
+ smatrix_rmap_t* smatrix_ioqueue_pop(smatrix_t* self) {
911
+ smatrix_ref_t* ref;
912
+ smatrix_rmap_t* rmap;
913
+
914
+ smatrix_lock_getmutex(&self->lock);
915
+
916
+ ref = self->ioqueue;
917
+
918
+ if (ref == NULL) {
919
+ smatrix_lock_release(&self->lock);
920
+ return NULL;
921
+ }
922
+
923
+ self->ioqueue = ref->next;
924
+ smatrix_lock_release(&self->lock);
925
+
926
+ rmap = ref->rmap;
927
+
928
+ free(ref);
929
+ smatrix_mfree(self, sizeof(smatrix_ref_t));
930
+
931
+ return rmap;
932
+ }
933
+
934
+ void* smatrix_io(void* self_) {
935
+ smatrix_t* self = self_;
936
+ smatrix_rmap_t* rmap;
937
+
938
+ for (;;) {
939
+ rmap = smatrix_ioqueue_pop(self);
940
+
941
+ if (rmap == NULL) {
942
+ if (self->shutdown) {
943
+ break;
944
+ } else {
945
+ usleep(100000);
946
+ continue;
947
+ }
948
+ }
949
+
950
+ smatrix_lock_getmutex(&rmap->lock);
951
+
952
+ if ((rmap->flags & SMATRIX_RMAP_FLAG_DIRTY) > 0) {
953
+ smatrix_rmap_sync(self, rmap);
954
+ }
955
+
956
+ smatrix_lock_release(&rmap->lock);
957
+ }
958
+
959
+ return NULL;
960
+ }