libsmatrix 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ Makefile.in
@@ -0,0 +1,21 @@
1
+ # This file is part of the "libsmatrix" project
2
+ # (c) 2011-2013 Paul Asmuth <paul@paulasmuth.com>
3
+ #
4
+ # Licensed under the MIT License (the "License"); you may not use this
5
+ # file except in compliance with the License. You may obtain a copy of
6
+ # the License at: http://opensource.org/licenses/MIT
7
+
8
+ include ../Makefile.in
9
+ include Makefile.in
10
+
11
+ TARGET = smatrix_ruby.$(LIBEXT)
12
+
13
+ all: $(TARGET)
14
+
15
+ ../smatrix.o:
16
+ cd .. && make
17
+
18
+ $(TARGET): ../smatrix.o ../smatrix_ruby.c ../smatrix_ruby.h
19
+ $(CC) -L$(RUBY_LIB) -I$(RUBY_INCLUDE_ARCH) -I$(RUBY_INCLUDE) $(LIBFLAGS) -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress $(LDFLAGS) ../smatrix_ruby.c ../smatrix.o -o $(TARGET)
20
+
21
+ install: $(TARGET)
@@ -0,0 +1,18 @@
1
+ require "mkmf"
2
+
3
+ mkmf_includes = <<EOF
4
+ RUBY_INCLUDE = #{RbConfig::CONFIG["rubyhdrdir"]}
5
+ RUBY_INCLUDE_ARCH = #{RbConfig::CONFIG["rubyhdrdir"]}/#{RbConfig::CONFIG["arch"]}
6
+ RUBY_LIB = #{RbConfig::CONFIG["libdir"]}
7
+ RUBY_SO_NAME = #{RbConfig::CONFIG['RUBY_SO_NAME']}
8
+ LIBRUBYARG_SHARED = #{$LIBRUBYARG_SHARED}
9
+ LIBRUBYARG_STATIC = #{$LIBRUBYARG_STATIC}
10
+ LIBRUBYARG = #{$LIBRUBYARG_SHARED.length > 0 ? $LIBRUBYARG_SHARED : $LIBRUBYARG_STATIC}
11
+ EOF
12
+
13
+ File.open(::File.expand_path("../Makefile.in", __FILE__), "w+") do |f|
14
+ f.write(mkmf_includes)
15
+ end
16
+
17
+ $makefile_created = true
18
+
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = "libsmatrix"
5
+ s.version = "0.0.1"
6
+ s.date = Date.today.to_s
7
+ s.platform = Gem::Platform::RUBY
8
+ s.authors = ["Paul Asmuth", "Amir Friedman"]
9
+ s.email = ["paul@paulasmuth.com", "amirf@null.co.il"]
10
+ s.homepage = "http://github.com/paulasmuth/libsmatrix"
11
+ s.summary = %q{A thread-safe two-dimensional sparse matrix data structure with C, Java and Ruby bindings.}
12
+ s.description = %q{A thread-safe two-dimensional sparse matrix data structure with C, Java and Ruby bindings. It was created to make loading and accessing medium-sized (10GB+) matrices in boxed languages like Java/Scala or Ruby easier.}
13
+ s.licenses = ["MIT"]
14
+ s.extensions = ['src/ruby/extconf.rb']
15
+ s.files = `git ls-files`.split("\n") - [".gitignore", ".rspec", ".travis.yml"]
16
+ s.test_files = `git ls-files -- spec/*`.split("\n")
17
+ s.require_paths = ["src/ruby/"]
18
+
19
+ s.add_development_dependency "rspec", "~> 2.8.0"
20
+ end
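The gemspec above describes the library only in prose, and the Ruby binding source (smatrix_ruby.c) is not part of this diff, so here is a minimal hedged sketch of the underlying C API as implemented in smatrix.c further down. It assumes that smatrix.h (the header included by smatrix.c) declares these functions; the file name /tmp/example.smx is purely illustrative.

    #include <inttypes.h>
    #include <stdio.h>
    #include "smatrix.h"  /* assumed to declare smatrix_open/set/incr/get/close */

    int main(void) {
      /* pass a file name for a disk-backed matrix, or NULL for a purely in-memory one */
      smatrix_t* m = smatrix_open("/tmp/example.smx");
      if (m == NULL)
        return 1;

      smatrix_set(m, 42, 7, 3);   /* (x=42, y=7) := 3 */
      smatrix_incr(m, 42, 7, 2);  /* now 5 */
      printf("%" PRIu32 "\n", smatrix_get(m, 42, 7));

      smatrix_close(m);           /* joins the background IO thread */
      return 0;
    }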
@@ -0,0 +1,6 @@
1
+ bundle_file = ::File.expand_path("../smatrix_ruby.bundle", __FILE__)
2
+ require bundle_file if ::File.exist? bundle_file
3
+
4
+ bundle_file = ::File.expand_path("../smatrix_ruby.so", __FILE__)
5
+ require bundle_file if ::File.exist? bundle_file
6
+
@@ -0,0 +1,960 @@
1
+ // This file is part of the "libsmatrix" project
2
+ // (c) 2011-2013 Paul Asmuth <paul@paulasmuth.com>
3
+ //
4
+ // Licensed under the MIT License (the "License"); you may not use this
5
+ // file except in compliance with the License. You may obtain a copy of
6
+ // the License at: http://opensource.org/licenses/MIT
7
+
8
+ #include <stdio.h>
9
+ #include <stdlib.h>
10
+ #include <string.h>
11
+ #include <sys/types.h>
12
+ #include <sys/stat.h>
13
+ #include <fcntl.h>
14
+ #include <unistd.h>
15
+ #include <assert.h>
16
+ #include <inttypes.h>
17
+
18
+ #include "smatrix.h"
19
+ #include "smatrix_private.h"
20
+
21
+ // TODO
22
+ // + make ioqueue fifo
23
+ // + smatrix_gc()
24
+ // + ftruncate in larger blocks
25
+ // + acquire lock on file to prevent concurrent access
26
+ // + check correct endianness on file open
27
+ // + proper error handling / return codes for smatrix_open
28
+ // + file free list
29
+
30
+ /*
31
+
32
+ libsmatrix file format (augmented BNF):
33
+ ---------------------------------------
34
+
35
+ FILE ::= FILE_HEADER ; header size is 512 bytes
36
+ FILE_BODY
37
+
38
+ FILE_HEADER ::= <8 Bytes 0x17> ; uint64_t, magic number
39
+ CMAP_HEAD_FPOS ; uint64_t
40
+ <496 Bytes 0x0> ; padding to 512 bytes
41
+
42
+ FILE_BODY ::= *( CMAP_BLOCK | RMAP_BLOCK )
43
+
44
+ CMAP_BLOCK ::= CMAP_BLOCK_SIZE ; uint64_t
45
+ CMAP_BLOCK_NEXT ; uint64_t, file offset
46
+ *( CMAP_ENTRY ) ; 12 bytes each
47
+
48
+ CMAP_ENTRY ::= CMAP_ENTRY_KEY ; uint32_t
49
+ CMAP_ENTRY_VALUE ; uint64_t
50
+
51
+ CMAP_ENTRY_KEY ::= <uint32_t> ; key / first dimension
52
+ CMAP_ENTRY_VALUE ::= <uint64_t> ; file offset of the RMAP_BLOCK
53
+ CMAP_HEAD_FPOS ::= <uint64_t> ; file offset of the first CMAP_BLOCK
54
+ CMAP_BLOCK_SIZE ::= <uint64_t> ; number of entries in this block
55
+ CMAP_BLOCK_NEXT ::= <uint64_t> ; file offset of the next block or 0
56
+
57
+ RMAP_BLOCK ::= <8 Bytes 0x23> ; uint64_t, magic number
58
+ RMAP_BLOCK_SIZE ; uint64_t
59
+ *( RMAP_SLOT ) ; 8 bytes each
60
+
61
+ RMAP_SLOT ::= RMAP_ENTRY ; used hashmap slot
62
+ | RMAP_SLOT_UNUSED ; unused hashmap slot
63
+
64
+ RMAP_ENTRY ::= RMAP_ENTRY_KEY ; uint32_t
65
+ RMAP_ENTRY_VALUE ; uint32_t
66
+
67
+ RMAP_SLOT_UNUSED ::= <8 Bytes 0x0> ; empty slot
68
+ RMAP_ENTRY_KEY ::= <uint32_t> ; key / second dimension
69
+ RMAP_ENTRY_VALUE ::= <uint32_t> ; value
70
+ RMAP_BLOCK_SIZE ::= <uint64_t> ; number of slots in this block
71
+
72
+ */
73
+
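As a reading aid for the FILE_HEADER layout described in the comment above, the following is a hedged, standalone sketch (not part of the package) that reads the 512-byte header and extracts CMAP_HEAD_FPOS. It assumes the raw, native-byte-order layout that smatrix_fcreate() and smatrix_fload() below use; note the TODO above about verifying endianness on open.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Reads the 512-byte FILE_HEADER from an already opened FILE* and returns 0 on
       success, storing the file offset of the first CMAP_BLOCK in *cmap_head_fpos. */
    static int read_file_header(FILE* f, uint64_t* cmap_head_fpos) {
      unsigned char hdr[512];

      if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr))
        return -1;                          /* short read: not a valid smatrix file */

      for (int i = 0; i < 8; i++)           /* magic number: 8 bytes, each 0x17     */
        if (hdr[i] != 0x17)
          return -1;

      memcpy(cmap_head_fpos, hdr + 8, 8);   /* CMAP_HEAD_FPOS, native byte order    */
      return 0;                             /* remaining 496 bytes are padding      */
    }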
74
+ smatrix_t* smatrix_open(const char* fname) {
75
+ smatrix_t* self = calloc(1, sizeof(smatrix_t));
76
+
77
+ if (self == NULL)
78
+ return NULL;
79
+
80
+ self->ioqueue = NULL;
81
+ self->lock.count = 0;
82
+ self->lock.mutex = 0;
83
+ self->shutdown = 0;
84
+
85
+ if (!fname) {
86
+ smatrix_cmap_init(self);
87
+ return self;
88
+ }
89
+
90
+ self->fd = open(fname, O_RDWR | O_CREAT, 00600);
91
+
92
+ if (self->fd == -1) {
93
+ perror("cannot open file");
94
+ free(self);
95
+ return NULL;
96
+ }
97
+
98
+ self->fpos = lseek(self->fd, 0, SEEK_END);
99
+
100
+ if (self->fpos == 0) {
101
+ smatrix_fcreate(self);
102
+ } else {
103
+ smatrix_fload(self);
104
+ }
105
+
106
+ if (pthread_create(&self->iothread, NULL, &smatrix_io, self)) {
107
+ smatrix_error("can't start the IO thread");
108
+ }
109
+
110
+ return self;
111
+ }
112
+
113
+ void smatrix_close(smatrix_t* self) {
114
+ void* retval;
115
+ uint64_t pos;
116
+
117
+ self->shutdown = 1;
118
+ pthread_join(self->iothread, &retval);
119
+
120
+ for (pos = 0; pos < self->cmap.size; pos++) {
121
+ if (self->cmap.data[pos].flags & SMATRIX_CMAP_SLOT_USED) {
122
+ smatrix_rmap_free(self, self->cmap.data[pos].rmap);
123
+ }
124
+ }
125
+
126
+ smatrix_cmap_free(self, &self->cmap);
127
+
128
+ if (self->fd) {
129
+ close(self->fd);
130
+ }
131
+
132
+ free(self);
133
+ }
134
+
135
+ uint64_t smatrix_falloc(smatrix_t* self, uint64_t bytes) {
136
+ smatrix_lock_getmutex(&self->lock);
137
+
138
+ uint64_t old = self->fpos;
139
+ uint64_t new = old + bytes;
140
+
141
+ if (ftruncate(self->fd, new) == -1) {
142
+ smatrix_error("truncate() failed");
143
+ }
144
+
145
+ self->fpos = new;
146
+
147
+ smatrix_lock_release(&self->lock);
148
+ return old;
149
+ }
150
+
151
+ inline void* smatrix_malloc(smatrix_t* self, uint64_t bytes) {
152
+ __sync_add_and_fetch(&self->mem, bytes);
153
+
154
+ void* ptr = malloc(bytes);
155
+
156
+ if (ptr == NULL) {
157
+ smatrix_error("malloc() failed");
158
+ abort();
159
+ }
160
+
161
+ return ptr;
162
+ }
163
+
164
+ inline void smatrix_mfree(smatrix_t* self, uint64_t bytes) {
165
+ __sync_sub_and_fetch(&self->mem, bytes);
166
+ }
167
+
168
+ void smatrix_ffree(smatrix_t* self, uint64_t fpos, uint64_t bytes) {
169
+ (void) self;
170
+ (void) fpos;
171
+ (void) bytes;
172
+ }
173
+
174
+ uint32_t smatrix_get(smatrix_t* self, uint32_t x, uint32_t y) {
175
+ smatrix_ref_t ref;
176
+ uint32_t retval = 0;
177
+
178
+ smatrix_lookup(self, &ref, x, y, 0);
179
+
180
+ if (ref.slot)
181
+ retval = ref.slot->value;
182
+
183
+ smatrix_decref(self, &ref);
184
+ return retval;
185
+ }
186
+
187
+ // returns a whole row as an array of uint32_t's; even slots (0, 2, ...) contain column indexes,
188
+ // odd slots (1, 3, ...) contain values. example: [index, value, index, value...]
189
+ uint32_t smatrix_getrow(smatrix_t* self, uint32_t x, uint32_t* ret, size_t ret_len) {
190
+ smatrix_ref_t ref;
191
+ uint32_t pos, num = 0;
192
+
193
+ smatrix_lookup(self, &ref, x, 0, 0);
194
+
195
+ if (ref.rmap) {
196
+ for (pos = 0; pos < ref.rmap->size; pos++) {
197
+ if (!ref.rmap->data[pos].key && !ref.rmap->data[pos].value)
198
+ continue;
199
+
200
+ ret[num * 2] = ref.rmap->data[pos].key;
201
+ ret[num * 2 + 1] = ref.rmap->data[pos].value;
202
+
203
+ if ((++num * 2 * sizeof(uint32_t)) >= ret_len)
204
+ break;
205
+ }
206
+ }
207
+
208
+ smatrix_decref(self, &ref);
209
+ return num;
210
+ }
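A hedged usage sketch for smatrix_getrow() above: smatrix_rowlen() (defined next) reports how many slots of the row are populated, and the ret_len argument is a byte count, as the break condition in the loop shows. It assumes smatrix.h declares both functions.

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "smatrix.h"

    /* Prints every (column index, value) pair stored in row x. */
    static void print_row(smatrix_t* m, uint32_t x) {
      uint32_t len = smatrix_rowlen(m, x);            /* number of populated slots  */
      if (len == 0)
        return;

      size_t bytes = (size_t) len * 2 * sizeof(uint32_t);
      uint32_t* buf = malloc(bytes);
      if (buf == NULL)
        return;

      uint32_t n = smatrix_getrow(m, x, buf, bytes);  /* ret_len is a size in bytes */
      for (uint32_t i = 0; i < n; i++)
        printf("(%" PRIu32 ", %" PRIu32 ") -> %" PRIu32 "\n",
               x, buf[i * 2], buf[i * 2 + 1]);

      free(buf);
    }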
211
+
212
+ uint32_t smatrix_rowlen(smatrix_t* self, uint32_t x) {
213
+ smatrix_ref_t ref;
214
+ uint32_t len = 0;
215
+
216
+ smatrix_lookup(self, &ref, x, 0, 0);
217
+
218
+ if (ref.rmap)
219
+ len = ref.rmap->used;
220
+
221
+ smatrix_decref(self, &ref);
222
+ return len;
223
+ }
224
+
225
+ uint32_t smatrix_set(smatrix_t* self, uint32_t x, uint32_t y, uint32_t value) {
226
+ smatrix_ref_t ref;
227
+ uint32_t retval;
228
+
229
+ smatrix_lookup(self, &ref, x, y, 1);
230
+ retval = (ref.slot->value = value);
231
+ smatrix_decref(self, &ref);
232
+
233
+ return retval;
234
+ }
235
+
236
+ uint32_t smatrix_incr(smatrix_t* self, uint32_t x, uint32_t y, uint32_t value) {
237
+ smatrix_ref_t ref;
238
+ uint32_t retval;
239
+
240
+ smatrix_lookup(self, &ref, x, y, 1);
241
+ retval = (ref.slot->value += value);
242
+ smatrix_decref(self, &ref);
243
+
244
+ return retval;
245
+ }
246
+
247
+ uint32_t smatrix_decr(smatrix_t* self, uint32_t x, uint32_t y, uint32_t value) {
248
+ smatrix_ref_t ref;
249
+ uint32_t retval;
250
+
251
+ smatrix_lookup(self, &ref, x, y, 1);
252
+ retval = (ref.slot->value -= value);
253
+ smatrix_decref(self, &ref);
254
+
255
+ return retval;
256
+ }
257
+
258
+ void smatrix_lookup(smatrix_t* self, smatrix_ref_t* ref, uint32_t x, uint32_t y, int write) {
259
+ int mutex = 0;
260
+ smatrix_rmap_t* rmap;
261
+ smatrix_rmap_slot_t* slot;
262
+
263
+ ref->rmap = NULL;
264
+ ref->slot = NULL;
265
+ ref->write = write;
266
+
267
+ rmap = smatrix_cmap_lookup(self, &self->cmap, x, write);
268
+
269
+ if (rmap == NULL) {
270
+ return;
271
+ }
272
+
273
+ if (write) {
274
+ smatrix_lock_decref(&rmap->lock);
275
+ smatrix_lock_getmutex(&rmap->lock);
276
+ mutex = 1;
277
+ }
278
+
279
+ if (rmap->size == 0) {
280
+ if (!mutex) {
281
+ smatrix_lock_decref(&rmap->lock);
282
+ smatrix_lock_getmutex(&rmap->lock);
283
+ mutex = 1;
284
+ }
285
+
286
+ if (rmap->size == 0) {
287
+ smatrix_rmap_load(self, rmap);
288
+ }
289
+ }
290
+
291
+ ref->rmap = rmap;
292
+
293
+ if (mutex && !write) {
294
+ smatrix_lock_dropmutex(&rmap->lock);
295
+ }
296
+
297
+ slot = smatrix_rmap_probe(rmap, y);
298
+
299
+ if (slot != NULL && slot->key == y) {
300
+ ref->slot = slot;
301
+ } else if (write) {
302
+ ref->slot = smatrix_rmap_insert(self, rmap, y);
303
+ }
304
+ }
305
+
306
+ void smatrix_decref(smatrix_t* self, smatrix_ref_t* ref) {
307
+ if (!ref->rmap) {
308
+ return;
309
+ }
310
+
311
+ if (ref->write) {
312
+ if (self->fd) {
313
+ // FIXPAUL: this will sync the whole rmap. if only one slot changed this is a lot of overhead...
314
+ smatrix_rmap_sync_defer(self, ref->rmap);
315
+ }
316
+
317
+ smatrix_lock_release(&ref->rmap->lock);
318
+ } else {
319
+ smatrix_lock_decref(&ref->rmap->lock);
320
+ }
321
+ }
322
+
323
+ void smatrix_rmap_init(smatrix_t* self, smatrix_rmap_t* rmap, uint32_t size) {
324
+ if (size > 0) {
325
+ size_t bytes = sizeof(smatrix_rmap_slot_t) * size;
326
+
327
+ rmap->data = smatrix_malloc(self, bytes);
328
+ memset(rmap->data, 0, bytes);
329
+ } else {
330
+ rmap->data = NULL;
331
+ }
332
+
333
+ rmap->size = size;
334
+ rmap->used = 0;
335
+ rmap->fpos = 0;
336
+ rmap->flags = 0;
337
+ rmap->lock.count = 0;
338
+ rmap->lock.mutex = 0;
339
+ }
340
+
341
+
342
+ // you need to hold a write lock on rmap to call this function safely
343
+ smatrix_rmap_slot_t* smatrix_rmap_insert(smatrix_t* self, smatrix_rmap_t* rmap, uint32_t key) {
344
+ smatrix_rmap_slot_t* slot;
345
+
346
+ if (rmap->used > rmap->size / 2) {
347
+ smatrix_rmap_resize(self, rmap);
348
+ }
349
+
350
+ slot = smatrix_rmap_probe(rmap, key);
351
+ assert(slot != NULL);
352
+
353
+ if (!slot->key || slot->key != key) {
354
+ rmap->used++;
355
+ slot->key = key;
356
+ slot->value = 0;
357
+ }
358
+
359
+ return slot;
360
+ }
361
+
362
+ // you need to hold a read or write lock on rmap to call this function safely
363
+ smatrix_rmap_slot_t* smatrix_rmap_probe(smatrix_rmap_t* rmap, uint32_t key) {
364
+ uint64_t n, pos;
365
+
366
+ pos = key % rmap->size;
367
+
368
+ // linear probing
369
+ for (n = 0; n < rmap->size; n++) {
370
+ if (rmap->data[pos].key == key)
371
+ break;
372
+
373
+ if (!rmap->data[pos].key && !rmap->data[pos].value)
374
+ break;
375
+
376
+ pos = (pos + 1) % rmap->size;
377
+ }
378
+
379
+ return &rmap->data[pos];
380
+ }
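A small hedged illustration of the linear probing above (demo code, not part of the library; it assumes the rmap declarations from smatrix_private.h are visible): with a table of size 8, keys 3 and 11 both map to slot 3, so the second insert lands one slot further along.

    #include <assert.h>
    #include <stdlib.h>

    static void probe_demo(smatrix_t* self) {
      smatrix_rmap_t rmap;                   /* local rmap: no lock needed here */
      smatrix_rmap_init(self, &rmap, 8);     /* 8 zeroed slots                  */

      smatrix_rmap_insert(self, &rmap, 3)->value  = 1;
      smatrix_rmap_insert(self, &rmap, 11)->value = 2;

      assert(rmap.data[3].key == 3);         /* 3 % 8 == 3                          */
      assert(rmap.data[4].key == 11);        /* 11 % 8 == 3, slot taken, probe to 4 */

      free(rmap.data);
    }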
381
+
382
+ // you need to hold a write lock on rmap in order to call this function safely
383
+ void smatrix_rmap_resize(smatrix_t* self, smatrix_rmap_t* rmap) {
384
+ uint64_t pos, bytes, old_size, new_size;
385
+ smatrix_rmap_slot_t* slot;
386
+ smatrix_rmap_t new;
387
+
388
+ old_size = rmap->size;
389
+ new_size = rmap->size * 2;
390
+ bytes = sizeof(smatrix_rmap_slot_t) * new_size;
391
+
392
+ new.size = new_size;
393
+ new.used = 0;
394
+ new.data = smatrix_malloc(self, bytes);
395
+ memset(new.data, 0, bytes);
396
+
397
+ for (pos = 0; pos < rmap->size; pos++) {
398
+ if (!rmap->data[pos].key && !rmap->data[pos].value)
399
+ continue;
400
+
401
+ slot = smatrix_rmap_insert(self, &new, rmap->data[pos].key);
402
+ slot->value = rmap->data[pos].value;
403
+ }
404
+
405
+ smatrix_mfree(self, sizeof(smatrix_rmap_slot_t) * old_size);
406
+ free(rmap->data);
407
+
408
+ rmap->data = new.data;
409
+ rmap->size = new.size;
410
+ rmap->used = new.used;
411
+
412
+ if (self->fd) {
413
+ rmap->flags |= SMATRIX_RMAP_FLAG_RESIZED;
414
+ smatrix_rmap_sync_defer(self, rmap);
415
+ }
416
+ }
417
+
418
+ inline void smatrix_rmap_sync_defer(smatrix_t* self, smatrix_rmap_t* rmap) {
419
+ if ((rmap->flags & SMATRIX_RMAP_FLAG_DIRTY) > 0) {
420
+ return;
421
+ }
422
+
423
+ rmap->flags |= SMATRIX_RMAP_FLAG_DIRTY;
424
+ smatrix_ioqueue_add(self, rmap);
425
+ }
426
+
427
+ void smatrix_rmap_sync(smatrix_t* self, smatrix_rmap_t* rmap) {
428
+ uint64_t bytes;
429
+
430
+ if ((rmap->flags & SMATRIX_RMAP_FLAG_RESIZED) > 0) {
431
+ // FIXPAUL can't ffree without knowing the old size.. just dividing by 2 seems too hacky
432
+ //bytes = SMATRIX_RMAP_SLOT_SIZE * rmap->size + SMATRIX_RMAP_HEAD_SIZE;
433
+ //smatrix_ffree(self, rmap->fpos, bytes);
434
+
435
+ rmap->fpos = 0;
436
+ }
437
+
438
+ if (rmap->fpos == 0) {
439
+ bytes = SMATRIX_RMAP_SLOT_SIZE * rmap->size + SMATRIX_RMAP_HEAD_SIZE;
440
+ rmap->fpos = smatrix_falloc(self, bytes);
441
+
442
+ smatrix_rmap_write_batch(self, rmap, 1);
443
+ smatrix_cmap_write(self, rmap);
444
+ } else {
445
+ // FIXPAUL write only the actually dirty slots!
446
+ smatrix_rmap_write_batch(self, rmap, 1);
447
+ }
448
+
449
+ rmap->flags &= ~SMATRIX_RMAP_FLAG_DIRTY;
450
+ rmap->flags &= ~SMATRIX_RMAP_FLAG_RESIZED;
451
+ }
452
+
453
+ // the caller of this must hold a read lock on rmap
454
+ void smatrix_rmap_write_batch(smatrix_t* self, smatrix_rmap_t* rmap, int full) {
455
+ uint64_t pos = 0, bytes, buf_pos, rmap_size = rmap->size;
456
+ char *buf;
457
+
458
+ if (full) {
459
+ bytes = rmap->size * SMATRIX_RMAP_SLOT_SIZE;
460
+ bytes += SMATRIX_RMAP_HEAD_SIZE;
461
+ } else {
462
+ bytes = SMATRIX_RMAP_HEAD_SIZE;
463
+ }
464
+
465
+ buf = smatrix_malloc(self, bytes);
466
+
467
+ memset(buf, 0, bytes);
468
+ memset(buf, 0x23, 8);
469
+ memcpy(buf + 8, &rmap_size, 8);
470
+
471
+ if (full) {
472
+ buf_pos = SMATRIX_RMAP_HEAD_SIZE;
473
+
474
+ for (pos = 0; pos < rmap->size; pos++) {
475
+ memcpy(buf + buf_pos, &rmap->data[pos].key, 4);
476
+ memcpy(buf + buf_pos + 4, &rmap->data[pos].value, 4);
477
+ buf_pos += SMATRIX_RMAP_SLOT_SIZE;
478
+ }
479
+ }
480
+
481
+ smatrix_write(self, rmap->fpos, buf, bytes);
482
+ }
483
+
484
+ void smatrix_rmap_write_slot(smatrix_t* self, smatrix_rmap_t* rmap, smatrix_rmap_slot_t* slot) {
485
+ uint64_t rmap_pos, fpos;
486
+ char* buf = smatrix_malloc(self, SMATRIX_RMAP_SLOT_SIZE);
487
+
488
+ rmap_pos = slot - rmap->data;
489
+ fpos = rmap_pos * SMATRIX_RMAP_SLOT_SIZE;
490
+ fpos += rmap->fpos + SMATRIX_RMAP_HEAD_SIZE;
491
+
492
+ memcpy(buf, &slot->key, 4);
493
+ memcpy(buf + 4, &slot->value, 4);
494
+
495
+ smatrix_write(self, fpos, buf, SMATRIX_RMAP_SLOT_SIZE);
496
+ }
497
+
498
+ // caller must hold writelock on rmap
499
+ void smatrix_rmap_load(smatrix_t* self, smatrix_rmap_t* rmap) {
500
+ uint64_t pos, read_bytes, mem_bytes, disk_bytes, rmap_size;
501
+ unsigned char meta_buf[SMATRIX_RMAP_HEAD_SIZE] = {0}, *buf;
502
+
503
+ if (rmap->flags & SMATRIX_RMAP_FLAG_LOADED)
504
+ return;
505
+
506
+ if (!rmap->size) {
507
+ if (pread(self->fd, &meta_buf, SMATRIX_RMAP_HEAD_SIZE, rmap->fpos) != SMATRIX_RMAP_HEAD_SIZE) {
508
+ smatrix_error("pread() failed (rmap_load). corrupt file?");
509
+ }
510
+
511
+ if (memcmp(&meta_buf, &SMATRIX_RMAP_MAGIC, SMATRIX_RMAP_MAGIC_SIZE)) {
512
+ smatrix_error("file is corrupt (rmap_load)");
513
+ }
514
+
515
+ rmap_size = *((uint64_t *) &meta_buf[8]);
516
+ rmap->size = rmap_size;
517
+ assert(rmap->size > 0);
518
+ }
519
+
520
+ mem_bytes = rmap->size * sizeof(smatrix_rmap_slot_t);
521
+ disk_bytes = rmap->size * SMATRIX_RMAP_SLOT_SIZE;
522
+ rmap->used = 0;
523
+ rmap->data = smatrix_malloc(self, mem_bytes);
524
+ buf = smatrix_malloc(self, disk_bytes);
525
+
526
+ memset(rmap->data, 0, mem_bytes);
527
+ read_bytes = pread(self->fd, buf, disk_bytes, rmap->fpos + SMATRIX_RMAP_HEAD_SIZE);
528
+
529
+ if (read_bytes != disk_bytes) {
530
+ smatrix_error("read() failed (rmap_load)");
531
+ }
532
+
533
+ for (pos = 0; pos < rmap->size; pos++) {
534
+ memcpy(&rmap->data[pos].value, buf + pos * SMATRIX_RMAP_SLOT_SIZE + 4, 4);
535
+
536
+ if (rmap->data[pos].value) {
537
+ memcpy(&rmap->data[pos].key, buf + pos * SMATRIX_RMAP_SLOT_SIZE, 4);
538
+ rmap->used++;
539
+ }
540
+ }
541
+
542
+ rmap->flags = SMATRIX_RMAP_FLAG_LOADED;
543
+ smatrix_mfree(self, disk_bytes);
544
+ free(buf);
545
+ }
546
+
547
+ // caller must hold a write lock on rmap
548
+ void smatrix_rmap_swap(smatrix_t* self, smatrix_rmap_t* rmap) {
549
+ rmap->flags &= ~SMATRIX_RMAP_FLAG_LOADED;
550
+ smatrix_mfree(self, sizeof(smatrix_rmap_slot_t) * rmap->size);
551
+ free(rmap->data);
552
+ }
553
+
554
+ void smatrix_rmap_free(smatrix_t* self, smatrix_rmap_t* rmap) {
555
+ if (rmap->data) {
556
+ smatrix_mfree(self, sizeof(smatrix_rmap_slot_t) * rmap->size);
557
+ free(rmap->data);
558
+ }
559
+
560
+ smatrix_mfree(self, sizeof(smatrix_rmap_t));
561
+ free(rmap);
562
+ }
563
+
564
+ void smatrix_fcreate(smatrix_t* self) {
565
+ char buf[SMATRIX_META_SIZE];
566
+ smatrix_falloc(self, SMATRIX_META_SIZE);
567
+
568
+ memset(&buf, 0, SMATRIX_META_SIZE);
569
+ memset(&buf, 0x17, 8);
570
+ pwrite(self->fd, &buf, SMATRIX_META_SIZE, 0);
571
+
572
+ smatrix_cmap_init(self);
573
+ smatrix_cmap_mkblock(self, &self->cmap);
574
+ }
575
+
576
+ void smatrix_fload(smatrix_t* self) {
577
+ char buf[SMATRIX_META_SIZE];
578
+ uint64_t read, cmap_head_fpos;
579
+
580
+ read = pread(self->fd, &buf, SMATRIX_META_SIZE, 0);
581
+
582
+ if (read != SMATRIX_META_SIZE) {
583
+ smatrix_error("invalid file header\n");
584
+ abort();
585
+ }
586
+
587
+ if (buf[0] != 0x17 || buf[1] != 0x17) {
588
+ smatrix_error("invalid file header\n");
589
+ abort();
590
+ }
591
+
592
+ memcpy(&cmap_head_fpos, &buf[8], 8);
593
+
594
+ smatrix_cmap_init(self);
595
+ smatrix_cmap_load(self, cmap_head_fpos);
596
+ }
597
+
598
+ void smatrix_cmap_init(smatrix_t* self) {
599
+ uint64_t bytes;
600
+
601
+ self->cmap.size = SMATRIX_CMAP_INITIAL_SIZE;
602
+ self->cmap.used = 0;
603
+ self->cmap.lock.count = 0;
604
+ self->cmap.lock.mutex = 0;
605
+ self->cmap.block_fpos = 0;
606
+ self->cmap.block_used = 0;
607
+ self->cmap.block_size = 0;
608
+
609
+ bytes = sizeof(smatrix_cmap_slot_t) * self->cmap.size;
610
+ self->cmap.data = smatrix_malloc(self, bytes);
611
+ memset(self->cmap.data, 0, bytes);
612
+ }
613
+
614
+ void smatrix_cmap_free(smatrix_t* self, smatrix_cmap_t* cmap) {
615
+ uint64_t bytes = sizeof(smatrix_cmap_slot_t) * cmap->size;
616
+ smatrix_mfree(self, bytes);
617
+ free(cmap->data);
618
+ }
619
+
620
+ // caller must hold no locks on cmap!
621
+ smatrix_rmap_t* smatrix_cmap_lookup(smatrix_t* self, smatrix_cmap_t* cmap, uint32_t key, int create) {
622
+ smatrix_cmap_slot_t* slot;
623
+ smatrix_rmap_t* rmap;
624
+
625
+ smatrix_lock_incref(&cmap->lock);
626
+ slot = smatrix_cmap_probe(cmap, key);
627
+
628
+ if (slot && slot->key == key && (slot->flags & SMATRIX_CMAP_SLOT_USED) != 0) {
629
+ rmap = slot->rmap;
630
+ smatrix_lock_incref(&rmap->lock);
631
+ smatrix_lock_decref(&cmap->lock);
632
+ return rmap;
633
+ }
634
+
635
+ smatrix_lock_decref(&cmap->lock);
636
+
637
+ if (!create) {
638
+ return NULL;
639
+ }
640
+
641
+ rmap = smatrix_malloc(self, sizeof(smatrix_rmap_t));
642
+ smatrix_rmap_init(self, rmap, SMATRIX_RMAP_INITIAL_SIZE);
643
+ rmap->key = key;
644
+
645
+ smatrix_lock_getmutex(&cmap->lock);
646
+ slot = smatrix_cmap_insert(self, cmap, key);
647
+
648
+ if (slot->rmap) {
649
+ smatrix_rmap_free(self, rmap);
650
+ rmap = slot->rmap;
651
+
652
+ smatrix_lock_incref(&rmap->lock);
653
+ smatrix_lock_release(&cmap->lock);
654
+ } else {
655
+ slot->rmap = rmap;
656
+
657
+ if (self->fd) {
658
+ rmap->meta_fpos = smatrix_cmap_falloc(self, &self->cmap);
659
+ }
660
+
661
+ smatrix_lock_incref(&rmap->lock);
662
+ smatrix_lock_release(&cmap->lock);
663
+
664
+ if (self->fd) {
665
+ smatrix_rmap_sync_defer(self, rmap);
666
+ }
667
+ }
668
+
669
+ return rmap;
670
+ }
671
+
672
+ // caller must hold a read lock on cmap!
673
+ smatrix_cmap_slot_t* smatrix_cmap_probe(smatrix_cmap_t* cmap, uint32_t key) {
674
+ unsigned pos = key;
675
+ smatrix_cmap_slot_t* slot;
676
+
677
+ slot = cmap->data + (key % cmap->size);
678
+
679
+ for (;;) {
680
+ if ((slot->flags & SMATRIX_CMAP_SLOT_USED) == 0) {
681
+ return slot;
682
+ }
683
+
684
+ if (slot->key == key) {
685
+ return slot;
686
+ }
687
+
688
+ pos++;
689
+ slot = cmap->data + (pos % cmap->size);
690
+ }
691
+
692
+ return slot;
693
+ }
694
+
695
+ smatrix_cmap_slot_t* smatrix_cmap_insert(smatrix_t* self, smatrix_cmap_t* cmap, uint32_t key) {
696
+ smatrix_cmap_slot_t* slot;
697
+
698
+ if (cmap->used * 4 >= cmap->size * 3) {
699
+ smatrix_cmap_resize(self, cmap);
700
+ }
701
+
702
+ slot = smatrix_cmap_probe(cmap, key);
703
+ assert(slot != NULL);
704
+
705
+ if ((slot->flags & SMATRIX_CMAP_SLOT_USED) == 0 || slot->key != key) {
706
+ cmap->used++;
707
+ slot->key = key;
708
+ slot->flags = SMATRIX_CMAP_SLOT_USED;
709
+ slot->rmap = NULL;
710
+ }
711
+
712
+ return slot;
713
+ }
714
+
715
+ void smatrix_cmap_resize(smatrix_t* self, smatrix_cmap_t* cmap) {
716
+ uint64_t new_bytes, pos;
717
+ smatrix_cmap_slot_t *slot;
718
+ smatrix_cmap_t new;
719
+
720
+ new.used = 0;
721
+ new.size = cmap->size * 2;
722
+ new_bytes = sizeof(smatrix_cmap_slot_t) * new.size;
723
+ new.data = smatrix_malloc(self, new_bytes);
724
+
725
+ smatrix_mfree(self, sizeof(smatrix_cmap_slot_t) * cmap->size);
726
+ memset(new.data, 0, new_bytes);
727
+
728
+ for (pos = 0; pos < cmap->size; pos++) {
729
+ if ((cmap->data[pos].flags & SMATRIX_CMAP_SLOT_USED) == 0)
730
+ continue;
731
+
732
+ slot = smatrix_cmap_insert(self, &new, cmap->data[pos].key);
733
+ slot->rmap = cmap->data[pos].rmap;
734
+ }
735
+
736
+ free(cmap->data);
737
+
738
+ cmap->data = new.data;
739
+ cmap->size = new.size;
740
+ cmap->used = new.used;
741
+ }
742
+
743
+ // caller must hold a write lock on cmap
744
+ uint64_t smatrix_cmap_falloc(smatrix_t* self, smatrix_cmap_t* cmap) {
745
+ uint64_t fpos;
746
+
747
+ if (cmap->block_used >= cmap->block_size) {
748
+ smatrix_cmap_mkblock(self, cmap);
749
+ }
750
+
751
+ fpos = cmap->block_fpos + SMATRIX_CMAP_HEAD_SIZE;
752
+ fpos += cmap->block_used * SMATRIX_CMAP_SLOT_SIZE;
753
+
754
+ cmap->block_used++;
755
+
756
+ return fpos;
757
+ }
758
+
759
+ void smatrix_cmap_mkblock(smatrix_t* self, smatrix_cmap_t* cmap) {
760
+ uint64_t bytes, meta_fpos;
761
+ char* buf = smatrix_malloc(self, SMATRIX_CMAP_HEAD_SIZE);
762
+ char* meta_buf = smatrix_malloc(self, 8);
763
+
764
+ meta_fpos = cmap->block_fpos + 8;
765
+
766
+ bytes = SMATRIX_CMAP_BLOCK_SIZE * SMATRIX_CMAP_SLOT_SIZE;
767
+ bytes += SMATRIX_CMAP_HEAD_SIZE;
768
+
769
+ cmap->block_fpos = smatrix_falloc(self, bytes);
770
+ cmap->block_used = 0;
771
+ cmap->block_size = SMATRIX_CMAP_BLOCK_SIZE;
772
+
773
+ memcpy(meta_buf, &cmap->block_fpos, 8);
774
+ memcpy(buf, &cmap->block_size, 8);
775
+ memset(buf + 8, 0, 8);
776
+
777
+ smatrix_write(self, cmap->block_fpos, buf, SMATRIX_CMAP_HEAD_SIZE);
778
+ smatrix_write(self, meta_fpos, meta_buf, 8);
779
+ }
780
+
781
+ void smatrix_cmap_write(smatrix_t* self, smatrix_rmap_t* rmap) {
782
+ char* buf = smatrix_malloc(self, SMATRIX_CMAP_SLOT_SIZE);
783
+
784
+ memcpy(buf, &rmap->key, 4);
785
+ memcpy(buf + 4, &rmap->fpos, 8);
786
+
787
+ smatrix_write(self, rmap->meta_fpos, buf, SMATRIX_CMAP_SLOT_SIZE);
788
+ }
789
+
790
+ void smatrix_cmap_load(smatrix_t* self, uint64_t head_fpos) {
791
+ smatrix_rmap_t* rmap;
792
+ unsigned char meta_buf[SMATRIX_CMAP_HEAD_SIZE], *buf;
793
+ ssize_t bytes, pos;
794
+ uint64_t fpos, value;
795
+
796
+ for (fpos = head_fpos; fpos;) {
797
+ self->cmap.block_fpos = fpos;
798
+
799
+ if (pread(self->fd, &meta_buf, SMATRIX_CMAP_HEAD_SIZE, fpos) != SMATRIX_CMAP_HEAD_SIZE) {
800
+ smatrix_error("pread() failed (cmap_load). corrupt file?");
801
+ }
802
+
803
+ fpos += SMATRIX_CMAP_HEAD_SIZE;
804
+ bytes = *((uint64_t *) &meta_buf) * SMATRIX_CMAP_SLOT_SIZE;
805
+ buf = smatrix_malloc(self, bytes);
806
+
807
+ if (pread(self->fd, buf, bytes, fpos) != bytes) {
808
+ smatrix_error("pread() failed (cmap_load). corrupt file?");
809
+ }
810
+
811
+ for (pos = 0; pos < bytes; pos += SMATRIX_CMAP_SLOT_SIZE) {
812
+ value = *((uint64_t *) &buf[pos + 4]);
813
+
814
+ if (!value)
815
+ break;
816
+
817
+ rmap = smatrix_malloc(self, sizeof(smatrix_rmap_t));
818
+ smatrix_rmap_init(self, rmap, 0);
819
+ rmap->key = *((uint32_t *) &buf[pos]);
820
+ rmap->meta_fpos = fpos + pos;
821
+ rmap->fpos = value;
822
+
823
+ smatrix_cmap_insert(self, &self->cmap, rmap->key)->rmap = rmap;
824
+ }
825
+
826
+ smatrix_mfree(self, bytes);
827
+ free(buf);
828
+ fpos = *((uint64_t *) &meta_buf[8]);
829
+ }
830
+ }
831
+
832
+ void smatrix_write(smatrix_t* self, uint64_t fpos, char* data, uint64_t bytes) {
833
+ if (pwrite(self->fd, data, bytes, fpos) != (ssize_t) bytes) {
834
+ smatrix_error("write() failed");
835
+ }
836
+
837
+ free(data);
838
+ smatrix_mfree(self, bytes);
839
+ }
840
+
841
+ // the caller of this function must have called smatrix_lock_incref before
842
+ // blocks until the mutex is acquired and the reader count has drained to zero
843
+ void smatrix_lock_getmutex(smatrix_lock_t* lock) {
844
+ assert(lock->count > 0);
845
+
846
+ for (;;) {
847
+ if (__sync_bool_compare_and_swap(&lock->mutex, 0, 1)) {
848
+ break;
849
+ }
850
+
851
+ while (lock->mutex != 0) {
852
+ asm("pause");
853
+ }
854
+ }
855
+
856
+ while (lock->count > 0) {
857
+ asm("pause");
858
+ }
859
+ }
860
+
861
+ void smatrix_lock_dropmutex(smatrix_lock_t* lock) {
862
+ assert(lock->count == 0);
863
+ asm("lock incw (%0)" : : "c" (&lock->count));
864
+ lock->mutex = 0;
865
+ }
866
+
867
+ void smatrix_lock_release(smatrix_lock_t* lock) {
868
+ lock->mutex = 0;
869
+ }
870
+
871
+ inline void smatrix_lock_incref(smatrix_lock_t* lock) {
872
+ for (;;) {
873
+ asm("lock incw (%0)" : : "c" (&lock->count));
874
+
875
+ if (lock->mutex == 0) {
876
+ return;
877
+ }
878
+
879
+ asm("lock decw (%0)" : : "c" (&lock->count));
880
+
881
+ while (lock->mutex != 0) {
882
+ asm("pause");
883
+ }
884
+ }
885
+ }
886
+
887
+ inline void smatrix_lock_decref(smatrix_lock_t* lock) {
888
+ asm("lock decw (%0)" : : "c" (&lock->count));
889
+ }
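To tie the four primitives above together, here is a hedged sketch of the reader-side pattern that smatrix_lookup()/smatrix_decref() use for read-only access. It assumes the rmap is already loaded; the real lookup also handles loading, and the write path drops its reader reference, takes the mutex via smatrix_lock_getmutex(), and releases it with smatrix_lock_release().

    /* Reader side: register via incref (spins while a writer holds the mutex),
     * probe under the shared lock, then unregister via decref. */
    static uint32_t read_value(smatrix_rmap_t* rmap, uint32_t y) {
      uint32_t value = 0;

      smatrix_lock_incref(&rmap->lock);

      smatrix_rmap_slot_t* slot = smatrix_rmap_probe(rmap, y);
      if (slot != NULL && slot->key == y)
        value = slot->value;

      smatrix_lock_decref(&rmap->lock);
      return value;
    }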
890
+
891
+ void smatrix_error(const char* msg) {
892
+ printf("libsmatrix error: %s", msg);
893
+ abort();
894
+ }
895
+
896
+ void smatrix_ioqueue_add(smatrix_t* self, smatrix_rmap_t* rmap) {
897
+ smatrix_ref_t* ref;
898
+
899
+ ref = smatrix_malloc(self, sizeof(smatrix_ref_t));
900
+ ref->rmap = rmap;
901
+
902
+ smatrix_lock_getmutex(&self->lock);
903
+
904
+ ref->next = self->ioqueue;
905
+ self->ioqueue = ref;
906
+
907
+ smatrix_lock_release(&self->lock);
908
+ }
909
+
910
+ smatrix_rmap_t* smatrix_ioqueue_pop(smatrix_t* self) {
911
+ smatrix_ref_t* ref;
912
+ smatrix_rmap_t* rmap;
913
+
914
+ smatrix_lock_getmutex(&self->lock);
915
+
916
+ ref = self->ioqueue;
917
+
918
+ if (ref == NULL) {
919
+ smatrix_lock_release(&self->lock);
920
+ return NULL;
921
+ }
922
+
923
+ self->ioqueue = ref->next;
924
+ smatrix_lock_release(&self->lock);
925
+
926
+ rmap = ref->rmap;
927
+
928
+ free(ref);
929
+ smatrix_mfree(self, sizeof(smatrix_ref_t));
930
+
931
+ return rmap;
932
+ }
933
+
934
+ void* smatrix_io(void* self_) {
935
+ smatrix_t* self = self_;
936
+ smatrix_rmap_t* rmap;
937
+
938
+ for (;;) {
939
+ rmap = smatrix_ioqueue_pop(self);
940
+
941
+ if (rmap == NULL) {
942
+ if (self->shutdown) {
943
+ break;
944
+ } else {
945
+ usleep(100000);
946
+ continue;
947
+ }
948
+ }
949
+
950
+ smatrix_lock_getmutex(&rmap->lock);
951
+
952
+ if ((rmap->flags & SMATRIX_RMAP_FLAG_DIRTY) > 0) {
953
+ smatrix_rmap_sync(self, rmap);
954
+ }
955
+
956
+ smatrix_lock_release(&rmap->lock);
957
+ }
958
+
959
+ return NULL;
960
+ }