hawthorn 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
# Build script for the native extension: emits a Makefile that compiles
# the C sources into the `hawthorn/hawthorn` shared object.
require( 'mkmf' )

create_makefile 'hawthorn/hawthorn'
@@ -0,0 +1,975 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <stdint.h>
4
+ #include <string.h>
5
+
6
+ #include <time.h>
7
+ #include <sys/time.h>
8
+
9
+ #include <ruby.h>
10
+
11
/*
 * Returns the current wall-clock time in seconds (with microsecond
 * resolution) as reported by gettimeofday().
 */
double get_time( void ){
  struct timeval tv;

  gettimeofday( &tv, NULL );
  return (double)tv.tv_sec + (double)tv.tv_usec * 1e-6;
}
16
+
17
+
18
+
19
+ /* Linked list */
20
+
21
/*
 * Singly linked list of (key, value) pairs.
 *
 * Ownership: every node owns a private heap copy of its key.  Values are
 * opaque pointers; list_insert() and list_remove() call free() on a value
 * they discard, so a list that is updated through those two functions must
 * only hold values obtained from malloc() (or NULL).
 */
typedef struct LinkedList {
  char *key;                /* NUL-terminated copy of the key, owned by the node */
  void *value;              /* caller supplied payload */
  struct LinkedList *next;  /* following node, NULL at the tail */
} list_t;

/*
 * Allocates a single detached node holding a copy of `key` and the pointer
 * `value`, and stores it in *l.  On allocation failure *l is set to NULL.
 */
void list_create( list_t **l, const char *key, void *value ){
  list_t *node = malloc( sizeof *node );

  *l = NULL;
  if( node == NULL ){ return; }

  node->key = malloc( strlen( key ) + 1 );
  if( node->key == NULL ){
    free( node );
    return;
  }
  strcpy( node->key, key );
  node->value = value;
  node->next = NULL;
  *l = node;
}

/*
 * Releases every node after `l` together with all keys (including the key
 * of `l` itself).  The head node `l` is NOT freed; callers are expected to
 * call free( l ) afterwards.  Values are never freed here.
 */
void list_free( list_t *l ){
  list_t *current, *following;

  if( l == NULL ){ return; }

  current = l->next;
  while( current != NULL ){
    following = current->next;
    free( current->key );
    free( current );
    current = following;
  }
  l->next = NULL;   /* do not leave a dangling pointer in the head */

  free( l->key );
  l->key = NULL;
}

/*
 * Sets `key` to `value`.  If a node with an equal key exists anywhere in
 * the list (the tail included) its old value is free()d and replaced;
 * otherwise a new node is appended at the tail.
 */
void list_insert( list_t *l, const char *key, void *value ){
  list_t *current = l;

  for( ;; ){
    if( strcmp( current->key, key ) == 0 ){
      /* re-inserting the very same pointer must not free it */
      if( current->value != value ){
        free( current->value );
      }
      current->value = value;
      return;
    }
    if( current->next == NULL ){ break; }
    current = current->next;
  }
  list_create( &(current->next), key, value );
}

/*
 * Returns the value of the first node whose key equals `key`, or NULL when
 * no such node exists.
 */
void* list_fetch( list_t *l, const char *key ){
  list_t *current = l;
  while( current != NULL ){
    if( !strcmp( current->key, key ) ){
      return current->value;
    }
    current = current->next;
  }
  return NULL;
}

/*
 * Removes the first node whose key equals `key` and free()s its value and
 * key.  The head pointer held by the caller always stays valid:
 *   - a matching head with a successor takes over the successor's contents;
 *   - a matching head without successor only has its value cleared to NULL.
 */
void list_remove( list_t *l, const char *key ){
  list_t *current = l;
  list_t *prev = NULL;

  while( current != NULL ){
    if( strcmp( current->key, key ) == 0 ){
      free( current->value );
      if( prev != NULL ){
        prev->next = current->next;
        free( current->key );
        free( current );
      }
      else if( current->next != NULL ){
        list_t *second = current->next;
        free( current->key );
        *current = *second;   /* head adopts key, value and next of its successor */
        free( second );
      }
      else {
        current->value = NULL;
      }
      return;
    }
    prev = current;
    current = current->next;
  }
}

/*
 * Detaches the tail node, frees the node and its key, and returns its
 * value (ownership of the value passes to the caller).  When the list had
 * a single node *l becomes NULL.  Returns NULL for an empty list.
 */
void* list_pop_back( list_t **l ){
  list_t *current, *prev = NULL;
  void *out;

  if( *l == NULL ){ return NULL; }

  current = *l;
  while( current->next != NULL ){
    prev = current;
    current = current->next;
  }

  out = current->value;
  if( prev != NULL ){
    prev->next = NULL;
  }
  else {
    *l = NULL;
  }
  free( current->key );
  free( current );
  return out;
}
107
+
108
+ /* Tries */
109
+
110
/*
 * 16-way trie mapping a 64-bit key to an opaque pointer.  The key is
 * consumed four bits at a time starting with the most significant nibble,
 * so every stored key ends at depth 16.  The trie never owns `content`.
 */
typedef struct Trie64Map {
  uint8_t key;                  /* nibble that leads from the parent to this node */
  void *content;                /* payload, only meaningful at depth 16 */
  struct Trie64Map *leafs[16];  /* children indexed by the next nibble */
} trie64_map_t;

/*
 * Extracts the nibble of `key` used at trie depth `level` (0 = most
 * significant).  All arithmetic is done in 64 bits; shifting a promoted
 * `int` by up to 60 bits would be undefined behaviour.
 */
static unsigned trie64_map_nibble( uint64_t key, unsigned level ){
  return (unsigned)( ( key >> ( 60u - level * 4u ) ) & 0x0Fu );
}

/*
 * Allocates an empty node labelled `key` and stores it in *t.
 * *t is NULL if the allocation fails.
 */
void trie64_map_create( trie64_map_t **t, uint8_t key ){
  int i;
  trie64_map_t *node = malloc( sizeof *node );

  *t = node;
  if( node == NULL ){ return; }

  node->key = key;
  node->content = NULL;
  for( i = 0 ; i < 16 ; ++i ){
    node->leafs[i] = NULL;
  }
}

/*
 * Associates `value` with `key`, creating the path as needed.  A previous
 * value for the same key is overwritten (not freed).  If a node on the path
 * cannot be allocated the insert is abandoned.
 */
void trie64_map_insert( trie64_map_t *t, uint64_t key, void *value ){
  trie64_map_t *current = t;
  unsigned level;

  for( level = 0 ; level < 16 ; ++level ){
    unsigned chunk = trie64_map_nibble( key, level );
    if( current->leafs[chunk] == NULL ){
      trie64_map_create( &(current->leafs[chunk]), (uint8_t)chunk );
      if( current->leafs[chunk] == NULL ){ return; }
    }
    current = current->leafs[chunk];
  }
  current->content = value;
}

/* Returns the value stored for `key`, or NULL when the key is absent. */
void* trie64_map_fetch( trie64_map_t *t, uint64_t key ){
  trie64_map_t *current = t;
  unsigned level;

  for( level = 0 ; level < 16 ; ++level ){
    current = current->leafs[ trie64_map_nibble( key, level ) ];
    if( current == NULL ){
      return NULL;
    }
  }
  return current->content;
}

/*
 * Removes `key` by freeing its depth-16 node.  Interior nodes are kept and
 * the stored content is not freed.  Does nothing when the key is absent.
 */
void trie64_map_remove( trie64_map_t *t, uint64_t key ){
  trie64_map_t *current = t;
  unsigned level, chunk;

  /* walk to the parent of the final node */
  for( level = 0 ; level < 15 ; ++level ){
    current = current->leafs[ trie64_map_nibble( key, level ) ];
    if( current == NULL ){
      return;
    }
  }

  chunk = trie64_map_nibble( key, 15 );
  if( current->leafs[chunk] != NULL ){
    free( current->leafs[chunk] );
    current->leafs[chunk] = NULL;
  }
}

/*
 * Recursively frees every descendant of `t` and clears the child slots.
 * The node `t` itself and all contents are left for the caller to release.
 */
void trie64_map_free( trie64_map_t *t ){
  int i;
  for( i = 0 ; i < 16 ; ++i ){
    if( t->leafs[i] != NULL ){
      trie64_map_free( t->leafs[i] );
      free( t->leafs[i] );
      t->leafs[i] = NULL;
    }
  }
}
190
+
191
+ void _trie64_map_get_all( trie64_map_t *t, uint64_t mask, uint64_t key, uint8_t level, list_t **l ){
192
+ uint64_t i;
193
+ uint64_t k, shift;
194
+ shift = 60 - level*4;
195
+ if( level < 16 ){
196
+ for( i = 0 ; i < 16; ++i ){
197
+ if( t->leafs[i] != NULL ){
198
+ k = key | (i << shift);
199
+ _trie64_map_get_all( t->leafs[i], mask, k, level + 1, l );
200
+ }
201
+ }
202
+ }
203
+ else{
204
+ if( (mask & key) == mask ){
205
+ char strkey[ sizeof( uint64_t ) + 1 ];
206
+ memcpy( strkey, &key, sizeof( uint64_t ) );
207
+ if( *l == NULL ){
208
+ list_create( l, strkey, t->content );
209
+ }
210
+ else {
211
+ list_insert( *l, strkey, t->content );
212
+ }
213
+ }
214
+ }
215
+ }
216
+
217
+ list_t* trie64_map_get_all( trie64_map_t *t, uint64_t mask ){
218
+ list_t *l;
219
+ list_create( &l, "", (void *)NULL );
220
+ _trie64_map_get_all( t, mask, 0, 0, &l );
221
+ return l;
222
+ }
223
+
224
+
225
/*
 * 16-way trie mapping a 16-bit key to an opaque pointer.  The key is
 * consumed four bits at a time, most significant nibble first, so stored
 * keys end at depth 4.  The trie never owns `content`.
 */
typedef struct Trie16Map {
  uint8_t key;                  /* nibble that leads from the parent to this node */
  void *content;                /* payload, only meaningful at depth 4 */
  struct Trie16Map *leafs[16];  /* children indexed by the next nibble */
} trie16_map_t;

/* Nibble of `key` used at depth `level` (0 = most significant). */
static unsigned trie16_map_nibble( uint16_t key, unsigned level ){
  return ( (unsigned)key >> ( 12u - level * 4u ) ) & 0x0Fu;
}

/*
 * Allocates an empty node labelled `key` and stores it in *t.
 * *t is NULL if the allocation fails.
 */
void trie16_map_create( trie16_map_t **t, uint8_t key ){
  int i;
  trie16_map_t *node = malloc( sizeof *node );

  *t = node;
  if( node == NULL ){ return; }

  node->key = key;
  node->content = NULL;
  for( i = 0 ; i < 16 ; ++i ){
    node->leafs[i] = NULL;
  }
}

/*
 * Associates `value` with `key`, creating the path as needed and
 * overwriting (not freeing) a previous value.  Abandoned if a node on the
 * path cannot be allocated.
 */
void trie16_map_insert( trie16_map_t *t, uint16_t key, void *value ){
  trie16_map_t *current = t;
  unsigned level;

  for( level = 0 ; level < 4 ; ++level ){
    unsigned chunk = trie16_map_nibble( key, level );
    if( current->leafs[chunk] == NULL ){
      trie16_map_create( &(current->leafs[chunk]), (uint8_t)chunk );
      if( current->leafs[chunk] == NULL ){ return; }
    }
    current = current->leafs[chunk];
  }
  current->content = value;
}

/* Returns the value stored for `key`, or NULL when the key is absent. */
void* trie16_map_fetch( trie16_map_t *t, uint16_t key ){
  trie16_map_t *current = t;
  unsigned level;

  for( level = 0 ; level < 4 ; ++level ){
    current = current->leafs[ trie16_map_nibble( key, level ) ];
    if( current == NULL ){
      return NULL;
    }
  }
  return current->content;
}

/*
 * Removes `key` by freeing its depth-4 node; interior nodes stay and the
 * stored content is not freed.  Does nothing when the key is absent.
 */
void trie16_map_remove( trie16_map_t *t, uint16_t key ){
  trie16_map_t *current = t;
  unsigned level, chunk;

  for( level = 0 ; level < 3 ; ++level ){
    current = current->leafs[ trie16_map_nibble( key, level ) ];
    if( current == NULL ){
      return;
    }
  }

  chunk = trie16_map_nibble( key, 3 );
  if( current->leafs[chunk] != NULL ){
    free( current->leafs[chunk] );
    current->leafs[chunk] = NULL;
  }
}

/*
 * Recursively frees every descendant of `t` and clears the child slots.
 * The node `t` itself and all contents are left for the caller to release.
 */
void trie16_map_free( trie16_map_t *t ){
  int i;
  for( i = 0 ; i < 16 ; ++i ){
    if( t->leafs[i] != NULL ){
      trie16_map_free( t->leafs[i] );
      free( t->leafs[i] );
      t->leafs[i] = NULL;
    }
  }
}
307
+
308
/*
 * 16-way trie used as a set of 64-bit keys.  A key is a member when the
 * full 16-level path for it exists.
 */
typedef struct Trie64Set {
  uint8_t key;                  /* nibble that leads from the parent to this node */
  struct Trie64Set *leafs[16];  /* children indexed by the next nibble */
} trie64_set_t;

/* Nibble of `key` used at depth `level` (0 = most significant). */
static unsigned trie64_set_nibble( uint64_t key, unsigned level ){
  return (unsigned)( ( key >> ( 60u - level * 4u ) ) & 0x0Fu );
}

/*
 * Allocates an empty node labelled `key` and stores it in *t.
 * *t is NULL if the allocation fails.
 */
void trie64_set_create( trie64_set_t **t, uint8_t key ){
  int i;
  trie64_set_t *node = malloc( sizeof *node );

  *t = node;
  if( node == NULL ){ return; }

  node->key = key;
  for( i = 0 ; i < 16 ; ++i ){
    node->leafs[i] = NULL;
  }
}

/*
 * Adds `key` to the set by creating its path.  Abandoned if a node on the
 * path cannot be allocated (the key is then not a member).
 */
void trie64_set_insert( trie64_set_t *t, uint64_t key ){
  trie64_set_t *current = t;
  unsigned level;

  for( level = 0 ; level < 16 ; ++level ){
    unsigned chunk = trie64_set_nibble( key, level );
    if( current->leafs[chunk] == NULL ){
      trie64_set_create( &(current->leafs[chunk]), (uint8_t)chunk );
      if( current->leafs[chunk] == NULL ){ return; }
    }
    current = current->leafs[chunk];
  }
}

/* Returns 1 when `key` is a member of the set, 0 otherwise. */
int trie64_set_contains( trie64_set_t *t, uint64_t key ){
  trie64_set_t *current = t;
  unsigned level;

  for( level = 0 ; level < 16 ; ++level ){
    current = current->leafs[ trie64_set_nibble( key, level ) ];
    if( current == NULL ){
      return 0;
    }
  }
  return 1;
}

/*
 * Recursively frees every descendant of `t` and clears the child slots.
 * The root `t` itself is left for the caller to free.
 */
void trie64_set_free( trie64_set_t *t ){
  int i;
  for( i = 0 ; i < 16 ; ++i ){
    if( t->leafs[i] != NULL ){
      trie64_set_free( t->leafs[i] );
      free( t->leafs[i] );
      t->leafs[i] = NULL;
    }
  }
}
362
+
363
+
364
+
365
+ typedef struct {
366
+ uint64_t id;
367
+ list_t *props;
368
+ trie64_map_t *out, *in;
369
+ } node_t;
370
+
371
/* A directed, typed, weighted edge between two nodes. */
typedef struct {
  uint64_t id;       /* identifier handed out by hawthorn_connect() */
  uint64_t source;   /* id of the node the edge leaves */
  uint64_t target;   /* id of the node the edge arrives at */
  uint16_t type;     /* numeric edge type */
  double weight;     /* caller supplied weight */
} edge_t;

/*
 * Builds the 64-bit index key of an edge: the 16-bit type occupies the top
 * 16 bits, the low 48 bits hold the target id.  Only `type` and `target`
 * of `e` are read.  Target bits above bit 47 are discarded so that they
 * can never spill into (and corrupt) the type field.
 */
uint64_t edge_create_key( edge_t *e ){
  const uint64_t type_shift = 48;
  const uint64_t target_mask = ( (uint64_t)1 << type_shift ) - 1;

  return ( (uint64_t)e->type << type_shift ) | ( e->target & target_mask );
}
386
+
387
+ typedef struct {
388
+ trie64_map_t *nodes;
389
+ trie64_map_t *edges;
390
+ uint64_t next_node_id, next_edge_id;
391
+ } hawthorn_t;
392
+
393
+ void hawthorn_init( hawthorn_t **h ){
394
+ (*h) = (hawthorn_t *)malloc( sizeof( hawthorn_t ) );
395
+ (*h)->next_node_id = 1;
396
+ (*h)->next_edge_id = 1;
397
+ trie64_map_create( &(*h)->nodes, 0 );
398
+ trie64_map_create( &(*h)->edges, 0 );
399
+ }
400
+
401
+ uint64_t hawthorn_create_node( hawthorn_t *h ){
402
+ uint64_t id = h->next_node_id;
403
+ node_t *node;
404
+ char *buffer;
405
+
406
+ node = (node_t*)malloc( sizeof( node_t ) );
407
+ node->id = id;
408
+ buffer = (char *)malloc( 32 );
409
+ sprintf( buffer, "%lx", (unsigned long int)node->id );
410
+
411
+
412
+ list_create( &(node->props), "_id", (void*)buffer );
413
+ trie64_map_create( &node->out, 0 );
414
+ trie64_map_create( &node->in, 0 );
415
+
416
+ trie64_map_insert( h->nodes, id, node );
417
+
418
+ h->next_node_id += 1;
419
+ return id;
420
+ }
421
+
422
+ node_t* hawthorn_get_node( hawthorn_t *h, uint64_t id){
423
+ return (node_t*)trie64_map_fetch( h->nodes, id );
424
+ }
425
+
426
+ uint64_t hawthorn_connect( hawthorn_t *h, uint64_t source, uint64_t target, uint16_t type, double weight ){
427
+ uint64_t id = h->next_edge_id;
428
+
429
+ node_t *sn, *tn;
430
+ edge_t *edge;
431
+
432
+ sn = trie64_map_fetch( h->nodes, source );
433
+ tn = trie64_map_fetch( h->nodes, target );
434
+
435
+ if( sn == NULL || tn == NULL ){ return 0; }
436
+
437
+ edge = (edge_t*)malloc( sizeof( edge_t ) );
438
+
439
+ edge->id = id;
440
+ edge->source = source;
441
+ edge->target = target;
442
+ edge->type = type;
443
+ edge->weight = weight;
444
+
445
+ trie64_map_insert( sn->out, edge_create_key( edge ), edge );
446
+ trie64_map_insert( tn->in, edge_create_key( edge ), edge );
447
+
448
+
449
+ trie64_map_insert( h->edges, id, edge );
450
+
451
+ h->next_edge_id += 1;
452
+ return id;
453
+ }
454
+
455
+ int hawthorn_disconnect( hawthorn_t *h, uint64_t source, uint64_t target, uint16_t type ){
456
+ node_t *sn, *tn;
457
+ uint64_t edge_key;
458
+ edge_t tmp, *edge;
459
+
460
+ sn = trie64_map_fetch( h->nodes, source );
461
+ tn = trie64_map_fetch( h->nodes, target );
462
+
463
+ if( sn == NULL || tn == NULL ){ return 0; }
464
+
465
+ tmp.target = target;
466
+ tmp.type = type;
467
+
468
+ edge_key = edge_create_key( &tmp );
469
+
470
+ edge = (edge_t*)trie64_map_fetch( sn->out, edge_key );
471
+ /*printf( "disco: edge: %p \n", edge );*/
472
+ if( edge != NULL ){
473
+ /*printf( "edge->id: %lx, edge->source: %lx, edge->target: %lx \n", edge->id, edge->source, edge->target ); */
474
+ trie64_map_remove( h->edges, edge->id );
475
+ trie64_map_remove( sn->out, edge_key );
476
+ trie64_map_remove( tn->in, edge_key );
477
+
478
+ free( edge );
479
+ return 1;
480
+ }
481
+ return 0;
482
+ }
483
+
484
+ list_t* hawthorn_get_outbound( hawthorn_t *db, uint64_t source, uint16_t type ){
485
+ uint64_t mask;
486
+ edge_t tmp;
487
+ list_t *edges, *first;
488
+ node_t *sn;
489
+
490
+ tmp.type = type;
491
+ tmp.target = 0;
492
+ mask = edge_create_key( &tmp );
493
+
494
+ sn = trie64_map_fetch( db->nodes, source );
495
+ if( sn == NULL ){ return NULL; }
496
+
497
+ first = trie64_map_get_all( sn->out, mask );
498
+
499
+ free( first );
500
+
501
+ edges = first->next;
502
+ return edges;
503
+ }
504
+
505
+ list_t* hawthorn_get_inbound( hawthorn_t *db, uint64_t source, uint16_t type ){
506
+ uint64_t mask;
507
+ edge_t tmp;
508
+ list_t *edges, *first;
509
+ node_t *sn;
510
+
511
+ tmp.type = type;
512
+ tmp.target = 0;
513
+ mask = edge_create_key( &tmp );
514
+
515
+ sn = trie64_map_fetch( db->nodes, source );
516
+ if( sn == NULL ){ return NULL; }
517
+
518
+ first = trie64_map_get_all( sn->in, mask );
519
+
520
+ free( first );
521
+
522
+ edges = first->next;
523
+ return edges;
524
+ }
525
+
526
+
527
+ list_t* hawthorn_get_connected( hawthorn_t *db, uint64_t source, uint16_t type ){
528
+ trie64_set_t *visited;
529
+ list_t *stack = NULL, *out;
530
+ char strkey[sizeof( uint64_t ) + 1];
531
+ trie64_set_create( &visited, 0 );
532
+
533
+ memcpy( strkey, &source, sizeof( uint64_t ) );
534
+ list_create( &stack, strkey, (void*)source );
535
+ list_create( &out, strkey, (void*)source );
536
+
537
+ while( stack != NULL ){
538
+ uint64_t node_id;
539
+ void *tmp;
540
+ list_t *nodes, *current;
541
+ tmp = list_pop_back( &stack );
542
+ if( tmp == NULL ){ continue; }
543
+ node_id = (uint64_t)tmp;
544
+ if( trie64_set_contains( visited, node_id ) ){
545
+ continue;
546
+ }
547
+
548
+ trie64_set_insert( visited, node_id );
549
+
550
+ nodes = hawthorn_get_outbound( db, node_id, type );
551
+ if( nodes == NULL ){ continue; }
552
+ current = nodes;
553
+ while( current != NULL ){
554
+ edge_t *edge = (edge_t*)current->value;
555
+ if( !trie64_set_contains( visited, edge->target ) ){
556
+ memcpy( strkey, &(edge->target), sizeof( uint64_t ) );
557
+
558
+ if( stack == NULL) {
559
+ list_create( &stack, strkey, (void*)edge->target );
560
+ }
561
+ else {
562
+ list_insert( stack, strkey, (void*)edge->target );
563
+ }
564
+
565
+ list_insert( out, strkey, (void*)edge->target );
566
+ }
567
+ current = current->next;
568
+ }
569
+
570
+ list_free( nodes );
571
+ free( nodes );
572
+ }
573
+ trie64_set_free( visited );
574
+ return out;
575
+ }
576
+
577
+ int hawthorn_are_connected( hawthorn_t *db, uint64_t source, uint64_t target, uint16_t type ){
578
+ trie64_set_t *visited;
579
+ list_t *stack = NULL;
580
+ char strkey[sizeof( uint64_t ) + 1];
581
+ trie64_set_create( &visited, 0 );
582
+
583
+ memcpy( strkey, &source, sizeof( uint64_t ) );
584
+ list_create( &stack, strkey, (void*)source );
585
+
586
+ while( stack != NULL ){
587
+ uint64_t node_id;
588
+ void *tmp;
589
+ list_t *nodes, *current;
590
+ tmp = list_pop_back( &stack );
591
+ if( tmp == NULL ){ continue; }
592
+ node_id = (uint64_t)tmp;
593
+
594
+ if( trie64_set_contains( visited, node_id ) ){
595
+ continue;
596
+ }
597
+
598
+ trie64_set_insert( visited, node_id );
599
+
600
+ nodes = hawthorn_get_outbound( db, node_id, type );
601
+ if( nodes == NULL ){ continue; }
602
+ current = nodes;
603
+ while( current != NULL ){
604
+ edge_t *edge = (edge_t*)current->value;
605
+
606
+ if( edge->target == target ){
607
+ if( stack != NULL ){
608
+ list_free( stack );
609
+ free( stack );
610
+ }
611
+ list_free( nodes );
612
+ free( nodes );
613
+ trie64_set_free( visited );
614
+ return 1;
615
+ }
616
+
617
+ if( !trie64_set_contains( visited, edge->target ) ){
618
+ memcpy( strkey, &(edge->target), sizeof( uint64_t ) );
619
+
620
+ if( stack == NULL) {
621
+ list_create( &stack, strkey, (void*)edge->target );
622
+ }
623
+ else {
624
+ list_insert( stack, strkey, (void*)edge->target );
625
+ }
626
+
627
+ }
628
+ current = current->next;
629
+ }
630
+
631
+ list_free( nodes );
632
+ free( nodes );
633
+ }
634
+ trie64_set_free( visited );
635
+ return 0;
636
+ }
637
+
638
+
639
+ hawthorn_t *databases[16];
640
+
641
+ static VALUE ht_Init( VALUE self, VALUE dbid ){
642
+ int db_id = FIX2INT( dbid );
643
+
644
+ if( databases[db_id] == NULL ){
645
+ hawthorn_init( &databases[db_id] );
646
+ }
647
+
648
+ rb_iv_set( self, "@dbid", dbid );
649
+ return self;
650
+ }
651
+
652
+
653
+
654
+ static VALUE ht_create_node( VALUE self ){
655
+ VALUE dbid = rb_iv_get( self, "@dbid" );
656
+ VALUE out;
657
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
658
+ uint64_t node_id = hawthorn_create_node( db );
659
+ out = INT2NUM( node_id );
660
+
661
+
662
+ return out;
663
+ }
664
+
665
+ static VALUE ht_connect_nodes( VALUE self, VALUE Rsource, VALUE Rtarget, VALUE Rtype, VALUE Rweight ){
666
+ VALUE dbid = rb_iv_get( self, "@dbid" );
667
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
668
+ uint64_t source, target;
669
+ uint16_t type;
670
+ double weight;
671
+ uint64_t retval;
672
+
673
+ source = NUM2INT( Rsource );
674
+ target = NUM2INT( Rtarget );
675
+ type = FIX2INT( Rtype );
676
+ weight = NUM2DBL( Rweight );
677
+
678
+ retval = hawthorn_connect( db, source, target, type, weight );
679
+
680
+ if( retval == 0 ){ return Qfalse; }
681
+
682
+ return Qtrue;
683
+ }
684
+
685
+ static VALUE ht_disconnect_nodes( VALUE self, VALUE Rsource, VALUE Rtarget, VALUE Rtype){
686
+ VALUE dbid = rb_iv_get( self, "@dbid" );
687
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
688
+ uint64_t source, target;
689
+ uint16_t type;
690
+ int retval;
691
+
692
+ source = NUM2INT( Rsource );
693
+ target = NUM2INT( Rtarget );
694
+ type = FIX2INT( Rtype );
695
+
696
+ retval = hawthorn_disconnect( db, source, target, type );
697
+
698
+ if( retval ){ return Qtrue; }
699
+
700
+ return Qfalse;
701
+ }
702
+
703
+ static VALUE ht_are_connected( VALUE self, VALUE Rsource, VALUE Rtarget, VALUE Rtype ){
704
+ VALUE dbid = rb_iv_get( self, "@dbid" );
705
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
706
+ uint64_t source, target;
707
+ uint16_t type;
708
+ int retval;
709
+ source = NUM2INT( Rsource );
710
+ target = NUM2INT( Rtarget );
711
+ type = FIX2INT( Rtype );
712
+
713
+ retval = hawthorn_are_connected( db, source, target, type );
714
+
715
+
716
+ if( retval ){ return Qtrue; }
717
+
718
+ return Qfalse;
719
+ }
720
+
721
+ static VALUE ht_get_outbound( VALUE self, VALUE Rsource, VALUE Rtype ){
722
+ VALUE dbid = rb_iv_get( self, "@dbid" );
723
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
724
+ list_t *edges, *current;
725
+ uint64_t source;
726
+ uint16_t type;
727
+ VALUE out = rb_ary_new();
728
+
729
+ source = NUM2INT( Rsource );
730
+ type = FIX2INT( Rtype );
731
+ edges = hawthorn_get_outbound( db, source, type );
732
+ current = edges;
733
+ while( current != NULL ){
734
+ VALUE entry = rb_ary_new();
735
+ VALUE e_source, e_target, e_type, e_weight;
736
+ edge_t *edge = (edge_t*)(current->value);
737
+
738
+ e_source = INT2NUM( edge->source );
739
+ e_target = INT2NUM( edge->target );
740
+ e_type = INT2FIX( edge->type );
741
+ e_weight = rb_float_new( edge->weight );
742
+
743
+ rb_ary_push( entry, e_source );
744
+ rb_ary_push( entry, e_target );
745
+ rb_ary_push( entry, e_type );
746
+ rb_ary_push( entry, e_weight );
747
+
748
+ rb_ary_push( out, entry );
749
+
750
+ current = current->next;
751
+ }
752
+
753
+ list_free( edges );
754
+ free( edges );
755
+ return out;
756
+ }
757
+
758
+ static VALUE ht_get_inbound( VALUE self, VALUE Rsource, VALUE Rtype ){
759
+ VALUE dbid = rb_iv_get( self, "@dbid" );
760
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
761
+ list_t *edges, *current;
762
+ uint64_t source;
763
+ uint16_t type;
764
+ VALUE out = rb_ary_new();
765
+
766
+ source = NUM2INT( Rsource );
767
+ type = FIX2INT( Rtype );
768
+ edges = hawthorn_get_inbound( db, source, type );
769
+ current = edges;
770
+ while( current != NULL ){
771
+ VALUE entry = rb_ary_new();
772
+ VALUE e_source, e_target, e_type, e_weight;
773
+ edge_t *edge = (edge_t*)(current->value);
774
+
775
+ e_source = INT2NUM( edge->source );
776
+ e_target = INT2NUM( edge->target );
777
+ e_type = INT2FIX( edge->type );
778
+ e_weight = rb_float_new( edge->weight );
779
+
780
+ rb_ary_push( entry, e_source );
781
+ rb_ary_push( entry, e_target );
782
+ rb_ary_push( entry, e_type );
783
+ rb_ary_push( entry, e_weight );
784
+
785
+ rb_ary_push( out, entry );
786
+
787
+ current = current->next;
788
+ }
789
+
790
+ list_free( edges );
791
+ free( edges );
792
+ return out;
793
+ }
794
+
795
+
796
+ static VALUE ht_get_connected( VALUE self, VALUE Rsource, VALUE Rtype ){
797
+ VALUE dbid = rb_iv_get( self, "@dbid" );
798
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
799
+ list_t *nodes, *current;
800
+ uint64_t source;
801
+ uint16_t type;
802
+ VALUE out = rb_ary_new();
803
+
804
+ source = NUM2INT( Rsource );
805
+ type = FIX2INT( Rtype );
806
+ nodes = hawthorn_get_connected( db, source, type );
807
+ current = nodes;
808
+ while( current != NULL ){
809
+ VALUE node_id;
810
+
811
+ node_id = INT2NUM( (uint64_t)(current->value) );
812
+
813
+ rb_ary_push( out, node_id );
814
+
815
+ current = current->next;
816
+ }
817
+
818
+ list_free( nodes );
819
+ free( nodes );
820
+ return out;
821
+ }
822
+
823
+
824
+ static VALUE ht_get_properties( VALUE self, VALUE Rsource ){
825
+ VALUE dbid = rb_iv_get( self, "@dbid" );
826
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
827
+ uint64_t node_id = NUM2INT( Rsource );
828
+ VALUE out;
829
+ list_t *current;
830
+ node_t *node;
831
+ node = hawthorn_get_node( db, node_id );
832
+ if( node == NULL ){ return Qnil; }
833
+
834
+ out = rb_hash_new();
835
+
836
+ current = node->props;
837
+ while( current != NULL ){
838
+ VALUE key, value;
839
+ key = rb_str_new2( current->key );
840
+ value = rb_str_new2( (char *)current->value );
841
+ rb_hash_aset( out, key, value );
842
+ current = current->next;
843
+ }
844
+
845
+ return out;
846
+ }
847
+
848
+ static VALUE ht_set_property( VALUE self, VALUE Rsource, VALUE Rkey, VALUE Rvalue ){
849
+ VALUE dbid = rb_iv_get( self, "@dbid" );
850
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
851
+ uint64_t node_id = NUM2INT( Rsource );
852
+ node_t *node;
853
+ char *value;
854
+ node = hawthorn_get_node( db, node_id );
855
+ if( node == NULL ){ return Qfalse; }
856
+
857
+ value = (char *)malloc( RSTRING_LEN(Rvalue) + 1 );
858
+
859
+ memset( value, 0, RSTRING_LEN(Rvalue) + 1 );
860
+
861
+ memcpy( value, RSTRING_PTR(Rvalue), RSTRING_LEN(Rvalue) );
862
+
863
+ list_insert( node->props, RSTRING_PTR( Rkey ), value );
864
+
865
+
866
+ return Qtrue;
867
+ }
868
+
869
+
870
+ static VALUE ht_find( VALUE self, VALUE Rkey, VALUE Rvalue ){
871
+ VALUE dbid = rb_iv_get( self, "@dbid" );
872
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
873
+ VALUE out;
874
+
875
+ list_t *nodes, *current;
876
+ nodes = trie64_map_get_all( db->nodes, 0 );
877
+ current = nodes;
878
+ out = rb_ary_new();
879
+
880
+ while( current != NULL ){
881
+ node_t *node = (node_t *)(current->value);
882
+ void* value;
883
+ if( node == NULL ){
884
+ current = current->next;
885
+ continue;
886
+ }
887
+ value = list_fetch( node->props, RSTRING_PTR( Rkey) );
888
+
889
+ if( value != NULL ){
890
+ uint64_t node_id;
891
+ memcpy( &node_id, current->key, sizeof( uint64_t ) );
892
+ if( !strcmp( (char *)value, RSTRING_PTR( Rvalue ) ) ){
893
+ rb_ary_push( out, INT2NUM( node_id ) );
894
+ }
895
+
896
+ }
897
+ current = current->next;
898
+ }
899
+
900
+ list_free( nodes );
901
+ free( nodes );
902
+
903
+ return out;
904
+ }
905
+
906
+
907
+ static VALUE ht_all_nodes( VALUE self ){
908
+ VALUE dbid = rb_iv_get( self, "@dbid" );
909
+ hawthorn_t *db = databases[ FIX2INT( dbid ) ];
910
+ VALUE out;
911
+
912
+ list_t *nodes, *current;
913
+ nodes = trie64_map_get_all( db->nodes, 0 );
914
+ current = nodes;
915
+ out = rb_ary_new();
916
+
917
+ while( current != NULL ){
918
+ node_t *node = (node_t *)(current->value);
919
+ void* value;
920
+ if( node == NULL ){
921
+ current = current->next;
922
+ continue;
923
+ }
924
+
925
+ rb_ary_push( out, INT2NUM( node->id ) );
926
+
927
+ current = current->next;
928
+ }
929
+
930
+ list_free( nodes );
931
+ free( nodes );
932
+
933
+ return out;
934
+ }
935
+
936
+
937
+ void Init_hawthorn(void){
938
+ VALUE klass = rb_define_class( "HawthornDB", rb_cObject );
939
+ int arg_count = 1;
940
+
941
+ rb_define_method( klass, "initialize", ht_Init, arg_count );
942
+
943
+ arg_count = 0;
944
+ rb_define_method( klass, "create_node", ht_create_node, arg_count );
945
+
946
+ arg_count = 4;
947
+ rb_define_method( klass, "connect", ht_connect_nodes, arg_count );
948
+
949
+ arg_count = 3;
950
+ rb_define_method( klass, "disconnected", ht_disconnect_nodes, arg_count );
951
+
952
+ arg_count = 3;
953
+ rb_define_method( klass, "are_connected", ht_are_connected, arg_count );
954
+
955
+ arg_count = 2;
956
+ rb_define_method( klass, "get_outbound", ht_get_outbound, arg_count );
957
+
958
+ arg_count = 2;
959
+ rb_define_method( klass, "get_inbound", ht_get_inbound, arg_count );
960
+
961
+ arg_count = 2;
962
+ rb_define_method( klass, "get_connected", ht_get_connected, arg_count );
963
+
964
+ arg_count = 1;
965
+ rb_define_method( klass, "get_properties", ht_get_properties, arg_count );
966
+
967
+ arg_count = 3;
968
+ rb_define_method( klass, "set_property", ht_set_property, arg_count );
969
+
970
+ arg_count = 2;
971
+ rb_define_method( klass, "find", ht_find, arg_count );
972
+
973
+ arg_count = 0;
974
+ rb_define_method( klass, "all_nodes", ht_all_nodes, arg_count );
975
+ }
data/lib/hawthorn.rb ADDED
@@ -0,0 +1,282 @@
1
# Native extension providing the HawthornDB class.
require 'hawthorn/hawthorn'

# The standard library file is named `set`; `require 'Set'` only resolves
# on case-insensitive file systems and raises LoadError elsewhere.
require 'set'
4
+
5
+ module Hawthorn
6
+
7
# Read-only description of one edge: the two endpoints, the edge type and
# the weight, exactly as passed to the constructor.
class Edge
  attr_reader :source, :target, :type, :weight

  def initialize( source, target, type, weight )
    @source, @target = source, target
    @type, @weight = type, weight
  end
end
16
+
17
# Handle for one node of a database.  Holds the node id, a Hash snapshot of
# its properties and the database object all operations are delegated to.
class Node
  attr_reader :id, :props

  def initialize( id, props, db )
    @id, @props, @db = id, props, db
  end

  # Stores the property (key and value as strings) in the local snapshot
  # and in the database; returns what the database call returns.
  def set( key, value )
    name = key.to_s
    text = value.to_s
    @props[ name ] = text
    @db._set_property( @id, name, text )
  end

  # Asks the database whether `other` is connected to this node via `type`.
  def is_connected?( other, type )
    @db.are_connected?( self, other, type )
  end

  # Edges of the given type leaving this node.
  def outbound( type )
    edge_type = type.to_s
    @db.get_outbound_edges( self, edge_type )
  end

  # Edges of the given type arriving at this node.
  def inbound( type )
    edge_type = type.to_s
    @db.get_inbound_edges( self, edge_type )
  end
end
43
+
44
+
45
# Chainable traversal over a database.  Every step replaces `results`
# (an Array of nodes) and returns the query itself.
class Query
  attr_reader :results

  def initialize( db )
    @db = db
    @results = []
  end

  # Seeds the query.
  #   start( key, value ) - all nodes found by db.find( key, value )
  #   start( node )       - just the given node
  # Any other argument count leaves the current results untouched.
  def start( *args )
    case args.size
    when 2 then @results = @db.find( args[0], args[1] )
    when 1 then @results = [args[0]]
    end

    self
  end

  # Moves to the targets of the outbound edges of `type`, each target once.
  def out( type )
    @results = neighbours( :outbound, :target, type )
    self
  end

  # Moves to the sources of the inbound edges of `type`, each source once.
  def in( type )
    @results = neighbours( :inbound, :source, type )
    self
  end

  # Keeps the nodes for which the block, called with the node's property
  # Hash, returns a truthy value.
  def filter( &block )
    @results = @results.select { |node| block.call( node.props ) }
    self
  end

  # Replaces the results with every node reported by db.get_connected for
  # any of the `types`, repeating from newly found nodes until nothing new
  # turns up.  Each node is listed once.
  def crawl( types )
    found = []
    visited = Set.new
    frontier = @results

    until frontier.empty?
      next_frontier = []
      types.each do |type|
        frontier.each do |node|
          @db.get_connected( node, type.to_s ).each do |conn|
            next unless visited.add?( conn.id )
            next_frontier.push conn
            found.push conn
          end
        end
      end
      frontier = next_frontier
    end
    @results = found

    self
  end

  # True when a node with the same id as `target` is among the results.
  def contains?( target )
    @results.any? { |node| node.id == target.id }
  end

  private

  # Follows the edges returned by node.<direction>( type ) for every result
  # node and collects edge.<endpoint>, skipping ids that were already seen.
  def neighbours( direction, endpoint, type )
    visited = Set.new
    collected = []
    @results.each do |node|
      node.public_send( direction, type ).each do |edge|
        peer = edge.public_send( endpoint )
        collected.push peer if visited.add?( peer.id )
      end
    end
    collected
  end
end
148
+
149
# High-level facade over the native HawthornDB object.  Edge types given by
# the caller are mapped to small integer ids for the native layer.
#
# NOTE(review): every instance binds to native database 0, so all Database
# objects of a process appear to share one graph - confirm this is intended.
class Database

  def initialize
    @db = HawthornDB.new( 0 )
    @edge_types = {}          # type name (String) -> numeric id
    @edge_types_invert = {}   # numeric id -> type as first given to #connect
    @edge_type_cnt = 1
  end

  # Creates a node, sets the given properties on it and returns the Node.
  def create_node( params = {} )
    node = get_node( @db.create_node )

    params.each do |key, value|
      node.set( key.to_s, value.to_s )
    end

    node
  end

  # Internal hook used by Node#set.
  def _set_property( node_id, key, value )
    @db.set_property( node_id, key, value )
  end

  # Wraps the node with the given id, loading its current properties.
  def get_node( node_id )
    Node.new( node_id, @db.get_properties( node_id ), self )
  end

  def all_nodes
    @db.all_nodes.map { |node_id| get_node( node_id ) }
  end

  # Connects two nodes.  An unknown `type` is registered on the fly.  Types
  # are identified by their string form, so :knows and "knows" name the
  # same type - which is what Node#outbound / Node#inbound and Query#crawl
  # rely on when they pass `type.to_s`.
  def connect( source, target, type, weight = 1.0 )
    name = type.to_s
    unless @edge_types.key?( name )
      @edge_types[name] = @edge_type_cnt
      @edge_types_invert[@edge_type_cnt] = type
      @edge_type_cnt += 1
    end

    @db.connect( source.id, target.id, @edge_types[name], weight )
  end

  # Removes the edge of `type` between two nodes.
  # Raises "Unknown edge type" for a type that was never connected.
  def disconnect( source, target, type )
    type_id = edge_type_id( type )

    # The native method was originally registered as "disconnected";
    # work with either spelling.
    native = @db.respond_to?( :disconnect ) ? :disconnect : :disconnected
    @db.public_send( native, source.id, target.id, type_id )
  end

  # Nodes whose property `key` equals `value` (both compared as strings).
  def find( key, value )
    @db.find( key.to_s, value.to_s ).map { |node_id| get_node( node_id ) }
  end

  # Raises "Unknown edge type" for a type that was never connected.
  def are_connected?( source, target, type )
    @db.are_connected( source.id, target.id, edge_type_id( type ) )
  end

  # Raises "Unknown edge type" for a type that was never connected.
  def get_connected( source, type )
    node_ids = @db.get_connected( source.id, edge_type_id( type ) )
    node_ids.map { |node_id| get_node( node_id ) }
  end

  # Raises "Unknown edge type" for a type that was never connected.
  def get_outbound_edges( source, type )
    build_edges( @db.get_outbound( source.id, edge_type_id( type ) ) )
  end

  # Raises "Unknown edge type" for a type that was never connected.
  def get_inbound_edges( source, type )
    build_edges( @db.get_inbound( source.id, edge_type_id( type ) ) )
  end

  def query
    Query.new( self )
  end

  private

  # Numeric id registered for `type`; raises when the type is unknown.
  def edge_type_id( type )
    @edge_types.fetch( type.to_s ) { raise "Unknown edge type" }
  end

  # Turns native [source_id, target_id, type_id, weight] rows into Edges.
  def build_edges( rows )
    rows.map do |source_id, target_id, type_id, weight|
      Edge.new( get_node( source_id ), get_node( target_id ), @edge_types_invert[type_id], weight )
    end
  end
end
282
+ end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hawthorn
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Markus Gronholm
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-09-26 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Hawthorn Graph Database
15
+ email: markus@alshain.fi
16
+ executables: []
17
+ extensions:
18
+ - ext/hawthorn/extconf.rb
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/hawthorn.rb
22
+ - ext/hawthorn/hawthorn.c
23
+ - ext/hawthorn/extconf.rb
24
+ homepage:
25
+ licenses: []
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 1.8.24
45
+ signing_key:
46
+ specification_version: 3
47
+ summary: Hawthorn
48
+ test_files: []