bio-bigwig 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,803 @@
1
+ #include "bigWig.h"
2
+ #include "bwCommon.h"
3
+ #include <stdlib.h>
4
+ #include <math.h>
5
+ #include <string.h>
6
+ #include <zlib.h>
7
+ #include <errno.h>
8
+
9
//Rounds v up to the nearest power of two. Exact powers of two are returned unchanged.
//An input of 0, or anything above 2^31, wraps around and yields 0.
static uint32_t roundup(uint32_t v) {
    unsigned int shift;

    v -= 1;
    //Smear the highest set bit into every lower bit position
    for(shift = 1; shift < 32; shift <<= 1) {
        v |= v >> shift;
    }
    v += 1;
    return v;
}
19
+
20
+ //Returns the root node on success and NULL on error
21
+ static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
22
+ uint32_t magic;
23
+ bwRTree_t *node;
24
+
25
+ if(!offset) {
26
+ if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
27
+ } else {
28
+ if(bwSetPos(fp, offset)) return NULL;
29
+ }
30
+
31
+ if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
32
+ if(magic != IDX_MAGIC) {
33
+ fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
34
+ return NULL;
35
+ }
36
+
37
+ node = calloc(1, sizeof(bwRTree_t));
38
+ if(!node) return NULL;
39
+
40
+ if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
41
+ if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
42
+ if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43
+ if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
44
+ if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45
+ if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
46
+ if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
47
+ if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
48
+ //Padding
49
+ if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
50
+ node->rootOffset = bwTell(fp);
51
+
52
+ //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
53
+ errno = 0;
54
+
55
+ return node;
56
+
57
+ error:
58
+ free(node);
59
+ return NULL;
60
+ }
61
+
62
+ //Returns a bwRTreeNode_t on success and NULL on an error
63
+ //For the root node, set offset to 0
64
+ static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
65
+ bwRTreeNode_t *node = NULL;
66
+ uint8_t padding;
67
+ uint16_t i;
68
+ if(offset) {
69
+ if(bwSetPos(fp, offset)) return NULL;
70
+ } else {
71
+ //seek
72
+ if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
73
+ }
74
+
75
+ node = calloc(1, sizeof(bwRTreeNode_t));
76
+ if(!node) return NULL;
77
+
78
+ if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
79
+ if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
80
+ if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
81
+
82
+ node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
83
+ if(!node->chrIdxStart) goto error;
84
+ node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
85
+ if(!node->baseStart) goto error;
86
+ node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
87
+ if(!node->chrIdxEnd) goto error;
88
+ node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
89
+ if(!node->baseEnd) goto error;
90
+ node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
91
+ if(!node->dataOffset) goto error;
92
+ if(node->isLeaf) {
93
+ node->x.size = malloc(node->nChildren * sizeof(uint64_t));
94
+ if(!node->x.size) goto error;
95
+ } else {
96
+ node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
97
+ if(!node->x.child) goto error;
98
+ }
99
+ for(i=0; i<node->nChildren; i++) {
100
+ if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101
+ if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102
+ if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103
+ if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
104
+ if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
105
+ if(node->isLeaf) {
106
+ if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
107
+ }
108
+ }
109
+
110
+ return node;
111
+
112
+ error:
113
+ if(node->chrIdxStart) free(node->chrIdxStart);
114
+ if(node->baseStart) free(node->baseStart);
115
+ if(node->chrIdxEnd) free(node->chrIdxEnd);
116
+ if(node->baseEnd) free(node->baseEnd);
117
+ if(node->dataOffset) free(node->dataOffset);
118
+ if(node->isLeaf && node->x.size) free(node->x.size);
119
+ else if((!node->isLeaf) && node->x.child) free(node->x.child);
120
+ free(node);
121
+ return NULL;
122
+ }
123
+
124
+ void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
125
+ if(!b) return;
126
+ if(b->size) free(b->size);
127
+ if(b->offset) free(b->offset);
128
+ free(b);
129
+ }
130
+
131
+ //Returns a bwOverlapBlock_t * object or NULL on error.
132
+ static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
133
+ uint16_t i, idx = 0;
134
+ bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
135
+ if(!o) return NULL;
136
+
137
+ for(i=0; i<node->nChildren; i++) {
138
+ if(tid < node->chrIdxStart[i]) break;
139
+ if(tid > node->chrIdxEnd[i]) continue;
140
+
141
+ /*
142
+ The individual blocks can theoretically span multiple contigs.
143
+ So if we treat the first/last contig in the range as special
144
+ but anything in the middle is a guaranteed match
145
+ */
146
+ if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
147
+ if(tid == node->chrIdxStart[i]) {
148
+ if(node->baseStart[i] >= end) break;
149
+ } else if(tid == node->chrIdxEnd[i]) {
150
+ if(node->baseEnd[i] <= start) continue;
151
+ }
152
+ } else {
153
+ if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
154
+ }
155
+ o->n++;
156
+ }
157
+
158
+ if(o->n) {
159
+ o->offset = malloc(sizeof(uint64_t) * (o->n));
160
+ if(!o->offset) goto error;
161
+ o->size = malloc(sizeof(uint64_t) * (o->n));
162
+ if(!o->size) goto error;
163
+
164
+ for(i=0; i<node->nChildren; i++) {
165
+ if(tid < node->chrIdxStart[i]) break;
166
+ if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
167
+ if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
168
+ if(tid == node->chrIdxStart[i]) {
169
+ if(node->baseStart[i] >= end) continue;
170
+ } else if(tid == node->chrIdxEnd[i]) {
171
+ if(node->baseEnd[i] <= start) continue;
172
+ }
173
+ } else {
174
+ if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
175
+ }
176
+ o->offset[idx] = node->dataOffset[i];
177
+ o->size[idx++] = node->x.size[i];
178
+ if(idx >= o->n) break;
179
+ }
180
+ }
181
+
182
+ if(idx != o->n) { //This should never happen
183
+ fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
184
+ goto error;
185
+ }
186
+
187
+ return o;
188
+
189
+ error:
190
+ if(o) destroyBWOverlapBlock(o);
191
+ return NULL;
192
+ }
193
+
194
+ //This will free l2 unless there's an error!
195
+ //Returns NULL on error, otherwise the merged lists
196
+ static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
197
+ uint64_t i,j;
198
+ if(!b2) return b1;
199
+ if(!b2->n) {
200
+ destroyBWOverlapBlock(b2);
201
+ return b1;
202
+ }
203
+ if(!b1->n) {
204
+ destroyBWOverlapBlock(b1);
205
+ return b2;
206
+ }
207
+ j = b1->n;
208
+ b1->n += b2->n;
209
+ b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
210
+ if(!b1->offset) goto error;
211
+ b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
212
+ if(!b1->size) goto error;
213
+
214
+ for(i=0; i<b2->n; i++) {
215
+ b1->offset[j+i] = b2->offset[i];
216
+ b1->size[j+i] = b2->size[i];
217
+ }
218
+ destroyBWOverlapBlock(b2);
219
+ return b1;
220
+
221
+ error:
222
+ destroyBWOverlapBlock(b1);
223
+ return NULL;
224
+ }
225
+
226
+ //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
227
+ //The output needs to be free()d if not NULL (likewise with *sizes)
228
+ static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
229
+ uint16_t i;
230
+ bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
231
+ if(!output) return NULL;
232
+
233
+ for(i=0; i<node->nChildren; i++) {
234
+ if(tid < node->chrIdxStart[i]) break;
235
+ if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
236
+ if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
237
+ if(tid == node->chrIdxStart[i]) {
238
+ if(node->baseStart[i] >= end) continue;
239
+ } else if(tid == node->chrIdxEnd[i]) {
240
+ if(node->baseEnd[i] <= start) continue;
241
+ }
242
+ } else {
243
+ if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
244
+ }
245
+
246
+ //We have an overlap!
247
+ if(!node->x.child[i])
248
+ node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
249
+ if(!node->x.child[i]) goto error;
250
+
251
+ if(node->x.child[i]->isLeaf) { //leaf
252
+ nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
253
+ } else { //non-leaf
254
+ nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
255
+ }
256
+
257
+ //The output is processed the same regardless of leaf/non-leaf
258
+ if(!nodeBlocks) goto error;
259
+ else {
260
+ output = mergeOverlapBlocks(output, nodeBlocks);
261
+ if(!output) {
262
+ destroyBWOverlapBlock(nodeBlocks);
263
+ goto error;
264
+ }
265
+ }
266
+ }
267
+
268
+ return output;
269
+
270
+ error:
271
+ destroyBWOverlapBlock(output);
272
+ return NULL;
273
+ }
274
+
275
+ //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
276
+ //The output must be free()d
277
+ bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
278
+ if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
279
+ return overlapsNonLeaf(bw, root, tid, start, end);
280
+ }
281
+
282
+ //In reality, a hash or some sort of tree structure is probably faster...
283
+ //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
284
+ uint32_t bwGetTid(const bigWigFile_t *fp, const char *chrom) {
285
+ uint32_t i;
286
+ if(!chrom) return -1;
287
+ for(i=0; i<fp->cl->nKeys; i++) {
288
+ if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
289
+ }
290
+ return -1;
291
+ }
292
+
293
+ static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end) {
294
+ uint32_t tid = bwGetTid(fp, chrom);
295
+
296
+ if(tid == (uint32_t) -1) {
297
+ fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
298
+ return NULL;
299
+ }
300
+
301
+ //Get the info if needed
302
+ if(!fp->idx) {
303
+ fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
304
+ if(!fp->idx) {
305
+ return NULL;
306
+ }
307
+ }
308
+
309
+ if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
310
+ if(!fp->idx->root) return NULL;
311
+
312
+ return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
313
+ }
314
+
315
+ void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
316
+ hdr->tid = ((uint32_t*)b)[0];
317
+ hdr->start = ((uint32_t*)b)[1];
318
+ hdr->end = ((uint32_t*)b)[2];
319
+ hdr->step = ((uint32_t*)b)[3];
320
+ hdr->span = ((uint32_t*)b)[4];
321
+ hdr->type = ((uint8_t*)b)[20];
322
+ hdr->nItems = ((uint16_t*)b)[11];
323
+ }
324
+
325
+ void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
326
+ if(!o) return;
327
+ if(o->start) free(o->start);
328
+ if(o->end) free(o->end);
329
+ if(o->value) free(o->value);
330
+ free(o);
331
+ }
332
+
333
+ void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
334
+ uint32_t i;
335
+ if(!o) return;
336
+ if(o->start) free(o->start);
337
+ if(o->end) free(o->end);
338
+ if(o->str) {
339
+ for(i=0; i<o->l; i++) {
340
+ if(o->str[i]) free(o->str[i]);
341
+ }
342
+ free(o->str);
343
+ }
344
+ free(o);
345
+ }
346
+
347
+ //Returns NULL on error, in which case o has been free()d
348
+ static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
349
+ if(o->l+1 >= o->m) {
350
+ o->m = roundup(o->l+1);
351
+ o->start = realloc(o->start, o->m * sizeof(uint32_t));
352
+ if(!o->start) goto error;
353
+ o->end = realloc(o->end, o->m * sizeof(uint32_t));
354
+ if(!o->end) goto error;
355
+ o->value = realloc(o->value, o->m * sizeof(float));
356
+ if(!o->value) goto error;
357
+ }
358
+ o->start[o->l] = start;
359
+ o->end[o->l] = end;
360
+ o->value[o->l++] = value;
361
+ return o;
362
+
363
+ error:
364
+ bwDestroyOverlappingIntervals(o);
365
+ return NULL;
366
+ }
367
+
368
+ static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
369
+ if(o->l+1 >= o->m) {
370
+ o->m = roundup(o->l+1);
371
+ o->start = realloc(o->start, o->m * sizeof(uint32_t));
372
+ if(!o->start) goto error;
373
+ o->end = realloc(o->end, o->m * sizeof(uint32_t));
374
+ if(!o->end) goto error;
375
+ if(withString) {
376
+ o->str = realloc(o->str, o->m * sizeof(char**));
377
+ if(!o->str) goto error;
378
+ }
379
+ }
380
+ o->start[o->l] = start;
381
+ o->end[o->l] = end;
382
+ if(withString) o->str[o->l] = bwStrdup(str);
383
+ o->l++;
384
+ return o;
385
+
386
+ error:
387
+ bbDestroyOverlappingEntries(o);
388
+ return NULL;
389
+ }
390
+
391
+ //Returns NULL on error
392
+ bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
393
+ uint64_t i;
394
+ uint16_t j;
395
+ int compressed = 0, rv;
396
+ uLongf sz = fp->hdr->bufSize, tmp;
397
+ void *buf = NULL, *compBuf = NULL;
398
+ uint32_t start = 0, end , *p;
399
+ float value;
400
+ bwDataHeader_t hdr;
401
+ bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
402
+
403
+ if(!output) goto error;
404
+
405
+ if(!o) return output;
406
+ if(!o->n) return output;
407
+
408
+ if(sz) {
409
+ compressed = 1;
410
+ buf = malloc(sz);
411
+ }
412
+ sz = 0; //This is now the size of the compressed buffer
413
+
414
+ for(i=0; i<o->n; i++) {
415
+ if(bwSetPos(fp, o->offset[i])) goto error;
416
+
417
+ if(sz < o->size[i]) {
418
+ compBuf = realloc(compBuf, o->size[i]);
419
+ sz = o->size[i];
420
+ }
421
+ if(!compBuf) goto error;
422
+
423
+ if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
424
+ if(compressed) {
425
+ tmp = fp->hdr->bufSize; //This gets over-written by uncompress
426
+ rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
427
+ if(rv != Z_OK) goto error;
428
+ } else {
429
+ buf = compBuf;
430
+ }
431
+
432
+ //TODO: ensure that tmp is large enough!
433
+ bwFillDataHdr(&hdr, buf);
434
+
435
+ p = ((uint32_t*) buf);
436
+ p += 6;
437
+ if(hdr.tid != tid) continue;
438
+
439
+ if(hdr.type == 3) start = hdr.start - hdr.step;
440
+
441
+ //FIXME: We should ensure that sz is large enough to hold nItems of the given type
442
+ for(j=0; j<hdr.nItems; j++) {
443
+ switch(hdr.type) {
444
+ case 1:
445
+ start = *p;
446
+ p++;
447
+ end = *p;
448
+ p++;
449
+ value = *((float *)p);
450
+ p++;
451
+ break;
452
+ case 2:
453
+ start = *p;
454
+ p++;
455
+ end = start + hdr.span;
456
+ value = *((float *)p);
457
+ p++;
458
+ break;
459
+ case 3:
460
+ start += hdr.step;
461
+ end = start+hdr.span;
462
+ value = *((float *)p);
463
+ p++;
464
+ break;
465
+ default :
466
+ goto error;
467
+ break;
468
+ }
469
+
470
+ if(end <= ostart || start >= oend) continue;
471
+ //Push the overlap
472
+ if(!pushIntervals(output, start, end, value)) goto error;
473
+ }
474
+ }
475
+
476
+ if(compressed && buf) free(buf);
477
+ if(compBuf) free(compBuf);
478
+ return output;
479
+
480
+ error:
481
+ fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
482
+ if(output) bwDestroyOverlappingIntervals(output);
483
+ if(compressed && buf) free(buf);
484
+ if(compBuf) free(compBuf);
485
+ return NULL;
486
+ }
487
+
488
+ bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
489
+ uint64_t i;
490
+ int compressed = 0, rv, slen;
491
+ uLongf sz = fp->hdr->bufSize, tmp = 0;
492
+ void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
493
+ uint32_t entryTid = 0, start = 0, end;
494
+ char *str;
495
+ bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
496
+
497
+ if(!output) goto error;
498
+
499
+ if(!o) return output;
500
+ if(!o->n) return output;
501
+
502
+ if(sz) {
503
+ compressed = 1;
504
+ buf = malloc(sz);
505
+ }
506
+ sz = 0; //This is now the size of the compressed buffer
507
+
508
+ for(i=0; i<o->n; i++) {
509
+ if(bwSetPos(fp, o->offset[i])) goto error;
510
+
511
+ if(sz < o->size[i]) {
512
+ compBuf = realloc(compBuf, o->size[i]);
513
+ sz = o->size[i];
514
+ }
515
+ if(!compBuf) goto error;
516
+
517
+ if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
518
+ if(compressed) {
519
+ tmp = fp->hdr->bufSize; //This gets over-written by uncompress
520
+ rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
521
+ if(rv != Z_OK) goto error;
522
+ } else {
523
+ buf = compBuf;
524
+ tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
525
+ }
526
+
527
+ bufEnd = buf + tmp;
528
+ while(buf < bufEnd) {
529
+ entryTid = ((uint32_t*)buf)[0];
530
+ start = ((uint32_t*)buf)[1];
531
+ end = ((uint32_t*)buf)[2];
532
+ buf += 12;
533
+ str = (char*)buf;
534
+ slen = strlen(str) + 1;
535
+ buf += slen;
536
+
537
+ if(entryTid < tid) continue;
538
+ if(entryTid > tid) break;
539
+ if(end <= ostart) continue;
540
+ if(start >= oend) break;
541
+
542
+ //Push the overlap
543
+ if(!pushBBIntervals(output, start, end, str, withString)) goto error;
544
+ }
545
+
546
+ buf = bufEnd - tmp; //reset the buffer pointer
547
+ }
548
+
549
+ if(compressed && buf) free(buf);
550
+ if(compBuf) free(compBuf);
551
+ return output;
552
+
553
+ error:
554
+ fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
555
+ buf = bufEnd - tmp;
556
+ if(output) bbDestroyOverlappingEntries(output);
557
+ if(compressed && buf) free(buf);
558
+ if(compBuf) free(compBuf);
559
+ return NULL;
560
+ }
561
+
562
+ //Returns NULL on error OR no intervals, which is a bad design...
563
+ bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end) {
564
+ bwOverlappingIntervals_t *output;
565
+ uint32_t tid = bwGetTid(fp, chrom);
566
+ if(tid == (uint32_t) -1) return NULL;
567
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
568
+ if(!blocks) return NULL;
569
+ output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
570
+ destroyBWOverlapBlock(blocks);
571
+ return output;
572
+ }
573
+
574
+ //Like above, but for bigBed files
575
+ bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int withString) {
576
+ bbOverlappingEntries_t *output;
577
+ uint32_t tid = bwGetTid(fp, chrom);
578
+ if(tid == (uint32_t) -1) return NULL;
579
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
580
+ if(!blocks) return NULL;
581
+ output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
582
+ destroyBWOverlapBlock(blocks);
583
+ return output;
584
+ }
585
+
586
+ //Returns NULL on error
587
+ bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
588
+ bwOverlapIterator_t *output = NULL;
589
+ uint64_t n;
590
+ uint32_t tid = bwGetTid(fp, chrom);
591
+ if(tid == (uint32_t) -1) return output;
592
+ output = calloc(1, sizeof(bwOverlapIterator_t));
593
+ if(!output) return output;
594
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
595
+
596
+ output->bw = fp;
597
+ output->tid = tid;
598
+ output->start = start;
599
+ output->end = end;
600
+ output->blocks = blocks;
601
+ output->blocksPerIteration = blocksPerIteration;
602
+
603
+ if(blocks) {
604
+ n = blocks->n;
605
+ if(n>blocksPerIteration) blocks->n = blocksPerIteration;
606
+ output->intervals = bwGetOverlappingIntervalsCore(fp, blocks,tid, start, end);
607
+ blocks->n = n;
608
+ output->offset = blocksPerIteration;
609
+ }
610
+ output->data = output->intervals;
611
+ return output;
612
+ }
613
+
614
+ //Returns NULL on error
615
+ bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
616
+ bwOverlapIterator_t *output = NULL;
617
+ uint64_t n;
618
+ uint32_t tid = bwGetTid(fp, chrom);
619
+ if(tid == (uint32_t) -1) return output;
620
+ output = calloc(1, sizeof(bwOverlapIterator_t));
621
+ if(!output) return output;
622
+ bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
623
+
624
+ output->bw = fp;
625
+ output->tid = tid;
626
+ output->start = start;
627
+ output->end = end;
628
+ output->blocks = blocks;
629
+ output->blocksPerIteration = blocksPerIteration;
630
+ output->withString = withString;
631
+
632
+ if(blocks) {
633
+ n = blocks->n;
634
+ if(n>blocksPerIteration) blocks->n = blocksPerIteration;
635
+ output->entries = bbGetOverlappingEntriesCore(fp, blocks,tid, start, end, withString);
636
+ blocks->n = n;
637
+ output->offset = blocksPerIteration;
638
+ }
639
+ output->data = output->entries;
640
+ return output;
641
+ }
642
+
643
+ void bwIteratorDestroy(bwOverlapIterator_t *iter) {
644
+ if(!iter) return;
645
+ if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
646
+ if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
647
+ if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
648
+ free(iter);
649
+ }
650
+
651
+ //On error, points to NULL and destroys the input
652
+ bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
653
+ uint64_t n, *offset, *size;
654
+ bwOverlapBlock_t *blocks = iter->blocks;
655
+
656
+ if(iter->intervals) {
657
+ bwDestroyOverlappingIntervals(iter->intervals);
658
+ iter->intervals = NULL;
659
+ }
660
+ if(iter->entries) {
661
+ bbDestroyOverlappingEntries(iter->entries);
662
+ iter->entries = NULL;
663
+ }
664
+ iter->data = NULL;
665
+
666
+ if(iter->offset < blocks->n) {
667
+ //store the previous values
668
+ n = blocks->n;
669
+ offset = blocks->offset;
670
+ size = blocks->size;
671
+
672
+ //Move the start of the blocks
673
+ blocks->offset += iter->offset;
674
+ blocks->size += iter->offset;
675
+ if(iter->offset + iter->blocksPerIteration > n) {
676
+ blocks->n = blocks->n - iter->offset;
677
+ } else {
678
+ blocks->n = iter->blocksPerIteration;
679
+ }
680
+
681
+ //Get the intervals or entries, as appropriate
682
+ if(iter->bw->type == 0) {
683
+ //bigWig
684
+ iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
685
+ iter->data = iter->intervals;
686
+ } else {
687
+ //bigBed
688
+ iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
689
+ iter->data = iter->entries;
690
+ }
691
+ iter->offset += iter->blocksPerIteration;
692
+
693
+ //reset the values in iter->blocks
694
+ blocks->n = n;
695
+ blocks->offset = offset;
696
+ blocks->size = size;
697
+
698
+ //Check for error
699
+ if(!iter->intervals && !iter->entries) goto error;
700
+ }
701
+
702
+ return iter;
703
+
704
+ error:
705
+ bwIteratorDestroy(iter);
706
+ return NULL;
707
+ }
708
+
709
+ //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
710
+ //The ->end member is NULL
711
+ //If includeNA is not 0 then ->start is also NULL, since it's implied
712
+ //Note that bwDestroyOverlappingIntervals() will work in either case
713
+ bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t end, int includeNA) {
714
+ uint32_t i, j, n;
715
+ bwOverlappingIntervals_t *output = NULL;
716
+ bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
717
+ if(!intermediate) return NULL;
718
+
719
+ output = calloc(1, sizeof(bwOverlappingIntervals_t));
720
+ if(!output) goto error;
721
+ if(includeNA) {
722
+ output->l = end-start;
723
+ output->value = malloc(output->l*sizeof(float));
724
+ if(!output->value) goto error;
725
+ for(i=0; i<output->l; i++) output->value[i] = NAN;
726
+ for(i=0; i<intermediate->l; i++) {
727
+ for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
728
+ if(j < start || j >= end) continue;
729
+ output->value[j-start] = intermediate->value[i];
730
+ }
731
+ }
732
+ } else {
733
+ n = 0;
734
+ for(i=0; i<intermediate->l; i++) {
735
+ if(intermediate->start[i] < start) intermediate->start[i] = start;
736
+ if(intermediate->end[i] > end) intermediate->end[i] = end;
737
+ n += intermediate->end[i]-intermediate->start[i];
738
+ }
739
+ output->l = n;
740
+ output->start = malloc(sizeof(uint32_t)*n);
741
+ if(!output->start) goto error;
742
+ output->value = malloc(sizeof(float)*n);
743
+ if(!output->value) goto error;
744
+ n = 0; //this is now the index
745
+ for(i=0; i<intermediate->l; i++) {
746
+ for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
747
+ if(j < start || j >= end) continue;
748
+ output->start[n] = j;
749
+ output->value[n++] = intermediate->value[i];
750
+ }
751
+ }
752
+ }
753
+
754
+ bwDestroyOverlappingIntervals(intermediate);
755
+ return output;
756
+
757
+ error:
758
+ if(intermediate) bwDestroyOverlappingIntervals(intermediate);
759
+ if(output) bwDestroyOverlappingIntervals(output);
760
+ return NULL;
761
+ }
762
+
763
+ void bwDestroyIndexNode(bwRTreeNode_t *node) {
764
+ uint16_t i;
765
+
766
+ if(!node) return;
767
+
768
+ free(node->chrIdxStart);
769
+ free(node->baseStart);
770
+ free(node->chrIdxEnd);
771
+ free(node->baseEnd);
772
+ free(node->dataOffset);
773
+ if(!node->isLeaf) {
774
+ for(i=0; i<node->nChildren; i++) {
775
+ bwDestroyIndexNode(node->x.child[i]);
776
+ }
777
+ free(node->x.child);
778
+ } else {
779
+ free(node->x.size);
780
+ }
781
+ free(node);
782
+ }
783
+
784
+ void bwDestroyIndex(bwRTree_t *idx) {
785
+ bwDestroyIndexNode(idx->root);
786
+ free(idx);
787
+ }
788
+
789
+ //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
790
+ //Returns NULL on error
791
+ bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
792
+ bwRTree_t *idx = readRTreeIdx(fp, offset);
793
+ if(!idx) return NULL;
794
+
795
+ //Read in the root node
796
+ idx->root = bwGetRTreeNode(fp, idx->rootOffset);
797
+
798
+ if(!idx->root) {
799
+ bwDestroyIndex(idx);
800
+ return NULL;
801
+ }
802
+ return idx;
803
+ }