bio-bigwig 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,803 +0,0 @@
1
- #include "bigWig.h"
2
- #include "bwCommon.h"
3
- #include <stdlib.h>
4
- #include <math.h>
5
- #include <string.h>
6
- #include <zlib.h>
7
- #include <errno.h>
8
-
9
- static uint32_t roundup(uint32_t v) {
10
- v--;
11
- v |= v >> 1;
12
- v |= v >> 2;
13
- v |= v >> 4;
14
- v |= v >> 8;
15
- v |= v >> 16;
16
- v++;
17
- return v;
18
- }
19
-
20
- //Returns the root node on success and NULL on error
21
- static bwRTree_t *readRTreeIdx(bigWigFile_t *fp, uint64_t offset) {
22
- uint32_t magic;
23
- bwRTree_t *node;
24
-
25
- if(!offset) {
26
- if(bwSetPos(fp, fp->hdr->indexOffset)) return NULL;
27
- } else {
28
- if(bwSetPos(fp, offset)) return NULL;
29
- }
30
-
31
- if(bwRead(&magic, sizeof(uint32_t), 1, fp) != 1) return NULL;
32
- if(magic != IDX_MAGIC) {
33
- fprintf(stderr, "[readRTreeIdx] Mismatch in the magic number!\n");
34
- return NULL;
35
- }
36
-
37
- node = calloc(1, sizeof(bwRTree_t));
38
- if(!node) return NULL;
39
-
40
- if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
41
- if(bwRead(&(node->nItems), sizeof(uint64_t), 1, fp) != 1) goto error;
42
- if(bwRead(&(node->chrIdxStart), sizeof(uint32_t), 1, fp) != 1) goto error;
43
- if(bwRead(&(node->baseStart), sizeof(uint32_t), 1, fp) != 1) goto error;
44
- if(bwRead(&(node->chrIdxEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
45
- if(bwRead(&(node->baseEnd), sizeof(uint32_t), 1, fp) != 1) goto error;
46
- if(bwRead(&(node->idxSize), sizeof(uint64_t), 1, fp) != 1) goto error;
47
- if(bwRead(&(node->nItemsPerSlot), sizeof(uint32_t), 1, fp) != 1) goto error;
48
- //Padding
49
- if(bwRead(&(node->blockSize), sizeof(uint32_t), 1, fp) != 1) goto error;
50
- node->rootOffset = bwTell(fp);
51
-
52
- //For remote files, libCurl sometimes sets errno to 115 and doesn't clear it
53
- errno = 0;
54
-
55
- return node;
56
-
57
- error:
58
- free(node);
59
- return NULL;
60
- }
61
-
62
- //Returns a bwRTreeNode_t on success and NULL on an error
63
- //For the root node, set offset to 0
64
- static bwRTreeNode_t *bwGetRTreeNode(bigWigFile_t *fp, uint64_t offset) {
65
- bwRTreeNode_t *node = NULL;
66
- uint8_t padding;
67
- uint16_t i;
68
- if(offset) {
69
- if(bwSetPos(fp, offset)) return NULL;
70
- } else {
71
- //seek
72
- if(bwSetPos(fp, fp->idx->rootOffset)) return NULL;
73
- }
74
-
75
- node = calloc(1, sizeof(bwRTreeNode_t));
76
- if(!node) return NULL;
77
-
78
- if(bwRead(&(node->isLeaf), sizeof(uint8_t), 1, fp) != 1) goto error;
79
- if(bwRead(&padding, sizeof(uint8_t), 1, fp) != 1) goto error;
80
- if(bwRead(&(node->nChildren), sizeof(uint16_t), 1, fp) != 1) goto error;
81
-
82
- node->chrIdxStart = malloc(sizeof(uint32_t)*(node->nChildren));
83
- if(!node->chrIdxStart) goto error;
84
- node->baseStart = malloc(sizeof(uint32_t)*(node->nChildren));
85
- if(!node->baseStart) goto error;
86
- node->chrIdxEnd = malloc(sizeof(uint32_t)*(node->nChildren));
87
- if(!node->chrIdxEnd) goto error;
88
- node->baseEnd = malloc(sizeof(uint32_t)*(node->nChildren));
89
- if(!node->baseEnd) goto error;
90
- node->dataOffset = malloc(sizeof(uint64_t)*(node->nChildren));
91
- if(!node->dataOffset) goto error;
92
- if(node->isLeaf) {
93
- node->x.size = malloc(node->nChildren * sizeof(uint64_t));
94
- if(!node->x.size) goto error;
95
- } else {
96
- node->x.child = calloc(node->nChildren, sizeof(struct bwRTreeNode_t *));
97
- if(!node->x.child) goto error;
98
- }
99
- for(i=0; i<node->nChildren; i++) {
100
- if(bwRead(&(node->chrIdxStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
101
- if(bwRead(&(node->baseStart[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
102
- if(bwRead(&(node->chrIdxEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
103
- if(bwRead(&(node->baseEnd[i]), sizeof(uint32_t), 1, fp) != 1) goto error;
104
- if(bwRead(&(node->dataOffset[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
105
- if(node->isLeaf) {
106
- if(bwRead(&(node->x.size[i]), sizeof(uint64_t), 1, fp) != 1) goto error;
107
- }
108
- }
109
-
110
- return node;
111
-
112
- error:
113
- if(node->chrIdxStart) free(node->chrIdxStart);
114
- if(node->baseStart) free(node->baseStart);
115
- if(node->chrIdxEnd) free(node->chrIdxEnd);
116
- if(node->baseEnd) free(node->baseEnd);
117
- if(node->dataOffset) free(node->dataOffset);
118
- if(node->isLeaf && node->x.size) free(node->x.size);
119
- else if((!node->isLeaf) && node->x.child) free(node->x.child);
120
- free(node);
121
- return NULL;
122
- }
123
-
124
- void destroyBWOverlapBlock(bwOverlapBlock_t *b) {
125
- if(!b) return;
126
- if(b->size) free(b->size);
127
- if(b->offset) free(b->offset);
128
- free(b);
129
- }
130
-
131
- //Returns a bwOverlapBlock_t * object or NULL on error.
132
- static bwOverlapBlock_t *overlapsLeaf(bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
133
- uint16_t i, idx = 0;
134
- bwOverlapBlock_t *o = calloc(1, sizeof(bwOverlapBlock_t));
135
- if(!o) return NULL;
136
-
137
- for(i=0; i<node->nChildren; i++) {
138
- if(tid < node->chrIdxStart[i]) break;
139
- if(tid > node->chrIdxEnd[i]) continue;
140
-
141
- /*
142
- The individual blocks can theoretically span multiple contigs.
143
- So if we treat the first/last contig in the range as special
144
- but anything in the middle is a guaranteed match
145
- */
146
- if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
147
- if(tid == node->chrIdxStart[i]) {
148
- if(node->baseStart[i] >= end) break;
149
- } else if(tid == node->chrIdxEnd[i]) {
150
- if(node->baseEnd[i] <= start) continue;
151
- }
152
- } else {
153
- if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
154
- }
155
- o->n++;
156
- }
157
-
158
- if(o->n) {
159
- o->offset = malloc(sizeof(uint64_t) * (o->n));
160
- if(!o->offset) goto error;
161
- o->size = malloc(sizeof(uint64_t) * (o->n));
162
- if(!o->size) goto error;
163
-
164
- for(i=0; i<node->nChildren; i++) {
165
- if(tid < node->chrIdxStart[i]) break;
166
- if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
167
- if(node->chrIdxStart[i] != node->chrIdxEnd[i]) {
168
- if(tid == node->chrIdxStart[i]) {
169
- if(node->baseStart[i] >= end) continue;
170
- } else if(tid == node->chrIdxEnd[i]) {
171
- if(node->baseEnd[i] <= start) continue;
172
- }
173
- } else {
174
- if(node->baseStart[i] >= end || node->baseEnd[i] <= start) continue;
175
- }
176
- o->offset[idx] = node->dataOffset[i];
177
- o->size[idx++] = node->x.size[i];
178
- if(idx >= o->n) break;
179
- }
180
- }
181
-
182
- if(idx != o->n) { //This should never happen
183
- fprintf(stderr, "[overlapsLeaf] Mismatch between number of overlaps calculated and found!\n");
184
- goto error;
185
- }
186
-
187
- return o;
188
-
189
- error:
190
- if(o) destroyBWOverlapBlock(o);
191
- return NULL;
192
- }
193
-
194
- //This will free l2 unless there's an error!
195
- //Returns NULL on error, otherwise the merged lists
196
- static bwOverlapBlock_t *mergeOverlapBlocks(bwOverlapBlock_t *b1, bwOverlapBlock_t *b2) {
197
- uint64_t i,j;
198
- if(!b2) return b1;
199
- if(!b2->n) {
200
- destroyBWOverlapBlock(b2);
201
- return b1;
202
- }
203
- if(!b1->n) {
204
- destroyBWOverlapBlock(b1);
205
- return b2;
206
- }
207
- j = b1->n;
208
- b1->n += b2->n;
209
- b1->offset = realloc(b1->offset, sizeof(uint64_t) * (b1->n+b2->n));
210
- if(!b1->offset) goto error;
211
- b1->size = realloc(b1->size, sizeof(uint64_t) * (b1->n+b2->n));
212
- if(!b1->size) goto error;
213
-
214
- for(i=0; i<b2->n; i++) {
215
- b1->offset[j+i] = b2->offset[i];
216
- b1->size[j+i] = b2->size[i];
217
- }
218
- destroyBWOverlapBlock(b2);
219
- return b1;
220
-
221
- error:
222
- destroyBWOverlapBlock(b1);
223
- return NULL;
224
- }
225
-
226
- //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
227
- //The output needs to be free()d if not NULL (likewise with *sizes)
228
- static bwOverlapBlock_t *overlapsNonLeaf(bigWigFile_t *fp, bwRTreeNode_t *node, uint32_t tid, uint32_t start, uint32_t end) {
229
- uint16_t i;
230
- bwOverlapBlock_t *nodeBlocks, *output = calloc(1, sizeof(bwOverlapBlock_t));
231
- if(!output) return NULL;
232
-
233
- for(i=0; i<node->nChildren; i++) {
234
- if(tid < node->chrIdxStart[i]) break;
235
- if(tid < node->chrIdxStart[i] || tid > node->chrIdxEnd[i]) continue;
236
- if(node->chrIdxStart[i] != node->chrIdxEnd[i]) { //child spans contigs
237
- if(tid == node->chrIdxStart[i]) {
238
- if(node->baseStart[i] >= end) continue;
239
- } else if(tid == node->chrIdxEnd[i]) {
240
- if(node->baseEnd[i] <= start) continue;
241
- }
242
- } else {
243
- if(end <= node->baseStart[i] || start >= node->baseEnd[i]) continue;
244
- }
245
-
246
- //We have an overlap!
247
- if(!node->x.child[i])
248
- node->x.child[i] = bwGetRTreeNode(fp, node->dataOffset[i]);
249
- if(!node->x.child[i]) goto error;
250
-
251
- if(node->x.child[i]->isLeaf) { //leaf
252
- nodeBlocks = overlapsLeaf(node->x.child[i], tid, start, end);
253
- } else { //non-leaf
254
- nodeBlocks = overlapsNonLeaf(fp, node->x.child[i], tid, start, end);
255
- }
256
-
257
- //The output is processed the same regardless of leaf/non-leaf
258
- if(!nodeBlocks) goto error;
259
- else {
260
- output = mergeOverlapBlocks(output, nodeBlocks);
261
- if(!output) {
262
- destroyBWOverlapBlock(nodeBlocks);
263
- goto error;
264
- }
265
- }
266
- }
267
-
268
- return output;
269
-
270
- error:
271
- destroyBWOverlapBlock(output);
272
- return NULL;
273
- }
274
-
275
- //Returns NULL and sets nOverlaps to >0 on error, otherwise nOverlaps is the number of file offsets returned
276
- //The output must be free()d
277
- bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end) {
278
- if(root->isLeaf) return overlapsLeaf(root, tid, start, end);
279
- return overlapsNonLeaf(bw, root, tid, start, end);
280
- }
281
-
282
- //In reality, a hash or some sort of tree structure is probably faster...
283
- //Return -1 (AKA 0xFFFFFFFF...) on "not there", so we can hold (2^32)-1 items.
284
- uint32_t bwGetTid(bigWigFile_t *fp, char *chrom) {
285
- uint32_t i;
286
- if(!chrom) return -1;
287
- for(i=0; i<fp->cl->nKeys; i++) {
288
- if(strcmp(chrom, fp->cl->chrom[i]) == 0) return i;
289
- }
290
- return -1;
291
- }
292
-
293
- static bwOverlapBlock_t *bwGetOverlappingBlocks(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
294
- uint32_t tid = bwGetTid(fp, chrom);
295
-
296
- if(tid == (uint32_t) -1) {
297
- fprintf(stderr, "[bwGetOverlappingBlocks] Non-existent contig: %s\n", chrom);
298
- return NULL;
299
- }
300
-
301
- //Get the info if needed
302
- if(!fp->idx) {
303
- fp->idx = readRTreeIdx(fp, fp->hdr->indexOffset);
304
- if(!fp->idx) {
305
- return NULL;
306
- }
307
- }
308
-
309
- if(!fp->idx->root) fp->idx->root = bwGetRTreeNode(fp, 0);
310
- if(!fp->idx->root) return NULL;
311
-
312
- return walkRTreeNodes(fp, fp->idx->root, tid, start, end);
313
- }
314
-
315
- void bwFillDataHdr(bwDataHeader_t *hdr, void *b) {
316
- hdr->tid = ((uint32_t*)b)[0];
317
- hdr->start = ((uint32_t*)b)[1];
318
- hdr->end = ((uint32_t*)b)[2];
319
- hdr->step = ((uint32_t*)b)[3];
320
- hdr->span = ((uint32_t*)b)[4];
321
- hdr->type = ((uint8_t*)b)[20];
322
- hdr->nItems = ((uint16_t*)b)[11];
323
- }
324
-
325
- void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o) {
326
- if(!o) return;
327
- if(o->start) free(o->start);
328
- if(o->end) free(o->end);
329
- if(o->value) free(o->value);
330
- free(o);
331
- }
332
-
333
- void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o) {
334
- uint32_t i;
335
- if(!o) return;
336
- if(o->start) free(o->start);
337
- if(o->end) free(o->end);
338
- if(o->str) {
339
- for(i=0; i<o->l; i++) {
340
- if(o->str[i]) free(o->str[i]);
341
- }
342
- free(o->str);
343
- }
344
- free(o);
345
- }
346
-
347
- //Returns NULL on error, in which case o has been free()d
348
- static bwOverlappingIntervals_t *pushIntervals(bwOverlappingIntervals_t *o, uint32_t start, uint32_t end, float value) {
349
- if(o->l+1 >= o->m) {
350
- o->m = roundup(o->l+1);
351
- o->start = realloc(o->start, o->m * sizeof(uint32_t));
352
- if(!o->start) goto error;
353
- o->end = realloc(o->end, o->m * sizeof(uint32_t));
354
- if(!o->end) goto error;
355
- o->value = realloc(o->value, o->m * sizeof(float));
356
- if(!o->value) goto error;
357
- }
358
- o->start[o->l] = start;
359
- o->end[o->l] = end;
360
- o->value[o->l++] = value;
361
- return o;
362
-
363
- error:
364
- bwDestroyOverlappingIntervals(o);
365
- return NULL;
366
- }
367
-
368
- static bbOverlappingEntries_t *pushBBIntervals(bbOverlappingEntries_t *o, uint32_t start, uint32_t end, char *str, int withString) {
369
- if(o->l+1 >= o->m) {
370
- o->m = roundup(o->l+1);
371
- o->start = realloc(o->start, o->m * sizeof(uint32_t));
372
- if(!o->start) goto error;
373
- o->end = realloc(o->end, o->m * sizeof(uint32_t));
374
- if(!o->end) goto error;
375
- if(withString) {
376
- o->str = realloc(o->str, o->m * sizeof(char**));
377
- if(!o->str) goto error;
378
- }
379
- }
380
- o->start[o->l] = start;
381
- o->end[o->l] = end;
382
- if(withString) o->str[o->l] = strdup(str);
383
- o->l++;
384
- return o;
385
-
386
- error:
387
- bbDestroyOverlappingEntries(o);
388
- return NULL;
389
- }
390
-
391
- //Returns NULL on error
392
- bwOverlappingIntervals_t *bwGetOverlappingIntervalsCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend) {
393
- uint64_t i;
394
- uint16_t j;
395
- int compressed = 0, rv;
396
- uLongf sz = fp->hdr->bufSize, tmp;
397
- void *buf = NULL, *compBuf = NULL;
398
- uint32_t start = 0, end , *p;
399
- float value;
400
- bwDataHeader_t hdr;
401
- bwOverlappingIntervals_t *output = calloc(1, sizeof(bwOverlappingIntervals_t));
402
-
403
- if(!output) goto error;
404
-
405
- if(!o) return output;
406
- if(!o->n) return output;
407
-
408
- if(sz) {
409
- compressed = 1;
410
- buf = malloc(sz);
411
- }
412
- sz = 0; //This is now the size of the compressed buffer
413
-
414
- for(i=0; i<o->n; i++) {
415
- if(bwSetPos(fp, o->offset[i])) goto error;
416
-
417
- if(sz < o->size[i]) {
418
- compBuf = realloc(compBuf, o->size[i]);
419
- sz = o->size[i];
420
- }
421
- if(!compBuf) goto error;
422
-
423
- if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
424
- if(compressed) {
425
- tmp = fp->hdr->bufSize; //This gets over-written by uncompress
426
- rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
427
- if(rv != Z_OK) goto error;
428
- } else {
429
- buf = compBuf;
430
- }
431
-
432
- //TODO: ensure that tmp is large enough!
433
- bwFillDataHdr(&hdr, buf);
434
-
435
- p = ((uint32_t*) buf);
436
- p += 6;
437
- if(hdr.tid != tid) continue;
438
-
439
- if(hdr.type == 3) start = hdr.start - hdr.step;
440
-
441
- //FIXME: We should ensure that sz is large enough to hold nItems of the given type
442
- for(j=0; j<hdr.nItems; j++) {
443
- switch(hdr.type) {
444
- case 1:
445
- start = *p;
446
- p++;
447
- end = *p;
448
- p++;
449
- value = *((float *)p);
450
- p++;
451
- break;
452
- case 2:
453
- start = *p;
454
- p++;
455
- end = start + hdr.span;
456
- value = *((float *)p);
457
- p++;
458
- break;
459
- case 3:
460
- start += hdr.step;
461
- end = start+hdr.span;
462
- value = *((float *)p);
463
- p++;
464
- break;
465
- default :
466
- goto error;
467
- break;
468
- }
469
-
470
- if(end <= ostart || start >= oend) continue;
471
- //Push the overlap
472
- if(!pushIntervals(output, start, end, value)) goto error;
473
- }
474
- }
475
-
476
- if(compressed && buf) free(buf);
477
- if(compBuf) free(compBuf);
478
- return output;
479
-
480
- error:
481
- fprintf(stderr, "[bwGetOverlappingIntervalsCore] Got an error\n");
482
- if(output) bwDestroyOverlappingIntervals(output);
483
- if(compressed && buf) free(buf);
484
- if(compBuf) free(compBuf);
485
- return NULL;
486
- }
487
-
488
- bbOverlappingEntries_t *bbGetOverlappingEntriesCore(bigWigFile_t *fp, bwOverlapBlock_t *o, uint32_t tid, uint32_t ostart, uint32_t oend, int withString) {
489
- uint64_t i;
490
- int compressed = 0, rv, slen;
491
- uLongf sz = fp->hdr->bufSize, tmp = 0;
492
- void *buf = NULL, *bufEnd = NULL, *compBuf = NULL;
493
- uint32_t entryTid = 0, start = 0, end;
494
- char *str;
495
- bbOverlappingEntries_t *output = calloc(1, sizeof(bbOverlappingEntries_t));
496
-
497
- if(!output) goto error;
498
-
499
- if(!o) return output;
500
- if(!o->n) return output;
501
-
502
- if(sz) {
503
- compressed = 1;
504
- buf = malloc(sz);
505
- }
506
- sz = 0; //This is now the size of the compressed buffer
507
-
508
- for(i=0; i<o->n; i++) {
509
- if(bwSetPos(fp, o->offset[i])) goto error;
510
-
511
- if(sz < o->size[i]) {
512
- compBuf = realloc(compBuf, o->size[i]);
513
- sz = o->size[i];
514
- }
515
- if(!compBuf) goto error;
516
-
517
- if(bwRead(compBuf, o->size[i], 1, fp) != 1) goto error;
518
- if(compressed) {
519
- tmp = fp->hdr->bufSize; //This gets over-written by uncompress
520
- rv = uncompress(buf, (uLongf *) &tmp, compBuf, o->size[i]);
521
- if(rv != Z_OK) goto error;
522
- } else {
523
- buf = compBuf;
524
- tmp = o->size[i]; //TODO: Is this correct? Do non-gzipped bigBeds exist?
525
- }
526
-
527
- bufEnd = buf + tmp;
528
- while(buf < bufEnd) {
529
- entryTid = ((uint32_t*)buf)[0];
530
- start = ((uint32_t*)buf)[1];
531
- end = ((uint32_t*)buf)[2];
532
- buf += 12;
533
- str = (char*)buf;
534
- slen = strlen(str) + 1;
535
- buf += slen;
536
-
537
- if(entryTid < tid) continue;
538
- if(entryTid > tid) break;
539
- if(end <= ostart) continue;
540
- if(start >= oend) break;
541
-
542
- //Push the overlap
543
- if(!pushBBIntervals(output, start, end, str, withString)) goto error;
544
- }
545
-
546
- buf = bufEnd - tmp; //reset the buffer pointer
547
- }
548
-
549
- if(compressed && buf) free(buf);
550
- if(compBuf) free(compBuf);
551
- return output;
552
-
553
- error:
554
- fprintf(stderr, "[bbGetOverlappingEntriesCore] Got an error\n");
555
- buf = bufEnd - tmp;
556
- if(output) bbDestroyOverlappingEntries(output);
557
- if(compressed && buf) free(buf);
558
- if(compBuf) free(compBuf);
559
- return NULL;
560
- }
561
-
562
- //Returns NULL on error OR no intervals, which is a bad design...
563
- bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end) {
564
- bwOverlappingIntervals_t *output;
565
- uint32_t tid = bwGetTid(fp, chrom);
566
- if(tid == (uint32_t) -1) return NULL;
567
- bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
568
- if(!blocks) return NULL;
569
- output = bwGetOverlappingIntervalsCore(fp, blocks, tid, start, end);
570
- destroyBWOverlapBlock(blocks);
571
- return output;
572
- }
573
-
574
- //Like above, but for bigBed files
575
- bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString) {
576
- bbOverlappingEntries_t *output;
577
- uint32_t tid = bwGetTid(fp, chrom);
578
- if(tid == (uint32_t) -1) return NULL;
579
- bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(fp, chrom, start, end);
580
- if(!blocks) return NULL;
581
- output = bbGetOverlappingEntriesCore(fp, blocks, tid, start, end, withString);
582
- destroyBWOverlapBlock(blocks);
583
- return output;
584
- }
585
-
586
- //Returns NULL on error
587
- bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration) {
588
- bwOverlapIterator_t *output = NULL;
589
- uint64_t n;
590
- uint32_t tid = bwGetTid(bw, chrom);
591
- if(tid == (uint32_t) -1) return output;
592
- output = calloc(1, sizeof(bwOverlapIterator_t));
593
- if(!output) return output;
594
- bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
595
-
596
- output->bw = bw;
597
- output->tid = tid;
598
- output->start = start;
599
- output->end = end;
600
- output->blocks = blocks;
601
- output->blocksPerIteration = blocksPerIteration;
602
-
603
- if(blocks) {
604
- n = blocks->n;
605
- if(n>blocksPerIteration) blocks->n = blocksPerIteration;
606
- output->intervals = bwGetOverlappingIntervalsCore(bw, blocks,tid, start, end);
607
- blocks->n = n;
608
- output->offset = blocksPerIteration;
609
- }
610
- output->data = output->intervals;
611
- return output;
612
- }
613
-
614
- //Returns NULL on error
615
- bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *bw, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration) {
616
- bwOverlapIterator_t *output = NULL;
617
- uint64_t n;
618
- uint32_t tid = bwGetTid(bw, chrom);
619
- if(tid == (uint32_t) -1) return output;
620
- output = calloc(1, sizeof(bwOverlapIterator_t));
621
- if(!output) return output;
622
- bwOverlapBlock_t *blocks = bwGetOverlappingBlocks(bw, chrom, start, end);
623
-
624
- output->bw = bw;
625
- output->tid = tid;
626
- output->start = start;
627
- output->end = end;
628
- output->blocks = blocks;
629
- output->blocksPerIteration = blocksPerIteration;
630
- output->withString = withString;
631
-
632
- if(blocks) {
633
- n = blocks->n;
634
- if(n>blocksPerIteration) blocks->n = blocksPerIteration;
635
- output->entries = bbGetOverlappingEntriesCore(bw, blocks,tid, start, end, withString);
636
- blocks->n = n;
637
- output->offset = blocksPerIteration;
638
- }
639
- output->data = output->entries;
640
- return output;
641
- }
642
-
643
- void bwIteratorDestroy(bwOverlapIterator_t *iter) {
644
- if(!iter) return;
645
- if(iter->blocks) destroyBWOverlapBlock((bwOverlapBlock_t*) iter->blocks);
646
- if(iter->intervals) bwDestroyOverlappingIntervals(iter->intervals);
647
- if(iter->entries) bbDestroyOverlappingEntries(iter->entries);
648
- free(iter);
649
- }
650
-
651
- //On error, points to NULL and destroys the input
652
- bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter) {
653
- uint64_t n, *offset, *size;
654
- bwOverlapBlock_t *blocks = iter->blocks;
655
-
656
- if(iter->intervals) {
657
- bwDestroyOverlappingIntervals(iter->intervals);
658
- iter->intervals = NULL;
659
- }
660
- if(iter->entries) {
661
- bbDestroyOverlappingEntries(iter->entries);
662
- iter->entries = NULL;
663
- }
664
- iter->data = NULL;
665
-
666
- if(iter->offset < blocks->n) {
667
- //store the previous values
668
- n = blocks->n;
669
- offset = blocks->offset;
670
- size = blocks->size;
671
-
672
- //Move the start of the blocks
673
- blocks->offset += iter->offset;
674
- blocks->size += iter->offset;
675
- if(iter->offset + iter->blocksPerIteration > n) {
676
- blocks->n = blocks->n - iter->offset;
677
- } else {
678
- blocks->n = iter->blocksPerIteration;
679
- }
680
-
681
- //Get the intervals or entries, as appropriate
682
- if(iter->bw->type == 0) {
683
- //bigWig
684
- iter->intervals = bwGetOverlappingIntervalsCore(iter->bw, blocks, iter->tid, iter->start, iter->end);
685
- iter->data = iter->intervals;
686
- } else {
687
- //bigBed
688
- iter->entries = bbGetOverlappingEntriesCore(iter->bw, blocks, iter->tid, iter->start, iter->end, iter->withString);
689
- iter->data = iter->entries;
690
- }
691
- iter->offset += iter->blocksPerIteration;
692
-
693
- //reset the values in iter->blocks
694
- blocks->n = n;
695
- blocks->offset = offset;
696
- blocks->size = size;
697
-
698
- //Check for error
699
- if(!iter->intervals && !iter->entries) goto error;
700
- }
701
-
702
- return iter;
703
-
704
- error:
705
- bwIteratorDestroy(iter);
706
- return NULL;
707
- }
708
-
709
- //This is like bwGetOverlappingIntervals, except it returns 1 base windows. If includeNA is not 0, then a value will be returned for every position in the range (defaulting to NAN).
710
- //The ->end member is NULL
711
- //If includeNA is not 0 then ->start is also NULL, since it's implied
712
- //Note that bwDestroyOverlappingIntervals() will work in either case
713
- bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA) {
714
- uint32_t i, j, n;
715
- bwOverlappingIntervals_t *output = NULL;
716
- bwOverlappingIntervals_t *intermediate = bwGetOverlappingIntervals(fp, chrom, start, end);
717
- if(!intermediate) return NULL;
718
-
719
- output = calloc(1, sizeof(bwOverlappingIntervals_t));
720
- if(!output) goto error;
721
- if(includeNA) {
722
- output->l = end-start;
723
- output->value = malloc(output->l*sizeof(float));
724
- if(!output->value) goto error;
725
- for(i=0; i<output->l; i++) output->value[i] = NAN;
726
- for(i=0; i<intermediate->l; i++) {
727
- for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
728
- if(j < start || j >= end) continue;
729
- output->value[j-start] = intermediate->value[i];
730
- }
731
- }
732
- } else {
733
- n = 0;
734
- for(i=0; i<intermediate->l; i++) {
735
- if(intermediate->start[i] < start) intermediate->start[i] = start;
736
- if(intermediate->end[i] > end) intermediate->end[i] = end;
737
- n += intermediate->end[i]-intermediate->start[i];
738
- }
739
- output->l = n;
740
- output->start = malloc(sizeof(uint32_t)*n);
741
- if(!output->start) goto error;
742
- output->value = malloc(sizeof(float)*n);
743
- if(!output->value) goto error;
744
- n = 0; //this is now the index
745
- for(i=0; i<intermediate->l; i++) {
746
- for(j=intermediate->start[i]; j<intermediate->end[i]; j++) {
747
- if(j < start || j >= end) continue;
748
- output->start[n] = j;
749
- output->value[n++] = intermediate->value[i];
750
- }
751
- }
752
- }
753
-
754
- bwDestroyOverlappingIntervals(intermediate);
755
- return output;
756
-
757
- error:
758
- if(intermediate) bwDestroyOverlappingIntervals(intermediate);
759
- if(output) bwDestroyOverlappingIntervals(output);
760
- return NULL;
761
- }
762
-
763
- void bwDestroyIndexNode(bwRTreeNode_t *node) {
764
- uint16_t i;
765
-
766
- if(!node) return;
767
-
768
- free(node->chrIdxStart);
769
- free(node->baseStart);
770
- free(node->chrIdxEnd);
771
- free(node->baseEnd);
772
- free(node->dataOffset);
773
- if(!node->isLeaf) {
774
- for(i=0; i<node->nChildren; i++) {
775
- bwDestroyIndexNode(node->x.child[i]);
776
- }
777
- free(node->x.child);
778
- } else {
779
- free(node->x.size);
780
- }
781
- free(node);
782
- }
783
-
784
- void bwDestroyIndex(bwRTree_t *idx) {
785
- bwDestroyIndexNode(idx->root);
786
- free(idx);
787
- }
788
-
789
- //Returns a pointer to the requested index (@offset, unless it's 0, in which case the index for the values is returned
790
- //Returns NULL on error
791
- bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset) {
792
- bwRTree_t *idx = readRTreeIdx(fp, offset);
793
- if(!idx) return NULL;
794
-
795
- //Read in the root node
796
- idx->root = bwGetRTreeNode(fp, idx->rootOffset);
797
-
798
- if(!idx->root) {
799
- bwDestroyIndex(idx);
800
- return NULL;
801
- }
802
- return idx;
803
- }