bio-bigwig 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,1333 +0,0 @@
1
- #include <limits.h>
2
- #include <float.h>
3
- #include <stdlib.h>
4
- #include <string.h>
5
- #include <math.h>
6
- #include "bigWig.h"
7
- #include "bwCommon.h"
8
-
9
- /// @cond SKIP
10
- struct val_t {
11
- uint32_t tid;
12
- uint32_t start;
13
- uint32_t nBases;
14
- float min, max, sum, sumsq;
15
- double scalar;
16
- struct val_t *next;
17
- };
18
- /// @endcond
19
-
20
- //Create a chromList_t and attach it to a bigWigFile_t *. Returns NULL on error
21
- //Note that chroms and lengths are duplicated, so you MUST free the input
22
- chromList_t *bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n) {
23
- int64_t i = 0;
24
- chromList_t *cl = calloc(1, sizeof(chromList_t));
25
- if(!cl) return NULL;
26
-
27
- cl->nKeys = n;
28
- cl->chrom = malloc(sizeof(char*)*n);
29
- cl->len = malloc(sizeof(uint32_t)*n);
30
- if(!cl->chrom) goto error;
31
- if(!cl->len) goto error;
32
-
33
- for(i=0; i<n; i++) {
34
- cl->len[i] = lengths[i];
35
- cl->chrom[i] = strdup(chroms[i]);
36
- if(!cl->chrom[i]) goto error;
37
- }
38
-
39
- return cl;
40
-
41
- error:
42
- if(i) {
43
- int64_t j;
44
- for(j=0; j<i; j++) free(cl->chrom[j]);
45
- }
46
- if(cl) {
47
- if(cl->chrom) free(cl->chrom);
48
- if(cl->len) free(cl->len);
49
- free(cl);
50
- }
51
- return NULL;
52
- }
53
-
54
- //If maxZooms == 0, then 0 is used (i.e., there are no zoom levels). If maxZooms < 0 or > 65535 then 10 is used.
55
- //TODO allow changing bufSize and blockSize
56
- int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms) {
57
- if(!fp->isWrite) return 1;
58
- bigWigHdr_t *hdr = calloc(1, sizeof(bigWigHdr_t));
59
- if(!hdr) return 2;
60
-
61
- hdr->version = 4;
62
- if(maxZooms < 0 || maxZooms > 65535) {
63
- hdr->nLevels = 10;
64
- } else {
65
- hdr->nLevels = maxZooms;
66
- }
67
-
68
- hdr->bufSize = 32768; //When the file is finalized this is reset if fp->writeBuffer->compressPsz is 0!
69
- hdr->minVal = DBL_MAX;
70
- hdr->maxVal = DBL_MIN;
71
- fp->hdr = hdr;
72
- fp->writeBuffer->blockSize = 64;
73
-
74
- //Allocate the writeBuffer buffers
75
- fp->writeBuffer->compressPsz = compressBound(hdr->bufSize);
76
- fp->writeBuffer->compressP = malloc(fp->writeBuffer->compressPsz);
77
- if(!fp->writeBuffer->compressP) return 3;
78
- fp->writeBuffer->p = calloc(1,hdr->bufSize);
79
- if(!fp->writeBuffer->p) return 4;
80
-
81
- return 0;
82
- }
83
-
84
- //return 0 on success
85
- static int writeAtPos(void *ptr, size_t sz, size_t nmemb, size_t pos, FILE *fp) {
86
- size_t curpos = ftell(fp);
87
- if(fseek(fp, pos, SEEK_SET)) return 1;
88
- if(fwrite(ptr, sz, nmemb, fp) != nmemb) return 2;
89
- if(fseek(fp, curpos, SEEK_SET)) return 3;
90
- return 0;
91
- }
92
-
93
- //We lose keySize bytes on error
94
- static int writeChromList(FILE *fp, chromList_t *cl) {
95
- uint16_t k;
96
- uint32_t j, magic = CIRTREE_MAGIC;
97
- uint32_t nperblock = (cl->nKeys > 0x7FFF) ? 0x7FFF : cl->nKeys; //Items per leaf/non-leaf, there are no unsigned ints in java :(
98
- uint32_t nblocks, keySize = 0, valSize = 8; //In theory valSize could be optimized, in practice that'd be annoying
99
- uint64_t i, nonLeafEnd, leafSize, nextLeaf;
100
- uint8_t eight;
101
- int64_t i64;
102
- char *chrom;
103
- size_t l;
104
-
105
- if(cl->nKeys > 1073676289) {
106
- fprintf(stderr, "[writeChromList] Error: Currently only 1,073,676,289 contigs are supported. If you really need more then please post a request on github.\n");
107
- return 1;
108
- }
109
- nblocks = cl->nKeys/nperblock;
110
- nblocks += ((cl->nKeys % nperblock) > 0)?1:0;
111
-
112
- for(i64=0; i64<cl->nKeys; i64++) {
113
- l = strlen(cl->chrom[i64]);
114
- if(l>keySize) keySize = l;
115
- }
116
- l--; //We don't null terminate strings, because schiess mich tot
117
- chrom = calloc(keySize, sizeof(char));
118
-
119
- //Write the root node of a largely pointless tree
120
- if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 1;
121
- if(fwrite(&nperblock, sizeof(uint32_t), 1, fp) != 1) return 2;
122
- if(fwrite(&keySize, sizeof(uint32_t), 1, fp) != 1) return 3;
123
- if(fwrite(&valSize, sizeof(uint32_t), 1, fp) != 1) return 4;
124
- if(fwrite(&(cl->nKeys), sizeof(uint64_t), 1, fp) != 1) return 5;
125
-
126
- //Padding?
127
- i=0;
128
- if(fwrite(&i, sizeof(uint64_t), 1, fp) != 1) return 6;
129
-
130
- //Do we need a non-leaf node?
131
- if(nblocks > 1) {
132
- eight = 0;
133
- if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 7;
134
- if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 8; //padding
135
- if(fwrite(&nblocks, sizeof(uint16_t), 1, fp) != 1) return 8;
136
- nonLeafEnd = ftell(fp) + nperblock * (keySize + 8);
137
- leafSize = nperblock * (keySize + 8) + 4;
138
- for(i=0; i<nblocks; i++) { //Why yes, this is pointless
139
- chrom = strncpy(chrom, cl->chrom[i * nperblock], keySize);
140
- nextLeaf = nonLeafEnd + i * leafSize;
141
- if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
142
- if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
143
- }
144
- for(i=0; i<keySize; i++) chrom[i] = '\0';
145
- nextLeaf = 0;
146
- for(i=nblocks; i<nperblock; i++) {
147
- if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
148
- if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
149
- }
150
- }
151
-
152
- //Write the leaves
153
- nextLeaf = 0;
154
- for(i=0, j=0; i<nblocks; i++) {
155
- eight = 1;
156
- if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 11;
157
- eight = 0;
158
- if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 12;
159
- if(cl->nKeys - j < nperblock) {
160
- k = cl->nKeys - j;
161
- if(fwrite(&k, sizeof(uint16_t), 1, fp) != 1) return 13;
162
- } else {
163
- if(fwrite(&nperblock, sizeof(uint16_t), 1, fp) != 1) return 13;
164
- }
165
- for(k=0; k<nperblock; k++) {
166
- if(j>=cl->nKeys) {
167
- if(chrom[0]) {
168
- for(l=0; l<keySize; l++) chrom[l] = '\0';
169
- }
170
- if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
171
- if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 16;
172
- } else {
173
- chrom = strncpy(chrom, cl->chrom[j], keySize);
174
- if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
175
- if(fwrite(&j, sizeof(uint32_t), 1, fp) != 1) return 16;
176
- if(fwrite(&(cl->len[j++]), sizeof(uint32_t), 1, fp) != 1) return 17;
177
- }
178
- }
179
- }
180
-
181
- free(chrom);
182
- return 0;
183
- }
184
-
185
- //returns 0 on success
186
- //Still need to fill in indexOffset
187
- int bwWriteHdr(bigWigFile_t *bw) {
188
- uint32_t magic = BIGWIG_MAGIC;
189
- uint16_t two = 4;
190
- FILE *fp;
191
- void *p = calloc(58, sizeof(uint8_t)); //58 bytes of nothing
192
- if(!bw->isWrite) return 1;
193
-
194
- //The header itself, largely just reserving space...
195
- fp = bw->URL->x.fp;
196
- if(!fp) return 2;
197
- if(fseek(fp, 0, SEEK_SET)) return 3;
198
- if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 4;
199
- if(fwrite(&two, sizeof(uint16_t), 1, fp) != 1) return 5;
200
- if(fwrite(p, sizeof(uint8_t), 58, fp) != 58) return 6;
201
-
202
- //Empty zoom headers
203
- if(bw->hdr->nLevels) {
204
- for(two=0; two<bw->hdr->nLevels; two++) {
205
- if(fwrite(p, sizeof(uint8_t), 24, fp) != 24) return 9;
206
- }
207
- }
208
-
209
- //Update summaryOffset and write an empty summary block
210
- bw->hdr->summaryOffset = ftell(fp);
211
- if(fwrite(p, sizeof(uint8_t), 40, fp) != 40) return 10;
212
- if(writeAtPos(&(bw->hdr->summaryOffset), sizeof(uint64_t), 1, 0x2c, fp)) return 11;
213
-
214
- //Write the chromosome list as a stupid freaking tree (because let's TREE ALL THE THINGS!!!)
215
- bw->hdr->ctOffset = ftell(fp);
216
- if(writeChromList(fp, bw->cl)) return 7;
217
- if(writeAtPos(&(bw->hdr->ctOffset), sizeof(uint64_t), 1, 0x8, fp)) return 8;
218
-
219
- //Update the dataOffset
220
- bw->hdr->dataOffset = ftell(fp);
221
- if(writeAtPos(&bw->hdr->dataOffset, sizeof(uint64_t), 1, 0x10, fp)) return 12;
222
-
223
- //Save space for the number of blocks
224
- if(fwrite(p, sizeof(uint8_t), 8, fp) != 8) return 13;
225
-
226
- free(p);
227
- return 0;
228
- }
229
-
230
- static int insertIndexNode(bigWigFile_t *fp, bwRTreeNode_t *leaf) {
231
- bwLL *l = malloc(sizeof(bwLL));
232
- if(!l) return 1;
233
- l->node = leaf;
234
- l->next = NULL;
235
-
236
- if(!fp->writeBuffer->firstIndexNode) {
237
- fp->writeBuffer->firstIndexNode = l;
238
- } else {
239
- fp->writeBuffer->currentIndexNode->next = l;
240
- }
241
- fp->writeBuffer->currentIndexNode = l;
242
- return 0;
243
- }
244
-
245
- //0 on success
246
- static int appendIndexNodeEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
247
- bwLL *n = fp->writeBuffer->currentIndexNode;
248
- if(!n) return 1;
249
- if(n->node->nChildren >= fp->writeBuffer->blockSize) return 2;
250
-
251
- n->node->chrIdxStart[n->node->nChildren] = tid0;
252
- n->node->baseStart[n->node->nChildren] = start;
253
- n->node->chrIdxEnd[n->node->nChildren] = tid1;
254
- n->node->baseEnd[n->node->nChildren] = end;
255
- n->node->dataOffset[n->node->nChildren] = offset;
256
- n->node->x.size[n->node->nChildren] = size;
257
- n->node->nChildren++;
258
- return 0;
259
- }
260
-
261
- //Returns 0 on success
262
- static int addIndexEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
263
- bwRTreeNode_t *node;
264
-
265
- if(appendIndexNodeEntry(fp, tid0, tid1, start, end, offset, size)) {
266
- //The last index node is full, we need to add a new one
267
- node = calloc(1, sizeof(bwRTreeNode_t));
268
- if(!node) return 1;
269
-
270
- //Allocate and set the fields
271
- node->isLeaf = 1;
272
- node->nChildren = 1;
273
- node->chrIdxStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
274
- if(!node->chrIdxStart) goto error;
275
- node->baseStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
276
- if(!node->baseStart) goto error;
277
- node->chrIdxEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
278
- if(!node->chrIdxEnd) goto error;
279
- node->baseEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
280
- if(!node->baseEnd) goto error;
281
- node->dataOffset = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
282
- if(!node->dataOffset) goto error;
283
- node->x.size = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
284
- if(!node->x.size) goto error;
285
-
286
- node->chrIdxStart[0] = tid0;
287
- node->baseStart[0] = start;
288
- node->chrIdxEnd[0] = tid1;
289
- node->baseEnd[0] = end;
290
- node->dataOffset[0] = offset;
291
- node->x.size[0] = size;
292
-
293
- if(insertIndexNode(fp, node)) goto error;
294
- }
295
-
296
- return 0;
297
-
298
- error:
299
- if(node->chrIdxStart) free(node->chrIdxStart);
300
- if(node->baseStart) free(node->baseStart);
301
- if(node->chrIdxEnd) free(node->chrIdxEnd);
302
- if(node->baseEnd) free(node->baseEnd);
303
- if(node->dataOffset) free(node->dataOffset);
304
- if(node->x.size) free(node->x.size);
305
- return 2;
306
- }
307
-
308
- /*
309
- * TODO:
310
- * The buffer size and compression sz need to be determined elsewhere (and p and compressP filled in!)
311
- */
312
- static int flushBuffer(bigWigFile_t *fp) {
313
- bwWriteBuffer_t *wb = fp->writeBuffer;
314
- uLongf sz = wb->compressPsz;
315
- uint16_t nItems;
316
- if(!fp->writeBuffer->l) return 0;
317
- if(!wb->ltype) return 0;
318
-
319
- //Fill in the header
320
- if(!memcpy(wb->p, &(wb->tid), sizeof(uint32_t))) return 1;
321
- if(!memcpy(wb->p+4, &(wb->start), sizeof(uint32_t))) return 2;
322
- if(!memcpy(wb->p+8, &(wb->end), sizeof(uint32_t))) return 3;
323
- if(!memcpy(wb->p+12, &(wb->step), sizeof(uint32_t))) return 4;
324
- if(!memcpy(wb->p+16, &(wb->span), sizeof(uint32_t))) return 5;
325
- if(!memcpy(wb->p+20, &(wb->ltype), sizeof(uint8_t))) return 6;
326
- //1 byte padding
327
- //Determine the number of items
328
- switch(wb->ltype) {
329
- case 1:
330
- nItems = (wb->l-24)/12;
331
- break;
332
- case 2:
333
- nItems = (wb->l-24)/8;
334
- break;
335
- case 3:
336
- nItems = (wb->l-24)/4;
337
- break;
338
- default:
339
- return 7;
340
- }
341
- if(!memcpy(wb->p+22, &nItems, sizeof(uint16_t))) return 8;
342
-
343
- if(sz) {
344
- //compress
345
- if(compress(wb->compressP, &sz, wb->p, wb->l) != Z_OK) return 9;
346
-
347
- //write the data to disk
348
- if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 10;
349
- } else {
350
- sz = wb->l;
351
- if(fwrite(wb->p, sizeof(uint8_t), wb->l, fp->URL->x.fp) != wb->l) return 10;
352
- }
353
-
354
- //Add an entry into the index
355
- if(addIndexEntry(fp, wb->tid, wb->tid, wb->start, wb->end, bwTell(fp)-sz, sz)) return 11;
356
-
357
- wb->nBlocks++;
358
- wb->l = 24;
359
- return 0;
360
- }
361
-
362
- static void updateStats(bigWigFile_t *fp, uint32_t span, float val) {
363
- if(val < fp->hdr->minVal) fp->hdr->minVal = val;
364
- else if(val > fp->hdr->maxVal) fp->hdr->maxVal = val;
365
- fp->hdr->nBasesCovered += span;
366
- fp->hdr->sumData += span*val;
367
- fp->hdr->sumSquared += span*pow(val,2);
368
-
369
- fp->writeBuffer->nEntries++;
370
- fp->writeBuffer->runningWidthSum += span;
371
- }
372
-
373
- //12 bytes per entry
374
- int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n) {
375
- uint32_t tid = 0, i;
376
- char *lastChrom = NULL;
377
- bwWriteBuffer_t *wb = fp->writeBuffer;
378
- if(!n) return 0; //Not an error per se
379
- if(!fp->isWrite) return 1;
380
- if(!wb) return 2;
381
-
382
- //Flush if needed
383
- if(wb->ltype != 1) if(flushBuffer(fp)) return 3;
384
- if(wb->l+36 > fp->hdr->bufSize) if(flushBuffer(fp)) return 4;
385
- lastChrom = chrom[0];
386
- tid = bwGetTid(fp, chrom[0]);
387
- if(tid == (uint32_t) -1) return 5;
388
- if(tid != wb->tid) {
389
- if(flushBuffer(fp)) return 6;
390
- wb->tid = tid;
391
- wb->start = start[0];
392
- wb->end = end[0];
393
- }
394
-
395
- //Ensure that everything is set correctly
396
- wb->ltype = 1;
397
- if(wb->l <= 24) {
398
- wb->start = start[0];
399
- wb->span = 0;
400
- wb->step = 0;
401
- }
402
- if(!memcpy(wb->p+wb->l, start, sizeof(uint32_t))) return 7;
403
- if(!memcpy(wb->p+wb->l+4, end, sizeof(uint32_t))) return 8;
404
- if(!memcpy(wb->p+wb->l+8, values, sizeof(float))) return 9;
405
- updateStats(fp, end[0]-start[0], values[0]);
406
- wb->l += 12;
407
-
408
- for(i=1; i<n; i++) {
409
- if(strcmp(chrom[i],lastChrom) != 0) {
410
- wb->end = end[i-1];
411
- flushBuffer(fp);
412
- lastChrom = chrom[i];
413
- tid = bwGetTid(fp, chrom[i]);
414
- if(tid == (uint32_t) -1) return 10;
415
- wb->tid = tid;
416
- wb->start = start[i];
417
- }
418
- if(wb->l+12 > fp->hdr->bufSize) { //12 bytes/entry
419
- wb->end = end[i-1];
420
- flushBuffer(fp);
421
- wb->start = start[i];
422
- }
423
- if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 11;
424
- if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 12;
425
- if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 13;
426
- updateStats(fp, end[i]-start[i], values[i]);
427
- wb->l += 12;
428
- }
429
- wb->end = end[i-1];
430
-
431
- return 0;
432
- }
433
-
434
- int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n) {
435
- uint32_t i;
436
- bwWriteBuffer_t *wb = fp->writeBuffer;
437
- if(!n) return 0;
438
- if(!fp->isWrite) return 1;
439
- if(!wb) return 2;
440
- if(wb->ltype != 1) return 3;
441
-
442
- for(i=0; i<n; i++) {
443
- if(wb->l+12 > fp->hdr->bufSize) {
444
- if(i>0) { //otherwise it's already set
445
- wb->end = end[i-1];
446
- }
447
- flushBuffer(fp);
448
- wb->start = start[i];
449
- }
450
- if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
451
- if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 5;
452
- if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 6;
453
- updateStats(fp, end[i]-start[i], values[i]);
454
- wb->l += 12;
455
- }
456
- wb->end = end[i-1];
457
-
458
- return 0;
459
- }
460
-
461
- //8 bytes per entry
462
- int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n) {
463
- uint32_t i, tid;
464
- bwWriteBuffer_t *wb = fp->writeBuffer;
465
- if(!n) return 0;
466
- if(!fp->isWrite) return 1;
467
- if(!wb) return 2;
468
- if(wb->ltype != 2) if(flushBuffer(fp)) return 3;
469
- if(flushBuffer(fp)) return 4;
470
-
471
- tid = bwGetTid(fp, chrom);
472
- if(tid == (uint32_t) -1) return 5;
473
- wb->tid = tid;
474
- wb->start = start[0];
475
- wb->step = 0;
476
- wb->span = span;
477
- wb->ltype = 2;
478
-
479
- for(i=0; i<n; i++) {
480
- if(wb->l + 8 >= fp->hdr->bufSize) { //8 bytes/entry
481
- if(i) wb->end = start[i-1]+span;
482
- flushBuffer(fp);
483
- wb->start = start[i];
484
- }
485
- if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 5;
486
- if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 6;
487
- updateStats(fp, span, values[i]);
488
- wb->l += 8;
489
- }
490
- wb->end = start[n-1] + span;
491
-
492
- return 0;
493
- }
494
-
495
- int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n) {
496
- uint32_t i;
497
- bwWriteBuffer_t *wb = fp->writeBuffer;
498
- if(!n) return 0;
499
- if(!fp->isWrite) return 1;
500
- if(!wb) return 2;
501
- if(wb->ltype != 2) return 3;
502
-
503
- for(i=0; i<n; i++) {
504
- if(wb->l + 8 >= fp->hdr->bufSize) {
505
- if(i) wb->end = start[i-1]+wb->span;
506
- flushBuffer(fp);
507
- wb->start = start[i];
508
- }
509
- if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
510
- if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 5;
511
- updateStats(fp, wb->span, values[i]);
512
- wb->l += 8;
513
- }
514
- wb->end = start[n-1] + wb->span;
515
-
516
- return 0;
517
- }
518
-
519
- //4 bytes per entry
520
- int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n) {
521
- uint32_t i, tid;
522
- bwWriteBuffer_t *wb = fp->writeBuffer;
523
- if(!n) return 0;
524
- if(!fp->isWrite) return 1;
525
- if(!wb) return 2;
526
- if(wb->ltype != 3) flushBuffer(fp);
527
- if(flushBuffer(fp)) return 3;
528
-
529
- tid = bwGetTid(fp, chrom);
530
- if(tid == (uint32_t) -1) return 4;
531
- wb->tid = tid;
532
- wb->start = start;
533
- wb->step = step;
534
- wb->span = span;
535
- wb->ltype = 3;
536
-
537
- for(i=0; i<n; i++) {
538
- if(wb->l + 4 >= fp->hdr->bufSize) {
539
- wb->end = wb->start + ((wb->l-24)>>2) * step;
540
- flushBuffer(fp);
541
- wb->start = wb->end;
542
- }
543
- if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 5;
544
- updateStats(fp, wb->span, values[i]);
545
- wb->l += 4;
546
- }
547
- wb->end = wb->start + (wb->l>>2) * step;
548
-
549
- return 0;
550
- }
551
-
552
- int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n) {
553
- uint32_t i;
554
- bwWriteBuffer_t *wb = fp->writeBuffer;
555
- if(!n) return 0;
556
- if(!fp->isWrite) return 1;
557
- if(!wb) return 2;
558
- if(wb->ltype != 3) return 3;
559
-
560
- for(i=0; i<n; i++) {
561
- if(wb->l + 4 >= fp->hdr->bufSize) {
562
- wb->end = wb->start + ((wb->l-24)>>2) * wb->step;
563
- flushBuffer(fp);
564
- wb->start = wb->end;
565
- }
566
- if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 4;
567
- updateStats(fp, wb->span, values[i]);
568
- wb->l += 4;
569
- }
570
- wb->end = wb->start + (wb->l>>2) * wb->step;
571
-
572
- return 0;
573
- }
574
-
575
- //0 on success
576
- int writeSummary(bigWigFile_t *fp) {
577
- if(writeAtPos(&(fp->hdr->nBasesCovered), sizeof(uint64_t), 1, fp->hdr->summaryOffset, fp->URL->x.fp)) return 1;
578
- if(writeAtPos(&(fp->hdr->minVal), sizeof(double), 1, fp->hdr->summaryOffset+8, fp->URL->x.fp)) return 2;
579
- if(writeAtPos(&(fp->hdr->maxVal), sizeof(double), 1, fp->hdr->summaryOffset+16, fp->URL->x.fp)) return 3;
580
- if(writeAtPos(&(fp->hdr->sumData), sizeof(double), 1, fp->hdr->summaryOffset+24, fp->URL->x.fp)) return 4;
581
- if(writeAtPos(&(fp->hdr->sumSquared), sizeof(double), 1, fp->hdr->summaryOffset+32, fp->URL->x.fp)) return 5;
582
- return 0;
583
- }
584
-
585
- static bwRTreeNode_t *makeEmptyNode(uint32_t blockSize) {
586
- bwRTreeNode_t *n = calloc(1, sizeof(bwRTreeNode_t));
587
- if(!n) return NULL;
588
-
589
- n->chrIdxStart = malloc(blockSize*sizeof(uint32_t));
590
- if(!n->chrIdxStart) goto error;
591
- n->baseStart = malloc(blockSize*sizeof(uint32_t));
592
- if(!n->baseStart) goto error;
593
- n->chrIdxEnd = malloc(blockSize*sizeof(uint32_t));
594
- if(!n->chrIdxEnd) goto error;
595
- n->baseEnd = malloc(blockSize*sizeof(uint32_t));
596
- if(!n->baseEnd) goto error;
597
- n->dataOffset = calloc(blockSize,sizeof(uint64_t)); //This MUST be 0 for node writing!
598
- if(!n->dataOffset) goto error;
599
- n->x.child = malloc(blockSize*sizeof(uint64_t));
600
- if(!n->x.child) goto error;
601
-
602
- return n;
603
-
604
- error:
605
- if(n->chrIdxStart) free(n->chrIdxStart);
606
- if(n->baseStart) free(n->baseStart);
607
- if(n->chrIdxEnd) free(n->chrIdxEnd);
608
- if(n->baseEnd) free(n->baseEnd);
609
- if(n->dataOffset) free(n->dataOffset);
610
- if(n->x.child) free(n->x.child);
611
- free(n);
612
- return NULL;
613
- }
614
-
615
- //Returns 0 on success. This doesn't attempt to clean up!
616
- static bwRTreeNode_t *addLeaves(bwLL **ll, uint64_t *sz, uint64_t toProcess, uint32_t blockSize) {
617
- uint32_t i;
618
- uint64_t foo;
619
- bwRTreeNode_t *n = makeEmptyNode(blockSize);
620
- if(!n) return NULL;
621
-
622
- if(toProcess <= blockSize) {
623
- for(i=0; i<toProcess; i++) {
624
- n->chrIdxStart[i] = (*ll)->node->chrIdxStart[0];
625
- n->baseStart[i] = (*ll)->node->baseStart[0];
626
- n->chrIdxEnd[i] = (*ll)->node->chrIdxEnd[(*ll)->node->nChildren-1];
627
- n->baseEnd[i] = (*ll)->node->baseEnd[(*ll)->node->nChildren-1];
628
- n->x.child[i] = (*ll)->node;
629
- *sz += 4 + 32*(*ll)->node->nChildren;
630
- *ll = (*ll)->next;
631
- n->nChildren++;
632
- }
633
- } else {
634
- for(i=0; i<blockSize; i++) {
635
- foo = ceil(((double) toProcess)/((double) blockSize-i));
636
- if(!ll) break;
637
- n->x.child[i] = addLeaves(ll, sz, foo, blockSize);
638
- if(!n->x.child[i]) goto error;
639
- n->chrIdxStart[i] = n->x.child[i]->chrIdxStart[0];
640
- n->baseStart[i] = n->x.child[i]->baseStart[0];
641
- n->chrIdxEnd[i] = n->x.child[i]->chrIdxEnd[n->x.child[i]->nChildren-1];
642
- n->baseEnd[i] = n->x.child[i]->baseEnd[n->x.child[i]->nChildren-1];
643
- n->nChildren++;
644
- toProcess -= foo;
645
- }
646
- }
647
-
648
- *sz += 4 + 24*n->nChildren;
649
- return n;
650
-
651
- error:
652
- bwDestroyIndexNode(n);
653
- return NULL;
654
- }
655
-
656
- //Returns 1 on error
657
- int writeIndexTreeNode(FILE *fp, bwRTreeNode_t *n, uint8_t *wrote, int level) {
658
- uint8_t one = 0;
659
- uint32_t i, j, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
660
-
661
- if(n->isLeaf) return 0;
662
-
663
- for(i=0; i<n->nChildren; i++) {
664
- if(n->dataOffset[i]) { //traverse into child
665
- if(n->isLeaf) return 0; //Only write leaves once!
666
- if(writeIndexTreeNode(fp, n->x.child[i], wrote, level+1)) return 1;
667
- } else {
668
- n->dataOffset[i] = ftell(fp);
669
- if(fwrite(&(n->x.child[i]->isLeaf), sizeof(uint8_t), 1, fp) != 1) return 1;
670
- if(fwrite(&one, sizeof(uint8_t), 1, fp) != 1) return 1; //one byte of padding
671
- if(fwrite(&(n->x.child[i]->nChildren), sizeof(uint16_t), 1, fp) != 1) return 1;
672
- for(j=0; j<n->x.child[i]->nChildren; j++) {
673
- vector[0] = n->x.child[i]->chrIdxStart[j];
674
- vector[1] = n->x.child[i]->baseStart[j];
675
- vector[2] = n->x.child[i]->chrIdxEnd[j];
676
- vector[3] = n->x.child[i]->baseEnd[j];
677
- if(n->x.child[i]->isLeaf) {
678
- //Include the offset and size
679
- if(fwrite(vector, sizeof(uint32_t), 4, fp) != 4) return 1;
680
- if(fwrite(&(n->x.child[i]->dataOffset[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
681
- if(fwrite(&(n->x.child[i]->x.size[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
682
- } else {
683
- if(fwrite(vector, sizeof(uint32_t), 6, fp) != 6) return 1;
684
- }
685
- }
686
- *wrote = 1;
687
- }
688
- }
689
-
690
- return 0;
691
- }
692
-
693
- //returns 1 on success
694
- int writeIndexOffsets(FILE *fp, bwRTreeNode_t *n, uint64_t offset) {
695
- uint32_t i;
696
-
697
- if(n->isLeaf) return 0;
698
- for(i=0; i<n->nChildren; i++) {
699
- if(writeIndexOffsets(fp, n->x.child[i], n->dataOffset[i])) return 1;
700
- if(writeAtPos(&(n->dataOffset[i]), sizeof(uint64_t), 1, offset+20+24*i, fp)) return 2;
701
- }
702
- return 0;
703
- }
704
-
705
- //Returns 0 on success
706
- int writeIndexTree(bigWigFile_t *fp) {
707
- uint64_t offset;
708
- uint8_t wrote = 0;
709
- int rv;
710
-
711
- while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->idx->root, &wrote, 0)) == 0) {
712
- if(!wrote) break;
713
- wrote = 0;
714
- }
715
-
716
- if(rv || wrote) return 1;
717
-
718
- //Save the file position
719
- offset = bwTell(fp);
720
-
721
- //Write the offsets
722
- if(writeIndexOffsets(fp->URL->x.fp, fp->idx->root, fp->idx->rootOffset)) return 2;
723
-
724
- //Move the file pointer back to the end
725
- bwSetPos(fp, offset);
726
-
727
- return 0;
728
- }
729
-
730
- //Returns 0 on success. The original state SHOULD be preserved on error
731
- int writeIndex(bigWigFile_t *fp) {
732
- uint32_t four = IDX_MAGIC;
733
- uint64_t idxSize = 0, foo;
734
- uint8_t one = 0;
735
- uint32_t i, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
736
- bwLL *ll = fp->writeBuffer->firstIndexNode, *p;
737
- bwRTreeNode_t *root = NULL;
738
-
739
- if(!fp->writeBuffer->nBlocks) return 0;
740
- fp->idx = malloc(sizeof(bwRTree_t));
741
- if(!fp->idx) return 2;
742
- fp->idx->root = root;
743
-
744
- //Update the file header to indicate the proper index position
745
- foo = bwTell(fp);
746
- if(writeAtPos(&foo, sizeof(uint64_t), 1, 0x18, fp->URL->x.fp)) return 3;
747
-
748
- //Make the tree
749
- if(ll == fp->writeBuffer->currentIndexNode) {
750
- root = ll->node;
751
- idxSize = 4 + 24*root->nChildren;
752
- } else {
753
- root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
754
- }
755
- if(!root) return 4;
756
- fp->idx->root = root;
757
-
758
- ll = fp->writeBuffer->firstIndexNode;
759
- while(ll) {
760
- p = ll->next;
761
- free(ll);
762
- ll=p;
763
- }
764
-
765
- //write the header
766
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 5;
767
- if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
768
- if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
769
- if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
770
- if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
771
- if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
772
- if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
773
- if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
774
- four = 1;
775
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
776
- four = 0;
777
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
778
- fp->idx->rootOffset = bwTell(fp);
779
-
780
- //Write the root node, since writeIndexTree writes the children and fills in the offset
781
- if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
782
- if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
783
- if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
784
- for(i=0; i<root->nChildren; i++) {
785
- vector[0] = root->chrIdxStart[i];
786
- vector[1] = root->baseStart[i];
787
- vector[2] = root->chrIdxEnd[i];
788
- vector[3] = root->baseEnd[i];
789
- if(root->isLeaf) {
790
- //Include the offset and size
791
- if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
792
- if(fwrite(&(root->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
793
- if(fwrite(&(root->x.size[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
794
- } else {
795
- root->dataOffset[i] = 0; //FIXME: Something upstream is setting this to impossible values (e.g., 0x21?!?!?)
796
- if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
797
- }
798
- }
799
-
800
- //Write each level
801
- if(writeIndexTree(fp)) return 23;
802
-
803
- return 0;
804
- }
805
-
806
- //The first zoom level has a resolution of 4x mean entry size
807
- //This may or may not produce the requested number of zoom levels
808
- int makeZoomLevels(bigWigFile_t *fp) {
809
- uint32_t meanBinSize, i;
810
- uint32_t multiplier = 4, zoom = 10, maxZoom = 0;
811
- uint16_t nLevels = 0;
812
-
813
- meanBinSize = ((double) fp->writeBuffer->runningWidthSum)/(fp->writeBuffer->nEntries);
814
- //In reality, one level is skipped
815
- meanBinSize *= 4;
816
- //N.B., we must ALWAYS check that the zoom doesn't overflow a uint32_t!
817
- if(((uint32_t)-1)>>2 < meanBinSize) return 0; //No zoom levels!
818
- if(meanBinSize*4 > zoom) zoom = multiplier*meanBinSize;
819
-
820
- fp->hdr->zoomHdrs = calloc(1, sizeof(bwZoomHdr_t));
821
- if(!fp->hdr->zoomHdrs) return 1;
822
- fp->hdr->zoomHdrs->level = malloc(fp->hdr->nLevels * sizeof(uint32_t));
823
- fp->hdr->zoomHdrs->dataOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
824
- fp->hdr->zoomHdrs->indexOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
825
- fp->hdr->zoomHdrs->idx = calloc(fp->hdr->nLevels, sizeof(bwRTree_t*));
826
- if(!fp->hdr->zoomHdrs->level) return 2;
827
- if(!fp->hdr->zoomHdrs->dataOffset) return 3;
828
- if(!fp->hdr->zoomHdrs->indexOffset) return 4;
829
- if(!fp->hdr->zoomHdrs->idx) return 5;
830
-
831
- //There's no point in having a zoom level larger than the largest chromosome
832
- //This will none the less allow at least one zoom level, which is generally needed for IGV et al.
833
- for(i=0; i<fp->cl->nKeys; i++) {
834
- if(fp->cl->len[i] > maxZoom) maxZoom = fp->cl->len[i];
835
- }
836
- if(zoom > maxZoom) zoom = maxZoom;
837
-
838
- for(i=0; i<fp->hdr->nLevels; i++) {
839
- if(zoom > maxZoom) break; //prevent absurdly large zoom levels
840
- fp->hdr->zoomHdrs->level[i] = zoom;
841
- nLevels++;
842
- if(((uint32_t)-1)/multiplier < zoom) break;
843
- zoom *= multiplier;
844
- }
845
- fp->hdr->nLevels = nLevels;
846
-
847
- fp->writeBuffer->firstZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
848
- if(!fp->writeBuffer->firstZoomBuffer) goto error;
849
- fp->writeBuffer->lastZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
850
- if(!fp->writeBuffer->lastZoomBuffer) goto error;
851
- fp->writeBuffer->nNodes = calloc(nLevels, sizeof(uint64_t));
852
-
853
- for(i=0; i<fp->hdr->nLevels; i++) {
854
- fp->writeBuffer->firstZoomBuffer[i] = calloc(1, sizeof(bwZoomBuffer_t));
855
- if(!fp->writeBuffer->firstZoomBuffer[i]) goto error;
856
- fp->writeBuffer->firstZoomBuffer[i]->p = calloc(fp->hdr->bufSize/32, 32);
857
- if(!fp->writeBuffer->firstZoomBuffer[i]->p) goto error;
858
- fp->writeBuffer->firstZoomBuffer[i]->m = fp->hdr->bufSize;
859
- ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[0] = 0;
860
- ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[1] = 0;
861
- ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->hdr->zoomHdrs->level[i];
862
- if(fp->hdr->zoomHdrs->level[i] > fp->cl->len[0]) ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->cl->len[0];
863
- fp->writeBuffer->lastZoomBuffer[i] = fp->writeBuffer->firstZoomBuffer[i];
864
- }
865
-
866
- return 0;
867
-
868
- error:
869
- if(fp->writeBuffer->firstZoomBuffer) {
870
- for(i=0; i<fp->hdr->nLevels; i++) {
871
- if(fp->writeBuffer->firstZoomBuffer[i]) {
872
- if(fp->writeBuffer->firstZoomBuffer[i]->p) free(fp->writeBuffer->firstZoomBuffer[i]->p);
873
- free(fp->writeBuffer->firstZoomBuffer[i]);
874
- }
875
- }
876
- free(fp->writeBuffer->firstZoomBuffer);
877
- }
878
- if(fp->writeBuffer->lastZoomBuffer) free(fp->writeBuffer->lastZoomBuffer);
879
- if(fp->writeBuffer->nNodes) free(fp->writeBuffer->lastZoomBuffer);
880
- return 6;
881
- }
882
-
883
- //Given an interval start, calculate the next one at a zoom level
884
- void nextPos(bigWigFile_t *fp, uint32_t size, uint32_t *pos, uint32_t desiredTid) {
885
- uint32_t *tid = pos;
886
- uint32_t *start = pos+1;
887
- uint32_t *end = pos+2;
888
- *start += size;
889
- if(*start >= fp->cl->len[*tid]) {
890
- (*start) = 0;
891
- (*tid)++;
892
- }
893
-
894
- //prevent needless iteration when changing chromosomes
895
- if(*tid < desiredTid) {
896
- *tid = desiredTid;
897
- *start = 0;
898
- }
899
-
900
- (*end) = *start+size;
901
- if(*end > fp->cl->len[*tid]) (*end) = fp->cl->len[*tid];
902
- }
903
-
904
- //Return the number of bases two intervals overlap
905
- uint32_t overlapsInterval(uint32_t tid0, uint32_t start0, uint32_t end0, uint32_t tid1, uint32_t start1, uint32_t end1) {
906
- if(tid0 != tid1) return 0;
907
- if(end0 <= start1) return 0;
908
- if(end1 <= start0) return 0;
909
- if(end0 <= end1) {
910
- if(start1 > start0) return end0-start1;
911
- return end0-start0;
912
- } else {
913
- if(start1 > start0) return end1-start1;
914
- return end1-start0;
915
- }
916
- }
917
-
918
- //Returns the number of bases of the interval written
919
- uint32_t updateInterval(bigWigFile_t *fp, bwZoomBuffer_t *buffer, double *sum, double *sumsq, uint32_t size, uint32_t tid, uint32_t start, uint32_t end, float value) {
920
- uint32_t *p2 = (uint32_t*) buffer->p;
921
- float *fp2 = (float*) p2;
922
- uint32_t rv = 0, offset = 0;
923
- if(!buffer) return 0;
924
- if(buffer->l+32 >= buffer->m) return 0;
925
-
926
- //Make sure that we don't overflow a uint32_t by adding some huge value to start
927
- if(start + size < start) size = ((uint32_t) -1) - start;
928
-
929
- if(buffer->l) {
930
- offset = buffer->l/32;
931
- } else {
932
- p2[0] = tid;
933
- p2[1] = start;
934
- if(start+size < end) p2[2] = start+size;
935
- else p2[2] = end;
936
- }
937
-
938
- //Do we have any overlap with the previously added interval?
939
- if(offset) {
940
- rv = overlapsInterval(p2[8*(offset-1)], p2[8*(offset-1)+1], p2[8*(offset-1)+1] + size, tid, start, end);
941
- if(rv) {
942
- p2[8*(offset-1)+2] = start + rv;
943
- p2[8*(offset-1)+3] += rv;
944
- if(fp2[8*(offset-1)+4] > value) fp2[8*(offset-1)+4] = value;
945
- if(fp2[8*(offset-1)+5] < value) fp2[8*(offset-1)+5] = value;
946
- *sum += rv*value;
947
- *sumsq += rv*pow(value, 2.0);
948
- return rv;
949
- } else {
950
- fp2[8*(offset-1)+6] = *sum;
951
- fp2[8*(offset-1)+7] = *sumsq;
952
- *sum = 0.0;
953
- *sumsq = 0.0;
954
- }
955
- }
956
-
957
- //If we move to a new interval then skip iterating over a bunch of obviously non-overlapping intervals
958
- if(offset && p2[8*offset+2] == 0) {
959
- p2[8*offset] = tid;
960
- p2[8*offset+1] = start;
961
- if(start+size < end) p2[8*offset+2] = start+size;
962
- else p2[8*offset+2] = end;
963
- //nextPos(fp, size, p2+8*offset, tid); //We can actually skip uncovered intervals
964
- }
965
-
966
- //Add a new entry
967
- while(!(rv = overlapsInterval(p2[8*offset], p2[8*offset+1], p2[8*offset+1] + size, tid, start, end))) {
968
- p2[8*offset] = tid;
969
- p2[8*offset+1] = start;
970
- if(start+size < end) p2[8*offset+2] = start+size;
971
- else p2[8*offset+2] = end;
972
- //nextPos(fp, size, p2+8*offset, tid);
973
- }
974
- p2[8*offset+3] = rv;
975
- fp2[8*offset+4] = value; //min
976
- fp2[8*offset+5] = value; //max
977
- *sum += rv * value;
978
- *sumsq += rv * pow(value,2.0);
979
- buffer->l += 32;
980
- return rv;
981
- }
982
-
983
- //Returns 0 on success
984
- int addIntervalValue(bigWigFile_t *fp, uint64_t *nEntries, double *sum, double *sumsq, bwZoomBuffer_t *buffer, uint32_t itemsPerSlot, uint32_t zoom, uint32_t tid, uint32_t start, uint32_t end, float value) {
985
- bwZoomBuffer_t *newBuffer = NULL;
986
- uint32_t rv;
987
-
988
- while(start < end) {
989
- rv = updateInterval(fp, buffer, sum, sumsq, zoom, tid, start, end, value);
990
- if(!rv) {
991
- //Allocate a new buffer
992
- newBuffer = calloc(1, sizeof(bwZoomBuffer_t));
993
- if(!newBuffer) return 1;
994
- newBuffer->p = calloc(itemsPerSlot, 32);
995
- if(!newBuffer->p) goto error;
996
- newBuffer->m = itemsPerSlot*32;
997
- memcpy(newBuffer->p, buffer->p+buffer->l-32, 4);
998
- memcpy(newBuffer->p+4, buffer->p+buffer->l-28, 4);
999
- ((uint32_t*) newBuffer->p)[2] = ((uint32_t*) newBuffer->p)[1] + zoom;
1000
- *sum = *sumsq = 0.0;
1001
- rv = updateInterval(fp, newBuffer, sum, sumsq, zoom, tid, start, end, value);
1002
- if(!rv) goto error;
1003
- buffer->next = newBuffer;
1004
- buffer = buffer->next;
1005
- *nEntries += 1;
1006
- }
1007
- start += rv;
1008
- }
1009
-
1010
- return 0;
1011
-
1012
- error:
1013
- if(newBuffer) {
1014
- if(newBuffer->m) free(newBuffer->p);
1015
- free(newBuffer);
1016
- }
1017
- return 2;
1018
- }
1019
-
1020
- //Get all of the intervals and add them to the appropriate zoomBuffer
1021
- int constructZoomLevels(bigWigFile_t *fp) {
1022
- bwOverlapIterator_t *it = NULL;
1023
- double *sum = NULL, *sumsq = NULL;
1024
- uint32_t i, j, k;
1025
-
1026
- sum = calloc(fp->hdr->nLevels, sizeof(double));
1027
- sumsq = calloc(fp->hdr->nLevels, sizeof(double));
1028
- if(!sum || !sumsq) goto error;
1029
-
1030
- for(i=0; i<fp->cl->nKeys; i++) {
1031
- it = bwOverlappingIntervalsIterator(fp, fp->cl->chrom[i], 0, fp->cl->len[i], 100000);
1032
- if(!it) goto error;
1033
- while(it->data != NULL){
1034
- for(j=0;j<it->intervals->l;j++){
1035
- for(k=0;k<fp->hdr->nLevels;k++){
1036
- if(addIntervalValue(fp, &(fp->writeBuffer->nNodes[k]), sum+k, sumsq+k, fp->writeBuffer->lastZoomBuffer[k], fp->hdr->bufSize/32, fp->hdr->zoomHdrs->level[k], i, it->intervals->start[j], it->intervals->end[j], it->intervals->value[j])) goto error;
1037
- while(fp->writeBuffer->lastZoomBuffer[k]->next) fp->writeBuffer->lastZoomBuffer[k] = fp->writeBuffer->lastZoomBuffer[k]->next;
1038
- }
1039
- }
1040
- it = bwIteratorNext(it);
1041
- }
1042
- bwIteratorDestroy(it);
1043
-
1044
- }
1045
-
1046
- //Make an index for each zoom level
1047
- for(i=0; i<fp->hdr->nLevels; i++) {
1048
- fp->hdr->zoomHdrs->idx[i] = calloc(1, sizeof(bwRTree_t));
1049
- if(!fp->hdr->zoomHdrs->idx[i]) return 1;
1050
- fp->hdr->zoomHdrs->idx[i]->blockSize = fp->writeBuffer->blockSize;
1051
- }
1052
-
1053
-
1054
- free(sum);
1055
- free(sumsq);
1056
-
1057
- return 0;
1058
-
1059
- error:
1060
- if(it) bwIteratorDestroy(it);
1061
- if(sum) free(sum);
1062
- if(sumsq) free(sumsq);
1063
- return 1;
1064
- }
1065
-
1066
- int writeZoomLevels(bigWigFile_t *fp) {
1067
- uint64_t offset1, offset2, idxSize = 0;
1068
- uint32_t i, j, four = 0, last, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0;
1069
- uint8_t wrote, one = 0;
1070
- uint16_t actualNLevels = 0;
1071
- int rv;
1072
- bwLL *ll, *p;
1073
- bwRTreeNode_t *root;
1074
- bwZoomBuffer_t *zb, *zb2;
1075
- bwWriteBuffer_t *wb = fp->writeBuffer;
1076
- uLongf sz;
1077
-
1078
- for(i=0; i<fp->hdr->nLevels; i++) {
1079
- if(i) {
1080
- //Is this a duplicate level?
1081
- if(fp->writeBuffer->nNodes[i] == fp->writeBuffer->nNodes[i-1]) break;
1082
- }
1083
- actualNLevels++;
1084
-
1085
- //reserve a uint32_t for the number of blocks
1086
- fp->hdr->zoomHdrs->dataOffset[i] = bwTell(fp);
1087
- fp->writeBuffer->nBlocks = 0;
1088
- fp->writeBuffer->l = 24;
1089
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
1090
- zb = fp->writeBuffer->firstZoomBuffer[i];
1091
- fp->writeBuffer->firstIndexNode = NULL;
1092
- fp->writeBuffer->currentIndexNode = NULL;
1093
- while(zb) {
1094
- sz = fp->hdr->bufSize;
1095
- if(compress(wb->compressP, &sz, zb->p, zb->l) != Z_OK) return 2;
1096
-
1097
- //write the data to disk
1098
- if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 3;
1099
-
1100
- //Add an entry into the index
1101
- last = (zb->l - 32)>>2;
1102
- if(addIndexEntry(fp, ((uint32_t*)zb->p)[0], ((uint32_t*)zb->p)[last], ((uint32_t*)zb->p)[1], ((uint32_t*)zb->p)[last+2], bwTell(fp)-sz, sz)) return 4;
1103
-
1104
- wb->nBlocks++;
1105
- wb->l = 24;
1106
- zb = zb->next;
1107
- }
1108
- if(writeAtPos(&(wb->nBlocks), sizeof(uint32_t), 1, fp->hdr->zoomHdrs->dataOffset[i], fp->URL->x.fp)) return 5;
1109
-
1110
- //Make the tree
1111
- ll = fp->writeBuffer->firstIndexNode;
1112
- if(ll == fp->writeBuffer->currentIndexNode) {
1113
- root = ll->node;
1114
- idxSize = 4 + 24*root->nChildren;
1115
- } else {
1116
- root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
1117
- }
1118
- if(!root) return 4;
1119
- fp->hdr->zoomHdrs->idx[i]->root = root;
1120
-
1121
- ll = fp->writeBuffer->firstIndexNode;
1122
- while(ll) {
1123
- p = ll->next;
1124
- free(ll);
1125
- ll=p;
1126
- }
1127
-
1128
-
1129
- //write the index
1130
- wrote = 0;
1131
- fp->hdr->zoomHdrs->indexOffset[i] = bwTell(fp);
1132
- four = IDX_MAGIC;
1133
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
1134
- root = fp->hdr->zoomHdrs->idx[i]->root;
1135
- if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
1136
- if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
1137
- if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
1138
- if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1139
- if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
1140
- if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
1141
- if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
1142
- four = fp->hdr->bufSize/32;
1143
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
1144
- four = 0;
1145
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
1146
- fp->hdr->zoomHdrs->idx[i]->rootOffset = bwTell(fp);
1147
-
1148
- //Write the root node, since writeIndexTree writes the children and fills in the offset
1149
- offset1 = bwTell(fp);
1150
- if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
1151
- if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
1152
- if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
1153
- for(j=0; j<root->nChildren; j++) {
1154
- vector[0] = root->chrIdxStart[j];
1155
- vector[1] = root->baseStart[j];
1156
- vector[2] = root->chrIdxEnd[j];
1157
- vector[3] = root->baseEnd[j];
1158
- if(root->isLeaf) {
1159
- //Include the offset and size
1160
- if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
1161
- if(fwrite(&(root->dataOffset[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
1162
- if(fwrite(&(root->x.size[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
1163
- } else {
1164
- if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
1165
- }
1166
- }
1167
-
1168
- while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->hdr->zoomHdrs->idx[i]->root, &wrote, 0)) == 0) {
1169
- if(!wrote) break;
1170
- wrote = 0;
1171
- }
1172
-
1173
- if(rv || wrote) return 6;
1174
-
1175
- //Save the file position
1176
- offset2 = bwTell(fp);
1177
-
1178
- //Write the offsets
1179
- if(writeIndexOffsets(fp->URL->x.fp, root, offset1)) return 2;
1180
-
1181
- //Move the file pointer back to the end
1182
- bwSetPos(fp, offset2);
1183
-
1184
-
1185
- //Free the linked list
1186
- zb = fp->writeBuffer->firstZoomBuffer[i];
1187
- while(zb) {
1188
- if(zb->p) free(zb->p);
1189
- zb2 = zb->next;
1190
- free(zb);
1191
- zb = zb2;
1192
- }
1193
- fp->writeBuffer->firstZoomBuffer[i] = NULL;
1194
- }
1195
-
1196
- //Free unused zoom levels
1197
- for(i=actualNLevels; i<fp->hdr->nLevels; i++) {
1198
- zb = fp->writeBuffer->firstZoomBuffer[i];
1199
- while(zb) {
1200
- if(zb->p) free(zb->p);
1201
- zb2 = zb->next;
1202
- free(zb);
1203
- zb = zb2;
1204
- }
1205
- fp->writeBuffer->firstZoomBuffer[i] = NULL;
1206
- }
1207
-
1208
- //Write the zoom headers to disk
1209
- offset1 = bwTell(fp);
1210
- if(bwSetPos(fp, 0x40)) return 7;
1211
- four = 0;
1212
- for(i=0; i<actualNLevels; i++) {
1213
- if(fwrite(&(fp->hdr->zoomHdrs->level[i]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
1214
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1215
- if(fwrite(&(fp->hdr->zoomHdrs->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 10;
1216
- if(fwrite(&(fp->hdr->zoomHdrs->indexOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 11;
1217
- }
1218
-
1219
- //Write the number of levels if needed
1220
- if(bwSetPos(fp, 0x6)) return 12;
1221
- if(fwrite(&actualNLevels, sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 13;
1222
-
1223
- if(bwSetPos(fp, offset1)) return 14;
1224
-
1225
- return 0;
1226
- }
1227
-
1228
- //0 on success
1229
- int bwFinalize(bigWigFile_t *fp) {
1230
- uint32_t four;
1231
- uint64_t offset;
1232
- if(!fp->isWrite) return 0;
1233
-
1234
- //Flush the buffer
1235
- if(flushBuffer(fp)) return 1; //Valgrind reports a problem here!
1236
-
1237
- //Update the data section with the number of blocks written
1238
- if(fp->hdr) {
1239
- if(writeAtPos(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->hdr->dataOffset, fp->URL->x.fp)) return 2;
1240
- } else {
1241
- //The header wasn't written!
1242
- return 1;
1243
- }
1244
-
1245
- //write the bufferSize
1246
- if(fp->hdr->bufSize) {
1247
- if(writeAtPos(&(fp->hdr->bufSize), sizeof(uint32_t), 1, 0x34, fp->URL->x.fp)) return 2;
1248
- }
1249
-
1250
- //write the summary information
1251
- if(writeSummary(fp)) return 3;
1252
-
1253
- //Convert the linked-list to a tree and write to disk
1254
- if(writeIndex(fp)) return 4;
1255
-
1256
- //Zoom level stuff here?
1257
- if(fp->hdr->nLevels && fp->writeBuffer->nBlocks) {
1258
- offset = bwTell(fp);
1259
- if(makeZoomLevels(fp)) return 5;
1260
- if(constructZoomLevels(fp)) return 6;
1261
- bwSetPos(fp, offset);
1262
- if(writeZoomLevels(fp)) return 7; //This write nLevels as well
1263
- }
1264
-
1265
- //write magic at the end of the file
1266
- four = BIGWIG_MAGIC;
1267
- if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1268
-
1269
- return 0;
1270
- }
1271
-
1272
- /*
1273
- data chunk:
1274
- uint64_t number of blocks (2 / 110851)
1275
- some blocks
1276
-
1277
- an uncompressed data block (24 byte header)
1278
- uint32_t Tid 0-4
1279
- uint32_t start 4-8
1280
- uint32_t end 8-12
1281
- uint32_t step 12-16
1282
- uint32_t span 16-20
1283
- uint8_t type 20
1284
- uint8_t padding
1285
- uint16_t nItems 22
1286
- nItems of:
1287
- type 1: //12 bytes
1288
- uint32_t start
1289
- uint32_t end
1290
- float value
1291
- type 2: //8 bytes
1292
- uint32_t start
1293
- float value
1294
- type 3: //4 bytes
1295
- float value
1296
-
1297
- data block index header
1298
- uint32_t magic
1299
- uint32_t blockSize (256 in the example) maximum number of children
1300
- uint64_t number of blocks (2 / 110851)
1301
- uint32_t startTid
1302
- uint32_t startPos
1303
- uint32_t endTid
1304
- uint32_t endPos
1305
- uint64_t index size? (0x1E7 / 0x1AF0401F) index address?
1306
- uint32_t itemsPerBlock (1 / 1) 1024 for zoom headers 1024 for zoom headers
1307
- uint32_t padding
1308
-
1309
- data block index node non-leaf (4 bytes + 24*nChildren)
1310
- uint8_t isLeaf
1311
- uint8_t padding
1312
- uint16_t nChildren (2, 256)
1313
- uint32_t startTid
1314
- uint32_t startPos
1315
- uint32_t endTid
1316
- uint32_t endPos
1317
- uint64_t dataOffset (0x1AF05853, 0x1AF07057)
1318
-
1319
- data block index node leaf (4 bytes + 32*nChildren)
1320
- uint8_t isLeaf
1321
- uint8_t padding
1322
- uint16_t nChildren (2)
1323
- uint32_t startTid
1324
- uint32_t startPos
1325
- uint32_t endTid
1326
- uint32_t endPos
1327
- uint64_t dataOffset (0x198, 0x1CF)
1328
- uint64_t dataSize (55, 24)
1329
-
1330
- zoom data block
1331
- uint32_t number of blocks (10519766)
1332
- some data blocks
1333
- */