bio-bigwig 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1333 @@
1
+ #include <limits.h>
2
+ #include <float.h>
3
+ #include <stdlib.h>
4
+ #include <string.h>
5
+ #include <math.h>
6
+ #include "bigWig.h"
7
+ #include "bwCommon.h"
8
+
9
/// @cond SKIP
//Node of a singly linked list of per-interval summary values.
//NOTE(review): presumably used while building zoom levels - the users are outside this view, confirm.
struct val_t {
    uint32_t tid;
    uint32_t start;
    uint32_t nBases;
    float min;
    float max;
    float sum;
    float sumsq;
    double scalar;
    struct val_t *next; //next node in the list
};
/// @endcond
19
+
20
+ //Create a chromList_t and attach it to a bigWigFile_t *. Returns NULL on error
21
+ //Note that chroms and lengths are duplicated, so you MUST free the input
22
+ chromList_t *bwCreateChromList(const char* const* chroms, const uint32_t *lengths, int64_t n) {
23
+ int64_t i = 0;
24
+ chromList_t *cl = calloc(1, sizeof(chromList_t));
25
+ if(!cl) return NULL;
26
+
27
+ cl->nKeys = n;
28
+ cl->chrom = malloc(sizeof(char*)*n);
29
+ cl->len = malloc(sizeof(uint32_t)*n);
30
+ if(!cl->chrom) goto error;
31
+ if(!cl->len) goto error;
32
+
33
+ for(i=0; i<n; i++) {
34
+ cl->len[i] = lengths[i];
35
+ cl->chrom[i] = bwStrdup(chroms[i]);
36
+ if(!cl->chrom[i]) goto error;
37
+ }
38
+
39
+ return cl;
40
+
41
+ error:
42
+ if(i) {
43
+ int64_t j;
44
+ for(j=0; j<i; j++) free(cl->chrom[j]);
45
+ }
46
+ if(cl) {
47
+ if(cl->chrom) free(cl->chrom);
48
+ if(cl->len) free(cl->len);
49
+ free(cl);
50
+ }
51
+ return NULL;
52
+ }
53
+
54
+ //If maxZooms == 0, then 0 is used (i.e., there are no zoom levels). If maxZooms < 0 or > 65535 then 10 is used.
55
+ //TODO allow changing bufSize and blockSize
56
+ int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms) {
57
+ if(!fp->isWrite) return 1;
58
+ bigWigHdr_t *hdr = calloc(1, sizeof(bigWigHdr_t));
59
+ if(!hdr) return 2;
60
+
61
+ hdr->version = 4;
62
+ if(maxZooms < 0 || maxZooms > 65535) {
63
+ hdr->nLevels = 10;
64
+ } else {
65
+ hdr->nLevels = maxZooms;
66
+ }
67
+
68
+ hdr->bufSize = 32768; //When the file is finalized this is reset if fp->writeBuffer->compressPsz is 0!
69
+ hdr->minVal = DBL_MAX;
70
+ hdr->maxVal = DBL_MIN;
71
+ fp->hdr = hdr;
72
+ fp->writeBuffer->blockSize = 64;
73
+
74
+ //Allocate the writeBuffer buffers
75
+ fp->writeBuffer->compressPsz = compressBound(hdr->bufSize);
76
+ fp->writeBuffer->compressP = malloc(fp->writeBuffer->compressPsz);
77
+ if(!fp->writeBuffer->compressP) return 3;
78
+ fp->writeBuffer->p = calloc(1,hdr->bufSize);
79
+ if(!fp->writeBuffer->p) return 4;
80
+
81
+ return 0;
82
+ }
83
+
84
//Write nmemb items of sz bytes from ptr at absolute offset pos, then restore the
//previous file position.
//Returns 0 on success, 1 if pos can't be reached, 2 if the write is short (the
//position is NOT restored in that case), 3 if the previous position can't be
//restored and 4 if the current position can't be determined.
static int writeAtPos(void *ptr, size_t sz, size_t nmemb, size_t pos, FILE *fp) {
    long curpos = ftell(fp);
    if(curpos < 0) return 4; //ftell reports failure as -1; never seek back to that
    if(pos > (size_t) LONG_MAX) return 1; //fseek takes a long offset
    if(fseek(fp, (long) pos, SEEK_SET)) return 1;
    if(fwrite(ptr, sz, nmemb, fp) != nmemb) return 2;
    if(fseek(fp, curpos, SEEK_SET)) return 3;
    return 0;
}
92
+
93
+ //We lose keySize bytes on error
94
+ static int writeChromList(FILE *fp, chromList_t *cl) {
95
+ uint16_t k;
96
+ uint32_t j, magic = CIRTREE_MAGIC;
97
+ uint32_t nperblock = (cl->nKeys > 0x7FFF) ? 0x7FFF : cl->nKeys; //Items per leaf/non-leaf, there are no unsigned ints in java :(
98
+ uint32_t nblocks, keySize = 0, valSize = 8; //In theory valSize could be optimized, in practice that'd be annoying
99
+ uint64_t i, nonLeafEnd, leafSize, nextLeaf;
100
+ uint8_t eight;
101
+ int64_t i64;
102
+ char *chrom;
103
+ size_t l;
104
+
105
+ if(cl->nKeys > 1073676289) {
106
+ fprintf(stderr, "[writeChromList] Error: Currently only 1,073,676,289 contigs are supported. If you really need more then please post a request on github.\n");
107
+ return 1;
108
+ }
109
+ nblocks = cl->nKeys/nperblock;
110
+ nblocks += ((cl->nKeys % nperblock) > 0)?1:0;
111
+
112
+ for(i64=0; i64<cl->nKeys; i64++) {
113
+ l = strlen(cl->chrom[i64]);
114
+ if(l>keySize) keySize = l;
115
+ }
116
+ l--; //We don't null terminate strings, because schiess mich tot
117
+ chrom = calloc(keySize, sizeof(char));
118
+
119
+ //Write the root node of a largely pointless tree
120
+ if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 1;
121
+ if(fwrite(&nperblock, sizeof(uint32_t), 1, fp) != 1) return 2;
122
+ if(fwrite(&keySize, sizeof(uint32_t), 1, fp) != 1) return 3;
123
+ if(fwrite(&valSize, sizeof(uint32_t), 1, fp) != 1) return 4;
124
+ if(fwrite(&(cl->nKeys), sizeof(uint64_t), 1, fp) != 1) return 5;
125
+
126
+ //Padding?
127
+ i=0;
128
+ if(fwrite(&i, sizeof(uint64_t), 1, fp) != 1) return 6;
129
+
130
+ //Do we need a non-leaf node?
131
+ if(nblocks > 1) {
132
+ eight = 0;
133
+ if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 7;
134
+ if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 8; //padding
135
+ if(fwrite(&nblocks, sizeof(uint16_t), 1, fp) != 1) return 8;
136
+ nonLeafEnd = ftell(fp) + nperblock * (keySize + 8);
137
+ leafSize = nperblock * (keySize + 8) + 4;
138
+ for(i=0; i<nblocks; i++) { //Why yes, this is pointless
139
+ chrom = strncpy(chrom, cl->chrom[i * nperblock], keySize);
140
+ nextLeaf = nonLeafEnd + i * leafSize;
141
+ if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
142
+ if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
143
+ }
144
+ for(i=0; i<keySize; i++) chrom[i] = '\0';
145
+ nextLeaf = 0;
146
+ for(i=nblocks; i<nperblock; i++) {
147
+ if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
148
+ if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
149
+ }
150
+ }
151
+
152
+ //Write the leaves
153
+ nextLeaf = 0;
154
+ for(i=0, j=0; i<nblocks; i++) {
155
+ eight = 1;
156
+ if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 11;
157
+ eight = 0;
158
+ if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 12;
159
+ if(cl->nKeys - j < nperblock) {
160
+ k = cl->nKeys - j;
161
+ if(fwrite(&k, sizeof(uint16_t), 1, fp) != 1) return 13;
162
+ } else {
163
+ if(fwrite(&nperblock, sizeof(uint16_t), 1, fp) != 1) return 13;
164
+ }
165
+ for(k=0; k<nperblock; k++) {
166
+ if(j>=cl->nKeys) {
167
+ if(chrom[0]) {
168
+ for(l=0; l<keySize; l++) chrom[l] = '\0';
169
+ }
170
+ if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
171
+ if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 16;
172
+ } else {
173
+ chrom = strncpy(chrom, cl->chrom[j], keySize);
174
+ if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
175
+ if(fwrite(&j, sizeof(uint32_t), 1, fp) != 1) return 16;
176
+ if(fwrite(&(cl->len[j++]), sizeof(uint32_t), 1, fp) != 1) return 17;
177
+ }
178
+ }
179
+ }
180
+
181
+ free(chrom);
182
+ return 0;
183
+ }
184
+
185
+ //returns 0 on success
186
+ //Still need to fill in indexOffset
187
+ int bwWriteHdr(bigWigFile_t *bw) {
188
+ uint32_t magic = BIGWIG_MAGIC;
189
+ uint16_t two = 4;
190
+ FILE *fp;
191
+ const uint8_t pbuff[58] = {0}; // 58 bytes of nothing
192
+ const void *p = (const void *)&pbuff;
193
+ if(!bw->isWrite) return 1;
194
+
195
+ //The header itself, largely just reserving space...
196
+ fp = bw->URL->x.fp;
197
+ if(!fp) return 2;
198
+ if(fseek(fp, 0, SEEK_SET)) return 3;
199
+ if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 4;
200
+ if(fwrite(&two, sizeof(uint16_t), 1, fp) != 1) return 5;
201
+ if(fwrite(p, sizeof(uint8_t), 58, fp) != 58) return 6;
202
+
203
+ //Empty zoom headers
204
+ if(bw->hdr->nLevels) {
205
+ for(two=0; two<bw->hdr->nLevels; two++) {
206
+ if(fwrite(p, sizeof(uint8_t), 24, fp) != 24) return 9;
207
+ }
208
+ }
209
+
210
+ //Update summaryOffset and write an empty summary block
211
+ bw->hdr->summaryOffset = ftell(fp);
212
+ if(fwrite(p, sizeof(uint8_t), 40, fp) != 40) return 10;
213
+ if(writeAtPos(&(bw->hdr->summaryOffset), sizeof(uint64_t), 1, 0x2c, fp)) return 11;
214
+
215
+ //Write the chromosome list as a stupid freaking tree (because let's TREE ALL THE THINGS!!!)
216
+ bw->hdr->ctOffset = ftell(fp);
217
+ if(writeChromList(fp, bw->cl)) return 7;
218
+ if(writeAtPos(&(bw->hdr->ctOffset), sizeof(uint64_t), 1, 0x8, fp)) return 8;
219
+
220
+ //Update the dataOffset
221
+ bw->hdr->dataOffset = ftell(fp);
222
+ if(writeAtPos(&bw->hdr->dataOffset, sizeof(uint64_t), 1, 0x10, fp)) return 12;
223
+
224
+ //Save space for the number of blocks
225
+ if(fwrite(p, sizeof(uint8_t), 8, fp) != 8) return 13;
226
+
227
+ return 0;
228
+ }
229
+
230
+ static int insertIndexNode(bigWigFile_t *fp, bwRTreeNode_t *leaf) {
231
+ bwLL *l = malloc(sizeof(bwLL));
232
+ if(!l) return 1;
233
+ l->node = leaf;
234
+ l->next = NULL;
235
+
236
+ if(!fp->writeBuffer->firstIndexNode) {
237
+ fp->writeBuffer->firstIndexNode = l;
238
+ } else {
239
+ fp->writeBuffer->currentIndexNode->next = l;
240
+ }
241
+ fp->writeBuffer->currentIndexNode = l;
242
+ return 0;
243
+ }
244
+
245
+ //0 on success
246
+ static int appendIndexNodeEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
247
+ bwLL *n = fp->writeBuffer->currentIndexNode;
248
+ if(!n) return 1;
249
+ if(n->node->nChildren >= fp->writeBuffer->blockSize) return 2;
250
+
251
+ n->node->chrIdxStart[n->node->nChildren] = tid0;
252
+ n->node->baseStart[n->node->nChildren] = start;
253
+ n->node->chrIdxEnd[n->node->nChildren] = tid1;
254
+ n->node->baseEnd[n->node->nChildren] = end;
255
+ n->node->dataOffset[n->node->nChildren] = offset;
256
+ n->node->x.size[n->node->nChildren] = size;
257
+ n->node->nChildren++;
258
+ return 0;
259
+ }
260
+
261
+ //Returns 0 on success
262
+ static int addIndexEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
263
+ bwRTreeNode_t *node;
264
+
265
+ if(appendIndexNodeEntry(fp, tid0, tid1, start, end, offset, size)) {
266
+ //The last index node is full, we need to add a new one
267
+ node = calloc(1, sizeof(bwRTreeNode_t));
268
+ if(!node) return 1;
269
+
270
+ //Allocate and set the fields
271
+ node->isLeaf = 1;
272
+ node->nChildren = 1;
273
+ node->chrIdxStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
274
+ if(!node->chrIdxStart) goto error;
275
+ node->baseStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
276
+ if(!node->baseStart) goto error;
277
+ node->chrIdxEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
278
+ if(!node->chrIdxEnd) goto error;
279
+ node->baseEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
280
+ if(!node->baseEnd) goto error;
281
+ node->dataOffset = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
282
+ if(!node->dataOffset) goto error;
283
+ node->x.size = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
284
+ if(!node->x.size) goto error;
285
+
286
+ node->chrIdxStart[0] = tid0;
287
+ node->baseStart[0] = start;
288
+ node->chrIdxEnd[0] = tid1;
289
+ node->baseEnd[0] = end;
290
+ node->dataOffset[0] = offset;
291
+ node->x.size[0] = size;
292
+
293
+ if(insertIndexNode(fp, node)) goto error;
294
+ }
295
+
296
+ return 0;
297
+
298
+ error:
299
+ if(node->chrIdxStart) free(node->chrIdxStart);
300
+ if(node->baseStart) free(node->baseStart);
301
+ if(node->chrIdxEnd) free(node->chrIdxEnd);
302
+ if(node->baseEnd) free(node->baseEnd);
303
+ if(node->dataOffset) free(node->dataOffset);
304
+ if(node->x.size) free(node->x.size);
305
+ return 2;
306
+ }
307
+
308
+ /*
309
+ * TODO:
310
+ * The buffer size and compression sz need to be determined elsewhere (and p and compressP filled in!)
311
+ */
312
+ static int flushBuffer(bigWigFile_t *fp) {
313
+ bwWriteBuffer_t *wb = fp->writeBuffer;
314
+ uLongf sz = wb->compressPsz;
315
+ uint16_t nItems;
316
+ if(!fp->writeBuffer->l) return 0;
317
+ if(!wb->ltype) return 0;
318
+
319
+ //Fill in the header
320
+ if(!memcpy(wb->p, &(wb->tid), sizeof(uint32_t))) return 1;
321
+ if(!memcpy(wb->p+4, &(wb->start), sizeof(uint32_t))) return 2;
322
+ if(!memcpy(wb->p+8, &(wb->end), sizeof(uint32_t))) return 3;
323
+ if(!memcpy(wb->p+12, &(wb->step), sizeof(uint32_t))) return 4;
324
+ if(!memcpy(wb->p+16, &(wb->span), sizeof(uint32_t))) return 5;
325
+ if(!memcpy(wb->p+20, &(wb->ltype), sizeof(uint8_t))) return 6;
326
+ //1 byte padding
327
+ //Determine the number of items
328
+ switch(wb->ltype) {
329
+ case 1:
330
+ nItems = (wb->l-24)/12;
331
+ break;
332
+ case 2:
333
+ nItems = (wb->l-24)/8;
334
+ break;
335
+ case 3:
336
+ nItems = (wb->l-24)/4;
337
+ break;
338
+ default:
339
+ return 7;
340
+ }
341
+ if(!memcpy(wb->p+22, &nItems, sizeof(uint16_t))) return 8;
342
+
343
+ if(sz) {
344
+ //compress
345
+ if(compress(wb->compressP, &sz, wb->p, wb->l) != Z_OK) return 9;
346
+
347
+ //write the data to disk
348
+ if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 10;
349
+ } else {
350
+ sz = wb->l;
351
+ if(fwrite(wb->p, sizeof(uint8_t), wb->l, fp->URL->x.fp) != wb->l) return 10;
352
+ }
353
+
354
+ //Add an entry into the index
355
+ if(addIndexEntry(fp, wb->tid, wb->tid, wb->start, wb->end, bwTell(fp)-sz, sz)) return 11;
356
+
357
+ wb->nBlocks++;
358
+ wb->l = 24;
359
+ return 0;
360
+ }
361
+
362
+ static void updateStats(bigWigFile_t *fp, uint32_t span, float val) {
363
+ if(val < fp->hdr->minVal) fp->hdr->minVal = val;
364
+ else if(val > fp->hdr->maxVal) fp->hdr->maxVal = val;
365
+ fp->hdr->nBasesCovered += span;
366
+ fp->hdr->sumData += span*val;
367
+ fp->hdr->sumSquared += span*pow(val,2);
368
+
369
+ fp->writeBuffer->nEntries++;
370
+ fp->writeBuffer->runningWidthSum += span;
371
+ }
372
+
373
+ //12 bytes per entry
374
+ int bwAddIntervals(bigWigFile_t *fp, const char* const* chrom, const uint32_t *start, const uint32_t *end, const float *values, uint32_t n) {
375
+ uint32_t tid = 0, i;
376
+ const char *lastChrom = NULL;
377
+ bwWriteBuffer_t *wb = fp->writeBuffer;
378
+ if(!n) return 0; //Not an error per se
379
+ if(!fp->isWrite) return 1;
380
+ if(!wb) return 2;
381
+
382
+ //Flush if needed
383
+ if(wb->ltype != 1) if(flushBuffer(fp)) return 3;
384
+ if(wb->l+36 > fp->hdr->bufSize) if(flushBuffer(fp)) return 4;
385
+ lastChrom = chrom[0];
386
+ tid = bwGetTid(fp, chrom[0]);
387
+ if(tid == (uint32_t) -1) return 5;
388
+ if(tid != wb->tid) {
389
+ if(flushBuffer(fp)) return 6;
390
+ wb->tid = tid;
391
+ wb->start = start[0];
392
+ wb->end = end[0];
393
+ }
394
+
395
+ //Ensure that everything is set correctly
396
+ wb->ltype = 1;
397
+ if(wb->l <= 24) {
398
+ wb->start = start[0];
399
+ wb->span = 0;
400
+ wb->step = 0;
401
+ }
402
+ if(!memcpy(wb->p+wb->l, start, sizeof(uint32_t))) return 7;
403
+ if(!memcpy(wb->p+wb->l+4, end, sizeof(uint32_t))) return 8;
404
+ if(!memcpy(wb->p+wb->l+8, values, sizeof(float))) return 9;
405
+ updateStats(fp, end[0]-start[0], values[0]);
406
+ wb->l += 12;
407
+
408
+ for(i=1; i<n; i++) {
409
+ if(strcmp(chrom[i],lastChrom) != 0) {
410
+ wb->end = end[i-1];
411
+ flushBuffer(fp);
412
+ lastChrom = chrom[i];
413
+ tid = bwGetTid(fp, chrom[i]);
414
+ if(tid == (uint32_t) -1) return 10;
415
+ wb->tid = tid;
416
+ wb->start = start[i];
417
+ }
418
+ if(wb->l+12 > fp->hdr->bufSize) { //12 bytes/entry
419
+ wb->end = end[i-1];
420
+ flushBuffer(fp);
421
+ wb->start = start[i];
422
+ }
423
+ if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 11;
424
+ if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 12;
425
+ if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 13;
426
+ updateStats(fp, end[i]-start[i], values[i]);
427
+ wb->l += 12;
428
+ }
429
+ wb->end = end[i-1];
430
+
431
+ return 0;
432
+ }
433
+
434
+ int bwAppendIntervals(bigWigFile_t *fp, const uint32_t *start, const uint32_t *end, const float *values, uint32_t n) {
435
+ uint32_t i;
436
+ bwWriteBuffer_t *wb = fp->writeBuffer;
437
+ if(!n) return 0;
438
+ if(!fp->isWrite) return 1;
439
+ if(!wb) return 2;
440
+ if(wb->ltype != 1) return 3;
441
+
442
+ for(i=0; i<n; i++) {
443
+ if(wb->l+12 > fp->hdr->bufSize) {
444
+ if(i>0) { //otherwise it's already set
445
+ wb->end = end[i-1];
446
+ }
447
+ flushBuffer(fp);
448
+ wb->start = start[i];
449
+ }
450
+ if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
451
+ if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 5;
452
+ if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 6;
453
+ updateStats(fp, end[i]-start[i], values[i]);
454
+ wb->l += 12;
455
+ }
456
+ wb->end = end[i-1];
457
+
458
+ return 0;
459
+ }
460
+
461
+ //8 bytes per entry
462
+ int bwAddIntervalSpans(bigWigFile_t *fp, const char *chrom, const uint32_t *start, uint32_t span, const float *values, uint32_t n) {
463
+ uint32_t i, tid;
464
+ bwWriteBuffer_t *wb = fp->writeBuffer;
465
+ if(!n) return 0;
466
+ if(!fp->isWrite) return 1;
467
+ if(!wb) return 2;
468
+ if(wb->ltype != 2) if(flushBuffer(fp)) return 3;
469
+ if(flushBuffer(fp)) return 4;
470
+
471
+ tid = bwGetTid(fp, chrom);
472
+ if(tid == (uint32_t) -1) return 5;
473
+ wb->tid = tid;
474
+ wb->start = start[0];
475
+ wb->step = 0;
476
+ wb->span = span;
477
+ wb->ltype = 2;
478
+
479
+ for(i=0; i<n; i++) {
480
+ if(wb->l + 8 >= fp->hdr->bufSize) { //8 bytes/entry
481
+ if(i) wb->end = start[i-1]+span;
482
+ flushBuffer(fp);
483
+ wb->start = start[i];
484
+ }
485
+ if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 5;
486
+ if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 6;
487
+ updateStats(fp, span, values[i]);
488
+ wb->l += 8;
489
+ }
490
+ wb->end = start[n-1] + span;
491
+
492
+ return 0;
493
+ }
494
+
495
+ int bwAppendIntervalSpans(bigWigFile_t *fp, const uint32_t *start, const float *values, uint32_t n) {
496
+ uint32_t i;
497
+ bwWriteBuffer_t *wb = fp->writeBuffer;
498
+ if(!n) return 0;
499
+ if(!fp->isWrite) return 1;
500
+ if(!wb) return 2;
501
+ if(wb->ltype != 2) return 3;
502
+
503
+ for(i=0; i<n; i++) {
504
+ if(wb->l + 8 >= fp->hdr->bufSize) {
505
+ if(i) wb->end = start[i-1]+wb->span;
506
+ flushBuffer(fp);
507
+ wb->start = start[i];
508
+ }
509
+ if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 4;
510
+ if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 5;
511
+ updateStats(fp, wb->span, values[i]);
512
+ wb->l += 8;
513
+ }
514
+ wb->end = start[n-1] + wb->span;
515
+
516
+ return 0;
517
+ }
518
+
519
+ //4 bytes per entry
520
+ int bwAddIntervalSpanSteps(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t span, uint32_t step, const float *values, uint32_t n) {
521
+ uint32_t i, tid;
522
+ bwWriteBuffer_t *wb = fp->writeBuffer;
523
+ if(!n) return 0;
524
+ if(!fp->isWrite) return 1;
525
+ if(!wb) return 2;
526
+ if(wb->ltype != 3) flushBuffer(fp);
527
+ if(flushBuffer(fp)) return 3;
528
+
529
+ tid = bwGetTid(fp, chrom);
530
+ if(tid == (uint32_t) -1) return 4;
531
+ wb->tid = tid;
532
+ wb->start = start;
533
+ wb->step = step;
534
+ wb->span = span;
535
+ wb->ltype = 3;
536
+
537
+ for(i=0; i<n; i++) {
538
+ if(wb->l + 4 >= fp->hdr->bufSize) {
539
+ wb->end = wb->start + ((wb->l-24)>>2) * step;
540
+ flushBuffer(fp);
541
+ wb->start = wb->end;
542
+ }
543
+ if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 5;
544
+ updateStats(fp, wb->span, values[i]);
545
+ wb->l += 4;
546
+ }
547
+ wb->end = wb->start + (wb->l>>2) * step;
548
+
549
+ return 0;
550
+ }
551
+
552
+ int bwAppendIntervalSpanSteps(bigWigFile_t *fp, const float *values, uint32_t n) {
553
+ uint32_t i;
554
+ bwWriteBuffer_t *wb = fp->writeBuffer;
555
+ if(!n) return 0;
556
+ if(!fp->isWrite) return 1;
557
+ if(!wb) return 2;
558
+ if(wb->ltype != 3) return 3;
559
+
560
+ for(i=0; i<n; i++) {
561
+ if(wb->l + 4 >= fp->hdr->bufSize) {
562
+ wb->end = wb->start + ((wb->l-24)>>2) * wb->step;
563
+ flushBuffer(fp);
564
+ wb->start = wb->end;
565
+ }
566
+ if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 4;
567
+ updateStats(fp, wb->span, values[i]);
568
+ wb->l += 4;
569
+ }
570
+ wb->end = wb->start + (wb->l>>2) * wb->step;
571
+
572
+ return 0;
573
+ }
574
+
575
+ //0 on success
576
+ int writeSummary(bigWigFile_t *fp) {
577
+ if(writeAtPos(&(fp->hdr->nBasesCovered), sizeof(uint64_t), 1, fp->hdr->summaryOffset, fp->URL->x.fp)) return 1;
578
+ if(writeAtPos(&(fp->hdr->minVal), sizeof(double), 1, fp->hdr->summaryOffset+8, fp->URL->x.fp)) return 2;
579
+ if(writeAtPos(&(fp->hdr->maxVal), sizeof(double), 1, fp->hdr->summaryOffset+16, fp->URL->x.fp)) return 3;
580
+ if(writeAtPos(&(fp->hdr->sumData), sizeof(double), 1, fp->hdr->summaryOffset+24, fp->URL->x.fp)) return 4;
581
+ if(writeAtPos(&(fp->hdr->sumSquared), sizeof(double), 1, fp->hdr->summaryOffset+32, fp->URL->x.fp)) return 5;
582
+ return 0;
583
+ }
584
+
585
+ static bwRTreeNode_t *makeEmptyNode(uint32_t blockSize) {
586
+ bwRTreeNode_t *n = calloc(1, sizeof(bwRTreeNode_t));
587
+ if(!n) return NULL;
588
+
589
+ n->chrIdxStart = malloc(blockSize*sizeof(uint32_t));
590
+ if(!n->chrIdxStart) goto error;
591
+ n->baseStart = malloc(blockSize*sizeof(uint32_t));
592
+ if(!n->baseStart) goto error;
593
+ n->chrIdxEnd = malloc(blockSize*sizeof(uint32_t));
594
+ if(!n->chrIdxEnd) goto error;
595
+ n->baseEnd = malloc(blockSize*sizeof(uint32_t));
596
+ if(!n->baseEnd) goto error;
597
+ n->dataOffset = calloc(blockSize,sizeof(uint64_t)); //This MUST be 0 for node writing!
598
+ if(!n->dataOffset) goto error;
599
+ n->x.child = malloc(blockSize*sizeof(uint64_t));
600
+ if(!n->x.child) goto error;
601
+
602
+ return n;
603
+
604
+ error:
605
+ if(n->chrIdxStart) free(n->chrIdxStart);
606
+ if(n->baseStart) free(n->baseStart);
607
+ if(n->chrIdxEnd) free(n->chrIdxEnd);
608
+ if(n->baseEnd) free(n->baseEnd);
609
+ if(n->dataOffset) free(n->dataOffset);
610
+ if(n->x.child) free(n->x.child);
611
+ free(n);
612
+ return NULL;
613
+ }
614
+
615
+ //Returns 0 on success. This doesn't attempt to clean up!
616
+ static bwRTreeNode_t *addLeaves(bwLL **ll, uint64_t *sz, uint64_t toProcess, uint32_t blockSize) {
617
+ uint32_t i;
618
+ uint64_t foo;
619
+ bwRTreeNode_t *n = makeEmptyNode(blockSize);
620
+ if(!n) return NULL;
621
+
622
+ if(toProcess <= blockSize) {
623
+ for(i=0; i<toProcess; i++) {
624
+ n->chrIdxStart[i] = (*ll)->node->chrIdxStart[0];
625
+ n->baseStart[i] = (*ll)->node->baseStart[0];
626
+ n->chrIdxEnd[i] = (*ll)->node->chrIdxEnd[(*ll)->node->nChildren-1];
627
+ n->baseEnd[i] = (*ll)->node->baseEnd[(*ll)->node->nChildren-1];
628
+ n->x.child[i] = (*ll)->node;
629
+ *sz += 4 + 32*(*ll)->node->nChildren;
630
+ *ll = (*ll)->next;
631
+ n->nChildren++;
632
+ }
633
+ } else {
634
+ for(i=0; i<blockSize; i++) {
635
+ foo = ceil(((double) toProcess)/((double) blockSize-i));
636
+ if(!ll) break;
637
+ n->x.child[i] = addLeaves(ll, sz, foo, blockSize);
638
+ if(!n->x.child[i]) goto error;
639
+ n->chrIdxStart[i] = n->x.child[i]->chrIdxStart[0];
640
+ n->baseStart[i] = n->x.child[i]->baseStart[0];
641
+ n->chrIdxEnd[i] = n->x.child[i]->chrIdxEnd[n->x.child[i]->nChildren-1];
642
+ n->baseEnd[i] = n->x.child[i]->baseEnd[n->x.child[i]->nChildren-1];
643
+ n->nChildren++;
644
+ toProcess -= foo;
645
+ }
646
+ }
647
+
648
+ *sz += 4 + 24*n->nChildren;
649
+ return n;
650
+
651
+ error:
652
+ bwDestroyIndexNode(n);
653
+ return NULL;
654
+ }
655
+
656
+ //Returns 1 on error
657
+ int writeIndexTreeNode(FILE *fp, bwRTreeNode_t *n, uint8_t *wrote, int level) {
658
+ uint8_t one = 0;
659
+ uint32_t i, j, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
660
+
661
+ if(n->isLeaf) return 0;
662
+
663
+ for(i=0; i<n->nChildren; i++) {
664
+ if(n->dataOffset[i]) { //traverse into child
665
+ if(n->isLeaf) return 0; //Only write leaves once!
666
+ if(writeIndexTreeNode(fp, n->x.child[i], wrote, level+1)) return 1;
667
+ } else {
668
+ n->dataOffset[i] = ftell(fp);
669
+ if(fwrite(&(n->x.child[i]->isLeaf), sizeof(uint8_t), 1, fp) != 1) return 1;
670
+ if(fwrite(&one, sizeof(uint8_t), 1, fp) != 1) return 1; //one byte of padding
671
+ if(fwrite(&(n->x.child[i]->nChildren), sizeof(uint16_t), 1, fp) != 1) return 1;
672
+ for(j=0; j<n->x.child[i]->nChildren; j++) {
673
+ vector[0] = n->x.child[i]->chrIdxStart[j];
674
+ vector[1] = n->x.child[i]->baseStart[j];
675
+ vector[2] = n->x.child[i]->chrIdxEnd[j];
676
+ vector[3] = n->x.child[i]->baseEnd[j];
677
+ if(n->x.child[i]->isLeaf) {
678
+ //Include the offset and size
679
+ if(fwrite(vector, sizeof(uint32_t), 4, fp) != 4) return 1;
680
+ if(fwrite(&(n->x.child[i]->dataOffset[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
681
+ if(fwrite(&(n->x.child[i]->x.size[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
682
+ } else {
683
+ if(fwrite(vector, sizeof(uint32_t), 6, fp) != 6) return 1;
684
+ }
685
+ }
686
+ *wrote = 1;
687
+ }
688
+ }
689
+
690
+ return 0;
691
+ }
692
+
693
+ //returns 1 on success
694
+ int writeIndexOffsets(FILE *fp, bwRTreeNode_t *n, uint64_t offset) {
695
+ uint32_t i;
696
+
697
+ if(n->isLeaf) return 0;
698
+ for(i=0; i<n->nChildren; i++) {
699
+ if(writeIndexOffsets(fp, n->x.child[i], n->dataOffset[i])) return 1;
700
+ if(writeAtPos(&(n->dataOffset[i]), sizeof(uint64_t), 1, offset+20+24*i, fp)) return 2;
701
+ }
702
+ return 0;
703
+ }
704
+
705
+ //Returns 0 on success
706
+ int writeIndexTree(bigWigFile_t *fp) {
707
+ uint64_t offset;
708
+ uint8_t wrote = 0;
709
+ int rv;
710
+
711
+ while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->idx->root, &wrote, 0)) == 0) {
712
+ if(!wrote) break;
713
+ wrote = 0;
714
+ }
715
+
716
+ if(rv || wrote) return 1;
717
+
718
+ //Save the file position
719
+ offset = bwTell(fp);
720
+
721
+ //Write the offsets
722
+ if(writeIndexOffsets(fp->URL->x.fp, fp->idx->root, fp->idx->rootOffset)) return 2;
723
+
724
+ //Move the file pointer back to the end
725
+ bwSetPos(fp, offset);
726
+
727
+ return 0;
728
+ }
729
+
730
+ //Returns 0 on success. The original state SHOULD be preserved on error
731
+ int writeIndex(bigWigFile_t *fp) {
732
+ uint32_t four = IDX_MAGIC;
733
+ uint64_t idxSize = 0, foo;
734
+ uint8_t one = 0;
735
+ uint32_t i, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
736
+ bwLL *ll = fp->writeBuffer->firstIndexNode, *p;
737
+ bwRTreeNode_t *root = NULL;
738
+
739
+ if(!fp->writeBuffer->nBlocks) return 0;
740
+ fp->idx = malloc(sizeof(bwRTree_t));
741
+ if(!fp->idx) return 2;
742
+ fp->idx->root = root;
743
+
744
+ //Update the file header to indicate the proper index position
745
+ foo = bwTell(fp);
746
+ if(writeAtPos(&foo, sizeof(uint64_t), 1, 0x18, fp->URL->x.fp)) return 3;
747
+
748
+ //Make the tree
749
+ if(ll == fp->writeBuffer->currentIndexNode) {
750
+ root = ll->node;
751
+ idxSize = 4 + 24*root->nChildren;
752
+ } else {
753
+ root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
754
+ }
755
+ if(!root) return 4;
756
+ fp->idx->root = root;
757
+
758
+ ll = fp->writeBuffer->firstIndexNode;
759
+ while(ll) {
760
+ p = ll->next;
761
+ free(ll);
762
+ ll=p;
763
+ }
764
+
765
+ //write the header
766
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 5;
767
+ if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
768
+ if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
769
+ if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
770
+ if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
771
+ if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
772
+ if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
773
+ if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
774
+ four = 1;
775
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
776
+ four = 0;
777
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
778
+ fp->idx->rootOffset = bwTell(fp);
779
+
780
+ //Write the root node, since writeIndexTree writes the children and fills in the offset
781
+ if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
782
+ if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
783
+ if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
784
+ for(i=0; i<root->nChildren; i++) {
785
+ vector[0] = root->chrIdxStart[i];
786
+ vector[1] = root->baseStart[i];
787
+ vector[2] = root->chrIdxEnd[i];
788
+ vector[3] = root->baseEnd[i];
789
+ if(root->isLeaf) {
790
+ //Include the offset and size
791
+ if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
792
+ if(fwrite(&(root->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
793
+ if(fwrite(&(root->x.size[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
794
+ } else {
795
+ root->dataOffset[i] = 0; //FIXME: Something upstream is setting this to impossible values (e.g., 0x21?!?!?)
796
+ if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
797
+ }
798
+ }
799
+
800
+ //Write each level
801
+ if(writeIndexTree(fp)) return 23;
802
+
803
+ return 0;
804
+ }
805
+
806
+ //The first zoom level has a resolution of 4x mean entry size
807
+ //This may or may not produce the requested number of zoom levels
808
+ int makeZoomLevels(bigWigFile_t *fp) {
809
+ uint32_t meanBinSize, i;
810
+ uint32_t multiplier = 4, zoom = 10, maxZoom = 0;
811
+ uint16_t nLevels = 0;
812
+
813
+ meanBinSize = ((double) fp->writeBuffer->runningWidthSum)/(fp->writeBuffer->nEntries);
814
+ //In reality, one level is skipped
815
+ meanBinSize *= 4;
816
+ //N.B., we must ALWAYS check that the zoom doesn't overflow a uint32_t!
817
+ if(((uint32_t)-1)>>2 < meanBinSize) return 0; //No zoom levels!
818
+ if(meanBinSize*4 > zoom) zoom = multiplier*meanBinSize;
819
+
820
+ fp->hdr->zoomHdrs = calloc(1, sizeof(bwZoomHdr_t));
821
+ if(!fp->hdr->zoomHdrs) return 1;
822
+ fp->hdr->zoomHdrs->level = malloc(fp->hdr->nLevels * sizeof(uint32_t));
823
+ fp->hdr->zoomHdrs->dataOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
824
+ fp->hdr->zoomHdrs->indexOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
825
+ fp->hdr->zoomHdrs->idx = calloc(fp->hdr->nLevels, sizeof(bwRTree_t*));
826
+ if(!fp->hdr->zoomHdrs->level) return 2;
827
+ if(!fp->hdr->zoomHdrs->dataOffset) return 3;
828
+ if(!fp->hdr->zoomHdrs->indexOffset) return 4;
829
+ if(!fp->hdr->zoomHdrs->idx) return 5;
830
+
831
+ //There's no point in having a zoom level larger than the largest chromosome
832
+ //This will none the less allow at least one zoom level, which is generally needed for IGV et al.
833
+ for(i=0; i<fp->cl->nKeys; i++) {
834
+ if(fp->cl->len[i] > maxZoom) maxZoom = fp->cl->len[i];
835
+ }
836
+ if(zoom > maxZoom) zoom = maxZoom;
837
+
838
+ for(i=0; i<fp->hdr->nLevels; i++) {
839
+ if(zoom > maxZoom) break; //prevent absurdly large zoom levels
840
+ fp->hdr->zoomHdrs->level[i] = zoom;
841
+ nLevels++;
842
+ if(((uint32_t)-1)/multiplier < zoom) break;
843
+ zoom *= multiplier;
844
+ }
845
+ fp->hdr->nLevels = nLevels;
846
+
847
+ fp->writeBuffer->firstZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
848
+ if(!fp->writeBuffer->firstZoomBuffer) goto error;
849
+ fp->writeBuffer->lastZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
850
+ if(!fp->writeBuffer->lastZoomBuffer) goto error;
851
+ fp->writeBuffer->nNodes = calloc(nLevels, sizeof(uint64_t));
852
+
853
+ for(i=0; i<fp->hdr->nLevels; i++) {
854
+ fp->writeBuffer->firstZoomBuffer[i] = calloc(1, sizeof(bwZoomBuffer_t));
855
+ if(!fp->writeBuffer->firstZoomBuffer[i]) goto error;
856
+ fp->writeBuffer->firstZoomBuffer[i]->p = calloc(fp->hdr->bufSize/32, 32);
857
+ if(!fp->writeBuffer->firstZoomBuffer[i]->p) goto error;
858
+ fp->writeBuffer->firstZoomBuffer[i]->m = fp->hdr->bufSize;
859
+ ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[0] = 0;
860
+ ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[1] = 0;
861
+ ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->hdr->zoomHdrs->level[i];
862
+ if(fp->hdr->zoomHdrs->level[i] > fp->cl->len[0]) ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->cl->len[0];
863
+ fp->writeBuffer->lastZoomBuffer[i] = fp->writeBuffer->firstZoomBuffer[i];
864
+ }
865
+
866
+ return 0;
867
+
868
+ error:
869
+ if(fp->writeBuffer->firstZoomBuffer) {
870
+ for(i=0; i<fp->hdr->nLevels; i++) {
871
+ if(fp->writeBuffer->firstZoomBuffer[i]) {
872
+ if(fp->writeBuffer->firstZoomBuffer[i]->p) free(fp->writeBuffer->firstZoomBuffer[i]->p);
873
+ free(fp->writeBuffer->firstZoomBuffer[i]);
874
+ }
875
+ }
876
+ free(fp->writeBuffer->firstZoomBuffer);
877
+ }
878
+ if(fp->writeBuffer->lastZoomBuffer) free(fp->writeBuffer->lastZoomBuffer);
879
+ if(fp->writeBuffer->nNodes) free(fp->writeBuffer->lastZoomBuffer);
880
+ return 6;
881
+ }
882
+
883
+ //Given an interval start, calculate the next one at a zoom level
884
+ void nextPos(bigWigFile_t *fp, uint32_t size, uint32_t *pos, uint32_t desiredTid) {
885
+ uint32_t *tid = pos;
886
+ uint32_t *start = pos+1;
887
+ uint32_t *end = pos+2;
888
+ *start += size;
889
+ if(*start >= fp->cl->len[*tid]) {
890
+ (*start) = 0;
891
+ (*tid)++;
892
+ }
893
+
894
+ //prevent needless iteration when changing chromosomes
895
+ if(*tid < desiredTid) {
896
+ *tid = desiredTid;
897
+ *start = 0;
898
+ }
899
+
900
+ (*end) = *start+size;
901
+ if(*end > fp->cl->len[*tid]) (*end) = fp->cl->len[*tid];
902
+ }
903
+
904
//Number of bases shared by two half-open intervals, [start0, end0) on tid0
//and [start1, end1) on tid1. Returns 0 if they are on different chromosomes
//or merely abut.
uint32_t overlapsInterval(uint32_t tid0, uint32_t start0, uint32_t end0, uint32_t tid1, uint32_t start1, uint32_t end1) {
    uint32_t lo, hi;

    if(tid0 != tid1 || end0 <= start1 || end1 <= start0) return 0;

    //The shared region runs from the later start to the earlier end
    lo = (start1 > start0) ? start1 : start0;
    hi = (end0 <= end1) ? end0 : end1;
    return hi - lo;
}
917
+
918
+ //Returns the number of bases of the interval written
919
+ uint32_t updateInterval(bigWigFile_t *fp, bwZoomBuffer_t *buffer, double *sum, double *sumsq, uint32_t size, uint32_t tid, uint32_t start, uint32_t end, float value) {
920
+ uint32_t *p2 = (uint32_t*) buffer->p;
921
+ float *fp2 = (float*) p2;
922
+ uint32_t rv = 0, offset = 0;
923
+ if(!buffer) return 0;
924
+ if(buffer->l+32 >= buffer->m) return 0;
925
+
926
+ //Make sure that we don't overflow a uint32_t by adding some huge value to start
927
+ if(start + size < start) size = ((uint32_t) -1) - start;
928
+
929
+ if(buffer->l) {
930
+ offset = buffer->l/32;
931
+ } else {
932
+ p2[0] = tid;
933
+ p2[1] = start;
934
+ if(start+size < end) p2[2] = start+size;
935
+ else p2[2] = end;
936
+ }
937
+
938
+ //Do we have any overlap with the previously added interval?
939
+ if(offset) {
940
+ rv = overlapsInterval(p2[8*(offset-1)], p2[8*(offset-1)+1], p2[8*(offset-1)+1] + size, tid, start, end);
941
+ if(rv) {
942
+ p2[8*(offset-1)+2] = start + rv;
943
+ p2[8*(offset-1)+3] += rv;
944
+ if(fp2[8*(offset-1)+4] > value) fp2[8*(offset-1)+4] = value;
945
+ if(fp2[8*(offset-1)+5] < value) fp2[8*(offset-1)+5] = value;
946
+ *sum += rv*value;
947
+ *sumsq += rv*pow(value, 2.0);
948
+ return rv;
949
+ } else {
950
+ fp2[8*(offset-1)+6] = *sum;
951
+ fp2[8*(offset-1)+7] = *sumsq;
952
+ *sum = 0.0;
953
+ *sumsq = 0.0;
954
+ }
955
+ }
956
+
957
+ //If we move to a new interval then skip iterating over a bunch of obviously non-overlapping intervals
958
+ if(offset && p2[8*offset+2] == 0) {
959
+ p2[8*offset] = tid;
960
+ p2[8*offset+1] = start;
961
+ if(start+size < end) p2[8*offset+2] = start+size;
962
+ else p2[8*offset+2] = end;
963
+ //nextPos(fp, size, p2+8*offset, tid); //We can actually skip uncovered intervals
964
+ }
965
+
966
+ //Add a new entry
967
+ while(!(rv = overlapsInterval(p2[8*offset], p2[8*offset+1], p2[8*offset+1] + size, tid, start, end))) {
968
+ p2[8*offset] = tid;
969
+ p2[8*offset+1] = start;
970
+ if(start+size < end) p2[8*offset+2] = start+size;
971
+ else p2[8*offset+2] = end;
972
+ //nextPos(fp, size, p2+8*offset, tid);
973
+ }
974
+ p2[8*offset+3] = rv;
975
+ fp2[8*offset+4] = value; //min
976
+ fp2[8*offset+5] = value; //max
977
+ *sum += rv * value;
978
+ *sumsq += rv * pow(value,2.0);
979
+ buffer->l += 32;
980
+ return rv;
981
+ }
982
+
983
+ //Returns 0 on success
984
+ int addIntervalValue(bigWigFile_t *fp, uint64_t *nEntries, double *sum, double *sumsq, bwZoomBuffer_t *buffer, uint32_t itemsPerSlot, uint32_t zoom, uint32_t tid, uint32_t start, uint32_t end, float value) {
985
+ bwZoomBuffer_t *newBuffer = NULL;
986
+ uint32_t rv;
987
+
988
+ while(start < end) {
989
+ rv = updateInterval(fp, buffer, sum, sumsq, zoom, tid, start, end, value);
990
+ if(!rv) {
991
+ //Allocate a new buffer
992
+ newBuffer = calloc(1, sizeof(bwZoomBuffer_t));
993
+ if(!newBuffer) return 1;
994
+ newBuffer->p = calloc(itemsPerSlot, 32);
995
+ if(!newBuffer->p) goto error;
996
+ newBuffer->m = itemsPerSlot*32;
997
+ memcpy(newBuffer->p, buffer->p+buffer->l-32, 4);
998
+ memcpy(newBuffer->p+4, buffer->p+buffer->l-28, 4);
999
+ ((uint32_t*) newBuffer->p)[2] = ((uint32_t*) newBuffer->p)[1] + zoom;
1000
+ *sum = *sumsq = 0.0;
1001
+ rv = updateInterval(fp, newBuffer, sum, sumsq, zoom, tid, start, end, value);
1002
+ if(!rv) goto error;
1003
+ buffer->next = newBuffer;
1004
+ buffer = buffer->next;
1005
+ *nEntries += 1;
1006
+ }
1007
+ start += rv;
1008
+ }
1009
+
1010
+ return 0;
1011
+
1012
+ error:
1013
+ if(newBuffer) {
1014
+ if(newBuffer->m) free(newBuffer->p);
1015
+ free(newBuffer);
1016
+ }
1017
+ return 2;
1018
+ }
1019
+
1020
+ //Get all of the intervals and add them to the appropriate zoomBuffer
1021
+ int constructZoomLevels(bigWigFile_t *fp) {
1022
+ bwOverlapIterator_t *it = NULL;
1023
+ double *sum = NULL, *sumsq = NULL;
1024
+ uint32_t i, j, k;
1025
+
1026
+ sum = calloc(fp->hdr->nLevels, sizeof(double));
1027
+ sumsq = calloc(fp->hdr->nLevels, sizeof(double));
1028
+ if(!sum || !sumsq) goto error;
1029
+
1030
+ for(i=0; i<fp->cl->nKeys; i++) {
1031
+ it = bwOverlappingIntervalsIterator(fp, fp->cl->chrom[i], 0, fp->cl->len[i], 100000);
1032
+ if(!it) goto error;
1033
+ while(it->data != NULL){
1034
+ for(j=0;j<it->intervals->l;j++){
1035
+ for(k=0;k<fp->hdr->nLevels;k++){
1036
+ if(addIntervalValue(fp, &(fp->writeBuffer->nNodes[k]), sum+k, sumsq+k, fp->writeBuffer->lastZoomBuffer[k], fp->hdr->bufSize/32, fp->hdr->zoomHdrs->level[k], i, it->intervals->start[j], it->intervals->end[j], it->intervals->value[j])) goto error;
1037
+ while(fp->writeBuffer->lastZoomBuffer[k]->next) fp->writeBuffer->lastZoomBuffer[k] = fp->writeBuffer->lastZoomBuffer[k]->next;
1038
+ }
1039
+ }
1040
+ it = bwIteratorNext(it);
1041
+ }
1042
+ bwIteratorDestroy(it);
1043
+
1044
+ }
1045
+
1046
+ //Make an index for each zoom level
1047
+ for(i=0; i<fp->hdr->nLevels; i++) {
1048
+ fp->hdr->zoomHdrs->idx[i] = calloc(1, sizeof(bwRTree_t));
1049
+ if(!fp->hdr->zoomHdrs->idx[i]) return 1;
1050
+ fp->hdr->zoomHdrs->idx[i]->blockSize = fp->writeBuffer->blockSize;
1051
+ }
1052
+
1053
+
1054
+ free(sum);
1055
+ free(sumsq);
1056
+
1057
+ return 0;
1058
+
1059
+ error:
1060
+ if(it) bwIteratorDestroy(it);
1061
+ if(sum) free(sum);
1062
+ if(sumsq) free(sumsq);
1063
+ return 1;
1064
+ }
1065
+
1066
+ int writeZoomLevels(bigWigFile_t *fp) {
1067
+ uint64_t offset1, offset2, idxSize = 0;
1068
+ uint32_t i, j, four = 0, last, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0;
1069
+ uint8_t wrote, one = 0;
1070
+ uint16_t actualNLevels = 0;
1071
+ int rv;
1072
+ bwLL *ll, *p;
1073
+ bwRTreeNode_t *root;
1074
+ bwZoomBuffer_t *zb, *zb2;
1075
+ bwWriteBuffer_t *wb = fp->writeBuffer;
1076
+ uLongf sz;
1077
+
1078
+ for(i=0; i<fp->hdr->nLevels; i++) {
1079
+ if(i) {
1080
+ //Is this a duplicate level?
1081
+ if(fp->writeBuffer->nNodes[i] == fp->writeBuffer->nNodes[i-1]) break;
1082
+ }
1083
+ actualNLevels++;
1084
+
1085
+ //reserve a uint32_t for the number of blocks
1086
+ fp->hdr->zoomHdrs->dataOffset[i] = bwTell(fp);
1087
+ fp->writeBuffer->nBlocks = 0;
1088
+ fp->writeBuffer->l = 24;
1089
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
1090
+ zb = fp->writeBuffer->firstZoomBuffer[i];
1091
+ fp->writeBuffer->firstIndexNode = NULL;
1092
+ fp->writeBuffer->currentIndexNode = NULL;
1093
+ while(zb) {
1094
+ sz = fp->hdr->bufSize;
1095
+ if(compress(wb->compressP, &sz, zb->p, zb->l) != Z_OK) return 2;
1096
+
1097
+ //write the data to disk
1098
+ if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 3;
1099
+
1100
+ //Add an entry into the index
1101
+ last = (zb->l - 32)>>2;
1102
+ if(addIndexEntry(fp, ((uint32_t*)zb->p)[0], ((uint32_t*)zb->p)[last], ((uint32_t*)zb->p)[1], ((uint32_t*)zb->p)[last+2], bwTell(fp)-sz, sz)) return 4;
1103
+
1104
+ wb->nBlocks++;
1105
+ wb->l = 24;
1106
+ zb = zb->next;
1107
+ }
1108
+ if(writeAtPos(&(wb->nBlocks), sizeof(uint32_t), 1, fp->hdr->zoomHdrs->dataOffset[i], fp->URL->x.fp)) return 5;
1109
+
1110
+ //Make the tree
1111
+ ll = fp->writeBuffer->firstIndexNode;
1112
+ if(ll == fp->writeBuffer->currentIndexNode) {
1113
+ root = ll->node;
1114
+ idxSize = 4 + 24*root->nChildren;
1115
+ } else {
1116
+ root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
1117
+ }
1118
+ if(!root) return 4;
1119
+ fp->hdr->zoomHdrs->idx[i]->root = root;
1120
+
1121
+ ll = fp->writeBuffer->firstIndexNode;
1122
+ while(ll) {
1123
+ p = ll->next;
1124
+ free(ll);
1125
+ ll=p;
1126
+ }
1127
+
1128
+
1129
+ //write the index
1130
+ wrote = 0;
1131
+ fp->hdr->zoomHdrs->indexOffset[i] = bwTell(fp);
1132
+ four = IDX_MAGIC;
1133
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
1134
+ root = fp->hdr->zoomHdrs->idx[i]->root;
1135
+ if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
1136
+ if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
1137
+ if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
1138
+ if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1139
+ if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
1140
+ if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
1141
+ if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
1142
+ four = fp->hdr->bufSize/32;
1143
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
1144
+ four = 0;
1145
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
1146
+ fp->hdr->zoomHdrs->idx[i]->rootOffset = bwTell(fp);
1147
+
1148
+ //Write the root node, since writeIndexTree writes the children and fills in the offset
1149
+ offset1 = bwTell(fp);
1150
+ if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
1151
+ if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
1152
+ if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
1153
+ for(j=0; j<root->nChildren; j++) {
1154
+ vector[0] = root->chrIdxStart[j];
1155
+ vector[1] = root->baseStart[j];
1156
+ vector[2] = root->chrIdxEnd[j];
1157
+ vector[3] = root->baseEnd[j];
1158
+ if(root->isLeaf) {
1159
+ //Include the offset and size
1160
+ if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
1161
+ if(fwrite(&(root->dataOffset[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
1162
+ if(fwrite(&(root->x.size[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
1163
+ } else {
1164
+ if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
1165
+ }
1166
+ }
1167
+
1168
+ while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->hdr->zoomHdrs->idx[i]->root, &wrote, 0)) == 0) {
1169
+ if(!wrote) break;
1170
+ wrote = 0;
1171
+ }
1172
+
1173
+ if(rv || wrote) return 6;
1174
+
1175
+ //Save the file position
1176
+ offset2 = bwTell(fp);
1177
+
1178
+ //Write the offsets
1179
+ if(writeIndexOffsets(fp->URL->x.fp, root, offset1)) return 2;
1180
+
1181
+ //Move the file pointer back to the end
1182
+ bwSetPos(fp, offset2);
1183
+
1184
+
1185
+ //Free the linked list
1186
+ zb = fp->writeBuffer->firstZoomBuffer[i];
1187
+ while(zb) {
1188
+ if(zb->p) free(zb->p);
1189
+ zb2 = zb->next;
1190
+ free(zb);
1191
+ zb = zb2;
1192
+ }
1193
+ fp->writeBuffer->firstZoomBuffer[i] = NULL;
1194
+ }
1195
+
1196
+ //Free unused zoom levels
1197
+ for(i=actualNLevels; i<fp->hdr->nLevels; i++) {
1198
+ zb = fp->writeBuffer->firstZoomBuffer[i];
1199
+ while(zb) {
1200
+ if(zb->p) free(zb->p);
1201
+ zb2 = zb->next;
1202
+ free(zb);
1203
+ zb = zb2;
1204
+ }
1205
+ fp->writeBuffer->firstZoomBuffer[i] = NULL;
1206
+ }
1207
+
1208
+ //Write the zoom headers to disk
1209
+ offset1 = bwTell(fp);
1210
+ if(bwSetPos(fp, 0x40)) return 7;
1211
+ four = 0;
1212
+ for(i=0; i<actualNLevels; i++) {
1213
+ if(fwrite(&(fp->hdr->zoomHdrs->level[i]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
1214
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1215
+ if(fwrite(&(fp->hdr->zoomHdrs->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 10;
1216
+ if(fwrite(&(fp->hdr->zoomHdrs->indexOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 11;
1217
+ }
1218
+
1219
+ //Write the number of levels if needed
1220
+ if(bwSetPos(fp, 0x6)) return 12;
1221
+ if(fwrite(&actualNLevels, sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 13;
1222
+
1223
+ if(bwSetPos(fp, offset1)) return 14;
1224
+
1225
+ return 0;
1226
+ }
1227
+
1228
+ //0 on success
1229
+ int bwFinalize(bigWigFile_t *fp) {
1230
+ uint32_t four;
1231
+ uint64_t offset;
1232
+ if(!fp->isWrite) return 0;
1233
+
1234
+ //Flush the buffer
1235
+ if(flushBuffer(fp)) return 1; //Valgrind reports a problem here!
1236
+
1237
+ //Update the data section with the number of blocks written
1238
+ if(fp->hdr) {
1239
+ if(writeAtPos(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->hdr->dataOffset, fp->URL->x.fp)) return 2;
1240
+ } else {
1241
+ //The header wasn't written!
1242
+ return 1;
1243
+ }
1244
+
1245
+ //write the bufferSize
1246
+ if(fp->hdr->bufSize) {
1247
+ if(writeAtPos(&(fp->hdr->bufSize), sizeof(uint32_t), 1, 0x34, fp->URL->x.fp)) return 2;
1248
+ }
1249
+
1250
+ //write the summary information
1251
+ if(writeSummary(fp)) return 3;
1252
+
1253
+ //Convert the linked-list to a tree and write to disk
1254
+ if(writeIndex(fp)) return 4;
1255
+
1256
+ //Zoom level stuff here?
1257
+ if(fp->hdr->nLevels && fp->writeBuffer->nBlocks) {
1258
+ offset = bwTell(fp);
1259
+ if(makeZoomLevels(fp)) return 5;
1260
+ if(constructZoomLevels(fp)) return 6;
1261
+ bwSetPos(fp, offset);
1262
+ if(writeZoomLevels(fp)) return 7; //This write nLevels as well
1263
+ }
1264
+
1265
+ //write magic at the end of the file
1266
+ four = BIGWIG_MAGIC;
1267
+ if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
1268
+
1269
+ return 0;
1270
+ }
1271
+
1272
+ /*
1273
+ data chunk:
1274
+ uint64_t number of blocks (2 / 110851)
1275
+ some blocks
1276
+
1277
+ an uncompressed data block (24 byte header)
1278
+ uint32_t Tid 0-4
1279
+ uint32_t start 4-8
1280
+ uint32_t end 8-12
1281
+ uint32_t step 12-16
1282
+ uint32_t span 16-20
1283
+ uint8_t type 20
1284
+ uint8_t padding
1285
+ uint16_t nItems 22
1286
+ nItems of:
1287
+ type 1: //12 bytes
1288
+ uint32_t start
1289
+ uint32_t end
1290
+ float value
1291
+ type 2: //8 bytes
1292
+ uint32_t start
1293
+ float value
1294
+ type 3: //4 bytes
1295
+ float value
1296
+
1297
+ data block index header
1298
+ uint32_t magic
1299
+ uint32_t blockSize (256 in the example) maximum number of children
1300
+ uint64_t number of blocks (2 / 110851)
1301
+ uint32_t startTid
1302
+ uint32_t startPos
1303
+ uint32_t endTid
1304
+ uint32_t endPos
1305
+ uint64_t index size? (0x1E7 / 0x1AF0401F) index address?
1306
+ uint32_t itemsPerBlock (1 / 1) 1024 for zoom headers
1307
+ uint32_t padding
1308
+
1309
+ data block index node non-leaf (4 bytes + 24*nChildren)
1310
+ uint8_t isLeaf
1311
+ uint8_t padding
1312
+ uint16_t nChildren (2, 256)
1313
+ uint32_t startTid
1314
+ uint32_t startPos
1315
+ uint32_t endTid
1316
+ uint32_t endPos
1317
+ uint64_t dataOffset (0x1AF05853, 0x1AF07057)
1318
+
1319
+ data block index node leaf (4 bytes + 32*nChildren)
1320
+ uint8_t isLeaf
1321
+ uint8_t padding
1322
+ uint16_t nChildren (2)
1323
+ uint32_t startTid
1324
+ uint32_t startPos
1325
+ uint32_t endTid
1326
+ uint32_t endPos
1327
+ uint64_t dataOffset (0x198, 0x1CF)
1328
+ uint64_t dataSize (55, 24)
1329
+
1330
+ zoom data block
1331
+ uint32_t number of blocks (10519766)
1332
+ some data blocks
1333
+ */