bio-bigwig 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/bio/bigwig/libBigWig/LICENSE +22 -0
- data/ext/bio/bigwig/libBigWig/bigWig.h +606 -0
- data/ext/bio/bigwig/libBigWig/bigWigIO.h +110 -0
- data/ext/bio/bigwig/libBigWig/bwCommon.h +74 -0
- data/ext/bio/bigwig/libBigWig/bwRead.c +438 -0
- data/ext/bio/bigwig/libBigWig/bwStats.c +537 -0
- data/ext/bio/bigwig/libBigWig/bwValues.c +803 -0
- data/ext/bio/bigwig/libBigWig/bwValues.h +77 -0
- data/ext/bio/bigwig/libBigWig/bwWrite.c +1333 -0
- data/ext/bio/bigwig/libBigWig/io.c +296 -0
- data/lib/bio/bigwig/version.rb +1 -1
- metadata +11 -1
@@ -0,0 +1,1333 @@
|
|
1
|
+
#include <limits.h>
|
2
|
+
#include <float.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <string.h>
|
5
|
+
#include <math.h>
|
6
|
+
#include "bigWig.h"
|
7
|
+
#include "bwCommon.h"
|
8
|
+
|
9
|
+
/// @cond SKIP
|
10
|
+
//Internal linked-list element carrying per-interval summary numbers.
//NOTE(review): the field meanings below are inferred from the names only; the
//code that fills and consumes this struct is outside this section - confirm.
struct val_t {
    uint32_t tid;        //presumably the chromosome/contig index
    uint32_t start;      //presumably the first covered position
    uint32_t nBases;     //presumably the number of covered bases
    float min;
    float max;
    float sum;
    float sumsq;
    double scalar;
    struct val_t *next;  //Following element in the list
};
|
18
|
+
/// @endcond
|
19
|
+
|
20
|
+
//Create a chromList_t and attach it to a bigWigFile_t *. Returns NULL on error
|
21
|
+
//Note that chroms and lengths are duplicated, so you MUST free the input
|
22
|
+
chromList_t *bwCreateChromList(const char* const* chroms, const uint32_t *lengths, int64_t n) {
|
23
|
+
int64_t i = 0;
|
24
|
+
chromList_t *cl = calloc(1, sizeof(chromList_t));
|
25
|
+
if(!cl) return NULL;
|
26
|
+
|
27
|
+
cl->nKeys = n;
|
28
|
+
cl->chrom = malloc(sizeof(char*)*n);
|
29
|
+
cl->len = malloc(sizeof(uint32_t)*n);
|
30
|
+
if(!cl->chrom) goto error;
|
31
|
+
if(!cl->len) goto error;
|
32
|
+
|
33
|
+
for(i=0; i<n; i++) {
|
34
|
+
cl->len[i] = lengths[i];
|
35
|
+
cl->chrom[i] = bwStrdup(chroms[i]);
|
36
|
+
if(!cl->chrom[i]) goto error;
|
37
|
+
}
|
38
|
+
|
39
|
+
return cl;
|
40
|
+
|
41
|
+
error:
|
42
|
+
if(i) {
|
43
|
+
int64_t j;
|
44
|
+
for(j=0; j<i; j++) free(cl->chrom[j]);
|
45
|
+
}
|
46
|
+
if(cl) {
|
47
|
+
if(cl->chrom) free(cl->chrom);
|
48
|
+
if(cl->len) free(cl->len);
|
49
|
+
free(cl);
|
50
|
+
}
|
51
|
+
return NULL;
|
52
|
+
}
|
53
|
+
|
54
|
+
//If maxZooms == 0, then 0 is used (i.e., there are no zoom levels). If maxZooms < 0 or > 65535 then 10 is used.
|
55
|
+
//TODO allow changing bufSize and blockSize
|
56
|
+
int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms) {
|
57
|
+
if(!fp->isWrite) return 1;
|
58
|
+
bigWigHdr_t *hdr = calloc(1, sizeof(bigWigHdr_t));
|
59
|
+
if(!hdr) return 2;
|
60
|
+
|
61
|
+
hdr->version = 4;
|
62
|
+
if(maxZooms < 0 || maxZooms > 65535) {
|
63
|
+
hdr->nLevels = 10;
|
64
|
+
} else {
|
65
|
+
hdr->nLevels = maxZooms;
|
66
|
+
}
|
67
|
+
|
68
|
+
hdr->bufSize = 32768; //When the file is finalized this is reset if fp->writeBuffer->compressPsz is 0!
|
69
|
+
hdr->minVal = DBL_MAX;
|
70
|
+
hdr->maxVal = DBL_MIN;
|
71
|
+
fp->hdr = hdr;
|
72
|
+
fp->writeBuffer->blockSize = 64;
|
73
|
+
|
74
|
+
//Allocate the writeBuffer buffers
|
75
|
+
fp->writeBuffer->compressPsz = compressBound(hdr->bufSize);
|
76
|
+
fp->writeBuffer->compressP = malloc(fp->writeBuffer->compressPsz);
|
77
|
+
if(!fp->writeBuffer->compressP) return 3;
|
78
|
+
fp->writeBuffer->p = calloc(1,hdr->bufSize);
|
79
|
+
if(!fp->writeBuffer->p) return 4;
|
80
|
+
|
81
|
+
return 0;
|
82
|
+
}
|
83
|
+
|
84
|
+
//Overwrite nmemb items of sz bytes each at absolute file offset pos, then move the
//stream back to where it was before the call.
//Returns 0 on success, 1 if pos cannot be seeked to, 2 on a short write, 3 if the
//original position cannot be restored and 4 if the current position cannot be read.
static int writeAtPos(void *ptr, size_t sz, size_t nmemb, size_t pos, FILE *fp) {
    long curpos = ftell(fp);
    if(curpos < 0) return 4; //ftell reports failure as -1; never seek back to that
    if(pos > (size_t) LONG_MAX) return 1; //fseek takes a long offset
    if(fseek(fp, (long) pos, SEEK_SET)) return 1;
    if(fwrite(ptr, sz, nmemb, fp) != nmemb) return 2;
    if(fseek(fp, curpos, SEEK_SET)) return 3;
    return 0;
}
|
92
|
+
|
93
|
+
//We lose keySize bytes on error
|
94
|
+
static int writeChromList(FILE *fp, chromList_t *cl) {
|
95
|
+
uint16_t k;
|
96
|
+
uint32_t j, magic = CIRTREE_MAGIC;
|
97
|
+
uint32_t nperblock = (cl->nKeys > 0x7FFF) ? 0x7FFF : cl->nKeys; //Items per leaf/non-leaf, there are no unsigned ints in java :(
|
98
|
+
uint32_t nblocks, keySize = 0, valSize = 8; //In theory valSize could be optimized, in practice that'd be annoying
|
99
|
+
uint64_t i, nonLeafEnd, leafSize, nextLeaf;
|
100
|
+
uint8_t eight;
|
101
|
+
int64_t i64;
|
102
|
+
char *chrom;
|
103
|
+
size_t l;
|
104
|
+
|
105
|
+
if(cl->nKeys > 1073676289) {
|
106
|
+
fprintf(stderr, "[writeChromList] Error: Currently only 1,073,676,289 contigs are supported. If you really need more then please post a request on github.\n");
|
107
|
+
return 1;
|
108
|
+
}
|
109
|
+
nblocks = cl->nKeys/nperblock;
|
110
|
+
nblocks += ((cl->nKeys % nperblock) > 0)?1:0;
|
111
|
+
|
112
|
+
for(i64=0; i64<cl->nKeys; i64++) {
|
113
|
+
l = strlen(cl->chrom[i64]);
|
114
|
+
if(l>keySize) keySize = l;
|
115
|
+
}
|
116
|
+
l--; //We don't null terminate strings, because schiess mich tot
|
117
|
+
chrom = calloc(keySize, sizeof(char));
|
118
|
+
|
119
|
+
//Write the root node of a largely pointless tree
|
120
|
+
if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 1;
|
121
|
+
if(fwrite(&nperblock, sizeof(uint32_t), 1, fp) != 1) return 2;
|
122
|
+
if(fwrite(&keySize, sizeof(uint32_t), 1, fp) != 1) return 3;
|
123
|
+
if(fwrite(&valSize, sizeof(uint32_t), 1, fp) != 1) return 4;
|
124
|
+
if(fwrite(&(cl->nKeys), sizeof(uint64_t), 1, fp) != 1) return 5;
|
125
|
+
|
126
|
+
//Padding?
|
127
|
+
i=0;
|
128
|
+
if(fwrite(&i, sizeof(uint64_t), 1, fp) != 1) return 6;
|
129
|
+
|
130
|
+
//Do we need a non-leaf node?
|
131
|
+
if(nblocks > 1) {
|
132
|
+
eight = 0;
|
133
|
+
if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 7;
|
134
|
+
if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 8; //padding
|
135
|
+
if(fwrite(&nblocks, sizeof(uint16_t), 1, fp) != 1) return 8;
|
136
|
+
nonLeafEnd = ftell(fp) + nperblock * (keySize + 8);
|
137
|
+
leafSize = nperblock * (keySize + 8) + 4;
|
138
|
+
for(i=0; i<nblocks; i++) { //Why yes, this is pointless
|
139
|
+
chrom = strncpy(chrom, cl->chrom[i * nperblock], keySize);
|
140
|
+
nextLeaf = nonLeafEnd + i * leafSize;
|
141
|
+
if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
|
142
|
+
if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
|
143
|
+
}
|
144
|
+
for(i=0; i<keySize; i++) chrom[i] = '\0';
|
145
|
+
nextLeaf = 0;
|
146
|
+
for(i=nblocks; i<nperblock; i++) {
|
147
|
+
if(fwrite(chrom, keySize, 1, fp) != 1) return 9;
|
148
|
+
if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 10;
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
//Write the leaves
|
153
|
+
nextLeaf = 0;
|
154
|
+
for(i=0, j=0; i<nblocks; i++) {
|
155
|
+
eight = 1;
|
156
|
+
if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 11;
|
157
|
+
eight = 0;
|
158
|
+
if(fwrite(&eight, sizeof(uint8_t), 1, fp) != 1) return 12;
|
159
|
+
if(cl->nKeys - j < nperblock) {
|
160
|
+
k = cl->nKeys - j;
|
161
|
+
if(fwrite(&k, sizeof(uint16_t), 1, fp) != 1) return 13;
|
162
|
+
} else {
|
163
|
+
if(fwrite(&nperblock, sizeof(uint16_t), 1, fp) != 1) return 13;
|
164
|
+
}
|
165
|
+
for(k=0; k<nperblock; k++) {
|
166
|
+
if(j>=cl->nKeys) {
|
167
|
+
if(chrom[0]) {
|
168
|
+
for(l=0; l<keySize; l++) chrom[l] = '\0';
|
169
|
+
}
|
170
|
+
if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
|
171
|
+
if(fwrite(&nextLeaf, sizeof(uint64_t), 1, fp) != 1) return 16;
|
172
|
+
} else {
|
173
|
+
chrom = strncpy(chrom, cl->chrom[j], keySize);
|
174
|
+
if(fwrite(chrom, keySize, 1, fp) != 1) return 15;
|
175
|
+
if(fwrite(&j, sizeof(uint32_t), 1, fp) != 1) return 16;
|
176
|
+
if(fwrite(&(cl->len[j++]), sizeof(uint32_t), 1, fp) != 1) return 17;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
}
|
180
|
+
|
181
|
+
free(chrom);
|
182
|
+
return 0;
|
183
|
+
}
|
184
|
+
|
185
|
+
//returns 0 on success
|
186
|
+
//Still need to fill in indexOffset
|
187
|
+
int bwWriteHdr(bigWigFile_t *bw) {
|
188
|
+
uint32_t magic = BIGWIG_MAGIC;
|
189
|
+
uint16_t two = 4;
|
190
|
+
FILE *fp;
|
191
|
+
const uint8_t pbuff[58] = {0}; // 58 bytes of nothing
|
192
|
+
const void *p = (const void *)&pbuff;
|
193
|
+
if(!bw->isWrite) return 1;
|
194
|
+
|
195
|
+
//The header itself, largely just reserving space...
|
196
|
+
fp = bw->URL->x.fp;
|
197
|
+
if(!fp) return 2;
|
198
|
+
if(fseek(fp, 0, SEEK_SET)) return 3;
|
199
|
+
if(fwrite(&magic, sizeof(uint32_t), 1, fp) != 1) return 4;
|
200
|
+
if(fwrite(&two, sizeof(uint16_t), 1, fp) != 1) return 5;
|
201
|
+
if(fwrite(p, sizeof(uint8_t), 58, fp) != 58) return 6;
|
202
|
+
|
203
|
+
//Empty zoom headers
|
204
|
+
if(bw->hdr->nLevels) {
|
205
|
+
for(two=0; two<bw->hdr->nLevels; two++) {
|
206
|
+
if(fwrite(p, sizeof(uint8_t), 24, fp) != 24) return 9;
|
207
|
+
}
|
208
|
+
}
|
209
|
+
|
210
|
+
//Update summaryOffset and write an empty summary block
|
211
|
+
bw->hdr->summaryOffset = ftell(fp);
|
212
|
+
if(fwrite(p, sizeof(uint8_t), 40, fp) != 40) return 10;
|
213
|
+
if(writeAtPos(&(bw->hdr->summaryOffset), sizeof(uint64_t), 1, 0x2c, fp)) return 11;
|
214
|
+
|
215
|
+
//Write the chromosome list as a stupid freaking tree (because let's TREE ALL THE THINGS!!!)
|
216
|
+
bw->hdr->ctOffset = ftell(fp);
|
217
|
+
if(writeChromList(fp, bw->cl)) return 7;
|
218
|
+
if(writeAtPos(&(bw->hdr->ctOffset), sizeof(uint64_t), 1, 0x8, fp)) return 8;
|
219
|
+
|
220
|
+
//Update the dataOffset
|
221
|
+
bw->hdr->dataOffset = ftell(fp);
|
222
|
+
if(writeAtPos(&bw->hdr->dataOffset, sizeof(uint64_t), 1, 0x10, fp)) return 12;
|
223
|
+
|
224
|
+
//Save space for the number of blocks
|
225
|
+
if(fwrite(p, sizeof(uint8_t), 8, fp) != 8) return 13;
|
226
|
+
|
227
|
+
return 0;
|
228
|
+
}
|
229
|
+
|
230
|
+
//Append leaf to the write buffer's linked list of index nodes and make it the current node.
//Returns 0 on success, 1 if the list element cannot be allocated.
static int insertIndexNode(bigWigFile_t *fp, bwRTreeNode_t *leaf) {
    bwLL *link = malloc(sizeof(bwLL));
    if(link == NULL) return 1;

    link->node = leaf;
    link->next = NULL;

    if(fp->writeBuffer->firstIndexNode) {
        //Non-empty list: hook the new element behind the current tail
        fp->writeBuffer->currentIndexNode->next = link;
    } else {
        fp->writeBuffer->firstIndexNode = link;
    }
    fp->writeBuffer->currentIndexNode = link;

    return 0;
}
|
244
|
+
|
245
|
+
//0 on success
|
246
|
+
static int appendIndexNodeEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
|
247
|
+
bwLL *n = fp->writeBuffer->currentIndexNode;
|
248
|
+
if(!n) return 1;
|
249
|
+
if(n->node->nChildren >= fp->writeBuffer->blockSize) return 2;
|
250
|
+
|
251
|
+
n->node->chrIdxStart[n->node->nChildren] = tid0;
|
252
|
+
n->node->baseStart[n->node->nChildren] = start;
|
253
|
+
n->node->chrIdxEnd[n->node->nChildren] = tid1;
|
254
|
+
n->node->baseEnd[n->node->nChildren] = end;
|
255
|
+
n->node->dataOffset[n->node->nChildren] = offset;
|
256
|
+
n->node->x.size[n->node->nChildren] = size;
|
257
|
+
n->node->nChildren++;
|
258
|
+
return 0;
|
259
|
+
}
|
260
|
+
|
261
|
+
//Returns 0 on success
|
262
|
+
static int addIndexEntry(bigWigFile_t *fp, uint32_t tid0, uint32_t tid1, uint32_t start, uint32_t end, uint64_t offset, uint64_t size) {
|
263
|
+
bwRTreeNode_t *node;
|
264
|
+
|
265
|
+
if(appendIndexNodeEntry(fp, tid0, tid1, start, end, offset, size)) {
|
266
|
+
//The last index node is full, we need to add a new one
|
267
|
+
node = calloc(1, sizeof(bwRTreeNode_t));
|
268
|
+
if(!node) return 1;
|
269
|
+
|
270
|
+
//Allocate and set the fields
|
271
|
+
node->isLeaf = 1;
|
272
|
+
node->nChildren = 1;
|
273
|
+
node->chrIdxStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
|
274
|
+
if(!node->chrIdxStart) goto error;
|
275
|
+
node->baseStart = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
|
276
|
+
if(!node->baseStart) goto error;
|
277
|
+
node->chrIdxEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
|
278
|
+
if(!node->chrIdxEnd) goto error;
|
279
|
+
node->baseEnd = malloc(sizeof(uint32_t)*fp->writeBuffer->blockSize);
|
280
|
+
if(!node->baseEnd) goto error;
|
281
|
+
node->dataOffset = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
|
282
|
+
if(!node->dataOffset) goto error;
|
283
|
+
node->x.size = malloc(sizeof(uint64_t)*fp->writeBuffer->blockSize);
|
284
|
+
if(!node->x.size) goto error;
|
285
|
+
|
286
|
+
node->chrIdxStart[0] = tid0;
|
287
|
+
node->baseStart[0] = start;
|
288
|
+
node->chrIdxEnd[0] = tid1;
|
289
|
+
node->baseEnd[0] = end;
|
290
|
+
node->dataOffset[0] = offset;
|
291
|
+
node->x.size[0] = size;
|
292
|
+
|
293
|
+
if(insertIndexNode(fp, node)) goto error;
|
294
|
+
}
|
295
|
+
|
296
|
+
return 0;
|
297
|
+
|
298
|
+
error:
|
299
|
+
if(node->chrIdxStart) free(node->chrIdxStart);
|
300
|
+
if(node->baseStart) free(node->baseStart);
|
301
|
+
if(node->chrIdxEnd) free(node->chrIdxEnd);
|
302
|
+
if(node->baseEnd) free(node->baseEnd);
|
303
|
+
if(node->dataOffset) free(node->dataOffset);
|
304
|
+
if(node->x.size) free(node->x.size);
|
305
|
+
return 2;
|
306
|
+
}
|
307
|
+
|
308
|
+
/*
|
309
|
+
* TODO:
|
310
|
+
* The buffer size and compression sz need to be determined elsewhere (and p and compressP filled in!)
|
311
|
+
*/
|
312
|
+
static int flushBuffer(bigWigFile_t *fp) {
|
313
|
+
bwWriteBuffer_t *wb = fp->writeBuffer;
|
314
|
+
uLongf sz = wb->compressPsz;
|
315
|
+
uint16_t nItems;
|
316
|
+
if(!fp->writeBuffer->l) return 0;
|
317
|
+
if(!wb->ltype) return 0;
|
318
|
+
|
319
|
+
//Fill in the header
|
320
|
+
if(!memcpy(wb->p, &(wb->tid), sizeof(uint32_t))) return 1;
|
321
|
+
if(!memcpy(wb->p+4, &(wb->start), sizeof(uint32_t))) return 2;
|
322
|
+
if(!memcpy(wb->p+8, &(wb->end), sizeof(uint32_t))) return 3;
|
323
|
+
if(!memcpy(wb->p+12, &(wb->step), sizeof(uint32_t))) return 4;
|
324
|
+
if(!memcpy(wb->p+16, &(wb->span), sizeof(uint32_t))) return 5;
|
325
|
+
if(!memcpy(wb->p+20, &(wb->ltype), sizeof(uint8_t))) return 6;
|
326
|
+
//1 byte padding
|
327
|
+
//Determine the number of items
|
328
|
+
switch(wb->ltype) {
|
329
|
+
case 1:
|
330
|
+
nItems = (wb->l-24)/12;
|
331
|
+
break;
|
332
|
+
case 2:
|
333
|
+
nItems = (wb->l-24)/8;
|
334
|
+
break;
|
335
|
+
case 3:
|
336
|
+
nItems = (wb->l-24)/4;
|
337
|
+
break;
|
338
|
+
default:
|
339
|
+
return 7;
|
340
|
+
}
|
341
|
+
if(!memcpy(wb->p+22, &nItems, sizeof(uint16_t))) return 8;
|
342
|
+
|
343
|
+
if(sz) {
|
344
|
+
//compress
|
345
|
+
if(compress(wb->compressP, &sz, wb->p, wb->l) != Z_OK) return 9;
|
346
|
+
|
347
|
+
//write the data to disk
|
348
|
+
if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 10;
|
349
|
+
} else {
|
350
|
+
sz = wb->l;
|
351
|
+
if(fwrite(wb->p, sizeof(uint8_t), wb->l, fp->URL->x.fp) != wb->l) return 10;
|
352
|
+
}
|
353
|
+
|
354
|
+
//Add an entry into the index
|
355
|
+
if(addIndexEntry(fp, wb->tid, wb->tid, wb->start, wb->end, bwTell(fp)-sz, sz)) return 11;
|
356
|
+
|
357
|
+
wb->nBlocks++;
|
358
|
+
wb->l = 24;
|
359
|
+
return 0;
|
360
|
+
}
|
361
|
+
|
362
|
+
//Fold one entry (span bases with value val) into the file-wide summary statistics
//and into the running counters of the write buffer.
static void updateStats(bigWigFile_t *fp, uint32_t span, float val) {
    //The two tests must be independent: with "else if" a value that lowers the
    //minimum (always the case for the first value) could never raise the maximum.
    if(val < fp->hdr->minVal) fp->hdr->minVal = val;
    if(val > fp->hdr->maxVal) fp->hdr->maxVal = val;
    fp->hdr->nBasesCovered += span;
    fp->hdr->sumData += span*val;
    fp->hdr->sumSquared += span*pow(val,2);

    fp->writeBuffer->nEntries++;
    fp->writeBuffer->runningWidthSum += span;
}
|
372
|
+
|
373
|
+
//12 bytes per entry
|
374
|
+
int bwAddIntervals(bigWigFile_t *fp, const char* const* chrom, const uint32_t *start, const uint32_t *end, const float *values, uint32_t n) {
|
375
|
+
uint32_t tid = 0, i;
|
376
|
+
const char *lastChrom = NULL;
|
377
|
+
bwWriteBuffer_t *wb = fp->writeBuffer;
|
378
|
+
if(!n) return 0; //Not an error per se
|
379
|
+
if(!fp->isWrite) return 1;
|
380
|
+
if(!wb) return 2;
|
381
|
+
|
382
|
+
//Flush if needed
|
383
|
+
if(wb->ltype != 1) if(flushBuffer(fp)) return 3;
|
384
|
+
if(wb->l+36 > fp->hdr->bufSize) if(flushBuffer(fp)) return 4;
|
385
|
+
lastChrom = chrom[0];
|
386
|
+
tid = bwGetTid(fp, chrom[0]);
|
387
|
+
if(tid == (uint32_t) -1) return 5;
|
388
|
+
if(tid != wb->tid) {
|
389
|
+
if(flushBuffer(fp)) return 6;
|
390
|
+
wb->tid = tid;
|
391
|
+
wb->start = start[0];
|
392
|
+
wb->end = end[0];
|
393
|
+
}
|
394
|
+
|
395
|
+
//Ensure that everything is set correctly
|
396
|
+
wb->ltype = 1;
|
397
|
+
if(wb->l <= 24) {
|
398
|
+
wb->start = start[0];
|
399
|
+
wb->span = 0;
|
400
|
+
wb->step = 0;
|
401
|
+
}
|
402
|
+
if(!memcpy(wb->p+wb->l, start, sizeof(uint32_t))) return 7;
|
403
|
+
if(!memcpy(wb->p+wb->l+4, end, sizeof(uint32_t))) return 8;
|
404
|
+
if(!memcpy(wb->p+wb->l+8, values, sizeof(float))) return 9;
|
405
|
+
updateStats(fp, end[0]-start[0], values[0]);
|
406
|
+
wb->l += 12;
|
407
|
+
|
408
|
+
for(i=1; i<n; i++) {
|
409
|
+
if(strcmp(chrom[i],lastChrom) != 0) {
|
410
|
+
wb->end = end[i-1];
|
411
|
+
flushBuffer(fp);
|
412
|
+
lastChrom = chrom[i];
|
413
|
+
tid = bwGetTid(fp, chrom[i]);
|
414
|
+
if(tid == (uint32_t) -1) return 10;
|
415
|
+
wb->tid = tid;
|
416
|
+
wb->start = start[i];
|
417
|
+
}
|
418
|
+
if(wb->l+12 > fp->hdr->bufSize) { //12 bytes/entry
|
419
|
+
wb->end = end[i-1];
|
420
|
+
flushBuffer(fp);
|
421
|
+
wb->start = start[i];
|
422
|
+
}
|
423
|
+
if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 11;
|
424
|
+
if(!memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t))) return 12;
|
425
|
+
if(!memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float))) return 13;
|
426
|
+
updateStats(fp, end[i]-start[i], values[i]);
|
427
|
+
wb->l += 12;
|
428
|
+
}
|
429
|
+
wb->end = end[i-1];
|
430
|
+
|
431
|
+
return 0;
|
432
|
+
}
|
433
|
+
|
434
|
+
//Append n more (start, end, value) entries to the pending type 1 block, i.e. on the
//chromosome of the preceding bwAddIntervals call. 12 bytes per entry.
//Returns 0 on success (including n == 0), 1 if fp is not open for writing, 2 if there
//is no write buffer, 3 if the pending block is not of type 1 and 7 if flushing a full
//block fails.
int bwAppendIntervals(bigWigFile_t *fp, const uint32_t *start, const uint32_t *end, const float *values, uint32_t n) {
    uint32_t i;
    bwWriteBuffer_t *wb = fp->writeBuffer;
    if(!n) return 0;
    if(!fp->isWrite) return 1;
    if(!wb) return 2;
    if(wb->ltype != 1) return 3;

    for(i=0; i<n; i++) {
        if(wb->l+12 > fp->hdr->bufSize) {
            if(i>0) { //otherwise it's already set
                wb->end = end[i-1];
            }
            if(flushBuffer(fp)) return 7; //A failed write must not be silently dropped
            wb->start = start[i];
        }
        memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t));
        memcpy(wb->p+wb->l+4, &(end[i]), sizeof(uint32_t));
        memcpy(wb->p+wb->l+8, &(values[i]), sizeof(float));
        updateStats(fp, end[i]-start[i], values[i]);
        wb->l += 12;
    }
    wb->end = end[i-1];

    return 0;
}
|
460
|
+
|
461
|
+
//8 bytes per entry
|
462
|
+
int bwAddIntervalSpans(bigWigFile_t *fp, const char *chrom, const uint32_t *start, uint32_t span, const float *values, uint32_t n) {
|
463
|
+
uint32_t i, tid;
|
464
|
+
bwWriteBuffer_t *wb = fp->writeBuffer;
|
465
|
+
if(!n) return 0;
|
466
|
+
if(!fp->isWrite) return 1;
|
467
|
+
if(!wb) return 2;
|
468
|
+
if(wb->ltype != 2) if(flushBuffer(fp)) return 3;
|
469
|
+
if(flushBuffer(fp)) return 4;
|
470
|
+
|
471
|
+
tid = bwGetTid(fp, chrom);
|
472
|
+
if(tid == (uint32_t) -1) return 5;
|
473
|
+
wb->tid = tid;
|
474
|
+
wb->start = start[0];
|
475
|
+
wb->step = 0;
|
476
|
+
wb->span = span;
|
477
|
+
wb->ltype = 2;
|
478
|
+
|
479
|
+
for(i=0; i<n; i++) {
|
480
|
+
if(wb->l + 8 >= fp->hdr->bufSize) { //8 bytes/entry
|
481
|
+
if(i) wb->end = start[i-1]+span;
|
482
|
+
flushBuffer(fp);
|
483
|
+
wb->start = start[i];
|
484
|
+
}
|
485
|
+
if(!memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t))) return 5;
|
486
|
+
if(!memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float))) return 6;
|
487
|
+
updateStats(fp, span, values[i]);
|
488
|
+
wb->l += 8;
|
489
|
+
}
|
490
|
+
wb->end = start[n-1] + span;
|
491
|
+
|
492
|
+
return 0;
|
493
|
+
}
|
494
|
+
|
495
|
+
//Append n more (start, value) entries to the pending type 2 block, reusing the span
//stored in the write buffer. 8 bytes per entry.
//Returns 0 on success (including n == 0), 1 if fp is not open for writing, 2 if there is
//no write buffer, 3 if the pending block is not of type 2 and 6 if flushing a full block fails.
int bwAppendIntervalSpans(bigWigFile_t *fp, const uint32_t *start, const float *values, uint32_t n) {
    uint32_t i;
    bwWriteBuffer_t *wb = fp->writeBuffer;
    if(!n) return 0;
    if(!fp->isWrite) return 1;
    if(!wb) return 2;
    if(wb->ltype != 2) return 3;

    for(i=0; i<n; i++) {
        if(wb->l + 8 >= fp->hdr->bufSize) {
            if(i) wb->end = start[i-1]+wb->span;
            if(flushBuffer(fp)) return 6; //A failed write must not be silently dropped
            wb->start = start[i];
        }
        memcpy(wb->p+wb->l, &(start[i]), sizeof(uint32_t));
        memcpy(wb->p+wb->l+4, &(values[i]), sizeof(float));
        updateStats(fp, wb->span, values[i]);
        wb->l += 8;
    }
    wb->end = start[n-1] + wb->span;

    return 0;
}
|
518
|
+
|
519
|
+
//4 bytes per entry
|
520
|
+
int bwAddIntervalSpanSteps(bigWigFile_t *fp, const char *chrom, uint32_t start, uint32_t span, uint32_t step, const float *values, uint32_t n) {
|
521
|
+
uint32_t i, tid;
|
522
|
+
bwWriteBuffer_t *wb = fp->writeBuffer;
|
523
|
+
if(!n) return 0;
|
524
|
+
if(!fp->isWrite) return 1;
|
525
|
+
if(!wb) return 2;
|
526
|
+
if(wb->ltype != 3) flushBuffer(fp);
|
527
|
+
if(flushBuffer(fp)) return 3;
|
528
|
+
|
529
|
+
tid = bwGetTid(fp, chrom);
|
530
|
+
if(tid == (uint32_t) -1) return 4;
|
531
|
+
wb->tid = tid;
|
532
|
+
wb->start = start;
|
533
|
+
wb->step = step;
|
534
|
+
wb->span = span;
|
535
|
+
wb->ltype = 3;
|
536
|
+
|
537
|
+
for(i=0; i<n; i++) {
|
538
|
+
if(wb->l + 4 >= fp->hdr->bufSize) {
|
539
|
+
wb->end = wb->start + ((wb->l-24)>>2) * step;
|
540
|
+
flushBuffer(fp);
|
541
|
+
wb->start = wb->end;
|
542
|
+
}
|
543
|
+
if(!memcpy(wb->p+wb->l, &(values[i]), sizeof(float))) return 5;
|
544
|
+
updateStats(fp, wb->span, values[i]);
|
545
|
+
wb->l += 4;
|
546
|
+
}
|
547
|
+
wb->end = wb->start + (wb->l>>2) * step;
|
548
|
+
|
549
|
+
return 0;
|
550
|
+
}
|
551
|
+
|
552
|
+
//Append n more values to the pending type 3 block, reusing the span and step stored
//in the write buffer. 4 bytes per entry.
//Returns 0 on success (including n == 0), 1 if fp is not open for writing, 2 if there is
//no write buffer, 3 if the pending block is not of type 3 and 5 if flushing a full block fails.
int bwAppendIntervalSpanSteps(bigWigFile_t *fp, const float *values, uint32_t n) {
    uint32_t i;
    bwWriteBuffer_t *wb = fp->writeBuffer;
    if(!n) return 0;
    if(!fp->isWrite) return 1;
    if(!wb) return 2;
    if(wb->ltype != 3) return 3;

    for(i=0; i<n; i++) {
        if(wb->l + 4 >= fp->hdr->bufSize) {
            //(l-24)/4 values are buffered after the 24 byte header
            wb->end = wb->start + ((wb->l-24)>>2) * wb->step;
            if(flushBuffer(fp)) return 5; //A failed write must not be silently dropped
            wb->start = wb->end;
        }
        memcpy(wb->p+wb->l, &(values[i]), sizeof(float));
        updateStats(fp, wb->span, values[i]);
        wb->l += 4;
    }
    //Exclude the header bytes here too; l>>2 alone would count six values that do not exist
    wb->end = wb->start + ((wb->l-24)>>2) * wb->step;

    return 0;
}
|
574
|
+
|
575
|
+
//0 on success
|
576
|
+
int writeSummary(bigWigFile_t *fp) {
|
577
|
+
if(writeAtPos(&(fp->hdr->nBasesCovered), sizeof(uint64_t), 1, fp->hdr->summaryOffset, fp->URL->x.fp)) return 1;
|
578
|
+
if(writeAtPos(&(fp->hdr->minVal), sizeof(double), 1, fp->hdr->summaryOffset+8, fp->URL->x.fp)) return 2;
|
579
|
+
if(writeAtPos(&(fp->hdr->maxVal), sizeof(double), 1, fp->hdr->summaryOffset+16, fp->URL->x.fp)) return 3;
|
580
|
+
if(writeAtPos(&(fp->hdr->sumData), sizeof(double), 1, fp->hdr->summaryOffset+24, fp->URL->x.fp)) return 4;
|
581
|
+
if(writeAtPos(&(fp->hdr->sumSquared), sizeof(double), 1, fp->hdr->summaryOffset+32, fp->URL->x.fp)) return 5;
|
582
|
+
return 0;
|
583
|
+
}
|
584
|
+
|
585
|
+
static bwRTreeNode_t *makeEmptyNode(uint32_t blockSize) {
|
586
|
+
bwRTreeNode_t *n = calloc(1, sizeof(bwRTreeNode_t));
|
587
|
+
if(!n) return NULL;
|
588
|
+
|
589
|
+
n->chrIdxStart = malloc(blockSize*sizeof(uint32_t));
|
590
|
+
if(!n->chrIdxStart) goto error;
|
591
|
+
n->baseStart = malloc(blockSize*sizeof(uint32_t));
|
592
|
+
if(!n->baseStart) goto error;
|
593
|
+
n->chrIdxEnd = malloc(blockSize*sizeof(uint32_t));
|
594
|
+
if(!n->chrIdxEnd) goto error;
|
595
|
+
n->baseEnd = malloc(blockSize*sizeof(uint32_t));
|
596
|
+
if(!n->baseEnd) goto error;
|
597
|
+
n->dataOffset = calloc(blockSize,sizeof(uint64_t)); //This MUST be 0 for node writing!
|
598
|
+
if(!n->dataOffset) goto error;
|
599
|
+
n->x.child = malloc(blockSize*sizeof(uint64_t));
|
600
|
+
if(!n->x.child) goto error;
|
601
|
+
|
602
|
+
return n;
|
603
|
+
|
604
|
+
error:
|
605
|
+
if(n->chrIdxStart) free(n->chrIdxStart);
|
606
|
+
if(n->baseStart) free(n->baseStart);
|
607
|
+
if(n->chrIdxEnd) free(n->chrIdxEnd);
|
608
|
+
if(n->baseEnd) free(n->baseEnd);
|
609
|
+
if(n->dataOffset) free(n->dataOffset);
|
610
|
+
if(n->x.child) free(n->x.child);
|
611
|
+
free(n);
|
612
|
+
return NULL;
|
613
|
+
}
|
614
|
+
|
615
|
+
//Returns 0 on success. This doesn't attempt to clean up!
|
616
|
+
static bwRTreeNode_t *addLeaves(bwLL **ll, uint64_t *sz, uint64_t toProcess, uint32_t blockSize) {
|
617
|
+
uint32_t i;
|
618
|
+
uint64_t foo;
|
619
|
+
bwRTreeNode_t *n = makeEmptyNode(blockSize);
|
620
|
+
if(!n) return NULL;
|
621
|
+
|
622
|
+
if(toProcess <= blockSize) {
|
623
|
+
for(i=0; i<toProcess; i++) {
|
624
|
+
n->chrIdxStart[i] = (*ll)->node->chrIdxStart[0];
|
625
|
+
n->baseStart[i] = (*ll)->node->baseStart[0];
|
626
|
+
n->chrIdxEnd[i] = (*ll)->node->chrIdxEnd[(*ll)->node->nChildren-1];
|
627
|
+
n->baseEnd[i] = (*ll)->node->baseEnd[(*ll)->node->nChildren-1];
|
628
|
+
n->x.child[i] = (*ll)->node;
|
629
|
+
*sz += 4 + 32*(*ll)->node->nChildren;
|
630
|
+
*ll = (*ll)->next;
|
631
|
+
n->nChildren++;
|
632
|
+
}
|
633
|
+
} else {
|
634
|
+
for(i=0; i<blockSize; i++) {
|
635
|
+
foo = ceil(((double) toProcess)/((double) blockSize-i));
|
636
|
+
if(!ll) break;
|
637
|
+
n->x.child[i] = addLeaves(ll, sz, foo, blockSize);
|
638
|
+
if(!n->x.child[i]) goto error;
|
639
|
+
n->chrIdxStart[i] = n->x.child[i]->chrIdxStart[0];
|
640
|
+
n->baseStart[i] = n->x.child[i]->baseStart[0];
|
641
|
+
n->chrIdxEnd[i] = n->x.child[i]->chrIdxEnd[n->x.child[i]->nChildren-1];
|
642
|
+
n->baseEnd[i] = n->x.child[i]->baseEnd[n->x.child[i]->nChildren-1];
|
643
|
+
n->nChildren++;
|
644
|
+
toProcess -= foo;
|
645
|
+
}
|
646
|
+
}
|
647
|
+
|
648
|
+
*sz += 4 + 24*n->nChildren;
|
649
|
+
return n;
|
650
|
+
|
651
|
+
error:
|
652
|
+
bwDestroyIndexNode(n);
|
653
|
+
return NULL;
|
654
|
+
}
|
655
|
+
|
656
|
+
//Returns 1 on error
|
657
|
+
int writeIndexTreeNode(FILE *fp, bwRTreeNode_t *n, uint8_t *wrote, int level) {
|
658
|
+
uint8_t one = 0;
|
659
|
+
uint32_t i, j, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
|
660
|
+
|
661
|
+
if(n->isLeaf) return 0;
|
662
|
+
|
663
|
+
for(i=0; i<n->nChildren; i++) {
|
664
|
+
if(n->dataOffset[i]) { //traverse into child
|
665
|
+
if(n->isLeaf) return 0; //Only write leaves once!
|
666
|
+
if(writeIndexTreeNode(fp, n->x.child[i], wrote, level+1)) return 1;
|
667
|
+
} else {
|
668
|
+
n->dataOffset[i] = ftell(fp);
|
669
|
+
if(fwrite(&(n->x.child[i]->isLeaf), sizeof(uint8_t), 1, fp) != 1) return 1;
|
670
|
+
if(fwrite(&one, sizeof(uint8_t), 1, fp) != 1) return 1; //one byte of padding
|
671
|
+
if(fwrite(&(n->x.child[i]->nChildren), sizeof(uint16_t), 1, fp) != 1) return 1;
|
672
|
+
for(j=0; j<n->x.child[i]->nChildren; j++) {
|
673
|
+
vector[0] = n->x.child[i]->chrIdxStart[j];
|
674
|
+
vector[1] = n->x.child[i]->baseStart[j];
|
675
|
+
vector[2] = n->x.child[i]->chrIdxEnd[j];
|
676
|
+
vector[3] = n->x.child[i]->baseEnd[j];
|
677
|
+
if(n->x.child[i]->isLeaf) {
|
678
|
+
//Include the offset and size
|
679
|
+
if(fwrite(vector, sizeof(uint32_t), 4, fp) != 4) return 1;
|
680
|
+
if(fwrite(&(n->x.child[i]->dataOffset[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
|
681
|
+
if(fwrite(&(n->x.child[i]->x.size[j]), sizeof(uint64_t), 1, fp) != 1) return 1;
|
682
|
+
} else {
|
683
|
+
if(fwrite(vector, sizeof(uint32_t), 6, fp) != 6) return 1;
|
684
|
+
}
|
685
|
+
}
|
686
|
+
*wrote = 1;
|
687
|
+
}
|
688
|
+
}
|
689
|
+
|
690
|
+
return 0;
|
691
|
+
}
|
692
|
+
|
693
|
+
//returns 1 on success
|
694
|
+
int writeIndexOffsets(FILE *fp, bwRTreeNode_t *n, uint64_t offset) {
|
695
|
+
uint32_t i;
|
696
|
+
|
697
|
+
if(n->isLeaf) return 0;
|
698
|
+
for(i=0; i<n->nChildren; i++) {
|
699
|
+
if(writeIndexOffsets(fp, n->x.child[i], n->dataOffset[i])) return 1;
|
700
|
+
if(writeAtPos(&(n->dataOffset[i]), sizeof(uint64_t), 1, offset+20+24*i, fp)) return 2;
|
701
|
+
}
|
702
|
+
return 0;
|
703
|
+
}
|
704
|
+
|
705
|
+
//Returns 0 on success
|
706
|
+
int writeIndexTree(bigWigFile_t *fp) {
|
707
|
+
uint64_t offset;
|
708
|
+
uint8_t wrote = 0;
|
709
|
+
int rv;
|
710
|
+
|
711
|
+
while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->idx->root, &wrote, 0)) == 0) {
|
712
|
+
if(!wrote) break;
|
713
|
+
wrote = 0;
|
714
|
+
}
|
715
|
+
|
716
|
+
if(rv || wrote) return 1;
|
717
|
+
|
718
|
+
//Save the file position
|
719
|
+
offset = bwTell(fp);
|
720
|
+
|
721
|
+
//Write the offsets
|
722
|
+
if(writeIndexOffsets(fp->URL->x.fp, fp->idx->root, fp->idx->rootOffset)) return 2;
|
723
|
+
|
724
|
+
//Move the file pointer back to the end
|
725
|
+
bwSetPos(fp, offset);
|
726
|
+
|
727
|
+
return 0;
|
728
|
+
}
|
729
|
+
|
730
|
+
//Returns 0 on success. The original state SHOULD be preserved on error
|
731
|
+
int writeIndex(bigWigFile_t *fp) {
|
732
|
+
uint32_t four = IDX_MAGIC;
|
733
|
+
uint64_t idxSize = 0, foo;
|
734
|
+
uint8_t one = 0;
|
735
|
+
uint32_t i, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0
|
736
|
+
bwLL *ll = fp->writeBuffer->firstIndexNode, *p;
|
737
|
+
bwRTreeNode_t *root = NULL;
|
738
|
+
|
739
|
+
if(!fp->writeBuffer->nBlocks) return 0;
|
740
|
+
fp->idx = malloc(sizeof(bwRTree_t));
|
741
|
+
if(!fp->idx) return 2;
|
742
|
+
fp->idx->root = root;
|
743
|
+
|
744
|
+
//Update the file header to indicate the proper index position
|
745
|
+
foo = bwTell(fp);
|
746
|
+
if(writeAtPos(&foo, sizeof(uint64_t), 1, 0x18, fp->URL->x.fp)) return 3;
|
747
|
+
|
748
|
+
//Make the tree
|
749
|
+
if(ll == fp->writeBuffer->currentIndexNode) {
|
750
|
+
root = ll->node;
|
751
|
+
idxSize = 4 + 24*root->nChildren;
|
752
|
+
} else {
|
753
|
+
root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
|
754
|
+
}
|
755
|
+
if(!root) return 4;
|
756
|
+
fp->idx->root = root;
|
757
|
+
|
758
|
+
ll = fp->writeBuffer->firstIndexNode;
|
759
|
+
while(ll) {
|
760
|
+
p = ll->next;
|
761
|
+
free(ll);
|
762
|
+
ll=p;
|
763
|
+
}
|
764
|
+
|
765
|
+
//write the header
|
766
|
+
if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 5;
|
767
|
+
if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
|
768
|
+
if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
|
769
|
+
if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
|
770
|
+
if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
|
771
|
+
if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
|
772
|
+
if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
|
773
|
+
if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
|
774
|
+
four = 1;
|
775
|
+
if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
|
776
|
+
four = 0;
|
777
|
+
if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
|
778
|
+
fp->idx->rootOffset = bwTell(fp);
|
779
|
+
|
780
|
+
//Write the root node, since writeIndexTree writes the children and fills in the offset
|
781
|
+
if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
|
782
|
+
if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
|
783
|
+
if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
|
784
|
+
for(i=0; i<root->nChildren; i++) {
|
785
|
+
vector[0] = root->chrIdxStart[i];
|
786
|
+
vector[1] = root->baseStart[i];
|
787
|
+
vector[2] = root->chrIdxEnd[i];
|
788
|
+
vector[3] = root->baseEnd[i];
|
789
|
+
if(root->isLeaf) {
|
790
|
+
//Include the offset and size
|
791
|
+
if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
|
792
|
+
if(fwrite(&(root->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
|
793
|
+
if(fwrite(&(root->x.size[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
|
794
|
+
} else {
|
795
|
+
root->dataOffset[i] = 0; //FIXME: Something upstream is setting this to impossible values (e.g., 0x21?!?!?)
|
796
|
+
if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
|
797
|
+
}
|
798
|
+
}
|
799
|
+
|
800
|
+
//Write each level
|
801
|
+
if(writeIndexTree(fp)) return 23;
|
802
|
+
|
803
|
+
return 0;
|
804
|
+
}
|
805
|
+
|
806
|
+
//The first zoom level has a resolution of 4x mean entry size
|
807
|
+
//This may or may not produce the requested number of zoom levels
|
808
|
+
int makeZoomLevels(bigWigFile_t *fp) {
|
809
|
+
uint32_t meanBinSize, i;
|
810
|
+
uint32_t multiplier = 4, zoom = 10, maxZoom = 0;
|
811
|
+
uint16_t nLevels = 0;
|
812
|
+
|
813
|
+
meanBinSize = ((double) fp->writeBuffer->runningWidthSum)/(fp->writeBuffer->nEntries);
|
814
|
+
//In reality, one level is skipped
|
815
|
+
meanBinSize *= 4;
|
816
|
+
//N.B., we must ALWAYS check that the zoom doesn't overflow a uint32_t!
|
817
|
+
if(((uint32_t)-1)>>2 < meanBinSize) return 0; //No zoom levels!
|
818
|
+
if(meanBinSize*4 > zoom) zoom = multiplier*meanBinSize;
|
819
|
+
|
820
|
+
fp->hdr->zoomHdrs = calloc(1, sizeof(bwZoomHdr_t));
|
821
|
+
if(!fp->hdr->zoomHdrs) return 1;
|
822
|
+
fp->hdr->zoomHdrs->level = malloc(fp->hdr->nLevels * sizeof(uint32_t));
|
823
|
+
fp->hdr->zoomHdrs->dataOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
|
824
|
+
fp->hdr->zoomHdrs->indexOffset = calloc(fp->hdr->nLevels, sizeof(uint64_t));
|
825
|
+
fp->hdr->zoomHdrs->idx = calloc(fp->hdr->nLevels, sizeof(bwRTree_t*));
|
826
|
+
if(!fp->hdr->zoomHdrs->level) return 2;
|
827
|
+
if(!fp->hdr->zoomHdrs->dataOffset) return 3;
|
828
|
+
if(!fp->hdr->zoomHdrs->indexOffset) return 4;
|
829
|
+
if(!fp->hdr->zoomHdrs->idx) return 5;
|
830
|
+
|
831
|
+
//There's no point in having a zoom level larger than the largest chromosome
|
832
|
+
//This will none the less allow at least one zoom level, which is generally needed for IGV et al.
|
833
|
+
for(i=0; i<fp->cl->nKeys; i++) {
|
834
|
+
if(fp->cl->len[i] > maxZoom) maxZoom = fp->cl->len[i];
|
835
|
+
}
|
836
|
+
if(zoom > maxZoom) zoom = maxZoom;
|
837
|
+
|
838
|
+
for(i=0; i<fp->hdr->nLevels; i++) {
|
839
|
+
if(zoom > maxZoom) break; //prevent absurdly large zoom levels
|
840
|
+
fp->hdr->zoomHdrs->level[i] = zoom;
|
841
|
+
nLevels++;
|
842
|
+
if(((uint32_t)-1)/multiplier < zoom) break;
|
843
|
+
zoom *= multiplier;
|
844
|
+
}
|
845
|
+
fp->hdr->nLevels = nLevels;
|
846
|
+
|
847
|
+
fp->writeBuffer->firstZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
|
848
|
+
if(!fp->writeBuffer->firstZoomBuffer) goto error;
|
849
|
+
fp->writeBuffer->lastZoomBuffer = calloc(nLevels,sizeof(bwZoomBuffer_t*));
|
850
|
+
if(!fp->writeBuffer->lastZoomBuffer) goto error;
|
851
|
+
fp->writeBuffer->nNodes = calloc(nLevels, sizeof(uint64_t));
|
852
|
+
|
853
|
+
for(i=0; i<fp->hdr->nLevels; i++) {
|
854
|
+
fp->writeBuffer->firstZoomBuffer[i] = calloc(1, sizeof(bwZoomBuffer_t));
|
855
|
+
if(!fp->writeBuffer->firstZoomBuffer[i]) goto error;
|
856
|
+
fp->writeBuffer->firstZoomBuffer[i]->p = calloc(fp->hdr->bufSize/32, 32);
|
857
|
+
if(!fp->writeBuffer->firstZoomBuffer[i]->p) goto error;
|
858
|
+
fp->writeBuffer->firstZoomBuffer[i]->m = fp->hdr->bufSize;
|
859
|
+
((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[0] = 0;
|
860
|
+
((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[1] = 0;
|
861
|
+
((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->hdr->zoomHdrs->level[i];
|
862
|
+
if(fp->hdr->zoomHdrs->level[i] > fp->cl->len[0]) ((uint32_t*)fp->writeBuffer->firstZoomBuffer[i]->p)[2] = fp->cl->len[0];
|
863
|
+
fp->writeBuffer->lastZoomBuffer[i] = fp->writeBuffer->firstZoomBuffer[i];
|
864
|
+
}
|
865
|
+
|
866
|
+
return 0;
|
867
|
+
|
868
|
+
error:
|
869
|
+
if(fp->writeBuffer->firstZoomBuffer) {
|
870
|
+
for(i=0; i<fp->hdr->nLevels; i++) {
|
871
|
+
if(fp->writeBuffer->firstZoomBuffer[i]) {
|
872
|
+
if(fp->writeBuffer->firstZoomBuffer[i]->p) free(fp->writeBuffer->firstZoomBuffer[i]->p);
|
873
|
+
free(fp->writeBuffer->firstZoomBuffer[i]);
|
874
|
+
}
|
875
|
+
}
|
876
|
+
free(fp->writeBuffer->firstZoomBuffer);
|
877
|
+
}
|
878
|
+
if(fp->writeBuffer->lastZoomBuffer) free(fp->writeBuffer->lastZoomBuffer);
|
879
|
+
if(fp->writeBuffer->nNodes) free(fp->writeBuffer->lastZoomBuffer);
|
880
|
+
return 6;
|
881
|
+
}
|
882
|
+
|
883
|
+
//Given an interval start, calculate the next one at a zoom level
|
884
|
+
void nextPos(bigWigFile_t *fp, uint32_t size, uint32_t *pos, uint32_t desiredTid) {
|
885
|
+
uint32_t *tid = pos;
|
886
|
+
uint32_t *start = pos+1;
|
887
|
+
uint32_t *end = pos+2;
|
888
|
+
*start += size;
|
889
|
+
if(*start >= fp->cl->len[*tid]) {
|
890
|
+
(*start) = 0;
|
891
|
+
(*tid)++;
|
892
|
+
}
|
893
|
+
|
894
|
+
//prevent needless iteration when changing chromosomes
|
895
|
+
if(*tid < desiredTid) {
|
896
|
+
*tid = desiredTid;
|
897
|
+
*start = 0;
|
898
|
+
}
|
899
|
+
|
900
|
+
(*end) = *start+size;
|
901
|
+
if(*end > fp->cl->len[*tid]) (*end) = fp->cl->len[*tid];
|
902
|
+
}
|
903
|
+
|
904
|
+
//Return the number of bases two intervals overlap
//Intervals on different chromosomes (tid0 != tid1), or that merely touch,
//overlap by 0 bases.
uint32_t overlapsInterval(uint32_t tid0, uint32_t start0, uint32_t end0, uint32_t tid1, uint32_t start1, uint32_t end1) {
    uint32_t lo, hi;

    if(tid0 != tid1 || end0 <= start1 || end1 <= start0) return 0;

    //The shared region runs from the larger start to the smaller end
    hi = (end0 <= end1) ? end0 : end1;
    lo = (start1 > start0) ? start1 : start0;
    return hi - lo;
}
|
917
|
+
|
918
|
+
//Returns the number of bases of the interval written
|
919
|
+
uint32_t updateInterval(bigWigFile_t *fp, bwZoomBuffer_t *buffer, double *sum, double *sumsq, uint32_t size, uint32_t tid, uint32_t start, uint32_t end, float value) {
|
920
|
+
uint32_t *p2 = (uint32_t*) buffer->p;
|
921
|
+
float *fp2 = (float*) p2;
|
922
|
+
uint32_t rv = 0, offset = 0;
|
923
|
+
if(!buffer) return 0;
|
924
|
+
if(buffer->l+32 >= buffer->m) return 0;
|
925
|
+
|
926
|
+
//Make sure that we don't overflow a uint32_t by adding some huge value to start
|
927
|
+
if(start + size < start) size = ((uint32_t) -1) - start;
|
928
|
+
|
929
|
+
if(buffer->l) {
|
930
|
+
offset = buffer->l/32;
|
931
|
+
} else {
|
932
|
+
p2[0] = tid;
|
933
|
+
p2[1] = start;
|
934
|
+
if(start+size < end) p2[2] = start+size;
|
935
|
+
else p2[2] = end;
|
936
|
+
}
|
937
|
+
|
938
|
+
//Do we have any overlap with the previously added interval?
|
939
|
+
if(offset) {
|
940
|
+
rv = overlapsInterval(p2[8*(offset-1)], p2[8*(offset-1)+1], p2[8*(offset-1)+1] + size, tid, start, end);
|
941
|
+
if(rv) {
|
942
|
+
p2[8*(offset-1)+2] = start + rv;
|
943
|
+
p2[8*(offset-1)+3] += rv;
|
944
|
+
if(fp2[8*(offset-1)+4] > value) fp2[8*(offset-1)+4] = value;
|
945
|
+
if(fp2[8*(offset-1)+5] < value) fp2[8*(offset-1)+5] = value;
|
946
|
+
*sum += rv*value;
|
947
|
+
*sumsq += rv*pow(value, 2.0);
|
948
|
+
return rv;
|
949
|
+
} else {
|
950
|
+
fp2[8*(offset-1)+6] = *sum;
|
951
|
+
fp2[8*(offset-1)+7] = *sumsq;
|
952
|
+
*sum = 0.0;
|
953
|
+
*sumsq = 0.0;
|
954
|
+
}
|
955
|
+
}
|
956
|
+
|
957
|
+
//If we move to a new interval then skip iterating over a bunch of obviously non-overlapping intervals
|
958
|
+
if(offset && p2[8*offset+2] == 0) {
|
959
|
+
p2[8*offset] = tid;
|
960
|
+
p2[8*offset+1] = start;
|
961
|
+
if(start+size < end) p2[8*offset+2] = start+size;
|
962
|
+
else p2[8*offset+2] = end;
|
963
|
+
//nextPos(fp, size, p2+8*offset, tid); //We can actually skip uncovered intervals
|
964
|
+
}
|
965
|
+
|
966
|
+
//Add a new entry
|
967
|
+
while(!(rv = overlapsInterval(p2[8*offset], p2[8*offset+1], p2[8*offset+1] + size, tid, start, end))) {
|
968
|
+
p2[8*offset] = tid;
|
969
|
+
p2[8*offset+1] = start;
|
970
|
+
if(start+size < end) p2[8*offset+2] = start+size;
|
971
|
+
else p2[8*offset+2] = end;
|
972
|
+
//nextPos(fp, size, p2+8*offset, tid);
|
973
|
+
}
|
974
|
+
p2[8*offset+3] = rv;
|
975
|
+
fp2[8*offset+4] = value; //min
|
976
|
+
fp2[8*offset+5] = value; //max
|
977
|
+
*sum += rv * value;
|
978
|
+
*sumsq += rv * pow(value,2.0);
|
979
|
+
buffer->l += 32;
|
980
|
+
return rv;
|
981
|
+
}
|
982
|
+
|
983
|
+
//Returns 0 on success
|
984
|
+
int addIntervalValue(bigWigFile_t *fp, uint64_t *nEntries, double *sum, double *sumsq, bwZoomBuffer_t *buffer, uint32_t itemsPerSlot, uint32_t zoom, uint32_t tid, uint32_t start, uint32_t end, float value) {
|
985
|
+
bwZoomBuffer_t *newBuffer = NULL;
|
986
|
+
uint32_t rv;
|
987
|
+
|
988
|
+
while(start < end) {
|
989
|
+
rv = updateInterval(fp, buffer, sum, sumsq, zoom, tid, start, end, value);
|
990
|
+
if(!rv) {
|
991
|
+
//Allocate a new buffer
|
992
|
+
newBuffer = calloc(1, sizeof(bwZoomBuffer_t));
|
993
|
+
if(!newBuffer) return 1;
|
994
|
+
newBuffer->p = calloc(itemsPerSlot, 32);
|
995
|
+
if(!newBuffer->p) goto error;
|
996
|
+
newBuffer->m = itemsPerSlot*32;
|
997
|
+
memcpy(newBuffer->p, buffer->p+buffer->l-32, 4);
|
998
|
+
memcpy(newBuffer->p+4, buffer->p+buffer->l-28, 4);
|
999
|
+
((uint32_t*) newBuffer->p)[2] = ((uint32_t*) newBuffer->p)[1] + zoom;
|
1000
|
+
*sum = *sumsq = 0.0;
|
1001
|
+
rv = updateInterval(fp, newBuffer, sum, sumsq, zoom, tid, start, end, value);
|
1002
|
+
if(!rv) goto error;
|
1003
|
+
buffer->next = newBuffer;
|
1004
|
+
buffer = buffer->next;
|
1005
|
+
*nEntries += 1;
|
1006
|
+
}
|
1007
|
+
start += rv;
|
1008
|
+
}
|
1009
|
+
|
1010
|
+
return 0;
|
1011
|
+
|
1012
|
+
error:
|
1013
|
+
if(newBuffer) {
|
1014
|
+
if(newBuffer->m) free(newBuffer->p);
|
1015
|
+
free(newBuffer);
|
1016
|
+
}
|
1017
|
+
return 2;
|
1018
|
+
}
|
1019
|
+
|
1020
|
+
//Get all of the intervals and add them to the appropriate zoomBuffer
|
1021
|
+
int constructZoomLevels(bigWigFile_t *fp) {
|
1022
|
+
bwOverlapIterator_t *it = NULL;
|
1023
|
+
double *sum = NULL, *sumsq = NULL;
|
1024
|
+
uint32_t i, j, k;
|
1025
|
+
|
1026
|
+
sum = calloc(fp->hdr->nLevels, sizeof(double));
|
1027
|
+
sumsq = calloc(fp->hdr->nLevels, sizeof(double));
|
1028
|
+
if(!sum || !sumsq) goto error;
|
1029
|
+
|
1030
|
+
for(i=0; i<fp->cl->nKeys; i++) {
|
1031
|
+
it = bwOverlappingIntervalsIterator(fp, fp->cl->chrom[i], 0, fp->cl->len[i], 100000);
|
1032
|
+
if(!it) goto error;
|
1033
|
+
while(it->data != NULL){
|
1034
|
+
for(j=0;j<it->intervals->l;j++){
|
1035
|
+
for(k=0;k<fp->hdr->nLevels;k++){
|
1036
|
+
if(addIntervalValue(fp, &(fp->writeBuffer->nNodes[k]), sum+k, sumsq+k, fp->writeBuffer->lastZoomBuffer[k], fp->hdr->bufSize/32, fp->hdr->zoomHdrs->level[k], i, it->intervals->start[j], it->intervals->end[j], it->intervals->value[j])) goto error;
|
1037
|
+
while(fp->writeBuffer->lastZoomBuffer[k]->next) fp->writeBuffer->lastZoomBuffer[k] = fp->writeBuffer->lastZoomBuffer[k]->next;
|
1038
|
+
}
|
1039
|
+
}
|
1040
|
+
it = bwIteratorNext(it);
|
1041
|
+
}
|
1042
|
+
bwIteratorDestroy(it);
|
1043
|
+
|
1044
|
+
}
|
1045
|
+
|
1046
|
+
//Make an index for each zoom level
|
1047
|
+
for(i=0; i<fp->hdr->nLevels; i++) {
|
1048
|
+
fp->hdr->zoomHdrs->idx[i] = calloc(1, sizeof(bwRTree_t));
|
1049
|
+
if(!fp->hdr->zoomHdrs->idx[i]) return 1;
|
1050
|
+
fp->hdr->zoomHdrs->idx[i]->blockSize = fp->writeBuffer->blockSize;
|
1051
|
+
}
|
1052
|
+
|
1053
|
+
|
1054
|
+
free(sum);
|
1055
|
+
free(sumsq);
|
1056
|
+
|
1057
|
+
return 0;
|
1058
|
+
|
1059
|
+
error:
|
1060
|
+
if(it) bwIteratorDestroy(it);
|
1061
|
+
if(sum) free(sum);
|
1062
|
+
if(sumsq) free(sumsq);
|
1063
|
+
return 1;
|
1064
|
+
}
|
1065
|
+
|
1066
|
+
//Write every zoom level to disk: its compressed data blocks followed by its
//index. Levels are written until one has the same number of nodes as the
//previous one (a duplicate); the remaining levels are freed without being
//written. Finally the zoom headers are written at offset 0x40 and the number
//of levels actually written at offset 0x6, after which the file position is
//restored to the end of the zoom data.
//Returns 0 on success and a non-zero code on error.
int writeZoomLevels(bigWigFile_t *fp) {
    uint64_t offset1, offset2, idxSize = 0;
    uint32_t i, j, four = 0, last, vector[6] = {0, 0, 0, 0, 0, 0}; //The last 8 bytes are left as 0;
    uint8_t wrote, one = 0;
    uint16_t actualNLevels = 0;
    int rv;
    bwLL *ll, *p;
    bwRTreeNode_t *root;
    bwZoomBuffer_t *zb, *zb2;
    bwWriteBuffer_t *wb = fp->writeBuffer;
    uLongf sz;

    for(i=0; i<fp->hdr->nLevels; i++) {
        if(i) {
            //Is this a duplicate level?
            if(fp->writeBuffer->nNodes[i] == fp->writeBuffer->nNodes[i-1]) break;
        }
        actualNLevels++;

        //reserve a uint32_t for the number of blocks
        fp->hdr->zoomHdrs->dataOffset[i] = bwTell(fp);
        fp->writeBuffer->nBlocks = 0;
        fp->writeBuffer->l = 24;
        if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
        zb = fp->writeBuffer->firstZoomBuffer[i];
        fp->writeBuffer->firstIndexNode = NULL;
        fp->writeBuffer->currentIndexNode = NULL;
        while(zb) {
            sz = fp->hdr->bufSize;
            if(compress(wb->compressP, &sz, zb->p, zb->l) != Z_OK) return 2;

            //write the data to disk
            if(fwrite(wb->compressP, sizeof(uint8_t), sz, fp->URL->x.fp) != sz) return 3;

            //Add an entry into the index
            //last is the index (in 4 byte units) of the final 32 byte record
            last = (zb->l - 32)>>2;
            if(addIndexEntry(fp, ((uint32_t*)zb->p)[0], ((uint32_t*)zb->p)[last], ((uint32_t*)zb->p)[1], ((uint32_t*)zb->p)[last+2], bwTell(fp)-sz, sz)) return 4;

            wb->nBlocks++;
            wb->l = 24;
            zb = zb->next;
        }
        //Fill in the number of blocks reserved above
        if(writeAtPos(&(wb->nBlocks), sizeof(uint32_t), 1, fp->hdr->zoomHdrs->dataOffset[i], fp->URL->x.fp)) return 5;

        //Make the tree
        ll = fp->writeBuffer->firstIndexNode;
        if(!ll) return 4; //No index entries were recorded, so there is nothing to build a tree from
        if(ll == fp->writeBuffer->currentIndexNode) {
            //A single node in the list is used directly as the root
            root = ll->node;
            idxSize = 4 + 24*root->nChildren;
        } else {
            root = addLeaves(&ll, &idxSize, ceil(((double)fp->writeBuffer->nBlocks)/fp->writeBuffer->blockSize), fp->writeBuffer->blockSize);
        }
        if(!root) return 4;
        fp->hdr->zoomHdrs->idx[i]->root = root;

        //Free the list nodes themselves
        ll = fp->writeBuffer->firstIndexNode;
        while(ll) {
            p = ll->next;
            free(ll);
            ll=p;
        }


        //write the index
        wrote = 0;
        fp->hdr->zoomHdrs->indexOffset[i] = bwTell(fp);
        four = IDX_MAGIC;
        if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 1;
        root = fp->hdr->zoomHdrs->idx[i]->root;
        if(fwrite(&(fp->writeBuffer->blockSize), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 6;
        if(fwrite(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 7;
        if(fwrite(&(root->chrIdxStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
        if(fwrite(&(root->baseStart[0]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
        if(fwrite(&(root->chrIdxEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 10;
        if(fwrite(&(root->baseEnd[root->nChildren-1]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 11;
        if(fwrite(&idxSize, sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 12;
        four = fp->hdr->bufSize/32; //items per block
        if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 13;
        four = 0;
        if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 14; //padding
        fp->hdr->zoomHdrs->idx[i]->rootOffset = bwTell(fp);

        //Write the root node, since writeIndexTree writes the children and fills in the offset
        offset1 = bwTell(fp);
        if(fwrite(&(root->isLeaf), sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 16;
        if(fwrite(&one, sizeof(uint8_t), 1, fp->URL->x.fp) != 1) return 17; //one byte of padding
        if(fwrite(&(root->nChildren), sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 18;
        for(j=0; j<root->nChildren; j++) {
            vector[0] = root->chrIdxStart[j];
            vector[1] = root->baseStart[j];
            vector[2] = root->chrIdxEnd[j];
            vector[3] = root->baseEnd[j];
            if(root->isLeaf) {
                //Include the offset and size
                if(fwrite(vector, sizeof(uint32_t), 4, fp->URL->x.fp) != 4) return 19;
                if(fwrite(&(root->dataOffset[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 20;
                if(fwrite(&(root->x.size[j]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 21;
            } else {
                //The trailing 8 zero bytes of vector stand in for the child offset
                if(fwrite(vector, sizeof(uint32_t), 6, fp->URL->x.fp) != 6) return 22;
            }
        }

        //Keep writing until a pass over the tree writes nothing new
        while((rv = writeIndexTreeNode(fp->URL->x.fp, fp->hdr->zoomHdrs->idx[i]->root, &wrote, 0)) == 0) {
            if(!wrote) break;
            wrote = 0;
        }

        if(rv || wrote) return 6;

        //Save the file position
        offset2 = bwTell(fp);

        //Write the offsets
        if(writeIndexOffsets(fp->URL->x.fp, root, offset1)) return 2;

        //Move the file pointer back to the end
        if(bwSetPos(fp, offset2)) return 15;


        //Free the linked list
        zb = fp->writeBuffer->firstZoomBuffer[i];
        while(zb) {
            if(zb->p) free(zb->p);
            zb2 = zb->next;
            free(zb);
            zb = zb2;
        }
        fp->writeBuffer->firstZoomBuffer[i] = NULL;
    }

    //Free unused zoom levels
    for(i=actualNLevels; i<fp->hdr->nLevels; i++) {
        zb = fp->writeBuffer->firstZoomBuffer[i];
        while(zb) {
            if(zb->p) free(zb->p);
            zb2 = zb->next;
            free(zb);
            zb = zb2;
        }
        fp->writeBuffer->firstZoomBuffer[i] = NULL;
    }

    //Write the zoom headers to disk
    offset1 = bwTell(fp);
    if(bwSetPos(fp, 0x40)) return 7;
    four = 0;
    for(i=0; i<actualNLevels; i++) {
        if(fwrite(&(fp->hdr->zoomHdrs->level[i]), sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 8;
        if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
        if(fwrite(&(fp->hdr->zoomHdrs->dataOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 10;
        if(fwrite(&(fp->hdr->zoomHdrs->indexOffset[i]), sizeof(uint64_t), 1, fp->URL->x.fp) != 1) return 11;
    }

    //Write the number of levels if needed
    if(bwSetPos(fp, 0x6)) return 12;
    if(fwrite(&actualNLevels, sizeof(uint16_t), 1, fp->URL->x.fp) != 1) return 13;

    if(bwSetPos(fp, offset1)) return 14;

    return 0;
}
|
1227
|
+
|
1228
|
+
//0 on success
|
1229
|
+
int bwFinalize(bigWigFile_t *fp) {
|
1230
|
+
uint32_t four;
|
1231
|
+
uint64_t offset;
|
1232
|
+
if(!fp->isWrite) return 0;
|
1233
|
+
|
1234
|
+
//Flush the buffer
|
1235
|
+
if(flushBuffer(fp)) return 1; //Valgrind reports a problem here!
|
1236
|
+
|
1237
|
+
//Update the data section with the number of blocks written
|
1238
|
+
if(fp->hdr) {
|
1239
|
+
if(writeAtPos(&(fp->writeBuffer->nBlocks), sizeof(uint64_t), 1, fp->hdr->dataOffset, fp->URL->x.fp)) return 2;
|
1240
|
+
} else {
|
1241
|
+
//The header wasn't written!
|
1242
|
+
return 1;
|
1243
|
+
}
|
1244
|
+
|
1245
|
+
//write the bufferSize
|
1246
|
+
if(fp->hdr->bufSize) {
|
1247
|
+
if(writeAtPos(&(fp->hdr->bufSize), sizeof(uint32_t), 1, 0x34, fp->URL->x.fp)) return 2;
|
1248
|
+
}
|
1249
|
+
|
1250
|
+
//write the summary information
|
1251
|
+
if(writeSummary(fp)) return 3;
|
1252
|
+
|
1253
|
+
//Convert the linked-list to a tree and write to disk
|
1254
|
+
if(writeIndex(fp)) return 4;
|
1255
|
+
|
1256
|
+
//Zoom level stuff here?
|
1257
|
+
if(fp->hdr->nLevels && fp->writeBuffer->nBlocks) {
|
1258
|
+
offset = bwTell(fp);
|
1259
|
+
if(makeZoomLevels(fp)) return 5;
|
1260
|
+
if(constructZoomLevels(fp)) return 6;
|
1261
|
+
bwSetPos(fp, offset);
|
1262
|
+
if(writeZoomLevels(fp)) return 7; //This write nLevels as well
|
1263
|
+
}
|
1264
|
+
|
1265
|
+
//write magic at the end of the file
|
1266
|
+
four = BIGWIG_MAGIC;
|
1267
|
+
if(fwrite(&four, sizeof(uint32_t), 1, fp->URL->x.fp) != 1) return 9;
|
1268
|
+
|
1269
|
+
return 0;
|
1270
|
+
}
|
1271
|
+
|
1272
|
+
/*
|
1273
|
+
data chunk:
|
1274
|
+
uint64_t number of blocks (2 / 110851)
|
1275
|
+
some blocks
|
1276
|
+
|
1277
|
+
an uncompressed data block (24 byte header)
|
1278
|
+
uint32_t Tid 0-4
|
1279
|
+
uint32_t start 4-8
|
1280
|
+
uint32_t end 8-12
|
1281
|
+
uint32_t step 12-16
|
1282
|
+
uint32_t span 16-20
|
1283
|
+
uint8_t type 20
|
1284
|
+
uint8_t padding
|
1285
|
+
uint16_t nItems 22
|
1286
|
+
nItems of:
|
1287
|
+
type 1: //12 bytes
|
1288
|
+
uint32_t start
|
1289
|
+
uint32_t end
|
1290
|
+
float value
|
1291
|
+
type 2: //8 bytes
|
1292
|
+
uint32_t start
|
1293
|
+
float value
|
1294
|
+
type 3: //4 bytes
|
1295
|
+
float value
|
1296
|
+
|
1297
|
+
data block index header
|
1298
|
+
uint32_t magic
|
1299
|
+
uint32_t blockSize (256 in the example) maximum number of children
|
1300
|
+
uint64_t number of blocks (2 / 110851)
|
1301
|
+
uint32_t startTid
|
1302
|
+
uint32_t startPos
|
1303
|
+
uint32_t endTid
|
1304
|
+
uint32_t endPos
|
1305
|
+
uint64_t index size? (0x1E7 / 0x1AF0401F) index address?
|
1306
|
+
uint32_t itemsPerBlock (1 / 1) 1024 for zoom headers
|
1307
|
+
uint32_t padding
|
1308
|
+
|
1309
|
+
data block index node non-leaf (4 bytes + 24*nChildren)
|
1310
|
+
uint8_t isLeaf
|
1311
|
+
uint8_t padding
|
1312
|
+
uint16_t nChildren (2, 256)
|
1313
|
+
uint32_t startTid
|
1314
|
+
uint32_t startPos
|
1315
|
+
uint32_t endTid
|
1316
|
+
uint32_t endPos
|
1317
|
+
uint64_t dataOffset (0x1AF05853, 0x1AF07057)
|
1318
|
+
|
1319
|
+
data block index node leaf (4 bytes + 32*nChildren)
|
1320
|
+
uint8_t isLeaf
|
1321
|
+
uint8_t padding
|
1322
|
+
uint16_t nChildren (2)
|
1323
|
+
uint32_t startTid
|
1324
|
+
uint32_t startPos
|
1325
|
+
uint32_t endTid
|
1326
|
+
uint32_t endPos
|
1327
|
+
uint64_t dataOffset (0x198, 0x1CF)
|
1328
|
+
uint64_t dataSize (55, 24)
|
1329
|
+
|
1330
|
+
zoom data block
|
1331
|
+
uint32_t number of blocks (10519766)
|
1332
|
+
some data blocks
|
1333
|
+
*/
|