bio-bigwig 0.0.2 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/bio/bigwig/bigwigext.c +24 -20
- data/ext/bio/bigwig/libBigWig/LICENSE +22 -0
- data/ext/bio/bigwig/libBigWig/bigWig.h +606 -0
- data/ext/bio/bigwig/libBigWig/bigWigIO.h +110 -0
- data/ext/bio/bigwig/libBigWig/bwCommon.h +74 -0
- data/ext/bio/bigwig/libBigWig/bwRead.c +438 -0
- data/ext/bio/bigwig/libBigWig/bwStats.c +537 -0
- data/ext/bio/bigwig/libBigWig/bwValues.c +803 -0
- data/ext/bio/bigwig/libBigWig/bwValues.h +77 -0
- data/ext/bio/bigwig/libBigWig/bwWrite.c +1333 -0
- data/ext/bio/bigwig/libBigWig/io.c +296 -0
- data/lib/bio/bigwig/version.rb +1 -1
- metadata +11 -1
@@ -0,0 +1,110 @@
|
|
1
|
+
#ifndef LIBBIGWIG_IO_H
|
2
|
+
#define LIBBIGWIG_IO_H
|
3
|
+
|
4
|
+
#ifndef NOCURL
|
5
|
+
#include <curl/curl.h>
|
6
|
+
#else
|
7
|
+
#include <stdio.h>
|
8
|
+
#ifndef CURLTYPE_DEFINED
|
9
|
+
#define CURLTYPE_DEFINED
|
10
|
+
typedef int CURLcode;
|
11
|
+
typedef void CURL;
|
12
|
+
#endif
|
13
|
+
#define CURLE_OK 0
|
14
|
+
#define CURLE_FAILED_INIT 1
|
15
|
+
#endif
|
16
|
+
/*! \file bigWigIO.h
|
17
|
+
* These are (typically internal) IO functions, so there's generally no need for you to directly use them!
|
18
|
+
*/
|
19
|
+
|
20
|
+
/*!
|
21
|
+
* The size of the buffer used for remote files.
|
22
|
+
*/
|
23
|
+
extern size_t GLOBAL_DEFAULTBUFFERSIZE;
|
24
|
+
|
25
|
+
/*!
|
26
|
+
* The enumerated values that indicate the connection type used to access a file.
|
27
|
+
*/
|
28
|
+
enum bigWigFile_type_enum {
|
29
|
+
BWG_FILE = 0,
|
30
|
+
BWG_HTTP = 1,
|
31
|
+
BWG_HTTPS = 2,
|
32
|
+
BWG_FTP = 3
|
33
|
+
};
|
34
|
+
|
35
|
+
/*!
|
36
|
+
* @brief This structure holds the file pointers and buffers needed for raw access to local and remote files.
|
37
|
+
*/
|
38
|
+
typedef struct {
|
39
|
+
union {
|
40
|
+
#ifndef NOCURL
|
41
|
+
CURL *curl; /**<The CURL * file pointer for remote files.*/
|
42
|
+
#endif
|
43
|
+
FILE *fp; /**<The FILE * file pointer for local files.**/
|
44
|
+
} x; /**<A union holding curl and fp.*/
|
45
|
+
void *memBuf; /**<A void * pointing to memory of size bufSize.*/
|
46
|
+
size_t filePos; /**<Current position inside the file.*/
|
47
|
+
size_t bufPos; /**<Curent position inside the buffer.*/
|
48
|
+
size_t bufSize; /**<The size of the buffer.*/
|
49
|
+
size_t bufLen; /**<The actual size of the buffer used.*/
|
50
|
+
enum bigWigFile_type_enum type; /**<The connection type*/
|
51
|
+
int isCompressed; /**<1 if the file is compressed, otherwise 0*/
|
52
|
+
const char *fname; /**<Only needed for remote connections. The original URL/filename requested, since we need to make multiple connections.*/
|
53
|
+
} URL_t;
|
54
|
+
|
55
|
+
/*!
|
56
|
+
* @brief Reads data into the given buffer.
|
57
|
+
*
|
58
|
+
* This function will store bufSize data into buf for both local and remote files. For remote files an internal buffer is used to store a (typically larger) segment of the remote file.
|
59
|
+
*
|
60
|
+
* @param URL A URL_t * pointing to a valid opened file or remote URL.
|
61
|
+
* @param buf The buffer in memory that you would like filled. It must be able to hold bufSize bytes!
|
62
|
+
* @param bufSize The number of bytes to transfer to buf.
|
63
|
+
*
|
64
|
+
* @return Returns the number of bytes stored in buf, which should be bufSize on success and something else on error.
|
65
|
+
*
|
66
|
+
* @warning Note that on error, URL for remote files is left in an unusable state. You can get around this by running urlSeek() to a position outside of the range held by the internal buffer.
|
67
|
+
*/
|
68
|
+
size_t urlRead(URL_t *URL, void *buf, size_t bufSize);
|
69
|
+
|
70
|
+
/*!
|
71
|
+
* @brief Seeks to a given position in a local or remote file.
|
72
|
+
*
|
73
|
+
* For local files, this will set the file position indicator for the file pointer to the desired position. For remote files, it sets the position to start downloading data for the next urlRead(). Note that for remote files that running urlSeek() with a pos within the current buffer will simply modify the internal offset.
|
74
|
+
*
|
75
|
+
* @param URL A URL_t * pointing to a valid opened file or remote URL.
|
76
|
+
* @param pos The position to seek to.
|
77
|
+
*
|
78
|
+
* @return CURLE_OK on success and a different CURLE_XXX on error. For local files, the error return value is always CURLE_FAILED_INIT
|
79
|
+
*/
|
80
|
+
CURLcode urlSeek(URL_t *URL, size_t pos);
|
81
|
+
|
82
|
+
/*!
|
83
|
+
* @brief Open a local or remote file
|
84
|
+
*
|
85
|
+
* Opens a local or remote file. Currently, http, https, and ftp are the only supported protocols and the URL must then begin with "http://", "https://", or "ftp://" as appropriate.
|
86
|
+
*
|
87
|
+
* For remote files, an internal buffer is used to hold file contents, to avoid downloading entire files before starting. The size of this buffer and various variable related to connection timeout are set with bwInit().
|
88
|
+
*
|
89
|
+
* Note that you **must** run urlClose() on this when finished. However, you would typically just use bwOpen() rather than directly calling this function.
|
90
|
+
*
|
91
|
+
* @param fname The file name or URL to open.
|
92
|
+
* @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information.
|
93
|
+
* @param mode "r", "w" or NULL. If and only if the mode contains the character "w" will the file be opened for writing.
|
94
|
+
*
|
95
|
+
* @return A URL_t * or NULL on error.
|
96
|
+
*/
|
97
|
+
URL_t *urlOpen(const char *fname, CURLcode (*callBack)(CURL*), const char* mode);
|
98
|
+
|
99
|
+
/*!
|
100
|
+
* @brief Close a local/remote file
|
101
|
+
*
|
102
|
+
* This will perform the cleanup required on a URL_t*, releasing memory as needed.
|
103
|
+
*
|
104
|
+
* @param URL A URL_t * pointing to a valid opened file or remote URL.
|
105
|
+
*
|
106
|
+
* @warning URL will no longer point to a valid location in memory!
|
107
|
+
*/
|
108
|
+
void urlClose(URL_t *URL);
|
109
|
+
|
110
|
+
#endif // LIBBIGWIG_IO_H
|
@@ -0,0 +1,74 @@
|
|
1
|
+
/*! \file bwCommon.h
|
2
|
+
*
|
3
|
+
* You have no reason to use these functions. They may change without warning because there's no reason for them to be used outside of libBigWig's internals.
|
4
|
+
*
|
5
|
+
* These are structures and functions from a variety of files that are used across files internally but don't need to be see by libBigWig users.
|
6
|
+
*/
|
7
|
+
|
8
|
+
/*!
|
9
|
+
* @brief Like fsetpos, but for local or remote bigWig files.
|
10
|
+
* This will set the file position indicator to the specified point. For local files this literally is `fsetpos`, while for remote files it fills a memory buffer with data starting at the desired position.
|
11
|
+
* @param fp A valid opened bigWigFile_t.
|
12
|
+
* @param pos The position within the file to seek to.
|
13
|
+
* @return 0 on success and -1 on error.
|
14
|
+
*/
|
15
|
+
int bwSetPos(bigWigFile_t *fp, size_t pos);
|
16
|
+
|
17
|
+
/*!
|
18
|
+
* @brief A local/remote version of `fread`.
|
19
|
+
* Reads data from either local or remote bigWig files.
|
20
|
+
* @param data An allocated memory block big enough to hold the data.
|
21
|
+
* @param sz The size of each member that should be copied.
|
22
|
+
* @param nmemb The number of members to copy.
|
23
|
+
* @param fp The bigWigFile_t * from which to copy the data.
|
24
|
+
* @see bwSetPos
|
25
|
+
* @return For nmemb==1, the size of the copied data. For nmemb>1, the number of members fully copied (this is equivalent to `fread`).
|
26
|
+
*/
|
27
|
+
size_t bwRead(void *data, size_t sz, size_t nmemb, bigWigFile_t *fp);
|
28
|
+
|
29
|
+
/*!
|
30
|
+
* @brief Determine what the file position indicator say.
|
31
|
+
* This is equivalent to `ftell` for local or remote files.
|
32
|
+
* @param fp The file.
|
33
|
+
* @return The position in the file.
|
34
|
+
*/
|
35
|
+
long bwTell(bigWigFile_t *fp);
|
36
|
+
|
37
|
+
/*!
|
38
|
+
* @brief Reads a data index (either full data or a zoom level) from a bigWig file.
|
39
|
+
* There is little reason for end users to use this function. This must be freed with `bwDestroyIndex`
|
40
|
+
* @param fp A valid bigWigFile_t pointer
|
41
|
+
* @param offset The file offset where the index begins
|
42
|
+
* @return A bwRTree_t pointer or NULL on error.
|
43
|
+
*/
|
44
|
+
bwRTree_t *bwReadIndex(bigWigFile_t *fp, uint64_t offset);
|
45
|
+
|
46
|
+
/*!
|
47
|
+
* @brief Destroy an bwRTreeNode_t and all of its children.
|
48
|
+
* @param node The node to destroy.
|
49
|
+
*/
|
50
|
+
void bwDestroyIndexNode(bwRTreeNode_t *node);
|
51
|
+
|
52
|
+
/*!
|
53
|
+
* @brief Frees space allocated by `bwReadIndex`
|
54
|
+
* There is generally little reason to use this, since end users should typically not need to run `bwReadIndex` themselves.
|
55
|
+
* @param idx A bwRTree_t pointer allocated by `bwReadIndex`.
|
56
|
+
*/
|
57
|
+
void bwDestroyIndex(bwRTree_t *idx);
|
58
|
+
|
59
|
+
/// @cond SKIP
|
60
|
+
bwOverlapBlock_t *walkRTreeNodes(bigWigFile_t *bw, bwRTreeNode_t *root, uint32_t tid, uint32_t start, uint32_t end);
|
61
|
+
void destroyBWOverlapBlock(bwOverlapBlock_t *b);
|
62
|
+
/// @endcond
|
63
|
+
|
64
|
+
/*!
|
65
|
+
* @brief Finishes what's needed to write a bigWigFile
|
66
|
+
* Flushes the buffer, converts the index linked list to a tree, writes that to disk, handles zoom level stuff, writes magic at the end
|
67
|
+
* @param fp A valid bigWigFile_t pointer
|
68
|
+
* @return 0 on success
|
69
|
+
*/
|
70
|
+
int bwFinalize(bigWigFile_t *fp);
|
71
|
+
|
72
|
+
/// @cond SKIP
|
73
|
+
char *bwStrdup(const char *s);
|
74
|
+
/// @endcond
|
@@ -0,0 +1,438 @@
|
|
1
|
+
#include "bigWig.h"
|
2
|
+
#include "bwCommon.h"
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <math.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <stdio.h>
|
7
|
+
|
8
|
+
static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize);
|
9
|
+
|
10
|
+
//Return the position in the file
|
11
|
+
long bwTell(bigWigFile_t *fp) {
|
12
|
+
if(fp->URL->type == BWG_FILE) return ftell(fp->URL->x.fp);
|
13
|
+
return (long) (fp->URL->filePos + fp->URL->bufPos);
|
14
|
+
}
|
15
|
+
|
16
|
+
//Seek to a given position, always from the beginning of the file
|
17
|
+
//Return 0 on success and -1 on error
|
18
|
+
//To do, use the return code of urlSeek() in a more useful way.
|
19
|
+
int bwSetPos(bigWigFile_t *fp, size_t pos) {
|
20
|
+
CURLcode rv = urlSeek(fp->URL, pos);
|
21
|
+
if(rv == CURLE_OK) return 0;
|
22
|
+
return -1;
|
23
|
+
}
|
24
|
+
|
25
|
+
//returns the number of full members read (nmemb on success, something less on error)
|
26
|
+
size_t bwRead(void *data, size_t sz, size_t nmemb, bigWigFile_t *fp) {
|
27
|
+
size_t i, rv;
|
28
|
+
for(i=0; i<nmemb; i++) {
|
29
|
+
rv = urlRead(fp->URL, data+i*sz, sz);
|
30
|
+
if(rv != sz) return i;
|
31
|
+
}
|
32
|
+
return nmemb;
|
33
|
+
}
|
34
|
+
|
35
|
+
//Initializes curl and sets global variables
|
36
|
+
//Returns 0 on success and 1 on error
|
37
|
+
//This should be called only once and bwCleanup() must be called when finished.
|
38
|
+
int bwInit(size_t defaultBufSize) {
|
39
|
+
//set the buffer size, number of iterations, sleep time between iterations, etc.
|
40
|
+
GLOBAL_DEFAULTBUFFERSIZE = defaultBufSize;
|
41
|
+
|
42
|
+
//call curl_global_init()
|
43
|
+
#ifndef NOCURL
|
44
|
+
CURLcode rv;
|
45
|
+
rv = curl_global_init(CURL_GLOBAL_ALL);
|
46
|
+
if(rv != CURLE_OK) return 1;
|
47
|
+
#endif
|
48
|
+
return 0;
|
49
|
+
}
|
50
|
+
|
51
|
+
//This should be called before quiting, to release memory acquired by curl
|
52
|
+
void bwCleanup() {
|
53
|
+
#ifndef NOCURL
|
54
|
+
curl_global_cleanup();
|
55
|
+
#endif
|
56
|
+
}
|
57
|
+
|
58
|
+
static bwZoomHdr_t *bwReadZoomHdrs(bigWigFile_t *bw) {
|
59
|
+
if(bw->isWrite) return NULL;
|
60
|
+
uint16_t i;
|
61
|
+
bwZoomHdr_t *zhdr = malloc(sizeof(bwZoomHdr_t));
|
62
|
+
if(!zhdr) return NULL;
|
63
|
+
uint32_t *level = malloc(bw->hdr->nLevels * sizeof(uint64_t));
|
64
|
+
if(!level) {
|
65
|
+
free(zhdr);
|
66
|
+
return NULL;
|
67
|
+
}
|
68
|
+
uint32_t padding = 0;
|
69
|
+
uint64_t *dataOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
|
70
|
+
if(!dataOffset) {
|
71
|
+
free(zhdr);
|
72
|
+
free(level);
|
73
|
+
return NULL;
|
74
|
+
}
|
75
|
+
uint64_t *indexOffset = malloc(sizeof(uint64_t) * bw->hdr->nLevels);
|
76
|
+
if(!indexOffset) {
|
77
|
+
free(zhdr);
|
78
|
+
free(level);
|
79
|
+
free(dataOffset);
|
80
|
+
return NULL;
|
81
|
+
}
|
82
|
+
|
83
|
+
for(i=0; i<bw->hdr->nLevels; i++) {
|
84
|
+
if(bwRead((void*) &(level[i]), sizeof(uint32_t), 1, bw) != 1) goto error;
|
85
|
+
if(bwRead((void*) &padding, sizeof(uint32_t), 1, bw) != 1) goto error;
|
86
|
+
if(bwRead((void*) &(dataOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
|
87
|
+
if(bwRead((void*) &(indexOffset[i]), sizeof(uint64_t), 1, bw) != 1) goto error;
|
88
|
+
}
|
89
|
+
|
90
|
+
zhdr->level = level;
|
91
|
+
zhdr->dataOffset = dataOffset;
|
92
|
+
zhdr->indexOffset = indexOffset;
|
93
|
+
zhdr->idx = calloc(bw->hdr->nLevels, sizeof(bwRTree_t*));
|
94
|
+
if(!zhdr->idx) goto error;
|
95
|
+
|
96
|
+
return zhdr;
|
97
|
+
|
98
|
+
error:
|
99
|
+
for(i=0; i<bw->hdr->nLevels; i++) {
|
100
|
+
if(zhdr->idx[i]) bwDestroyIndex(zhdr->idx[i]);
|
101
|
+
}
|
102
|
+
free(zhdr);
|
103
|
+
free(level);
|
104
|
+
free(dataOffset);
|
105
|
+
free(indexOffset);
|
106
|
+
return NULL;
|
107
|
+
}
|
108
|
+
|
109
|
+
static void bwHdrDestroy(bigWigHdr_t *hdr) {
|
110
|
+
int i;
|
111
|
+
if(hdr->zoomHdrs) {
|
112
|
+
free(hdr->zoomHdrs->level);
|
113
|
+
free(hdr->zoomHdrs->dataOffset);
|
114
|
+
free(hdr->zoomHdrs->indexOffset);
|
115
|
+
for(i=0; i<hdr->nLevels; i++) {
|
116
|
+
if(hdr->zoomHdrs->idx[i]) bwDestroyIndex(hdr->zoomHdrs->idx[i]);
|
117
|
+
}
|
118
|
+
free(hdr->zoomHdrs->idx);
|
119
|
+
free(hdr->zoomHdrs);
|
120
|
+
}
|
121
|
+
free(hdr);
|
122
|
+
}
|
123
|
+
|
124
|
+
static void bwHdrRead(bigWigFile_t *bw) {
|
125
|
+
uint32_t magic;
|
126
|
+
if(bw->isWrite) return;
|
127
|
+
bw->hdr = calloc(1, sizeof(bigWigHdr_t));
|
128
|
+
if(!bw->hdr) return;
|
129
|
+
|
130
|
+
if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error; //0x0
|
131
|
+
if(magic != BIGWIG_MAGIC && magic != BIGBED_MAGIC) goto error;
|
132
|
+
|
133
|
+
if(bwRead((void*) &(bw->hdr->version), sizeof(uint16_t), 1, bw) != 1) goto error; //0x4
|
134
|
+
if(bwRead((void*) &(bw->hdr->nLevels), sizeof(uint16_t), 1, bw) != 1) goto error; //0x6
|
135
|
+
if(bwRead((void*) &(bw->hdr->ctOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x8
|
136
|
+
if(bwRead((void*) &(bw->hdr->dataOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x10
|
137
|
+
if(bwRead((void*) &(bw->hdr->indexOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x18
|
138
|
+
if(bwRead((void*) &(bw->hdr->fieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x20
|
139
|
+
if(bwRead((void*) &(bw->hdr->definedFieldCount), sizeof(uint16_t), 1, bw) != 1) goto error; //0x22
|
140
|
+
if(bwRead((void*) &(bw->hdr->sqlOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x24
|
141
|
+
if(bwRead((void*) &(bw->hdr->summaryOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x2c
|
142
|
+
if(bwRead((void*) &(bw->hdr->bufSize), sizeof(uint32_t), 1, bw) != 1) goto error; //0x34
|
143
|
+
if(bwRead((void*) &(bw->hdr->extensionOffset), sizeof(uint64_t), 1, bw) != 1) goto error; //0x38
|
144
|
+
|
145
|
+
//zoom headers
|
146
|
+
if(bw->hdr->nLevels) {
|
147
|
+
if(!(bw->hdr->zoomHdrs = bwReadZoomHdrs(bw))) goto error;
|
148
|
+
}
|
149
|
+
|
150
|
+
//File summary information
|
151
|
+
if(bw->hdr->summaryOffset) {
|
152
|
+
if(urlSeek(bw->URL, bw->hdr->summaryOffset) != CURLE_OK) goto error;
|
153
|
+
if(bwRead((void*) &(bw->hdr->nBasesCovered), sizeof(uint64_t), 1, bw) != 1) goto error;
|
154
|
+
if(bwRead((void*) &(bw->hdr->minVal), sizeof(uint64_t), 1, bw) != 1) goto error;
|
155
|
+
if(bwRead((void*) &(bw->hdr->maxVal), sizeof(uint64_t), 1, bw) != 1) goto error;
|
156
|
+
if(bwRead((void*) &(bw->hdr->sumData), sizeof(uint64_t), 1, bw) != 1) goto error;
|
157
|
+
if(bwRead((void*) &(bw->hdr->sumSquared), sizeof(uint64_t), 1, bw) != 1) goto error;
|
158
|
+
}
|
159
|
+
|
160
|
+
//In case of uncompressed remote files, let the IO functions know to request larger chunks
|
161
|
+
bw->URL->isCompressed = (bw->hdr->bufSize > 0)?1:0;
|
162
|
+
|
163
|
+
return;
|
164
|
+
|
165
|
+
error:
|
166
|
+
bwHdrDestroy(bw->hdr);
|
167
|
+
fprintf(stderr, "[bwHdrRead] There was an error while reading in the header!\n");
|
168
|
+
bw->hdr = NULL;
|
169
|
+
}
|
170
|
+
|
171
|
+
static void destroyChromList(chromList_t *cl) {
|
172
|
+
uint32_t i;
|
173
|
+
if(!cl) return;
|
174
|
+
if(cl->nKeys && cl->chrom) {
|
175
|
+
for(i=0; i<cl->nKeys; i++) {
|
176
|
+
if(cl->chrom[i]) free(cl->chrom[i]);
|
177
|
+
}
|
178
|
+
}
|
179
|
+
if(cl->chrom) free(cl->chrom);
|
180
|
+
if(cl->len) free(cl->len);
|
181
|
+
free(cl);
|
182
|
+
}
|
183
|
+
|
184
|
+
static uint64_t readChromLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t valueSize) {
|
185
|
+
uint16_t nVals, i;
|
186
|
+
uint32_t idx;
|
187
|
+
char *chrom = NULL;
|
188
|
+
|
189
|
+
if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
|
190
|
+
chrom = calloc(valueSize+1, sizeof(char));
|
191
|
+
if(!chrom) return -1;
|
192
|
+
|
193
|
+
for(i=0; i<nVals; i++) {
|
194
|
+
if(bwRead((void*) chrom, sizeof(char), valueSize, bw) != valueSize) goto error;
|
195
|
+
if(bwRead((void*) &idx, sizeof(uint32_t), 1, bw) != 1) goto error;
|
196
|
+
if(bwRead((void*) &(cl->len[idx]), sizeof(uint32_t), 1, bw) != 1) goto error;
|
197
|
+
cl->chrom[idx] = bwStrdup(chrom);
|
198
|
+
if(!(cl->chrom[idx])) goto error;
|
199
|
+
}
|
200
|
+
|
201
|
+
free(chrom);
|
202
|
+
return nVals;
|
203
|
+
|
204
|
+
error:
|
205
|
+
free(chrom);
|
206
|
+
return -1;
|
207
|
+
}
|
208
|
+
|
209
|
+
static uint64_t readChromNonLeaf(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
|
210
|
+
uint64_t offset , rv = 0, previous;
|
211
|
+
uint16_t nVals, i;
|
212
|
+
|
213
|
+
if(bwRead((void*) &nVals, sizeof(uint16_t), 1, bw) != 1) return -1;
|
214
|
+
|
215
|
+
previous = bwTell(bw) + keySize;
|
216
|
+
for(i=0; i<nVals; i++) {
|
217
|
+
if(bwSetPos(bw, previous)) return -1;
|
218
|
+
if(bwRead((void*) &offset, sizeof(uint64_t), 1, bw) != 1) return -1;
|
219
|
+
if(bwSetPos(bw, offset)) return -1;
|
220
|
+
rv += readChromBlock(bw, cl, keySize);
|
221
|
+
previous += 8 + keySize;
|
222
|
+
}
|
223
|
+
|
224
|
+
return rv;
|
225
|
+
}
|
226
|
+
|
227
|
+
static uint64_t readChromBlock(bigWigFile_t *bw, chromList_t *cl, uint32_t keySize) {
|
228
|
+
uint8_t isLeaf, padding;
|
229
|
+
|
230
|
+
if(bwRead((void*) &isLeaf, sizeof(uint8_t), 1, bw) != 1) return -1;
|
231
|
+
if(bwRead((void*) &padding, sizeof(uint8_t), 1, bw) != 1) return -1;
|
232
|
+
|
233
|
+
if(isLeaf) {
|
234
|
+
return readChromLeaf(bw, cl, keySize);
|
235
|
+
} else { //I've never actually observed one of these, which is good since they're pointless
|
236
|
+
return readChromNonLeaf(bw, cl, keySize);
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
static chromList_t *bwReadChromList(bigWigFile_t *bw) {
|
241
|
+
chromList_t *cl = NULL;
|
242
|
+
uint32_t magic, keySize, valueSize, itemsPerBlock;
|
243
|
+
uint64_t rv, itemCount;
|
244
|
+
if(bw->isWrite) return NULL;
|
245
|
+
if(bwSetPos(bw, bw->hdr->ctOffset)) return NULL;
|
246
|
+
|
247
|
+
cl = calloc(1, sizeof(chromList_t));
|
248
|
+
if(!cl) return NULL;
|
249
|
+
|
250
|
+
if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
|
251
|
+
if(magic != CIRTREE_MAGIC) goto error;
|
252
|
+
|
253
|
+
if(bwRead((void*) &itemsPerBlock, sizeof(uint32_t), 1, bw) != 1) goto error;
|
254
|
+
if(bwRead((void*) &keySize, sizeof(uint32_t), 1, bw) != 1) goto error;
|
255
|
+
if(bwRead((void*) &valueSize, sizeof(uint32_t), 1, bw) != 1) goto error;
|
256
|
+
if(bwRead((void*) &itemCount, sizeof(uint64_t), 1, bw) != 1) goto error;
|
257
|
+
|
258
|
+
cl->nKeys = itemCount;
|
259
|
+
cl->chrom = calloc(itemCount, sizeof(char*));
|
260
|
+
cl->len = calloc(itemCount, sizeof(uint32_t));
|
261
|
+
if(!cl->chrom) goto error;
|
262
|
+
if(!cl->len) goto error;
|
263
|
+
|
264
|
+
if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
|
265
|
+
if(bwRead((void*) &magic, sizeof(uint32_t), 1, bw) != 1) goto error;
|
266
|
+
|
267
|
+
//Read in the blocks
|
268
|
+
rv = readChromBlock(bw, cl, keySize);
|
269
|
+
if(rv == (uint64_t) -1) goto error;
|
270
|
+
if(rv != itemCount) goto error;
|
271
|
+
|
272
|
+
return cl;
|
273
|
+
|
274
|
+
error:
|
275
|
+
destroyChromList(cl);
|
276
|
+
return NULL;
|
277
|
+
}
|
278
|
+
|
279
|
+
//This is here mostly for convenience
|
280
|
+
static void bwDestroyWriteBuffer(bwWriteBuffer_t *wb) {
|
281
|
+
if(wb->p) free(wb->p);
|
282
|
+
if(wb->compressP) free(wb->compressP);
|
283
|
+
if(wb->firstZoomBuffer) free(wb->firstZoomBuffer);
|
284
|
+
if(wb->lastZoomBuffer) free(wb->lastZoomBuffer);
|
285
|
+
if(wb->nNodes) free(wb->nNodes);
|
286
|
+
free(wb);
|
287
|
+
}
|
288
|
+
|
289
|
+
void bwClose(bigWigFile_t *fp) {
|
290
|
+
if(!fp) return;
|
291
|
+
if(bwFinalize(fp)) {
|
292
|
+
fprintf(stderr, "[bwClose] There was an error while finishing writing a bigWig file! The output is likely truncated.\n");
|
293
|
+
}
|
294
|
+
if(fp->URL) urlClose(fp->URL);
|
295
|
+
if(fp->hdr) bwHdrDestroy(fp->hdr);
|
296
|
+
if(fp->cl) destroyChromList(fp->cl);
|
297
|
+
if(fp->idx) bwDestroyIndex(fp->idx);
|
298
|
+
if(fp->writeBuffer) bwDestroyWriteBuffer(fp->writeBuffer);
|
299
|
+
free(fp);
|
300
|
+
}
|
301
|
+
|
302
|
+
int bwIsBigWig(const char *fname, CURLcode (*callBack) (CURL*)) {
|
303
|
+
uint32_t magic = 0;
|
304
|
+
URL_t *URL = NULL;
|
305
|
+
|
306
|
+
URL = urlOpen(fname, *callBack, NULL);
|
307
|
+
|
308
|
+
if(!URL) return 0;
|
309
|
+
if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
|
310
|
+
urlClose(URL);
|
311
|
+
if(magic == BIGWIG_MAGIC) return 1;
|
312
|
+
return 0;
|
313
|
+
}
|
314
|
+
|
315
|
+
char *bbGetSQL(bigWigFile_t *fp) {
|
316
|
+
char *o = NULL;
|
317
|
+
uint64_t len;
|
318
|
+
if(!fp->hdr->sqlOffset) return NULL;
|
319
|
+
len = fp->hdr->summaryOffset - fp->hdr->sqlOffset; //This includes the NULL terminator
|
320
|
+
o = malloc(sizeof(char) * len);
|
321
|
+
if(!o) goto error;
|
322
|
+
if(bwSetPos(fp, fp->hdr->sqlOffset)) goto error;
|
323
|
+
if(bwRead((void*) o, len, 1, fp) != 1) goto error;
|
324
|
+
return o;
|
325
|
+
|
326
|
+
error:
|
327
|
+
if(o) free(o);
|
328
|
+
printf("Got an error in bbGetSQL!\n");
|
329
|
+
return NULL;
|
330
|
+
}
|
331
|
+
|
332
|
+
int bbIsBigBed(const char *fname, CURLcode (*callBack) (CURL*)) {
|
333
|
+
uint32_t magic = 0;
|
334
|
+
URL_t *URL = NULL;
|
335
|
+
|
336
|
+
URL = urlOpen(fname, *callBack, NULL);
|
337
|
+
|
338
|
+
if(!URL) return 0;
|
339
|
+
if(urlRead(URL, (void*) &magic, sizeof(uint32_t)) != sizeof(uint32_t)) magic = 0;
|
340
|
+
urlClose(URL);
|
341
|
+
if(magic == BIGBED_MAGIC) return 1;
|
342
|
+
return 0;
|
343
|
+
}
|
344
|
+
|
345
|
+
bigWigFile_t *bwOpen(const char *fname, CURLcode (*callBack) (CURL*), const char *mode) {
|
346
|
+
bigWigFile_t *bwg = calloc(1, sizeof(bigWigFile_t));
|
347
|
+
if(!bwg) {
|
348
|
+
fprintf(stderr, "[bwOpen] Couldn't allocate space to create the output object!\n");
|
349
|
+
return NULL;
|
350
|
+
}
|
351
|
+
if((!mode) || (strchr(mode, 'w') == NULL)) {
|
352
|
+
bwg->isWrite = 0;
|
353
|
+
bwg->URL = urlOpen(fname, *callBack, NULL);
|
354
|
+
if(!bwg->URL) {
|
355
|
+
fprintf(stderr, "[bwOpen] urlOpen is NULL!\n");
|
356
|
+
goto error;
|
357
|
+
}
|
358
|
+
|
359
|
+
//Attempt to read in the fixed header
|
360
|
+
bwHdrRead(bwg);
|
361
|
+
if(!bwg->hdr) {
|
362
|
+
fprintf(stderr, "[bwOpen] bwg->hdr is NULL!\n");
|
363
|
+
goto error;
|
364
|
+
}
|
365
|
+
|
366
|
+
//Read in the chromosome list
|
367
|
+
bwg->cl = bwReadChromList(bwg);
|
368
|
+
if(!bwg->cl) {
|
369
|
+
fprintf(stderr, "[bwOpen] bwg->cl is NULL (%s)!\n", fname);
|
370
|
+
goto error;
|
371
|
+
}
|
372
|
+
|
373
|
+
//Read in the index
|
374
|
+
if(bwg->hdr->indexOffset) {
|
375
|
+
bwg->idx = bwReadIndex(bwg, 0);
|
376
|
+
if(!bwg->idx) {
|
377
|
+
fprintf(stderr, "[bwOpen] bwg->idx is NULL bwg->hdr->dataOffset 0x%"PRIx64"!\n", bwg->hdr->dataOffset);
|
378
|
+
goto error;
|
379
|
+
}
|
380
|
+
}
|
381
|
+
} else {
|
382
|
+
bwg->isWrite = 1;
|
383
|
+
bwg->URL = urlOpen(fname, NULL, "w+");
|
384
|
+
if(!bwg->URL) goto error;
|
385
|
+
bwg->writeBuffer = calloc(1,sizeof(bwWriteBuffer_t));
|
386
|
+
if(!bwg->writeBuffer) goto error;
|
387
|
+
bwg->writeBuffer->l = 24;
|
388
|
+
}
|
389
|
+
|
390
|
+
return bwg;
|
391
|
+
|
392
|
+
error:
|
393
|
+
bwClose(bwg);
|
394
|
+
return NULL;
|
395
|
+
}
|
396
|
+
|
397
|
+
bigWigFile_t *bbOpen(const char *fname, CURLcode (*callBack) (CURL*)) {
|
398
|
+
bigWigFile_t *bb = calloc(1, sizeof(bigWigFile_t));
|
399
|
+
if(!bb) {
|
400
|
+
fprintf(stderr, "[bbOpen] Couldn't allocate space to create the output object!\n");
|
401
|
+
return NULL;
|
402
|
+
}
|
403
|
+
|
404
|
+
//Set the type to 1 for bigBed
|
405
|
+
bb->type = 1;
|
406
|
+
|
407
|
+
bb->URL = urlOpen(fname, *callBack, NULL);
|
408
|
+
if(!bb->URL) goto error;
|
409
|
+
|
410
|
+
//Attempt to read in the fixed header
|
411
|
+
bwHdrRead(bb);
|
412
|
+
if(!bb->hdr) goto error;
|
413
|
+
|
414
|
+
//Read in the chromosome list
|
415
|
+
bb->cl = bwReadChromList(bb);
|
416
|
+
if(!bb->cl) goto error;
|
417
|
+
|
418
|
+
//Read in the index
|
419
|
+
bb->idx = bwReadIndex(bb, 0);
|
420
|
+
if(!bb->idx) goto error;
|
421
|
+
|
422
|
+
return bb;
|
423
|
+
|
424
|
+
error:
|
425
|
+
bwClose(bb);
|
426
|
+
return NULL;
|
427
|
+
}
|
428
|
+
|
429
|
+
|
430
|
+
//Implementation taken from musl:
|
431
|
+
//https://git.musl-libc.org/cgit/musl/tree/src/string/strdup.c
|
432
|
+
//License: https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
|
433
|
+
char* bwStrdup(const char *s) {
|
434
|
+
size_t l = strlen(s);
|
435
|
+
char *d = malloc(l+1);
|
436
|
+
if (!d) return NULL;
|
437
|
+
return memcpy(d, s, l+1);
|
438
|
+
}
|