bio-bigwig 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,9 @@
1
+ #ifndef BIO_BIGWIG_H
2
+ #define BIO_BIGWIG_H 1
3
+
4
+ #include "ruby.h"
5
+ #include "libBigWig/bigWig.h"
6
+
7
+ #endif /* BIO_BIGWIG_H */
8
+
9
+
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+
5
+ # https://github.com/taf2/curb/blob/master/ext/extconf.rb
6
+ dir_config("curl")
7
+ if find_executable("curl-config")
8
+ $CFLAGS << " #{`curl-config --cflags`.strip} -g"
9
+ $LIBS << if ENV["STATIC_BUILD"]
10
+ " #{`curl-config --static-libs`.strip}"
11
+ else
12
+ " #{`curl-config --libs`.strip}"
13
+ end
14
+ ca_bundle_path = `curl-config --ca`.strip
15
+ if !ca_bundle_path.nil? && (ca_bundle_path != "")
16
+ $defs.push(%(-D HAVE_CURL_CONFIG_CA))
17
+ $defs.push(%(-D CURL_CONFIG_CA='#{ca_bundle_path.inspect}'))
18
+ end
19
+ end
20
+
21
+ # dir_config("libbigwig")
22
+ # unless find_header("bigWig.h") && have_library("bigwig")
23
+ $INCFLAGS << " -I$(srcdir)/libBigWig"
24
+ $VPATH << "$(srcdir)/libBigWig"
25
+ $srcs = Dir.glob(["{.,libBigWig}/*.c"], base: __dir__)
26
+ .map { |f| File.expand_path(f, __dir__) }
27
+ $objs = $srcs.map { |f| f.sub(/\.c$/, ".o") }
28
+ # end
29
+
30
+ create_makefile("bio/bigwig/bigwigext")
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Devon Ryan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,606 @@
1
+ #ifndef LIBBIGWIG_H
2
+ #define LIBBIGWIG_H
3
+
4
+ #include "bigWigIO.h"
5
+ #include "bwValues.h"
6
+ #include <inttypes.h>
7
+ #include <zlib.h>
8
+
9
+ #ifdef __cplusplus
10
+ extern "C" {
11
+ #endif
12
+
13
+ /*! \mainpage libBigWig
14
+ *
15
+ * \section Introduction
16
+ *
17
+ * libBigWig is a C library for parsing local/remote bigWig and bigBed files. This is similar to Kent's library from UCSC, except
18
+ * * The license is much more liberal
19
+ * * This code doesn't call `exit()` on error, which would kill the calling application.
20
+ *
21
+ * External files are accessed using [curl](http://curl.haxx.se/).
22
+ *
23
+ * Please submit issues and pull requests [here](https://github.com/dpryan79/libBigWig).
24
+ *
25
+ * \section Compilation
26
+ *
27
+ * Assuming you already have the curl libraries installed (not just the curl binary!):
28
+ *
29
+ * make install prefix=/some/path
30
+ *
31
+ * \section Writing bigWig files
32
+ *
33
+ * There are three methods for storing values in a bigWig file, further described in the [wiggle format](http://genome.ucsc.edu/goldenpath/help/wiggle.html). The entries within the file are grouped into "blocks" and each such block is limited to storing entries of a single type. So, it is unwise to use a single bedGraph-like entry followed by a single fixed-step entry followed by a variable-step entry, as that would require three separate blocks, with additional space required for each.
34
+ *
35
+ * \section Testing file types
36
+ *
37
+ * As of version 0.3.0, libBigWig supports reading bigBed files. If an application needs to support both bigBed and bigWig input, then the `bwIsBigWig` and `bbIsBigBed` functions can be used to determine the file type. These both use the "magic" number at the beginning of the file to determine the file type.
38
+ *
39
+ * \section Interval and entry iterators
40
+ *
41
+ * As of version 0.3.0, libBigWig supports iterating over intervals in bigWig files and entries in bigBed files. The number of intervals/entries returned with each iteration can be controlled by setting the number of blocks processed in each iteration (intervals and entries are grouped inside of bigWig and bigBed files into blocks of entries). See `test/testIterator.c` for an example.
42
+ *
43
+ * \section Examples
44
+ *
45
+ * Please see [README.md](README.md) and the files under `test/` for examples.
46
+ */
47
+
48
+
49
+ /*! \file bigWig.h
50
+ *
51
+ * These are the functions and structures that should be used by external users. While I don't particularly recommend dealing with some of the structures (e.g., a bigWigHdr_t), they're described here in case you need them.
52
+ *
53
+ * BTW, this library doesn't switch endianness as appropriate, since I kind of assume that there's only one type produced these days.
54
+ */
55
+
56
+ /*!
57
+ * The library version number
58
+ */
59
+ #define LIBBIGWIG_VERSION 0.4.6
60
+
61
+ /*!
62
+ * If 1, then this library was compiled with remote file support.
63
+ */
64
+ #ifdef NOCURL
65
+ #define LIBBIGWIG_CURL 0
66
+ #ifndef CURLTYPE_DEFINED
67
+ #define CURLTYPE_DEFINED
68
+ typedef int CURLcode;
69
+ typedef void CURL;
70
+ #endif
71
+ #else
72
+ #define LIBBIGWIG_CURL 1
73
+ #endif
74
+
75
+ /*!
76
+ * The magic number of a bigWig file.
77
+ */
78
+ #define BIGWIG_MAGIC 0x888FFC26
79
+ /*!
80
+ * The magic number of a bigBed file.
81
+ */
82
+ #define BIGBED_MAGIC 0x8789F2EB
83
+ /*!
84
+ * The magic number of a "cirTree" block in a file.
85
+ */
86
+ #define CIRTREE_MAGIC 0x78ca8c91
87
+ /*!
88
+ * The magic number of an index block in a file.
89
+ */
90
+ #define IDX_MAGIC 0x2468ace0
91
+ /*!
92
+ * The default number of children per block.
93
+ */
94
+ #define DEFAULT_nCHILDREN 64
95
+ /*!
96
+ * The default decompression buffer size in bytes.
97
+ */
98
+ #define DEFAULT_BLOCKSIZE 32768
99
+
100
+ /*!
101
+ * An enum that dictates the type of statistic to fetch for a given interval
102
+ */
103
+ enum bwStatsType {
104
+ doesNotExist = -1, /*!< This does nothing */
105
+ mean = 0, /*!< The mean value */
106
+ average = 0, /*!< The mean value */
107
+ stdev = 1, /*!< The standard deviation of the values */
108
+ dev = 1, /*!< The standard deviation of the values */
109
+ max = 2, /*!< The maximum value */
110
+ min = 3, /*!< The minimum value */
111
+ cov = 4, /*!< The number of bases covered */
112
+ coverage = 4, /*!<The number of bases covered */
113
+ sum = 5 /*!< The sum of per-base values */
114
+ };
115
+
116
+ //Should hide this from end users
117
+ /*!
118
+ * @brief BigWig files have multiple "zoom" levels, each of which has its own header. This holds those headers.
119
+ *
120
+ * N.B., there's 4 bytes of padding in the on disk representation of level and dataOffset.
121
+ */
122
+ typedef struct {
123
+ uint32_t *level; /**<The zoom level, which is an integer starting with 0.*/
124
+ //There's 4 bytes of padding between these
125
+ uint64_t *dataOffset; /**<The offset to the on-disk start of the data. This isn't used currently.*/
126
+ uint64_t *indexOffset; /**<The offset to the on-disk start of the index. This *is* used.*/
127
+ bwRTree_t **idx; /**<Index for each zoom level. Represented as a tree*/
128
+ } bwZoomHdr_t;
129
+
130
+ /*!
131
+ * @brief The header section of a bigWig file.
132
+ *
133
+ * Some of the values aren't currently used for anything. Others may optionally not exist.
134
+ */
135
+ typedef struct {
136
+ uint16_t version; /**<The version information of the file.*/
137
+ uint16_t nLevels; /**<The number of "zoom" levels.*/
138
+ uint64_t ctOffset; /**<The offset to the on-disk chromosome tree list.*/
139
+ uint64_t dataOffset; /**<The on-disk offset to the first block of data.*/
140
+ uint64_t indexOffset; /**<The on-disk offset to the data index.*/
141
+ uint16_t fieldCount; /**<Total number of fields.*/
142
+ uint16_t definedFieldCount; /**<Number of fixed-format BED fields.*/
143
+ uint64_t sqlOffset; /**<The on-disk offset to an SQL string. This is unused.*/
144
+ uint64_t summaryOffset; /**<If there's a summary, this is the offset to it on the disk.*/
145
+ uint32_t bufSize; /**<The compression buffer size (if the data is compressed).*/
146
+ uint64_t extensionOffset; /**<Unused*/
147
+ bwZoomHdr_t *zoomHdrs; /**<Pointers to the header for each zoom level.*/
148
+ //total Summary
149
+ uint64_t nBasesCovered; /**<The total bases covered in the file.*/
150
+ double minVal; /**<The minimum value in the file.*/
151
+ double maxVal; /**<The maximum value in the file.*/
152
+ double sumData; /**<The sum of all values in the file.*/
153
+ double sumSquared; /**<The sum of the squared values in the file.*/
154
+ } bigWigHdr_t;
155
+
156
+ //Should probably replace this with a hash
157
+ /*!
158
+ * @brief Holds the chromosomes and their lengths
159
+ */
160
+ typedef struct {
161
+ int64_t nKeys; /**<The number of chromosomes */
162
+ char **chrom; /**<A list of null terminated chromosomes */
163
+ uint32_t *len; /**<The lengths of each chromosome */
164
+ } chromList_t;
165
+
166
+ //TODO remove from bigWig.h
167
+ /// @cond SKIP
168
+ typedef struct bwLL bwLL;
169
+ struct bwLL {
170
+ bwRTreeNode_t *node;
171
+ struct bwLL *next;
172
+ };
173
+ typedef struct bwZoomBuffer_t bwZoomBuffer_t;
174
+ struct bwZoomBuffer_t { //each individual entry takes 32 bytes
175
+ void *p;
176
+ uint32_t l, m;
177
+ struct bwZoomBuffer_t *next;
178
+ };
179
+ /// @endcond
180
+
181
+ /*!
182
+ * @brief This is only needed for writing bigWig files (and won't be created otherwise)
183
+ * This should be removed from bigWig.h
184
+ */
185
+ typedef struct {
186
+ uint64_t nBlocks; /**<The number of blocks written*/
187
+ uint32_t blockSize; /**<The maximum number of children*/
188
+ uint64_t nEntries; /**<The number of entries processed. This is used for the first contig and determining how the zoom levels are computed*/
189
+ uint64_t runningWidthSum; /**<The running sum of the entry widths for the first contig (again, used for the first contig and computing zoom levels)*/
190
+ uint32_t tid; /**<The current TID that's being processed*/
191
+ uint32_t start; /**<The start position of the block*/
192
+ uint32_t end; /**<The end position of the block*/
193
+ uint32_t span; /**<The span of each entry, if applicable*/
194
+ uint32_t step; /**<The step size, if applicable*/
195
+ uint8_t ltype; /**<The type of the last entry added*/
196
+ uint32_t l; /**<The current size of p. This and the type determine the number of items held*/
197
+ void *p; /**<A buffer of size hdr->bufSize*/
198
+ bwLL *firstIndexNode; /**<The first index node in the linked list*/
199
+ bwLL *currentIndexNode; /**<The last index node in a linked list*/
200
+ bwZoomBuffer_t **firstZoomBuffer; /**<The first node in a linked list of leaf nodes*/
201
+ bwZoomBuffer_t **lastZoomBuffer; /**<The last node in a linked list of leaf nodes*/
202
+ uint64_t *nNodes; /**<The number of leaf nodes per zoom level, useful for determining duplicate levels*/
203
+ uLongf compressPsz; /**<The size of the compression buffer*/
204
+ void *compressP; /**<A compressed buffer of size compressPsz*/
205
+ } bwWriteBuffer_t;
206
+
207
+ /*!
208
+ * @brief A structure that holds everything needed to access a bigWig file.
209
+ */
210
+ typedef struct {
211
+ URL_t *URL; /**<A pointer that can handle both local and remote files (including a buffer if needed).*/
212
+ bigWigHdr_t *hdr; /**<The file header.*/
213
+ chromList_t *cl; /**<A list of chromosome names (the order is the ID).*/
214
+ bwRTree_t *idx; /**<The index for the full dataset.*/
215
+ bwWriteBuffer_t *writeBuffer; /**<The buffer used for writing.*/
216
+ int isWrite; /**<0: Opened for reading, 1: Opened for writing.*/
217
+ int type; /**<0: bigWig, 1: bigBed.*/
218
+ } bigWigFile_t;
219
+
220
+ /*!
221
+ * @brief Holds interval:value associations
222
+ */
223
+ typedef struct {
224
+ uint32_t l; /**<Number of intervals held*/
225
+ uint32_t m; /**<Maximum number of values/intervals the struct can hold*/
226
+ uint32_t *start; /**<The start positions (0-based half open)*/
227
+ uint32_t *end; /**<The end positions (0-based half open)*/
228
+ float *value; /**<The value associated with each position*/
229
+ } bwOverlappingIntervals_t;
230
+
231
+ /*!
232
+ * @brief Holds interval:str associations
233
+ */
234
+ typedef struct {
235
+ uint32_t l; /**<Number of intervals held*/
236
+ uint32_t m; /**<Maximum number of values/intervals the struct can hold*/
237
+ uint32_t *start; /**<The start positions (0-based half open)*/
238
+ uint32_t *end; /**<The end positions (0-based half open)*/
239
+ char **str; /**<The strings associated with a given entry.*/
240
+ } bbOverlappingEntries_t;
241
+
242
+ /*!
243
+ * @brief A structure to hold iterations
244
+ * One of intervals and entries should be used to access records from bigWig or bigBed files, respectively.
245
+ */
246
+ typedef struct {
247
+ bigWigFile_t *bw; /**<Pointer to the bigWig/bigBed file.*/
248
+ uint32_t tid; /**<The contig/chromosome ID.*/
249
+ uint32_t start; /**<Start position of the query interval.*/
250
+ uint32_t end; /**<End position of the query interval.*/
251
+ uint64_t offset; /**<Offset into the blocks.*/
252
+ uint32_t blocksPerIteration; /**<Number of blocks to use per iteration.*/
253
+ int withString; /**<For bigBed entries, whether to return the string with the entries.*/
254
+ void *blocks; /**<Overlapping blocks.*/
255
+ bwOverlappingIntervals_t *intervals; /**<Overlapping intervals (or NULL).*/
256
+ bbOverlappingEntries_t *entries; /**<Overlapping entries (or NULL).*/
257
+ void *data; /**<Points to either intervals or entries. If there are no further intervals/entries, then this is NULL. Use this to test for whether to continue iterating.*/
258
+ } bwOverlapIterator_t;
259
+
260
+ /*!
261
+ * @brief Initializes curl and global variables. This *MUST* be called before other functions (at least if you want to connect to remote files).
262
+ * For remote files, curl must be initialized and regions of a file read into an internal buffer. If the buffer is too small then an excessive number of connections will be made. If the buffer is too large then more data than required is fetched. 128KiB is likely sufficient for most needs.
263
+ * @param bufSize The internal buffer size used for remote connection.
264
+ * @see bwCleanup
265
+ * @return 0 on success and 1 on error.
266
+ */
267
+ int bwInit(size_t bufSize);
268
+
269
+ /*!
270
+ * @brief The counterpart to bwInit, this cleans up curl.
271
+ * @see bwInit
272
+ */
273
+ void bwCleanup(void);
274
+
275
+ /*!
276
+ * @brief Determine if a file is a bigWig file.
277
+ * This function will quickly check either local or remote files to determine if they appear to be valid bigWig files. This can be determined by reading the first 4 bytes of the file.
278
+ * @param fname The file name or URL (http, https, and ftp are supported)
279
+ * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
280
+ * @return 1 if the file appears to be bigWig, otherwise 0.
281
+ */
282
+ int bwIsBigWig(char *fname, CURLcode (*callBack)(CURL*));
283
+
284
+ /*!
285
+ * @brief Determine if a file is a bigBed file.
286
+ * This function will quickly check either local or remote files to determine if they appear to be valid bigBed files. This can be determined by reading the first 4 bytes of the file.
287
+ * @param fname The file name or URL (http, https, and ftp are supported)
288
+ * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
289
+ * @return 1 if the file appears to be bigBed, otherwise 0.
290
+ */
291
+ int bbIsBigBed(char *fname, CURLcode (*callBack)(CURL*));
292
+
293
+ /*!
294
+ * @brief Opens a local or remote bigWig file.
295
+ * This will open a local or remote bigWig file. Writing of local bigWig files is also supported.
296
+ * @param fname The file name or URL (http, https, and ftp are supported)
297
+ * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
298
+ * @param mode The mode, by default "r". Both local and remote files can be read, but only local files can be written. For files being written the callback function is ignored. If and only if the mode contains "w" will the file be opened for writing (in all other cases the file will be opened for reading).
299
+ * @return A bigWigFile_t * on success and NULL on error.
300
+ */
301
+ bigWigFile_t *bwOpen(char *fname, CURLcode (*callBack)(CURL*), const char* mode);
302
+
303
+ /*!
304
+ * @brief Opens a local or remote bigBed file.
305
+ * This will open a local or remote bigBed file. Note that this file format can only be read and NOT written!
306
+ * @param fname The file name or URL (http, https, and ftp are supported)
307
+ * @param callBack An optional user-supplied function. This is applied to remote connections so users can specify things like proxy and password information. See `test/testRemote` for an example.
308
+ * @return A bigWigFile_t * on success and NULL on error.
309
+ */
310
+ bigWigFile_t *bbOpen(char *fname, CURLcode (*callBack)(CURL*));
311
+
312
+ /*!
313
+ * @brief Returns a string containing the SQL entry (or NULL).
314
+ * The "auto SQL" field contains the names and value types of the entries in
315
+ * each bigBed entry. If you need to parse a particular value out of each entry,
316
+ * then you'll need to first parse this.
317
+ * @param fp The file pointer to a valid bigWigFile_t
318
+ * @return A char *, which you MUST free!
319
+ */
320
+ char *bbGetSQL(bigWigFile_t *fp);
321
+
322
+ /*!
323
+ * @brief Closes a bigWigFile_t and frees up allocated memory
324
+ * This closes both bigWig and bigBed files.
325
+ * @param fp The file pointer.
326
+ */
327
+ void bwClose(bigWigFile_t *fp);
328
+
329
+ /*******************************************************************************
330
+ *
331
+ * The following are in bwStats.c
332
+ *
333
+ *******************************************************************************/
334
+
335
+ /*!
336
+ * @brief Converts between chromosome name and ID
337
+ *
338
+ * @param fp A valid bigWigFile_t pointer
339
+ * @param chrom A chromosome name
340
+ * @return An ID, -1 will be returned on error (note that this is an unsigned value, so that's ~4 billion; bigWig/bigBed files can't store that many chromosomes anyway).
341
+ */
342
+ uint32_t bwGetTid(bigWigFile_t *fp, char *chrom);
343
+
344
+ /*!
345
+ * @brief Frees space allocated by `bwGetOverlappingIntervals`
346
+ * @param o A valid `bwOverlappingIntervals_t` pointer.
347
+ * @see bwGetOverlappingIntervals
348
+ */
349
+ void bwDestroyOverlappingIntervals(bwOverlappingIntervals_t *o);
350
+
351
+ /*!
352
+ * @brief Frees space allocated by `bbGetOverlappingEntries`
353
+ * @param o A valid `bbOverlappingEntries_t` pointer.
354
+ * @see bbGetOverlappingEntries
355
+ */
356
+ void bbDestroyOverlappingEntries(bbOverlappingEntries_t *o);
357
+
358
+ /*!
359
+ * @brief Return bigWig entries overlapping an interval.
360
+ * Find all bigWig entries overlapping a range and returns them, including their associated values.
361
+ * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
362
+ * @param chrom A valid chromosome name.
363
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
364
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
365
+ * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and intervals.
366
+ * @see bwOverlappingIntervals_t
367
+ * @see bwDestroyOverlappingIntervals
368
+ * @see bwGetValues
369
+ */
370
+ bwOverlappingIntervals_t *bwGetOverlappingIntervals(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end);
371
+
372
+ /*!
373
+ * @brief Return bigBed entries overlapping an interval.
374
+ * Find all bigBed entries overlapping a range and returns them.
375
+ * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
376
+ * @param chrom A valid chromosome name.
377
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
378
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
379
+ * @param withString If not 0, return the string associated with each entry in the output. If 0, there are no associated strings returned. This is useful if the only information needed are the locations of the entries, which require significantly less memory.
380
+ * @return NULL on error or no overlapping values, otherwise a `bbOverlappingEntries_t *` holding the intervals and (optionally) the associated string.
381
+ * @see bbOverlappingEntries_t
382
+ * @see bbDestroyOverlappingEntries
383
+ */
384
+ bbOverlappingEntries_t *bbGetOverlappingEntries(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString);
385
+
386
+ /*!
387
+ * @brief Creates an iterator over intervals in a bigWig file
388
+ * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
389
+ * Intervals are in the `intervals` member and `data` can be used to determine when to end iteration.
390
+ * @param fp A valid bigWigFile_t pointer. This MUST be for a bigWig file!
391
+ * @param chrom A valid chromosome name.
392
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
393
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
394
+ * @param blocksPerIteration The number of blocks (internal groupings of intervals in bigWig files) to return per iteration.
395
+ * @return NULL on error, otherwise a bwOverlapIterator_t pointer
396
+ * @see bwOverlapIterator_t
397
+ * @see bwIteratorNext
398
+ * @see bwIteratorDestroy
399
+ */
400
+ bwOverlapIterator_t *bwOverlappingIntervalsIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t blocksPerIteration);
401
+
402
+ /*!
403
+ * @brief Creates an iterator over entries in a bigBed file
404
+ * Iterators can be traversed with `bwIteratorNext()` and destroyed with `bwIteratorDestroy()`.
405
+ * Entries are in the `entries` member and `data` can be used to determine when to end iteration.
406
+ * @param fp A valid bigWigFile_t pointer. This MUST be for a bigBed file!
407
+ * @param chrom A valid chromosome name.
408
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
409
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
410
+ * @param withString Whether the returned entries should include their associated strings.
411
+ * @param blocksPerIteration The number of blocks (internal groupings of entries in bigBed files) to return per iteration.
412
+ * @return NULL on error, otherwise a bwOverlapIterator_t pointer
413
+ * @see bbGetOverlappingEntries
414
+ * @see bwOverlapIterator_t
415
+ * @see bwIteratorNext
416
+ * @see bwIteratorDestroy
417
+ */
418
+ bwOverlapIterator_t *bbOverlappingEntriesIterator(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int withString, uint32_t blocksPerIteration);
419
+
420
+ /*!
421
+ * @brief Traverses to the entries/intervals in the next group of blocks.
422
+ * @param iter A bwOverlapIterator_t pointer that is updated (or destroyed on error)
423
+ * @return NULL on error, otherwise a bwOverlapIterator_t pointer with the intervals or entries from the next set of blocks.
424
+ * @see bwOverlapIterator_t
425
+ * @see bwIteratorDestroy
426
+ */
427
+ bwOverlapIterator_t *bwIteratorNext(bwOverlapIterator_t *iter);
428
+
429
+ /*!
430
+ * @brief Destroys a bwOverlapIterator_t
431
+ * @param iter The bwOverlapIterator_t that should be destroyed
432
+ */
433
+ void bwIteratorDestroy(bwOverlapIterator_t *iter);
434
+
435
+ /*!
436
+ * @brief Return all per-base bigWig values in a given interval.
437
+ * Given an interval (e.g., chr1:0-100), return the value at each position in a bigWig file. Positions without associated values are suppressed by default, but may be returned if `includeNA` is not 0.
438
+ * @param fp A valid bigWigFile_t pointer.
439
+ * @param chrom A valid chromosome name.
440
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
441
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
442
+ * @param includeNA If not 0, report NA values as well (as NA).
443
+ * @return NULL on error or no overlapping values, otherwise a `bwOverlappingIntervals_t *` holding the values and positions.
444
+ * @see bwOverlappingIntervals_t
445
+ * @see bwDestroyOverlappingIntervals
446
+ * @see bwGetOverlappingIntervals
447
+ */
448
+ bwOverlappingIntervals_t *bwGetValues(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, int includeNA);
449
+
450
+ /*!
451
+ * @brief Determines per-interval bigWig statistics
452
+ * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals.
453
+ * @param fp The file from which to extract statistics.
454
+ * @param chrom A valid chromosome name.
455
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
456
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
457
+ * @param nBins The number of bins within the interval to calculate statistics for.
458
+ * @param type The type of statistic.
459
+ * @see bwStatsType
460
+ * @return A pointer to an array of double precision floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows.
461
+ */
462
+ double *bwStats(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
463
+
464
+ /*!
465
+ * @brief Determines per-interval bigWig statistics
466
+ * Can determine mean/min/max/coverage/standard deviation of values in one or more intervals in a bigWig file. You can optionally give it an interval and ask for values from X number of sub-intervals. The difference with bwStats is that zoom levels are never used.
467
+ * @param fp The file from which to extract statistics.
468
+ * @param chrom A valid chromosome name.
469
+ * @param start The start position of the interval. This is 0-based half open, so 0 is the first base.
470
+ * @param end The end position of the interval. Again, this is 0-based half open, so 100 will include the 100th base...which is at position 99.
471
+ * @param nBins The number of bins within the interval to calculate statistics for.
472
+ * @param type The type of statistic.
473
+ * @see bwStatsType
474
+ * @return A pointer to an array of double precision floating point values. Note that bigWig files only hold 32-bit values, so this is done to help prevent overflows.
475
+ */
476
+ double *bwStatsFromFull(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t end, uint32_t nBins, enum bwStatsType type);
477
+
478
+ //Writer functions
479
+
480
+ /*!
481
+ * @brief Create a largely empty bigWig header
482
+ * Every bigWig file has a header, this creates the template for one. It also takes care of space allocation in the output write buffer.
483
+ * @param fp The bigWigFile_t* that you want to write to.
484
+ * @param maxZooms The maximum number of zoom levels. If you specify 0 then there will be no zoom levels. A value <0 or > 65535 will result in a maximum of 10.
485
+ * @return 0 on success.
486
+ */
487
+ int bwCreateHdr(bigWigFile_t *fp, int32_t maxZooms);
488
+
489
+ /*!
490
+ * @brief Take a list of chromosome names and lengths and return a pointer to a chromList_t
491
+ * This MUST be run before `bwWriteHdr()`. Note that the input is NOT free()d!
492
+ * @param chroms A list of chromosomes.
493
+ * @param lengths The length of each chromosome.
494
+ * @param n The number of chromosomes (thus, the length of `chroms` and `lengths`)
495
+ * @return A pointer to a chromList_t or NULL on error.
496
+ */
497
+ chromList_t *bwCreateChromList(char **chroms, uint32_t *lengths, int64_t n);
498
+
499
+ /*!
500
+ * @brief Write the header to a bigWig file.
501
+ * You must have already opened the output file, created a header and a chromosome list.
502
+ * @param bw The output bigWigFile_t pointer.
503
+ * @see bwCreateHdr
504
+ * @see bwCreateChromList
505
+ */
506
+ int bwWriteHdr(bigWigFile_t *bw);
507
+
508
+ /*!
509
+ * @brief Write a new block of bedGraph-like intervals to a bigWig file
510
+ * Adds entries of the form:
511
+ * chromosome start end value
512
+ * to the file. These will always be added in a new block, so you may have previously used a different storage type.
513
+ *
514
+ * In general it's more efficient to use the bwAppend* functions, but then you MUST know that the previously written block is of the same type. In other words, you can only use bwAppendIntervals() after bwAddIntervals() or a previous bwAppendIntervals().
515
+ * @param fp The output file pointer.
516
+ * @param chrom A list of chromosomes, of length `n`.
517
+ * @param start A list of start positions of length`n`.
518
+ * @param end A list of end positions of length`n`.
519
+ * @param values A list of values of length`n`.
520
+ * @param n The length of the aforementioned lists.
521
+ * @return 0 on success and another value on error.
522
+ * @see bwAppendIntervals
523
+ */
524
+ int bwAddIntervals(bigWigFile_t *fp, char **chrom, uint32_t *start, uint32_t *end, float *values, uint32_t n);
525
+
526
+ /*!
527
+ * @brief Append bedGraph-like intervals to a previous block of bedGraph-like intervals in a bigWig file.
528
+ * If you have previously used bwAddIntervals() then this will append additional entries into the previous block (or start a new one if needed).
529
+ * @param fp The output file pointer.
530
+ * @param start A list of start positions of length`n`.
531
+ * @param end A list of end positions of length`n`.
532
+ * @param values A list of values of length`n`.
533
+ * @param n The length of the aforementioned lists.
534
+ * @return 0 on success and another value on error.
535
+ * @warning Do NOT use this after `bwAddIntervalSpans()`, `bwAppendIntervalSpans()`, `bwAddIntervalSpanSteps()`, or `bwAppendIntervalSpanSteps()`.
536
+ * @see bwAddIntervals
537
+ */
538
+ int bwAppendIntervals(bigWigFile_t *fp, uint32_t *start, uint32_t *end, float *values, uint32_t n);
539
+
540
+ /*!
541
+ * @brief Add a new block of variable-step entries to a bigWig file
542
+ * Adds entries of the form
543
+ * chromosome start value
544
+ * to the file. Each block of such entries has an associated "span", so each value describes the region chromosome:start-(start+span)
545
+ *
546
+ * This will always start a new block of values.
547
+ * @param fp The output file pointer.
548
+ * @param chrom The chromosome that the entries describe.
549
+ * @param start A list of start positions of length`n`.
550
+ * @param span The span of each entry (they must all be the same).
551
+ * @param values A list of values of length`n`.
552
+ * @param n The length of the aforementioned lists.
553
+ * @return 0 on success and another value on error.
554
+ * @see bwAppendIntervalSpans
555
+ */
556
+ int bwAddIntervalSpans(bigWigFile_t *fp, char *chrom, uint32_t *start, uint32_t span, float *values, uint32_t n);
557
+
558
+ /*!
559
+ * @brief Append to a previous block of variable-step entries.
560
+ * If you previously used `bwAddIntervalSpans()`, this will continue appending more values to the block(s) it created.
561
+ * @param fp The output file pointer.
562
+ * @param start A list of start positions of length`n`.
563
+ * @param values A list of values of length`n`.
564
+ * @param n The length of the aforementioned lists.
565
+ * @return 0 on success and another value on error.
566
+ * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpanSteps()` or `bwAppendIntervalSpanSteps()`
567
+ * @see bwAddIntervalSpans
568
+ */
569
+ int bwAppendIntervalSpans(bigWigFile_t *fp, uint32_t *start, float *values, uint32_t n);
570
+
571
+ /*!
572
+ * @brief Add a new block of fixed-step entries to a bigWig file
573
+ * Adds entries of the form
574
+ * value
575
+ * to the file. Each block of such entries has an associated "span", "step", chromosome and start position. See the wiggle format for more details.
576
+ *
577
+ * This will always start a new block of values.
578
+ * @param fp The output file pointer.
579
+ * @param chrom The chromosome that the entries describe.
580
+ * @param start The starting position of the block of entries.
581
+ * @param span The span of each entry (i.e., the number of bases it describes).
582
+ * @param step The step between entry start positions.
583
+ * @param values A list of values of length`n`.
584
+ * @param n The length of the aforementioned lists.
585
+ * @return 0 on success and another value on error.
586
+ * @see bwAppendIntervalSpanSteps
587
+ */
588
+ int bwAddIntervalSpanSteps(bigWigFile_t *fp, char *chrom, uint32_t start, uint32_t span, uint32_t step, float *values, uint32_t n);
589
+
590
+ /*!
591
+ * @brief Append to a previous block of fixed-step entries.
592
+ * If you previously used `bwAddIntervalSpanSteps()`, this will continue appending more values to the block(s) it created.
593
+ * @param fp The output file pointer.
594
+ * @param values A list of values of length`n`.
595
+ * @param n The length of the aforementioned lists.
596
+ * @return 0 on success and another value on error.
597
+ * @warning Do NOT use this after `bwAddIntervals()`, `bwAppendIntervals()`, `bwAddIntervalSpans()` or `bwAppendIntervalSpans()`
598
+ * @see bwAddIntervalSpanSteps
599
+ */
600
+ int bwAppendIntervalSpanSteps(bigWigFile_t *fp, float *values, uint32_t n);
601
+
602
+ #ifdef __cplusplus
603
+ }
604
+ #endif
605
+
606
+ #endif // LIBBIGWIG_H