bio-bigwig 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,77 @@
1
+ #ifndef LIBBIGWIG_VALUES_H
2
+ #define LIBBIGWIG_VALUES_H
3
+
4
+ #include <inttypes.h>
5
+ /*! \file bwValues.h
6
+ *
7
+ * You should not directly use functions and structures defined here. They're really meant for internal use only.
8
+ *
9
+ * All of the structures here need to be destroyed or you'll leak memory! There are methods available to destroy anything that you need to take care of yourself.
10
+ */
11
+
12
+ //N.B., coordinates are still 0-based, half-open!
13
+ /*!
14
+ * @brief A node within an R-tree holding the index for data.
15
+ *
16
+ * There are two types of nodes: leaf and twig. Leaf nodes point to where the data actually is. Twig nodes point to additional index nodes, which may or may not be leaves. Each node has children, each of which may span multiple chromosomes/contigs.
17
+ *
18
+ * The baseStart/baseEnd positions of a child refer specifically to the chromosomes given in chrIdxStart/chrIdxEnd. Any chromosomes between these are completely spanned by that child.
19
+ */
20
+ typedef struct bwRTreeNode_t {
21
+ uint8_t isLeaf; /**<Whether this node is a leaf (as opposed to a twig).*/
22
+ //1 byte of padding
23
+ uint16_t nChildren; /**<The number of children of this node; all of the lists below have this length.*/
24
+ uint32_t *chrIdxStart; /**<A list of the starting chromosome index of each child.*/
25
+ uint32_t *baseStart; /**<A list of the start position of each child (on its chrIdxStart chromosome).*/
26
+ uint32_t *chrIdxEnd; /**<A list of the ending chromosome index of each child.*/
27
+ uint32_t *baseEnd; /**<A list of the end position of each child (on its chrIdxEnd chromosome).*/
28
+ uint64_t *dataOffset; /**<For leaves, the offset to the on-disk data. For twigs, the offset to the child node.*/
29
+ union {
30
+ uint64_t *size; /**<Leaves only: The size of each data block.*/
31
+ struct bwRTreeNode_t **child; /**<Twigs only: The child node(s).*/
32
+ } x; /**<A union holding either size (leaves) or child (twigs).*/
33
+ } bwRTreeNode_t;
34
+
35
+ /*!
36
+ * @brief A header and index that points to an R-tree that in turn points to data blocks.
37
+ */
38
+ //TODO rootOffset is pointless, it's 48 bytes after the indexOffset
39
+ typedef struct {
40
+ uint32_t blockSize; /**<The maximum number of children a node can have.*/
41
+ uint64_t nItems; /**<The total number of data blocks pointed to by the tree. This is completely redundant.*/
42
+ uint32_t chrIdxStart; /**<The index of the first chromosome described.*/
43
+ uint32_t baseStart; /**<The first position on chrIdxStart with a value.*/
44
+ uint32_t chrIdxEnd; /**<The index of the last chromosome with an entry.*/
45
+ uint32_t baseEnd; /**<The last position on chrIdxEnd with an entry.*/
46
+ uint64_t idxSize; /**<Despite the name, this is apparently the offset of the index rather than its size. Yes, it's completely redundant.*/
47
+ uint32_t nItemsPerSlot; /**<This is always 1!*/
48
+ //There's 4 bytes of padding in the file here
49
+ uint64_t rootOffset; /**<The offset to the root node of the R-tree (on disk). Yes, this is redundant.*/
50
+ bwRTreeNode_t *root; /**<A pointer to the root node.*/
51
+ } bwRTree_t;
52
+
53
+ /*!
54
+ * @brief This structure holds the data blocks that overlap a given interval.
55
+ */
56
+ typedef struct {
57
+ uint64_t n; /**<The number of blocks that overlap. This *MAY* be 0!*/
58
+ uint64_t *offset; /**<The offset to the on-disk position of each block.*/
59
+ uint64_t *size; /**<The size of each block on disk (in bytes).*/
60
+ } bwOverlapBlock_t;
61
+
62
+ /*!
63
+ * @brief The header section of a given data block.
64
+ *
65
+ * There are 3 types of data blocks in bigWig files, each with slightly different needs. This is all taken care of internally.
66
+ */
67
+ typedef struct {
68
+ uint32_t tid; /**<The chromosome ID.*/
69
+ uint32_t start; /**<The start position of the block.*/
70
+ uint32_t end; /**<The end position of the block.*/
71
+ uint32_t step; /**<The step size of the values.*/
72
+ uint32_t span; /**<The span of each data value.*/
73
+ uint8_t type; /**<The block type: 1, bedGraph; 2, variable step; 3, fixed step.*/
74
+ uint16_t nItems; /**<The number of values in the block.*/
75
+ } bwDataHeader_t;
76
+
77
+ #endif // LIBBIGWIG_VALUES_H