bio-cgranges 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,87 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2019 Dana-Farber Cancer Institute
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+ #ifndef CRANGES_H
26
+ #define CRANGES_H
27
+
28
+ #include <stdint.h>
29
+
30
/*
 * A contig: a named sequence that intervals are attached to.
 */
typedef struct {
    char *name;     // name of the contig
    int32_t len;    // max length seen in data
    int32_t root_k; // NOTE(review): undocumented in this header -- confirm meaning against the implementation
    // NOTE(review): the original comment on this declaration read
    // "sum of lengths of previous contigs" without saying which of the two
    // fields it describes -- confirm the meaning of n and off before relying
    // on either.
    int64_t n, off;
} cr_ctg_t;
36
+
37
/*
 * An interval.
 *
 * The packing of x depends on whether cr_index() has been called:
 *   before cr_index(): x = ctg_id<<32 | start_pos
 *   after  cr_index(): x = start_pos<<32 | end_pos
 */
typedef struct {
    uint64_t x;     // packed key; see the two layouts above
    uint32_t y:31,  // NOTE(review): undocumented in this header -- confirm meaning
             rev:1; // NOTE(review): 1-bit flag, presumably "reverse" -- confirm
    // NOTE(review): originally commented "NOT used", yet cr_add() accepts a
    // label_int argument and cr_label() returns this field -- presumably the
    // library stores it without interpreting it; confirm.
    int32_t label;
} cr_intv_t;
42
+
43
+ typedef struct {
44
+ int64_t n_r, m_r; // number and max number of intervals
45
+ cr_intv_t *r; // list of intervals (of size _n_r_)
46
+ int32_t n_ctg, m_ctg; // number and max number of contigs
47
+ cr_ctg_t *ctg; // list of contigs (of size _n_ctg_)
48
+ void *hc; // dictionary for converting contig names to integers
49
+ } cgranges_t;
50
+
51
+ #ifdef __cplusplus
52
+ extern "C" {
53
+ #endif
54
+
55
+ // retrieve start and end positions from a cr_intv_t object
56
+ static inline int32_t cr_st(const cr_intv_t *r) { return (int32_t)(r->x>>32); }
57
+ static inline int32_t cr_en(const cr_intv_t *r) { return (int32_t)r->x; }
58
+ static inline int32_t cr_start(const cgranges_t *cr, int64_t i) { return cr_st(&cr->r[i]); }
59
+ static inline int32_t cr_end(const cgranges_t *cr, int64_t i) { return cr_en(&cr->r[i]); }
60
+ static inline int32_t cr_label(const cgranges_t *cr, int64_t i) { return cr->r[i].label; }
61
+
62
+ // Initialize
63
+ cgranges_t *cr_init(void);
64
+
65
+ // Deallocate
66
+ void cr_destroy(cgranges_t *cr);
67
+
68
+ // Add an interval
69
+ cr_intv_t *cr_add(cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int32_t label_int);
70
+
71
+ // Sort and index intervals
72
+ void cr_index(cgranges_t *cr);
73
+
74
+ int64_t cr_overlap(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
75
+ int64_t cr_contain(const cgranges_t *cr, const char *ctg, int32_t st, int32_t en, int64_t **b_, int64_t *m_b_);
76
+
77
+ // Add a contig and length. Call this for desired contig ordering. _len_ can be 0.
78
+ int32_t cr_add_ctg(cgranges_t *cr, const char *ctg, int32_t len);
79
+
80
+ // Get the contig ID given its name
81
+ int32_t cr_get_ctg(const cgranges_t *cr, const char *ctg);
82
+
83
+ #ifdef __cplusplus
84
+ }
85
+ #endif
86
+
87
+ #endif