bio-twobit 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +14 -17
- data/ext/bio/twobit/twobit.c +27 -49
- data/lib/bio/twobit/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62c043e60fe3e2fec4a578563d72ecc97d1e829149a2f0d8690165dc05d5bd41
|
4
|
+
data.tar.gz: 9584e93079bfbbd4cbf3bfeea0b5d11a638026f0cfea1a4144064c1cfbdb45b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf70fc7feb773cc03552dbb3cf0f6f9c0d4d261a8f81c0f7826c5797aa6c7baea9c391bb1cf7a82ba317710cf3f4ef5088016e6c0ce315133b8c9cad380b73a9
|
7
|
+
data.tar.gz: 5b8d65b21d777125873964fbdd9b999d1750263460e01218cf97e411ad4a54e3eedab264cd0efb900a11c141155a516661ecf0a473aab5ee4c40cc134ccf3635
|
data/README.md
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
# bio-twobit
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/bio-twobit.svg)](https://badge.fury.io/rb/bio-twobit)
|
4
|
-
[![test](https://github.com/
|
4
|
+
[![test](https://github.com/kojix2/bio-twobit/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/bio-twobit/actions/workflows/ci.yml)
|
5
5
|
[![docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-twobit)
|
6
6
|
[![DOI](https://zenodo.org/badge/436454379.svg)](https://zenodo.org/badge/latestdoi/436454379)
|
7
7
|
|
8
|
+
Bio::TwoBit is a Ruby interface to 2bit files.
|
9
|
+
|
8
10
|
Ruby bindings to [lib2bit](https://github.com/dpryan79/lib2bit) / [py2bit](https://github.com/deeptools/py2bit).
|
9
11
|
|
12
|
+
|
13
|
+
2bit files are used to store and index DNA sequences, usually of entire reference genomes. [The 2bit format](https://genome.ucsc.edu/goldenPath/help/twoBit.html) is a compact binary representation of DNA sequences that is used by the UCSC Genome Browser.
|
14
|
+
|
10
15
|
## Installation
|
11
16
|
|
12
17
|
```sh
|
@@ -16,20 +21,13 @@ gem install bio-twobit
|
|
16
21
|
Linux and macOS are supported.
|
17
22
|
Windows is currently not supported.
|
18
23
|
|
19
|
-
|
20
|
-
|
21
|
-
Download BSgenome.Hsapiens.UCSC.hg38
|
22
|
-
|
23
|
-
```sh
|
24
|
-
wget http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit
|
25
|
-
```
|
26
|
-
|
27
|
-
### Quick Start
|
24
|
+
### Usage
|
28
25
|
|
29
26
|
```ruby
|
30
27
|
require 'bio/twobit'
|
31
28
|
|
32
|
-
hg38 = Bio::TwoBit.open("hg38.2bit")
|
29
|
+
# hg38 = Bio::TwoBit.open("hg38.2bit")
|
30
|
+
hg38 = Bio::TwoBit::Hg38.new
|
33
31
|
|
34
32
|
hg38.path
|
35
33
|
# "hg38.2bit"
|
@@ -56,7 +54,7 @@ hg38.sequence("chr1", 50000, 50050)
|
|
56
54
|
# "AAACAGGTTAATCGCCACGACATAGTAGTATTTAGAGTTACTAGTAAGCC" # length 50
|
57
55
|
```
|
58
56
|
|
59
|
-
* The first number is the **(0-based)** position on the chromosome/contig where the sequence should begin.
|
57
|
+
* The first number is the **(0-based)** position on the chromosome/contig where the sequence should begin.
|
60
58
|
* The second number is the **(1-based)** position on the chromosome where the sequence should end.
|
61
59
|
|
62
60
|
```ruby
|
@@ -121,22 +119,22 @@ hg38 = Bio::TwoBit::Hg38.new
|
|
121
119
|
hs1 = Bio::TwoBit::Hs1.new
|
122
120
|
```
|
123
121
|
|
124
|
-
Adding a new reference genome is easy. Add [here](https://github.com/
|
122
|
+
Adding a new reference genome is easy. Add the id of the genome you want to use [here](https://github.com/kojix2/bio-twobit/blob/main/lib/bio/twobit/references/template.erb).
|
125
123
|
|
126
124
|
```
|
127
|
-
git clone https://github.com/
|
125
|
+
git clone https://github.com/kojix2/bio-twobit
|
128
126
|
vi lib/bio/twobit/references/template.erb # Add your id to ids list.
|
129
127
|
ruby lib/bio/twobit/references/template.erb
|
130
128
|
rake install
|
131
129
|
```
|
132
130
|
|
133
|
-
If you want to use 2-bit files from locations other than UCSC, create your own classes [here](https://github.com/
|
131
|
+
If you want to use 2-bit files from locations other than UCSC, create your own classes [here](https://github.com/kojix2/bio-twobit/tree/main/lib/bio/twobit/references).
|
134
132
|
|
135
133
|
Pull requests are welcome.
|
136
134
|
|
137
135
|
## Development
|
138
136
|
|
139
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
137
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/bio-twobit.
|
140
138
|
|
141
139
|
Do you need commit rights to my repository?
|
142
140
|
Do you want to get admin rights and take over the project?
|
@@ -147,4 +145,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-o
|
|
147
145
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
148
146
|
|
149
147
|
Code from [Red Datasets](https://github.com/red-data-tools/red-datasets) is used for automatic file download and caching. (The MIT license)
|
150
|
-
|
data/ext/bio/twobit/twobit.c
CHANGED
@@ -48,12 +48,12 @@ static void TwoBit_free(void *ptr);
|
|
48
48
|
static size_t TwoBit_memsize(const void *ptr);
|
49
49
|
|
50
50
|
static const rb_data_type_t TwoBit_type = {
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
51
|
+
.wrap_struct_name = "TwoBit",
|
52
|
+
.function = {
|
53
|
+
.dfree = TwoBit_free,
|
54
|
+
.dsize = TwoBit_memsize,
|
55
|
+
},
|
56
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
57
57
|
};
|
58
58
|
|
59
59
|
static void
|
@@ -100,7 +100,6 @@ twobit_init(VALUE klass, VALUE fpath, VALUE storeMasked)
|
|
100
100
|
tb = twobitOpen(path, mask);
|
101
101
|
if (!tb)
|
102
102
|
{
|
103
|
-
twobitClose(tb);
|
104
103
|
rb_raise(rb_eRuntimeError, "Could not open file %s", path);
|
105
104
|
return Qnil;
|
106
105
|
}
|
@@ -251,8 +250,8 @@ twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
|
251
250
|
TwoBit *tb;
|
252
251
|
|
253
252
|
ch = StringValueCStr(chrom);
|
254
|
-
startl =
|
255
|
-
endl =
|
253
|
+
startl = NUM2ULONG(rbstart);
|
254
|
+
endl = NUM2ULONG(rbend);
|
256
255
|
tb = getTwoBit(self);
|
257
256
|
|
258
257
|
if (!tb)
|
@@ -278,8 +277,14 @@ twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
|
278
277
|
start = (uint32_t)startl;
|
279
278
|
|
280
279
|
str = twobitSequence(tb, ch, start, end);
|
281
|
-
|
282
|
-
|
280
|
+
if (!str)
|
281
|
+
{
|
282
|
+
rb_raise(rb_eRuntimeError, "Failed to retrieve the sequence for %s:%u-%u", ch, start, end);
|
283
|
+
return Qnil;
|
284
|
+
}
|
285
|
+
VALUE result = rb_str_new2(str);
|
286
|
+
free(str);
|
287
|
+
return result;
|
283
288
|
}
|
284
289
|
|
285
290
|
static VALUE
|
@@ -290,6 +295,7 @@ twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
|
|
290
295
|
TwoBit *tb;
|
291
296
|
void *o = NULL;
|
292
297
|
VALUE val, hash;
|
298
|
+
const char *bases[4] = {"A", "C", "T", "G"};
|
293
299
|
|
294
300
|
tb = getTwoBit(self);
|
295
301
|
if (!tb)
|
@@ -312,48 +318,20 @@ twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
|
|
312
318
|
|
313
319
|
hash = rb_hash_new();
|
314
320
|
|
315
|
-
|
321
|
+
for (int i = 0; i < 4; i++)
|
316
322
|
{
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
{
|
327
|
-
val = DBL2NUM(((double *)o)[1]);
|
328
|
-
}
|
329
|
-
else
|
330
|
-
{
|
331
|
-
val = UINT32_2NUM(((uint32_t *)o)[1]);
|
332
|
-
}
|
333
|
-
rb_hash_aset(hash, rb_str_new2("C"), val);
|
334
|
-
|
335
|
-
if (fr)
|
336
|
-
{
|
337
|
-
val = DBL2NUM(((double *)o)[2]);
|
338
|
-
}
|
339
|
-
else
|
340
|
-
{
|
341
|
-
val = UINT32_2NUM(((uint32_t *)o)[2]);
|
342
|
-
}
|
343
|
-
rb_hash_aset(hash, rb_str_new2("T"), val);
|
344
|
-
|
345
|
-
if (fr)
|
346
|
-
{
|
347
|
-
val = DBL2NUM(((double *)o)[3]);
|
348
|
-
}
|
349
|
-
else
|
350
|
-
{
|
351
|
-
val = UINT32_2NUM(((uint32_t *)o)[3]);
|
323
|
+
if (fr)
|
324
|
+
{
|
325
|
+
val = DBL2NUM(((double *)o)[i]);
|
326
|
+
}
|
327
|
+
else
|
328
|
+
{
|
329
|
+
val = UINT32_2NUM(((uint32_t *)o)[i]);
|
330
|
+
}
|
331
|
+
rb_hash_aset(hash, rb_str_new2(bases[i]), val);
|
352
332
|
}
|
353
|
-
rb_hash_aset(hash, rb_str_new2("G"), val);
|
354
333
|
|
355
334
|
free(o);
|
356
|
-
|
357
335
|
return hash;
|
358
336
|
}
|
359
337
|
|
data/lib/bio/twobit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-twobit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit),
|
14
14
|
which provides high-speed access to genomic data in 2bit file format.
|
@@ -142,11 +142,11 @@ files:
|
|
142
142
|
- lib/bio/twobit/references/wuhcor1.rb
|
143
143
|
- lib/bio/twobit/references/xentro10.rb
|
144
144
|
- lib/bio/twobit/version.rb
|
145
|
-
homepage: https://github.com/
|
145
|
+
homepage: https://github.com/kojix2/bio-twobit
|
146
146
|
licenses:
|
147
147
|
- MIT
|
148
148
|
metadata: {}
|
149
|
-
post_install_message:
|
149
|
+
post_install_message:
|
150
150
|
rdoc_options: []
|
151
151
|
require_paths:
|
152
152
|
- lib
|
@@ -161,8 +161,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
161
|
- !ruby/object:Gem::Version
|
162
162
|
version: '0'
|
163
163
|
requirements: []
|
164
|
-
rubygems_version: 3.
|
165
|
-
signing_key:
|
164
|
+
rubygems_version: 3.5.16
|
165
|
+
signing_key:
|
166
166
|
specification_version: 4
|
167
167
|
summary: A ruby library for accessing 2bit files
|
168
168
|
test_files: []
|