bio-twobit 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -17
- data/ext/bio/twobit/twobit.c +27 -49
- data/lib/bio/twobit/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62c043e60fe3e2fec4a578563d72ecc97d1e829149a2f0d8690165dc05d5bd41
|
4
|
+
data.tar.gz: 9584e93079bfbbd4cbf3bfeea0b5d11a638026f0cfea1a4144064c1cfbdb45b9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf70fc7feb773cc03552dbb3cf0f6f9c0d4d261a8f81c0f7826c5797aa6c7baea9c391bb1cf7a82ba317710cf3f4ef5088016e6c0ce315133b8c9cad380b73a9
|
7
|
+
data.tar.gz: 5b8d65b21d777125873964fbdd9b999d1750263460e01218cf97e411ad4a54e3eedab264cd0efb900a11c141155a516661ecf0a473aab5ee4c40cc134ccf3635
|
data/README.md
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
# bio-twobit
|
2
2
|
|
3
3
|
[Gem Version](https://badge.fury.io/rb/bio-twobit)
|
4
|
-
[CI](https://github.com/kojix2/bio-twobit/actions/workflows/ci.yml)
|
5
5
|
[Documentation](https://rubydoc.info/gems/bio-twobit)
|
6
6
|
[DOI](https://zenodo.org/badge/latestdoi/436454379)
|
7
7
|
|
8
|
+
Bio::TwoBit is a Ruby interface to 2bit files.
|
9
|
+
|
8
10
|
Ruby bindings to [lib2bit](https://github.com/dpryan79/lib2bit) / [py2bit](https://github.com/deeptools/py2bit).
|
9
11
|
|
12
|
+
|
13
|
+
2bit files are used to store and index DNA sequences, usually of entire reference genomes. [The 2bit format](https://genome.ucsc.edu/goldenPath/help/twoBit.html) is a compact binary representation of DNA sequences that is used by the UCSC Genome Browser.
|
14
|
+
|
10
15
|
## Installation
|
11
16
|
|
12
17
|
```sh
|
@@ -16,20 +21,13 @@ gem install bio-twobit
|
|
16
21
|
Linux and macOS are supported.
|
17
22
|
Windows is currently not supported.
|
18
23
|
|
19
|
-
|
20
|
-
|
21
|
-
Downlaod BSgenome.Hsapiens.UCSC.hg38
|
22
|
-
|
23
|
-
```sh
|
24
|
-
wget http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit
|
25
|
-
```
|
26
|
-
|
27
|
-
### Quick Start
|
24
|
+
### Usage
|
28
25
|
|
29
26
|
```ruby
|
30
27
|
require 'bio/twobit'
|
31
28
|
|
32
|
-
hg38 = Bio::TwoBit.open("hg38.2bit")
|
29
|
+
# hg38 = Bio::TwoBit.open("hg38.2bit")
|
30
|
+
hg38 = Bio::TwoBit::Hg38.new
|
33
31
|
|
34
32
|
hg38.path
|
35
33
|
# "hg38.2bit"
|
@@ -56,7 +54,7 @@ hg38.sequence("chr1", 50000, 50050)
|
|
56
54
|
# "AAACAGGTTAATCGCCACGACATAGTAGTATTTAGAGTTACTAGTAAGCC" # length 50
|
57
55
|
```
|
58
56
|
|
59
|
-
* The first number is the **(0-based)** position on the chromosome/contig where the sequence should begin.
|
57
|
+
* The first number is the **(0-based)** position on the chromosome/contig where the sequence should begin.
|
60
58
|
* The second number is the **(1-based)** position on the chromosome where the sequence should end.
|
61
59
|
|
62
60
|
```ruby
|
@@ -121,22 +119,22 @@ hg38 = Bio::TwoBit::Hg38.new
|
|
121
119
|
hs1 = Bio::TwoBit::Hs1.new
|
122
120
|
```
|
123
121
|
|
124
|
-
Adding a new reference genome is easy. Add [here](https://github.com/
|
122
|
+
Adding a new reference genome is easy. Add the id of the genome you want to use [here](https://github.com/kojix2/bio-twobit/blob/main/lib/bio/twobit/references/template.erb).
|
125
123
|
|
126
124
|
```
|
127
|
-
git clone https://github.com/
|
125
|
+
git clone https://github.com/kojix2/bio-twobit
|
128
126
|
vi lib/bio/twobit/references/template.erb # Add your id to ids list.
|
129
127
|
ruby lib/bio/twobit/references/template.erb
|
130
128
|
rake install
|
131
129
|
```
|
132
130
|
|
133
|
-
If you want to use 2-bit files from locations other than UCSC, create your own classes [here](https://github.com/
|
131
|
+
If you want to use 2-bit files from locations other than UCSC, create your own classes [here](https://github.com/kojix2/bio-twobit/tree/main/lib/bio/twobit/references).
|
134
132
|
|
135
133
|
Pull requests are welcome.
|
136
134
|
|
137
135
|
## Development
|
138
136
|
|
139
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
137
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/bio-twobit.
|
140
138
|
|
141
139
|
Do you need commit rights to my repository?
|
142
140
|
Do you want to get admin rights and take over the project?
|
@@ -147,4 +145,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-o
|
|
147
145
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
148
146
|
|
149
147
|
Code from [Red Datasets](https://github.com/red-data-tools/red-datasets) is used for automatic file download and caching. (The MIT license)
|
150
|
-
|
data/ext/bio/twobit/twobit.c
CHANGED
@@ -48,12 +48,12 @@ static void TwoBit_free(void *ptr);
|
|
48
48
|
static size_t TwoBit_memsize(const void *ptr);
|
49
49
|
|
50
50
|
static const rb_data_type_t TwoBit_type = {
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
51
|
+
.wrap_struct_name = "TwoBit",
|
52
|
+
.function = {
|
53
|
+
.dfree = TwoBit_free,
|
54
|
+
.dsize = TwoBit_memsize,
|
55
|
+
},
|
56
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
57
57
|
};
|
58
58
|
|
59
59
|
static void
|
@@ -100,7 +100,6 @@ twobit_init(VALUE klass, VALUE fpath, VALUE storeMasked)
|
|
100
100
|
tb = twobitOpen(path, mask);
|
101
101
|
if (!tb)
|
102
102
|
{
|
103
|
-
twobitClose(tb);
|
104
103
|
rb_raise(rb_eRuntimeError, "Could not open file %s", path);
|
105
104
|
return Qnil;
|
106
105
|
}
|
@@ -251,8 +250,8 @@ twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
|
251
250
|
TwoBit *tb;
|
252
251
|
|
253
252
|
ch = StringValueCStr(chrom);
|
254
|
-
startl =
|
255
|
-
endl =
|
253
|
+
startl = NUM2ULONG(rbstart);
|
254
|
+
endl = NUM2ULONG(rbend);
|
256
255
|
tb = getTwoBit(self);
|
257
256
|
|
258
257
|
if (!tb)
|
@@ -278,8 +277,14 @@ twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
|
278
277
|
start = (uint32_t)startl;
|
279
278
|
|
280
279
|
str = twobitSequence(tb, ch, start, end);
|
281
|
-
|
282
|
-
|
280
|
+
if (!str)
|
281
|
+
{
|
282
|
+
rb_raise(rb_eRuntimeError, "Failed to retrieve the sequence for %s:%u-%u", ch, start, end);
|
283
|
+
return Qnil;
|
284
|
+
}
|
285
|
+
VALUE result = rb_str_new2(str);
|
286
|
+
free(str);
|
287
|
+
return result;
|
283
288
|
}
|
284
289
|
|
285
290
|
static VALUE
|
@@ -290,6 +295,7 @@ twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
|
|
290
295
|
TwoBit *tb;
|
291
296
|
void *o = NULL;
|
292
297
|
VALUE val, hash;
|
298
|
+
const char *bases[4] = {"A", "C", "T", "G"};
|
293
299
|
|
294
300
|
tb = getTwoBit(self);
|
295
301
|
if (!tb)
|
@@ -312,48 +318,20 @@ twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
|
|
312
318
|
|
313
319
|
hash = rb_hash_new();
|
314
320
|
|
315
|
-
|
321
|
+
for (int i = 0; i < 4; i++)
|
316
322
|
{
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
{
|
327
|
-
val = DBL2NUM(((double *)o)[1]);
|
328
|
-
}
|
329
|
-
else
|
330
|
-
{
|
331
|
-
val = UINT32_2NUM(((uint32_t *)o)[1]);
|
332
|
-
}
|
333
|
-
rb_hash_aset(hash, rb_str_new2("C"), val);
|
334
|
-
|
335
|
-
if (fr)
|
336
|
-
{
|
337
|
-
val = DBL2NUM(((double *)o)[2]);
|
338
|
-
}
|
339
|
-
else
|
340
|
-
{
|
341
|
-
val = UINT32_2NUM(((uint32_t *)o)[2]);
|
342
|
-
}
|
343
|
-
rb_hash_aset(hash, rb_str_new2("T"), val);
|
344
|
-
|
345
|
-
if (fr)
|
346
|
-
{
|
347
|
-
val = DBL2NUM(((double *)o)[3]);
|
348
|
-
}
|
349
|
-
else
|
350
|
-
{
|
351
|
-
val = UINT32_2NUM(((uint32_t *)o)[3]);
|
323
|
+
if (fr)
|
324
|
+
{
|
325
|
+
val = DBL2NUM(((double *)o)[i]);
|
326
|
+
}
|
327
|
+
else
|
328
|
+
{
|
329
|
+
val = UINT32_2NUM(((uint32_t *)o)[i]);
|
330
|
+
}
|
331
|
+
rb_hash_aset(hash, rb_str_new2(bases[i]), val);
|
352
332
|
}
|
353
|
-
rb_hash_aset(hash, rb_str_new2("G"), val);
|
354
333
|
|
355
334
|
free(o);
|
356
|
-
|
357
335
|
return hash;
|
358
336
|
}
|
359
337
|
|
data/lib/bio/twobit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-twobit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit),
|
14
14
|
which provides high-speed access to genomic data in 2bit file format.
|
@@ -142,11 +142,11 @@ files:
|
|
142
142
|
- lib/bio/twobit/references/wuhcor1.rb
|
143
143
|
- lib/bio/twobit/references/xentro10.rb
|
144
144
|
- lib/bio/twobit/version.rb
|
145
|
-
homepage: https://github.com/
|
145
|
+
homepage: https://github.com/kojix2/bio-twobit
|
146
146
|
licenses:
|
147
147
|
- MIT
|
148
148
|
metadata: {}
|
149
|
-
post_install_message:
|
149
|
+
post_install_message:
|
150
150
|
rdoc_options: []
|
151
151
|
require_paths:
|
152
152
|
- lib
|
@@ -161,8 +161,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
161
|
- !ruby/object:Gem::Version
|
162
162
|
version: '0'
|
163
163
|
requirements: []
|
164
|
-
rubygems_version: 3.
|
165
|
-
signing_key:
|
164
|
+
rubygems_version: 3.5.16
|
165
|
+
signing_key:
|
166
166
|
specification_version: 4
|
167
167
|
summary: A ruby library for accessing 2bit files
|
168
168
|
test_files: []
|