bio-maf 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/README.md +147 -113
- data/bin/maf_count +0 -1
- data/bin/maf_dump_blocks +0 -1
- data/bin/maf_extract +177 -0
- data/bin/maf_index +15 -8
- data/bin/maf_tile +2 -0
- data/bin/maf_to_fasta +4 -7
- data/bio-maf.gemspec +3 -4
- data/features/maf-indexing.feature +21 -1
- data/features/step_definitions/convert_steps.rb +2 -7
- data/features/step_definitions/index_steps.rb +4 -0
- data/lib/bio-maf.rb +5 -0
- data/lib/bio/maf/index.rb +33 -23
- data/lib/bio/maf/maf.rb +10 -7
- data/lib/bio/maf/parser.rb +37 -15
- data/lib/bio/maf/tiler.rb +60 -8
- data/lib/bio/maf/writer.rb +26 -0
- data/man/maf_extract.1 +159 -0
- data/man/maf_extract.1.ronn +175 -0
- data/man/maf_index.1 +21 -10
- data/man/maf_index.1.ronn +14 -7
- data/man/maf_tile.1 +12 -0
- data/man/maf_tile.1.ronn +9 -0
- data/spec/bio/maf/index_spec.rb +23 -0
- metadata +15 -11
data/man/maf_index.1
CHANGED
@@ -28,7 +28,10 @@ The \fImaf\fR input file must be a valid MAF file of any length\.
|
|
28
28
|
The index created is a Kyoto Cabinet TreeDB (B+ tree) database; \fIindex\fR must have a \fB\.kct\fR extension\.
|
29
29
|
.
|
30
30
|
.SH "OPTIONS"
|
31
|
-
|
31
|
+
.
|
32
|
+
.TP
|
33
|
+
\fB\-a\fR, \fB\-\-all\fR
|
34
|
+
All sequences, not just the reference sequence, will be indexed for searching\.
|
32
35
|
.
|
33
36
|
.TP
|
34
37
|
\fB\-d\fR, \fB\-\-dump\fR
|
@@ -38,25 +41,33 @@ Instead of creating an index, dump out the given \fIindex\fR in human\-readable
|
|
38
41
|
.
|
39
42
|
.nf
|
40
43
|
|
41
|
-
0 [bin 1195] 80082334:80082368
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
44
|
+
0 [bin 1195] 80082334:80082368
|
45
|
+
offset 16, length 1087
|
46
|
+
text size: 54
|
47
|
+
sequences in block: 10
|
48
|
+
species vector: 00000000000003ff
|
46
49
|
.
|
47
50
|
.fi
|
48
51
|
.
|
49
52
|
.IP "" 0
|
50
53
|
|
51
|
-
.
|
52
|
-
.TP
|
53
|
-
\fB\-t\fR, \fB\-\-threaded\fR
|
54
|
-
Use a separate reader thread to do I/O in parallel with parsing\. Only useful on JRuby\.
|
55
54
|
.
|
56
55
|
.TP
|
57
56
|
\fB\-\-time\fR
|
58
57
|
Print elapsed time for index creation\. Mainly useful for measuring performance with different Ruby implementations, I/O subsystems, etc\.
|
59
58
|
.
|
59
|
+
.TP
|
60
|
+
\fB\-q\fR, \fB\-\-quiet\fR
|
61
|
+
Run quietly, with warnings suppressed\.
|
62
|
+
.
|
63
|
+
.TP
|
64
|
+
\fB\-v\fR, \fB\-\-verbose\fR
|
65
|
+
Run verbosely, with additional informational messages\.
|
66
|
+
.
|
67
|
+
.TP
|
68
|
+
\fB\-\-debug\fR
|
69
|
+
Log debugging information\.
|
70
|
+
.
|
60
71
|
.SH "EXAMPLES"
|
61
72
|
Build an index on a MAF file:
|
62
73
|
.
|
data/man/maf_index.1.ronn
CHANGED
@@ -29,9 +29,11 @@ The index created is a Kyoto Cabinet TreeDB (B+ tree) database;
|
|
29
29
|
|
30
30
|
## OPTIONS
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
* `-a`, `--all`:
|
33
|
+
All sequences, not just the reference sequence, will be indexed for
|
34
|
+
searching.
|
35
|
+
|
36
|
+
* `-d`, `--dump`:
|
35
37
|
Instead of creating an index, dump out the given <index> in
|
36
38
|
human-readable form. Index records will appear like:
|
37
39
|
|
@@ -41,15 +43,20 @@ TODO
|
|
41
43
|
sequences in block: 10
|
42
44
|
species vector: 00000000000003ff
|
43
45
|
|
44
|
-
* `-t`, `--threaded`:
|
45
|
-
Use a separate reader thread to do I/O in parallel with
|
46
|
-
parsing. Only useful on JRuby.
|
47
|
-
|
48
46
|
* `--time`:
|
49
47
|
Print elapsed time for index creation. Mainly useful for measuring
|
50
48
|
performance with different Ruby implementations, I/O subsystems,
|
51
49
|
etc.
|
52
50
|
|
51
|
+
* `-q`, `--quiet`:
|
52
|
+
Run quietly, with warnings suppressed.
|
53
|
+
|
54
|
+
* `-v`, `--verbose`:
|
55
|
+
Run verbosely, with additional informational messages.
|
56
|
+
|
57
|
+
* `--debug`:
|
58
|
+
Log debugging information.
|
59
|
+
|
53
60
|
## EXAMPLES
|
54
61
|
|
55
62
|
Build an index on a MAF file:
|
data/man/maf_tile.1
CHANGED
@@ -43,6 +43,18 @@ The given BED file will be used to provide a list of intervals to process\. If p
|
|
43
43
|
\fB\-o\fR, \fB\-\-output\-base BASE\fR
|
44
44
|
The given path will be used as the base name for output files, as described above\.
|
45
45
|
.
|
46
|
+
.TP
|
47
|
+
\fB\-q\fR, \fB\-\-quiet\fR
|
48
|
+
Run quietly, with warnings suppressed\.
|
49
|
+
.
|
50
|
+
.TP
|
51
|
+
\fB\-v\fR, \fB\-\-verbose\fR
|
52
|
+
Run verbosely, with additional informational messages\.
|
53
|
+
.
|
54
|
+
.TP
|
55
|
+
\fB\-\-debug\fR
|
56
|
+
Log debugging information\.
|
57
|
+
.
|
46
58
|
.SH "EXAMPLES"
|
47
59
|
Generate an alignment of the \fBhg19\fR, \fBpetMar1\fR, and \fBornAna1\fR sequences from \fBchrY\.maf\fR over the interval 14400 to 15000 on the reference sequence of the MAF file\. Fills in gaps from \fBchrY\.refseq\.fa\.gz\fR\. Writes FASTA output to stdout\.
|
48
60
|
.
|
data/man/maf_tile.1.ronn
CHANGED
@@ -52,6 +52,15 @@ output.
|
|
52
52
|
The given path will be used as the base name for output files, as
|
53
53
|
described above.
|
54
54
|
|
55
|
+
* `-q`, `--quiet`:
|
56
|
+
Run quietly, with warnings suppressed.
|
57
|
+
|
58
|
+
* `-v`, `--verbose`:
|
59
|
+
Run verbosely, with additional informational messages.
|
60
|
+
|
61
|
+
* `--debug`:
|
62
|
+
Log debugging information.
|
63
|
+
|
55
64
|
## EXAMPLES
|
56
65
|
|
57
66
|
Generate an alignment of the `hg19`, `petMar1`, and `ornAna1`
|
data/spec/bio/maf/index_spec.rb
CHANGED
@@ -43,6 +43,29 @@ module Bio
|
|
43
43
|
buf.string.should == File.read(TestData + 'gap-filled1.fa')
|
44
44
|
end
|
45
45
|
end
|
46
|
+
it "gives a bio-alignment representation" do
|
47
|
+
access = Access.maf_dir(TestData)
|
48
|
+
interval = GenomicInterval.zero_based('sp1.chr1', 0, 50)
|
49
|
+
aln = access.tile(interval) do |tiler|
|
50
|
+
tiler.reference = TestData + 'gap-sp1.fa'
|
51
|
+
tiler.species = %w(sp1 sp2 sp3)
|
52
|
+
tiler.build_bio_alignment
|
53
|
+
end
|
54
|
+
aln.sequences[0].id.should == 'sp1'
|
55
|
+
aln.sequences[0].to_s.start_with?('CCAGGATGC').should be_true
|
56
|
+
end
|
57
|
+
it "allows setting the fill character" do
|
58
|
+
access = Access.maf_dir(TestData)
|
59
|
+
interval = GenomicInterval.zero_based('sp1.chr1', 0, 50)
|
60
|
+
aln = access.tile(interval) do |tiler|
|
61
|
+
tiler.reference = TestData + 'gap-sp1.fa'
|
62
|
+
tiler.species = %w(sp1 sp2 sp3)
|
63
|
+
tiler.fill_char = '-'
|
64
|
+
tiler.build_bio_alignment
|
65
|
+
end
|
66
|
+
aln.sequences[1].id.should == 'sp2'
|
67
|
+
aln.sequences[1].to_s.start_with?('----------GGGCTG').should be_true
|
68
|
+
end
|
46
69
|
end
|
47
70
|
describe ".file" do
|
48
71
|
it "accepts a MAF file and index" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-maf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bio-alignment
|
@@ -28,29 +28,29 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: 0.0.7
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name: bio-
|
31
|
+
name: bio-genomic-interval
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
-
- -
|
35
|
+
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version:
|
37
|
+
version: 0.1.2
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
45
|
+
version: 0.1.2
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
|
-
name: bio-
|
47
|
+
name: bio-logger
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
49
49
|
none: false
|
50
50
|
requirements:
|
51
51
|
- - ~>
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 0.1
|
53
|
+
version: 1.0.1
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,7 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.1
|
61
|
+
version: 1.0.1
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: kyotocabinet-ruby
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,6 +81,7 @@ executables:
|
|
81
81
|
- find_overlaps
|
82
82
|
- maf_count
|
83
83
|
- maf_dump_blocks
|
84
|
+
- maf_extract
|
84
85
|
- maf_extract_ranges_count
|
85
86
|
- maf_index
|
86
87
|
- maf_parse_bench
|
@@ -112,6 +113,7 @@ files:
|
|
112
113
|
- bin/find_overlaps
|
113
114
|
- bin/maf_count
|
114
115
|
- bin/maf_dump_blocks
|
116
|
+
- bin/maf_extract
|
115
117
|
- bin/maf_extract_ranges_count
|
116
118
|
- bin/maf_index
|
117
119
|
- bin/maf_parse_bench
|
@@ -157,6 +159,8 @@ files:
|
|
157
159
|
- lib/bio/ucsc/genomic-interval-bin.rb
|
158
160
|
- lib/bio/ucsc/ucsc_bin.rb
|
159
161
|
- man/.gitignore
|
162
|
+
- man/maf_extract.1
|
163
|
+
- man/maf_extract.1.ronn
|
160
164
|
- man/maf_index.1
|
161
165
|
- man/maf_index.1.markdown
|
162
166
|
- man/maf_index.1.ronn
|
@@ -215,7 +219,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
215
219
|
version: '0'
|
216
220
|
segments:
|
217
221
|
- 0
|
218
|
-
hash: -
|
222
|
+
hash: -1336822573836516057
|
219
223
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
224
|
none: false
|
221
225
|
requirements:
|