bio-maf 0.1.0-java → 0.2.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +53 -0
- data/DEVELOPMENT.md +29 -0
- data/Gemfile +1 -0
- data/README.md +69 -1
- data/Rakefile +4 -3
- data/bin/find_overlaps +21 -0
- data/bin/maf_tile +103 -0
- data/bio-maf.gemspec +43 -0
- data/features/gap-filling.feature +158 -0
- data/features/gap-removal.feature +50 -0
- data/features/step_definitions/gap-filling_steps.rb +32 -0
- data/features/step_definitions/gap_removal_steps.rb +19 -0
- data/features/step_definitions/parse_steps.rb +2 -1
- data/lib/bio/maf.rb +2 -0
- data/lib/bio/maf/index.rb +15 -8
- data/lib/bio/maf/maf.rb +267 -0
- data/lib/bio/maf/parser.rb +115 -175
- data/lib/bio/maf/tiler.rb +167 -0
- data/man/maf_tile.1 +108 -0
- data/man/maf_tile.1.ronn +104 -0
- data/spec/bio/maf/index_spec.rb +1 -0
- data/spec/bio/maf/parser_spec.rb +103 -0
- data/spec/bio/maf/tiler_spec.rb +69 -0
- data/test/data/gap-sp1.fa +6 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- metadata +65 -7
data/man/maf_tile.1
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
.\" generated with Ronn/v0.7.3
|
2
|
+
.\" http://github.com/rtomayko/ronn/tree/0.7.3
|
3
|
+
.
|
4
|
+
.TH "MAF_TILE" "1" "June 2012" "Clayton Wheeler" "BioRuby Manual"
|
5
|
+
.
|
6
|
+
.SH "NAME"
|
7
|
+
\fBmaf_tile\fR \- synthesize an alignment for a given region
|
8
|
+
.
|
9
|
+
.SH "SYNOPSIS"
|
10
|
+
\fBmaf_tile\fR [\fIoptions\fR] \-i BEGIN:END [\-s SPECIES[:NAME] \.\.\.] \fImaf\fR \fIindex\fR
|
11
|
+
.
|
12
|
+
.P
|
13
|
+
\fBmaf_tile\fR [\fIoptions\fR] \-\-bed BED \-o BASE [\-s SPECIES[:NAME] \.\.\.] \fImaf\fR \fIindex\fR
|
14
|
+
.
|
15
|
+
.SH "DESCRIPTION"
|
16
|
+
\fBmaf_tile\fR takes a MAF file with index (generated by maf_index(1)), extracts alignment blocks overlapping the given genomic interval, and constructs a single alignment block covering the entire interval for the specified species\. Optionally, any gaps in coverage of the MAF file\'s reference sequence can be filled in from a FASTA sequence file\.
|
17
|
+
.
|
18
|
+
.P
|
19
|
+
If a single interval is specified, the output will be written to stdout in FASTA format\. If the \fB\-\-output\-base\fR option is specified, \fB_<start>:<end>\.fa\fR will be appended to the given \fIbase\fR parameter and used to construct the output path\. If a BED file is specified with \fB\-\-bed\fR, \fB\-\-output\-base\fR is also required\.
|
20
|
+
.
|
21
|
+
.P
|
22
|
+
Species can be renamed for output by specifying them as SPECIES:NAME; the first component will be used to select the species from the MAF file, and the second will be used in the FASTA description line for output\.
|
23
|
+
.
|
24
|
+
.SH "OPTIONS"
|
25
|
+
.
|
26
|
+
.TP
|
27
|
+
\fB\-r\fR, \fB\-\-reference SEQ\fR
|
28
|
+
The FASTA reference sequence file given, which may be gzipped, will be used to fill in any gaps between alignment blocks\.
|
29
|
+
.
|
30
|
+
.TP
|
31
|
+
\fB\-i\fR, \fB\-\-interval BEGIN:END\fR
|
32
|
+
The given zero\-based genomic interval will be used to select alignment blocks from the MAF file\.
|
33
|
+
.
|
34
|
+
.TP
|
35
|
+
\fB\-s\fR, \fB\-\-species SPECIES[:NAME]\fR
|
36
|
+
The given species will be selected for output\. If given as \fBspecies:name\fR, it will appear in the FASTA output as \fIname\fR\.
|
37
|
+
.
|
38
|
+
.TP
|
39
|
+
\fB\-b\fR, \fB\-\-bed BED\fR
|
40
|
+
The given BED file will be used to provide a list of intervals to process\. If present, \fB\-\-interval\fR will be ignored and \fB\-\-output\-base\fR must be given as well\.
|
41
|
+
.
|
42
|
+
.TP
|
43
|
+
\fB\-o\fR, \fB\-\-output\-base BASE\fR
|
44
|
+
The given path will be used as the base name for output files, as described above\.
|
45
|
+
.
|
46
|
+
.SH "EXAMPLES"
|
47
|
+
Generate an alignment of the \fBhg19\fR, \fBpetMar1\fR, and \fBornAna1\fR sequences from \fBchrY\.maf\fR over the interval 14400 to 15000 on the reference sequence of the MAF file\. Fills in gaps from \fBchrY\.refseq\.fa\.gz\fR\. Writes FASTA output to stdout\.
|
48
|
+
.
|
49
|
+
.IP "" 4
|
50
|
+
.
|
51
|
+
.nf
|
52
|
+
|
53
|
+
$ maf_tile \-\-reference ~/maf/chrY\.refseq\.fa\.gz \e
|
54
|
+
\-\-interval 14400:15000 \e
|
55
|
+
\-s hg19:human \-s petMar1 \-s ornAna1 \e
|
56
|
+
chrY\.maf chrY\.kct
|
57
|
+
>human
|
58
|
+
GGGTGACGAAAAGAGCCGA\-\-\-\-\-[\.\.\.]
|
59
|
+
>petMar1
|
60
|
+
gagtgccggggagtgccggggagt[\.\.\.]
|
61
|
+
>ornAna1
|
62
|
+
AGGGATCTGGGAATTCTGG\-\-\-\-\-[\.\.\.]
|
63
|
+
.
|
64
|
+
.fi
|
65
|
+
.
|
66
|
+
.IP "" 0
|
67
|
+
.
|
68
|
+
.P
|
69
|
+
Write out a FASTA file for each interval in the given BED file, prefixed with \fB/tmp/mm8\fR, and without filling in data from a reference sequence:
|
70
|
+
.
|
71
|
+
.IP "" 4
|
72
|
+
.
|
73
|
+
.nf
|
74
|
+
|
75
|
+
$ maf_tile \-\-bed /tmp/mm8\.bed \-\-output\-base /tmp/mm8 \e
|
76
|
+
\-s mm8:mouse \-s rn4:rat \-s hg18:human \e
|
77
|
+
mm8_chr7_tiny\.maf mm8_chr7_tiny\.kct
|
78
|
+
.
|
79
|
+
.fi
|
80
|
+
.
|
81
|
+
.IP "" 0
|
82
|
+
.
|
83
|
+
.SH "FILES"
|
84
|
+
The output is generated in FASTA format, with one sequence per species\.
|
85
|
+
.
|
86
|
+
.P
|
87
|
+
The input \fImaf\fR file must be a Multiple Alignment Format file\.
|
88
|
+
.
|
89
|
+
.P
|
90
|
+
The \fIindex\fR must be a MAF index built with maf_index(1)\.
|
91
|
+
.
|
92
|
+
.P
|
93
|
+
If \fB\-\-bed\fR \fIbed\fR is specified, its argument must be a BED file\. Only the second and third columns will be used, to specify the zero\-based start and end positions of intervals\.
|
94
|
+
.
|
95
|
+
.SH "ENVIRONMENT"
|
96
|
+
\fBmaf_tile\fR is a Ruby program and relies on ordinary Ruby environment variables\.
|
97
|
+
.
|
98
|
+
.SH "COPYRIGHT"
|
99
|
+
\fBmaf_tile\fR is copyright (C) 2012 Clayton Wheeler\.
|
100
|
+
.
|
101
|
+
.SH "SEE ALSO"
|
102
|
+
maf_index(1), ruby(1)
|
103
|
+
.
|
104
|
+
.IP "\(bu" 4
|
105
|
+
\fIhttps://github\.com/csw/bioruby\-maf/\fR
|
106
|
+
.
|
107
|
+
.IP "" 0
|
108
|
+
|
data/man/maf_tile.1.ronn
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
maf_tile(1) -- synthesize an alignment for a given region
|
2
|
+
=========================================================
|
3
|
+
|
4
|
+
## SYNOPSIS
|
5
|
+
|
6
|
+
`maf_tile` [<options>] -i BEGIN:END [-s SPECIES[:NAME] ...] <maf> <index>
|
7
|
+
|
8
|
+
`maf_tile` [<options>] --bed BED -o BASE [-s SPECIES[:NAME] ...] <maf> <index>
|
9
|
+
|
10
|
+
## DESCRIPTION
|
11
|
+
|
12
|
+
**maf_tile** takes a MAF file with index (generated by maf_index(1)),
|
13
|
+
extracts alignment blocks overlapping the given genomic interval, and
|
14
|
+
constructs a single alignment block covering the entire interval for
|
15
|
+
the specified species. Optionally, any gaps in coverage of the MAF
|
16
|
+
file's reference sequence can be filled in from a FASTA sequence file.
|
17
|
+
|
18
|
+
If a single interval is specified, the output will be written to
|
19
|
+
stdout in FASTA format. If the `--output-base` option is specified,
|
20
|
+
`_<start>:<end>.fa` will be appended to the given <base> parameter and
|
21
|
+
used to construct the output path. If a BED file is specified with
|
22
|
+
`--bed`, `--output-base` is also required.
|
23
|
+
|
24
|
+
Species can be renamed for output by specifying them as SPECIES:NAME;
|
25
|
+
the first component will be used to select the species from the MAF
|
26
|
+
file, and the second will be used in the FASTA description line for
|
27
|
+
output.
|
28
|
+
|
29
|
+
## OPTIONS
|
30
|
+
|
31
|
+
* `-r`, `--reference SEQ`:
|
32
|
+
The FASTA reference sequence file given, which may be gzipped, will
|
33
|
+
be used to fill in any gaps between alignment blocks.
|
34
|
+
|
35
|
+
* `-i`, `--interval BEGIN:END`:
|
36
|
+
The given zero-based genomic interval will be used to select
|
37
|
+
alignment blocks from the MAF file.
|
38
|
+
|
39
|
+
* `-s`, `--species SPECIES[:NAME]`:
|
40
|
+
The given species will be selected for output. If given as
|
41
|
+
`species:name`, it will appear in the FASTA output as <name>.
|
42
|
+
|
43
|
+
* `-b`, `--bed BED`:
|
44
|
+
The given BED file will be used to provide a list of intervals to
|
45
|
+
process. If present, `--interval` will be ignored and
|
46
|
+
`--output-base` must be given as well.
|
47
|
+
|
48
|
+
* `-o`, `--output-base BASE`:
|
49
|
+
The given path will be used as the base name for output files, as
|
50
|
+
described above.
|
51
|
+
|
52
|
+
## EXAMPLES
|
53
|
+
|
54
|
+
Generate an alignment of the `hg19`, `petMar1`, and `ornAna1`
|
55
|
+
sequences from `chrY.maf` over the interval 14400 to 15000 on the
|
56
|
+
reference sequence of the MAF file. Fills in gaps from
|
57
|
+
`chrY.refseq.fa.gz`. Writes FASTA output to stdout.
|
58
|
+
|
59
|
+
$ maf_tile --reference ~/maf/chrY.refseq.fa.gz \
|
60
|
+
--interval 14400:15000 \
|
61
|
+
-s hg19:human -s petMar1 -s ornAna1 \
|
62
|
+
chrY.maf chrY.kct
|
63
|
+
>human
|
64
|
+
GGGTGACGAAAAGAGCCGA-----[...]
|
65
|
+
>petMar1
|
66
|
+
gagtgccggggagtgccggggagt[...]
|
67
|
+
>ornAna1
|
68
|
+
AGGGATCTGGGAATTCTGG-----[...]
|
69
|
+
|
70
|
+
Write out a FASTA file for each interval in the given BED file,
|
71
|
+
prefixed with `/tmp/mm8`, and without filling in data from a reference
|
72
|
+
sequence:
|
73
|
+
|
74
|
+
$ maf_tile --bed /tmp/mm8.bed --output-base /tmp/mm8 \
|
75
|
+
-s mm8:mouse -s rn4:rat -s hg18:human \
|
76
|
+
mm8_chr7_tiny.maf mm8_chr7_tiny.kct
|
77
|
+
|
78
|
+
## FILES
|
79
|
+
|
80
|
+
The output is generated in FASTA format, with one sequence per
|
81
|
+
species.
|
82
|
+
|
83
|
+
The input <maf> file must be a Multiple Alignment Format file.
|
84
|
+
|
85
|
+
The <index> must be a MAF index built with maf_index(1).
|
86
|
+
|
87
|
+
If `--bed` <bed> is specified, its argument must be a BED file. Only
|
88
|
+
the second and third columns will be used, to specify the zero-based
|
89
|
+
start and end positions of intervals.
|
90
|
+
|
91
|
+
## ENVIRONMENT
|
92
|
+
|
93
|
+
`maf_tile` is a Ruby program and relies on ordinary Ruby environment
|
94
|
+
variables.
|
95
|
+
|
96
|
+
## COPYRIGHT
|
97
|
+
|
98
|
+
`maf_tile` is copyright (C) 2012 Clayton Wheeler.
|
99
|
+
|
100
|
+
## SEE ALSO
|
101
|
+
|
102
|
+
maf_index(1), ruby(1)
|
103
|
+
|
104
|
+
* <https://github.com/csw/bioruby-maf/>
|
data/spec/bio/maf/index_spec.rb
CHANGED
data/spec/bio/maf/parser_spec.rb
CHANGED
@@ -25,6 +25,99 @@ module Bio
|
|
25
25
|
it "provides arbitrary parameters"
|
26
26
|
end
|
27
27
|
|
28
|
+
describe Block do
|
29
|
+
describe "#find_gaps" do
|
30
|
+
it "finds a single 14-base gap" do
|
31
|
+
p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
|
32
|
+
p.sequence_filter = { :only_species => %w(mm8 rn4 hg18 canFam2 loxAfr1) }
|
33
|
+
block = p.parse_block
|
34
|
+
gaps = block.find_gaps
|
35
|
+
gaps.size.should == 1
|
36
|
+
gaps[0][0].should == 34
|
37
|
+
gaps[0][1].should == 14
|
38
|
+
end
|
39
|
+
end
|
40
|
+
describe "#remove_gaps!" do
|
41
|
+
it "removes a single 14-base gap" do
|
42
|
+
p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
|
43
|
+
p.sequence_filter = { :only_species => %w(mm8 rn4 hg18 canFam2 loxAfr1) }
|
44
|
+
block = p.parse_block
|
45
|
+
block.sequences.size.should == 5
|
46
|
+
block.text_size.should == 54
|
47
|
+
block.remove_gaps!
|
48
|
+
block.text_size.should == 40
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe Sequence do
|
54
|
+
before(:each) do
|
55
|
+
@parser = DummyParser.new
|
56
|
+
end
|
57
|
+
|
58
|
+
describe "#gapped?" do
|
59
|
+
it "is false for sequences with no gaps" do
|
60
|
+
line = "s human_unc 9077 8 + 10998 ACAGTATT"
|
61
|
+
s = @parser.parse_seq_line(line, nil)
|
62
|
+
s.gapped?.should be_false
|
63
|
+
end
|
64
|
+
it "is true for sequences with gaps" do
|
65
|
+
line = "s human_unc 9077 8 + 10998 AC-AGTATT"
|
66
|
+
s = @parser.parse_seq_line(line, nil)
|
67
|
+
s.gapped?.should be_true
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "#text_range" do
|
72
|
+
it "returns 0...text.size for a spanning interval" do
|
73
|
+
line = "s human_unc 9077 8 + 10998 ACAGTATT"
|
74
|
+
s = @parser.parse_seq_line(line, nil)
|
75
|
+
range = s.text_range(9077...(9077 + 8))
|
76
|
+
range.should == (0...(s.text.size))
|
77
|
+
end
|
78
|
+
it "returns 0...text.size for a gapped spanning interval" do
|
79
|
+
line = "s human_unc 9077 8 + 10998 AC--AGTATT"
|
80
|
+
s = @parser.parse_seq_line(line, nil)
|
81
|
+
range = s.text_range(9077...(9077 + 8))
|
82
|
+
range.should == (0...(s.text.size))
|
83
|
+
end
|
84
|
+
it "handles a leading subset" do
|
85
|
+
line = "s human_unc 9077 8 + 10998 ACAGTATT"
|
86
|
+
s = @parser.parse_seq_line(line, nil)
|
87
|
+
range = s.text_range(9077...(9077 + 2))
|
88
|
+
range.should == (0...2)
|
89
|
+
end
|
90
|
+
it "handles a trailing subset" do
|
91
|
+
line = "s human_unc 9077 8 + 10998 ACAGTATT"
|
92
|
+
s = @parser.parse_seq_line(line, nil)
|
93
|
+
range = s.text_range(9079...9085)
|
94
|
+
range.should == (2...8)
|
95
|
+
end
|
96
|
+
it "handles a gap in the middle" do
|
97
|
+
line = "s human_unc 9077 8 + 10998 AC--AGTATT"
|
98
|
+
s = @parser.parse_seq_line(line, nil)
|
99
|
+
range = s.text_range(9078...(9077 + 8))
|
100
|
+
range.should == (1...(s.text.size))
|
101
|
+
end
|
102
|
+
it "errors on a range starting before" do
|
103
|
+
expect {
|
104
|
+
line = "s human_unc 9077 8 + 10998 ACAGTATT"
|
105
|
+
s = @parser.parse_seq_line(line, nil)
|
106
|
+
range = s.text_range(9076...(9077 + 8))
|
107
|
+
}.to raise_error
|
108
|
+
end
|
109
|
+
it "errors on a range ending after" do
|
110
|
+
expect {
|
111
|
+
line = "s human_unc 9077 8 + 10998 ACAGTATT"
|
112
|
+
s = @parser.parse_seq_line(line, nil)
|
113
|
+
range = s.text_range(9076...(9077 + 9))
|
114
|
+
}.to raise_error
|
115
|
+
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
120
|
+
|
28
121
|
describe ParseContext do
|
29
122
|
it "tracks the last block position"
|
30
123
|
end
|
@@ -206,6 +299,16 @@ module Bio
|
|
206
299
|
@p.sequence_filter = { :only_species => %w(mm8 hg18) }
|
207
300
|
@p.parse_block.sequences.size.should == 2
|
208
301
|
end
|
302
|
+
it "sets filtered? when modified" do
|
303
|
+
@p.sequence_filter = { :only_species => %w(mm8 rn4) }
|
304
|
+
@p.parse_block.filtered?.should be_true
|
305
|
+
end
|
306
|
+
it "does not set filtered? when unmodified" do
|
307
|
+
@p.sequence_filter = {
|
308
|
+
:only_species => %w(mm8 rn4 oryCun1 hg18 hg181)
|
309
|
+
}
|
310
|
+
@p.parse_block.filtered?.should be_false
|
311
|
+
end
|
209
312
|
end
|
210
313
|
|
211
314
|
context "at end of file" do
|
data/spec/bio/maf/tiler_spec.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Bio::MAF
|
4
|
+
|
5
|
+
describe Tiler do
|
6
|
+
|
7
|
+
describe "#runs" do
|
8
|
+
it "returns a uniform run properly" do
|
9
|
+
a = Array.new(10, 'a')
|
10
|
+
runs = Tiler.new.enum_for(:runs, a).to_a
|
11
|
+
runs.should == [[0...10, 'a']]
|
12
|
+
end
|
13
|
+
it "yields a trailing item" do
|
14
|
+
a = Array.new(10, 'a')
|
15
|
+
a.fill('b', 8...10)
|
16
|
+
runs = Tiler.new.enum_for(:runs, a).to_a
|
17
|
+
runs.should == [[0...8, 'a'], [8...10, 'b']]
|
18
|
+
end
|
19
|
+
it "handles mixed contents" do
|
20
|
+
spec = [[0...2, 'a'],
|
21
|
+
[2...3, 'b'],
|
22
|
+
[3...4, 'c'],
|
23
|
+
[4...7, 'd']]
|
24
|
+
a = Array.new(7, nil)
|
25
|
+
spec.each { |range, obj| a.fill(obj, range) }
|
26
|
+
runs = Tiler.new.enum_for(:runs, a).to_a
|
27
|
+
runs.should == spec
|
28
|
+
end
|
29
|
+
it "handles overwrites" do
|
30
|
+
spec = [[0...7, 'a'],
|
31
|
+
[2...5, 'b'],
|
32
|
+
[3...4, 'c'],
|
33
|
+
[4...7, 'd']]
|
34
|
+
a = Array.new(7, nil)
|
35
|
+
spec.each { |range, obj| a.fill(obj, range) }
|
36
|
+
runs = Tiler.new.enum_for(:runs, a).to_a
|
37
|
+
runs.should == [[0...2, 'a'],
|
38
|
+
[2...3, 'b'],
|
39
|
+
[3...4, 'c'],
|
40
|
+
[4...7, 'd']]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
describe FASTARangeReader do
|
47
|
+
describe "#read" do
|
48
|
+
before(:each) do
|
49
|
+
@r = FASTARangeReader.new('test/data/gap-sp1.fa')
|
50
|
+
@s = 'CCAGGATGCTGGGCTGAGGGCAGTTGTGTCAGGGCGGTCCGGTGCAGGCA'
|
51
|
+
end
|
52
|
+
|
53
|
+
def check_range(z_start, z_end)
|
54
|
+
@r.read_interval(z_start, z_end).should == @s.slice(z_start...z_end)
|
55
|
+
end
|
56
|
+
|
57
|
+
it "returns the entire sequence" do
|
58
|
+
check_range(0, 50)
|
59
|
+
end
|
60
|
+
it "returns an entire line" do
|
61
|
+
check_range(10, 20)
|
62
|
+
end
|
63
|
+
it "returns arbitrary components" do
|
64
|
+
check_range(17, 41)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
data/test/data/mm8_chr7_tiny.kct
CHANGED
Binary file
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: bio-maf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.1.0
|
5
|
+
version: 0.2.0
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Clayton Wheeler
|
@@ -17,13 +17,15 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - ! '>='
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '0'
|
20
|
+
version: !binary |-
|
21
|
+
MA==
|
21
22
|
none: false
|
22
23
|
requirement: !ruby/object:Gem::Requirement
|
23
24
|
requirements:
|
24
25
|
- - ! '>='
|
25
26
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
+
version: !binary |-
|
28
|
+
MA==
|
27
29
|
none: false
|
28
30
|
prerelease: false
|
29
31
|
type: :runtime
|
@@ -62,11 +64,13 @@ dependencies:
|
|
62
64
|
description: Multiple Alignment Format parser for BioRuby.
|
63
65
|
email: cswh@umich.edu
|
64
66
|
executables:
|
67
|
+
- find_overlaps
|
65
68
|
- maf_count
|
66
69
|
- maf_dump_blocks
|
67
70
|
- maf_extract_ranges_count
|
68
71
|
- maf_index
|
69
72
|
- maf_parse_bench
|
73
|
+
- maf_tile
|
70
74
|
- maf_to_fasta
|
71
75
|
- maf_write
|
72
76
|
- random_ranges
|
@@ -76,6 +80,7 @@ extra_rdoc_files:
|
|
76
80
|
- README.md
|
77
81
|
files:
|
78
82
|
- .document
|
83
|
+
- .gitignore
|
79
84
|
- .simplecov
|
80
85
|
- .travis.yml
|
81
86
|
- .yardopts
|
@@ -90,20 +95,27 @@ files:
|
|
90
95
|
- benchmarks/read_bench
|
91
96
|
- benchmarks/sort_bench
|
92
97
|
- benchmarks/split_bench
|
98
|
+
- bin/find_overlaps
|
93
99
|
- bin/maf_count
|
94
100
|
- bin/maf_dump_blocks
|
95
101
|
- bin/maf_extract_ranges_count
|
96
102
|
- bin/maf_index
|
97
103
|
- bin/maf_parse_bench
|
104
|
+
- bin/maf_tile
|
98
105
|
- bin/maf_to_fasta
|
99
106
|
- bin/maf_write
|
100
107
|
- bin/random_ranges
|
108
|
+
- bio-maf.gemspec
|
109
|
+
- features/gap-filling.feature
|
110
|
+
- features/gap-removal.feature
|
101
111
|
- features/maf-indexing.feature
|
102
112
|
- features/maf-output.feature
|
103
113
|
- features/maf-parsing.feature
|
104
114
|
- features/maf-querying.feature
|
105
115
|
- features/maf-to-fasta.feature
|
106
116
|
- features/step_definitions/convert_steps.rb
|
117
|
+
- features/step_definitions/gap-filling_steps.rb
|
118
|
+
- features/step_definitions/gap_removal_steps.rb
|
107
119
|
- features/step_definitions/index_steps.rb
|
108
120
|
- features/step_definitions/output_steps.rb
|
109
121
|
- features/step_definitions/parse_steps.rb
|
@@ -115,8 +127,10 @@ files:
|
|
115
127
|
- lib/bio-maf/maf.rb
|
116
128
|
- lib/bio/maf.rb
|
117
129
|
- lib/bio/maf/index.rb
|
130
|
+
- lib/bio/maf/maf.rb
|
118
131
|
- lib/bio/maf/parser.rb
|
119
132
|
- lib/bio/maf/struct.rb
|
133
|
+
- lib/bio/maf/tiler.rb
|
120
134
|
- lib/bio/maf/writer.rb
|
121
135
|
- lib/bio/ucsc.rb
|
122
136
|
- lib/bio/ucsc/genomic-interval-bin.rb
|
@@ -125,17 +139,21 @@ files:
|
|
125
139
|
- man/maf_index.1
|
126
140
|
- man/maf_index.1.markdown
|
127
141
|
- man/maf_index.1.ronn
|
142
|
+
- man/maf_tile.1
|
143
|
+
- man/maf_tile.1.ronn
|
128
144
|
- man/maf_to_fasta.1
|
129
145
|
- man/maf_to_fasta.1.ronn
|
130
146
|
- spec/bio/maf/index_spec.rb
|
131
147
|
- spec/bio/maf/parser_spec.rb
|
132
148
|
- spec/bio/maf/struct_spec.rb
|
149
|
+
- spec/bio/maf/tiler_spec.rb
|
133
150
|
- spec/spec_helper.rb
|
134
151
|
- test/data/big-block.maf
|
135
152
|
- test/data/chr22_ieq.maf
|
136
153
|
- test/data/chrY-1block.maf
|
137
154
|
- test/data/empty
|
138
155
|
- test/data/empty.db
|
156
|
+
- test/data/gap-sp1.fa
|
139
157
|
- test/data/mm8_chr7_tiny.kct
|
140
158
|
- test/data/mm8_chr7_tiny.maf
|
141
159
|
- test/data/mm8_mod_a.maf
|
@@ -164,13 +182,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
164
182
|
segments:
|
165
183
|
- 0
|
166
184
|
hash: 2
|
167
|
-
version: '0'
|
185
|
+
version: !binary |-
|
186
|
+
MA==
|
168
187
|
none: false
|
169
188
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
189
|
requirements:
|
171
190
|
- - ! '>='
|
172
191
|
- !ruby/object:Gem::Version
|
173
|
-
version: '0'
|
192
|
+
version: !binary |-
|
193
|
+
MA==
|
174
194
|
none: false
|
175
195
|
requirements: []
|
176
196
|
rubyforge_project:
|
@@ -178,5 +198,43 @@ rubygems_version: 1.8.24
|
|
178
198
|
signing_key:
|
179
199
|
specification_version: 3
|
180
200
|
summary: MAF parser for BioRuby
|
181
|
-
test_files:
|
182
|
-
|
201
|
+
test_files:
|
202
|
+
- features/gap-filling.feature
|
203
|
+
- features/gap-removal.feature
|
204
|
+
- features/maf-indexing.feature
|
205
|
+
- features/maf-output.feature
|
206
|
+
- features/maf-parsing.feature
|
207
|
+
- features/maf-querying.feature
|
208
|
+
- features/maf-to-fasta.feature
|
209
|
+
- features/step_definitions/convert_steps.rb
|
210
|
+
- features/step_definitions/gap-filling_steps.rb
|
211
|
+
- features/step_definitions/gap_removal_steps.rb
|
212
|
+
- features/step_definitions/index_steps.rb
|
213
|
+
- features/step_definitions/output_steps.rb
|
214
|
+
- features/step_definitions/parse_steps.rb
|
215
|
+
- features/step_definitions/query_steps.rb
|
216
|
+
- features/step_definitions/ucsc_bin_steps.rb
|
217
|
+
- features/support/env.rb
|
218
|
+
- features/ucsc-bins.feature
|
219
|
+
- spec/bio/maf/index_spec.rb
|
220
|
+
- spec/bio/maf/parser_spec.rb
|
221
|
+
- spec/bio/maf/struct_spec.rb
|
222
|
+
- spec/bio/maf/tiler_spec.rb
|
223
|
+
- spec/spec_helper.rb
|
224
|
+
- test/data/big-block.maf
|
225
|
+
- test/data/chr22_ieq.maf
|
226
|
+
- test/data/chrY-1block.maf
|
227
|
+
- test/data/empty
|
228
|
+
- test/data/empty.db
|
229
|
+
- test/data/gap-sp1.fa
|
230
|
+
- test/data/mm8_chr7_tiny.kct
|
231
|
+
- test/data/mm8_chr7_tiny.maf
|
232
|
+
- test/data/mm8_mod_a.maf
|
233
|
+
- test/data/mm8_single.maf
|
234
|
+
- test/data/mm8_subset_a.maf
|
235
|
+
- test/data/t1-bad1.maf
|
236
|
+
- test/data/t1.fasta
|
237
|
+
- test/data/t1.maf
|
238
|
+
- test/data/t1a.maf
|
239
|
+
- test/helper.rb
|
240
|
+
- test/test_bio-maf.rb
|