bio-maf 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio-maf.rb +12 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +181 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
## NOTE: this is probably not the best place for this, ultimately.
|
2
|
+
## If it works, think about moving it.
|
3
|
+
|
4
|
+
module Bio

  module MAF

    # Describes a fixed-layout binary record as an ordered list of
    # unsigned integer members, and produces String#unpack / Array#pack
    # format strings for the whole record or for a subset of members.
    class Struct
      # Builds the record layout.
      #
      # @param spec [Array<Array>] one [name, type] pair per member, in
      #   storage order; each type must be a key of TYPE_PROPS.
      # @raise [KeyError] if a member's type is not in TYPE_PROPS.
      def initialize(spec)
        @members = []
        @by_name = {}
        offset = 0
        spec.each do |m_spec|
          m = Member.new(offset, *m_spec)
          @members << m
          @by_name[m.name] = m
          # Members are laid out back to back, with no padding.
          offset += m.size
        end
      end

      # @return [String] the pack/unpack format covering every member,
      #   in storage order.
      def fmt
        @members.map(&:fmt).join('')
      end

      # Builds an unpack format that extracts only the named members.
      # Members are always emitted in offset order, whatever order the
      # names were given in.
      #
      # @param names [Array] member names as given in the spec.
      # @return [String] an unpack format string.
      # @raise [KeyError] if a name is not a member of this struct.
      def extractor_fmt(*names)
        wanted = names.map { |name| @by_name.fetch(name) }
        wanted.sort_by!(&:offset)
        # ''.dup gives a mutable buffer even when string literals are
        # frozen.
        buf = ''.dup
        pos = 0
        wanted.each do |member|
          if member.offset != pos
            # '@N' moves the unpack cursor to absolute byte offset N,
            # skipping any members that were not requested.
            buf << "@#{member.offset}"
            pos = member.offset
          end
          buf << member.fmt
          pos += member.size
        end
        buf
      end
    end

    # Size in bytes and pack/unpack directive for each supported member
    # type. The '>' modifier selects big-endian byte order.
    TYPE_PROPS = {
      :uint8  => { :size => 1, :fmt => 'C'  }.freeze,
      :uint16 => { :size => 2, :fmt => 'S>' }.freeze,
      :uint32 => { :size => 4, :fmt => 'L>' }.freeze,
      :uint64 => { :size => 8, :fmt => 'Q>' }.freeze
    }.freeze

    # One field of a Struct: its byte offset within the record, its
    # name and type, and the size and format looked up from TYPE_PROPS.
    class Member
      attr_reader :offset, :name, :type, :size, :fmt

      # @param offset [Integer] byte offset of this member in the record.
      # @param name the member's name, used as the lookup key.
      # @param type [Symbol] a key of TYPE_PROPS.
      # @raise [KeyError] if type is not a key of TYPE_PROPS.
      def initialize(offset, name, type)
        @offset = offset
        @name = name
        @type = type
        props = TYPE_PROPS.fetch(type)
        @size = props.fetch(:size)
        @fmt = props.fetch(:fmt)
      end
    end

  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Bio
  module MAF

    # Writes MAF headers and alignment blocks to an output stream.
    #
    # The destination may be an already-open stream (anything that
    # responds to #write) or a file path, which is opened for writing.
    class Writer
      attr_reader :f, :path

      # @param fspec [#write, String] an open output stream, or the path
      #   of a file to create or truncate.
      def initialize(fspec)
        if fspec.respond_to? :write
          @f = fspec
          @path = fspec.path if fspec.respond_to? :path
          # The caller opened this stream, so the caller closes it.
          @owns_io = false
        else
          @path = fspec
          @f = File.open(fspec, 'w')
          @owns_io = true
        end
      end

      # Releases the output stream. A file opened by this writer (from
      # a path) is closed; a stream supplied by the caller is only
      # flushed and stays open. Safe to call more than once.
      def close
        if @owns_io
          f.close unless f.closed?
        elsif f.respond_to?(:flush)
          f.flush
        end
      end

      # Renders key/value pairs as space-separated "key=value" text.
      # nil is treated as no pairs (nil.to_a is empty).
      #
      # @param vars [Hash, nil]
      # @return [String]
      def flatten_vars(vars)
        vars.to_a.map { |k, v| "#{k}=#{v}" }.join(" ")
      end

      # Writes the "##maf" header line, followed by a "#"-prefixed
      # alignment parameters line when the header has one.
      #
      # @param header an object responding to #vars and
      #   #alignment_params.
      def write_header(header)
        f.puts "##maf #{flatten_vars(header.vars)}"
        # "##{...}" is a literal '#' followed by an interpolation.
        f.puts "##{header.alignment_params}" if header.alignment_params
      end

      # Writes every block, then flushes the stream.
      #
      # @param blocks [#each] alignment blocks.
      def write_blocks(blocks)
        blocks.each { |block| write_block(block) }
        f.flush
      end

      # Writes one alignment block: an "a" line, the lines for each
      # sequence, and a separator line.
      #
      # @param block an object responding to #vars and #sequences.
      def write_block(block)
        lines = ["a #{flatten_vars(block.vars)}"]
        block.sequences.each { |seq| write_seq(seq, lines) }
        # The separator is a single space, not an empty string: puts
        # adds no newline to text that already ends in one, so an empty
        # final element would produce no separator line at all.
        lines << " "
        f.puts lines.join("\n")
      end

      # Appends the lines for one sequence to +lines+: an "s" line (or
      # an "e" line when the sequence is empty), then optional "q"
      # (quality) and "i" lines.
      #
      # @param s a sequence responding to #empty?, #source, #start,
      #   #size, #strand, #src_size, #text, #status, #quality, #i_data.
      # @param lines [Array<String>] accumulator, modified in place.
      def write_seq(s, lines)
        is_empty = s.empty?
        lines << sprintf("%s %-20s %12d %2d %s %9d %s",
                         is_empty ? "e" : "s",
                         s.source,
                         s.start,
                         s.size,
                         s.strand,
                         s.src_size,
                         is_empty ? s.status : s.text)
        if s.quality
          lines << sprintf("q %-20s %s", s.source, s.quality)
        end
        if s.i_data
          lines << sprintf("i %-20s %s %s %s %s", s.source, *s.i_data)
        end
      end
    end

  end
end
|
data/lib/bio/maf.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# = UCSCBin
|
2
|
+
# Author:: MISHIMA, Hiroyuki
|
3
|
+
# Copyright:: MISHIMA, Hiroyuki, 2010-2011
|
4
|
+
# License:: The Ruby licence (Ruby's / GPLv2 dual)
|
5
|
+
#
|
6
|
+
# Original program in C by Jim Kent, 2002
|
7
|
+
# See also http://genomewiki.ucsc.edu/index.php/Bin_indexing_system;
|
8
|
+
# the paper by Kent et al., Genome Research 2002, 12:996-1006;
|
9
|
+
# and src/lib/binRange.c in the kent source tree.
|
10
|
+
#
|
11
|
+
# Bio::Ucsc::UcscBin -
|
12
|
+
# 1) convert between 0-based half-open interval and
|
13
|
+
# 1-based full-close intervals.
|
14
|
+
# 2) Calculate Bin number from genomic physical position
|
15
|
+
# according to UCSC's Bin Indexing System.
|
16
|
+
#
|
17
|
+
module Bio
  module Ucsc
    # Computes bin numbers for genomic intervals according to UCSC's
    # bin indexing system. Intervals are given as [start, end) and must
    # end at or below 512M; larger coordinates ("extended" bins) are
    # not implemented.
    class UcscBin
      # Version = "0.1.0" # 20100714
      # Version = "0.2.0" # 20101028
      # Version = "0.2.1" # 20110408
      Version = "0.2.2" # 20110418 the licence is changed
                        # embedded in BioRubyUcscApi
                        # handle the case, start==end in [start, end)

      BINRANGE_MAXEND_512M = (512*1024*1024)
      BIN_OFFSETS_EXTENDED = [4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0].freeze
      # BIN_OFFSETS_EXTENDED = [4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1] (to omit BIN=0)
      # First bin number of each level, finest level first.
      BIN_OFFSETS = [512+64+8+1, 64+8+1, 8+1, 1, 0].freeze
      # BIN_OFFSETS = [512+64+8+1, 64+8+1, 8+1, 1] (to omit BIN=0)
      BIN_OFFSET_OLD_TO_EXTENDED = 4681
      # How much to shift to get to finest bin.
      BIN_FIRST_SHIFT = 17
      # How much to shift to get to next larger bin.
      BIN_NEXT_SHIFT = 3

      # Returns the Integer number of the smallest/finest bin that
      # contains the whole interval [bin_start, bin_end).
      #
      # Extended bin index for positions >= 512M is not supported yet.
      #
      # @raise [RangeError] if no bin contains the interval.
      # @raise [NotImplementedError] if bin_end is above 512M.
      def self.bin_from_range(bin_start, bin_end)
        if bin_end <= BINRANGE_MAXEND_512M
          bin_from_range_standard(bin_start, bin_end)
        else
          bin_from_range_extended(bin_start, bin_end)
        end
      end

      class << self; alias bin bin_from_range; end

      # Returns an Array of bin numbers: at every level, finest first,
      # each bin from the one holding p_start to the one holding
      # p_end - 1. The last level always contributes bin 0, the bin
      # covering the whole chromosome.
      #
      # Extended bin index for positions >= 512M is not supported yet.
      #
      # @raise [NotImplementedError] if p_end is above 512M.
      def self.bin_all(p_start, p_end)
        if p_end <= BINRANGE_MAXEND_512M
          bin_all_standard(p_start, p_end)
        else
          bin_all_extended(p_start, p_end)
        end
      end

      # Implementation helpers follow. They remain publicly callable:
      # a bare `private` does not apply to methods defined with
      # `def self.`, so they were never actually private.

      # Given start,end in chromosome coordinates assign it
      # a bin.  There's a bin for each 128k segment, for each
      # 1M segment, for each 8M segment, for each 64M segment,
      # and for each chromosome (which is assumed to be less than
      # 512M.)  A range goes into the smallest bin it will fit in.
      def self.bin_from_range_standard(bin_start, bin_end)
        # Work on copies so the error message below can report the
        # coordinates the caller passed, not the shifted bin indexes.
        first = bin_start >> BIN_FIRST_SHIFT
        last = (bin_end - 1) >> BIN_FIRST_SHIFT

        BIN_OFFSETS.each do |offset|
          return offset + first if first == last
          first >>= BIN_NEXT_SHIFT
          last >>= BIN_NEXT_SHIFT
        end
        raise RangeError,
              "start #{bin_start}, end #{bin_end} out of range in findBin (max is 512M)"
      end

      # Placeholder for the extended (> 512M) scheme; always raises.
      def self.bin_from_range_extended(bin_start, bin_end)
        raise NotImplementedError, "Extended bins are not supported yet"
      end

      # Collects, level by level, the run of bins spanned by
      # [bin_start, bin_end).
      def self.bin_all_standard(bin_start, bin_end)
        results = []
        first = bin_start >> BIN_FIRST_SHIFT
        last = (bin_end - 1) >> BIN_FIRST_SHIFT

        BIN_OFFSETS.each do |offset|
          results.concat(((offset + first)..(offset + last)).to_a)
          first >>= BIN_NEXT_SHIFT
          last >>= BIN_NEXT_SHIFT
        end
        results
      end

      # Placeholder for the extended (> 512M) scheme; always raises.
      def self.bin_all_extended(bin_start, bin_end)
        raise NotImplementedError, "Extended bins are not supported yet"
      end

    end # class UcscBin
  end # module Ucsc
end # module Bio
|
data/lib/bio/ucsc.rb
ADDED
data/lib/bio-maf/maf.rb
ADDED
data/lib/bio-maf.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Please require your code below, respecting the naming conventions in the
|
2
|
+
# bioruby directory tree.
|
3
|
+
#
|
4
|
+
# For example, say you have a plugin named bio-plugin, the only uncommented
|
5
|
+
# line in this file would be
|
6
|
+
#
|
7
|
+
# require 'bio/bio-plugin/plugin'
|
8
|
+
#
|
9
|
+
# In this file only require other files. Avoid other source code.
|
10
|
+
|
11
|
+
require 'bio/ucsc'
|
12
|
+
require 'bio/maf'
|
data/man/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.html
|
data/man/maf_index.1
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
.\" generated with Ronn/v0.7.3
|
2
|
+
.\" http://github.com/rtomayko/ronn/tree/0.7.3
|
3
|
+
.
|
4
|
+
.TH "MAF_INDEX" "1" "June 2012" "Clayton Wheeler" "BioRuby Manual"
|
5
|
+
.
|
6
|
+
.SH "NAME"
|
7
|
+
\fBmaf_index\fR \- build and examine MAF indexes
|
8
|
+
.
|
9
|
+
.SH "SYNOPSIS"
|
10
|
+
\fBmaf_index\fR [\-t] \fImaf\fR \fIindex\fR
|
11
|
+
.
|
12
|
+
.br
|
13
|
+
\fBmaf_index\fR \fB\-d\fR|\fB\-\-dump\fR \fIindex\fR
|
14
|
+
.
|
15
|
+
.SH "DESCRIPTION"
|
16
|
+
\fBmaf_index\fR is part of the bioruby\-maf library and creates Kyoto Cabinet indexes for Multiple Alignment Format (MAF) files\. These indexes enable other MAF tools to selectively extract alignment blocks of interest\.
|
17
|
+
.
|
18
|
+
.P
|
19
|
+
In its default mode, \fBmaf_index\fR parses the \fImaf\fR file given as an argument and creates an index in \fIindex\fR\.
|
20
|
+
.
|
21
|
+
.P
|
22
|
+
The index data is stored in binary form, so with the \fB\-\-dump\fR argument, \fBmaf_index\fR can dump out the index data in human\-readable form for debugging\.
|
23
|
+
.
|
24
|
+
.SH "FILES"
|
25
|
+
The \fImaf\fR input file must be a valid MAF file of any length\.
|
26
|
+
.
|
27
|
+
.P
|
28
|
+
The index created is a Kyoto Cabinet TreeDB (B+ tree) database; \fIindex\fR must have a \fB\.kct\fR extension\.
|
29
|
+
.
|
30
|
+
.SH "OPTIONS"
|
31
|
+
TODO
|
32
|
+
.
|
33
|
+
.TP
|
34
|
+
\fB\-d\fR, \fB\-\-dump\fR
|
35
|
+
Instead of creating an index, dump out the given \fIindex\fR in human\-readable form\. Index records will appear like:
|
36
|
+
.
|
37
|
+
.IP "" 4
|
38
|
+
.
|
39
|
+
.nf
|
40
|
+
|
41
|
+
0 [bin 1195] 80082334:80082368
|
42
|
+
offset 16, length 1087
|
43
|
+
text size: 54
|
44
|
+
sequences in block: 10
|
45
|
+
species vector: 00000000000003ff
|
46
|
+
.
|
47
|
+
.fi
|
48
|
+
.
|
49
|
+
.IP "" 0
|
50
|
+
|
51
|
+
.
|
52
|
+
.TP
|
53
|
+
\fB\-t\fR, \fB\-\-threaded\fR
|
54
|
+
Use a separate reader thread to do I/O in parallel with parsing\. Only useful on JRuby\.
|
55
|
+
.
|
56
|
+
.TP
|
57
|
+
\fB\-\-time\fR
|
58
|
+
Print elapsed time for index creation\. Mainly useful for measuring performance with different Ruby implementations, I/O subsystems, etc\.
|
59
|
+
.
|
60
|
+
.SH "EXAMPLES"
|
61
|
+
Build an index on a MAF file:
|
62
|
+
.
|
63
|
+
.IP "" 4
|
64
|
+
.
|
65
|
+
.nf
|
66
|
+
|
67
|
+
$ maf_index chr22\.maf chr22\.kct
|
68
|
+
.
|
69
|
+
.fi
|
70
|
+
.
|
71
|
+
.IP "" 0
|
72
|
+
.
|
73
|
+
.P
|
74
|
+
Dump out an index:
|
75
|
+
.
|
76
|
+
.IP "" 4
|
77
|
+
.
|
78
|
+
.nf
|
79
|
+
|
80
|
+
$ maf_index \-d chr22\.kct > /tmp/chr22\.dump
|
81
|
+
.
|
82
|
+
.fi
|
83
|
+
.
|
84
|
+
.IP "" 0
|
85
|
+
.
|
86
|
+
.SH "ENVIRONMENT"
|
87
|
+
\fBmaf_index\fR is a Ruby program and relies on ordinary Ruby environment variables\.
|
88
|
+
.
|
89
|
+
.SH "BUGS"
|
90
|
+
\fBmaf_index\fR does not currently allow Kyoto Cabinet database parameters to be set\.
|
91
|
+
.
|
92
|
+
.SH "COPYRIGHT"
|
93
|
+
\fBmaf_index\fR is copyright (C) 2012 Clayton Wheeler\.
|
94
|
+
.
|
95
|
+
.SH "SEE ALSO"
|
96
|
+
ruby(1), kctreemgr(1)
|
97
|
+
.
|
98
|
+
.IP "\(bu" 4
|
99
|
+
\fIhttps://github\.com/csw/bioruby\-maf/\fR
|
100
|
+
.
|
101
|
+
.IP "\(bu" 4
|
102
|
+
\fIhttp://fallabs\.com/kyotocabinet/\fR
|
103
|
+
.
|
104
|
+
.IP "" 0
|
105
|
+
|
@@ -0,0 +1,97 @@
|
|
1
|
+
maf_index(1) -- build and examine MAF indexes
|
2
|
+
=============================================
|
3
|
+
|
4
|
+
## SYNOPSIS
|
5
|
+
|
6
|
+
`maf_index` [-t] <var>maf</var> <var>index</var><br>
|
7
|
+
`maf_index` `-d`|`--dump` <var>index</var>
|
8
|
+
|
9
|
+
## DESCRIPTION
|
10
|
+
|
11
|
+
**maf_index** is part of the bioruby-maf library and creates
|
12
|
+
Kyoto Cabinet indexes for Multiple Alignment Format (MAF)
|
13
|
+
files. These indexes enable other MAF tools to selectively extract
|
14
|
+
alignment blocks of interest.
|
15
|
+
|
16
|
+
In its default mode, `maf_index` parses the <var>maf</var> file given as an
|
17
|
+
argument and creates an index in <var>index</var>.
|
18
|
+
|
19
|
+
The index data is stored in binary form, so with the `--dump`
|
20
|
+
argument, `maf_index` can dump out the index data in human-readable
|
21
|
+
form for debugging.
|
22
|
+
|
23
|
+
## FILES
|
24
|
+
|
25
|
+
The <var>maf</var> input file must be a valid MAF file of any length.
|
26
|
+
|
27
|
+
The index created is a Kyoto Cabinet TreeDB (B+ tree) database;
|
28
|
+
<var>index</var> must have a `.kct` extension.
|
29
|
+
|
30
|
+
## OPTIONS
|
31
|
+
|
32
|
+
TODO
|
33
|
+
|
34
|
+
* `-d`, `--dump`:
|
35
|
+
Instead of creating an index, dump out the given <var>index</var> in
|
36
|
+
human-readable form. Index records will appear like:
|
37
|
+
|
38
|
+
0 [bin 1195] 80082334:80082368
|
39
|
+
offset 16, length 1087
|
40
|
+
text size: 54
|
41
|
+
sequences in block: 10
|
42
|
+
species vector: 00000000000003ff
|
43
|
+
|
44
|
+
* `-t`, `--threaded`:
|
45
|
+
Use a separate reader thread to do I/O in parallel with
|
46
|
+
parsing. Only useful on JRuby.
|
47
|
+
|
48
|
+
* `--time`:
|
49
|
+
Print elapsed time for index creation. Mainly useful for measuring
|
50
|
+
performance with different Ruby implementations, I/O subsystems,
|
51
|
+
etc.
|
52
|
+
|
53
|
+
## EXAMPLES
|
54
|
+
|
55
|
+
Build an index on a MAF file:
|
56
|
+
|
57
|
+
$ maf_index chr22.maf chr22.kct
|
58
|
+
|
59
|
+
Dump out an index:
|
60
|
+
|
61
|
+
$ maf_index -d chr22.kct > /tmp/chr22.dump
|
62
|
+
|
63
|
+
## ENVIRONMENT
|
64
|
+
|
65
|
+
`maf_index` is a Ruby program and relies on ordinary Ruby environment
|
66
|
+
variables.
|
67
|
+
|
68
|
+
## BUGS
|
69
|
+
|
70
|
+
`maf_index` does not currently allow Kyoto Cabinet database parameters
|
71
|
+
to be set.
|
72
|
+
|
73
|
+
## COPYRIGHT
|
74
|
+
|
75
|
+
`maf_index` is copyright (C) 2012 Clayton Wheeler.
|
76
|
+
|
77
|
+
## SEE ALSO
|
78
|
+
|
79
|
+
ruby(1), kctreemgr(1)
|
80
|
+
|
81
|
+
* <https://github.com/csw/bioruby-maf/>
|
82
|
+
* <http://fallabs.com/kyotocabinet/>
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
[SYNOPSIS]: #SYNOPSIS "SYNOPSIS"
|
87
|
+
[DESCRIPTION]: #DESCRIPTION "DESCRIPTION"
|
88
|
+
[FILES]: #FILES "FILES"
|
89
|
+
[OPTIONS]: #OPTIONS "OPTIONS"
|
90
|
+
[EXAMPLES]: #EXAMPLES "EXAMPLES"
|
91
|
+
[ENVIRONMENT]: #ENVIRONMENT "ENVIRONMENT"
|
92
|
+
[BUGS]: #BUGS "BUGS"
|
93
|
+
[COPYRIGHT]: #COPYRIGHT "COPYRIGHT"
|
94
|
+
[SEE ALSO]: #SEE-ALSO "SEE ALSO"
|
95
|
+
|
96
|
+
|
97
|
+
[maf_index(1)]: maf_index.1.html
|
@@ -0,0 +1,83 @@
|
|
1
|
+
maf_index(1) -- build and examine MAF indexes
|
2
|
+
=============================================
|
3
|
+
|
4
|
+
## SYNOPSIS
|
5
|
+
|
6
|
+
`maf_index` [-t] <maf> <index><br>
|
7
|
+
`maf_index` `-d`|`--dump` <index>
|
8
|
+
|
9
|
+
## DESCRIPTION
|
10
|
+
|
11
|
+
**maf_index** is part of the bioruby-maf library and creates
|
12
|
+
Kyoto Cabinet indexes for Multiple Alignment Format (MAF)
|
13
|
+
files. These indexes enable other MAF tools to selectively extract
|
14
|
+
alignment blocks of interest.
|
15
|
+
|
16
|
+
In its default mode, `maf_index` parses the <maf> file given as an
|
17
|
+
argument and creates an index in <index>.
|
18
|
+
|
19
|
+
The index data is stored in binary form, so with the `--dump`
|
20
|
+
argument, `maf_index` can dump out the index data in human-readable
|
21
|
+
form for debugging.
|
22
|
+
|
23
|
+
## FILES
|
24
|
+
|
25
|
+
The <maf> input file must be a valid MAF file of any length.
|
26
|
+
|
27
|
+
The index created is a Kyoto Cabinet TreeDB (B+ tree) database;
|
28
|
+
<index> must have a `.kct` extension.
|
29
|
+
|
30
|
+
## OPTIONS
|
31
|
+
|
32
|
+
TODO
|
33
|
+
|
34
|
+
* `-d`, `--dump`:
|
35
|
+
Instead of creating an index, dump out the given <index> in
|
36
|
+
human-readable form. Index records will appear like:
|
37
|
+
|
38
|
+
0 [bin 1195] 80082334:80082368
|
39
|
+
offset 16, length 1087
|
40
|
+
text size: 54
|
41
|
+
sequences in block: 10
|
42
|
+
species vector: 00000000000003ff
|
43
|
+
|
44
|
+
* `-t`, `--threaded`:
|
45
|
+
Use a separate reader thread to do I/O in parallel with
|
46
|
+
parsing. Only useful on JRuby.
|
47
|
+
|
48
|
+
* `--time`:
|
49
|
+
Print elapsed time for index creation. Mainly useful for measuring
|
50
|
+
performance with different Ruby implementations, I/O subsystems,
|
51
|
+
etc.
|
52
|
+
|
53
|
+
## EXAMPLES
|
54
|
+
|
55
|
+
Build an index on a MAF file:
|
56
|
+
|
57
|
+
$ maf_index chr22.maf chr22.kct
|
58
|
+
|
59
|
+
Dump out an index:
|
60
|
+
|
61
|
+
$ maf_index -d chr22.kct > /tmp/chr22.dump
|
62
|
+
|
63
|
+
## ENVIRONMENT
|
64
|
+
|
65
|
+
`maf_index` is a Ruby program and relies on ordinary Ruby environment
|
66
|
+
variables.
|
67
|
+
|
68
|
+
## BUGS
|
69
|
+
|
70
|
+
`maf_index` does not currently allow Kyoto Cabinet database parameters
|
71
|
+
to be set.
|
72
|
+
|
73
|
+
## COPYRIGHT
|
74
|
+
|
75
|
+
`maf_index` is copyright (C) 2012 Clayton Wheeler.
|
76
|
+
|
77
|
+
## SEE ALSO
|
78
|
+
|
79
|
+
ruby(1), kctreemgr(1)
|
80
|
+
|
81
|
+
* <https://github.com/csw/bioruby-maf/>
|
82
|
+
* <http://fallabs.com/kyotocabinet/>
|
83
|
+
|
data/man/maf_to_fasta.1
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
.\" generated with Ronn/v0.7.3
|
2
|
+
.\" http://github.com/rtomayko/ronn/tree/0.7.3
|
3
|
+
.
|
4
|
+
.TH "MAF_TO_FASTA" "1" "June 2012" "Clayton Wheeler" "BioRuby Manual"
|
5
|
+
.
|
6
|
+
.SH "NAME"
|
7
|
+
\fBmaf_to_fasta\fR \- convert MAF file to FASTA
|
8
|
+
.
|
9
|
+
.SH "SYNOPSIS"
|
10
|
+
\fBmaf_to_fasta\fR [\fIoptions\fR\.\.\.] \fImaf\fR \fIfasta\fR
|
11
|
+
.
|
12
|
+
.SH "DESCRIPTION"
|
13
|
+
\fBmaf_to_fasta\fR, part of the bioruby\-maf library, converts Multiple Alignment Format (MAF) files to FASTA format\. It does not attempt to combine alignment blocks, but simply writes out each sequence in order\.
|
14
|
+
.
|
15
|
+
.SH "FILES"
|
16
|
+
The \fImaf\fR input file must be a valid MAF file of any length\.
|
17
|
+
.
|
18
|
+
.P
|
19
|
+
The \fIfasta\fR output file will be written in FASTA format\.
|
20
|
+
.
|
21
|
+
.SH "OPTIONS"
|
22
|
+
The options are only useful for performance measurement\.
|
23
|
+
.
|
24
|
+
.SH "EXAMPLES"
|
25
|
+
Convert a MAF file to FASTA:
|
26
|
+
.
|
27
|
+
.IP "" 4
|
28
|
+
.
|
29
|
+
.nf
|
30
|
+
|
31
|
+
$ maf_to_fasta chrY\.maf chrY\.fa
|
32
|
+
.
|
33
|
+
.fi
|
34
|
+
.
|
35
|
+
.IP "" 0
|
36
|
+
.
|
37
|
+
.SH "ENVIRONMENT"
|
38
|
+
\fBmaf_to_fasta\fR is a Ruby program and relies on ordinary Ruby environment variables\.
|
39
|
+
.
|
40
|
+
.SH "BUGS"
|
41
|
+
\fBmaf_to_fasta\fR should provide flexibility in selecting the alignment blocks and sequences to convert, and perhaps allow alignment blocks to be spliced together\.
|
42
|
+
.
|
43
|
+
.SH "COPYRIGHT"
|
44
|
+
\fBmaf_to_fasta\fR is copyright (C) 2012 Clayton Wheeler\.
|
45
|
+
.
|
46
|
+
.SH "SEE ALSO"
|
47
|
+
ruby(1)
|
48
|
+
.
|
49
|
+
.IP "\(bu" 4
|
50
|
+
\fIhttps://github\.com/csw/bioruby\-maf/\fR
|
51
|
+
.
|
52
|
+
.IP "" 0
|
53
|
+
|
@@ -0,0 +1,51 @@
|
|
1
|
+
maf_to_fasta(1) -- convert MAF file to FASTA
|
2
|
+
============================================
|
3
|
+
|
4
|
+
## SYNOPSIS
|
5
|
+
|
6
|
+
`maf_to_fasta` [<options>...] <maf> <fasta>
|
7
|
+
|
8
|
+
## DESCRIPTION
|
9
|
+
|
10
|
+
**maf_to_fasta**, part of the bioruby-maf library, converts Multiple
|
11
|
+
Alignment Format (MAF) files to FASTA format. It does not attempt to
|
12
|
+
combine alignment blocks, but simply writes out each sequence in
|
13
|
+
order.
|
14
|
+
|
15
|
+
## FILES
|
16
|
+
|
17
|
+
The <maf> input file must be a valid MAF file of any length.
|
18
|
+
|
19
|
+
The <fasta> output file will be written in FASTA format.
|
20
|
+
|
21
|
+
## OPTIONS
|
22
|
+
|
23
|
+
The options are only useful for performance measurement.
|
24
|
+
|
25
|
+
## EXAMPLES
|
26
|
+
|
27
|
+
Convert a MAF file to FASTA:
|
28
|
+
|
29
|
+
$ maf_to_fasta chrY.maf chrY.fa
|
30
|
+
|
31
|
+
## ENVIRONMENT
|
32
|
+
|
33
|
+
`maf_to_fasta` is a Ruby program and relies on ordinary Ruby
|
34
|
+
environment variables.
|
35
|
+
|
36
|
+
## BUGS
|
37
|
+
|
38
|
+
`maf_to_fasta` should provide flexibility in selecting the alignment
|
39
|
+
blocks and sequences to convert, and perhaps allow alignment blocks to
|
40
|
+
be spliced together.
|
41
|
+
|
42
|
+
## COPYRIGHT
|
43
|
+
|
44
|
+
`maf_to_fasta` is copyright (C) 2012 Clayton Wheeler.
|
45
|
+
|
46
|
+
## SEE ALSO
|
47
|
+
|
48
|
+
ruby(1)
|
49
|
+
|
50
|
+
* <https://github.com/csw/bioruby-maf/>
|
51
|
+
|