bio-maf 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.simplecov +1 -0
- data/.travis.yml +16 -0
- data/.yardopts +3 -0
- data/DEVELOPMENT.md +40 -0
- data/Gemfile +23 -0
- data/LICENSE.txt +20 -0
- data/README.md +209 -0
- data/Rakefile +76 -0
- data/VERSION +1 -0
- data/benchmarks/dispatch_bench +53 -0
- data/benchmarks/iter_bench +44 -0
- data/benchmarks/read_bench +40 -0
- data/benchmarks/sort_bench +33 -0
- data/benchmarks/split_bench +33 -0
- data/bin/maf_count +82 -0
- data/bin/maf_dump_blocks +27 -0
- data/bin/maf_extract_ranges_count +44 -0
- data/bin/maf_index +88 -0
- data/bin/maf_parse_bench +94 -0
- data/bin/maf_to_fasta +68 -0
- data/bin/maf_write +84 -0
- data/bin/random_ranges +35 -0
- data/features/maf-indexing.feature +31 -0
- data/features/maf-output.feature +29 -0
- data/features/maf-parsing.feature +44 -0
- data/features/maf-querying.feature +75 -0
- data/features/maf-to-fasta.feature +50 -0
- data/features/step_definitions/convert_steps.rb +45 -0
- data/features/step_definitions/index_steps.rb +20 -0
- data/features/step_definitions/output_steps.rb +27 -0
- data/features/step_definitions/parse_steps.rb +63 -0
- data/features/step_definitions/query_steps.rb +31 -0
- data/features/step_definitions/ucsc_bin_steps.rb +14 -0
- data/features/support/env.rb +16 -0
- data/features/ucsc-bins.feature +24 -0
- data/lib/bio-maf.rb +12 -0
- data/lib/bio-maf/maf.rb +3 -0
- data/lib/bio/maf.rb +4 -0
- data/lib/bio/maf/index.rb +620 -0
- data/lib/bio/maf/parser.rb +888 -0
- data/lib/bio/maf/struct.rb +63 -0
- data/lib/bio/maf/writer.rb +63 -0
- data/lib/bio/ucsc.rb +2 -0
- data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
- data/lib/bio/ucsc/ucsc_bin.rb +117 -0
- data/man/.gitignore +1 -0
- data/man/maf_index.1 +105 -0
- data/man/maf_index.1.markdown +97 -0
- data/man/maf_index.1.ronn +83 -0
- data/man/maf_to_fasta.1 +53 -0
- data/man/maf_to_fasta.1.ronn +51 -0
- data/spec/bio/maf/index_spec.rb +363 -0
- data/spec/bio/maf/parser_spec.rb +354 -0
- data/spec/bio/maf/struct_spec.rb +75 -0
- data/spec/spec_helper.rb +14 -0
- data/test/data/big-block.maf +15999 -0
- data/test/data/chr22_ieq.maf +11 -0
- data/test/data/chrY-1block.maf +6 -0
- data/test/data/empty +0 -0
- data/test/data/empty.db +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chr7_tiny.maf +76 -0
- data/test/data/mm8_mod_a.maf +7 -0
- data/test/data/mm8_single.maf +13 -0
- data/test/data/mm8_subset_a.maf +23 -0
- data/test/data/t1-bad1.maf +15 -0
- data/test/data/t1.fasta +12 -0
- data/test/data/t1.maf +15 -0
- data/test/data/t1a.maf +17 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-maf.rb +7 -0
- data/travis-ci/install_kc +13 -0
- data/travis-ci/install_kc_java +13 -0
- data/travis-ci/report_errors +4 -0
- metadata +182 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
##maf version=1 scoring=autoMZ.v1
|
2
|
+
a score=13668.000000
|
3
|
+
s hg19.chr22 16054189 54 + 51304566 TCTGTGAAACCCACAGTAATGGGGCTGACATCCTCTGCCCTATGCAAGAGAGGT
|
4
|
+
s ponAbe2.chrUn 13354616 54 + 72422247 TCTTTCAAACCCACAGTAATGGGGCTGACATCCTCTACCATATGCAAGAGAGGT
|
5
|
+
q ponAbe2.chrUn 855489998999999899968899889893997969799999879999999989
|
6
|
+
i ponAbe2.chrUn C 0 C 0
|
7
|
+
s panTro2.chrUn 7684562 54 + 58616431 TCTGTGAAACCCACAGTAATGGGGCTGACATCCTCTGCCCTATGCAAGAGAGAT
|
8
|
+
q panTro2.chrUn 999999999999999999999999999999999999999999999999999999
|
9
|
+
i panTro2.chrUn C 0 C 0
|
10
|
+
e turTru1.scaffold_109008 25049 1601 + 50103 I
|
11
|
+
|
@@ -0,0 +1,6 @@
|
|
1
|
+
##maf version=1 scoring=autoMZ.v1
|
2
|
+
a score=4443.000000
|
3
|
+
s hg19.chrY 10501 107 + 59373566 GGACAGCCCGGAAAATGAGCTCCTCATCTCTAACCCAGTTCCCCTGTGGGGATTTAGGGGACCAGGGACAGCCCGTTGCATGAGCCCCTGGACTCTAACCCAGTTCC
|
4
|
+
s tarSyr1.scaffold_53149 1869 107 + 12002 GGACAGCCCCACAGATGATCTCCTGTTCTGTAAACCAGTTCCCCTGGAGGGACTGAAGGAACCTGGGAGAGGCCCGCAGAGGGTCTCCTGGTTTGTAGGCCAGTTCC
|
5
|
+
q tarSyr1.scaffold_53149 99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999
|
6
|
+
i tarSyr1.scaffold_53149 N 0 N 0
|
data/test/data/empty
ADDED
File without changes
|
data/test/data/empty.db
ADDED
Binary file
|
Binary file
|
@@ -0,0 +1,76 @@
|
|
1
|
+
##maf version=1
|
2
|
+
a score=10542.0
|
3
|
+
s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA-
|
4
|
+
s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA-
|
5
|
+
s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG-
|
6
|
+
s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
7
|
+
s panTro2.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
8
|
+
s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA-
|
9
|
+
s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG-
|
10
|
+
s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA-
|
11
|
+
s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA
|
12
|
+
s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA
|
13
|
+
|
14
|
+
a score=-33148.0
|
15
|
+
s mm8.chr7 80082368 103 + 145134094 TGAGAGGGCATGCT-GTGAAGGGACTGTGCT---CAGTTCAAGGCATAGTCCACTTCC--------CTTCCCTTGGTCATTCTGTTCGGTGTGTTTCCAGCAGATATGGAGAGT-------------------------------------C----
|
16
|
+
s rn4.chr1 136011819 86 + 267910886 TGAGAGGGCATGTT-ATGAAGGCACTGTGCT--------------------CACTTTC--------CATCCCATGGTCATTCTGTTGAGTGTGTTCCCAGCAGATACGGAAAGT-------------------------------------C----
|
17
|
+
s oryCun1.scaffold_199771 14064 74 - 75077 TAGGACTGCCTGGTGGGGGGGGCCCTGCACC--------------------TACTTCTGCAAGGCACGTCCCGCG----------TCTGTGCCTTCGCCGCA-----------T-------------------------------------C----
|
18
|
+
s hg18.chr15 88557607 128 + 100338915 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTCTACTCCCAGCATGGCTGGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC----
|
19
|
+
s panTro2.chr15 87959864 116 + 100063422 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTA------------GGCTAGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC----
|
20
|
+
s rheMac2.chr7 69864742 107 + 169801366 GGAGAAAGCCTGGT-TAAGGGGCCCTTCA-----CTCTCCAAGGCACATTCCACTTTC--------TGTCCCTTTGTCATTCCATTCACTCTACTCCCCGCATGGCTAGAGGGC----------------------TGG---------AGGC----
|
21
|
+
s canFam2.chr3 56030609 103 + 94715083 AGGGAATGCATGGTGTATGGGGGCCCCCGTC--------------------CACTTC---------TGTCCCGTTGCTATTTCCTTGACCATACTTCCAGTATGACTGGGGGAG---GTGCGG---TGGAGCAGGTTC------------------
|
22
|
+
s loxAfr1.scaffold_8298 30302 144 + 78952 --TGGATGCCTGGT-TTAAGGATCC-GCTCACCCACTTCTGAGTCACGTTACACTTTC--------TGCCCCTTTGCCATTTCATTTATGGTACTCCCAACACCGGGGGAGGGTGCGCTTTGGTTCTTGAGCAGTTTGTGTATATAGGGGGCTGAG
|
23
|
+
s echTel1.scaffold_304651 631 67 - 10007 --TGGAGGGCTACT-TTAAGAAACC----CTCCCGTTTCTCAG-------------CC--------TGCTTC---------------------------------------------CTTTGGGTTTGAGGTACTTTGT----------------G
|
24
|
+
|
25
|
+
a score=87527.0
|
26
|
+
s mm8.chr7 80082471 121 + 145134094 CTG-AGC---------------CGCTGGCCCCTGGGCTTCCCCTCCAGCCTGGCTTGACTTTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CCCAGGCTGAAGTGGAGGGGGTGTTGAGCTGCCACCTGGGACTT
|
27
|
+
s rn4.chr1 136011905 121 + 267910886 TCG-GAC---------------CGCTGGCACCCAGGCTTCCCCTCCAGCCTGGCCTGACTCTGTCTGAGGGACCCTGGGCAGC-TTGCCATCCA---------CGCAGGCAAAAGTGGAGGGGATGTTGAGCTGCCACCTGGAACTT
|
28
|
+
s oryCun1.scaffold_199771 14138 103 - 75077 CCGCAGT---------------GGATCCCACCTCGGCTGTAGCAGTAGGCCAACCAGG----GCCCGACAGGCGCCCGGCTGTGCTGGCTTCCA-CACCCTCTCCCAGGC---------------------CTGCCACCCAGGC---
|
29
|
+
s hg18.chr15 88557735 127 + 100338915 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG
|
30
|
+
s panTro2.chr15 87959980 127 + 100063422 CTG-GGCTGAACCAGGGACT--GGCTGGTCTATAGGTTTCCCCTCCAGCC-GGCTGCACTCTG----TAGTGCCCGTGGCAGG-TTTCCACCCC-----TTCTCCCAGGCGTAAGTGGG------ATTGAGTTGCCACCTGGGACTG
|
31
|
+
s rheMac2.chr7 69864849 116 + 169801366 CTG-GGCTGAACCAGGGGCT--GGCTGGTCTGCAG----------------GGCTGCACTCTGTCTATAGTGCCCGAGGCAGG-TTTCCACCCC-----TTCTTCCAGTCGTAAGTGGG------GTTGAGCTGCCACCTGGGACTG
|
32
|
+
s bosTau2.scaffold2397 93191 110 + 117874 CTG-GGC---------------AGCTGGCGCCTCGGCTGCCCCTCCCACCTGGCT-------------GTGACCCTTGGCAAG-TCTCCCCGCCCCCCATGCCCCCAGGCCTGAGCAAG------GCTGAGCTGCCACCT-GGACTA
|
33
|
+
s canFam2.chr3 56030712 116 + 94715083 TCT-AGC---------------AGCTGGCGCCCCAGCTGTCCTTCCAACCTGGCTGTGCTCTGTCTACGTGACCTTTGGCAGA-TTGCCACTCC-------CTCCCAGGCCCGAGCAGG------GCCAAGCTGCCACCT-GGATGG
|
34
|
+
s loxAfr1.scaffold_8298 30446 129 + 78952 CTG-AAC-----CAGGGACTGCAGCTAGTGCCTGGGCCACCGCTCCAGCCTGGCTGTGCTCTGTCTACAGGACGCATGGCAAG-TTGCCACCCC----CCTCTCCCAGG-CTAGGTGGG------GCTAAGCTGCCACTTGAAACTT
|
35
|
+
s echTel1.scaffold_304651 698 101 - 10007 CTG-GAC-----CAGGAACTGCAGCT---------GCTGCCCCTCTAGCCTACCTGTGC---------------CTTGGCAGG-TTGCCAGCCC-------CTCCCAGGCCTAGGTGGG------GTGACGCTGCCTCCTGGGAC--
|
36
|
+
|
37
|
+
a score=185399.0
|
38
|
+
s mm8.chr7 80082592 121 + 145134094 GTGCTTATCTCGGACTCTTGGCATTTCTGTTTCTGGACAGAACCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCC----ACCCAGGTGGAAATG--CCCTCCGGTGCAGGCAGATAAGCTCTGG
|
39
|
+
s rn4.chr1 136012026 121 + 267910886 GTGCTTATCTTGGCCTCTTGGCATTTCTGTATCTGGACAGAATCCAAGGGTGGCTTCCCGCTTAGAGCTGTAGGTCCC----ACCCAGGTGGAAATG--CCCTCCGGAGCAGGCAGATAAGCTCTGG
|
40
|
+
s oryCun1.scaffold_199771 14241 119 - 75077 ---CTTATCTCCGACTGCTGGCATTGCTGTGTCTGGGCAGAGGCCAAGGGCGGCCTCCCGCACAGACACTCGGGGCCC----GCCCAGGTAGAAGTG-CCCCTCCTGTGCAGGCAGATAAGCGCTGG
|
41
|
+
s hg18.chr15 88557862 119 + 100338915 AGGCTTATCTCTGACTCTTGGCATTTCTTTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCT----GCCCAGGTGGAAACG--CTCTTT--TGCAGGTAGATAAGCACGGG
|
42
|
+
s panTro2.chr15 87960107 119 + 100063422 AGGCTTATCTCTGACTCTTGGCATTTCTTTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCT----GCCCAGGTGGAAACG--CTCTTT--TGCAGGTAGATAAGCACGGG
|
43
|
+
s rheMac2.chr7 69864965 114 + 169801366 AGGCTTATCTCTGATCCTTGGCATTTCTGTGTCTGGACAGATTCCAAGGGCGGTCTGCTGCCCAGACTTACAGGGCCC----GCCCAGGTG-----G--CTCTTC--TGCAGGTAGATAAGCATGGG
|
44
|
+
s bosTau2.scaffold2397 93301 123 + 117874 AAGCTTATCTCTGACCTTTGGCATTCCTGTGTGTGGACAGATTGCAAGAGCAGCCTCT-GCCCAGGCTTACGGGGACCTGCTGCCTCGGTAGAAATG-CGCCTCCTCTGTAGGCAGATAAGCCCT--
|
45
|
+
s canFam2.chr3 56030828 121 + 94715083 CAACTTATCTTTGACCTTCGGCATTTCTATATCTGGATGGATCCTAAGTGCAGCCTCCAGCCTAGACTTCCAGGACCC----ACCCTGGGA-AGATG-CCCCTCCTGTGTGGGCAGATAAATGTTGG
|
46
|
+
s echTel1.scaffold_304651 799 118 - 10007 ATGACAATCT--GACCTTTGACATT--TGTTTTAGGATAGGTTCCAAGTGAAGCCTCCTGCCTAGACTTCCTGATTCT-----CCCAGATAGAAGCGCCCCCTTCTTGGAAGACAGATAAGCGATAA
|
47
|
+
|
48
|
+
a score=30120.0
|
49
|
+
s mm8.chr7 80082713 54 + 145134094 CAA-------ACCAAAGGCAGCCTGT-GCTTCCAGAAAACCTT-GAGGGGTGCAAGAGATAAA
|
50
|
+
s rn4.chr1 136012147 54 + 267910886 CAA-------ACCAGAGGCAGCCTAC-GTTTCCAGAAAACCTT-GAGGGGTACAAGAGATAAA
|
51
|
+
s hg18.chr15 88557981 62 + 100338915 CAACCAGCTTATCTGAACCAGCCCTT-GCTTCCAGAGAACTATGGAAAAATCCAAAAGATAAG
|
52
|
+
s panTro2.chr15 87960226 62 + 100063422 CAACCAGCTTATCTGAACCAGCCCTT-GCTTCCAGAGAACTATGGAAAAATCCAAAAGATAAG
|
53
|
+
s rheMac2.chr7 69865079 62 + 169801366 CAACCAGCTTATCTGAACCAGCCCTC-GTTTCCAGGTAACTCTGGAAAAATCCAAAAGATGAG
|
54
|
+
s canFam2.chr3 56030949 40 + 94715083 -------CATATTTGACCCAGCCCTTGGCTTTCAGAAAACC------------ACAA----AG
|
55
|
+
s echTel1.scaffold_304651 917 55 - 10007 CAA-------ATTCCATCCCACCCTT-CGTTCTGGACGGGCTGGGAGGGGTACAAAAGATAAA
|
56
|
+
|
57
|
+
a score=58255.0
|
58
|
+
s mm8.chr7 80082767 128 + 145134094 GGGGTGCAGGAGCTGTG----TGTCTTGATCTCCCAGA----GTCTTCGTGAGCCT-----------CACTTTTTGTCTTATCCCT---GTGATACACACAGG-AAGCCACAGTGAATTCAGTGGGTGTCAT---------ACAGAAGGGCCTCC-TGGAG-
|
59
|
+
s rn4.chr1 136012201 139 + 267910886 GGGGTACAGGAGCTGTG----TG-CTTGATGTCGCTGA----GCCTTCGTGAGGCTCCTGTGAGCTGCACTTTTTGTCTCGTCCCT---GTGATAGACACAAG-AAGCCACAGTGAATTCAGTGGGTATCAT---------ATGGAAGGGCCTCCTTGGAC-
|
60
|
+
s hg18.chr15 88558043 143 + 100338915 AAGGGACCGCAG-TGTC----TGTCTTGGTCTCAC--------TCCTCTTGAGACTCCTGTGAT---CTTTATATGTCTCATTCCTCCCGTGACATGTATGAG-AAACTGCAGCTCATTGAGACGATGTCTCTGCTGCCTGACAGAAGGGCCTAC-TTGAG-
|
61
|
+
s panTro2.chr15 87960288 143 + 100063422 AAGGGACCGCAG-TGTC----TGTCTTGGTCTCAC--------TCCTCTTGAGACTCCTGTGAT---CTTTATATGTCTCATTCCTCCCGTGACATGTATGAG-AAACTGCAGCTCATTGAGACGATGTCTCTGCTGCCTGACAGAAGGGCCTAC-TTGAG-
|
62
|
+
s rheMac2.chr7 69865141 147 + 169801366 GAGGGACCACAG-TGTCTGTTTGTCCTGGTCTCAC--------TCCTCATGAGACTCCTGTGAT---CTTTGTATGTCTCATTCCTCCTGTGACATGTATGAG-AATGTACAGCTCAGTGAGATGATGTCTCTGCTGCCTGACAGAAGTGCCTAC-TTGAG-
|
63
|
+
s bosTau2.scaffold2397 93775 133 + 117874 GGACTGCAGTGGCCATT----TGCTCTGGCCTCACTGA----CTCCTTGTGAGCCCGCTGTGAG---TTTTGTTT---TCATTATCCCCAT------TATGAGAAAACTCCAGTTTGGTGAGATGGCATCTACCCTGCCCT--------ACAAAC-ATGgtg
|
64
|
+
s canFam2.chr3 56030989 153 + 94715083 GGGATGTGGAAGACGTT----TGCCCTCGTCTCACAGACTCCCTCCTTGTAAGGCTGCTGGGAG---TCATATTTTGCTCATTATCCCTGCGGTATGTATGAG-AAGCCAAAGGTCAGTGAGCTGGAGTTTGCACTGCCCTCCAGAGGGACCGAC-ATGgtg
|
65
|
+
|
66
|
+
a score=2607.0
|
67
|
+
s mm8.chr7 80082895 114 + 145134094 CTTCTCAGAGTGTAGT-----------CCTTGGGCTACC-TCCTCCTAAGTCACTGGG-----------------------AGCTGGTCA-AGAGG------CTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCT--------GGAGTCCGGGTGATGTT
|
68
|
+
s rn4.chr1 136012340 112 + 267910886 CTTCTCAGA--GTAGT-----------CCTTGGGCCACC-TCCTTCTAAGTTACTGAG-----------------------AGCTGGTCA-AGAGG------CTCAGACCAGCAGTTTCAGAATCTCTTGGGAGGGCCT--------GGAGTCAAGGTACTGTT
|
69
|
+
s rheMac2.chr7 69865323 119 + 169801366 CTTCTTGTTGACTAGTGTCACCCCCACCCGAGGGCTTCCTTCCTCATTTGCTGCCAGGTGTAAAGCTGAGCTTC-------agctgggcgcagtgg------ctcacacccataatcctagca--ttttgggag------------------------------
|
70
|
+
s bosTau2.scaffold2397 93908 136 + 117874 cttctcaaagtgtgct-----------ccatgagcctcc-tacttcagaatcccctgg---------gagattcaaaaccttgcatgttc-tcaggccccatcacgggccagcatcgtcagagtcttcagggtcagctcgtggatctagagtgtaggt------
|
71
|
+
s canFam2.chr3 56031142 126 + 94715083 cttttcagagggtggt-----------ccctgggcctcc-cactttggaattgcctgg---------gag-ctcatagaattgcccgttg-tcagg--ccatcccagggcagtggcagcag-gcctctagggcaggcct------------ttcaggtgacttt
|
72
|
+
|
73
|
+
a score=8132.0
|
74
|
+
s mm8.chr7 80083009 147 + 145134094 TAGGGAGGTTGGCATTGGTGCTGGAACTTTCCTTGGCCCCCCAATTTATCGAAGTACTAAGGGTTGGAAGTCTCTGGAGCTGCAGGAGTT--GAGTTTGAGAAAAGGCTCTTGGTGGTTTAAAGAGA----------------GGTTTCAACTGC--------------------------CTCTGGCCTC
|
75
|
+
s rn4.chr1 136012452 190 + 267910886 TAGGGAGATTGGGATTGGTACTGGAACTTTCCTTGGCCTCCCAGTGTATT-CAGTACTAAGGGTTGGAAGTCTCGGGTGCTACAAGAATTAAGAGTTTGAGAAGAGGCTCTTGGTAGTTTAGAAAGAGAGAAGGACATCTTTGGGTTTCGACTACCTGTGGTGGCAGTGTCAGAATTCAGGCTCTGGCCTC
|
76
|
+
|
@@ -0,0 +1,7 @@
|
|
1
|
+
##maf version=1
|
2
|
+
a score=10542.0
|
3
|
+
s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA-
|
4
|
+
s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA-
|
5
|
+
s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG-
|
6
|
+
s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
7
|
+
s hg181.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
@@ -0,0 +1,13 @@
|
|
1
|
+
##maf version=1
|
2
|
+
a score=10542.0
|
3
|
+
s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA-
|
4
|
+
s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA-
|
5
|
+
s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG-
|
6
|
+
s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
7
|
+
s panTro2.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
8
|
+
s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA-
|
9
|
+
s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG-
|
10
|
+
s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA-
|
11
|
+
s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA
|
12
|
+
s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA
|
13
|
+
|
@@ -0,0 +1,23 @@
|
|
1
|
+
##maf version=1
|
2
|
+
a score=10542.0
|
3
|
+
s mm8.chr7 80082334 34 + 145134094 GGGCTGAGGGC--AGGGATGG---AGGGCGGTCC--------------CAGCA-
|
4
|
+
s rn4.chr1 136011785 34 + 267910886 GGGCTGAGGGC--AGGGACGG---AGGGCGGTCC--------------CAGCA-
|
5
|
+
s oryCun1.scaffold_199771 14021 43 - 75077 -----ATGGGC--AAGCGTGG---AGGGGAACCTCTCCTCCCCTCCGACAAAG-
|
6
|
+
s hg18.chr15 88557580 27 + 100338915 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
7
|
+
s panTro2.chr15 87959837 27 + 100063422 --------GGC--AAGTGTGGA--AGGGAAGCCC--------------CAGAA-
|
8
|
+
s rheMac2.chr7 69864714 28 + 169801366 -------GGGC--AAGTATGGA--AGGGAAGCCC--------------CAGAA-
|
9
|
+
s canFam2.chr3 56030570 39 + 94715083 AGGTTTAGGGCAGAGGGATGAAGGAGGAGAATCC--------------CTATG-
|
10
|
+
s dasNov1.scaffold_106893 7435 34 + 9831 GGAACGAGGGC--ATGTGTGG---AGGGGGCTGC--------------CCACA-
|
11
|
+
s loxAfr1.scaffold_8298 30264 38 + 78952 ATGATGAGGGG--AAGCGTGGAGGAGGGGAACCC--------------CTAGGA
|
12
|
+
s echTel1.scaffold_304651 594 37 - 10007 -TGCTATGGCT--TTGTGTCTAGGAGGGGAATCC--------------CCAGGA
|
13
|
+
|
14
|
+
a score=-33148.0
|
15
|
+
s mm8.chr7 80082368 103 + 145134094 TGAGAGGGCATGCT-GTGAAGGGACTGTGCT---CAGTTCAAGGCATAGTCCACTTCC--------CTTCCCTTGGTCATTCTGTTCGGTGTGTTTCCAGCAGATATGGAGAGT-------------------------------------C----
|
16
|
+
s rn4.chr1 136011819 86 + 267910886 TGAGAGGGCATGTT-ATGAAGGCACTGTGCT--------------------CACTTTC--------CATCCCATGGTCATTCTGTTGAGTGTGTTCCCAGCAGATACGGAAAGT-------------------------------------C----
|
17
|
+
s oryCun1.scaffold_199771 14064 74 - 75077 TAGGACTGCCTGGTGGGGGGGGCCCTGCACC--------------------TACTTCTGCAAGGCACGTCCCGCG----------TCTGTGCCTTCGCCGCA-----------T-------------------------------------C----
|
18
|
+
s hg18.chr15 88557607 128 + 100338915 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTCTACTCCCAGCATGGCTGGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC----
|
19
|
+
s panTro2.chr15 87959864 116 + 100063422 GGGGAAAGCCTGGT-TAAGGGGCCCTTCACCCCCCTCTCCAAGGCACATTCCCCTTTC--------TGTCCCTTTGTCGTTTCATTCACTA------------GGCTAGAGGGC---TTGTGG---CTGGCTCGTTTGG---------AGGC----
|
20
|
+
s rheMac2.chr7 69864742 107 + 169801366 GGAGAAAGCCTGGT-TAAGGGGCCCTTCA-----CTCTCCAAGGCACATTCCACTTTC--------TGTCCCTTTGTCATTCCATTCACTCTACTCCCCGCATGGCTAGAGGGC----------------------TGG---------AGGC----
|
21
|
+
s canFam2.chr3 56030609 103 + 94715083 AGGGAATGCATGGTGTATGGGGGCCCCCGTC--------------------CACTTC---------TGTCCCGTTGCTATTTCCTTGACCATACTTCCAGTATGACTGGGGGAG---GTGCGG---TGGAGCAGGTTC------------------
|
22
|
+
s loxAfr1.scaffold_8298 30302 144 + 78952 --TGGATGCCTGGT-TTAAGGATCC-GCTCACCCACTTCTGAGTCACGTTACACTTTC--------TGCCCCTTTGCCATTTCATTTATGGTACTCCCAACACCGGGGGAGGGTGCGCTTTGGTTCTTGAGCAGTTTGTGTATATAGGGGGCTGAG
|
23
|
+
s echTel1.scaffold_304651 631 67 - 10007 --TGGAGGGCTACT-TTAAGAAACC----CTCCCGTTTCTCAG-------------CC--------TGCTTC---------------------------------------------CTTTGGGTTTGAGGTACTTTGT----------------G
|
@@ -0,0 +1,15 @@
|
|
1
|
+
##maf version=1 scoring=humor.v4
|
2
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
3
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
4
|
+
|
5
|
+
a score=0.128
|
6
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
7
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
8
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
9
|
+
|
10
|
+
|
11
|
+
a score=0.071
|
12
|
+
s human_unc 9077 8 + 10998 ACAGTATT
|
13
|
+
# Comment
|
14
|
+
s horse_unc 4555 6 - 5099 ACA--ATT
|
15
|
+
s fugu_unc 4000 4 + 4038 AC----TT
|
data/test/data/t1.fasta
ADDED
data/test/data/t1.maf
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
##maf version=1 scoring=humor.v4
|
2
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
3
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
4
|
+
|
5
|
+
a score=0.128
|
6
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
7
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
8
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
9
|
+
|
10
|
+
|
11
|
+
a score=0.071
|
12
|
+
s human_unc 9077 8 + 10998 ACAGTATT
|
13
|
+
# Comment
|
14
|
+
s horse_unc 4555 6 - 5099 ACA--ATT
|
15
|
+
s fugu_unc 4000 4 + 4038 AC----TT
|
data/test/data/t1a.maf
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
##maf version=1 scoring=humor.v4
|
2
|
+
# humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
|
3
|
+
# /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
|
4
|
+
|
5
|
+
a score=0.128
|
6
|
+
s human_hoxa 100 8 + 100257 ACA-TTACT
|
7
|
+
s horse_hoxa 120 9 - 98892 ACAATTGCT
|
8
|
+
s fugu_hoxa 88 7 + 90788 ACA--TGCT
|
9
|
+
|
10
|
+
|
11
|
+
a score=0.071
|
12
|
+
s human_unc 9077 8 + 10998 ACAGTATT
|
13
|
+
# Comment
|
14
|
+
s horse_unc 4555 6 - 5099 ACA--ATT
|
15
|
+
s fugu_unc 4000 4 + 4038 AC----TT
|
16
|
+
|
17
|
+
##eof maf
|
data/test/helper.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'test/unit'
|
11
|
+
require 'shoulda'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-maf'
|
16
|
+
|
17
|
+
class Test::Unit::TestCase
|
18
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
## NOTE: I am aware that this shell script is horrible.
|
4
|
+
|
5
|
+
cd /tmp
|
6
|
+
wget http://fallabs.com/kyotocabinet/pkg/kyotocabinet-1.2.76.tar.gz
|
7
|
+
tar xzf kyotocabinet-1.2.76.tar.gz
|
8
|
+
cd kyotocabinet-1.2.76
|
9
|
+
./configure && make && make install
|
10
|
+
grep -q local /etc/ld.so.conf
|
11
|
+
if [ $? -ne 0 ]; then
|
12
|
+
echo "/usr/local/lib" >> /etc/ld.so.conf && ldconfig
|
13
|
+
fi
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
## NOTE: I am aware that this shell script is horrible.
|
4
|
+
|
5
|
+
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-i386
|
6
|
+
|
7
|
+
cd /tmp
|
8
|
+
wget http://fallabs.com/kyotocabinet/javapkg/kyotocabinet-java-1.24.tar.gz
|
9
|
+
tar xzf kyotocabinet-java-1.24.tar.gz
|
10
|
+
cd kyotocabinet-java-1.24
|
11
|
+
./configure && make && make install
|
12
|
+
cp -Rf /usr/local/lib/libjkyotocabinet.so* /usr/lib/jni/
|
13
|
+
ldconfig
|
metadata
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-maf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.0
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- Clayton Wheeler
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-29 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bio-bigbio
|
16
|
+
version_requirements: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ! '>='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
none: false
|
22
|
+
requirement: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
none: false
|
28
|
+
prerelease: false
|
29
|
+
type: :runtime
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: bio-genomic-interval
|
32
|
+
version_requirements: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ~>
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: 0.1.2
|
37
|
+
none: false
|
38
|
+
requirement: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ~>
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.1.2
|
43
|
+
none: false
|
44
|
+
prerelease: false
|
45
|
+
type: :runtime
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: kyotocabinet-java
|
48
|
+
version_requirements: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ~>
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.2.0
|
53
|
+
none: false
|
54
|
+
requirement: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ~>
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: 0.2.0
|
59
|
+
none: false
|
60
|
+
prerelease: false
|
61
|
+
type: :runtime
|
62
|
+
description: Multiple Alignment Format parser for BioRuby.
|
63
|
+
email: cswh@umich.edu
|
64
|
+
executables:
|
65
|
+
- maf_count
|
66
|
+
- maf_dump_blocks
|
67
|
+
- maf_extract_ranges_count
|
68
|
+
- maf_index
|
69
|
+
- maf_parse_bench
|
70
|
+
- maf_to_fasta
|
71
|
+
- maf_write
|
72
|
+
- random_ranges
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files:
|
75
|
+
- LICENSE.txt
|
76
|
+
- README.md
|
77
|
+
files:
|
78
|
+
- .document
|
79
|
+
- .simplecov
|
80
|
+
- .travis.yml
|
81
|
+
- .yardopts
|
82
|
+
- DEVELOPMENT.md
|
83
|
+
- Gemfile
|
84
|
+
- LICENSE.txt
|
85
|
+
- README.md
|
86
|
+
- Rakefile
|
87
|
+
- VERSION
|
88
|
+
- benchmarks/dispatch_bench
|
89
|
+
- benchmarks/iter_bench
|
90
|
+
- benchmarks/read_bench
|
91
|
+
- benchmarks/sort_bench
|
92
|
+
- benchmarks/split_bench
|
93
|
+
- bin/maf_count
|
94
|
+
- bin/maf_dump_blocks
|
95
|
+
- bin/maf_extract_ranges_count
|
96
|
+
- bin/maf_index
|
97
|
+
- bin/maf_parse_bench
|
98
|
+
- bin/maf_to_fasta
|
99
|
+
- bin/maf_write
|
100
|
+
- bin/random_ranges
|
101
|
+
- features/maf-indexing.feature
|
102
|
+
- features/maf-output.feature
|
103
|
+
- features/maf-parsing.feature
|
104
|
+
- features/maf-querying.feature
|
105
|
+
- features/maf-to-fasta.feature
|
106
|
+
- features/step_definitions/convert_steps.rb
|
107
|
+
- features/step_definitions/index_steps.rb
|
108
|
+
- features/step_definitions/output_steps.rb
|
109
|
+
- features/step_definitions/parse_steps.rb
|
110
|
+
- features/step_definitions/query_steps.rb
|
111
|
+
- features/step_definitions/ucsc_bin_steps.rb
|
112
|
+
- features/support/env.rb
|
113
|
+
- features/ucsc-bins.feature
|
114
|
+
- lib/bio-maf.rb
|
115
|
+
- lib/bio-maf/maf.rb
|
116
|
+
- lib/bio/maf.rb
|
117
|
+
- lib/bio/maf/index.rb
|
118
|
+
- lib/bio/maf/parser.rb
|
119
|
+
- lib/bio/maf/struct.rb
|
120
|
+
- lib/bio/maf/writer.rb
|
121
|
+
- lib/bio/ucsc.rb
|
122
|
+
- lib/bio/ucsc/genomic-interval-bin.rb
|
123
|
+
- lib/bio/ucsc/ucsc_bin.rb
|
124
|
+
- man/.gitignore
|
125
|
+
- man/maf_index.1
|
126
|
+
- man/maf_index.1.markdown
|
127
|
+
- man/maf_index.1.ronn
|
128
|
+
- man/maf_to_fasta.1
|
129
|
+
- man/maf_to_fasta.1.ronn
|
130
|
+
- spec/bio/maf/index_spec.rb
|
131
|
+
- spec/bio/maf/parser_spec.rb
|
132
|
+
- spec/bio/maf/struct_spec.rb
|
133
|
+
- spec/spec_helper.rb
|
134
|
+
- test/data/big-block.maf
|
135
|
+
- test/data/chr22_ieq.maf
|
136
|
+
- test/data/chrY-1block.maf
|
137
|
+
- test/data/empty
|
138
|
+
- test/data/empty.db
|
139
|
+
- test/data/mm8_chr7_tiny.kct
|
140
|
+
- test/data/mm8_chr7_tiny.maf
|
141
|
+
- test/data/mm8_mod_a.maf
|
142
|
+
- test/data/mm8_single.maf
|
143
|
+
- test/data/mm8_subset_a.maf
|
144
|
+
- test/data/t1-bad1.maf
|
145
|
+
- test/data/t1.fasta
|
146
|
+
- test/data/t1.maf
|
147
|
+
- test/data/t1a.maf
|
148
|
+
- test/helper.rb
|
149
|
+
- test/test_bio-maf.rb
|
150
|
+
- travis-ci/install_kc
|
151
|
+
- travis-ci/install_kc_java
|
152
|
+
- travis-ci/report_errors
|
153
|
+
homepage: http://github.com/csw/bioruby-maf
|
154
|
+
licenses:
|
155
|
+
- MIT
|
156
|
+
post_install_message:
|
157
|
+
rdoc_options: []
|
158
|
+
require_paths:
|
159
|
+
- lib
|
160
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
161
|
+
requirements:
|
162
|
+
- - ! '>='
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
segments:
|
165
|
+
- 0
|
166
|
+
hash: 2
|
167
|
+
version: '0'
|
168
|
+
none: false
|
169
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ! '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
none: false
|
175
|
+
requirements: []
|
176
|
+
rubyforge_project:
|
177
|
+
rubygems_version: 1.8.24
|
178
|
+
signing_key:
|
179
|
+
specification_version: 3
|
180
|
+
summary: MAF parser for BioRuby
|
181
|
+
test_files: []
|
182
|
+
...
|