bio-velvet_underground 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitmodules +1 -0
- data/.rspec +1 -0
- data/Gemfile +1 -0
- data/README.md +53 -9
- data/VERSION +1 -1
- data/ext/mkrf_conf.rb +22 -4
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +18 -16
- data/ext/src/src/graph.c +50 -16
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- data/lib/bio-velvet_underground.rb +55 -11
- data/lib/bio-velvet_underground/binary_sequence_store.rb +86 -0
- data/lib/bio-velvet_underground/constants.rb +33 -0
- data/lib/bio-velvet_underground/graph.rb +262 -0
- data/lib/bio-velvet_underground/runner.rb +59 -0
- data/spec/binary_sequence_store_spec.rb +12 -0
- data/spec/data/2/CnyUnifiedSeq +0 -0
- data/spec/data/3/Assem/Graph2 +40 -0
- data/spec/data/3/Assem/LastGraph +40 -0
- data/spec/data/3/Assem/Log +42 -0
- data/spec/data/3/Assem/PreGraph +9 -0
- data/spec/data/3/Assem/Roadmaps +15 -0
- data/spec/data/3/Assem/Sequences +50 -0
- data/spec/data/3/Assem/contigs.fa +15 -0
- data/spec/data/3/Assem/stats.txt +5 -0
- data/spec/data/3/Sequences +50 -0
- data/spec/data/4/LastGraphKmer51Head +7 -0
- data/spec/graph_spec.rb +52 -0
- data/spec/runner_spec.rb +18 -0
- data/spec/spec_helper.rb +1 -16
- metadata +34 -4
- data/ext/bioruby.patch +0 -60
- data/lib/bio-velvet_underground/velvet_underground.rb +0 -72
@@ -0,0 +1,42 @@
|
|
1
|
+
Thu Apr 17 13:20:41 2014
|
2
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velveth Assem 31 -short -fasta Sequences
|
3
|
+
Version 1.2.10
|
4
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
5
|
+
This is free software; see the source for copying conditions. There is NO
|
6
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
7
|
+
Compilation settings:
|
8
|
+
CATEGORIES = 2
|
9
|
+
MAXKMERLENGTH = 31
|
10
|
+
|
11
|
+
Thu Apr 17 13:20:58 2014
|
12
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velvetg Assem
|
13
|
+
Version 1.2.10
|
14
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
15
|
+
This is free software; see the source for copying conditions. There is NO
|
16
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
17
|
+
Compilation settings:
|
18
|
+
CATEGORIES = 2
|
19
|
+
MAXKMERLENGTH = 31
|
20
|
+
|
21
|
+
Final graph has 4 nodes and n50 of 224, max 228, total 519, using 0/5 reads
|
22
|
+
Thu Apr 17 13:21:17 2014
|
23
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velveth Assem 31 -short -fasta Sequences -read_trkg yes
|
24
|
+
Version 1.2.10
|
25
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
26
|
+
This is free software; see the source for copying conditions. There is NO
|
27
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
28
|
+
Compilation settings:
|
29
|
+
CATEGORIES = 2
|
30
|
+
MAXKMERLENGTH = 31
|
31
|
+
|
32
|
+
Thu Apr 17 13:21:53 2014
|
33
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velvetg Assem -read_trkg yes
|
34
|
+
Version 1.2.10
|
35
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
36
|
+
This is free software; see the source for copying conditions. There is NO
|
37
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
38
|
+
Compilation settings:
|
39
|
+
CATEGORIES = 2
|
40
|
+
MAXKMERLENGTH = 31
|
41
|
+
|
42
|
+
Final graph has 4 nodes and n50 of 224, max 228, total 519, using 5/5 reads
|
@@ -0,0 +1,9 @@
|
|
1
|
+
4 5 31 1
|
2
|
+
NODE 1 228
|
3
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAGATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATACGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATGGACGAGTTATATTTACTG
|
4
|
+
NODE 2 29
|
5
|
+
CTGATAAAAATGGACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAA
|
6
|
+
NODE 3 224
|
7
|
+
GGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
8
|
+
NODE 4 38
|
9
|
+
CTGATAAAAATGGACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAA
|
@@ -0,0 +1,50 @@
|
|
1
|
+
>1 1 0
|
2
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
3
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
4
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
5
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
6
|
+
GACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTC
|
7
|
+
CTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATG
|
8
|
+
ATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAA
|
9
|
+
GTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAA
|
10
|
+
ACTATGCTGGTATTTCACTTCCAGGTACAGG
|
11
|
+
>2 2 0
|
12
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
13
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
14
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
15
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
16
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
17
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
18
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
19
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
20
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
21
|
+
>NODE_1_length_481_cov_1.471933_revcom 3 0
|
22
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
23
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
24
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
25
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
26
|
+
CCTTCTTTTAAACCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCCTTTCCTTTT
|
27
|
+
AATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTACCATTTTTA
|
28
|
+
TCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAATCTACTTTT
|
29
|
+
TTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTATAGTTTGAT
|
30
|
+
TCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
31
|
+
>3 4 0
|
32
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
33
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
34
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
35
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
36
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
37
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
38
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
39
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
40
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
41
|
+
>3_revcom 5 0
|
42
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
43
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
44
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
45
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
46
|
+
CCTTCTTTTAAACCCCCCCCCGCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCC
|
47
|
+
TTTCCTTTTAATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTA
|
48
|
+
CCATTTTTATCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAA
|
49
|
+
TCTACTTTTTTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTA
|
50
|
+
TAGTTTGATTCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
@@ -0,0 +1,15 @@
|
|
1
|
+
>NODE_1_length_228_cov_5.000000
|
2
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
3
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
4
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
5
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
6
|
+
GACGAGTTATATTTACTG
|
7
|
+
>NODE_3_length_224_cov_5.000000
|
8
|
+
GGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTCCTAAAGGGTATAGCCTT
|
9
|
+
TTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATGATAATGGAGAGTATACT
|
10
|
+
GGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAAGTATAATAAATAATATT
|
11
|
+
ACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAAACTATGCTGGTATTTCA
|
12
|
+
CTTCCAGGTACAGg
|
13
|
+
>NODE_4_length_38_cov_3.000000
|
14
|
+
CTGATAAAAATGGACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACT
|
15
|
+
TTATAAAA
|
@@ -0,0 +1,5 @@
|
|
1
|
+
ID lgth out in long_cov short1_cov short1_Ocov short2_cov short2_Ocov long_nb short1_nb short2_nb
|
2
|
+
1 228 2 0 0.000000 5.000000 5.000000 0.000000 0.000000 0 3 0
|
3
|
+
2 29 1 1 0.000000 2.000000 2.000000 0.000000 0.000000 0 1 0
|
4
|
+
3 224 0 2 0.000000 5.000000 5.000000 0.000000 0.000000 0 3 0
|
5
|
+
4 38 1 1 0.000000 3.000000 3.000000 0.000000 0.000000 0 2 0
|
@@ -0,0 +1,50 @@
|
|
1
|
+
>1 1 0
|
2
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
3
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
4
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
5
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
6
|
+
GACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTC
|
7
|
+
CTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATG
|
8
|
+
ATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAA
|
9
|
+
GTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAA
|
10
|
+
ACTATGCTGGTATTTCACTTCCAGGTACAGG
|
11
|
+
>2 2 0
|
12
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
13
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
14
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
15
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
16
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
17
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
18
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
19
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
20
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
21
|
+
>NODE_1_length_481_cov_1.471933_revcom 3 0
|
22
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
23
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
24
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
25
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
26
|
+
CCTTCTTTTAAACCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCCTTTCCTTTT
|
27
|
+
AATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTACCATTTTTA
|
28
|
+
TCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAATCTACTTTT
|
29
|
+
TTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTATAGTTTGAT
|
30
|
+
TCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
31
|
+
>3 4 0
|
32
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
33
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
34
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
35
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
36
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
37
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
38
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
39
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
40
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
41
|
+
>3_revcom 5 0
|
42
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
43
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
44
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
45
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
46
|
+
CCTTCTTTTAAACCCCCCCCCGCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCC
|
47
|
+
TTTCCTTTTAATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTA
|
48
|
+
CCATTTTTATCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAA
|
49
|
+
TCTACTTTTTTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTA
|
50
|
+
TAGTTTGATTCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
@@ -0,0 +1,7 @@
|
|
1
|
+
221 8987 51 1
|
2
|
+
NODE 1 236 8451 8451 0 0
|
3
|
+
CGCGGCGGCTTGCCTCCGTCTTCATCCAGCGTGTCCGGCTTGAGCGTCCACAAACCGAAGCCGATGAAGAGCACCGCCAGGAGCAGCGCCATCCACTTCGCCGGCACGTGCGCGGACACCCAACTGCCCACGCTGGACGCCAGCGCGTGATTGGCGACGGTGGCGACGAAGATGCCCGCCAGCACATGCCACGGCTTGCGAAACCGCGTGGCCAGGGAGAACGCGAGCAACTGCGT
|
4
|
+
TGTGCTGGCGGGCATCTTCGTCGCCACCGTCGCCAATCACGCGCTGGCGTCCAGCGTGGGCAGTTGGGTGTCCGCGCACGTGCCGGCGAAGTGGATGGCGCTGCTCCTGGCGGTGCTCTTCATCGGCTTCGGTTTGTGGACGCTCAAGCCGGACACGCTGGATGAAGACGGAGGCAAGCCGCCGCGCTTCGGCGCTTTCCTCACCACGGTGGTGCTCTTCTTCCTCGCGGAGATGG
|
5
|
+
NODE 2 913 31246 31246 0 0
|
6
|
+
ACCTTCCGCCTGACGCCGGGGCCGCATCCCGGCCCCGGGTGCCGTGTCGATGCGGCTTCTTACGGGGTGGGCGTCGACGCCTGCTCCGTCTCCGCGGGAGCGGCTCCCGTCTTCGCGGGGGCGGTGTCACCGCTCTTGCCCGTCACCACACCCTTCACCTTCTCCATGGCGGCCTTCGGGTCGATGTTGCCCTTGAGGGTGCCGAACGGCGTGTCGATGACCTGCTCCCGCTTCTCCGAGGAGGCGTAGCCCTCTGGCGCGCACAGGGCCTTCGAGGTGTCGCGCACGGAGACGACGGGGGTGTTGATGGTGGTGGAGTCCTCGGACGTGGCCTTGGCGACGAGCTTGTCGTCGCGCATCAGCACGCAGCGGTCCTCGCCGTAGTACCAGGCGGTGGAGCTGTCCGGGAACTCCTGCGCGCGCGTGGGTCCTGAGCCCATGGCATCGACGACCTGATGGGACGACATGCCCGGGTAGAGCTTGTCGAAGCCAGGGGTGGCACAGCCGGCGGCGGCGAGCGCGAGTGCGGCGCTGACAATCGGGAGACGCATGGGGTGTGGGGCTCCTTCCGAAAAGGAATCGCGGAGCCTACCCCGGCGCCACGTCCGGCCTCCATGTGGGGGGCCGGACGCATCGTGTCAGAGTTCCTGATCCAACCAGCGGACGATGGCCTCGCGAATCGAGGCCGGCCGCTCGGAGTCCAGTCGGGTGCGAAGCAGGCGGGCCGCGTCGGCGTCGCGCAGCATTGAAGCGGGCATCAGCTCACGCGTGCGAGGAAGGGACGCGCGGGACGCGACGGCGGCAGACCTCATGACTGGCCTTCCTCCTTGGGCTCTCCGCGGCCGTACTTGCCGAGCAGGTCATCCACGGCCTCGCGCAGGTACTCGCTCTGGTGGATGCGGGTCCGCCGCGC
|
7
|
+
TGACCTGCTCGGCAAGTACGGCCGCGGAGAGCCCAAGGAGGAAGGCCAGTCATGAGGTCTGCCGCCGTCGCGTCCCGCGCGTCCCTTCCTCGCACGCGTGAGCTGATGCCCGCTTCAATGCTGCGCGACGCCGACGCGGCCCGCCTGCTTCGCACCCGACTGGACTCCGAGCGGCCGGCCTCGATTCGCGAGGCCATCGTCCGCTGGTTGGATCAGGAACTCTGACACGATGCGTCCGGCCCCCCACATGGAGGCCGGACGTGGCGCCGGGGTAGGCTCCGCGATTCCTTTTCGGAAGGAGCCCCACACCCCATGCGTCTCCCGATTGTCAGCGCCGCACTCGCGCTCGCCGCCGCCGGCTGTGCCACCCCTGGCTTCGACAAGCTCTACCCGGGCATGTCGTCCCATCAGGTCGTCGATGCCATGGGCTCAGGACCCACGCGCGCGCAGGAGTTCCCGGACAGCTCCACCGCCTGGTACTACGGCGAGGACCGCTGCGTGCTGATGCGCGACGACAAGCTCGTCGCCAAGGCCACGTCCGAGGACTCCACCACCATCAACACCCCCGTCGTCTCCGTGCGCGACACCTCGAAGGCCCTGTGCGCGCCAGAGGGCTACGCCTCCTCGGAGAAGCGGGAGCAGGTCATCGACACGCCGTTCGGCACCCTCAAGGGCAACATCGACCCGAAGGCCGCCATGGAGAAGGTGAAGGGTGTGGTGACGGGCAAGAGCGGTGACACCGCCCCCGCGAAGACGGGAGCCGCTCCCGCGGAGACGGAGCAGGCGTCGACGCCCACCCCGTAAGAAGCCGCATCGACACGGCACCCGGGGCCGGGATGCGGCCCCGGCGTCAGGCGGAAGGTGCCGCCGCCTCGCCGTCCTTGTCGTCCAGCTTGCCGGCGATGTCCTTGAA
|
data/spec/graph_spec.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "graph" do
|
4
|
+
it "should be able to load a graph and respond to basic structures" do
|
5
|
+
path = File.join TEST_DATA_DIR, '3', 'Assem', 'LastGraph'
|
6
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
7
|
+
|
8
|
+
graph.hash_length.should == 31
|
9
|
+
graph.node_count.should == 4
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should be able to parse a graph with kmer length > 31, the default' do
|
13
|
+
path = File.join TEST_DATA_DIR, '4', 'LastGraphKmer51Head'
|
14
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
15
|
+
|
16
|
+
graph.hash_length.should == 51
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "nodes" do
|
20
|
+
it "should provide basic info" do
|
21
|
+
path = File.join TEST_DATA_DIR, '3', 'Assem', 'LastGraph'
|
22
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
23
|
+
|
24
|
+
graph.nodes[1].kind_of?(Bio::Velvet::Underground::Graph::Node).should == true
|
25
|
+
graph.nodes[1].length_alone.should == 228
|
26
|
+
graph.nodes[1].node_id.should == 1
|
27
|
+
graph.nodes[2].kind_of?(Bio::Velvet::Underground::Graph::Node).should == true
|
28
|
+
graph.nodes[2].node_id.should == 2
|
29
|
+
graph.nodes[2].length_alone.should == 29
|
30
|
+
graph.nodes[3].length_alone.should == 224
|
31
|
+
graph.nodes[4].length_alone.should == 38
|
32
|
+
graph.nodes[4].node_id.should == 4
|
33
|
+
graph.nodes[2].coverages.should == [58,0]
|
34
|
+
|
35
|
+
graph.nodes[2].ends_of_kmers_of_node.should == 'GTTTAAAAGAAGGAGATTACTTTATAAAA'
|
36
|
+
graph.nodes[2].ends_of_kmers_of_twin_node.should == 'AGTAAATATAACTCGTCCATTTTTATCAG'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should work with short reads' do
|
40
|
+
path = File.join TEST_DATA_DIR, '3', 'Assem', 'LastGraph'
|
41
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
42
|
+
|
43
|
+
node = graph.nodes[1]
|
44
|
+
shorts = node.short_reads
|
45
|
+
shorts.length.should == 5
|
46
|
+
shorts.collect{|s| s.direction}.should == [true, true, true, false, false]
|
47
|
+
shorts.collect{|s| s.read_id}.should == [1,2,4,3,5]
|
48
|
+
shorts.collect{|s| s.offset_from_start_of_node}.should == [0,0,0,0,0]
|
49
|
+
shorts.collect{|s| s.start_coord}.should == [0,0,0,253,262]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/spec/runner_spec.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
#Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('bio-velvet_underground'); Bio::Log::CLI.configure('bio-velvet_underground')
|
6
|
+
describe "runner" do
|
7
|
+
it "should run basic" do
|
8
|
+
reads = File.join TEST_DATA_DIR, '3', 'Sequences'
|
9
|
+
Dir.mktmpdir do |dir|
|
10
|
+
Bio::Velvet::Underground::Runner.run(51,
|
11
|
+
['-fasta',reads],
|
12
|
+
['-tour_bus','no'],
|
13
|
+
{:velvet_directory => dir}).should == 0
|
14
|
+
|
15
|
+
File.exist?(File.join(dir, 'contigs.fa')).should == true
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,24 +1,9 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
|
3
|
-
module SimpleCov::Configuration
|
4
|
-
def clean_filters
|
5
|
-
@filters = []
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
SimpleCov.configure do
|
10
|
-
clean_filters
|
11
|
-
load_adapter 'test_frameworks'
|
12
|
-
end
|
13
|
-
|
14
|
-
ENV["COVERAGE"] && SimpleCov.start do
|
15
|
-
add_filter "/.rvm/"
|
16
|
-
end
|
17
1
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
18
2
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
19
3
|
|
20
4
|
require 'rspec'
|
21
5
|
require 'bio-velvet_underground'
|
6
|
+
require 'pry'
|
22
7
|
|
23
8
|
# Requires supporting files with custom matchers and macros, etc,
|
24
9
|
# in ./support/ and its subdirectories.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-velvet_underground
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Woodcroft
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bio-logger
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: pry
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -147,13 +161,13 @@ extra_rdoc_files:
|
|
147
161
|
files:
|
148
162
|
- ".document"
|
149
163
|
- ".gitmodules"
|
164
|
+
- ".rspec"
|
150
165
|
- ".travis.yml"
|
151
166
|
- Gemfile
|
152
167
|
- LICENSE.txt
|
153
168
|
- README.md
|
154
169
|
- Rakefile
|
155
170
|
- VERSION
|
156
|
-
- ext/bioruby.patch
|
157
171
|
- ext/mkrf_conf.rb
|
158
172
|
- ext/src/Makefile
|
159
173
|
- ext/src/src/allocArray.c
|
@@ -424,10 +438,26 @@ files:
|
|
424
438
|
- ext/src/third-party/zlib-1.2.3/zutil.c
|
425
439
|
- ext/src/third-party/zlib-1.2.3/zutil.h
|
426
440
|
- lib/bio-velvet_underground.rb
|
441
|
+
- lib/bio-velvet_underground/binary_sequence_store.rb
|
442
|
+
- lib/bio-velvet_underground/constants.rb
|
427
443
|
- lib/bio-velvet_underground/external/VERSION
|
428
|
-
- lib/bio-velvet_underground/
|
444
|
+
- lib/bio-velvet_underground/graph.rb
|
445
|
+
- lib/bio-velvet_underground/runner.rb
|
429
446
|
- spec/binary_sequence_store_spec.rb
|
430
447
|
- spec/data/1/CnyUnifiedSeq
|
448
|
+
- spec/data/2/CnyUnifiedSeq
|
449
|
+
- spec/data/3/Assem/Graph2
|
450
|
+
- spec/data/3/Assem/LastGraph
|
451
|
+
- spec/data/3/Assem/Log
|
452
|
+
- spec/data/3/Assem/PreGraph
|
453
|
+
- spec/data/3/Assem/Roadmaps
|
454
|
+
- spec/data/3/Assem/Sequences
|
455
|
+
- spec/data/3/Assem/contigs.fa
|
456
|
+
- spec/data/3/Assem/stats.txt
|
457
|
+
- spec/data/3/Sequences
|
458
|
+
- spec/data/4/LastGraphKmer51Head
|
459
|
+
- spec/graph_spec.rb
|
460
|
+
- spec/runner_spec.rb
|
431
461
|
- spec/spec_helper.rb
|
432
462
|
homepage: http://github.com/wwood/bioruby-velvet_underground
|
433
463
|
licenses:
|
data/ext/bioruby.patch
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
diff --git a/Makefile b/Makefile
|
2
|
-
index 8239e72..e0308db 100644
|
3
|
-
--- a/Makefile
|
4
|
-
+++ b/Makefile
|
5
|
-
@@ -38,16 +38,17 @@ endif
|
6
|
-
|
7
|
-
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
|
8
|
-
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
9
|
-
+OBJSHARED = $(subst obj,obj/shared,$(OBJ))
|
10
|
-
|
11
|
-
default : cleanobj zlib obj velveth velvetg doc
|
12
|
-
|
13
|
-
clean : clean-zlib
|
14
|
-
- -rm obj/*.o obj/dbg/*.o ./velvet*
|
15
|
-
+ -rm obj/*.o obj/dbg/*.o obj/shared/*.o obj/shared/velvet.so.0.0.1 ./velvet*
|
16
|
-
-rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
|
17
|
-
-rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
|
18
|
-
|
19
|
-
cleanobj:
|
20
|
-
- -rm obj/*.o obj/dbg/*.o
|
21
|
-
+ -rm obj/*.o obj/dbg/*.o obj/shared/*.o
|
22
|
-
|
23
|
-
ifdef BUNDLEDZLIB
|
24
|
-
Z_LIB_DIR=third-party/zlib-1.2.3
|
25
|
-
@@ -118,3 +119,15 @@ Manual.pdf: doc/manual_src/Manual.tex doc/manual_src/Columbus_manual.tex
|
26
|
-
|
27
|
-
test: velvetg velveth
|
28
|
-
cd tests && ./run-tests.sh
|
29
|
-
+
|
30
|
-
+sharedobjdir:
|
31
|
-
+ mkdir -p obj/shared
|
32
|
-
+
|
33
|
-
+obj/shared: sharedobjdir $(OBJSHARED)
|
34
|
-
+
|
35
|
-
+obj/shared/%.o: src/%.c
|
36
|
-
+ $(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
37
|
-
+
|
38
|
-
+shared: zlib obj/shared
|
39
|
-
+ cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o
|
40
|
-
+
|
41
|
-
diff --git a/src/utility.c b/src/utility.c
|
42
|
-
index d402629..126b386 100644
|
43
|
-
--- a/src/utility.c
|
44
|
-
+++ b/src/utility.c
|
45
|
-
@@ -97,6 +97,7 @@ void exitErrorf(int exitStatus, boolean showErrno, const char *format, ...)
|
46
|
-
|
47
|
-
void velvetLog(const char *format, ...)
|
48
|
-
{
|
49
|
-
+/*
|
50
|
-
static boolean timeIsSet = false;
|
51
|
-
static struct timeval tvStart;
|
52
|
-
struct timeval tvNow;
|
53
|
-
@@ -120,6 +121,7 @@ void velvetLog(const char *format, ...)
|
54
|
-
#ifdef DEBUG
|
55
|
-
fflush(stdout);
|
56
|
-
#endif
|
57
|
-
+*/
|
58
|
-
}
|
59
|
-
|
60
|
-
void velvetFprintf(FILE * file, const char * format, ...)
|
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'ffi'
|
2
|
-
require 'pry'
|
3
|
-
|
4
|
-
module Bio
|
5
|
-
module Velvet
|
6
|
-
class Underground
|
7
|
-
extend FFI::Library
|
8
|
-
ffi_lib File.join(File.dirname(__FILE__),'external','libvelvet.so.1.0')
|
9
|
-
|
10
|
-
class BinarySequenceStore
|
11
|
-
# Parse a CnyUnifiedSeq file in so that sequences can be accessed
|
12
|
-
def initialize(cny_unified_seq_file)
|
13
|
-
readset_pointer = Bio::Velvet::Underground.importCnyReadSet cny_unified_seq_file
|
14
|
-
@readset = Bio::Velvet::Underground::ReadSet.new(readset_pointer)
|
15
|
-
end
|
16
|
-
|
17
|
-
# Return a sequence from the store given its read ID.
|
18
|
-
def [](sequence_id)
|
19
|
-
if sequence_id==0 or sequence_id > @readset[:readCount]
|
20
|
-
raise "Invalid sequence_id #{sequence_id}"
|
21
|
-
end
|
22
|
-
|
23
|
-
pointer = Bio::Velvet::Underground.getTightStringInArray(
|
24
|
-
@readset[:tSequences], sequence_id-1
|
25
|
-
)
|
26
|
-
Bio::Velvet::Underground.readTightString pointer
|
27
|
-
end
|
28
|
-
|
29
|
-
# Number of sequences in this store
|
30
|
-
def length
|
31
|
-
@readset[:readCount]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
# struct readSet_st {
|
37
|
-
# char **sequences;
|
38
|
-
# TightString *tSequences;
|
39
|
-
# char **labels;
|
40
|
-
# char *tSeqMem;
|
41
|
-
# Quality **confidenceScores;
|
42
|
-
# Probability **kmerProbabilities;
|
43
|
-
# IDnum *mateReads;
|
44
|
-
# Category *categories;
|
45
|
-
# unsigned char *secondInPair;
|
46
|
-
# IDnum readCount;
|
47
|
-
# };
|
48
|
-
class ReadSet < FFI::Struct
|
49
|
-
layout :sequences, :pointer, # char **sequences;
|
50
|
-
:tSequences, :pointer, # TightString *tSequences;
|
51
|
-
:labels, :pointer, # char **labels;
|
52
|
-
:tSeqMem, :pointer, # char *tSeqMem; #TODO: they don't really mean char* here - meant as an unsigned short?
|
53
|
-
:confidenceScores, :pointer, # Quality **confidenceScores;
|
54
|
-
:kmerProbabilities, :pointer, # Probability **kmerProbabilities;
|
55
|
-
:mateReads, :pointer, # IDnum *mateReads;
|
56
|
-
:categories, :pointer, # Category *categories;
|
57
|
-
:secondInPair, :pointer, # unsigned char *secondInPair;
|
58
|
-
:readCount, :int32 # IDnum readCount;
|
59
|
-
end
|
60
|
-
|
61
|
-
# ReadSet *importCnyReadSet(char *filename);
|
62
|
-
attach_function :importCnyReadSet, [:string], :pointer
|
63
|
-
|
64
|
-
# char *readTightString(TightString * tString); #tightString.h
|
65
|
-
attach_function :readTightString, [:pointer], :string
|
66
|
-
|
67
|
-
# TightString *getTightStringInArray(TightString * tString,
|
68
|
-
# IDnum position);
|
69
|
-
attach_function :getTightStringInArray, [:pointer, :int32], :pointer
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|