bio-velvet_underground 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitmodules +1 -0
- data/.rspec +1 -0
- data/Gemfile +1 -0
- data/README.md +53 -9
- data/VERSION +1 -1
- data/ext/mkrf_conf.rb +22 -4
- data/ext/src/Makefile +27 -14
- data/ext/src/src/concatenatedPreGraph.c +4 -4
- data/ext/src/src/correctedGraph.c +18 -16
- data/ext/src/src/graph.c +50 -16
- data/ext/src/src/graphStats.c +65 -65
- data/ext/src/src/run.c +9 -9
- data/ext/src/src/run2.c +51 -37
- data/ext/src/src/utility.c +10 -9
- data/lib/bio-velvet_underground.rb +55 -11
- data/lib/bio-velvet_underground/binary_sequence_store.rb +86 -0
- data/lib/bio-velvet_underground/constants.rb +33 -0
- data/lib/bio-velvet_underground/graph.rb +262 -0
- data/lib/bio-velvet_underground/runner.rb +59 -0
- data/spec/binary_sequence_store_spec.rb +12 -0
- data/spec/data/2/CnyUnifiedSeq +0 -0
- data/spec/data/3/Assem/Graph2 +40 -0
- data/spec/data/3/Assem/LastGraph +40 -0
- data/spec/data/3/Assem/Log +42 -0
- data/spec/data/3/Assem/PreGraph +9 -0
- data/spec/data/3/Assem/Roadmaps +15 -0
- data/spec/data/3/Assem/Sequences +50 -0
- data/spec/data/3/Assem/contigs.fa +15 -0
- data/spec/data/3/Assem/stats.txt +5 -0
- data/spec/data/3/Sequences +50 -0
- data/spec/data/4/LastGraphKmer51Head +7 -0
- data/spec/graph_spec.rb +52 -0
- data/spec/runner_spec.rb +18 -0
- data/spec/spec_helper.rb +1 -16
- metadata +34 -4
- data/ext/bioruby.patch +0 -60
- data/lib/bio-velvet_underground/velvet_underground.rb +0 -72
@@ -0,0 +1,42 @@
|
|
1
|
+
Thu Apr 17 13:20:41 2014
|
2
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velveth Assem 31 -short -fasta Sequences
|
3
|
+
Version 1.2.10
|
4
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
5
|
+
This is free software; see the source for copying conditions. There is NO
|
6
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
7
|
+
Compilation settings:
|
8
|
+
CATEGORIES = 2
|
9
|
+
MAXKMERLENGTH = 31
|
10
|
+
|
11
|
+
Thu Apr 17 13:20:58 2014
|
12
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velvetg Assem
|
13
|
+
Version 1.2.10
|
14
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
15
|
+
This is free software; see the source for copying conditions. There is NO
|
16
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
17
|
+
Compilation settings:
|
18
|
+
CATEGORIES = 2
|
19
|
+
MAXKMERLENGTH = 31
|
20
|
+
|
21
|
+
Final graph has 4 nodes and n50 of 224, max 228, total 519, using 0/5 reads
|
22
|
+
Thu Apr 17 13:21:17 2014
|
23
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velveth Assem 31 -short -fasta Sequences -read_trkg yes
|
24
|
+
Version 1.2.10
|
25
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
26
|
+
This is free software; see the source for copying conditions. There is NO
|
27
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
28
|
+
Compilation settings:
|
29
|
+
CATEGORIES = 2
|
30
|
+
MAXKMERLENGTH = 31
|
31
|
+
|
32
|
+
Thu Apr 17 13:21:53 2014
|
33
|
+
/srv/whitlam/home/users/uqbwoodc/git/bioruby-velvet_underground/ext/src/velvetg Assem -read_trkg yes
|
34
|
+
Version 1.2.10
|
35
|
+
Copyright 2007, 2008 Daniel Zerbino (zerbino@ebi.ac.uk)
|
36
|
+
This is free software; see the source for copying conditions. There is NO
|
37
|
+
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
38
|
+
Compilation settings:
|
39
|
+
CATEGORIES = 2
|
40
|
+
MAXKMERLENGTH = 31
|
41
|
+
|
42
|
+
Final graph has 4 nodes and n50 of 224, max 228, total 519, using 5/5 reads
|
@@ -0,0 +1,9 @@
|
|
1
|
+
4 5 31 1
|
2
|
+
NODE 1 228
|
3
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAGTAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAGATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATACGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATGGACGAGTTATATTTACTG
|
4
|
+
NODE 2 29
|
5
|
+
CTGATAAAAATGGACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAA
|
6
|
+
NODE 3 224
|
7
|
+
GGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
8
|
+
NODE 4 38
|
9
|
+
CTGATAAAAATGGACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAA
|
@@ -0,0 +1,50 @@
|
|
1
|
+
>1 1 0
|
2
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
3
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
4
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
5
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
6
|
+
GACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTC
|
7
|
+
CTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATG
|
8
|
+
ATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAA
|
9
|
+
GTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAA
|
10
|
+
ACTATGCTGGTATTTCACTTCCAGGTACAGG
|
11
|
+
>2 2 0
|
12
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
13
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
14
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
15
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
16
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
17
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
18
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
19
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
20
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
21
|
+
>NODE_1_length_481_cov_1.471933_revcom 3 0
|
22
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
23
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
24
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
25
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
26
|
+
CCTTCTTTTAAACCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCCTTTCCTTTT
|
27
|
+
AATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTACCATTTTTA
|
28
|
+
TCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAATCTACTTTT
|
29
|
+
TTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTATAGTTTGAT
|
30
|
+
TCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
31
|
+
>3 4 0
|
32
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
33
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
34
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
35
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
36
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
37
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
38
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
39
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
40
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
41
|
+
>3_revcom 5 0
|
42
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
43
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
44
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
45
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
46
|
+
CCTTCTTTTAAACCCCCCCCCGCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCC
|
47
|
+
TTTCCTTTTAATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTA
|
48
|
+
CCATTTTTATCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAA
|
49
|
+
TCTACTTTTTTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTA
|
50
|
+
TAGTTTGATTCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
@@ -0,0 +1,15 @@
|
|
1
|
+
>NODE_1_length_228_cov_5.000000
|
2
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
3
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
4
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
5
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
6
|
+
GACGAGTTATATTTACTG
|
7
|
+
>NODE_3_length_224_cov_5.000000
|
8
|
+
GGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTCCTAAAGGGTATAGCCTT
|
9
|
+
TTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATGATAATGGAGAGTATACT
|
10
|
+
GGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAAGTATAATAAATAATATT
|
11
|
+
ACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAAACTATGCTGGTATTTCA
|
12
|
+
CTTCCAGGTACAGg
|
13
|
+
>NODE_4_length_38_cov_3.000000
|
14
|
+
CTGATAAAAATGGACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACT
|
15
|
+
TTATAAAA
|
@@ -0,0 +1,5 @@
|
|
1
|
+
ID lgth out in long_cov short1_cov short1_Ocov short2_cov short2_Ocov long_nb short1_nb short2_nb
|
2
|
+
1 228 2 0 0.000000 5.000000 5.000000 0.000000 0.000000 0 3 0
|
3
|
+
2 29 1 1 0.000000 2.000000 2.000000 0.000000 0.000000 0 1 0
|
4
|
+
3 224 0 2 0.000000 5.000000 5.000000 0.000000 0.000000 0 3 0
|
5
|
+
4 38 1 1 0.000000 3.000000 3.000000 0.000000 0.000000 0 2 0
|
@@ -0,0 +1,50 @@
|
|
1
|
+
>1 1 0
|
2
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
3
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
4
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
5
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
6
|
+
GACGAGTTATATTTACTGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAGAAAAAGCTC
|
7
|
+
CTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTCAAAAAGATG
|
8
|
+
ATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATGAAGCTGGAA
|
9
|
+
GTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTACAAATTAAAA
|
10
|
+
ACTATGCTGGTATTTCACTTCCAGGTACAGG
|
11
|
+
>2 2 0
|
12
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
13
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
14
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
15
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
16
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
17
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
18
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
19
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
20
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
21
|
+
>NODE_1_length_481_cov_1.471933_revcom 3 0
|
22
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
23
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
24
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
25
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
26
|
+
CCTTCTTTTAAACCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCCTTTCCTTTT
|
27
|
+
AATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTACCATTTTTA
|
28
|
+
TCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAATCTACTTTT
|
29
|
+
TTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTATAGTTTGAT
|
30
|
+
TCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
31
|
+
>3 4 0
|
32
|
+
CACTTATCTCTACCAAAGATCACGATTTAGAATCAAACTATAAAGTTTTAGAAGATAAAG
|
33
|
+
TAACAACTTATACATGGGGATTCGGAGTTAAAAAAGTAGATTCAGAAAATATTTCAATAG
|
34
|
+
ATCTTGCAGGCGCAGCTTTTTCTGTTAGGGATAAAAATGGTAATGTAATTGGTAAATATA
|
35
|
+
CGTATGATTCTACTGGAAATGTGGTTTTATTAAAAGGAAAGGGTGTAACTGATAAAAATG
|
36
|
+
GACGAGTTATATTTACTGCGGGGGGGGGTTTAAAAGAAGGAGATTACTTTATAAAAGAAG
|
37
|
+
AAAAAGCTCCTAAAGGGTATAGCCTTTTAAAAGAACCAGTAAAAGTTACTATAACAGCTC
|
38
|
+
AAAAAGATGATAATGGAGAGTATACTGGTCAAGCAACTATATCTGTAACTAATGGCAATG
|
39
|
+
AAGCTGGAAGTATAATAAATAATATTACTATGAATGATGGCAATGTATTATTTAATGTAC
|
40
|
+
AAATTAAAAACTATGCTGGTATTTCACTTCCAGGTACAGG
|
41
|
+
>3_revcom 5 0
|
42
|
+
CCTGTACCTGGAAGTGAAATACCAGCATAGTTTTTAATTTGTACATTAAATAATACATTG
|
43
|
+
CCATCATTCATAGTAATATTATTTATTATACTTCCAGCTTCATTGCCATTAGTTACAGAT
|
44
|
+
ATAGTTGCTTGACCAGTATACTCTCCATTATCATCTTTTTGAGCTGTTATAGTAACTTTT
|
45
|
+
ACTGGTTCTTTTAAAAGGCTATACCCTTTAGGAGCTTTTTCTTCTTTTATAAAGTAATCT
|
46
|
+
CCTTCTTTTAAACCCCCCCCCGCAGTAAATATAACTCGTCCATTTTTATCAGTTACACCC
|
47
|
+
TTTCCTTTTAATAAAACCACATTTCCAGTAGAATCATACGTATATTTACCAATTACATTA
|
48
|
+
CCATTTTTATCCCTAACAGAAAAAGCTGCGCCTGCAAGATCTATTGAAATATTTTCTGAA
|
49
|
+
TCTACTTTTTTAACTCCGAATCCCCATGTATAAGTTGTTACTTTATCTTCTAAAACTTTA
|
50
|
+
TAGTTTGATTCTAAATCGTGATCTTTGGTAGAGATAAGTG
|
@@ -0,0 +1,7 @@
|
|
1
|
+
221 8987 51 1
|
2
|
+
NODE 1 236 8451 8451 0 0
|
3
|
+
CGCGGCGGCTTGCCTCCGTCTTCATCCAGCGTGTCCGGCTTGAGCGTCCACAAACCGAAGCCGATGAAGAGCACCGCCAGGAGCAGCGCCATCCACTTCGCCGGCACGTGCGCGGACACCCAACTGCCCACGCTGGACGCCAGCGCGTGATTGGCGACGGTGGCGACGAAGATGCCCGCCAGCACATGCCACGGCTTGCGAAACCGCGTGGCCAGGGAGAACGCGAGCAACTGCGT
|
4
|
+
TGTGCTGGCGGGCATCTTCGTCGCCACCGTCGCCAATCACGCGCTGGCGTCCAGCGTGGGCAGTTGGGTGTCCGCGCACGTGCCGGCGAAGTGGATGGCGCTGCTCCTGGCGGTGCTCTTCATCGGCTTCGGTTTGTGGACGCTCAAGCCGGACACGCTGGATGAAGACGGAGGCAAGCCGCCGCGCTTCGGCGCTTTCCTCACCACGGTGGTGCTCTTCTTCCTCGCGGAGATGG
|
5
|
+
NODE 2 913 31246 31246 0 0
|
6
|
+
ACCTTCCGCCTGACGCCGGGGCCGCATCCCGGCCCCGGGTGCCGTGTCGATGCGGCTTCTTACGGGGTGGGCGTCGACGCCTGCTCCGTCTCCGCGGGAGCGGCTCCCGTCTTCGCGGGGGCGGTGTCACCGCTCTTGCCCGTCACCACACCCTTCACCTTCTCCATGGCGGCCTTCGGGTCGATGTTGCCCTTGAGGGTGCCGAACGGCGTGTCGATGACCTGCTCCCGCTTCTCCGAGGAGGCGTAGCCCTCTGGCGCGCACAGGGCCTTCGAGGTGTCGCGCACGGAGACGACGGGGGTGTTGATGGTGGTGGAGTCCTCGGACGTGGCCTTGGCGACGAGCTTGTCGTCGCGCATCAGCACGCAGCGGTCCTCGCCGTAGTACCAGGCGGTGGAGCTGTCCGGGAACTCCTGCGCGCGCGTGGGTCCTGAGCCCATGGCATCGACGACCTGATGGGACGACATGCCCGGGTAGAGCTTGTCGAAGCCAGGGGTGGCACAGCCGGCGGCGGCGAGCGCGAGTGCGGCGCTGACAATCGGGAGACGCATGGGGTGTGGGGCTCCTTCCGAAAAGGAATCGCGGAGCCTACCCCGGCGCCACGTCCGGCCTCCATGTGGGGGGCCGGACGCATCGTGTCAGAGTTCCTGATCCAACCAGCGGACGATGGCCTCGCGAATCGAGGCCGGCCGCTCGGAGTCCAGTCGGGTGCGAAGCAGGCGGGCCGCGTCGGCGTCGCGCAGCATTGAAGCGGGCATCAGCTCACGCGTGCGAGGAAGGGACGCGCGGGACGCGACGGCGGCAGACCTCATGACTGGCCTTCCTCCTTGGGCTCTCCGCGGCCGTACTTGCCGAGCAGGTCATCCACGGCCTCGCGCAGGTACTCGCTCTGGTGGATGCGGGTCCGCCGCGC
|
7
|
+
TGACCTGCTCGGCAAGTACGGCCGCGGAGAGCCCAAGGAGGAAGGCCAGTCATGAGGTCTGCCGCCGTCGCGTCCCGCGCGTCCCTTCCTCGCACGCGTGAGCTGATGCCCGCTTCAATGCTGCGCGACGCCGACGCGGCCCGCCTGCTTCGCACCCGACTGGACTCCGAGCGGCCGGCCTCGATTCGCGAGGCCATCGTCCGCTGGTTGGATCAGGAACTCTGACACGATGCGTCCGGCCCCCCACATGGAGGCCGGACGTGGCGCCGGGGTAGGCTCCGCGATTCCTTTTCGGAAGGAGCCCCACACCCCATGCGTCTCCCGATTGTCAGCGCCGCACTCGCGCTCGCCGCCGCCGGCTGTGCCACCCCTGGCTTCGACAAGCTCTACCCGGGCATGTCGTCCCATCAGGTCGTCGATGCCATGGGCTCAGGACCCACGCGCGCGCAGGAGTTCCCGGACAGCTCCACCGCCTGGTACTACGGCGAGGACCGCTGCGTGCTGATGCGCGACGACAAGCTCGTCGCCAAGGCCACGTCCGAGGACTCCACCACCATCAACACCCCCGTCGTCTCCGTGCGCGACACCTCGAAGGCCCTGTGCGCGCCAGAGGGCTACGCCTCCTCGGAGAAGCGGGAGCAGGTCATCGACACGCCGTTCGGCACCCTCAAGGGCAACATCGACCCGAAGGCCGCCATGGAGAAGGTGAAGGGTGTGGTGACGGGCAAGAGCGGTGACACCGCCCCCGCGAAGACGGGAGCCGCTCCCGCGGAGACGGAGCAGGCGTCGACGCCCACCCCGTAAGAAGCCGCATCGACACGGCACCCGGGGCCGGGATGCGGCCCCGGCGTCAGGCGGAAGGTGCCGCCGCCTCGCCGTCCTTGTCGTCCAGCTTGCCGGCGATGTCCTTGAA
|
data/spec/graph_spec.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "graph" do
|
4
|
+
it "should be able to load a graph and respond to basic structures" do
|
5
|
+
path = File.join TEST_DATA_DIR, '3', 'Assem', 'LastGraph'
|
6
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
7
|
+
|
8
|
+
graph.hash_length.should == 31
|
9
|
+
graph.node_count.should == 4
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should be able to parse a graph with kmer length > 31, the default' do
|
13
|
+
path = File.join TEST_DATA_DIR, '4', 'LastGraphKmer51Head'
|
14
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
15
|
+
|
16
|
+
graph.hash_length.should == 51
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "nodes" do
|
20
|
+
it "should provide basic info" do
|
21
|
+
path = File.join TEST_DATA_DIR, '3', 'Assem', 'LastGraph'
|
22
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
23
|
+
|
24
|
+
graph.nodes[1].kind_of?(Bio::Velvet::Underground::Graph::Node).should == true
|
25
|
+
graph.nodes[1].length_alone.should == 228
|
26
|
+
graph.nodes[1].node_id.should == 1
|
27
|
+
graph.nodes[2].kind_of?(Bio::Velvet::Underground::Graph::Node).should == true
|
28
|
+
graph.nodes[2].node_id.should == 2
|
29
|
+
graph.nodes[2].length_alone.should == 29
|
30
|
+
graph.nodes[3].length_alone.should == 224
|
31
|
+
graph.nodes[4].length_alone.should == 38
|
32
|
+
graph.nodes[4].node_id.should == 4
|
33
|
+
graph.nodes[2].coverages.should == [58,0]
|
34
|
+
|
35
|
+
graph.nodes[2].ends_of_kmers_of_node.should == 'GTTTAAAAGAAGGAGATTACTTTATAAAA'
|
36
|
+
graph.nodes[2].ends_of_kmers_of_twin_node.should == 'AGTAAATATAACTCGTCCATTTTTATCAG'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should work with short reads' do
|
40
|
+
path = File.join TEST_DATA_DIR, '3', 'Assem', 'LastGraph'
|
41
|
+
graph = Bio::Velvet::Underground::Graph.parse_from_file path
|
42
|
+
|
43
|
+
node = graph.nodes[1]
|
44
|
+
shorts = node.short_reads
|
45
|
+
shorts.length.should == 5
|
46
|
+
shorts.collect{|s| s.direction}.should == [true, true, true, false, false]
|
47
|
+
shorts.collect{|s| s.read_id}.should == [1,2,4,3,5]
|
48
|
+
shorts.collect{|s| s.offset_from_start_of_node}.should == [0,0,0,0,0]
|
49
|
+
shorts.collect{|s| s.start_coord}.should == [0,0,0,253,262]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/spec/runner_spec.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
#Bio::Log::CLI.logger('stderr'); Bio::Log::CLI.trace('debug'); log = Bio::Log::LoggerPlus.new('bio-velvet_underground'); Bio::Log::CLI.configure('bio-velvet_underground')
|
6
|
+
describe "runner" do
|
7
|
+
it "should run basic" do
|
8
|
+
reads = File.join TEST_DATA_DIR, '3', 'Sequences'
|
9
|
+
Dir.mktmpdir do |dir|
|
10
|
+
Bio::Velvet::Underground::Runner.run(51,
|
11
|
+
['-fasta',reads],
|
12
|
+
['-tour_bus','no'],
|
13
|
+
{:velvet_directory => dir}).should == 0
|
14
|
+
|
15
|
+
File.exist?(File.join(dir, 'contigs.fa')).should == true
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,24 +1,9 @@
|
|
1
|
-
require 'simplecov'
|
2
|
-
|
3
|
-
module SimpleCov::Configuration
|
4
|
-
def clean_filters
|
5
|
-
@filters = []
|
6
|
-
end
|
7
|
-
end
|
8
|
-
|
9
|
-
SimpleCov.configure do
|
10
|
-
clean_filters
|
11
|
-
load_adapter 'test_frameworks'
|
12
|
-
end
|
13
|
-
|
14
|
-
ENV["COVERAGE"] && SimpleCov.start do
|
15
|
-
add_filter "/.rvm/"
|
16
|
-
end
|
17
1
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
18
2
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
19
3
|
|
20
4
|
require 'rspec'
|
21
5
|
require 'bio-velvet_underground'
|
6
|
+
require 'pry'
|
22
7
|
|
23
8
|
# Requires supporting files with custom matchers and macros, etc,
|
24
9
|
# in ./support/ and its subdirectories.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-velvet_underground
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Woodcroft
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bio-logger
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: pry
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -147,13 +161,13 @@ extra_rdoc_files:
|
|
147
161
|
files:
|
148
162
|
- ".document"
|
149
163
|
- ".gitmodules"
|
164
|
+
- ".rspec"
|
150
165
|
- ".travis.yml"
|
151
166
|
- Gemfile
|
152
167
|
- LICENSE.txt
|
153
168
|
- README.md
|
154
169
|
- Rakefile
|
155
170
|
- VERSION
|
156
|
-
- ext/bioruby.patch
|
157
171
|
- ext/mkrf_conf.rb
|
158
172
|
- ext/src/Makefile
|
159
173
|
- ext/src/src/allocArray.c
|
@@ -424,10 +438,26 @@ files:
|
|
424
438
|
- ext/src/third-party/zlib-1.2.3/zutil.c
|
425
439
|
- ext/src/third-party/zlib-1.2.3/zutil.h
|
426
440
|
- lib/bio-velvet_underground.rb
|
441
|
+
- lib/bio-velvet_underground/binary_sequence_store.rb
|
442
|
+
- lib/bio-velvet_underground/constants.rb
|
427
443
|
- lib/bio-velvet_underground/external/VERSION
|
428
|
-
- lib/bio-velvet_underground/
|
444
|
+
- lib/bio-velvet_underground/graph.rb
|
445
|
+
- lib/bio-velvet_underground/runner.rb
|
429
446
|
- spec/binary_sequence_store_spec.rb
|
430
447
|
- spec/data/1/CnyUnifiedSeq
|
448
|
+
- spec/data/2/CnyUnifiedSeq
|
449
|
+
- spec/data/3/Assem/Graph2
|
450
|
+
- spec/data/3/Assem/LastGraph
|
451
|
+
- spec/data/3/Assem/Log
|
452
|
+
- spec/data/3/Assem/PreGraph
|
453
|
+
- spec/data/3/Assem/Roadmaps
|
454
|
+
- spec/data/3/Assem/Sequences
|
455
|
+
- spec/data/3/Assem/contigs.fa
|
456
|
+
- spec/data/3/Assem/stats.txt
|
457
|
+
- spec/data/3/Sequences
|
458
|
+
- spec/data/4/LastGraphKmer51Head
|
459
|
+
- spec/graph_spec.rb
|
460
|
+
- spec/runner_spec.rb
|
431
461
|
- spec/spec_helper.rb
|
432
462
|
homepage: http://github.com/wwood/bioruby-velvet_underground
|
433
463
|
licenses:
|
data/ext/bioruby.patch
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
diff --git a/Makefile b/Makefile
|
2
|
-
index 8239e72..e0308db 100644
|
3
|
-
--- a/Makefile
|
4
|
-
+++ b/Makefile
|
5
|
-
@@ -38,16 +38,17 @@ endif
|
6
|
-
|
7
|
-
OBJ = obj/tightString.o obj/run.o obj/splay.o obj/splayTable.o obj/graph.o obj/run2.o obj/fibHeap.o obj/fib.o obj/concatenatedGraph.o obj/passageMarker.o obj/graphStats.o obj/correctedGraph.o obj/dfib.o obj/dfibHeap.o obj/recycleBin.o obj/readSet.o obj/binarySequences.o obj/shortReadPairs.o obj/locallyCorrectedGraph.o obj/graphReConstruction.o obj/roadMap.o obj/preGraph.o obj/preGraphConstruction.o obj/concatenatedPreGraph.o obj/readCoherentGraph.o obj/utility.o obj/kmer.o obj/scaffold.o obj/kmerOccurenceTable.o obj/allocArray.o obj/autoOpen.o
|
8
|
-
OBJDBG = $(subst obj,obj/dbg,$(OBJ))
|
9
|
-
+OBJSHARED = $(subst obj,obj/shared,$(OBJ))
|
10
|
-
|
11
|
-
default : cleanobj zlib obj velveth velvetg doc
|
12
|
-
|
13
|
-
clean : clean-zlib
|
14
|
-
- -rm obj/*.o obj/dbg/*.o ./velvet*
|
15
|
-
+ -rm obj/*.o obj/dbg/*.o obj/shared/*.o obj/shared/velvet.so.0.0.1 ./velvet*
|
16
|
-
-rm -f doc/manual_src/Manual.toc doc/manual_src/Manual.aux doc/manual_src/Manual.out doc/manual_src/Manual.log
|
17
|
-
-rm -f doc/manual_src/Columbus_manual.aux doc/manual_src/Columbus_manual.out doc/manual_src/Columbus_manual.log
|
18
|
-
|
19
|
-
cleanobj:
|
20
|
-
- -rm obj/*.o obj/dbg/*.o
|
21
|
-
+ -rm obj/*.o obj/dbg/*.o obj/shared/*.o
|
22
|
-
|
23
|
-
ifdef BUNDLEDZLIB
|
24
|
-
Z_LIB_DIR=third-party/zlib-1.2.3
|
25
|
-
@@ -118,3 +119,15 @@ Manual.pdf: doc/manual_src/Manual.tex doc/manual_src/Columbus_manual.tex
|
26
|
-
|
27
|
-
test: velvetg velveth
|
28
|
-
cd tests && ./run-tests.sh
|
29
|
-
+
|
30
|
-
+sharedobjdir:
|
31
|
-
+ mkdir -p obj/shared
|
32
|
-
+
|
33
|
-
+obj/shared: sharedobjdir $(OBJSHARED)
|
34
|
-
+
|
35
|
-
+obj/shared/%.o: src/%.c
|
36
|
-
+ $(CC) -fPIC $(CFLAGS) $(DEBUG) $(DEF) -c $? -o $@
|
37
|
-
+
|
38
|
-
+shared: zlib obj/shared
|
39
|
-
+ cd obj/shared && gcc -shared -Wl,-soname,libvelvet.so.1 -o libvelvet.so.1.0 allocArray.o autoOpen.o binarySequences.o concatenatedGraph.o concatenatedPreGraph.o correctedGraph.o dfibHeap.o dfib.o fibHeap.o fib.o graph.o graphReConstruction.o graphStats.o kmer.o kmerOccurenceTable.o locallyCorrectedGraph.o passageMarker.o preGraphConstruction.o preGraph.o readCoherentGraph.o readSet.o recycleBin.o roadMap.o scaffold.o shortReadPairs.o splay.o splayTable.o tightString.o utility.o
|
40
|
-
+
|
41
|
-
diff --git a/src/utility.c b/src/utility.c
|
42
|
-
index d402629..126b386 100644
|
43
|
-
--- a/src/utility.c
|
44
|
-
+++ b/src/utility.c
|
45
|
-
@@ -97,6 +97,7 @@ void exitErrorf(int exitStatus, boolean showErrno, const char *format, ...)
|
46
|
-
|
47
|
-
void velvetLog(const char *format, ...)
|
48
|
-
{
|
49
|
-
+/*
|
50
|
-
static boolean timeIsSet = false;
|
51
|
-
static struct timeval tvStart;
|
52
|
-
struct timeval tvNow;
|
53
|
-
@@ -120,6 +121,7 @@ void velvetLog(const char *format, ...)
|
54
|
-
#ifdef DEBUG
|
55
|
-
fflush(stdout);
|
56
|
-
#endif
|
57
|
-
+*/
|
58
|
-
}
|
59
|
-
|
60
|
-
void velvetFprintf(FILE * file, const char * format, ...)
|
@@ -1,72 +0,0 @@
|
|
1
|
-
require 'ffi'
|
2
|
-
require 'pry'
|
3
|
-
|
4
|
-
module Bio
|
5
|
-
module Velvet
|
6
|
-
class Underground
|
7
|
-
extend FFI::Library
|
8
|
-
ffi_lib File.join(File.dirname(__FILE__),'external','libvelvet.so.1.0')
|
9
|
-
|
10
|
-
class BinarySequenceStore
|
11
|
-
# Parse a CnyUnifiedSeq file in so that sequences can be accessed
|
12
|
-
def initialize(cny_unified_seq_file)
|
13
|
-
readset_pointer = Bio::Velvet::Underground.importCnyReadSet cny_unified_seq_file
|
14
|
-
@readset = Bio::Velvet::Underground::ReadSet.new(readset_pointer)
|
15
|
-
end
|
16
|
-
|
17
|
-
# Return a sequence from the store given its read ID.
|
18
|
-
def [](sequence_id)
|
19
|
-
if sequence_id==0 or sequence_id > @readset[:readCount]
|
20
|
-
raise "Invalid sequence_id #{sequence_id}"
|
21
|
-
end
|
22
|
-
|
23
|
-
pointer = Bio::Velvet::Underground.getTightStringInArray(
|
24
|
-
@readset[:tSequences], sequence_id-1
|
25
|
-
)
|
26
|
-
Bio::Velvet::Underground.readTightString pointer
|
27
|
-
end
|
28
|
-
|
29
|
-
# Number of sequences in this store
|
30
|
-
def length
|
31
|
-
@readset[:readCount]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
# struct readSet_st {
|
37
|
-
# char **sequences;
|
38
|
-
# TightString *tSequences;
|
39
|
-
# char **labels;
|
40
|
-
# char *tSeqMem;
|
41
|
-
# Quality **confidenceScores;
|
42
|
-
# Probability **kmerProbabilities;
|
43
|
-
# IDnum *mateReads;
|
44
|
-
# Category *categories;
|
45
|
-
# unsigned char *secondInPair;
|
46
|
-
# IDnum readCount;
|
47
|
-
# };
|
48
|
-
class ReadSet < FFI::Struct
|
49
|
-
layout :sequences, :pointer, # char **sequences;
|
50
|
-
:tSequences, :pointer, # TightString *tSequences;
|
51
|
-
:labels, :pointer, # char **labels;
|
52
|
-
:tSeqMem, :pointer, # char *tSeqMem; #TODO: they don't really mean char* here - meant as an unsigned short?
|
53
|
-
:confidenceScores, :pointer, # Quality **confidenceScores;
|
54
|
-
:kmerProbabilities, :pointer, # Probability **kmerProbabilities;
|
55
|
-
:mateReads, :pointer, # IDnum *mateReads;
|
56
|
-
:categories, :pointer, # Category *categories;
|
57
|
-
:secondInPair, :pointer, # unsigned char *secondInPair;
|
58
|
-
:readCount, :int32 # IDnum readCount;
|
59
|
-
end
|
60
|
-
|
61
|
-
# ReadSet *importCnyReadSet(char *filename);
|
62
|
-
attach_function :importCnyReadSet, [:string], :pointer
|
63
|
-
|
64
|
-
# char *readTightString(TightString * tString); #tightString.h
|
65
|
-
attach_function :readTightString, [:pointer], :string
|
66
|
-
|
67
|
-
# TightString *getTightStringInArray(TightString * tString,
|
68
|
-
# IDnum position);
|
69
|
-
attach_function :getTightStringInArray, [:pointer, :int32], :pointer
|
70
|
-
end
|
71
|
-
end
|
72
|
-
end
|