bio-polyploid-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,106 @@
#!/usr/bin/env ruby
#
# snps_between_bams.rb
#
# Builds the consensus (with ambiguity codes) of every selected reference
# entry from two BAM files aligned to the same FASTA reference and, for each
# entry whose two consensuses differ, reports:
#   * count_ambiguities and upper_case_count of each consensus,
#   * the ambiguity density of each consensus, scaled to block_size bases,
#   * the number of SNPs between the two consensuses.
#
# Usage:
#   snps_between_bams.rb REFERENCE.fa FIRST.bam SECOND.bam OUTPUT_PREFIX \
#                        [MIN_COV] [CHUNK] [CHUNK_SIZE]
#
#   MIN_COV     minimum coverage handed to consensus_with_ambiguities
#               (defaults to 10 when omitted).
#   CHUNK       zero-based index of the chunk of reference entries to process.
#   CHUNK_SIZE  number of reference entries per chunk. Both CHUNK and
#               CHUNK_SIZE fall back to 0 when omitted, which selects no
#               entries at all.
#
# Output files:
#   OUTPUT_PREFIX_<block_size>_<min_cov>_table.<chunk>.csv  per-entry table (tab separated)
#   OUTPUT_PREFIX_<min_cov>.<chunk>.fa                      both consensuses of each reported entry
#   OUTPUT_PREFIX_<block_size>_<min_cov>_hist.<chunk>.csv   histogram of the per-block densities

require 'bio'
require 'rubygems'
require 'pathname'
require 'bio-samtools'

require 'set'

$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path = File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
$stderr.puts "Loading: #{path}"
require path

fasta_db = Bio::DB::Fasta::FastaFile.new(ARGV[0])
fasta_db.load_fai_entries
bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
bam2 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})

output_prefix = ARGV[3]

block_size = 300

# Integer#to_i never returns nil/false (0 is truthy in Ruby), so the presence
# of the argument itself has to be tested for the default to ever apply.
min_cov    = ARGV[4] ? ARGV[4].to_i : 10
chunk      = ARGV[5].to_i
chunk_size = ARGV[6].to_i

# Half-open range of entry positions that belong to the requested chunk.
first_entry = chunk * chunk_size
selected    = first_entry...(first_entry + chunk_size)

main_table = "#{output_prefix}_#{block_size}_#{min_cov}_table.#{chunk}.csv"
fasta_path = "#{output_prefix}_#{min_cov}.#{chunk}.fa"
hist_table = "#{output_prefix}_#{block_size}_#{min_cov}_hist.#{chunk}.csv"

hist_1 = Hash.new(0)
hist_2 = Hash.new(0)

# The block form of File.open closes both files even if the loop aborts.
File.open(main_table, "w") do |table_file|
  # The last column keeps its historical "snps_per_1k_tot" label although it
  # is scaled by block_size like the other density columns; renaming it would
  # change the header that existing consumers of the table read.
  table_file.puts "gene\tlength\tsnps_1\tcalled_1\tsnps_per_#{block_size}_1\tsnps_2\tcalled_2\tsnps_per_#{block_size}_2\tsnps_tot\tsnps_per_1k_tot"

  File.open(fasta_path, "w") do |fasta_file|
    fasta_db.index.entries.each_with_index do |entry, position|
      next unless selected.cover?(position)

      region = entry.get_full_region

      begin
        cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
        cons_2 = bam2.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})

        # Only entries whose consensuses differ are reported.
        next if cons_1 == cons_2

        snps_1 = cons_1.count_ambiguities
        snps_2 = cons_2.count_ambiguities

        called_1 = cons_1.upper_case_count
        called_2 = cons_2.upper_case_count

        snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)

        snps_per_block_1   = (block_size * snps_1.to_f) / called_1
        snps_per_block_2   = (block_size * snps_2.to_f) / called_2
        snps_per_block_tot = (block_size * snps_tot.to_f) / region.size

        # Float#to_i raises FloatDomainError for NaN/Infinity (e.g. when
        # upper_case_count is 0). Resolve both bins before touching either
        # histogram so a failing entry is left out of both of them, exactly
        # as it is left out of the table.
        bin_1 = snps_per_block_1.to_i
        bin_2 = snps_per_block_2.to_i
        hist_1[bin_1] += 1
        hist_2[bin_2] += 1

        table_file.print "#{entry.id}\t#{region.size}\t"
        table_file.print "#{snps_1}\t#{called_1}\t#{snps_per_block_1}\t"
        table_file.print "#{snps_2}\t#{called_2}\t#{snps_per_block_2}\t"
        table_file.print "#{snps_tot}\t#{snps_per_block_tot}\n"

        fasta_file.puts ">#{entry.id}_1"
        fasta_file.puts "#{cons_1}"
        fasta_file.puts ">#{entry.id}_2"
        fasta_file.puts "#{cons_2}"
      rescue StandardError => e
        # Best effort: a failing entry is reported and skipped. StandardError
        # (rather than Exception) lets Ctrl-C and exit requests stop the run.
        $stderr.puts "Unable to process #{region}: #{e}"
      end
    end
  end
end

File.open(hist_table, "w") do |hist_file|
  hist_file.puts "SNPs/#{block_size}\thist_1\thist_2\n"
  # Sorted union of the integer bins seen in either histogram. SortedSet is
  # no longer part of the 'set' library since Ruby 3.0, so plain arrays are
  # used instead; missing bins read as 0 through the Hash default.
  (hist_1.keys | hist_2.keys).sort.each do |bin|
    hist_file.puts "#{bin}\t#{hist_1[bin]}\t#{hist_2[bin]}"
  end
end
@@ -0,0 +1,139 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: bio-polyploid-tools 0.1.0 ruby lib
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = "bio-polyploid-tools"
9
+ s.version = "0.1.0"
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
13
+ s.authors = ["Ricardo H. Ramirez-Gonzalez"]
14
+ s.date = "2014-03-31"
15
+ s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
16
+ s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
17
+ s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
18
+ s.extra_rdoc_files = [
19
+ "README"
20
+ ]
21
+ s.files = [
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "README",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "bin/bfr.rb",
28
+ "bin/count_variations.rb",
29
+ "bin/filter_blat_by_target_coverage.rb",
30
+ "bin/find_best_blat_hit.rb",
31
+ "bin/hexaploid_primers.rb",
32
+ "bin/homokaryot_primers.rb",
33
+ "bin/map_markers_to_contigs.rb",
34
+ "bin/markers_in_region.rb",
35
+ "bin/polymarker.rb",
36
+ "bin/snps_between_bams.rb",
37
+ "bio-polyploid-tools.gemspec",
38
+ "conf/defaults.rb",
39
+ "conf/primer3_config/dangle.dh",
40
+ "conf/primer3_config/dangle.ds",
41
+ "conf/primer3_config/interpretations/dangle_i.dh",
42
+ "conf/primer3_config/interpretations/dangle_i.ds",
43
+ "conf/primer3_config/interpretations/loops_i.dh",
44
+ "conf/primer3_config/interpretations/loops_i.ds",
45
+ "conf/primer3_config/interpretations/stack_i.dh",
46
+ "conf/primer3_config/interpretations/stack_i.ds",
47
+ "conf/primer3_config/interpretations/stackmm_i_mm.dh",
48
+ "conf/primer3_config/interpretations/stackmm_i_mm.ds",
49
+ "conf/primer3_config/interpretations/tetraloop_i.dh",
50
+ "conf/primer3_config/interpretations/tetraloop_i.ds",
51
+ "conf/primer3_config/interpretations/triloop_i.dh",
52
+ "conf/primer3_config/interpretations/triloop_i.ds",
53
+ "conf/primer3_config/interpretations/tstack2_i.dh",
54
+ "conf/primer3_config/interpretations/tstack2_i.ds",
55
+ "conf/primer3_config/interpretations/tstack_i.dh",
56
+ "conf/primer3_config/interpretations/tstack_i.ds",
57
+ "conf/primer3_config/interpretations/tstack_tm_inf_i.dh",
58
+ "conf/primer3_config/interpretations/tstack_tm_inf_i.ds",
59
+ "conf/primer3_config/loops.dh",
60
+ "conf/primer3_config/loops.ds",
61
+ "conf/primer3_config/stack.dh",
62
+ "conf/primer3_config/stack.ds",
63
+ "conf/primer3_config/stackmm.dh",
64
+ "conf/primer3_config/stackmm.ds",
65
+ "conf/primer3_config/tetraloop.dh",
66
+ "conf/primer3_config/tetraloop.ds",
67
+ "conf/primer3_config/triloop.dh",
68
+ "conf/primer3_config/triloop.ds",
69
+ "conf/primer3_config/tstack.dh",
70
+ "conf/primer3_config/tstack2.dh",
71
+ "conf/primer3_config/tstack2.ds",
72
+ "conf/primer3_config/tstack_tm_inf.ds",
73
+ "lib/bio/BFRTools.rb",
74
+ "lib/bio/BIOExtensions.rb",
75
+ "lib/bio/PolyploidTools/ChromosomeArm.rb",
76
+ "lib/bio/PolyploidTools/ExonContainer.rb",
77
+ "lib/bio/PolyploidTools/Marker.rb",
78
+ "lib/bio/PolyploidTools/PrimerRegion.rb",
79
+ "lib/bio/PolyploidTools/SNP.rb",
80
+ "lib/bio/PolyploidTools/SNPSequence.rb",
81
+ "lib/bio/SAMToolsExtensions.rb",
82
+ "lib/bio/db/exonerate.rb",
83
+ "lib/bio/db/fastadb.rb",
84
+ "lib/bio/db/primer3.rb",
85
+ "lib/bioruby-polyploid-tools.rb",
86
+ "test/data/BS00068396_51.fa",
87
+ "test/data/BS00068396_51_contigs.aln",
88
+ "test/data/BS00068396_51_contigs.dnd",
89
+ "test/data/BS00068396_51_contigs.fa",
90
+ "test/data/BS00068396_51_exonerate.tab",
91
+ "test/data/BS00068396_51_genes.txt",
92
+ "test/data/LIB1716.bam",
93
+ "test/data/LIB1716.bam.bai",
94
+ "test/data/LIB1719.bam",
95
+ "test/data/LIB1719.bam.bai",
96
+ "test/data/LIB1721.bam",
97
+ "test/data/LIB1721.bam.bai",
98
+ "test/data/LIB1722.bam",
99
+ "test/data/LIB1722.bam.bai",
100
+ "test/data/S22380157.fa",
101
+ "test/data/S22380157.fa.fai",
102
+ "test/data/Test3Aspecific.csv",
103
+ "test/data/Test3Aspecific_contigs.fa",
104
+ "test/data/patological_cases5D.csv",
105
+ "test/data/short_primer_design_test.csv",
106
+ "test/data/test_primer3_error.csv",
107
+ "test/data/test_primer3_error_contigs.fa",
108
+ "test/test_bfr.rb",
109
+ "test/test_exon_container.rb",
110
+ "test/test_exonearate.rb",
111
+ "test/test_snp_parsing.rb"
112
+ ]
113
+ s.homepage = "http://github.com/tgac/bioruby-polyploid-tools"
114
+ s.licenses = ["MIT"]
115
+ s.rubygems_version = "2.2.1"
116
+ s.summary = "Tool to work with polyploids, NGS and molecular biology"
117
+
118
+ if s.respond_to? :specification_version then
119
+ s.specification_version = 4
120
+
121
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
122
+ s.add_runtime_dependency(%q<bio>, ["= 1.4.2"])
123
+ s.add_runtime_dependency(%q<bio-samtools>, ["= 0.6.2"])
124
+ s.add_runtime_dependency(%q<rake>, [">= 0"])
125
+ s.add_runtime_dependency(%q<jeweler>, [">= 0"])
126
+ else
127
+ s.add_dependency(%q<bio>, ["= 1.4.2"])
128
+ s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
129
+ s.add_dependency(%q<rake>, [">= 0"])
130
+ s.add_dependency(%q<jeweler>, [">= 0"])
131
+ end
132
+ else
133
+ s.add_dependency(%q<bio>, ["= 1.4.2"])
134
+ s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
135
+ s.add_dependency(%q<rake>, [">= 0"])
136
+ s.add_dependency(%q<jeweler>, [">= 0"])
137
+ end
138
+ end
139
+
@@ -0,0 +1 @@
1
# NOTE(review): machine-specific absolute path (a user's home directory); it will not exist on other hosts.
# NOTE(review): this is a top-level local variable, so it is not visible to a file that require/load-s
# this one — confirm how the value is actually consumed.
+ path_to_chromosomes="/Users/ramirezr/Documents/PHD/201305_Databases/iwgcs"
@@ -0,0 +1,128 @@
1
+ 0
2
+ 0
3
+ 0
4
+ 0
5
+ 0
6
+ 0
7
+ 0
8
+ 0
9
+ 0
10
+ 0
11
+ 0
12
+ 0
13
+ -500
14
+ 4700
15
+ -4100
16
+ -3800
17
+ 0
18
+ 0
19
+ 0
20
+ 0
21
+ 0
22
+ 0
23
+ 0
24
+ 0
25
+ -5900
26
+ -2600
27
+ -3200
28
+ -5200
29
+ 0
30
+ 0
31
+ 0
32
+ 0
33
+ 0
34
+ 0
35
+ 0
36
+ 0
37
+ -2100
38
+ -200
39
+ -3900
40
+ -4400
41
+ 0
42
+ 0
43
+ 0
44
+ 0
45
+ 0
46
+ 0
47
+ 0
48
+ 0
49
+ -700
50
+ 4400
51
+ -1600
52
+ 2900
53
+ 0
54
+ 0
55
+ 0
56
+ 0
57
+ 0
58
+ 0
59
+ 0
60
+ 0
61
+ 0
62
+ 0
63
+ 0
64
+ 0
65
+ 0
66
+ 0
67
+ 0
68
+ 0
69
+ 0
70
+ 0
71
+ 0
72
+ 0
73
+ 0
74
+ 0
75
+ 0
76
+ 0
77
+ -2900
78
+ -4100
79
+ -4200
80
+ -200
81
+ 0
82
+ 0
83
+ 0
84
+ 0
85
+ 0
86
+ 0
87
+ 0
88
+ 0
89
+ -3700
90
+ -4000
91
+ -3900
92
+ -4900
93
+ 0
94
+ 0
95
+ 0
96
+ 0
97
+ 0
98
+ 0
99
+ 0
100
+ 0
101
+ -6300
102
+ -4400
103
+ -5100
104
+ -4000
105
+ 0
106
+ 0
107
+ 0
108
+ 0
109
+ 0
110
+ 0
111
+ 0
112
+ 0
113
+ 200
114
+ 600
115
+ -1100
116
+ -6900
117
+ 0
118
+ 0
119
+ 0
120
+ 0
121
+ 0
122
+ 0
123
+ 0
124
+ 0
125
+ 0
126
+ 0
127
+ 0
128
+ 0
@@ -0,0 +1,128 @@
1
+ inf
2
+ inf
3
+ inf
4
+ inf
5
+ inf
6
+ inf
7
+ inf
8
+ inf
9
+ inf
10
+ inf
11
+ inf
12
+ inf
13
+ -1.1
14
+ 14.2
15
+ -13.1
16
+ -12.6
17
+ inf
18
+ inf
19
+ inf
20
+ inf
21
+ inf
22
+ inf
23
+ inf
24
+ inf
25
+ -16.5
26
+ -7.4
27
+ -10.4
28
+ -15
29
+ inf
30
+ inf
31
+ inf
32
+ inf
33
+ inf
34
+ inf
35
+ inf
36
+ inf
37
+ -3.9
38
+ -0.1
39
+ -11.2
40
+ -13.1
41
+ inf
42
+ inf
43
+ inf
44
+ inf
45
+ inf
46
+ inf
47
+ inf
48
+ inf
49
+ -0.8
50
+ 14.9
51
+ -3.6
52
+ 10.4
53
+ inf
54
+ inf
55
+ inf
56
+ inf
57
+ inf
58
+ inf
59
+ inf
60
+ inf
61
+ inf
62
+ inf
63
+ inf
64
+ inf
65
+ inf
66
+ inf
67
+ inf
68
+ inf
69
+ inf
70
+ inf
71
+ inf
72
+ inf
73
+ inf
74
+ inf
75
+ inf
76
+ inf
77
+ -7.6
78
+ -13
79
+ -15
80
+ -0.5
81
+ inf
82
+ inf
83
+ inf
84
+ inf
85
+ inf
86
+ inf
87
+ inf
88
+ inf
89
+ -10
90
+ -11.9
91
+ -10.9
92
+ -13.8
93
+ inf
94
+ inf
95
+ inf
96
+ inf
97
+ inf
98
+ inf
99
+ inf
100
+ inf
101
+ -17.1
102
+ -12.6
103
+ -14
104
+ -10.9
105
+ inf
106
+ inf
107
+ inf
108
+ inf
109
+ inf
110
+ inf
111
+ inf
112
+ inf
113
+ 2.3
114
+ 3.3
115
+ -1.6
116
+ -20
117
+ inf
118
+ inf
119
+ inf
120
+ inf
121
+ inf
122
+ inf
123
+ inf
124
+ inf
125
+ inf
126
+ inf
127
+ inf
128
+ inf