bio-polyploid-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,106 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bio'
4
+ require 'rubygems'
5
+ require 'pathname'
6
+ require 'bio-samtools'
7
+
8
+ require 'set'
9
+
10
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
11
+ $: << File.expand_path('.')
12
+ path=File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
13
+ $stderr.puts "Loading: #{path}"
14
+ require path
15
+
16
+
17
+
# Compare the consensus sequences that two BAM files produce over the entries
# of a shared FASTA reference, and report the entries where they differ.
#
# Positional arguments (read from ARGV):
#   0  reference FASTA; used for the FASTA index and by both BAM readers
#   1  first BAM file
#   2  second BAM file
#   3  prefix for every output file
#   4  minimum coverage passed to consensus_with_ambiguities (default: 10)
#   5  zero-based chunk number (default: 0)
#   6  number of reference entries per chunk (default: 0, so nothing is processed)
#
# Output files:
#   <prefix>_<block_size>_<min_cov>_table.<chunk>.csv  one tab-separated row per differing entry
#   <prefix>_<min_cov>.<chunk>.fa                      both consensus sequences of each differing entry
#   <prefix>_<block_size>_<min_cov>_hist.<chunk>.csv   per-BAM histogram of SNPs per block_size bases
usage = "Usage: #{File.basename($0)} <reference.fa> <first.bam> <second.bam> <output_prefix> [min_cov] [chunk] [chunk_size]"
abort usage if ARGV.size < 4

reference_path, bam_path_1, bam_path_2, output_prefix = ARGV[0, 4]

fasta_db = Bio::DB::Fasta::FastaFile.new(reference_path)
fasta_db.load_fai_entries
bam1 = Bio::DB::Sam.new({:fasta=>reference_path, :bam=>bam_path_1})
bam2 = Bio::DB::Sam.new({:fasta=>reference_path, :bam=>bam_path_2})

block_size = 300

# Test for the presence of the argument, not the converted value: nil.to_i is 0
# and 0 is truthy in Ruby, so checking the integer can never select the default.
min_cov    = ARGV[4] ? ARGV[4].to_i : 10
chunk      = ARGV[5].to_i
chunk_size = ARGV[6].to_i

if chunk_size <= 0
  $stderr.puts "Warning: chunk_size is #{chunk_size}; no reference entries will be processed"
end

# Half-open window [first_entry, last_entry) of index positions handled by this run.
first_entry = chunk * chunk_size
last_entry  = first_entry + chunk_size

hist_1 = Hash.new(0)
hist_2 = Hash.new(0)

main_table = "#{output_prefix}_#{block_size}_#{min_cov}_table.#{chunk}.csv"
fasta_path = "#{output_prefix}_#{min_cov}.#{chunk}.fa"

# Block form guarantees both files are closed even if an error escapes the loop.
File.open(main_table, "w") do |table_file|
  table_file.puts "gene\tlength\tsnps_1\tcalled_1\tsnps_per_#{block_size}_1\tsnps_2\tcalled_2\tsnps_per_#{block_size}_2\tsnps_tot\tsnps_per_1k_tot"

  File.open(fasta_path, "w") do |fasta_file|
    entry_index = -1
    fasta_db.index.entries.each do |r|
      entry_index += 1
      next if entry_index < first_entry || entry_index >= last_entry

      region = r.get_full_region

      begin
        cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
        cons_2 = bam2.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})

        # Entries whose two consensus sequences are identical are not reported.
        if cons_1 != cons_2
          snps_1 = cons_1.count_ambiguities
          snps_2 = cons_2.count_ambiguities

          called_1 = cons_1.upper_case_count
          called_2 = cons_2.upper_case_count

          snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)

          # Densities are scaled to block_size bases.
          snps_per_block_1   = (block_size * snps_1.to_f) / called_1
          snps_per_block_2   = (block_size * snps_2.to_f) / called_2
          snps_per_block_tot = (block_size * snps_tot.to_f) / region.size

          # Float#to_i raises FloatDomainError for NaN/Infinity (e.g. a zero
          # called count). Convert both bins before touching either histogram
          # so a skipped entry never leaves the histograms half-updated.
          bin_1 = snps_per_block_1.to_i
          bin_2 = snps_per_block_2.to_i
          hist_1[bin_1] += 1
          hist_2[bin_2] += 1

          table_file.print "#{r.id}\t#{region.size}\t"
          table_file.print "#{snps_1}\t#{called_1}\t#{snps_per_block_1}\t"
          table_file.print "#{snps_2}\t#{called_2}\t#{snps_per_block_2}\t"
          table_file.print "#{snps_tot}\t#{snps_per_block_tot}\n"
          fasta_file.puts ">#{r.id}_1"
          fasta_file.puts "#{cons_1}"
          fasta_file.puts ">#{r.id}_2"
          fasta_file.puts "#{cons_2}"
        end
      rescue StandardError => e
        # Best effort: report the entry and carry on. StandardError (rather
        # than Exception) lets Interrupt and SystemExit stop the run.
        $stderr.puts "Unable to process #{region}: #{e.to_s}"
      end
    end
  end
end

hist_table = "#{output_prefix}_#{block_size}_#{min_cov}_hist.#{chunk}.csv"
File.open(hist_table, "w") do |hist_file|
  hist_file.puts "SNPs/#{block_size}\thist_1\thist_2\n"
  # Sorted union of the bins seen in either BAM; avoids SortedSet, which the
  # 'set' library no longer provides on Ruby >= 3.0.
  (hist_1.keys | hist_2.keys).sort.each do |k|
    hist_file.puts "#{k}\t#{hist_1[k]}\t#{hist_2[k]}"
  end
end
104
+
105
+
106
+
@@ -0,0 +1,139 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: bio-polyploid-tools 0.1.0 ruby lib
6
+
# Specification of the bio-polyploid-tools gem, version 0.1.0: metadata, the
# packaged file list, the installed executables and the gem's dependencies.
Gem::Specification.new do |s|
  s.name = "bio-polyploid-tools"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.require_paths = ["lib"]
  s.authors = ["Ricardo H. Ramirez-Gonzalez"]
  s.date = "2014-03-31"
  s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
  s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
  s.executables = %w[
    bfr.rb
    count_variations.rb
    filter_blat_by_target_coverage.rb
    find_best_blat_hit.rb
    hexaploid_primers.rb
    homokaryot_primers.rb
    map_markers_to_contigs.rb
    markers_in_region.rb
    polymarker.rb
    snps_between_bams.rb
  ]
  s.extra_rdoc_files = ["README"]

  # Every executable is shipped from bin/, in the same order as listed above.
  bin_files = s.executables.map { |script| "bin/#{script}" }

  # Expands each table name into its ".dh" and ".ds" files inside +dir+.
  dh_ds_pairs = lambda do |dir, table_names|
    table_names.flat_map { |table| ["#{dir}/#{table}.dh", "#{dir}/#{table}.ds"] }
  end

  primer3_dir = "conf/primer3_config"
  primer3_files =
    dh_ds_pairs.call(primer3_dir, %w[dangle]) +
    dh_ds_pairs.call("#{primer3_dir}/interpretations",
                     %w[dangle_i loops_i stack_i stackmm_i_mm tetraloop_i
                        triloop_i tstack2_i tstack_i tstack_tm_inf_i]) +
    dh_ds_pairs.call(primer3_dir, %w[loops stack stackmm tetraloop triloop]) +
    # These tables are not shipped as complete .dh/.ds pairs, so they are spelled out.
    %w[tstack.dh tstack2.dh tstack2.ds tstack_tm_inf.ds].map { |table| "#{primer3_dir}/#{table}" }

  lib_files = %w[
    lib/bio/BFRTools.rb
    lib/bio/BIOExtensions.rb
    lib/bio/PolyploidTools/ChromosomeArm.rb
    lib/bio/PolyploidTools/ExonContainer.rb
    lib/bio/PolyploidTools/Marker.rb
    lib/bio/PolyploidTools/PrimerRegion.rb
    lib/bio/PolyploidTools/SNP.rb
    lib/bio/PolyploidTools/SNPSequence.rb
    lib/bio/SAMToolsExtensions.rb
    lib/bio/db/exonerate.rb
    lib/bio/db/fastadb.rb
    lib/bio/db/primer3.rb
    lib/bioruby-polyploid-tools.rb
  ]

  test_files = %w[
    test/data/BS00068396_51.fa
    test/data/BS00068396_51_contigs.aln
    test/data/BS00068396_51_contigs.dnd
    test/data/BS00068396_51_contigs.fa
    test/data/BS00068396_51_exonerate.tab
    test/data/BS00068396_51_genes.txt
    test/data/LIB1716.bam
    test/data/LIB1716.bam.bai
    test/data/LIB1719.bam
    test/data/LIB1719.bam.bai
    test/data/LIB1721.bam
    test/data/LIB1721.bam.bai
    test/data/LIB1722.bam
    test/data/LIB1722.bam.bai
    test/data/S22380157.fa
    test/data/S22380157.fa.fai
    test/data/Test3Aspecific.csv
    test/data/Test3Aspecific_contigs.fa
    test/data/patological_cases5D.csv
    test/data/short_primer_design_test.csv
    test/data/test_primer3_error.csv
    test/data/test_primer3_error_contigs.fa
    test/test_bfr.rb
    test/test_exon_container.rb
    test/test_exonearate.rb
    test/test_snp_parsing.rb
  ]

  s.files =
    %w[Gemfile Gemfile.lock README Rakefile VERSION] +
    bin_files +
    %w[bio-polyploid-tools.gemspec conf/defaults.rb] +
    primer3_files +
    lib_files +
    test_files

  s.homepage = "http://github.com/tgac/bioruby-polyploid-tools"
  s.licenses = ["MIT"]
  s.rubygems_version = "2.2.1"
  s.summary = "Tool to work with polyploids, NGS and molecular biology"

  # The same four dependencies are declared whichever RubyGems API is available.
  dependencies = [
    ["bio",          "= 1.4.2"],
    ["bio-samtools", "= 0.6.2"],
    ["rake",         ">= 0"],
    ["jeweler",      ">= 0"]
  ]

  # add_runtime_dependency is used only when the specification supports
  # specification_version and RubyGems is at least 1.2.0; otherwise the
  # declaration falls back to add_dependency.
  declare = :add_dependency
  if s.respond_to? :specification_version
    s.specification_version = 4
    declare = :add_runtime_dependency if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  dependencies.each do |gem_name, requirement|
    s.send(declare, gem_name, [requirement])
  end
end
139
+
@@ -0,0 +1 @@
# Hard-coded absolute filesystem path assigned to the local variable
# path_to_chromosomes.
# NOTE(review): presumably the directory holding the chromosome databases, and
# specific to one developer machine - confirm before relying on it elsewhere.
# NOTE(review): a top-level local variable is not visible to a file that
# merely `require`s this one - confirm how this setting is consumed.
path_to_chromosomes = "/Users/ramirezr/Documents/PHD/201305_Databases/iwgcs"
@@ -0,0 +1,128 @@
1
+ 0
2
+ 0
3
+ 0
4
+ 0
5
+ 0
6
+ 0
7
+ 0
8
+ 0
9
+ 0
10
+ 0
11
+ 0
12
+ 0
13
+ -500
14
+ 4700
15
+ -4100
16
+ -3800
17
+ 0
18
+ 0
19
+ 0
20
+ 0
21
+ 0
22
+ 0
23
+ 0
24
+ 0
25
+ -5900
26
+ -2600
27
+ -3200
28
+ -5200
29
+ 0
30
+ 0
31
+ 0
32
+ 0
33
+ 0
34
+ 0
35
+ 0
36
+ 0
37
+ -2100
38
+ -200
39
+ -3900
40
+ -4400
41
+ 0
42
+ 0
43
+ 0
44
+ 0
45
+ 0
46
+ 0
47
+ 0
48
+ 0
49
+ -700
50
+ 4400
51
+ -1600
52
+ 2900
53
+ 0
54
+ 0
55
+ 0
56
+ 0
57
+ 0
58
+ 0
59
+ 0
60
+ 0
61
+ 0
62
+ 0
63
+ 0
64
+ 0
65
+ 0
66
+ 0
67
+ 0
68
+ 0
69
+ 0
70
+ 0
71
+ 0
72
+ 0
73
+ 0
74
+ 0
75
+ 0
76
+ 0
77
+ -2900
78
+ -4100
79
+ -4200
80
+ -200
81
+ 0
82
+ 0
83
+ 0
84
+ 0
85
+ 0
86
+ 0
87
+ 0
88
+ 0
89
+ -3700
90
+ -4000
91
+ -3900
92
+ -4900
93
+ 0
94
+ 0
95
+ 0
96
+ 0
97
+ 0
98
+ 0
99
+ 0
100
+ 0
101
+ -6300
102
+ -4400
103
+ -5100
104
+ -4000
105
+ 0
106
+ 0
107
+ 0
108
+ 0
109
+ 0
110
+ 0
111
+ 0
112
+ 0
113
+ 200
114
+ 600
115
+ -1100
116
+ -6900
117
+ 0
118
+ 0
119
+ 0
120
+ 0
121
+ 0
122
+ 0
123
+ 0
124
+ 0
125
+ 0
126
+ 0
127
+ 0
128
+ 0
@@ -0,0 +1,128 @@
1
+ inf
2
+ inf
3
+ inf
4
+ inf
5
+ inf
6
+ inf
7
+ inf
8
+ inf
9
+ inf
10
+ inf
11
+ inf
12
+ inf
13
+ -1.1
14
+ 14.2
15
+ -13.1
16
+ -12.6
17
+ inf
18
+ inf
19
+ inf
20
+ inf
21
+ inf
22
+ inf
23
+ inf
24
+ inf
25
+ -16.5
26
+ -7.4
27
+ -10.4
28
+ -15
29
+ inf
30
+ inf
31
+ inf
32
+ inf
33
+ inf
34
+ inf
35
+ inf
36
+ inf
37
+ -3.9
38
+ -0.1
39
+ -11.2
40
+ -13.1
41
+ inf
42
+ inf
43
+ inf
44
+ inf
45
+ inf
46
+ inf
47
+ inf
48
+ inf
49
+ -0.8
50
+ 14.9
51
+ -3.6
52
+ 10.4
53
+ inf
54
+ inf
55
+ inf
56
+ inf
57
+ inf
58
+ inf
59
+ inf
60
+ inf
61
+ inf
62
+ inf
63
+ inf
64
+ inf
65
+ inf
66
+ inf
67
+ inf
68
+ inf
69
+ inf
70
+ inf
71
+ inf
72
+ inf
73
+ inf
74
+ inf
75
+ inf
76
+ inf
77
+ -7.6
78
+ -13
79
+ -15
80
+ -0.5
81
+ inf
82
+ inf
83
+ inf
84
+ inf
85
+ inf
86
+ inf
87
+ inf
88
+ inf
89
+ -10
90
+ -11.9
91
+ -10.9
92
+ -13.8
93
+ inf
94
+ inf
95
+ inf
96
+ inf
97
+ inf
98
+ inf
99
+ inf
100
+ inf
101
+ -17.1
102
+ -12.6
103
+ -14
104
+ -10.9
105
+ inf
106
+ inf
107
+ inf
108
+ inf
109
+ inf
110
+ inf
111
+ inf
112
+ inf
113
+ 2.3
114
+ 3.3
115
+ -1.6
116
+ -20
117
+ inf
118
+ inf
119
+ inf
120
+ inf
121
+ inf
122
+ inf
123
+ inf
124
+ inf
125
+ inf
126
+ inf
127
+ inf
128
+ inf