bio-gngm 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +33 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +53 -0
  7. data/VERSION +1 -0
  8. data/bio-gngm.gemspec +173 -0
  9. data/doc/Bio.html +129 -0
  10. data/doc/Bio/DB.html +128 -0
  11. data/doc/Bio/DB/Pileup.html +316 -0
  12. data/doc/Bio/DB/Vcf.html +683 -0
  13. data/doc/Bio/Util.html +135 -0
  14. data/doc/Bio/Util/Gngm.html +1655 -0
  15. data/doc/LICENSE_txt.html +111 -0
  16. data/doc/_index.html +169 -0
  17. data/doc/class_list.html +47 -0
  18. data/doc/created.rid +4 -0
  19. data/doc/css/common.css +1 -0
  20. data/doc/css/full_list.css +55 -0
  21. data/doc/css/style.css +322 -0
  22. data/doc/doc/created.rid +0 -0
  23. data/doc/file_list.html +52 -0
  24. data/doc/frames.html +13 -0
  25. data/doc/images/add.png +0 -0
  26. data/doc/images/bands.png +0 -0
  27. data/doc/images/brick.png +0 -0
  28. data/doc/images/brick_link.png +0 -0
  29. data/doc/images/bug.png +0 -0
  30. data/doc/images/bullet_black.png +0 -0
  31. data/doc/images/bullet_toggle_minus.png +0 -0
  32. data/doc/images/bullet_toggle_plus.png +0 -0
  33. data/doc/images/date.png +0 -0
  34. data/doc/images/delete.png +0 -0
  35. data/doc/images/find.png +0 -0
  36. data/doc/images/loadingAnimation.gif +0 -0
  37. data/doc/images/macFFBgHack.png +0 -0
  38. data/doc/images/package.png +0 -0
  39. data/doc/images/page_green.png +0 -0
  40. data/doc/images/page_white_text.png +0 -0
  41. data/doc/images/page_white_width.png +0 -0
  42. data/doc/images/plugin.png +0 -0
  43. data/doc/images/ruby.png +0 -0
  44. data/doc/images/signal.png +0 -0
  45. data/doc/images/tag_blue.png +0 -0
  46. data/doc/images/tag_green.png +0 -0
  47. data/doc/images/threads.png +0 -0
  48. data/doc/images/transparent.png +0 -0
  49. data/doc/images/wrench.png +0 -0
  50. data/doc/images/wrench_orange.png +0 -0
  51. data/doc/images/zoom.png +0 -0
  52. data/doc/index.html +88 -0
  53. data/doc/js/app.js +205 -0
  54. data/doc/js/darkfish.js +153 -0
  55. data/doc/js/full_list.js +167 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +94 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/lib/bio-gngm_rb.html +103 -0
  62. data/doc/lib/bio/util/bio-gngm_rb.html +96 -0
  63. data/doc/method_list.html +382 -0
  64. data/doc/rdoc.css +543 -0
  65. data/doc/table_of_contents.html +161 -0
  66. data/examples/.DS_Store +0 -0
  67. data/examples/make_histograms.rb +40 -0
  68. data/examples/make_threads.rb +42 -0
  69. data/examples/make_threads_isize.rb +41 -0
  70. data/examples/use_indels.rb +36 -0
  71. data/lib/bio-gngm.rb +12 -0
  72. data/lib/bio/util/bio-gngm.rb +1029 -0
  73. data/scripts/get_subseq.rb +16 -0
  74. data/scripts/make_histograms_laerfyve.rb +83 -0
  75. data/scripts/make_histograms_laerfyve_stitched.rb +59 -0
  76. data/scripts/make_threads_isize_laerfyfe.rb +52 -0
  77. data/scripts/make_threads_unmapped_laerfyfe.rb +72 -0
  78. data/scripts/make_threads_unmapped_laerfyfe_pseudo.rb +56 -0
  79. data/scripts/make_threads_unmapped_simulation.rb +54 -0
  80. data/scripts/make_threads_unmapped_simulation_immediate_region.rb +59 -0
  81. data/scripts/optimise_freq_window_size.rb +82 -0
  82. data/stitched_contigs.zip +0 -0
  83. data/test/data/ids2.txt +1 -0
  84. data/test/data/sorted.bam +0 -0
  85. data/test/data/test +0 -0
  86. data/test/data/test.bam +0 -0
  87. data/test/data/test.fa +20 -0
  88. data/test/data/test.fai +0 -0
  89. data/test/data/test.sai +0 -0
  90. data/test/data/test.tam +10 -0
  91. data/test/data/test_chr.fasta +1000 -0
  92. data/test/data/test_chr.fasta.amb +2 -0
  93. data/test/data/test_chr.fasta.ann +3 -0
  94. data/test/data/test_chr.fasta.bwt +0 -0
  95. data/test/data/test_chr.fasta.fai +1 -0
  96. data/test/data/test_chr.fasta.pac +0 -0
  97. data/test/data/test_chr.fasta.rbwt +0 -0
  98. data/test/data/test_chr.fasta.rpac +0 -0
  99. data/test/data/test_chr.fasta.rsa +0 -0
  100. data/test/data/test_chr.fasta.sa +0 -0
  101. data/test/data/testu.bam +0 -0
  102. data/test/data/testu.bam.bai +0 -0
  103. data/test/helper.rb +18 -0
  104. data/test/test_bio-gngm.rb +126 -0
  105. metadata +276 -0
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # get_subseq - print the ARGV[1]..ARGV[2] slice of every sequence in the FASTA file ARGV[0]
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-03-01.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ require 'bio'
10
+
11
+ file = Bio::FastaFormat.open(ARGV[0])
12
+ file.each do |entry|
13
+ section = entry.seq[ARGV[1].to_i..ARGV[2].to_i]
14
+ puts ">#{entry.entry_id}:#{ARGV[1]}..#{ARGV[2]}"
15
+ puts section
16
+ end
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_histograms
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
10
+ ### of the frequency of discordant SNPs. Generates plots for each.
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'bio-gngm'
15
+ require 'bio-samtools'
16
+ require 'bio'
17
+ =begin
18
+ module Bio
19
+ class DB
20
+ class Sam
21
+ def each_reference
22
+ index_stats.each_pair do |k, v|
23
+ yield k, v[:length].to_i
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ =end
30
+
31
+ #ff = Bio::FlatFile.new(Bio::FastaFormat, "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
32
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
33
+ file.each do |entry|
34
+ next if entry.length < 10000
35
+ $stderr.puts "doing #{entry.entry_id} - #{entry.length}"
36
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
37
+ :format => :bam,
38
+ :samtools => {:q => 20, :Q => 50, :r => "#{entry.entry_id}:1-#{entry.length}"},
39
+ :fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa"
40
+
41
+ )
42
+ g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
43
+ puts "found #{g.snp_positions.length} SNPs .."
44
+ begin
45
+ [1000, 2500, 5000, 10000, 25000, 50000].each do |bin_width|
46
+ next if bin_width > entry.length
47
+ file_name = "ler_contigs_#{entry.entry_id}_#{bin_width}.png"
48
+ g.frequency_histogram("#{file_name}",bin_width)
49
+ end
50
+ rescue Exception => e
51
+ puts "failed #{e}"
52
+ ensure
53
+ g.close
54
+ end
55
+ end
56
+ #sam = Bio::DB::Sam.new(:bam => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
57
+ # :fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
58
+ #sam.open
59
+ #sam.fetch("Scaffold_2", 10000, 10500).each do |a|
60
+ # puts a.qname
61
+ #end
62
+
63
+ #sam.close
64
+ #sam.each_reference do |name, length|
65
+ # $stderr.puts "skipping..."
66
+ # next if length < 10000
67
+ # $stderr.puts "doing #{name}"
68
+ # g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
69
+ # :format => :bam,
70
+ # :samtools => {:q => 20, :Q => 50},
71
+ # :fasta => "/Users/macleand/Desktop/laerfyfe/Ler-1.SHORE.scaffolds2010-09-30.bp.fa"
72
+
73
+ # )
74
+ # g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
75
+ #puts g.snp_positions.length
76
+
77
+ # [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
78
+ # file_name = "ler_contigs_#{name}_#{bin_width}.png"
79
+ # g.frequency_histogram("#{file_name}",bin_width)
80
+ # end
81
+ # g.close
82
+ #end
83
+ #sam.close
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_histograms
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
10
+ ### of the frequency of discordant SNPs. Generates plots for each.
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'bio-gngm'
15
+ require 'bio-samtools'
16
+ require 'bio'
17
+ #!/usr/bin/env ruby
18
+ #
19
+ # make_histograms
20
+ #
21
+ # Created by Dan MacLean (TSL) on 2012-01-17.
22
+ # Copyright (c) . All rights reserved.
23
+ ###################################################
24
+
25
+ ### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
26
+ ### of the frequency of discordant SNPs. Generates plots for each.
27
+
28
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
29
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
30
+ require 'bio-gngm'
31
+ require 'bio-samtools'
32
+ require 'bio'
33
+
34
+ length = 0
35
+ chr_name = ""
36
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa")
37
+ file.each do |entry|
38
+ length = entry.length
39
+ chr_name = entry.entry_id
40
+ end
41
+
42
+
43
+
44
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
45
+ :format => :bam,
46
+ :samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"},
47
+ :fasta => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa"
48
+
49
+ )
50
+ g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
51
+ puts g.snp_positions.length
52
+
53
+ [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
54
+ file_name = "stitched_contigs_snps_q_filt_#{bin_width}.png"
55
+ g.frequency_histogram("#{file_name}",bin_width)
56
+ end
57
+ g.close
58
+
59
+
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_bands
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script that takes the last sequence in the FASTA file as the region, collects insert-size frequencies and density threads,
10
+ ### clusters for different kernel adjustments and values of k, then draws the threads, bands, signal and hit counts. Generates plots for each new set of parameters
11
+
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-gngm'
16
+ require 'bio'
17
+ length = 0
18
+ chr_name = ""
19
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa")
20
+ file.each do |entry|
21
+ length = entry.length
22
+ chr_name = entry.entry_id
23
+ end
24
+
25
+ puts chr_name
26
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
27
+ :format => :bam,
28
+ :fasta => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa",
29
+ :samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"
30
+ }
31
+ )
32
+
33
+ g.get_insert_size_frequency(:ref_window_size => 750, :ref_window_slide => 750, :isize => 184)
34
+ g.collect_threads
35
+ [0.25, 0.5, 1.0].each do |kernel_adjust|
36
+ [4, 9, 11].each do | k |
37
+ begin
38
+ g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.5, :expected_chd => 0.2)
39
+ filename = "isize_#{k}_#{kernel_adjust}_all_threads.png"
40
+ g.draw_threads(filename, :draw_legend => "isize_#{k}_#{kernel_adjust}_legend.png")
41
+ filename = "isize_#{k}_#{kernel_adjust}_bands.png"
42
+ g.draw_bands(filename)
43
+ filename = "isize_#{k}_#{kernel_adjust}_signal.png"
44
+ g.draw_signal(filename)
45
+ filename = "isize_#{k}_#{kernel_adjust}_hits.png"
46
+ g.draw_hit_count(filename)
47
+ rescue Exception => e
48
+ puts "failed on #{k} #{kernel_adjust}"
49
+ puts e.message, e.backtrace
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_bands
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script that walks the last sequence in the FASTA file in windows of interval_width bases, collects unmapped-mate frequencies and threads,
10
+ ### clusters them, draws the bands for each window and writes the control and expected thread values to text files.
11
+
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-gngm'
16
+ require 'bio'
17
+ length = 0
18
+ chr_name = ""
19
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa")
20
+ file.each do |entry|
21
+ length = entry.length
22
+ chr_name = entry.entry_id
23
+ end
24
+
25
+ ctrl_thread_values = []
26
+ expected_thread_values = []
27
+ interval_width = 10000000
28
+ #puts chr_name
29
+ (1..length).step(interval_width) do |start|
30
+ stop = [start + interval_width - 1, length].min # inclusive window end: no shared base with the next window, never past the sequence end
31
+ region = "#{chr_name}:#{start}-#{stop}"
32
+
33
+ puts "analyzing - #{region}"
34
+
35
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
36
+ :format => :bam,
37
+ :fasta => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa",
38
+ :samtools => {:q => 20, :Q => 50, :r => region}
39
+ )
40
+
41
+ g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
42
+ g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
43
+ #puts g.threads
44
+ [0.5].each do |kernel_adjust|
45
+ [4].each do | k |
46
+ begin
47
+ g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.0, :expected_chd => 0.4, :pseudo => false)
48
+ #filename = "unmapped_2_#{region}_#{k}_#{kernel_adjust}_all_threads.png"
49
+ #g.draw_threads(filename, :draw_legend => "unmapped_#{region}_#{k}_#{kernel_adjust}_legend.png")
50
+ ctrl_threads = g.get_band(0.0)
51
+ expected_threads = g.get_band(0.4)
52
+ ctrl_thread_values += g.threads.select {|x| ctrl_threads.include?(x.first) }.last
53
+ expected_thread_values += g.threads.select {|x| expected_threads.include?(x.first) }.last
54
+ filename = "unmapped_2_#{region}_#{k}_#{kernel_adjust}_bands.png"
55
+ g.draw_bands(filename)
56
+ #filename = "unmapped_#{region}_#{k}_#{kernel_adjust}_signal.png"
57
+ #g.draw_signal(filename)
58
+ #filename = "unmapped_#{region}_#{k}_#{kernel_adjust}_hits.png"
59
+ #g.draw_hit_count(filename)
60
+
61
+ rescue Exception => e
62
+ puts "failed on #{k} #{kernel_adjust}"
63
+ puts e.message, e.backtrace
64
+ end
65
+ end
66
+ end
67
+ g.close
68
+
69
+
70
+ end
71
+ File.open("ctrl_thread.txt", 'w') {|f| f.write(ctrl_thread_values.join("\n")) }
72
+ File.open("expected_thread.txt", 'w') {|f| f.write(expected_thread_values.join("\n")) }
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_bands
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script that, for each sequence in the FASTA file, collects unmapped-mate frequencies and threads over the whole sequence,
10
+ ### runs calculate_clusters with :pseudo => true (no bands or signal to draw) and draws the threads and hit counts. Generates plots for each sequence
11
+
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-gngm'
16
+ require 'bio'
17
+ length = 0
18
+ chr_name = ""
19
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
20
+ file.each do |entry|
21
+ length = entry.length
22
+ chr_name = entry.entry_id
23
+
24
+
25
+ interval_width = 5000000
26
+ puts chr_name
27
+ #(1..length).step(interval_width) do |start|
28
+ #stop = start + interval_width
29
+ region = "#{chr_name}:#{1}-#{length}"
30
+ file = chr_name
31
+
32
+ puts "analyzing - #{region}"
33
+
34
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
35
+ :format => :bam,
36
+ :fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa",
37
+ :samtools => {:q => 20, :Q => 50, :r => region
38
+ }
39
+ )
40
+
41
+ g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
42
+ g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
43
+
44
+ begin
45
+ g.calculate_clusters(:pseudo => true)
46
+ filename = "unmapped_#{file}_all_threads.png"
47
+ g.draw_threads(filename, :draw_legend => "unmapped_#{file}_legend.png")
48
+ ##no bands or signal to draw without clustering...
49
+ filename = "unmapped_#{file}_hits.png"
50
+ g.draw_hit_count(filename)
51
+ rescue Exception => e
52
+ puts e.message, e.backtrace
53
+ end
54
+
55
+ #end
56
+ end
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_bands
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script that takes the whole NC_000962.2 reference as one region, collects unmapped-mate frequencies and threads,
10
+ ### runs calculate_clusters with :pseudo => true (no bands or signal to draw) and draws the threads and hit counts.
11
+
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-gngm'
16
+ require 'bio'
17
+ length = 0
18
+ chr_name = ""
19
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/deletion_simulation/NC_000962.fna")
20
+ file.each do |entry|
21
+ length = entry.length
22
+ chr_name = entry.entry_id
23
+ end
24
+
25
+
26
+ region = "gi|57116681|ref|NC_000962.2|:1-#{length}"
27
+
28
+ puts "analyzing - #{region}"
29
+
30
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/deletion_simulation/aln.sort.bam",
31
+ :format => :bam,
32
+ :fasta => "/Users/macleand/Desktop/deletion_simulation/NC_000962.fna",
33
+ :samtools => {:q => 20, :Q => 50, :r => region
34
+ }
35
+ )
36
+
37
+ g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
38
+ g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
39
+ puts g.threads
40
+
41
+ begin
42
+ g.calculate_clusters(:pseudo => true)
43
+ filename = "sim_2_#{region}_all_threads.png"
44
+ g.draw_threads(filename, :draw_legend => "sim_#{region}_legend.png")
45
+ ##no bands or signal to draw without clustering...
46
+ filename = "sim_#{region}_hits.png"
47
+ g.draw_hit_count(filename)
48
+ rescue Exception => e
49
+ puts e.message, e.backtrace
50
+ end
51
+
52
+
53
+
54
+
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # make_bands
4
+ #
5
+ # Created by Dan MacLean (TSL) on 2012-01-17.
6
+ # Copyright (c) . All rights reserved.
7
+ ###################################################
8
+
9
+ ### An example script that collects unmapped-mate frequencies and threads for the fixed region 1010000-1020000 of NC_000962.2 and prints g.snp_positions.
10
+ ### The clustering and drawing loop over kernel adjustments and values of k is currently disabled with =begin/=end.
11
+
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-gngm'
16
+ require 'bio'
17
+ length = 0
18
+ chr_name = ""
19
+ file = Bio::FastaFormat.open("/Users/macleand/Desktop/deletion_simulation/NC_000962.fna")
20
+ file.each do |entry|
21
+ length = entry.length
22
+ chr_name = entry.entry_id
23
+ end
24
+
25
+
26
+ region = "gi|57116681|ref|NC_000962.2|:1010000-1020000"
27
+
28
+ puts "analyzing - #{region}"
29
+
30
+ g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/deletion_simulation/aln.sort.bam",
31
+ :format => :bam,
32
+ :fasta => "/Users/macleand/Desktop/deletion_simulation/NC_000962.fna",
33
+ :samtools => {:q => 20, :Q => 50, :r => region
34
+ }
35
+ )
36
+
37
+ g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
38
+ g.collect_threads
39
+ puts g.snp_positions
40
+ =begin
41
+ [0.25, 0.5, 1.0].each do |kernel_adjust|
42
+ [4, 9, 11].each do | k |
43
+ begin
44
+ g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 1.0, :expected_chd => 0.5, :pseudo => false)
45
+ filename = "sim_#{region}_#{k}_#{kernel_adjust}_all_threads.png"
46
+ g.draw_threads(filename, :draw_legend => "unmapped_#{region}_#{k}_#{kernel_adjust}_legend.png")
47
+ filename = "sim_#{region}_#{k}_#{kernel_adjust}_bands.png"
48
+ g.draw_bands(filename)
49
+ filename = "sim_#{region}_#{k}_#{kernel_adjust}_signal.png"
50
+ g.draw_signal(filename)
51
+ filename = "sim_#{region}_#{k}_#{kernel_adjust}_hits.png"
52
+ g.draw_hit_count(filename)
53
+ rescue Exception => e
54
+ puts "failed on #{k} #{kernel_adjust}"
55
+ puts e.message, e.backtrace
56
+ end
57
+ end
58
+ end
59
+ =end