bio-gngm 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/.document +5 -0
  2. data/Gemfile +20 -0
  3. data/Gemfile.lock +33 -0
  4. data/LICENSE.txt +20 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +53 -0
  7. data/VERSION +1 -0
  8. data/bio-gngm.gemspec +173 -0
  9. data/doc/Bio.html +129 -0
  10. data/doc/Bio/DB.html +128 -0
  11. data/doc/Bio/DB/Pileup.html +316 -0
  12. data/doc/Bio/DB/Vcf.html +683 -0
  13. data/doc/Bio/Util.html +135 -0
  14. data/doc/Bio/Util/Gngm.html +1655 -0
  15. data/doc/LICENSE_txt.html +111 -0
  16. data/doc/_index.html +169 -0
  17. data/doc/class_list.html +47 -0
  18. data/doc/created.rid +4 -0
  19. data/doc/css/common.css +1 -0
  20. data/doc/css/full_list.css +55 -0
  21. data/doc/css/style.css +322 -0
  22. data/doc/doc/created.rid +0 -0
  23. data/doc/file_list.html +52 -0
  24. data/doc/frames.html +13 -0
  25. data/doc/images/add.png +0 -0
  26. data/doc/images/bands.png +0 -0
  27. data/doc/images/brick.png +0 -0
  28. data/doc/images/brick_link.png +0 -0
  29. data/doc/images/bug.png +0 -0
  30. data/doc/images/bullet_black.png +0 -0
  31. data/doc/images/bullet_toggle_minus.png +0 -0
  32. data/doc/images/bullet_toggle_plus.png +0 -0
  33. data/doc/images/date.png +0 -0
  34. data/doc/images/delete.png +0 -0
  35. data/doc/images/find.png +0 -0
  36. data/doc/images/loadingAnimation.gif +0 -0
  37. data/doc/images/macFFBgHack.png +0 -0
  38. data/doc/images/package.png +0 -0
  39. data/doc/images/page_green.png +0 -0
  40. data/doc/images/page_white_text.png +0 -0
  41. data/doc/images/page_white_width.png +0 -0
  42. data/doc/images/plugin.png +0 -0
  43. data/doc/images/ruby.png +0 -0
  44. data/doc/images/signal.png +0 -0
  45. data/doc/images/tag_blue.png +0 -0
  46. data/doc/images/tag_green.png +0 -0
  47. data/doc/images/threads.png +0 -0
  48. data/doc/images/transparent.png +0 -0
  49. data/doc/images/wrench.png +0 -0
  50. data/doc/images/wrench_orange.png +0 -0
  51. data/doc/images/zoom.png +0 -0
  52. data/doc/index.html +88 -0
  53. data/doc/js/app.js +205 -0
  54. data/doc/js/darkfish.js +153 -0
  55. data/doc/js/full_list.js +167 -0
  56. data/doc/js/jquery.js +18 -0
  57. data/doc/js/navigation.js +142 -0
  58. data/doc/js/search.js +94 -0
  59. data/doc/js/search_index.js +1 -0
  60. data/doc/js/searcher.js +228 -0
  61. data/doc/lib/bio-gngm_rb.html +103 -0
  62. data/doc/lib/bio/util/bio-gngm_rb.html +96 -0
  63. data/doc/method_list.html +382 -0
  64. data/doc/rdoc.css +543 -0
  65. data/doc/table_of_contents.html +161 -0
  66. data/examples/.DS_Store +0 -0
  67. data/examples/make_histograms.rb +40 -0
  68. data/examples/make_threads.rb +42 -0
  69. data/examples/make_threads_isize.rb +41 -0
  70. data/examples/use_indels.rb +36 -0
  71. data/lib/bio-gngm.rb +12 -0
  72. data/lib/bio/util/bio-gngm.rb +1029 -0
  73. data/scripts/get_subseq.rb +16 -0
  74. data/scripts/make_histograms_laerfyve.rb +83 -0
  75. data/scripts/make_histograms_laerfyve_stitched.rb +59 -0
  76. data/scripts/make_threads_isize_laerfyfe.rb +52 -0
  77. data/scripts/make_threads_unmapped_laerfyfe.rb +72 -0
  78. data/scripts/make_threads_unmapped_laerfyfe_pseudo.rb +56 -0
  79. data/scripts/make_threads_unmapped_simulation.rb +54 -0
  80. data/scripts/make_threads_unmapped_simulation_immediate_region.rb +59 -0
  81. data/scripts/optimise_freq_window_size.rb +82 -0
  82. data/stitched_contigs.zip +0 -0
  83. data/test/data/ids2.txt +1 -0
  84. data/test/data/sorted.bam +0 -0
  85. data/test/data/test +0 -0
  86. data/test/data/test.bam +0 -0
  87. data/test/data/test.fa +20 -0
  88. data/test/data/test.fai +0 -0
  89. data/test/data/test.sai +0 -0
  90. data/test/data/test.tam +10 -0
  91. data/test/data/test_chr.fasta +1000 -0
  92. data/test/data/test_chr.fasta.amb +2 -0
  93. data/test/data/test_chr.fasta.ann +3 -0
  94. data/test/data/test_chr.fasta.bwt +0 -0
  95. data/test/data/test_chr.fasta.fai +1 -0
  96. data/test/data/test_chr.fasta.pac +0 -0
  97. data/test/data/test_chr.fasta.rbwt +0 -0
  98. data/test/data/test_chr.fasta.rpac +0 -0
  99. data/test/data/test_chr.fasta.rsa +0 -0
  100. data/test/data/test_chr.fasta.sa +0 -0
  101. data/test/data/testu.bam +0 -0
  102. data/test/data/testu.bam.bai +0 -0
  103. data/test/helper.rb +18 -0
  104. data/test/test_bio-gngm.rb +126 -0
  105. metadata +276 -0
@@ -0,0 +1,16 @@
#!/usr/bin/env ruby
#
#  get_subseq
#
#  Created by Dan MacLean (TSL) on 2012-03-01.
#  Copyright (c)  . All rights reserved.
###################################################

### Prints a slice of every sequence in a FASTA file, each preceded by a
### ">id:start..stop" header line.
###
### Usage: get_subseq.rb <fasta_file> <start> <stop>
###
### <start> and <stop> are converted with to_i and used as-is as the bounds of
### an inclusive Ruby range index into each entry's sequence.

require 'bio'

# Fail early with a usage message instead of an opaque error (or a silent
# 0..0 slice) when arguments are missing.
if ARGV.length < 3
  abort "Usage: #{File.basename($PROGRAM_NAME)} <fasta_file> <start> <stop>"
end

fasta_path, start_arg, stop_arg = ARGV

# Convert the bounds once rather than once per entry.
first_index = start_arg.to_i
last_index  = stop_arg.to_i

Bio::FastaFormat.open(fasta_path).each do |entry|
  section = entry.seq[first_index..last_index]
  puts ">#{entry.entry_id}:#{start_arg}..#{stop_arg}"
  puts section
end
@@ -0,0 +1,83 @@
#!/usr/bin/env ruby
#
#  make_histograms
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script to loop over each sequence in the reference FASTA file, get SNP positions and make
### histograms of the frequency of discordant SNPs. Sequences shorter than 10000 bases are skipped and one
### plot is generated per sequence and bin width.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio-samtools'
require 'bio'
=begin
module Bio
  class DB
    class Sam
      def each_reference
        index_stats.each_pair do |k, v|
          yield k, v[:length].to_i
        end
      end
    end
  end
end
=end

# Input locations, named once so the FASTA path is not repeated.
data_dir   = "/Users/macleand/Desktop/laerfyfe_vs_ler"
fasta_path = File.join(data_dir, "Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
bam_path   = File.join(data_dir, "aln.sort.bam")

min_length = 10000
bin_widths = [1000, 2500, 5000, 10000, 25000, 50000]

Bio::FastaFormat.open(fasta_path).each do |entry|
  next if entry.length < min_length
  $stderr.puts "doing #{entry.entry_id} - #{entry.length}"

  g = Bio::Util::Gngm.new(:file => bam_path,
                          :format => :bam,
                          :samtools => {:q => 20, :Q => 50, :r => "#{entry.entry_id}:1-#{entry.length}"},
                          :fasta => fasta_path)
  begin
    g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
    puts "found #{g.snp_positions.length} SNPs .."

    begin
      bin_widths.each do |bin_width|
        # A bin wider than the sequence itself is not plotted.
        next if bin_width > entry.length
        g.frequency_histogram("ler_contigs_#{entry.entry_id}_#{bin_width}.png", bin_width)
      end
    rescue StandardError => e
      # Plotting is best effort: report and move on to the next sequence.
      # StandardError (not Exception) so Ctrl-C and exit still stop the run.
      puts "failed #{e}"
    end
  ensure
    # Always release the Gngm object, including when SNP calling itself raises.
    g.close
  end
end
56
+ #sam = Bio::DB::Sam.new(:bam => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
57
+ # :fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
58
+ #sam.open
59
+ #sam.fetch("Scaffold_2", 10000, 10500).each do |a|
60
+ # puts a.qname
61
+ #end
62
+
63
+ #sam.close
64
+ #sam.each_reference do |name, length|
65
+ # $stderr.puts "skipping..."
66
+ # next if length < 10000
67
+ # $stderr.puts "doing #{name}"
68
+ # g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
69
+ # :format => :bam,
70
+ # :samtools => {:q => 20, :Q => 50},
71
+ # :fasta => "/Users/macleand/Desktop/laerfyfe/Ler-1.SHORE.scaffolds2010-09-30.bp.fa"
72
+
73
+ # )
74
+ # g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
75
+ #puts g.snp_positions.length
76
+
77
+ # [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
78
+ # file_name = "ler_contigs_#{name}_#{bin_width}.png"
79
+ # g.frequency_histogram("#{file_name}",bin_width)
80
+ # end
81
+ # g.close
82
+ #end
83
+ #sam.close
@@ -0,0 +1,59 @@
#!/usr/bin/env ruby
#
#  make_histograms
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script that reads the stitched reference FASTA file, gets SNP positions over the whole
### sequence and makes histograms of the frequency of discordant SNPs, one plot per bin width.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio-samtools'
require 'bio'

data_dir   = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler"
fasta_path = File.join(data_dir, "ler_contigs_stitched.fa")
bam_path   = File.join(data_dir, "aln.sort.bam")

# The loop keeps the name and length of the LAST entry in the file.
# NOTE(review): presumably the stitched FASTA holds a single sequence - confirm.
length = 0
chr_name = ""
Bio::FastaFormat.open(fasta_path).each do |entry|
  length = entry.length
  chr_name = entry.entry_id
end

g = Bio::Util::Gngm.new(:file => bam_path,
                        :format => :bam,
                        :samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"},
                        :fasta => fasta_path)
begin
  g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
  puts g.snp_positions.length

  [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
    g.frequency_histogram("stitched_contigs_snps_q_filt_#{bin_width}.png", bin_width)
  end
ensure
  # Close the Gngm object even when SNP calling or plotting raises.
  g.close
end
@@ -0,0 +1,52 @@
#!/usr/bin/env ruby
#
#  make_bands
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script that computes insert size frequencies over the stitched reference, collects density
### threads, clusters them for different values of k and kernel adjustment, and then draws the threads,
### bands, signal and hit count. Generates plots for each new set of parameters.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio'

data_dir   = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler"
fasta_path = File.join(data_dir, "ler_contigs_stitched.fa")
bam_path   = File.join(data_dir, "aln.sort.bam")

# The loop keeps the name and length of the LAST entry in the file.
# NOTE(review): presumably the stitched FASTA holds a single sequence - confirm.
length = 0
chr_name = ""
Bio::FastaFormat.open(fasta_path).each do |entry|
  length = entry.length
  chr_name = entry.entry_id
end

puts chr_name
g = Bio::Util::Gngm.new(:file => bam_path,
                        :format => :bam,
                        :fasta => fasta_path,
                        :samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"})

begin
  g.get_insert_size_frequency(:ref_window_size => 750, :ref_window_slide => 750, :isize => 184)
  g.collect_threads

  [0.25, 0.5, 1.0].each do |kernel_adjust|
    [4, 9, 11].each do |k|
      begin
        g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.5, :expected_chd => 0.2)
        prefix = "isize_#{k}_#{kernel_adjust}"
        g.draw_threads("#{prefix}_all_threads.png", :draw_legend => "#{prefix}_legend.png")
        g.draw_bands("#{prefix}_bands.png")
        g.draw_signal("#{prefix}_signal.png")
        g.draw_hit_count("#{prefix}_hits.png")
      rescue StandardError => e
        # One failing parameter set must not stop the sweep. StandardError
        # (not Exception) so Ctrl-C and exit still terminate the script.
        puts "failed on #{k} #{kernel_adjust}"
        puts e.message, e.backtrace
      end
    end
  end
ensure
  # Release the Gngm object, as the histogram scripts do with g.close.
  g.close
end
@@ -0,0 +1,72 @@
#!/usr/bin/env ruby
#
#  make_bands
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script that walks the stitched reference in fixed-width intervals, computes unmapped mate
### frequencies for each interval, collects density threads, clusters them and draws the bands. Values taken
### from the control (0.0) and expected (0.4) bands are accumulated over all intervals and written to
### ctrl_thread.txt and expected_thread.txt.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio'

data_dir   = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler"
fasta_path = File.join(data_dir, "ler_contigs_stitched.fa")
bam_path   = File.join(data_dir, "aln.sort.bam")

# The loop keeps the name and length of the LAST entry in the file.
# NOTE(review): presumably the stitched FASTA holds a single sequence - confirm.
length = 0
chr_name = ""
Bio::FastaFormat.open(fasta_path).each do |entry|
  length = entry.length
  chr_name = entry.entry_id
end

ctrl_thread_values = []
expected_thread_values = []
interval_width = 10000000

(1..length).step(interval_width) do |start|
  # Region bounds are inclusive, so an interval of interval_width bases ends at
  # start + interval_width - 1. Clamp the last interval to the sequence end so
  # consecutive regions neither overlap nor run past the reference.
  stop = [start + interval_width - 1, length].min
  region = "#{chr_name}:#{start}-#{stop}"

  puts "analyzing - #{region}"

  g = Bio::Util::Gngm.new(:file => bam_path,
                          :format => :bam,
                          :fasta => fasta_path,
                          :samtools => {:q => 20, :Q => 50, :r => region})
  begin
    g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
    g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)

    [0.5].each do |kernel_adjust|
      [4].each do |k|
        begin
          g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.0, :expected_chd => 0.4, :pseudo => false)
          ctrl_threads = g.get_band(0.0)
          expected_threads = g.get_band(0.4)
          # NOTE(review): select(...).last takes only the LAST matching thread and
          # is nil when nothing matches (the += then raises and is reported below).
          # Confirm this is the intended way to pull the band values.
          ctrl_thread_values += g.threads.select {|x| ctrl_threads.include?(x.first) }.last
          expected_thread_values += g.threads.select {|x| expected_threads.include?(x.first) }.last
          g.draw_bands("unmapped_2_#{region}_#{k}_#{kernel_adjust}_bands.png")
        rescue StandardError => e
          # Keep going with the next parameter set / interval. StandardError
          # (not Exception) so Ctrl-C and exit still terminate the script.
          puts "failed on #{k} #{kernel_adjust}"
          puts e.message, e.backtrace
        end
      end
    end
  ensure
    # Close this interval's Gngm object even if the frequency step raises.
    g.close
  end
end

File.open("ctrl_thread.txt", 'w') {|f| f.write(ctrl_thread_values.join("\n")) }
# Output name corrected from the misspelt "epxected_thread.txt".
File.open("expected_thread.txt", 'w') {|f| f.write(expected_thread_values.join("\n")) }
@@ -0,0 +1,56 @@
#!/usr/bin/env ruby
#
#  make_bands
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script to loop over each sequence in the reference FASTA file, compute unmapped mate
### frequencies over the whole sequence, collect density threads and, using pseudo clustering
### (:pseudo => true), draw the threads and hit count. Generates plots for each sequence.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio'

data_dir   = "/Users/macleand/Desktop/laerfyfe_vs_ler"
fasta_path = File.join(data_dir, "Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
bam_path   = File.join(data_dir, "aln.sort.bam")

Bio::FastaFormat.open(fasta_path).each do |entry|
  length = entry.length
  chr_name = entry.entry_id

  puts chr_name
  region = "#{chr_name}:#{1}-#{length}"

  puts "analyzing - #{region}"

  g = Bio::Util::Gngm.new(:file => bam_path,
                          :format => :bam,
                          :fasta => fasta_path,
                          :samtools => {:q => 20, :Q => 50, :r => region})
  begin
    g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
    g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)

    begin
      g.calculate_clusters(:pseudo => true)
      # Output names are built from the sequence id directly; the FASTA handle
      # variable is no longer overwritten with a string inside its own loop.
      g.draw_threads("unmapped_#{chr_name}_all_threads.png", :draw_legend => "unmapped_#{chr_name}_legend.png")
      ##no bands or signal to draw without clustering...
      g.draw_hit_count("unmapped_#{chr_name}_hits.png")
    rescue StandardError => e
      # Report and continue with the next sequence. StandardError (not
      # Exception) so Ctrl-C and exit still terminate the script.
      puts e.message, e.backtrace
    end
  ensure
    # One Gngm object is created per sequence; release each one.
    g.close
  end
end
@@ -0,0 +1,54 @@
#!/usr/bin/env ruby
#
#  make_bands
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script that computes unmapped mate frequencies over the whole simulated reference,
### collects density threads and, using pseudo clustering (:pseudo => true), draws the threads and
### hit count.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio'

data_dir   = "/Users/macleand/Desktop/deletion_simulation"
fasta_path = File.join(data_dir, "NC_000962.fna")
bam_path   = File.join(data_dir, "aln.sort.bam")

# Only the sequence length is needed from the FASTA; the loop keeps the length
# of the LAST entry in the file.
length = 0
Bio::FastaFormat.open(fasta_path).each do |entry|
  length = entry.length
end

# NOTE(review): the reference name is hard-coded rather than taken from
# entry.entry_id - presumably to match the name used in the BAM file; confirm.
region = "gi|57116681|ref|NC_000962.2|:1-#{length}"

puts "analyzing - #{region}"

g = Bio::Util::Gngm.new(:file => bam_path,
                        :format => :bam,
                        :fasta => fasta_path,
                        :samtools => {:q => 20, :Q => 50, :r => region})

begin
  g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
  g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
  puts g.threads

  begin
    g.calculate_clusters(:pseudo => true)
    g.draw_threads("sim_2_#{region}_all_threads.png", :draw_legend => "sim_#{region}_legend.png")
    ##no bands or signal to draw without clustering...
    g.draw_hit_count("sim_#{region}_hits.png")
  rescue StandardError => e
    # Drawing is best effort. StandardError (not Exception) so Ctrl-C and exit
    # still terminate the script.
    puts e.message, e.backtrace
  end
ensure
  # Release the Gngm object, as the histogram scripts do with g.close.
  g.close
end
@@ -0,0 +1,59 @@
#!/usr/bin/env ruby
#
#  make_bands
#
#  Created by Dan MacLean (TSL) on 2012-01-17.
#  Copyright (c)  . All rights reserved.
###################################################

### An example script that computes unmapped mate frequencies for one fixed 10 kb region of the simulated
### reference, collects density threads and prints the resulting positions. The clustering and drawing
### sweep is currently disabled and kept below as comments.

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'bio-gngm'
require 'bio'

data_dir   = "/Users/macleand/Desktop/deletion_simulation"
fasta_path = File.join(data_dir, "NC_000962.fna")
bam_path   = File.join(data_dir, "aln.sort.bam")

# The region is fixed, so the FASTA no longer needs to be scanned for a
# sequence name and length that were never used.
region = "gi|57116681|ref|NC_000962.2|:1010000-1020000"

puts "analyzing - #{region}"

g = Bio::Util::Gngm.new(:file => bam_path,
                        :format => :bam,
                        :fasta => fasta_path,
                        :samtools => {:q => 20, :Q => 50, :r => region})

begin
  g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
  g.collect_threads
  puts g.snp_positions

  # Disabled clustering / drawing sweep, kept for reference:
  #
  # [0.25, 0.5, 1.0].each do |kernel_adjust|
  #   [4, 9, 11].each do | k |
  #     begin
  #       g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 1.0, :expected_chd => 0.5, :pseudo => false)
  #       filename = "sim_#{region}_#{k}_#{kernel_adjust}_all_threads.png"
  #       g.draw_threads(filename, :draw_legend => "unmapped_#{region}_#{k}_#{kernel_adjust}_legend.png")
  #       filename = "sim_#{region}_#{k}_#{kernel_adjust}_bands.png"
  #       g.draw_bands(filename)
  #       filename = "sim_#{region}_#{k}_#{kernel_adjust}_signal.png"
  #       g.draw_signal(filename)
  #       filename = "sim_#{region}_#{k}_#{kernel_adjust}_hits.png"
  #       g.draw_hit_count(filename)
  #     rescue Exception => e
  #       puts "failed on #{k} #{kernel_adjust}"
  #       puts e.message, e.backtrace
  #     end
  #   end
  # end
ensure
  # Release the Gngm object, as the histogram scripts do with g.close.
  g.close
end