bio-gngm 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +33 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-gngm.gemspec +173 -0
- data/doc/Bio.html +129 -0
- data/doc/Bio/DB.html +128 -0
- data/doc/Bio/DB/Pileup.html +316 -0
- data/doc/Bio/DB/Vcf.html +683 -0
- data/doc/Bio/Util.html +135 -0
- data/doc/Bio/Util/Gngm.html +1655 -0
- data/doc/LICENSE_txt.html +111 -0
- data/doc/_index.html +169 -0
- data/doc/class_list.html +47 -0
- data/doc/created.rid +4 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +55 -0
- data/doc/css/style.css +322 -0
- data/doc/doc/created.rid +0 -0
- data/doc/file_list.html +52 -0
- data/doc/frames.html +13 -0
- data/doc/images/add.png +0 -0
- data/doc/images/bands.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/signal.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/threads.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +88 -0
- data/doc/js/app.js +205 -0
- data/doc/js/darkfish.js +153 -0
- data/doc/js/full_list.js +167 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/lib/bio-gngm_rb.html +103 -0
- data/doc/lib/bio/util/bio-gngm_rb.html +96 -0
- data/doc/method_list.html +382 -0
- data/doc/rdoc.css +543 -0
- data/doc/table_of_contents.html +161 -0
- data/examples/.DS_Store +0 -0
- data/examples/make_histograms.rb +40 -0
- data/examples/make_threads.rb +42 -0
- data/examples/make_threads_isize.rb +41 -0
- data/examples/use_indels.rb +36 -0
- data/lib/bio-gngm.rb +12 -0
- data/lib/bio/util/bio-gngm.rb +1029 -0
- data/scripts/get_subseq.rb +16 -0
- data/scripts/make_histograms_laerfyve.rb +83 -0
- data/scripts/make_histograms_laerfyve_stitched.rb +59 -0
- data/scripts/make_threads_isize_laerfyfe.rb +52 -0
- data/scripts/make_threads_unmapped_laerfyfe.rb +72 -0
- data/scripts/make_threads_unmapped_laerfyfe_pseudo.rb +56 -0
- data/scripts/make_threads_unmapped_simulation.rb +54 -0
- data/scripts/make_threads_unmapped_simulation_immediate_region.rb +59 -0
- data/scripts/optimise_freq_window_size.rb +82 -0
- data/stitched_contigs.zip +0 -0
- data/test/data/ids2.txt +1 -0
- data/test/data/sorted.bam +0 -0
- data/test/data/test +0 -0
- data/test/data/test.bam +0 -0
- data/test/data/test.fa +20 -0
- data/test/data/test.fai +0 -0
- data/test/data/test.sai +0 -0
- data/test/data/test.tam +10 -0
- data/test/data/test_chr.fasta +1000 -0
- data/test/data/test_chr.fasta.amb +2 -0
- data/test/data/test_chr.fasta.ann +3 -0
- data/test/data/test_chr.fasta.bwt +0 -0
- data/test/data/test_chr.fasta.fai +1 -0
- data/test/data/test_chr.fasta.pac +0 -0
- data/test/data/test_chr.fasta.rbwt +0 -0
- data/test/data/test_chr.fasta.rpac +0 -0
- data/test/data/test_chr.fasta.rsa +0 -0
- data/test/data/test_chr.fasta.sa +0 -0
- data/test/data/testu.bam +0 -0
- data/test/data/testu.bam.bai +0 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-gngm.rb +126 -0
- metadata +276 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# untitled
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-03-01.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
require 'bio'
|
10
|
+
|
11
|
+
# Print a slice of every sequence in a FASTA file.
#
# ARGV[0] - path of the FASTA file to read
# ARGV[1] - first index of the slice (converted with to_i)
# ARGV[2] - last index of the slice (converted with to_i)
#
# For each entry a FASTA-style header ">id:ARGV[1]..ARGV[2]" is written to
# stdout, followed by the result of indexing entry.seq with the inclusive
# range built from the two converted arguments.
first_index = ARGV[1].to_i
last_index = ARGV[2].to_i

Bio::FastaFormat.open(ARGV[0]).each do |record|
  slice = record.seq[first_index..last_index]
  puts ">#{record.entry_id}:#{ARGV[1]}..#{ARGV[2]}"
  puts slice
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_histograms
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
|
10
|
+
### of the frequency of discordant SNPs. Generates plots for each.
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
require 'bio-gngm'
|
15
|
+
require 'bio-samtools'
|
16
|
+
require 'bio'
|
17
|
+
=begin
|
18
|
+
module Bio
|
19
|
+
class DB
|
20
|
+
class Sam
|
21
|
+
def each_reference
|
22
|
+
index_stats.each_pair do |k, v|
|
23
|
+
yield k, v[:length].to_i
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
=end
|
30
|
+
|
31
|
+
#ff = Bio::FlatFile.new(Bio::FastaFormat, "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
|
32
|
+
# Walk every scaffold in the Ler-1 FASTA and, for each one that is at least
# 10000 long, open the sorted BAM restricted to that scaffold, call
# snp_positions and write one frequency histogram PNG per bin width.
#
# Input paths are hard-coded; output PNGs are written with relative names
# "ler_contigs_<scaffold id>_<bin width>.png".
fasta_path = "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa"
bam_path = "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam"
bin_widths = [1000, 2500, 5000, 10000, 25000, 50000]

file = Bio::FastaFormat.open(fasta_path)
file.each do |entry|
  # Short scaffolds are skipped entirely.
  next if entry.length < 10000

  $stderr.puts "doing #{entry.entry_id} - #{entry.length}"
  g = Bio::Util::Gngm.new(:file => bam_path,
                          :format => :bam,
                          :samtools => {:q => 20, :Q => 50, :r => "#{entry.entry_id}:1-#{entry.length}"},
                          :fasta => fasta_path)
  begin
    # The SNP call lives inside the begin so that `ensure` closes the handle
    # even when it fails; a failure is reported and the next scaffold is tried.
    g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
    puts "found #{g.snp_positions.length} SNPs .."

    bin_widths.each do |bin_width|
      # A bin wider than the scaffold is skipped.
      next if bin_width > entry.length

      g.frequency_histogram("ler_contigs_#{entry.entry_id}_#{bin_width}.png", bin_width)
    end
  rescue StandardError => e
    # StandardError (not Exception) so Ctrl-C / exit still stop the run.
    puts "failed #{e}"
  ensure
    g.close
  end
end
|
56
|
+
#sam = Bio::DB::Sam.new(:bam => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
|
57
|
+
# :fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
|
58
|
+
#sam.open
|
59
|
+
#sam.fetch("Scaffold_2", 10000, 10500).each do |a|
|
60
|
+
# puts a.qname
|
61
|
+
#end
|
62
|
+
|
63
|
+
#sam.close
|
64
|
+
#sam.each_reference do |name, length|
|
65
|
+
# $stderr.puts "skipping..."
|
66
|
+
# next if length < 10000
|
67
|
+
# $stderr.puts "doing #{name}"
|
68
|
+
# g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
|
69
|
+
# :format => :bam,
|
70
|
+
# :samtools => {:q => 20, :Q => 50},
|
71
|
+
# :fasta => "/Users/macleand/Desktop/laerfyfe/Ler-1.SHORE.scaffolds2010-09-30.bp.fa"
|
72
|
+
|
73
|
+
# )
|
74
|
+
# g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
|
75
|
+
#puts g.snp_positions.length
|
76
|
+
|
77
|
+
# [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
|
78
|
+
# file_name = "ler_contigs_#{name}_#{bin_width}.png"
|
79
|
+
# g.frequency_histogram("#{file_name}",bin_width)
|
80
|
+
# end
|
81
|
+
# g.close
|
82
|
+
#end
|
83
|
+
#sam.close
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_histograms
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
|
10
|
+
### of the frequency of discordant SNPs. Generates plots for each.
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
require 'bio-gngm'
|
15
|
+
require 'bio-samtools'
|
16
|
+
require 'bio'
|
17
|
+
#!/usr/bin/env ruby
|
18
|
+
#
|
19
|
+
# make_histograms
|
20
|
+
#
|
21
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
22
|
+
# Copyright (c) . All rights reserved.
|
23
|
+
###################################################
|
24
|
+
|
25
|
+
### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
|
26
|
+
### of the frequency of discordant SNPs. Generates plots for each.
|
27
|
+
|
28
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
29
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
30
|
+
require 'bio-gngm'
|
31
|
+
require 'bio-samtools'
|
32
|
+
require 'bio'
|
33
|
+
|
34
|
+
# Build SNP frequency histograms for the stitched-contig reference.
#
# The FASTA is scanned once; after the loop `length` and `chr_name` hold the
# values of the LAST entry read (presumably the file contains a single
# stitched sequence -- TODO confirm). The BAM is then opened for the region
# "<chr_name>:1-<length>" and one PNG is written per bin width as
# "stitched_contigs_snps_q_filt_<bin width>.png".
fasta_path = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa"

length = 0
chr_name = ""
# The original call was missing its closing parenthesis, which made the
# whole script a syntax error.
file = Bio::FastaFormat.open(fasta_path)
file.each do |entry|
  length = entry.length
  chr_name = entry.entry_id
end

g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
                        :format => :bam,
                        :samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"},
                        :fasta => fasta_path)
begin
  g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
  puts g.snp_positions.length

  [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
    file_name = "stitched_contigs_snps_q_filt_#{bin_width}.png"
    g.frequency_histogram(file_name, bin_width)
  end
ensure
  # Close the handle even if SNP calling or plotting raises.
  g.close
end
|
58
|
+
|
59
|
+
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
# Insert-size analysis over the stitched-contig reference.
#
# The FASTA is scanned once; `length` and `chr_name` end up holding the values
# of the LAST entry read. The BAM is opened for "<chr_name>:1-<length>",
# insert-size frequencies and threads are collected, and then for every
# combination of kernel adjust and k the clusters are recalculated and four
# PNGs (threads + legend, bands, signal, hits) are drawn with "isize_" names.
fasta_path = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa"

length = 0
chr_name = ""
file = Bio::FastaFormat.open(fasta_path)
file.each do |entry|
  length = entry.length
  chr_name = entry.entry_id
end

puts chr_name
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
                        :format => :bam,
                        :fasta => fasta_path,
                        :samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"})

begin
  g.get_insert_size_frequency(:ref_window_size => 750, :ref_window_slide => 750, :isize => 184)
  g.collect_threads

  [0.25, 0.5, 1.0].each do |kernel_adjust|
    [4, 9, 11].each do |k|
      begin
        prefix = "isize_#{k}_#{kernel_adjust}"
        g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.5, :expected_chd => 0.2)
        g.draw_threads("#{prefix}_all_threads.png", :draw_legend => "#{prefix}_legend.png")
        g.draw_bands("#{prefix}_bands.png")
        g.draw_signal("#{prefix}_signal.png")
        g.draw_hit_count("#{prefix}_hits.png")
      rescue StandardError => e
        # One failing parameter set is reported and the sweep continues.
        # StandardError (not Exception) so Ctrl-C / exit still stop the run.
        puts "failed on #{k} #{kernel_adjust}"
        puts e.message, e.backtrace
      end
    end
  end
ensure
  # The sibling scripts close their Gngm handle; do the same here.
  g.close
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
# Unmapped-mate analysis over the stitched-contig reference, processed in
# windows of `interval_width`.
#
# The FASTA is scanned once; `length` and `chr_name` end up holding the values
# of the LAST entry read. For each window the BAM is opened for that region,
# unmapped-mate frequencies and threads are collected, clusters are
# calculated, a bands PNG is drawn, and values selected from g.threads for
# the 0.0 and 0.4 bands are accumulated. The accumulated values are written
# to two text files at the end.
fasta_path = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa"
bam_path = "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam"

length = 0
chr_name = ""
file = Bio::FastaFormat.open(fasta_path)
file.each do |entry|
  length = entry.length
  chr_name = entry.entry_id
end

ctrl_thread_values = []
expected_thread_values = []
interval_width = 10000000

(1..length).step(interval_width) do |start|
  # NOTE(review): stop is start + interval_width, so each window shares its
  # last base with the next one and the final window can run past `length`.
  # Left unchanged because the region string is also part of the output
  # file names -- confirm before tightening.
  stop = start + interval_width
  region = "#{chr_name}:#{start}-#{stop}"

  puts "analyzing - #{region}"

  g = Bio::Util::Gngm.new(:file => bam_path,
                          :format => :bam,
                          :fasta => fasta_path,
                          :samtools => {:q => 20, :Q => 50, :r => region})
  begin
    g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
    g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)

    [0.5].each do |kernel_adjust|
      [4].each do |k|
        begin
          g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.0, :expected_chd => 0.4, :pseudo => false)
          ctrl_threads = g.get_band(0.0)
          expected_threads = g.get_band(0.4)
          # Only the LAST matching element of g.threads is appended; when
          # nothing matches, `.last` is nil and `+=` raises a TypeError that
          # the rescue below reports.
          ctrl_thread_values += g.threads.select { |x| ctrl_threads.include?(x.first) }.last
          expected_thread_values += g.threads.select { |x| expected_threads.include?(x.first) }.last
          filename = "unmapped_2_#{region}_#{k}_#{kernel_adjust}_bands.png"
          g.draw_bands(filename)
        rescue StandardError => e
          # StandardError (not Exception) so Ctrl-C / exit still stop the run.
          puts "failed on #{k} #{kernel_adjust}"
          puts e.message, e.backtrace
        end
      end
    end
  ensure
    # Close the handle for this window even if frequency/thread collection raises.
    g.close
  end
end

File.open("ctrl_thread.txt", 'w') { |f| f.write(ctrl_thread_values.join("\n")) }
# NOTE(review): "epxected" looks like a typo for "expected"; the file name is
# kept as-is in case something downstream reads it.
File.open("epxected_thread.txt", 'w') { |f| f.write(expected_thread_values.join("\n")) }
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
# Unmapped-mate analysis run once per scaffold of the Ler-1 FASTA.
#
# For every entry the BAM is opened for "<id>:1-<length>", unmapped-mate
# frequencies and threads are collected, calculate_clusters is called with
# :pseudo => true, and a threads PNG (plus legend) and a hit-count PNG are
# drawn with names "unmapped_<id>_...png".
fasta_path = "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa"
bam_path = "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam"

file = Bio::FastaFormat.open(fasta_path)
file.each do |entry|
  length = entry.length
  chr_name = entry.entry_id

  puts chr_name
  region = "#{chr_name}:1-#{length}"
  # The original stored this in `file`, overwriting the FASTA handle variable
  # from inside its own iteration; a separate name avoids that.
  label = chr_name

  puts "analyzing - #{region}"

  g = Bio::Util::Gngm.new(:file => bam_path,
                          :format => :bam,
                          :fasta => fasta_path,
                          :samtools => {:q => 20, :Q => 50, :r => region})
  begin
    g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
    g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)

    begin
      g.calculate_clusters(:pseudo => true)
      filename = "unmapped_#{label}_all_threads.png"
      g.draw_threads(filename, :draw_legend => "unmapped_#{label}_legend.png")
      ##no bands or signal to draw without clustering...
      filename = "unmapped_#{label}_hits.png"
      g.draw_hit_count(filename)
    rescue StandardError => e
      # StandardError (not Exception) so Ctrl-C / exit still stop the run.
      puts e.message, e.backtrace
    end
  ensure
    # The sibling scripts close their Gngm handle; do the same per scaffold.
    g.close
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
# Unmapped-mate analysis over the whole NC_000962 reference of the deletion
# simulation.
#
# The FASTA is scanned once so that `length` holds the length of the LAST
# entry read; the region name itself is hard-coded. Unmapped-mate frequencies
# and threads are collected, the threads are printed, calculate_clusters is
# called with :pseudo => true, and a threads PNG (plus legend) and a
# hit-count PNG are drawn.
fasta_path = "/Users/macleand/Desktop/deletion_simulation/NC_000962.fna"

length = 0
file = Bio::FastaFormat.open(fasta_path)
file.each do |entry|
  length = entry.length
end

region = "gi|57116681|ref|NC_000962.2|:1-#{length}"

puts "analyzing - #{region}"

g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/deletion_simulation/aln.sort.bam",
                        :format => :bam,
                        :fasta => fasta_path,
                        :samtools => {:q => 20, :Q => 50, :r => region})
begin
  g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
  g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
  puts g.threads

  begin
    g.calculate_clusters(:pseudo => true)
    # The threads image uses the "sim_2_" prefix while the legend and hits
    # images use "sim_"; names are kept exactly as they were.
    filename = "sim_2_#{region}_all_threads.png"
    g.draw_threads(filename, :draw_legend => "sim_#{region}_legend.png")
    ##no bands or signal to draw without clustering...
    filename = "sim_#{region}_hits.png"
    g.draw_hit_count(filename)
  rescue StandardError => e
    # StandardError (not Exception) so Ctrl-C / exit still stop the run.
    puts e.message, e.backtrace
  end
ensure
  # The sibling scripts close their Gngm handle; do the same here.
  g.close
end
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
# Unmapped-mate analysis restricted to a fixed 10 kb window of NC_000962.
#
# The FASTA is read through once (the values taken from its last entry are
# not used afterwards), then the BAM is opened for the hard-coded region,
# unmapped-mate frequencies and threads are collected, and the result of
# g.snp_positions is printed.
reference_fasta = "/Users/macleand/Desktop/deletion_simulation/NC_000962.fna"

seq_length = 0
seq_name = ""
Bio::FastaFormat.open(reference_fasta).each do |record|
  seq_length = record.length
  seq_name = record.entry_id
end

region = "gi|57116681|ref|NC_000962.2|:1010000-1020000"

puts "analyzing - #{region}"

samtools_options = {:q => 20, :Q => 50, :r => region}
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/deletion_simulation/aln.sort.bam",
                        :format => :bam,
                        :fasta => reference_fasta,
                        :samtools => samtools_options)

g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
g.collect_threads
puts g.snp_positions
|
40
|
+
=begin
|
41
|
+
[0.25, 0.5, 1.0].each do |kernel_adjust|
|
42
|
+
[4, 9, 11].each do | k |
|
43
|
+
begin
|
44
|
+
g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 1.0, :expected_chd => 0.5, :pseudo => false)
|
45
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_all_threads.png"
|
46
|
+
g.draw_threads(filename, :draw_legend => "unmapped_#{region}_#{k}_#{kernel_adjust}_legend.png")
|
47
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_bands.png"
|
48
|
+
g.draw_bands(filename)
|
49
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_signal.png"
|
50
|
+
g.draw_signal(filename)
|
51
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_hits.png"
|
52
|
+
g.draw_hit_count(filename)
|
53
|
+
rescue Exception => e
|
54
|
+
puts "failed on #{k} #{kernel_adjust}"
|
55
|
+
puts e.message, e.backtrace
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
=end
|