bio-gngm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +33 -0
- data/LICENSE.txt +20 -0
- data/README.rdoc +33 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/bio-gngm.gemspec +173 -0
- data/doc/Bio.html +129 -0
- data/doc/Bio/DB.html +128 -0
- data/doc/Bio/DB/Pileup.html +316 -0
- data/doc/Bio/DB/Vcf.html +683 -0
- data/doc/Bio/Util.html +135 -0
- data/doc/Bio/Util/Gngm.html +1655 -0
- data/doc/LICENSE_txt.html +111 -0
- data/doc/_index.html +169 -0
- data/doc/class_list.html +47 -0
- data/doc/created.rid +4 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +55 -0
- data/doc/css/style.css +322 -0
- data/doc/doc/created.rid +0 -0
- data/doc/file_list.html +52 -0
- data/doc/frames.html +13 -0
- data/doc/images/add.png +0 -0
- data/doc/images/bands.png +0 -0
- data/doc/images/brick.png +0 -0
- data/doc/images/brick_link.png +0 -0
- data/doc/images/bug.png +0 -0
- data/doc/images/bullet_black.png +0 -0
- data/doc/images/bullet_toggle_minus.png +0 -0
- data/doc/images/bullet_toggle_plus.png +0 -0
- data/doc/images/date.png +0 -0
- data/doc/images/delete.png +0 -0
- data/doc/images/find.png +0 -0
- data/doc/images/loadingAnimation.gif +0 -0
- data/doc/images/macFFBgHack.png +0 -0
- data/doc/images/package.png +0 -0
- data/doc/images/page_green.png +0 -0
- data/doc/images/page_white_text.png +0 -0
- data/doc/images/page_white_width.png +0 -0
- data/doc/images/plugin.png +0 -0
- data/doc/images/ruby.png +0 -0
- data/doc/images/signal.png +0 -0
- data/doc/images/tag_blue.png +0 -0
- data/doc/images/tag_green.png +0 -0
- data/doc/images/threads.png +0 -0
- data/doc/images/transparent.png +0 -0
- data/doc/images/wrench.png +0 -0
- data/doc/images/wrench_orange.png +0 -0
- data/doc/images/zoom.png +0 -0
- data/doc/index.html +88 -0
- data/doc/js/app.js +205 -0
- data/doc/js/darkfish.js +153 -0
- data/doc/js/full_list.js +167 -0
- data/doc/js/jquery.js +18 -0
- data/doc/js/navigation.js +142 -0
- data/doc/js/search.js +94 -0
- data/doc/js/search_index.js +1 -0
- data/doc/js/searcher.js +228 -0
- data/doc/lib/bio-gngm_rb.html +103 -0
- data/doc/lib/bio/util/bio-gngm_rb.html +96 -0
- data/doc/method_list.html +382 -0
- data/doc/rdoc.css +543 -0
- data/doc/table_of_contents.html +161 -0
- data/examples/.DS_Store +0 -0
- data/examples/make_histograms.rb +40 -0
- data/examples/make_threads.rb +42 -0
- data/examples/make_threads_isize.rb +41 -0
- data/examples/use_indels.rb +36 -0
- data/lib/bio-gngm.rb +12 -0
- data/lib/bio/util/bio-gngm.rb +1029 -0
- data/scripts/get_subseq.rb +16 -0
- data/scripts/make_histograms_laerfyve.rb +83 -0
- data/scripts/make_histograms_laerfyve_stitched.rb +59 -0
- data/scripts/make_threads_isize_laerfyfe.rb +52 -0
- data/scripts/make_threads_unmapped_laerfyfe.rb +72 -0
- data/scripts/make_threads_unmapped_laerfyfe_pseudo.rb +56 -0
- data/scripts/make_threads_unmapped_simulation.rb +54 -0
- data/scripts/make_threads_unmapped_simulation_immediate_region.rb +59 -0
- data/scripts/optimise_freq_window_size.rb +82 -0
- data/stitched_contigs.zip +0 -0
- data/test/data/ids2.txt +1 -0
- data/test/data/sorted.bam +0 -0
- data/test/data/test +0 -0
- data/test/data/test.bam +0 -0
- data/test/data/test.fa +20 -0
- data/test/data/test.fai +0 -0
- data/test/data/test.sai +0 -0
- data/test/data/test.tam +10 -0
- data/test/data/test_chr.fasta +1000 -0
- data/test/data/test_chr.fasta.amb +2 -0
- data/test/data/test_chr.fasta.ann +3 -0
- data/test/data/test_chr.fasta.bwt +0 -0
- data/test/data/test_chr.fasta.fai +1 -0
- data/test/data/test_chr.fasta.pac +0 -0
- data/test/data/test_chr.fasta.rbwt +0 -0
- data/test/data/test_chr.fasta.rpac +0 -0
- data/test/data/test_chr.fasta.rsa +0 -0
- data/test/data/test_chr.fasta.sa +0 -0
- data/test/data/testu.bam +0 -0
- data/test/data/testu.bam.bai +0 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-gngm.rb +126 -0
- metadata +276 -0
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# untitled
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-03-01.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
require 'bio'
|
10
|
+
|
11
|
+
file = Bio::FastaFormat.open(ARGV[0])
|
12
|
+
file.each do |entry|
|
13
|
+
section = entry.seq[ARGV[1].to_i..ARGV[2].to_i]
|
14
|
+
puts ">#{entry.entry_id}:#{ARGV[1]}..#{ARGV[2]}"
|
15
|
+
puts section
|
16
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_histograms
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
|
10
|
+
### of the frequency of discordant SNPs. Generates plots for each.
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
require 'bio-gngm'
|
15
|
+
require 'bio-samtools'
|
16
|
+
require 'bio'
|
17
|
+
=begin
|
18
|
+
module Bio
|
19
|
+
class DB
|
20
|
+
class Sam
|
21
|
+
def each_reference
|
22
|
+
index_stats.each_pair do |k, v|
|
23
|
+
yield k, v[:length].to_i
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
=end
|
30
|
+
|
31
|
+
#ff = Bio::FlatFile.new(Bio::FastaFormat, "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
|
32
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
|
33
|
+
file.each do |entry|
|
34
|
+
next if entry.length < 10000
|
35
|
+
$stderr.puts "doing #{entry.entry_id} - #{entry.length}"
|
36
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
|
37
|
+
:format => :bam,
|
38
|
+
:samtools => {:q => 20, :Q => 50, :r => "#{entry.entry_id}:1-#{entry.length}"},
|
39
|
+
:fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa"
|
40
|
+
|
41
|
+
)
|
42
|
+
g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
|
43
|
+
puts "found #{g.snp_positions.length} SNPs .."
|
44
|
+
begin
|
45
|
+
[1000, 2500, 5000, 10000, 25000, 50000].each do |bin_width|
|
46
|
+
next if bin_width > entry.length
|
47
|
+
file_name = "ler_contigs_#{entry.entry_id}_#{bin_width}.png"
|
48
|
+
g.frequency_histogram("#{file_name}",bin_width)
|
49
|
+
end
|
50
|
+
rescue StandardError => e # StandardError only, so Ctrl-C (Interrupt) and exit still propagate
|
51
|
+
puts "failed #{e}"
|
52
|
+
ensure
|
53
|
+
g.close
|
54
|
+
end
|
55
|
+
end
|
56
|
+
#sam = Bio::DB::Sam.new(:bam => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
|
57
|
+
# :fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
|
58
|
+
#sam.open
|
59
|
+
#sam.fetch("Scaffold_2", 10000, 10500).each do |a|
|
60
|
+
# puts a.qname
|
61
|
+
#end
|
62
|
+
|
63
|
+
#sam.close
|
64
|
+
#sam.each_reference do |name, length|
|
65
|
+
# $stderr.puts "skipping..."
|
66
|
+
# next if length < 10000
|
67
|
+
# $stderr.puts "doing #{name}"
|
68
|
+
# g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
|
69
|
+
# :format => :bam,
|
70
|
+
# :samtools => {:q => 20, :Q => 50},
|
71
|
+
# :fasta => "/Users/macleand/Desktop/laerfyfe/Ler-1.SHORE.scaffolds2010-09-30.bp.fa"
|
72
|
+
|
73
|
+
# )
|
74
|
+
# g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
|
75
|
+
#puts g.snp_positions.length
|
76
|
+
|
77
|
+
# [10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
|
78
|
+
# file_name = "ler_contigs_#{name}_#{bin_width}.png"
|
79
|
+
# g.frequency_histogram("#{file_name}",bin_width)
|
80
|
+
# end
|
81
|
+
# g.close
|
82
|
+
#end
|
83
|
+
#sam.close
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_histograms
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
|
10
|
+
### of the frequency of discordant SNPs. Generates plots for each.
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
require 'bio-gngm'
|
15
|
+
require 'bio-samtools'
|
16
|
+
require 'bio'
|
17
|
+
#!/usr/bin/env ruby
|
18
|
+
#
|
19
|
+
# make_histograms
|
20
|
+
#
|
21
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
22
|
+
# Copyright (c) . All rights reserved.
|
23
|
+
###################################################
|
24
|
+
|
25
|
+
### An example script to loop over each reference in the BAM file, get SNP positions and make histograms
|
26
|
+
### of the frequency of discordant SNPs. Generates plots for each.
|
27
|
+
|
28
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
29
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
30
|
+
require 'bio-gngm'
|
31
|
+
require 'bio-samtools'
|
32
|
+
require 'bio'
|
33
|
+
|
34
|
+
length = 0
|
35
|
+
chr_name = ""
|
36
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa") # reference FASTA; iterated next to pick up the sequence id and length
|
37
|
+
file.each do |entry|
|
38
|
+
length = entry.length
|
39
|
+
chr_name = entry.entry_id
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
|
45
|
+
:format => :bam,
|
46
|
+
:samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"},
|
47
|
+
:fasta => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa"
|
48
|
+
|
49
|
+
)
|
50
|
+
g.snp_positions(:min_depth => 10, :mapping_quality => 40.0, :min_non_ref_count => 5)
|
51
|
+
puts g.snp_positions.length
|
52
|
+
|
53
|
+
[10000, 25000, 50000, 100000, 250000, 500000].each do |bin_width|
|
54
|
+
file_name = "stitched_contigs_snps_q_filt_#{bin_width}.png"
|
55
|
+
g.frequency_histogram("#{file_name}",bin_width)
|
56
|
+
end
|
57
|
+
g.close
|
58
|
+
|
59
|
+
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
length = 0
|
18
|
+
chr_name = ""
|
19
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa")
|
20
|
+
file.each do |entry|
|
21
|
+
length = entry.length
|
22
|
+
chr_name = entry.entry_id
|
23
|
+
end
|
24
|
+
|
25
|
+
puts chr_name
|
26
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
|
27
|
+
:format => :bam,
|
28
|
+
:fasta => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa",
|
29
|
+
:samtools => {:q => 20, :Q => 50, :r => "#{chr_name}:1-#{length}"
|
30
|
+
}
|
31
|
+
)
|
32
|
+
|
33
|
+
g.get_insert_size_frequency(:ref_window_size => 750, :ref_window_slide => 750, :isize => 184)
|
34
|
+
g.collect_threads
|
35
|
+
[0.25, 0.5, 1.0].each do |kernel_adjust|
|
36
|
+
[4, 9, 11].each do | k |
|
37
|
+
begin
|
38
|
+
g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.5, :expected_chd => 0.2)
|
39
|
+
filename = "isize_#{k}_#{kernel_adjust}_all_threads.png"
|
40
|
+
g.draw_threads(filename, :draw_legend => "isize_#{k}_#{kernel_adjust}_legend.png")
|
41
|
+
filename = "isize_#{k}_#{kernel_adjust}_bands.png"
|
42
|
+
g.draw_bands(filename)
|
43
|
+
filename = "isize_#{k}_#{kernel_adjust}_signal.png"
|
44
|
+
g.draw_signal(filename)
|
45
|
+
filename = "isize_#{k}_#{kernel_adjust}_hits.png"
|
46
|
+
g.draw_hit_count(filename)
|
47
|
+
rescue StandardError => e # StandardError only, so Ctrl-C (Interrupt) and exit still propagate
|
48
|
+
puts "failed on #{k} #{kernel_adjust}"
|
49
|
+
puts e.message, e.backtrace
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
length = 0
|
18
|
+
chr_name = ""
|
19
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa")
|
20
|
+
file.each do |entry|
|
21
|
+
length = entry.length
|
22
|
+
chr_name = entry.entry_id
|
23
|
+
end
|
24
|
+
|
25
|
+
ctrl_thread_values = []
|
26
|
+
expected_thread_values = []
|
27
|
+
interval_width = 10000000
|
28
|
+
#puts chr_name
|
29
|
+
(1..length).step(interval_width) do |start|
|
30
|
+
stop = start + interval_width
|
31
|
+
region = "#{chr_name}:#{start}-#{stop}"
|
32
|
+
|
33
|
+
puts "analyzing - #{region}"
|
34
|
+
|
35
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/aln.sort.bam",
|
36
|
+
:format => :bam,
|
37
|
+
:fasta => "/Users/macleand/Desktop/laerfyve_vs_stitched_ler/ler_contigs_stitched.fa",
|
38
|
+
:samtools => {:q => 20, :Q => 50, :r => region}
|
39
|
+
)
|
40
|
+
|
41
|
+
g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
|
42
|
+
g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
|
43
|
+
#puts g.threads
|
44
|
+
[0.5].each do |kernel_adjust|
|
45
|
+
[4].each do | k |
|
46
|
+
begin
|
47
|
+
g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 0.0, :expected_chd => 0.4, :pseudo => false)
|
48
|
+
#filename = "unmapped_2_#{region}_#{k}_#{kernel_adjust}_all_threads.png"
|
49
|
+
#g.draw_threads(filename, :draw_legend => "unmapped_#{region}_#{k}_#{kernel_adjust}_legend.png")
|
50
|
+
ctrl_threads = g.get_band(0.0)
|
51
|
+
expected_threads = g.get_band(0.4)
|
52
|
+
ctrl_thread_values += g.threads.select {|x| ctrl_threads.include?(x.first) }.last
|
53
|
+
expected_thread_values += g.threads.select {|x| expected_threads.include?(x.first) }.last
|
54
|
+
filename = "unmapped_2_#{region}_#{k}_#{kernel_adjust}_bands.png"
|
55
|
+
g.draw_bands(filename)
|
56
|
+
#filename = "unmapped_#{region}_#{k}_#{kernel_adjust}_signal.png"
|
57
|
+
#g.draw_signal(filename)
|
58
|
+
#filename = "unmapped_#{region}_#{k}_#{kernel_adjust}_hits.png"
|
59
|
+
#g.draw_hit_count(filename)
|
60
|
+
|
61
|
+
rescue StandardError => e # StandardError only, so Ctrl-C (Interrupt) and exit still propagate
|
62
|
+
puts "failed on #{k} #{kernel_adjust}"
|
63
|
+
puts e.message, e.backtrace
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
g.close
|
68
|
+
|
69
|
+
|
70
|
+
end
|
71
|
+
File.open("ctrl_thread.txt", 'w') {|f| f.write(ctrl_thread_values.join("\n")) }
|
72
|
+
File.open("epxected_thread.txt", 'w') {|f| f.write(expected_thread_values.join("\n")) }
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
length = 0
|
18
|
+
chr_name = ""
|
19
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa")
|
20
|
+
file.each do |entry|
|
21
|
+
length = entry.length
|
22
|
+
chr_name = entry.entry_id
|
23
|
+
|
24
|
+
|
25
|
+
interval_width = 5000000
|
26
|
+
puts chr_name
|
27
|
+
#(1..length).step(interval_width) do |start|
|
28
|
+
#stop = start + interval_width
|
29
|
+
region = "#{chr_name}:#{1}-#{length}"
|
30
|
+
file = chr_name
|
31
|
+
|
32
|
+
puts "analyzing - #{region}"
|
33
|
+
|
34
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/laerfyfe_vs_ler/aln.sort.bam",
|
35
|
+
:format => :bam,
|
36
|
+
:fasta => "/Users/macleand/Desktop/laerfyfe_vs_ler/Ler-1.SHORE.scaffolds.2010-09-30.500bp.fa",
|
37
|
+
:samtools => {:q => 20, :Q => 50, :r => region
|
38
|
+
}
|
39
|
+
)
|
40
|
+
|
41
|
+
g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
|
42
|
+
g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
|
43
|
+
|
44
|
+
begin
|
45
|
+
g.calculate_clusters(:pseudo => true)
|
46
|
+
filename = "unmapped_#{file}_all_threads.png"
|
47
|
+
g.draw_threads(filename, :draw_legend => "unmapped_#{file}_legend.png")
|
48
|
+
##no bands or signal to draw without clustering...
|
49
|
+
filename = "unmapped_#{file}_hits.png"
|
50
|
+
g.draw_hit_count(filename)
|
51
|
+
rescue StandardError => e # StandardError only, so Ctrl-C (Interrupt) and exit still propagate
|
52
|
+
puts e.message, e.backtrace
|
53
|
+
end
|
54
|
+
|
55
|
+
#end
|
56
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
length = 0
|
18
|
+
chr_name = ""
|
19
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/deletion_simulation/NC_000962.fna")
|
20
|
+
file.each do |entry|
|
21
|
+
length = entry.length
|
22
|
+
chr_name = entry.entry_id
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
region = "gi|57116681|ref|NC_000962.2|:1-#{length}"
|
27
|
+
|
28
|
+
puts "analyzing - #{region}"
|
29
|
+
|
30
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/deletion_simulation/aln.sort.bam",
|
31
|
+
:format => :bam,
|
32
|
+
:fasta => "/Users/macleand/Desktop/deletion_simulation/NC_000962.fna",
|
33
|
+
:samtools => {:q => 20, :Q => 50, :r => region
|
34
|
+
}
|
35
|
+
)
|
36
|
+
|
37
|
+
g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
|
38
|
+
g.collect_threads(:start => 0.0, :stop => 0.5, :slide => 0.1, :size => 0.1)
|
39
|
+
puts g.threads
|
40
|
+
|
41
|
+
begin
|
42
|
+
g.calculate_clusters(:pseudo => true)
|
43
|
+
filename = "sim_2_#{region}_all_threads.png"
|
44
|
+
g.draw_threads(filename, :draw_legend => "sim_#{region}_legend.png")
|
45
|
+
##no bands or signal to draw without clustering...
|
46
|
+
filename = "sim_#{region}_hits.png"
|
47
|
+
g.draw_hit_count(filename)
|
48
|
+
rescue StandardError => e # StandardError only, so Ctrl-C (Interrupt) and exit still propagate
|
49
|
+
puts e.message, e.backtrace
|
50
|
+
end
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# make_bands
|
4
|
+
#
|
5
|
+
# Created by Dan MacLean (TSL) on 2012-01-17.
|
6
|
+
# Copyright (c) . All rights reserved.
|
7
|
+
###################################################
|
8
|
+
|
9
|
+
### An example script to loop over each reference in the BAM file, get SNP positions, make density threads for different kernels,
|
10
|
+
### cluster for different values of k and then draw the threads, bands and signal. Generates plots for each new set of parameters
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'bio-gngm'
|
16
|
+
require 'bio'
|
17
|
+
length = 0
|
18
|
+
chr_name = ""
|
19
|
+
file = Bio::FastaFormat.open("/Users/macleand/Desktop/deletion_simulation/NC_000962.fna")
|
20
|
+
file.each do |entry|
|
21
|
+
length = entry.length
|
22
|
+
chr_name = entry.entry_id
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
region = "gi|57116681|ref|NC_000962.2|:1010000-1020000"
|
27
|
+
|
28
|
+
puts "analyzing - #{region}"
|
29
|
+
|
30
|
+
g = Bio::Util::Gngm.new(:file => "/Users/macleand/Desktop/deletion_simulation/aln.sort.bam",
|
31
|
+
:format => :bam,
|
32
|
+
:fasta => "/Users/macleand/Desktop/deletion_simulation/NC_000962.fna",
|
33
|
+
:samtools => {:q => 20, :Q => 50, :r => region
|
34
|
+
}
|
35
|
+
)
|
36
|
+
|
37
|
+
g.get_unmapped_mate_frequency(:ref_window_size => 76, :ref_window_slide => 76)
|
38
|
+
g.collect_threads
|
39
|
+
puts g.snp_positions
|
40
|
+
=begin
|
41
|
+
[0.25, 0.5, 1.0].each do |kernel_adjust|
|
42
|
+
[4, 9, 11].each do | k |
|
43
|
+
begin
|
44
|
+
g.calculate_clusters(:k => k, :adjust => kernel_adjust, :control_chd => 1.0, :expected_chd => 0.5, :pseudo => false)
|
45
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_all_threads.png"
|
46
|
+
g.draw_threads(filename, :draw_legend => "unmapped_#{region}_#{k}_#{kernel_adjust}_legend.png")
|
47
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_bands.png"
|
48
|
+
g.draw_bands(filename)
|
49
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_signal.png"
|
50
|
+
g.draw_signal(filename)
|
51
|
+
filename = "sim_#{region}_#{k}_#{kernel_adjust}_hits.png"
|
52
|
+
g.draw_hit_count(filename)
|
53
|
+
rescue Exception => e
|
54
|
+
puts "failed on #{k} #{kernel_adjust}"
|
55
|
+
puts e.message, e.backtrace
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
=end
|