bio-polyploid-tools 0.1.0 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
4
- data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
3
+ metadata.gz: 019bf8dc15f35de0be9a55567b8041f9b98ac326
4
+ data.tar.gz: 3e0a76bbefead5c5284c64a01b36645748a70098
5
5
  SHA512:
6
- metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
7
- data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
6
+ metadata.gz: 98e2d6c023ee8d89014efe65da619f0a98808c1540c3773aaef901de9f5c2338a3cc4645bdee1a3cdc430d525587c27d576d1f19e2ac8e59d7724a6efaac5901
7
+ data.tar.gz: 3d09c9a1972b7538eb160ee89786f9bdd3f8c52fec554da110222241767e7a17f6efd7f0f42a219fb06cd3689037fbee2ec88eeae0cb94e40333a5c491259421
data/Gemfile CHANGED
@@ -2,12 +2,13 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
- gem "bio", "= 1.4.2"
6
- gem "bio-samtools", "= 0.6.2"
5
+
6
+ gem "bio", ">= 1.4.3"
7
+ gem "bio-samtools", ">= 2.0.3"
7
8
  gem "rake"
8
9
  gem "jeweler"
9
10
 
10
- #gem "systemu", ">=2.5.2"
11
+ gem "systemu", ">=2.5.2"
11
12
 
12
13
  group :development do
13
14
  # gem "shoulda", ">= 0"
@@ -3,17 +3,16 @@ GEM
3
3
  specs:
4
4
  addressable (2.3.6)
5
5
  atomic (1.1.16)
6
- bio (1.4.2)
7
- bio-samtools (0.6.2)
6
+ bio (1.4.3.0001)
7
+ bio-samtools (2.0.3)
8
8
  bio (>= 1.4.2)
9
- ffi
10
- systemu (>= 2.5.2)
9
+ bio-svgenes (>= 0.4.1)
10
+ bio-svgenes (0.4.1)
11
11
  builder (3.2.2)
12
12
  descendants_tracker (0.0.4)
13
13
  thread_safe (~> 0.3, >= 0.3.1)
14
14
  faraday (0.9.0)
15
15
  multipart-post (>= 1.2, < 3)
16
- ffi (1.9.3)
17
16
  git (1.2.6)
18
17
  github_api (0.11.3)
19
18
  addressable (~> 2.3)
@@ -53,7 +52,7 @@ GEM
53
52
  rake (10.2.2)
54
53
  rdoc (4.1.1)
55
54
  json (~> 1.4)
56
- systemu (2.6.0)
55
+ systemu (2.6.4)
57
56
  thread_safe (0.3.1)
58
57
  atomic (>= 1.1.7, < 2)
59
58
 
@@ -61,7 +60,8 @@ PLATFORMS
61
60
  ruby
62
61
 
63
62
  DEPENDENCIES
64
- bio (= 1.4.2)
65
- bio-samtools (= 0.6.2)
63
+ bio (>= 1.4.3)
64
+ bio-samtools (>= 2.0.3)
66
65
  jeweler
67
66
  rake
67
+ systemu (>= 2.5.2)
@@ -0,0 +1,45 @@
1
+ bio-polyploid-tools
2
+ ===================
3
+
4
+ Introduction
5
+ -------------
6
+ These tools are designed to deal with polyploid wheat. The first tool is to design KASP primers,
7
+ making them as specific as possible.
8
+
9
+
10
+ Installation
11
+ ------------
12
+ 'gem install bio-polyploid-tools'
13
+
14
+ You need to have in your $PATH the following programs:
15
+ * [MAFFT](http://mafft.cbrc.jp/alignment/software/)
16
+ * [primer3](http://primer3.sourceforge.net/releases.php)
17
+ * [exonerate](http://www.ebi.ac.uk/~guy/exonerate/)
18
+
19
+ The code has been developed on ruby 2.1.0, but it should work on 1.9.3 and above.
20
+
21
+
22
+ Polymarker
23
+ ----------
24
+
25
+ To run Polymarker with the CSS wheat contigs, you need to unzip the
26
+ reference file [Triticum_aestivum.IWGSP1.22.dna_rm.genome.fa.gz](ftp://ftp.ensemblgenomes.org/pub/release-22/plants/fasta/triticum_aestivum/dna/).
27
+
28
+ polymarker.rb --contigs Triticum_aestivum.IWGSP1.22.dna_rm.genome.fa --marker_list snp_list.csv --output output_folder
29
+
30
+ The snp_list file must follow the convention
31
+ <ID>,<Chromosome>,<SEQUENCE>
32
+ with the SNP inside the sequence in the format [A/T]. As a reference, look at test/data/short_primer_design_test.csv
33
+
34
+ Notes
35
+ -----
36
+
37
+ * If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
38
+
39
+ BUG: Blocks with NNNs are picked and treated as semi-specific.
40
+ BUG: If the name of the reference contains a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
41
+ TODO: If reading from a reference file, only get one reference to align when the region is queried several times
42
+ TODO: Add a parameter file to configure the alignments.
43
+ TODO: Produce primers for products of different sizes
44
+
45
+
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.3
data/bin/bfr.rb CHANGED
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env ruby
1
2
  require 'rubygems'
2
3
  #require 'extensions/all'
3
4
  require 'bio-samtools'
@@ -70,18 +71,12 @@ chunk_size = options[:chunk_size]
70
71
  output_filename = options[:output_filename]
71
72
  stats_file = options[:stats_file]
72
73
 
73
-
74
- #reference = ARGV[6]
75
-
76
-
77
74
 
78
75
  min = chunk * chunk_size
79
76
  max = min + chunk_size
80
77
 
81
78
 
82
- #AvocetS
83
79
  parental_1=options[:parent_1]
84
- #AvocetS (Yr15)
85
80
  parental_2=options[:parent_2]
86
81
 
87
82
 
@@ -89,7 +84,7 @@ bulk_1 = options[:bulk_1]
89
84
  bulk_2 = options[:bulk_2]
90
85
 
91
86
 
92
- fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
87
+ fasta_db = Bio::DB::Fasta::FastaFile.new({:fasta=>reference})
93
88
  fasta_db.load_fai_entries
94
89
 
95
90
 
@@ -14,7 +14,7 @@ require path
14
14
 
15
15
  puts ARGV[0]
16
16
 
17
- fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
17
+ fasta_db = Bio::DB::Fasta::FastaFile.new( {:fasta=>ARGV[0]})
18
18
  fasta_db.load_fai_entries
19
19
  bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
20
20
 
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ found_cointigs = Set.new
5
+ Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model, :chunk=>chunk, :total_chunks=>}) do |aln|
6
+ if aln.identity > min_identity
7
+ exo_f.puts aln.line
8
+ unless found_cointigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
9
+ found_cointigs.add(aln.target_id)
10
+ entry = fasta_file.index.region_for_entry(aln.target_id)
11
+ raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
12
+ region = entry.get_full_region
13
+ seq = fasta_file.fetch_sequence(region)
14
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
15
+ end
16
+ end
17
+ end
@@ -43,7 +43,7 @@ snps = Array.new
43
43
  #0. Load the fasta index
44
44
  fasta_reference_db = nil
45
45
  if fasta_reference
46
- fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
46
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
47
47
  fasta_reference_db.load_fai_entries
48
48
  p "Fasta reference: #{fasta_reference}"
49
49
  end
@@ -99,7 +99,7 @@ Dir.foreach(path_to_contigs) do |filename |
99
99
  puts filename
100
100
  target="#{path_to_contigs}/#{filename}"
101
101
 
102
- fasta_file = Bio::DB::Fasta::FastaFile.new(target)
102
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
103
103
  fasta_file.load_fai_entries
104
104
  Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
105
105
  if aln.identity > min_identity
@@ -82,7 +82,7 @@ snps = Array.new
82
82
  #0. Load the fasta index
83
83
  fasta_reference_db = nil
84
84
  if reference_file
85
- fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
85
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>reference_file})
86
86
  fasta_reference_db.load_fai_entries
87
87
  p "Fasta reference: #{reference_file}"
88
88
  end
@@ -87,7 +87,7 @@ snps = Array.new
87
87
  #0. Load the fasta index
88
88
  fasta_reference_db = nil
89
89
  if fasta_reference
90
- fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
90
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
91
91
  fasta_reference_db.load_fai_entries
92
92
  p "Fasta reference: #{fasta_reference}"
93
93
  end
@@ -141,7 +141,7 @@ filename=path_to_contigs
141
141
  puts filename
142
142
  target=filename
143
143
 
144
- fasta_file = Bio::DB::Fasta::FastaFile.new(target)
144
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
145
145
  fasta_file.load_fai_entries
146
146
 
147
147
  found_cointigs = Set.new
@@ -15,7 +15,7 @@ require path
15
15
 
16
16
 
17
17
 
18
- fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
18
+ fasta_db = Bio::DB::Fasta::FastaFile.new(:fasta=>ARGV[0])
19
19
  fasta_db.load_fai_entries
20
20
  bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
21
21
  bam2 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})
@@ -23,7 +23,7 @@ bam2 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})
23
23
 
24
24
  output_prefix = ARGV[3]
25
25
 
26
- block_size=300
26
+ block_size=1000
27
27
 
28
28
  min_cov = ARGV[4].to_i ? ARGV[4].to_i : 10
29
29
  chunk = ARGV[5].to_i
@@ -54,6 +54,38 @@ fasta_db.index.entries.each do | r |
54
54
 
55
55
 
56
56
  begin
57
+ <<<<<<< HEAD
58
+ reg_a = bam1.fetch_region({:region=>region, :min_cov=>min_cov, :A=>1})
59
+ reg_b = bam2.fetch_region({:region=>region, :min_cov=>min_cov, :A=>1})
60
+ cons_1 = reg_a.consensus
61
+ cons_2 = reg_b.consensus
62
+
63
+
64
+ snps_1 = cons_1.count_ambiguities
65
+ snps_2 = cons_2.count_ambiguities
66
+
67
+ called_1 = reg_a.called
68
+ called_2 = reg_b.called
69
+
70
+ snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
71
+
72
+ snps_per_1k_1 = (block_size * snps_1.to_f ) / region.size
73
+ snps_per_1k_2 = (block_size * snps_2.to_f ) / region.size
74
+ snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
75
+
76
+ hist_1[snps_per_1k_1.to_i] += 1
77
+ hist_2[snps_per_1k_2.to_i] += 1
78
+
79
+ table_file.print "#{r.id}\t#{region.size}\t"
80
+ table_file.print "#{snps_1}\t#{called_1}\t#{snps_per_1k_1}\t"
81
+ table_file.print "#{snps_2}\t#{called_2}\t#{snps_per_1k_2}\t"
82
+ table_file.print "#{snps_tot}\t#{snps_per_1k_tot}\n"
83
+ fasta_file.puts ">#{r.id}_1"
84
+ fasta_file.puts "#{cons_1}"
85
+ fasta_file.puts ">#{r.id}_2"
86
+ fasta_file.puts "#{cons_2}"
87
+
88
+ =======
57
89
 
58
90
  cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
59
91
  cons_2 = bam2.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
@@ -62,13 +94,10 @@ fasta_db.index.entries.each do | r |
62
94
  snps_1 = cons_1.count_ambiguities
63
95
  snps_2 = cons_2.count_ambiguities
64
96
 
65
- called_1 = cons_1.upper_case_count
66
- called_2 = cons_2.upper_case_count
67
-
68
97
  snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
69
98
 
70
- snps_per_1k_1 = (block_size * snps_1.to_f ) / called_1
71
- snps_per_1k_2 = (block_size * snps_2.to_f ) / called_2
99
+ snps_per_1k_1 = (block_size * snps_1.to_f ) / region.size
100
+ snps_per_1k_2 = (block_size * snps_2.to_f ) / region.size
72
101
  snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
73
102
 
74
103
  hist_1[snps_per_1k_1.to_i] += 1
@@ -83,6 +112,7 @@ fasta_db.index.entries.each do | r |
83
112
  fasta_file.puts ">#{r.id}_2"
84
113
  fasta_file.puts "#{cons_2}"
85
114
  end
115
+ >>>>>>> 1b60bd09fdb1b087d6cb53c643ff36e536efe4a3
86
116
  rescue Exception => e
87
117
  $stderr.puts "Unable to process #{region}: #{e.to_s}"
88
118
  end
@@ -2,32 +2,35 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.1.0 ruby lib
5
+ # stub: bio-polyploid-tools 0.2.3 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools"
9
- s.version = "0.1.0"
9
+ s.version = "0.2.3"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez"]
14
- s.date = "2014-03-31"
14
+ s.date = "2014-04-27"
15
15
  s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
16
16
  s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
17
- s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
17
+ s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "find_best_exonerate.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
18
18
  s.extra_rdoc_files = [
19
- "README"
19
+ "README",
20
+ "README.md"
20
21
  ]
21
22
  s.files = [
22
23
  "Gemfile",
23
24
  "Gemfile.lock",
24
25
  "README",
26
+ "README.md",
25
27
  "Rakefile",
26
28
  "VERSION",
27
29
  "bin/bfr.rb",
28
30
  "bin/count_variations.rb",
29
31
  "bin/filter_blat_by_target_coverage.rb",
30
32
  "bin/find_best_blat_hit.rb",
33
+ "bin/find_best_exonerate.rb",
31
34
  "bin/hexaploid_primers.rb",
32
35
  "bin/homokaryot_primers.rb",
33
36
  "bin/map_markers_to_contigs.rb",
@@ -78,9 +81,7 @@ Gem::Specification.new do |s|
78
81
  "lib/bio/PolyploidTools/PrimerRegion.rb",
79
82
  "lib/bio/PolyploidTools/SNP.rb",
80
83
  "lib/bio/PolyploidTools/SNPSequence.rb",
81
- "lib/bio/SAMToolsExtensions.rb",
82
84
  "lib/bio/db/exonerate.rb",
83
- "lib/bio/db/fastadb.rb",
84
85
  "lib/bio/db/primer3.rb",
85
86
  "lib/bioruby-polyploid-tools.rb",
86
87
  "test/data/BS00068396_51.fa",
@@ -119,21 +120,24 @@ Gem::Specification.new do |s|
119
120
  s.specification_version = 4
120
121
 
121
122
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
122
- s.add_runtime_dependency(%q<bio>, ["= 1.4.2"])
123
- s.add_runtime_dependency(%q<bio-samtools>, ["= 0.6.2"])
123
+ s.add_runtime_dependency(%q<bio>, [">= 1.4.3"])
124
+ s.add_runtime_dependency(%q<bio-samtools>, [">= 2.0.3"])
124
125
  s.add_runtime_dependency(%q<rake>, [">= 0"])
125
126
  s.add_runtime_dependency(%q<jeweler>, [">= 0"])
127
+ s.add_runtime_dependency(%q<systemu>, [">= 2.5.2"])
126
128
  else
127
- s.add_dependency(%q<bio>, ["= 1.4.2"])
128
- s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
129
+ s.add_dependency(%q<bio>, [">= 1.4.3"])
130
+ s.add_dependency(%q<bio-samtools>, [">= 2.0.3"])
129
131
  s.add_dependency(%q<rake>, [">= 0"])
130
132
  s.add_dependency(%q<jeweler>, [">= 0"])
133
+ s.add_dependency(%q<systemu>, [">= 2.5.2"])
131
134
  end
132
135
  else
133
- s.add_dependency(%q<bio>, ["= 1.4.2"])
134
- s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
136
+ s.add_dependency(%q<bio>, [">= 1.4.3"])
137
+ s.add_dependency(%q<bio-samtools>, [">= 2.0.3"])
135
138
  s.add_dependency(%q<rake>, [">= 0"])
136
139
  s.add_dependency(%q<jeweler>, [">= 0"])
140
+ s.add_dependency(%q<systemu>, [">= 2.5.2"])
137
141
  end
138
142
  end
139
143
 
@@ -5,252 +5,16 @@ require 'rubygems'
5
5
  #require 'bio/db/vcf'
6
6
  require 'pathname'
7
7
  #require_relative 'BIOExtensions.rb'
8
- require_relative 'db/fastadb.rb'
8
+
9
9
 
10
10
  require 'bio'
11
+ require 'bio-samtools'
12
+
11
13
  require "set"
12
14
  require 'systemu'
13
15
  require 'json'
14
16
  #require 'strmask'
15
17
 
16
- =begin
17
-
18
- Extends the methods to be able to calculate the BFR and a consensus from the pileup
19
-
20
- =end
21
-
22
- class Bio::DB::Pileup
23
-
24
- #attr_accessor :minumum_ratio_for_iup_consensus
25
- #@minumum_ratio_for_iup_consensus = 0.20
26
-
27
- #Returns a hash with the count of bases
28
-
29
- def bases
30
- return @bases if @bases
31
- @bases = self.non_refs
32
- #puts self.ref_count
33
- @bases[self.ref_base.upcase.to_sym] = self.ref_count
34
- @bases
35
- end
36
-
37
- def base_coverage
38
- total = 0
39
- @bases.each do |k,v|
40
- total += v
41
- end
42
- total
43
- end
44
-
45
- def base_ratios
46
- return @base_ratios if @base_ratios
47
- bases = self.bases
48
- @base_ratios = Hash.new
49
- bases.each do |k,v|
50
- @base_ratios[k] = v.to_f/self.base_coverage.to_f
51
- end
52
- @base_ratios
53
- end
54
-
55
- # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
56
- def consensus_iuap(minumum_ratio_for_iup_consensus)
57
- minumum_ratio_for_iup_consensus
58
- if @consensus_iuap.nil?
59
- @consensus_iuap = self.ref_base.downcase
60
- bases = self.bases
61
- tmp = String.new
62
- bases.each do |k,v|
63
- tmp << k[0].to_s if v/self.coverage > minumum_ratio_for_iup_consensus
64
- end
65
- if tmp.length > 0
66
- @consensus_iuap = Bio::NucleicAcid.to_IUAPC(tmp)
67
- end
68
- end
69
- @consensus_iuap
70
- end
71
- end
72
-
73
- class Bio::DB::Fasta::Region
74
- attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases
75
-
76
- #TODO: Debug, as it hasnt been tested in the actual code.
77
- def base_ratios_for_base(base)
78
- @all_ratios = Hash.new unless @all_ratios
79
- unless @all_ratios[base]
80
- ratios = Array.new
81
- for i in (0..region.size-1)
82
- ratios << @base_ratios[i][base]
83
- end
84
- @all_ratios[base] = ratios
85
- end
86
- @all_ratios[base]
87
- end
88
-
89
- end
90
-
91
- class Bio::DB::Sam::SAMException < RuntimeError
92
-
93
- end
94
-
95
- class Bio::DB::Sam
96
-
97
-
98
- attr_accessor :minumum_ratio_for_iup_consensus
99
- attr_reader :cached_regions
100
- #attr_accessor :pileup_cache
101
- @minumum_ratio_for_iup_consensus = 0.20
102
-
103
-
104
- #Same as mpilup, but it caches the pileup, so if you want several operations on the same set of regions
105
- #the pile for different operations, it won't execute the mpilup command several times
106
- #Whenever you finish using a region, call mpileup_clear_cache to free the cache
107
- #The argument Region is required, as it will be the key for the underlying hash.
108
- #We asume that the options are constant. If they are not, the cache mechanism may not be consistent.
109
- #
110
- #TODO: It may be good to load partially the pileup
111
- def mpileup_cached (opts={})
112
- raise SAMException.new(), "A region must be provided" unless opts[:r] or opts[:region]
113
- @pileup_cache = Hash.new unless @pileup_cache
114
- @cached_regions = Hash.new unless @cached_regions
115
-
116
- region = opts[:r] ? opts[:r] : opts[:region]
117
- opts[:r] = "'#{region.to_s}'"
118
- opts[:region] = "'#{region.to_s}'"
119
- opts[:A] = true
120
- #reg = region.class == Bio::DB::Fasta::Region ? region : Bio::DB::Fasta::Region.parse_region(region.to_s)
121
-
122
- unless @cached_regions[region.to_s]
123
- @cached_regions[region.to_s] = Bio::DB::Fasta::Region.parse_region(region.to_s)
124
- tmp = Array.new
125
- @cached_regions[region.to_s].pileup = tmp
126
- #puts "Loading #{region.to_s}"
127
- mpileup(opts) do | pile |
128
- # puts pile
129
- tmp << pile
130
- yield pile
131
- end
132
- else
133
- # puts "Loaded, reruning #{region.to_s}"
134
- @cached_regions.pileup[region.to_s] .each do | pile |
135
- yield pile
136
- end
137
- end
138
- end
139
-
140
- #Clears the pileup cache. If a region is passed as argument, just the specified region is removed
141
- #If no region is passed, the hash is emptied
142
- def mpileup_clear_cache (region)
143
- return unless @cached_regions
144
- if region
145
- @cached_regions[region.to_s] = nil
146
- else
147
- @cached_regions.clear
148
- end
149
- end
150
-
151
- #Gets the coverage of a region from a pileup.
152
- def average_coverage_from_pileup(opts={})
153
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
154
- region = opts[:region]
155
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
156
- @cached_regions[region].average_coverage
157
- end
158
-
159
- #
160
- def coverages_from_pileup(opts={})
161
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
162
- region = opts[:region]
163
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
164
- @cached_regions[region].coverages
165
- end
166
-
167
- def consensus_with_ambiguities(opts={})
168
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
169
- region = opts[:region]
170
- # p "consensus with ambiguities for: " << opts[:region]
171
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
172
- @cached_regions[region].consensus
173
- end
174
-
175
- def calculate_stats_from_pile(opts={})
176
- min_cov = opts[:min_cov] ? opts[:min_cov] : 20
177
-
178
-
179
- opts[:region] = Bio::DB::Fasta::Region.parse_region( opts[:region] .to_s) unless opts[:region].class == Bio::DB::Fasta::Region
180
- region = opts[:region]
181
- reference = self.fetch_reference(region.entry, region.start, region.end).downcase
182
- # p "calculationg from pile..." << region.to_s
183
- base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
184
- bases = Array.new(region.size, BASE_COUNT_ZERO)
185
- coverages = Array.new(region.size, 0)
186
- total_cov = 0
187
-
188
- self.mpileup_cached(:region=>"#{region.to_s}") do | pile |
189
- #puts pile
190
- #puts pile.coverage
191
- if pile.coverage > min_cov
192
- base_ratios[pile.pos - region.start ] = pile.base_ratios
193
- reference[pile.pos - region.start ] = pile.consensus_iuap(0.20)
194
- coverages[pile.pos - region.start ] = pile.coverage.to_i
195
- bases[pile.pos - region.start ] = pile.bases
196
- end
197
- total_cov += pile.coverage
198
- end
199
-
200
- region = @cached_regions[region.to_s]
201
- region.coverages = coverages
202
- region.base_ratios = base_ratios
203
- region.consensus = reference
204
-
205
- region.average_coverage = total_cov.to_f/region.size.to_f
206
- region.bases = bases
207
- region
208
- end
209
-
210
-
211
-
212
- BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
213
-
214
- #Gets an array with the proportions of the bases in the region. If there is no coverage, a
215
- def base_ratios_in_region(opts={})
216
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
217
- region = opts[:region]
218
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
219
- @cached_regions[region].base_ratios
220
- end
221
-
222
- #Gets an array with the bsaes count in the region. If there is no coverage, a
223
- def bases_in_region(opts={})
224
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
225
- region = opts[:region]
226
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
227
- @cached_regions[region].bases
228
- end
229
-
230
-
231
-
232
- def extract_reads(opts={})
233
- opts[:region] = Bio::DB::Fasta::Region.parse_region( opts[:region] .to_s) unless opts[:region].class == Bio::DB::Fasta::Region
234
- fastq_filename = opts[:fastq]
235
- fastq_file = opts[:fastq_file]
236
-
237
- out = $stdout
238
-
239
- print_fastq = Proc.new do |alignment|
240
- out.puts "@#{alignment.qname}"
241
- out.puts "#{alignment.seq}"
242
- out.puts "+#{alignment.qname}"
243
- out.puts "#{alignment.qual}"
244
- end
245
-
246
- fetch_with_function(chromosome, qstart, qstart+len, print_fastq)
247
-
248
-
249
- end
250
-
251
-
252
-
253
- end
254
18
 
255
19
  module Bio::BFRTools
256
20
 
@@ -267,7 +31,7 @@ module Bio::BFRTools
267
31
  BASES = [:A, :C, :G, :T]
268
32
  #Sets the reference file
269
33
  def reference(path)
270
- @reference_db = Bio::DB::Fasta::FastaFile.new(path)
34
+ @reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>path})
271
35
  @reference_path = path
272
36
  end
273
37
 
@@ -350,33 +114,35 @@ module Bio::BFRTools
350
114
  self.entry = reg.entry
351
115
  self.start = reg.start
352
116
  self.end = reg.end
353
-
117
+ opts[:region] = reg
354
118
  @container = opts[:container]
355
119
 
356
- parental_1_sam = @container.parental_1_sam
357
- parental_2_sam = @container.parental_2_sam
358
- bulk_1_sam = @container.bulk_1_sam
359
- bulk_2_sam = @container.bulk_2_sam
360
-
361
- @parental_1_sequence = parental_1_sam.consensus_with_ambiguities(opts)
362
- @parental_2_sequence = parental_2_sam.consensus_with_ambiguities(opts)
120
+ parental_1_reg = @container.parental_1_sam.fetch_region(opts)
121
+ parental_2_reg = @container.parental_2_sam.fetch_region(opts)
122
+ bulk_1_reg = @container.bulk_1_sam.fetch_region(opts)
123
+ bulk_2_reg = @container.bulk_2_sam.fetch_region(opts)
124
+
125
+
363
126
 
364
- @bulk_1_sequence = bulk_1_sam.consensus_with_ambiguities(opts)
365
- @bulk_2_sequence = bulk_2_sam.consensus_with_ambiguities(opts)
127
+ @parental_1_sequence = parental_1_reg.consensus
128
+ @parental_2_sequence = parental_2_reg.consensus
129
+
130
+ @bulk_1_sequence = bulk_1_reg.consensus
131
+ @bulk_2_sequence = bulk_2_reg.consensus
366
132
 
367
133
  @snp_count = Container.snps_between( @parental_1_sequence , @parental_2_sequence )
368
134
 
369
- @ratios_bulk_1 = bulk_1_sam.base_ratios_in_region(opts)
370
- @ratios_bulk_2 = bulk_2_sam.base_ratios_in_region(opts)
135
+ @ratios_bulk_1 = bulk_1_reg.base_ratios
136
+ @ratios_bulk_2 = bulk_2_reg.base_ratios
371
137
 
372
- @bases_bulk_1 = bulk_1_sam.bases_in_region(opts)
373
- @bases_bulk_2 = bulk_2_sam.bases_in_region(opts)
138
+ @bases_bulk_1 = bulk_1_reg.bases
139
+ @bases_bulk_2 = bulk_2_reg.bases
374
140
 
375
- @avg_cov_bulk_1 = bulk_1_sam.average_coverage_from_pileup(opts)
376
- @avg_cov_bulk_2 = bulk_2_sam.average_coverage_from_pileup(opts)
141
+ @avg_cov_bulk_1 = bulk_1_reg.average_coverage
142
+ @avg_cov_bulk_2 = bulk_2_reg.average_coverage
377
143
 
378
- @coverages_1 = bulk_1_sam.coverages_from_pileup(opts)
379
- @coverages_2 = bulk_2_sam.coverages_from_pileup(opts)
144
+ @coverages_1 = bulk_1_reg.coverages
145
+ @coverages_2 = bulk_2_reg.coverages
380
146
 
381
147
  end
382
148
 
@@ -472,7 +238,7 @@ module Bio::BFRTools
472
238
  raise BFRToolsException.new ("The reference for the line should be :first or :second, but was " + reference.to_s )
473
239
  end
474
240
 
475
- relative_position = self.start + position + 1
241
+ relative_position = self.start + position
476
242
 
477
243
  bfr = bfrs[reference][base][position]
478
244
  cov_1 = @coverages_1[position]
@@ -622,7 +388,7 @@ module Bio::BFRTools
622
388
  end
623
389
 
624
390
  def process_region(opts={})
625
- opts = { :min_cov=>20, :max_snp_1kbp => 10 }.merge!(opts)
391
+ opts = { :min_cov=>20, :max_snp_1kbp => 10, :max_per=>0.20 }.merge!(opts)
626
392
 
627
393
  @proccesed_regions += 1
628
394
  output = opts[:output_file] ? opts[:output_file] : $stdout
@@ -675,7 +441,7 @@ module Bio::BFRTools
675
441
 
676
442
 
677
443
  for informative in info
678
- line = region.get_bfr_line(i+1, base, informative)
444
+ line = region.get_bfr_line(i, base, informative)
679
445
  output.print line , "\n"
680
446
  end
681
447
  end