bio-polyploid-tools 0.1.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d32372b6eef65b23de3a9c669bb6f7dfb178882
4
- data.tar.gz: c83526572adf6c745dd0785eb610aa18b6d7aab8
3
+ metadata.gz: 019bf8dc15f35de0be9a55567b8041f9b98ac326
4
+ data.tar.gz: 3e0a76bbefead5c5284c64a01b36645748a70098
5
5
  SHA512:
6
- metadata.gz: 2994977ba9b126e2cdc27c2e511abc23d1a08677f8fd5e6d5641ab877a0e0ae38a58a03036e1c4d41b1e8225454ae08fa44ec9e93ec96cec9c3bdaab29cf65e5
7
- data.tar.gz: fe025cdaa7b49550d675cdc901855f35ac3e1170ac39a2d444a8fadb785f0cf6e40f64c97c335d247b52d5dcac4a790a1b3b8019456efa232fc97e04a052fdd8
6
+ metadata.gz: 98e2d6c023ee8d89014efe65da619f0a98808c1540c3773aaef901de9f5c2338a3cc4645bdee1a3cdc430d525587c27d576d1f19e2ac8e59d7724a6efaac5901
7
+ data.tar.gz: 3d09c9a1972b7538eb160ee89786f9bdd3f8c52fec554da110222241767e7a17f6efd7f0f42a219fb06cd3689037fbee2ec88eeae0cb94e40333a5c491259421
data/Gemfile CHANGED
@@ -2,12 +2,13 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
- gem "bio", "= 1.4.2"
6
- gem "bio-samtools", "= 0.6.2"
5
+
6
+ gem "bio", ">= 1.4.3"
7
+ gem "bio-samtools", ">= 2.0.3"
7
8
  gem "rake"
8
9
  gem "jeweler"
9
10
 
10
- #gem "systemu", ">=2.5.2"
11
+ gem "systemu", ">=2.5.2"
11
12
 
12
13
  group :development do
13
14
  # gem "shoulda", ">= 0"
@@ -3,17 +3,16 @@ GEM
3
3
  specs:
4
4
  addressable (2.3.6)
5
5
  atomic (1.1.16)
6
- bio (1.4.2)
7
- bio-samtools (0.6.2)
6
+ bio (1.4.3.0001)
7
+ bio-samtools (2.0.3)
8
8
  bio (>= 1.4.2)
9
- ffi
10
- systemu (>= 2.5.2)
9
+ bio-svgenes (>= 0.4.1)
10
+ bio-svgenes (0.4.1)
11
11
  builder (3.2.2)
12
12
  descendants_tracker (0.0.4)
13
13
  thread_safe (~> 0.3, >= 0.3.1)
14
14
  faraday (0.9.0)
15
15
  multipart-post (>= 1.2, < 3)
16
- ffi (1.9.3)
17
16
  git (1.2.6)
18
17
  github_api (0.11.3)
19
18
  addressable (~> 2.3)
@@ -53,7 +52,7 @@ GEM
53
52
  rake (10.2.2)
54
53
  rdoc (4.1.1)
55
54
  json (~> 1.4)
56
- systemu (2.6.0)
55
+ systemu (2.6.4)
57
56
  thread_safe (0.3.1)
58
57
  atomic (>= 1.1.7, < 2)
59
58
 
@@ -61,7 +60,8 @@ PLATFORMS
61
60
  ruby
62
61
 
63
62
  DEPENDENCIES
64
- bio (= 1.4.2)
65
- bio-samtools (= 0.6.2)
63
+ bio (>= 1.4.3)
64
+ bio-samtools (>= 2.0.3)
66
65
  jeweler
67
66
  rake
67
+ systemu (>= 2.5.2)
@@ -0,0 +1,45 @@
1
+ bio-polyploid-tools
2
+ ===================
3
+
4
+ Introduction
5
+ -------------
6
+ These tools are designed to deal with polyploid wheat. The first tool is to design KASP primers,
7
+ making them as specific as possible.
8
+
9
+
10
+ Installation
11
+ ------------
12
+ 'gem install bio-polyploid-tools'
13
+
14
+ You need to have in your $PATH the following programs:
15
+ * [MAFFT](http://mafft.cbrc.jp/alignment/software/)
16
+ * [primer3](http://primer3.sourceforge.net/releases.php)
17
+ * [exonerate](http://www.ebi.ac.uk/~guy/exonerate/)
18
+
19
+ The code has been developed on ruby 2.1.0, but it should work on 1.9.3 and above.
20
+
21
+
22
+ Polymarker
23
+ ----------
24
+
25
+ To run polymarker with the CSS wheat contigs, you need to unzip the
26
+ reference file [Triticum_aestivum.IWGSP1.22.dna_rm.genome.fa.gz](ftp://ftp.ensemblgenomes.org/pub/release-22/plants/fasta/triticum_aestivum/dna/).
27
+
28
+ polymarker.rb --contigs Triticum_aestivum.IWGSP1.22.dna_rm.genome.fa --marker_list snp_list.csv --output output_folder
29
+
30
+ The snp_list file must follow the convention
31
+ <ID>,<Chromosome>,<SEQUENCE>
32
+ with the SNP inside the sequence in the format [A/T]. As a reference, look at test/data/short_primer_design_test.csv
33
+
34
+ Notes
35
+ -----
36
+
37
+ * If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
38
+
39
+ BUG: Blocks with NNNs are picked and treated as semi-specific.
40
+ BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
41
+ TODO: If reading from a reference file, only get one reference to align when the region is queried several times
42
+ TODO: Add a parameter file to configure the alignments.
43
+ TODO: Produce primers for products of different sizes
44
+
45
+
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.3
data/bin/bfr.rb CHANGED
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env ruby
1
2
  require 'rubygems'
2
3
  #require 'extensions/all'
3
4
  require 'bio-samtools'
@@ -70,18 +71,12 @@ chunk_size = options[:chunk_size]
70
71
  output_filename = options[:output_filename]
71
72
  stats_file = options[:stats_file]
72
73
 
73
-
74
- #reference = ARGV[6]
75
-
76
-
77
74
 
78
75
  min = chunk * chunk_size
79
76
  max = min + chunk_size
80
77
 
81
78
 
82
- #AvocetS
83
79
  parental_1=options[:parent_1]
84
- #AvocetS (Yr15)
85
80
  parental_2=options[:parent_2]
86
81
 
87
82
 
@@ -89,7 +84,7 @@ bulk_1 = options[:bulk_1]
89
84
  bulk_2 = options[:bulk_2]
90
85
 
91
86
 
92
- fasta_db = Bio::DB::Fasta::FastaFile.new(reference)
87
+ fasta_db = Bio::DB::Fasta::FastaFile.new({:fasta=>reference})
93
88
  fasta_db.load_fai_entries
94
89
 
95
90
 
@@ -14,7 +14,7 @@ require path
14
14
 
15
15
  puts ARGV[0]
16
16
 
17
- fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
17
+ fasta_db = Bio::DB::Fasta::FastaFile.new( {:fasta=>ARGV[0]})
18
18
  fasta_db.load_fai_entries
19
19
  bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
20
20
 
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+
4
+ found_cointigs = Set.new
5
+ Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model, :chunk=>chunk, :total_chunks=>}) do |aln|
6
+ if aln.identity > min_identity
7
+ exo_f.puts aln.line
8
+ unless found_cointigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
9
+ found_cointigs.add(aln.target_id)
10
+ entry = fasta_file.index.region_for_entry(aln.target_id)
11
+ raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
12
+ region = entry.get_full_region
13
+ seq = fasta_file.fetch_sequence(region)
14
+ contigs_f.puts(">#{aln.target_id}\n#{seq}")
15
+ end
16
+ end
17
+ end
@@ -43,7 +43,7 @@ snps = Array.new
43
43
  #0. Load the fasta index
44
44
  fasta_reference_db = nil
45
45
  if fasta_reference
46
- fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
46
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
47
47
  fasta_reference_db.load_fai_entries
48
48
  p "Fasta reference: #{fasta_reference}"
49
49
  end
@@ -99,7 +99,7 @@ Dir.foreach(path_to_contigs) do |filename |
99
99
  puts filename
100
100
  target="#{path_to_contigs}/#{filename}"
101
101
 
102
- fasta_file = Bio::DB::Fasta::FastaFile.new(target)
102
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
103
103
  fasta_file.load_fai_entries
104
104
  Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
105
105
  if aln.identity > min_identity
@@ -82,7 +82,7 @@ snps = Array.new
82
82
  #0. Load the fasta index
83
83
  fasta_reference_db = nil
84
84
  if reference_file
85
- fasta_reference_db = Bio::DB::Fasta::FastaFile.new(reference_file)
85
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>reference_file})
86
86
  fasta_reference_db.load_fai_entries
87
87
  p "Fasta reference: #{reference_file}"
88
88
  end
@@ -87,7 +87,7 @@ snps = Array.new
87
87
  #0. Load the fasta index
88
88
  fasta_reference_db = nil
89
89
  if fasta_reference
90
- fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta_reference)
90
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>fasta_reference})
91
91
  fasta_reference_db.load_fai_entries
92
92
  p "Fasta reference: #{fasta_reference}"
93
93
  end
@@ -141,7 +141,7 @@ filename=path_to_contigs
141
141
  puts filename
142
142
  target=filename
143
143
 
144
- fasta_file = Bio::DB::Fasta::FastaFile.new(target)
144
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
145
145
  fasta_file.load_fai_entries
146
146
 
147
147
  found_cointigs = Set.new
@@ -15,7 +15,7 @@ require path
15
15
 
16
16
 
17
17
 
18
- fasta_db = Bio::DB::Fasta::FastaFile.new( ARGV[0])
18
+ fasta_db = Bio::DB::Fasta::FastaFile.new(:fasta=>ARGV[0])
19
19
  fasta_db.load_fai_entries
20
20
  bam1 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[1]})
21
21
  bam2 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})
@@ -23,7 +23,7 @@ bam2 = Bio::DB::Sam.new({:fasta=>ARGV[0], :bam=>ARGV[2]})
23
23
 
24
24
  output_prefix = ARGV[3]
25
25
 
26
- block_size=300
26
+ block_size=1000
27
27
 
28
28
  min_cov = ARGV[4].to_i ? ARGV[4].to_i : 10
29
29
  chunk = ARGV[5].to_i
@@ -54,6 +54,38 @@ fasta_db.index.entries.each do | r |
54
54
 
55
55
 
56
56
  begin
57
+ <<<<<<< HEAD
58
+ reg_a = bam1.fetch_region({:region=>region, :min_cov=>min_cov, :A=>1})
59
+ reg_b = bam2.fetch_region({:region=>region, :min_cov=>min_cov, :A=>1})
60
+ cons_1 = reg_a.consensus
61
+ cons_2 = reg_b.consensus
62
+
63
+
64
+ snps_1 = cons_1.count_ambiguities
65
+ snps_2 = cons_2.count_ambiguities
66
+
67
+ called_1 = reg_a.called
68
+ called_2 = reg_b.called
69
+
70
+ snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
71
+
72
+ snps_per_1k_1 = (block_size * snps_1.to_f ) / region.size
73
+ snps_per_1k_2 = (block_size * snps_2.to_f ) / region.size
74
+ snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
75
+
76
+ hist_1[snps_per_1k_1.to_i] += 1
77
+ hist_2[snps_per_1k_2.to_i] += 1
78
+
79
+ table_file.print "#{r.id}\t#{region.size}\t"
80
+ table_file.print "#{snps_1}\t#{called_1}\t#{snps_per_1k_1}\t"
81
+ table_file.print "#{snps_2}\t#{called_2}\t#{snps_per_1k_2}\t"
82
+ table_file.print "#{snps_tot}\t#{snps_per_1k_tot}\n"
83
+ fasta_file.puts ">#{r.id}_1"
84
+ fasta_file.puts "#{cons_1}"
85
+ fasta_file.puts ">#{r.id}_2"
86
+ fasta_file.puts "#{cons_2}"
87
+
88
+ =======
57
89
 
58
90
  cons_1 = bam1.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
59
91
  cons_2 = bam2.consensus_with_ambiguities({:region=>region, :case=>true, :min_cov=>min_cov})
@@ -62,13 +94,10 @@ fasta_db.index.entries.each do | r |
62
94
  snps_1 = cons_1.count_ambiguities
63
95
  snps_2 = cons_2.count_ambiguities
64
96
 
65
- called_1 = cons_1.upper_case_count
66
- called_2 = cons_2.upper_case_count
67
-
68
97
  snps_tot = Bio::Sequence.snps_between(cons_1, cons_2)
69
98
 
70
- snps_per_1k_1 = (block_size * snps_1.to_f ) / called_1
71
- snps_per_1k_2 = (block_size * snps_2.to_f ) / called_2
99
+ snps_per_1k_1 = (block_size * snps_1.to_f ) / region.size
100
+ snps_per_1k_2 = (block_size * snps_2.to_f ) / region.size
72
101
  snps_per_1k_tot = (block_size * snps_tot.to_f ) / region.size
73
102
 
74
103
  hist_1[snps_per_1k_1.to_i] += 1
@@ -83,6 +112,7 @@ fasta_db.index.entries.each do | r |
83
112
  fasta_file.puts ">#{r.id}_2"
84
113
  fasta_file.puts "#{cons_2}"
85
114
  end
115
+ >>>>>>> 1b60bd09fdb1b087d6cb53c643ff36e536efe4a3
86
116
  rescue Exception => e
87
117
  $stderr.puts "Unable to process #{region}: #{e.to_s}"
88
118
  end
@@ -2,32 +2,35 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.1.0 ruby lib
5
+ # stub: bio-polyploid-tools 0.2.3 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools"
9
- s.version = "0.1.0"
9
+ s.version = "0.2.3"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez"]
14
- s.date = "2014-03-31"
14
+ s.date = "2014-04-27"
15
15
  s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
16
16
  s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
17
- s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
17
+ s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "find_best_exonerate.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snps_between_bams.rb"]
18
18
  s.extra_rdoc_files = [
19
- "README"
19
+ "README",
20
+ "README.md"
20
21
  ]
21
22
  s.files = [
22
23
  "Gemfile",
23
24
  "Gemfile.lock",
24
25
  "README",
26
+ "README.md",
25
27
  "Rakefile",
26
28
  "VERSION",
27
29
  "bin/bfr.rb",
28
30
  "bin/count_variations.rb",
29
31
  "bin/filter_blat_by_target_coverage.rb",
30
32
  "bin/find_best_blat_hit.rb",
33
+ "bin/find_best_exonerate.rb",
31
34
  "bin/hexaploid_primers.rb",
32
35
  "bin/homokaryot_primers.rb",
33
36
  "bin/map_markers_to_contigs.rb",
@@ -78,9 +81,7 @@ Gem::Specification.new do |s|
78
81
  "lib/bio/PolyploidTools/PrimerRegion.rb",
79
82
  "lib/bio/PolyploidTools/SNP.rb",
80
83
  "lib/bio/PolyploidTools/SNPSequence.rb",
81
- "lib/bio/SAMToolsExtensions.rb",
82
84
  "lib/bio/db/exonerate.rb",
83
- "lib/bio/db/fastadb.rb",
84
85
  "lib/bio/db/primer3.rb",
85
86
  "lib/bioruby-polyploid-tools.rb",
86
87
  "test/data/BS00068396_51.fa",
@@ -119,21 +120,24 @@ Gem::Specification.new do |s|
119
120
  s.specification_version = 4
120
121
 
121
122
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
122
- s.add_runtime_dependency(%q<bio>, ["= 1.4.2"])
123
- s.add_runtime_dependency(%q<bio-samtools>, ["= 0.6.2"])
123
+ s.add_runtime_dependency(%q<bio>, [">= 1.4.3"])
124
+ s.add_runtime_dependency(%q<bio-samtools>, [">= 2.0.3"])
124
125
  s.add_runtime_dependency(%q<rake>, [">= 0"])
125
126
  s.add_runtime_dependency(%q<jeweler>, [">= 0"])
127
+ s.add_runtime_dependency(%q<systemu>, [">= 2.5.2"])
126
128
  else
127
- s.add_dependency(%q<bio>, ["= 1.4.2"])
128
- s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
129
+ s.add_dependency(%q<bio>, [">= 1.4.3"])
130
+ s.add_dependency(%q<bio-samtools>, [">= 2.0.3"])
129
131
  s.add_dependency(%q<rake>, [">= 0"])
130
132
  s.add_dependency(%q<jeweler>, [">= 0"])
133
+ s.add_dependency(%q<systemu>, [">= 2.5.2"])
131
134
  end
132
135
  else
133
- s.add_dependency(%q<bio>, ["= 1.4.2"])
134
- s.add_dependency(%q<bio-samtools>, ["= 0.6.2"])
136
+ s.add_dependency(%q<bio>, [">= 1.4.3"])
137
+ s.add_dependency(%q<bio-samtools>, [">= 2.0.3"])
135
138
  s.add_dependency(%q<rake>, [">= 0"])
136
139
  s.add_dependency(%q<jeweler>, [">= 0"])
140
+ s.add_dependency(%q<systemu>, [">= 2.5.2"])
137
141
  end
138
142
  end
139
143
 
@@ -5,252 +5,16 @@ require 'rubygems'
5
5
  #require 'bio/db/vcf'
6
6
  require 'pathname'
7
7
  #require_relative 'BIOExtensions.rb'
8
- require_relative 'db/fastadb.rb'
8
+
9
9
 
10
10
  require 'bio'
11
+ require 'bio-samtools'
12
+
11
13
  require "set"
12
14
  require 'systemu'
13
15
  require 'json'
14
16
  #require 'strmask'
15
17
 
16
- =begin
17
-
18
- Extends the methods to be able to calculate the BFR and a consensus from the pileup
19
-
20
- =end
21
-
22
- class Bio::DB::Pileup
23
-
24
- #attr_accessor :minumum_ratio_for_iup_consensus
25
- #@minumum_ratio_for_iup_consensus = 0.20
26
-
27
- #Returns a hash with the count of bases
28
-
29
- def bases
30
- return @bases if @bases
31
- @bases = self.non_refs
32
- #puts self.ref_count
33
- @bases[self.ref_base.upcase.to_sym] = self.ref_count
34
- @bases
35
- end
36
-
37
- def base_coverage
38
- total = 0
39
- @bases.each do |k,v|
40
- total += v
41
- end
42
- total
43
- end
44
-
45
- def base_ratios
46
- return @base_ratios if @base_ratios
47
- bases = self.bases
48
- @base_ratios = Hash.new
49
- bases.each do |k,v|
50
- @base_ratios[k] = v.to_f/self.base_coverage.to_f
51
- end
52
- @base_ratios
53
- end
54
-
55
- # returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
56
- def consensus_iuap(minumum_ratio_for_iup_consensus)
57
- minumum_ratio_for_iup_consensus
58
- if @consensus_iuap.nil?
59
- @consensus_iuap = self.ref_base.downcase
60
- bases = self.bases
61
- tmp = String.new
62
- bases.each do |k,v|
63
- tmp << k[0].to_s if v/self.coverage > minumum_ratio_for_iup_consensus
64
- end
65
- if tmp.length > 0
66
- @consensus_iuap = Bio::NucleicAcid.to_IUAPC(tmp)
67
- end
68
- end
69
- @consensus_iuap
70
- end
71
- end
72
-
73
- class Bio::DB::Fasta::Region
74
- attr_accessor :pileup, :average_coverage, :snps, :reference, :base_ratios, :consensus, :coverages, :bases
75
-
76
- #TODO: Debug, as it hasnt been tested in the actual code.
77
- def base_ratios_for_base(base)
78
- @all_ratios = Hash.new unless @all_ratios
79
- unless @all_ratios[base]
80
- ratios = Array.new
81
- for i in (0..region.size-1)
82
- ratios << @base_ratios[i][base]
83
- end
84
- @all_ratios[base] = ratios
85
- end
86
- @all_ratios[base]
87
- end
88
-
89
- end
90
-
91
- class Bio::DB::Sam::SAMException < RuntimeError
92
-
93
- end
94
-
95
- class Bio::DB::Sam
96
-
97
-
98
- attr_accessor :minumum_ratio_for_iup_consensus
99
- attr_reader :cached_regions
100
- #attr_accessor :pileup_cache
101
- @minumum_ratio_for_iup_consensus = 0.20
102
-
103
-
104
- #Same as mpilup, but it caches the pileup, so if you want several operations on the same set of regions
105
- #the pile for different operations, it won't execute the mpilup command several times
106
- #Whenever you finish using a region, call mpileup_clear_cache to free the cache
107
- #The argument Region is required, as it will be the key for the underlying hash.
108
- #We asume that the options are constant. If they are not, the cache mechanism may not be consistent.
109
- #
110
- #TODO: It may be good to load partially the pileup
111
- def mpileup_cached (opts={})
112
- raise SAMException.new(), "A region must be provided" unless opts[:r] or opts[:region]
113
- @pileup_cache = Hash.new unless @pileup_cache
114
- @cached_regions = Hash.new unless @cached_regions
115
-
116
- region = opts[:r] ? opts[:r] : opts[:region]
117
- opts[:r] = "'#{region.to_s}'"
118
- opts[:region] = "'#{region.to_s}'"
119
- opts[:A] = true
120
- #reg = region.class == Bio::DB::Fasta::Region ? region : Bio::DB::Fasta::Region.parse_region(region.to_s)
121
-
122
- unless @cached_regions[region.to_s]
123
- @cached_regions[region.to_s] = Bio::DB::Fasta::Region.parse_region(region.to_s)
124
- tmp = Array.new
125
- @cached_regions[region.to_s].pileup = tmp
126
- #puts "Loading #{region.to_s}"
127
- mpileup(opts) do | pile |
128
- # puts pile
129
- tmp << pile
130
- yield pile
131
- end
132
- else
133
- # puts "Loaded, reruning #{region.to_s}"
134
- @cached_regions.pileup[region.to_s] .each do | pile |
135
- yield pile
136
- end
137
- end
138
- end
139
-
140
- #Clears the pileup cache. If a region is passed as argument, just the specified region is removed
141
- #If no region is passed, the hash is emptied
142
- def mpileup_clear_cache (region)
143
- return unless @cached_regions
144
- if region
145
- @cached_regions[region.to_s] = nil
146
- else
147
- @cached_regions.clear
148
- end
149
- end
150
-
151
- #Gets the coverage of a region from a pileup.
152
- def average_coverage_from_pileup(opts={})
153
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
154
- region = opts[:region]
155
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
156
- @cached_regions[region].average_coverage
157
- end
158
-
159
- #
160
- def coverages_from_pileup(opts={})
161
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
162
- region = opts[:region]
163
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
164
- @cached_regions[region].coverages
165
- end
166
-
167
- def consensus_with_ambiguities(opts={})
168
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
169
- region = opts[:region]
170
- # p "consensus with ambiguities for: " << opts[:region]
171
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
172
- @cached_regions[region].consensus
173
- end
174
-
175
- def calculate_stats_from_pile(opts={})
176
- min_cov = opts[:min_cov] ? opts[:min_cov] : 20
177
-
178
-
179
- opts[:region] = Bio::DB::Fasta::Region.parse_region( opts[:region] .to_s) unless opts[:region].class == Bio::DB::Fasta::Region
180
- region = opts[:region]
181
- reference = self.fetch_reference(region.entry, region.start, region.end).downcase
182
- # p "calculationg from pile..." << region.to_s
183
- base_ratios = Array.new(region.size, BASE_COUNT_ZERO)
184
- bases = Array.new(region.size, BASE_COUNT_ZERO)
185
- coverages = Array.new(region.size, 0)
186
- total_cov = 0
187
-
188
- self.mpileup_cached(:region=>"#{region.to_s}") do | pile |
189
- #puts pile
190
- #puts pile.coverage
191
- if pile.coverage > min_cov
192
- base_ratios[pile.pos - region.start ] = pile.base_ratios
193
- reference[pile.pos - region.start ] = pile.consensus_iuap(0.20)
194
- coverages[pile.pos - region.start ] = pile.coverage.to_i
195
- bases[pile.pos - region.start ] = pile.bases
196
- end
197
- total_cov += pile.coverage
198
- end
199
-
200
- region = @cached_regions[region.to_s]
201
- region.coverages = coverages
202
- region.base_ratios = base_ratios
203
- region.consensus = reference
204
-
205
- region.average_coverage = total_cov.to_f/region.size.to_f
206
- region.bases = bases
207
- region
208
- end
209
-
210
-
211
-
212
- BASE_COUNT_ZERO = {:A => 0, :C => 0, :G => 0, :T => 0}
213
-
214
- #Gets an array with the proportions of the bases in the region. If there is no coverage, a
215
- def base_ratios_in_region(opts={})
216
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
217
- region = opts[:region]
218
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
219
- @cached_regions[region].base_ratios
220
- end
221
-
222
- #Gets an array with the bsaes count in the region. If there is no coverage, a
223
- def bases_in_region(opts={})
224
- opts[:region] = opts[:region].to_s if opts[:region] .class == Bio::DB::Fasta::Region
225
- region = opts[:region]
226
- calculate_stats_from_pile(opts) if @cached_regions == nil or @cached_regions[region] == nil
227
- @cached_regions[region].bases
228
- end
229
-
230
-
231
-
232
- def extract_reads(opts={})
233
- opts[:region] = Bio::DB::Fasta::Region.parse_region( opts[:region] .to_s) unless opts[:region].class == Bio::DB::Fasta::Region
234
- fastq_filename = opts[:fastq]
235
- fastq_file = opts[:fastq_file]
236
-
237
- out = $stdout
238
-
239
- print_fastq = Proc.new do |alignment|
240
- out.puts "@#{alignment.qname}"
241
- out.puts "#{alignment.seq}"
242
- out.puts "+#{alignment.qname}"
243
- out.puts "#{alignment.qual}"
244
- end
245
-
246
- fetch_with_function(chromosome, qstart, qstart+len, print_fastq)
247
-
248
-
249
- end
250
-
251
-
252
-
253
- end
254
18
 
255
19
  module Bio::BFRTools
256
20
 
@@ -267,7 +31,7 @@ module Bio::BFRTools
267
31
  BASES = [:A, :C, :G, :T]
268
32
  #Sets the reference file
269
33
  def reference(path)
270
- @reference_db = Bio::DB::Fasta::FastaFile.new(path)
34
+ @reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>path})
271
35
  @reference_path = path
272
36
  end
273
37
 
@@ -350,33 +114,35 @@ module Bio::BFRTools
350
114
  self.entry = reg.entry
351
115
  self.start = reg.start
352
116
  self.end = reg.end
353
-
117
+ opts[:region] = reg
354
118
  @container = opts[:container]
355
119
 
356
- parental_1_sam = @container.parental_1_sam
357
- parental_2_sam = @container.parental_2_sam
358
- bulk_1_sam = @container.bulk_1_sam
359
- bulk_2_sam = @container.bulk_2_sam
360
-
361
- @parental_1_sequence = parental_1_sam.consensus_with_ambiguities(opts)
362
- @parental_2_sequence = parental_2_sam.consensus_with_ambiguities(opts)
120
+ parental_1_reg = @container.parental_1_sam.fetch_region(opts)
121
+ parental_2_reg = @container.parental_2_sam.fetch_region(opts)
122
+ bulk_1_reg = @container.bulk_1_sam.fetch_region(opts)
123
+ bulk_2_reg = @container.bulk_2_sam.fetch_region(opts)
124
+
125
+
363
126
 
364
- @bulk_1_sequence = bulk_1_sam.consensus_with_ambiguities(opts)
365
- @bulk_2_sequence = bulk_2_sam.consensus_with_ambiguities(opts)
127
+ @parental_1_sequence = parental_1_reg.consensus
128
+ @parental_2_sequence = parental_2_reg.consensus
129
+
130
+ @bulk_1_sequence = bulk_1_reg.consensus
131
+ @bulk_2_sequence = bulk_2_reg.consensus
366
132
 
367
133
  @snp_count = Container.snps_between( @parental_1_sequence , @parental_2_sequence )
368
134
 
369
- @ratios_bulk_1 = bulk_1_sam.base_ratios_in_region(opts)
370
- @ratios_bulk_2 = bulk_2_sam.base_ratios_in_region(opts)
135
+ @ratios_bulk_1 = bulk_1_reg.base_ratios
136
+ @ratios_bulk_2 = bulk_2_reg.base_ratios
371
137
 
372
- @bases_bulk_1 = bulk_1_sam.bases_in_region(opts)
373
- @bases_bulk_2 = bulk_2_sam.bases_in_region(opts)
138
+ @bases_bulk_1 = bulk_1_reg.bases
139
+ @bases_bulk_2 = bulk_2_reg.bases
374
140
 
375
- @avg_cov_bulk_1 = bulk_1_sam.average_coverage_from_pileup(opts)
376
- @avg_cov_bulk_2 = bulk_2_sam.average_coverage_from_pileup(opts)
141
+ @avg_cov_bulk_1 = bulk_1_reg.average_coverage
142
+ @avg_cov_bulk_2 = bulk_2_reg.average_coverage
377
143
 
378
- @coverages_1 = bulk_1_sam.coverages_from_pileup(opts)
379
- @coverages_2 = bulk_2_sam.coverages_from_pileup(opts)
144
+ @coverages_1 = bulk_1_reg.coverages
145
+ @coverages_2 = bulk_2_reg.coverages
380
146
 
381
147
  end
382
148
 
@@ -472,7 +238,7 @@ module Bio::BFRTools
472
238
  raise BFRToolsException.new ("The reference for the line should be :first or :second, but was " + reference.to_s )
473
239
  end
474
240
 
475
- relative_position = self.start + position + 1
241
+ relative_position = self.start + position
476
242
 
477
243
  bfr = bfrs[reference][base][position]
478
244
  cov_1 = @coverages_1[position]
@@ -622,7 +388,7 @@ module Bio::BFRTools
622
388
  end
623
389
 
624
390
  def process_region(opts={})
625
- opts = { :min_cov=>20, :max_snp_1kbp => 10 }.merge!(opts)
391
+ opts = { :min_cov=>20, :max_snp_1kbp => 10, :max_per=>0.20 }.merge!(opts)
626
392
 
627
393
  @proccesed_regions += 1
628
394
  output = opts[:output_file] ? opts[:output_file] : $stdout
@@ -675,7 +441,7 @@ module Bio::BFRTools
675
441
 
676
442
 
677
443
  for informative in info
678
- line = region.get_bfr_line(i+1, base, informative)
444
+ line = region.get_bfr_line(i, base, informative)
679
445
  output.print line , "\n"
680
446
  end
681
447
  end