bio-polyploid-tools 0.9.9 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 48bdfc00532bec29d3ca2cd3214e22f8907ef45abb3bdf21802be60654fd188a
4
- data.tar.gz: 8ade77f7455c3a38d16f011642ecc69325c4d9bcd650c7f440d524e870c702ba
2
+ SHA1:
3
+ metadata.gz: 73e03b0d4cf0869a9e1a6b7d146bbb0e3ed4c91e
4
+ data.tar.gz: 64e0efc76fc1cd424b3febf1f4f298e202619a94
5
5
  SHA512:
6
- metadata.gz: e1fa868fc0d1f2d249a40c1a4f351d7bc2918d17303868e84342e0cc6a31656160af459efed2697b9b09bf0e47cf987d4ed3404d9ebf101e91f0b3d420f9c717
7
- data.tar.gz: 66839a8ae1794b510a98c8c69b3819f8eebee58518907a4169955f12a7d5502c2438e8a0449dea9b58671ee177ba519384d35327a1d2e7adec957a0bd8f005c9
6
+ metadata.gz: cd81ec303480157a64ac9af123bc331381cc825cfac4e557894c35ba3ab815f15d95d03b8d5e6f7921a8ff950cebefdb9a6182b81a274568d8dca9f1d0aa13db
7
+ data.tar.gz: 4104dc656d8b29604fc2293731f75a906c9d23941138ebb70e7b7a2a84a456a459e30be23530ca4c16d6ebdad03ca9dcbca4bd1e1ea7e068a734dee1c350e1b1
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.9.9
1
+ 0.9.10
@@ -0,0 +1,241 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bio'
3
+ require 'rubygems'
4
+ require 'pathname'
5
+ require 'bio-samtools'
6
+ require 'optparse'
7
+ require 'set'
8
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
9
+ $: << File.expand_path('.')
10
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
11
+ require path
12
+
13
+ options = {}
14
+ options[:min_identity] = 90
15
+ options[:filter_best] = false
16
+ options[:debug] = false
17
+
18
+ OptionParser.new do |opts|
19
+ opts.banner = "Usage: marler_to_vcf.rb [options]"
20
+
21
+ opts.on("-c", "--contigs FILE", "File with contigs to use as database") do |o|
22
+ options[:path_to_contigs] = o
23
+ end
24
+
25
+ opts.on("-m", "--marker_list FILE", "File with the list of markers to search from") do |o|
26
+ options[:marker_list] = o
27
+ end
28
+
29
+ opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
30
+ options[:filter_best] = false
31
+ end
32
+
33
+ opts.on("-D", "--debug", "Validate that the flanking sequences are correct") do
34
+ options[:debug] = true
35
+ end
36
+
37
+ opts.on("-i", "--min_identity INT", "Minimum identity to consider a hit (default 90)") do |o|
38
+ options[:min_identity] = o.to_i
39
+ end
40
+
41
+ opts.on("-o", "--output FOLDER", "Output folder") do |o|
42
+ options[:output_folder] = o
43
+ end
44
+
45
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
46
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
47
+ end
48
+
49
+ opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: blast") do |o|
50
+ raise "Invalid aligner" unless o == "exonerate" or o == "blast"
51
+ options[:aligner] = o.to_sym
52
+ end
53
+
54
+ opts.on("-d", "--database PREFIX", "Path to the blast database. Only used if the aligner is blast. The default is the name of the contigs file without extension.") do |o|
55
+ options[:database] = o
56
+ end
57
+
58
+ end.parse!
59
+ options[:database] = options[:path_to_contigs]
60
+ p options
61
+ p ARGV
62
+
63
+
64
+ path_to_contigs=options[:path_to_contigs]
65
+
66
+ original_name="A"
67
+ snp_in="B"
68
+
69
+ fasta_reference = nil
70
+ test_file=options[:marker_list]
71
+ # The default output folder is named after the marker list plus a timestamp; -o replaces it and may already exist.
72
+ output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
73
+ output_folder= options[:output_folder] if options[:output_folder]
74
+ Dir.mkdir(output_folder) unless Dir.exist?(output_folder)
75
+ #TODO Make this tmp files
76
+ temp_fasta_query="#{output_folder}/to_align.fa"
77
+ temp_contigs="#{output_folder}/contigs_tmp.fa"
78
+ exonerate_file="#{output_folder}/exonerate_tmp.tab"
79
+ vcf_file="#{output_folder}/snp_positions.vcf"
80
+
81
+ min_identity= options[:min_identity]
82
+
83
+ @status_file="#{output_folder}/status.txt"
84
+
85
+
86
+ def write_status(status)
87
+ f=File.open(@status_file, "a")
88
+ f.puts "#{Time.now.to_s},#{status}"
89
+ f.close
90
+ end
91
+
92
+
93
+ snps = Hash.new
94
+
95
+ fasta_reference_db=nil
96
+
97
+ #if options[:debug]
98
+ write_status "Loading Reference"
99
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>path_to_contigs})
100
+ fasta_reference_db.load_fai_entries
101
+ write_status "Fasta reference: #{fasta_reference}"
102
+ #end
103
+
104
+ #1. Read the marker list: one SNPSequence per line, stored in snps under its marker name
105
+ #chromosome = nil
106
+ write_status "Reading SNPs"
107
+ # NOTE(review): no option in this script assigns options[:genomes_count], so genomes_count is set to nil here - confirm that is intended.
108
+ File.open(test_file) do | f |
109
+ f.each_line do | line |
110
+ snp = Bio::PolyploidTools::SNPSequence.parse(line)
111
+ snp.genomes_count = options[:genomes_count]
112
+ snp.snp_in = snp_in
113
+ snp.original_name = original_name
114
+ if snp.position
115
+ snps[snp.gene] = snp
116
+ else
117
+ $stderr.puts "ERROR: #{snp.gene} doesn't contain a SNP"
118
+ end
119
+ end
120
+ end
121
+
122
+ #2. Generate all the fasta files
123
+ write_status "Writing sequences to align"
124
+ written_seqs = Set.new
125
+ file = File.open(temp_fasta_query, "w")
126
+ snps.each_pair do |k,snp|
127
+ unless written_seqs.include?(snp.gene)
128
+ written_seqs << snp.gene
129
+ file.puts snp.to_fasta
130
+ end
131
+ end
132
+ file.close
133
+
134
+
135
+ #3. Align the markers against the reference (the call below uses Bio::DB::Blast.align) and keep the hits in exonerate_file
136
+ #puts chromosome
137
+ #chr_group = chromosome[0]
138
+ write_status "Searching markers in genome"
139
+ exo_f = File.open(exonerate_file, "w")
140
+ contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
141
+ filename=path_to_contigs
142
+ #puts filename
143
+ target=filename
144
+ # NOTE(review): no option in this script sets :extract_found_contigs, and contigs_f is not closed in the code shown here.
145
+ fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
146
+ fasta_file.load_fai_entries
147
+ found_contigs = Set.new
148
+
149
+ def do_align(aln, exo_f, found_contigs, min_identity,fasta_file,options)
150
+ if aln.identity > min_identity
151
+ exo_f.puts aln.line
152
+ end
153
+ end
154
+
155
+ Bio::DB::Blast.align({:query=>temp_fasta_query, :target=>options[:database]}) do |aln|
156
+ do_align(aln, exo_f, found_contigs,min_identity, fasta_file,options)
157
+ end
158
+
159
+ exo_f.close()
160
+ # Reads the alignment table and writes one VCF data line to out for each hit whose identity is at least min_identity and whose query range contains the marker's SNP; hits that raise ExonerateException are reported on stderr and skipped.
161
+ def print_positions(min_identity:90, filter_best:false, exonerate_filename:"test.exo", snps:{}, reference:nil, out:$stdout)
162
+ marker_count=Hash.new { |h, k| h[k] = 1 }
163
+ File.open(exonerate_filename) do |f|
164
+ f.each_line do | line |
165
+ record = Bio::DB::Exonerate::Alignment.parse_custom(line)
166
+ next unless record and record.identity >= min_identity
167
+ snp = snps[record.query_id]
168
+ next unless snp != nil and snp.position.between?( (record.query_start + 1) , record.query_end)
169
+ begin
170
+ # Map the SNP from marker (query) coordinates onto the aligned target sequence.
171
+ position = record.query_position_on_target(snp.position)
172
+ q_strand = record.query_strand
173
+ t_strand = record.target_strand
174
+ template = snp.template_sequence
175
+ # The target region of the aligned block is cut so that it ends (forward) or starts (otherwise) at the SNP position before it is fetched.
176
+ vulgar = record.exon_on_gene_position(snp.position)
177
+ tr = vulgar.target_region
178
+ qr = vulgar.query_region
179
+ template_pre = template[qr.start - 1 .. snp.position - 1 ]
180
+ tr.orientation == :forward ? tr.end = position : tr.start = position
181
+ region = tr
182
+ target_seq = reference.fetch_sequence(region)
183
+ target_seq[-1] = target_seq[-1].upcase
184
+ ref_base = target_seq[-1]
185
+ ma = ref_base
186
+ alt_base = [snp.snp, snp.original].join(",")
187
+ # When one marker allele equals the reference base only the other allele is reported as ALT; otherwise both stay, comma separated.
188
+ if snp.original == ref_base
189
+ alt_base = snp.snp
190
+ elsif snp.snp == ref_base
191
+ alt_base = snp.original
192
+ end
193
+ # Hits on the reverse target strand get REF and ALT complemented (ma was assigned before this step).
194
+ if record.target_strand == :reverse
195
+ alt_base = Bio::Sequence::NA.new(alt_base)
196
+ ref_base = Bio::Sequence::NA.new(ref_base)
197
+ alt_base.complement!.upcase!
198
+ ref_base.complement!.upcase!
199
+ end
200
+ # INFO fields: orientation (OR), score (SC), identity (PI), allele taken before complementing (MA) and the fetched target sequence (TS).
201
+ info = ["OR=#{record.target_strand}"]
202
+ info << "SC=#{record.score}"
203
+ info << "PI=#{record.pi}"
204
+ info << "MA=#{ma}"
205
+ info << "TS=#{target_seq}"
206
+ vcf_line="#{record.target_id}\t#{position}\t#{record.query_id}.path#{marker_count[record.query_id]}\t#{ref_base}\t#{alt_base}\t#{record.pi}\t.\t#{info.join(";")}"
207
+ #snp2 = Bio::PolyploidTools::SNP.parseVCF( vcf_line )
208
+ #snp2.setTemplateFromFastaFile(reference)
209
+ #seq2=snp2.to_polymarker_sequence(50)
210
+ #info << "PS=#{seq2}"
211
+ vcf_line="#{record.target_id}\t#{position}\t#{record.query_id}.path#{marker_count[record.query_id]}\t#{ref_base}\t#{alt_base}\t#{record.pi}\t.\t#{info.join(";")}"
212
+ out.puts(vcf_line)
213
+ # NOTE(review): vcf_line is built twice with the same expression, and filter_best, q_strand, t_strand and template_pre are never read.
214
+ marker_count[record.query_id] += 1
215
+ rescue Bio::DB::Exonerate::ExonerateException
216
+ $stderr.puts "Failed for the range #{record.query_start}-#{record.query_end} for position #{snp.position}"
217
+ end
218
+ end
219
+ end
220
+ end
221
+
222
+
223
+ write_status "Printing VCF file"
224
+ #puts snps.inspect
225
+ out = File.open(vcf_file, "w")
226
+ out.puts "##fileformat=VCFv4.2"
227
+ out.puts "##fileDate=#{Time.now.strftime("%Y%m%d")}"
228
+ out.puts "##source=#{$0}"
229
+ out.puts "##reference=file://#{options[:path_to_contigs]}"
230
+ out.puts "##INFO=<ID=OR,Number=1,Type=String,Description=\"Orientation of the alignment of the marker\">"
231
+ out.puts "##INFO=<ID=SC,Number=1,Type=Float,Description=\"Alignment score of the marker\">"
232
+ out.puts "##INFO=<ID=PI,Number=1,Type=Float,Description=\"Percentage of identity of the alignment to the marker\">"
233
+ out.puts "##INFO=<ID=PS,Number=1,Type=String,Description=\"SNP sequence for PolyMarker\">"
234
+ out.puts "##INFO=<ID=MA,Number=1,Type=String,Description=\"Allele based on the original marker sequence\">"
235
+ out.puts "##INFO=<ID=TS,Number=1,Type=String,Description=\"Target sequence before the SNP from the reference\">"
236
+ out.puts "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
237
+ print_positions(exonerate_filename:exonerate_file, min_identity:95, snps:snps, reference: fasta_reference_db, out:out)
238
+ out.close
239
+ write_status "DONE"
240
+
241
+
data/bin/polymarker.rb CHANGED
@@ -124,7 +124,7 @@ OptionParser.new do |opts|
124
124
  options[:scoring] = :het_dels
125
125
  end
126
126
 
127
- opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: exonerate") do |o|
127
+ opts.on("-A", "--aligner exonerate|blast", "Select the aligner to use. Default: #{options[:aligner]}") do |o|
128
128
  raise "Invalid aligner" unless o == "exonerate" or o == "blast"
129
129
  options[:aligner] = o.to_sym
130
130
  end
@@ -137,7 +137,7 @@ end.parse!
137
137
 
138
138
  validate_files(options)
139
139
 
140
- options[:database] = options[:path_to_contigs] unless options[:database]
140
+ options[:database] = options[:path_to_contigs] unless options[:database]
141
141
 
142
142
 
143
143
  if options[:primer_3_preferences][:primer_product_size_range]
@@ -169,7 +169,7 @@ test_file=options[:mutant_list] if options[:mutant_list]
169
169
  fasta_reference = options[:reference]
170
170
  output_folder="#{test_file}_primer_design_#{Time.now.strftime('%Y%m%d-%H%M%S')}"
171
171
  output_folder= options[:output_folder] if options[:output_folder]
172
- Dir.mkdir(output_folder)
172
+ Dir.mkdir(output_folder) unless Dir.exist?(output_folder)
173
173
  #TODO Make this tmp files
174
174
  temp_fasta_query="#{output_folder}/to_align.fa"
175
175
  temp_contigs="#{output_folder}/contigs_tmp.fa"
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+
5
+ require 'csv'
6
+ require 'bio'
7
+ require 'bio-samtools'
8
+
9
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
10
+ $: << File.expand_path('.')
11
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
12
+ require path
13
+
14
+ options = {}
15
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene");
16
+
17
+
18
+ OptionParser.new do |opts|
19
+ opts.banner = "Usage: polymarker.rb [options]"
20
+
21
+ opts.on("-c", "--reference FILE", "File with genome reference to use as database") do |o|
22
+ options[:path_to_contigs] = o
23
+ end
24
+
25
+ opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
26
+ tmp_str = o
27
+ arr = o.split(",")
28
+ if arr.size == 2
29
+ options[:arm_selection] = lambda do |contig_name|
30
+ separator, field = arr
31
+ field = field.to_i
32
+ ret = contig_name.split(separator)[field]
33
+ return ret
34
+ end
35
+ else
36
+ options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
37
+ end
38
+ end
39
+
40
+
41
+ end.parse!
42
+
43
+ def parseVCFheader(head_line="")
44
+ ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
45
+
46
+ m=/##INFO=<ID=(.+),Number=(.+),Type=(.+),Description="(.+)">/.match(head_line)
47
+ {:id=>m[1],:number=>m[2],:type=>m[3],:desc=>m[4]}
48
+
49
+ end
50
+
51
+
52
+
53
+
54
+ header_info = Hash.new
55
+ ref=options[:path_to_contigs]
56
+
57
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>ref})
58
+ fasta_reference_db.load_fai_entries
59
+
60
+ $stdin.each do |line|
61
+
62
+ h = nil
63
+ h = parseVCFheader(line) if line.start_with? "##INFO"
64
+
65
+ header_info[h[:id]] = h[:desc] if h
66
+ #puts header_info.inspect
67
+ next if line.start_with? "##"
68
+ if line.start_with? "#CHROM"
69
+ arr = line.split
70
+ arr = arr.drop(9)
71
+ arr2 = arr.map { |s| [s.clone().prepend('Cov'), s.clone().prepend('Hap') ]}
72
+ #header += arr2.join("\t")
73
+ #puts header
74
+ next
75
+ end
76
+ line.chomp!
77
+ #puts line
78
+ snp = Bio::PolyploidTools::SNP.parseVCF( line , options[:arm_selection])
79
+ #puts snp.inspect
80
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
81
+ puts [snp.gene, snp.chromosome ,snp.to_polymarker_sequence(100)].join(",")
82
+ end
@@ -2,19 +2,19 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.9.9 ruby lib
5
+ # stub: bio-polyploid-tools 0.9.10 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools".freeze
9
- s.version = "0.9.9"
9
+ s.version = "0.9.10"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
14
- s.date = "2018-11-21"
14
+ s.date = "2019-03-11"
15
15
  s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
16
16
  s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
17
- s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze]
17
+ s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "marker_to_vcf.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze, "vcfToPolyMarker.rb".freeze]
18
18
  s.extra_rdoc_files = [
19
19
  "README",
20
20
  "README.md"
@@ -41,6 +41,7 @@ Gem::Specification.new do |s|
41
41
  "bin/mafft_triads.rb",
42
42
  "bin/mafft_triads_promoters.rb",
43
43
  "bin/map_markers_to_contigs.rb",
44
+ "bin/marker_to_vcf.rb",
44
45
  "bin/markers_in_region.rb",
45
46
  "bin/mask_triads.rb",
46
47
  "bin/polymarker.rb",
@@ -49,6 +50,7 @@ Gem::Specification.new do |s|
49
50
  "bin/snps_between_bams.rb",
50
51
  "bin/tag_stats.rb",
51
52
  "bin/vcfLineToTable.rb",
53
+ "bin/vcfToPolyMarker.rb",
52
54
  "bio-polyploid-tools.gemspec",
53
55
  "conf/defaults.rb",
54
56
  "conf/primer3_config/dangle.dh",
@@ -183,7 +185,7 @@ Gem::Specification.new do |s|
183
185
  ]
184
186
  s.homepage = "http://github.com/tgac/bioruby-polyploid-tools".freeze
185
187
  s.licenses = ["MIT".freeze]
186
- s.rubygems_version = "2.7.7".freeze
188
+ s.rubygems_version = "2.6.14".freeze
187
189
  s.summary = "Tool to work with polyploids, NGS and molecular biology".freeze
188
190
 
189
191
  if s.respond_to? :specification_version then
@@ -1,3 +1,5 @@
1
+ require 'bio'
2
+
1
3
  class Array
2
4
  def sum
3
5
  inject(0.0) { |result, el| result + el }
@@ -9,7 +11,6 @@ class Array
9
11
  end
10
12
 
11
13
  module Bio::PolyploidTools::Mask
12
-
13
14
  def self.find_end(seqs)
14
15
  size = seqs.values[0].size
15
16
  names = seqs.keys
@@ -112,3 +113,4 @@ module Bio::PolyploidTools::Mask
112
113
  }
113
114
  end
114
115
  end
116
+
@@ -1,11 +1,12 @@
1
1
  require 'bio'
2
2
  module Bio::PolyploidTools
3
3
  class SNPException < RuntimeError
4
- end
4
+ end
5
+
5
6
  class SNP
6
-
7
7
  #GENE,ORIGINAL,POS,SNP
8
8
  attr_accessor :gene, :original, :position, :snp, :snp_in, :original_name
9
+ attr_accessor :contig
9
10
  attr_accessor :exon_list
10
11
  attr_accessor :container
11
12
  attr_accessor :flanking_size, :ideal_min, :ideal_max
@@ -20,6 +21,7 @@ module Bio::PolyploidTools
20
21
  attr_accessor :repetitive
21
22
  attr_accessor :hit_count
22
23
  attr_accessor :snp_type
24
+ attr_accessor :orientation
23
25
 
24
26
  #Format:
25
27
  #Gene_name,Original,SNP_Pos,pos,chromosome
@@ -35,28 +37,57 @@ module Bio::PolyploidTools
35
37
  snp.snp.upcase!
36
38
  snp.snp.strip!
37
39
  snp.chromosome.strip!
38
-
40
+
39
41
  snp.use_reference = false
40
42
  snp
41
43
  end
42
44
 
43
- def setTemplateFromFastaFile(fastaFile ,flanking_size = 100)
45
+ #Builds a SNP from one tab-separated VCF data line, read as CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO. Format:
46
+ #IWGSC_CSS_1AL_scaff_1455974 127 test_snp C T 135.03 .
47
+ def self.parseVCF(vcf_line, chr_arm_parser: Bio::PolyploidTools::ChromosomeArm.getArmSelection("first_two") )
48
+ snp = SNP.new
49
+ arr = vcf_line.split("\t")
50
+ snp.gene = arr[2]
51
+ snp.original = arr[3]
52
+ snp.position = arr[1]
53
+ snp.snp = arr[4]
54
+ snp.chromosome = chr_arm_parser.call(arr[0])
55
+ snp.contig = arr[0]
56
+ snp.position.strip!
57
+ snp.position = snp.position.to_i
58
+ snp.original.upcase!
59
+ snp.original.strip!
60
+ snp.snp.upcase!
61
+ snp.snp.strip!
62
+ snp.chromosome.strip!
63
+ snp.orientation = :forward
64
+ #The orientation stays :forward unless the INFO column (8th field) contains OR=reverse.
65
+ info = arr[7]
66
+ if info
67
+ details = info.scan(/(\w+)=([\w|.]+)/).collect { |id, value| { :id => id, :value => value }}
68
+ details.each do |e|
69
+ snp.orientation = :reverse if e[:id] == "OR" and e[:value] == "reverse"
70
+ end
71
+ end
72
+ return snp
73
+ end
74
+
75
+ def setTemplateFromFastaFile(fastaFile ,flanking_size: 100)
44
76
  reg = Bio::DB::Fasta::Region.new
45
77
  reg.entry = gene
46
78
  reg.entry = @contig if @contig
47
- #puts reg.entry
48
- #puts @contig
49
- #puts gene
50
79
  reg.start = position - flanking_size
51
- reg.end = position + flanking_size +1
80
+ reg.end = position + flanking_size + 1
52
81
  reg.orientation = :forward
53
- entry = fastaFile.index.region_for_entry(gene)
82
+ entry = fastaFile.index.region_for_entry(reg.entry)
54
83
  reg.start = 1 if reg.start < 1
55
84
  reg.end = entry.length if reg.end > entry.length
56
85
  amb = Bio::NucleicAcid.to_IUAPC("#{original}#{snp}")
57
86
  @position = @position - reg.start + 1
58
87
  @position = 1 if @position < 1
88
+ #puts "about to fetch"
59
89
  self.template_sequence = fastaFile.fetch_sequence(reg)
90
+ #puts "done fetching"
60
91
  template_sequence[position - 1] = amb
61
92
  end
62
93
 
@@ -83,15 +114,24 @@ module Bio::PolyploidTools
83
114
 
84
115
  def to_polymarker_sequence(flanking_size, total:nil)
85
116
  out = template_sequence.clone
86
- #puts "changing: #{position} #{flanking_size} len: #{total}"
87
- out[position-1] = "[#{original}/#{snp}]"
117
+ snp_seq = "[#{original}/#{snp}]"
118
+ p = position-1
119
+ if orientation == :reverse
120
+ p = out.length - p - 1
121
+ s = Bio::Sequence::NA.new(out)
122
+ s1 = Bio::Sequence::NA.new(original)
123
+ s2 = Bio::Sequence::NA.new(snp)
124
+ out = s.reverse_complement
125
+ snp_seq = "[#{s1.reverse_complement}/#{s2.reverse_complement}]"
126
+
127
+ end
128
+
129
+ out[p] = snp_seq
88
130
  start = position - flanking_size - 1
89
- #puts "Start: #{start}"
90
131
  start = 0 if start < 0
91
132
  total = flanking_size * 2 unless total
92
133
  total += 5
93
- #puts "Total: #{total}"
94
- out[start , total ]
134
+ out[start , total ].upcase
95
135
  end
96
136
 
97
137
  def snp_id_in_seq
@@ -187,7 +227,7 @@ module Bio::PolyploidTools
187
227
  self.position - self.covered_region.start
188
228
  end
189
229
 
190
- def padded_position (pos)
230
+ def padded_position(pos)
191
231
  pos + left_padding
192
232
  end
193
233
 
@@ -18,9 +18,10 @@ module Bio::PolyploidTools
18
18
  arr = reg_str.split(",")
19
19
 
20
20
  if arr.size == 3
21
- snp.gene, snp.chromosome, snp.sequence_original = reg_str.split(",")
21
+ snp.gene, snp.chromosome, snp.sequence_original = arr
22
22
  elsif arr.size == 2
23
23
  snp.gene, snp.sequence_original = arr
24
+ snp.chromosome = ""
24
25
  else
25
26
  throw SNPSequenceException.new "Need two or three fields to parse, and got #{arr.size} in #{reg_str}"
26
27
  end
@@ -51,4 +52,4 @@ module Bio::PolyploidTools
51
52
 
52
53
 
53
54
  end
54
- end
55
+ end
data/lib/bio/db/blast.rb CHANGED
@@ -79,7 +79,7 @@ module Bio::DB::Blast
79
79
  def self.align(opts={})
80
80
  target=opts[:target]
81
81
  query=opts[:query]
82
- max_target_seqs = 15
82
+ max_target_seqs = 6 #TODO: Actually add this as an argument to PolyMarker.
83
83
  max_target_seqs = opts[:max_hits] * 2 if opts[:max_hits]
84
84
  cmdline = "blastn -max_target_seqs #{max_target_seqs} -query #{query} -db #{target} -outfmt '6 qseqid qstart qend qframe sseqid sstart send sframe score pident qlen slen qseq sseq'"
85
85
 
@@ -190,6 +190,26 @@ module Bio::DB::Exonerate
190
190
  nil
191
191
  end
192
192
 
193
+ def query_position_on_target(position, base:0)
194
+ vulgar = exon_on_gene_position(position)
195
+ qr = vulgar.query_region
196
+ tr = vulgar.target_region
197
+ #Translates a query position into a target position: first the offset inside the query region of the block found for it (measured from the region end unless the query is forward).
198
+ offset = qr.orientation == :forward ? position - qr.start + 1 : qr.end - position
199
+
200
+ #puts vulgar.to_s
201
+ #puts "SNP position: #{position}"
202
+ #puts vulgar.query_region
203
+ #puts vulgar.query_region.orientation
204
+ #puts "Offset query: #{offset}"
205
+ #puts vulgar.target_region
206
+ #puts vulgar.target_region.orientation
207
+ #Then the same offset is applied to the target region, counting back from its end unless the target is forward.
208
+ new_pos = tr.orientation == :forward ? offset + tr.start - 1 : tr.end - offset + 1
209
+ #NOTE(review): the base: keyword is accepted but never used in this method.
210
+ return new_pos
211
+ end
212
+
193
213
  def tarpostion_from_query_position(position)
194
214
  ret = nil
195
215
  vulgar_block = exon_on_gene_position(position)
@@ -206,7 +226,6 @@ module Bio::DB::Exonerate
206
226
  end
207
227
  end
208
228
 
209
-
210
229
  class Vulgar
211
230
  attr_reader :label, :query_length, :target_length, :query_start, :query_end, :target_start, :target_end, :record, :snp_in_gap
212
231
  def initialize(label, ql, tl, target_start, target_multiply, query_start, query_multiply, record)
@@ -552,7 +552,18 @@ module Bio::DB::Primer3
552
552
  #CL3339Contig1:T509C AvocetS chromosome_specific exon 4D forward
553
553
  def parse_header
554
554
  #puts "Parsing header: '#{self.sequence_id}'"
555
- @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = self.sequence_id.split(" ")
555
+ arr = self.sequence_id.split(" ")
556
+
557
+ #if arr.size == 7 This validation can be useful to get the best primers regardless of the chromosome,
558
+ #But it is commented as it will require further testing.
559
+ @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = arr
560
+ #else
561
+ # if arr.size == 6
562
+ # @snp, @line, @type, @in, @polymorphism, @orientation = arr
563
+ # @chromosome = ""
564
+ # end
565
+ #end
566
+
556
567
  @type = @type.to_sym
557
568
  if @in
558
569
  @in = @in.to_sym == :exon
@@ -9,7 +9,9 @@ require 'json'
9
9
  #require 'bio/db/fasta'
10
10
 
11
11
  #puts "Loading all... #{Dir[File.dirname(__FILE__) + "/bio/*/*.rb"]}"
12
+ module Bio::PolyploidTools
12
13
 
14
+ end
13
15
  Dir[File.dirname(__FILE__) + "/bio/*.rb"].each {|file|
14
16
  # puts file
15
17
  require_relative file }
@@ -22,4 +24,5 @@ require_relative File.dirname(__FILE__) + "/../conf/defaults.rb"
22
24
 
23
25
 
24
26
  #require_relative "bio/BFRTools.rb"
25
- #require_relative "bio/PolyploidTools/ExonContainer.rb"
27
+ #require_relative "bio/PolyploidTools/ExonContainer.rb"
28
+
@@ -36,17 +36,50 @@ class TestSNPparsing < Test::Unit::TestCase
36
36
  assert_equal(snp.contig, "IWGSC_CSS_1AL_scaff_1455974")
37
37
  assert_equal(snp.chromosome, "1A", "The chromosome wasnt parsed: #{snp.chromosome}")
38
38
  assert_equal(snp.position, 127, "The position is not parsed: #{snp.position}")
39
- #snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = 100)
39
+ #snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
40
40
  region = fasta_reference_db.index.region_for_entry(snp.contig).get_full_region
41
41
  snp.full_sequence = fasta_reference_db.fetch_sequence(region)
42
42
 
43
- assert_equal(snp.template_sequence, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag")
44
- assert_equal(snp.sequence_original, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag")
43
+ assert_equal(snp.template_sequence, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag".upcase)
44
+ assert_equal(snp.sequence_original, "actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag".upcase)
45
45
  assert_equal(snp.position, 101)
46
46
  assert_equal(snp.original, "C")
47
47
  assert_equal(snp.snp, "T")
48
+ end
49
+
50
+ def test_vcf_line
51
+ ref=@data + "/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa"
52
+ fasta_reference_db = Bio::DB::Fasta::FastaFile.new({:fasta=>ref})
48
53
 
49
-
54
+ fasta_reference_db.load_fai_entries
55
+ vcf="IWGSC_CSS_1AL_scaff_1455974 127 test_snp C T 135.03 . "
56
+
57
+ chr_arm_parser = Bio::PolyploidTools::ChromosomeArm.getArmSelection("embl");
58
+ snp = Bio::PolyploidTools::SNP.parseVCF(vcf, chr_arm_parser: chr_arm_parser)
59
+ assert_equal(snp.gene , "test_snp", "The original name was not parsed: #{snp.gene}")
60
+ assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
61
+ assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
62
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
63
+ assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaaga", snp.template_sequence)
64
+ assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag".upcase, snp.to_polymarker_sequence(100))
65
+ assert_equal(101,snp.position)
66
+ assert_equal("C",snp.original)
67
+ assert_equal("T",snp.snp)
68
+
69
+ vcf="IWGSC_CSS_1AL_scaff_1455974\t127\ttest_snp\tC\tT\t135.03\t.\tOR=reverse"
70
+
71
+ chr_arm_parser = Bio::PolyploidTools::ChromosomeArm.getArmSelection("embl");
72
+ snp = Bio::PolyploidTools::SNP.parseVCF(vcf, chr_arm_parser: chr_arm_parser)
73
+ assert_equal(snp.gene , "test_snp", "The original name was not parsed: #{snp.gene}")
74
+ assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
75
+ assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
76
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
77
+ assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaaga", snp.template_sequence)
78
+ assert_equal("TCTTGTACCTACCGAGTGCAGCATGCTACGTACCTTATAGCCAGAAGCCTTGACGTGGTGGATGCGGTCTCCAAAGCGCTTGTCAAGTCCGGGTACGACAA[G/A]GAGACCTGTAAGCAGCGCGTGCTCATACAGTCAGAGGACGCCCCGGTGCTTGCGGCGTTCAAGACGTTCCCCAAGTTCCAGCGGGTGCTGACGATCGAG", snp.to_polymarker_sequence(100))
79
+ assert_equal(101,snp.position)
80
+ assert_equal("C",snp.original)
81
+ assert_equal("T",snp.snp)
82
+
50
83
  end
51
84
 
52
85
  def test_reference_snp
@@ -60,9 +93,9 @@ class TestSNPparsing < Test::Unit::TestCase
60
93
  assert_equal(snp.gene , "IWGSC_CSS_1AL_scaff_1455974", "The original name was not parsed: #{snp.gene}")
61
94
  assert_equal("1A", snp.chromosome, "The chromosome wasnt parsed: #{snp.chromosome}")
62
95
  assert_equal(127, snp.position, "The position is not parsed: #{snp.position}")
63
- snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = 100)
96
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: 100)
64
97
  assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctcYttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaaga", snp.template_sequence)
65
- assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag", snp.to_polymarker_sequence(100))
98
+ assert_equal("actcgatcgtcagcacccgctggaacttggggaacgtcttgaacgccgcaagcaccggggcgtcctctgactgtatgagcacgcgctgcttacaggtctc[C/T]ttgtcgtacccggacttgacaagcgctttggagaccgcatccaccacgtcaaggcttctggctataaggtacgtagcatgctgcactcggtaggtacaag".upcase, snp.to_polymarker_sequence(100))
66
99
  assert_equal(101,snp.position)
67
100
  assert_equal("C",snp.original)
68
101
  assert_equal("T",snp.snp)
@@ -70,14 +103,14 @@ class TestSNPparsing < Test::Unit::TestCase
70
103
  flanking_size = 3
71
104
 
72
105
  snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1DL_scaff_2258883,A,12498,C,1D")
73
- snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = flanking_size)
106
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: flanking_size)
74
107
  assert_equal(4,snp.position)
75
108
  assert_equal("A",snp.original)
76
109
  assert_equal("C",snp.snp)
77
110
  assert_equal("gatM", snp.template_sequence)
78
111
 
79
112
  snp = Bio::PolyploidTools::SNP.parse("IWGSC_CSS_1BL_scaff_3810460,G,1,T,1B")
80
- snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size = flanking_size)
113
+ snp.setTemplateFromFastaFile(fasta_reference_db, flanking_size: flanking_size)
81
114
  assert_equal(1,snp.position)
82
115
  assert_equal("G",snp.original)
83
116
  assert_equal("T",snp.snp)
@@ -85,4 +118,4 @@ class TestSNPparsing < Test::Unit::TestCase
85
118
  end
86
119
 
87
120
 
88
- end
121
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-polyploid-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.9
4
+ version: 0.9.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricardo H. Ramirez-Gonzalez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-21 00:00:00.000000000 Z
11
+ date: 2019-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -127,6 +127,7 @@ executables:
127
127
  - mafft_triads.rb
128
128
  - mafft_triads_promoters.rb
129
129
  - map_markers_to_contigs.rb
130
+ - marker_to_vcf.rb
130
131
  - markers_in_region.rb
131
132
  - mask_triads.rb
132
133
  - polymarker.rb
@@ -135,6 +136,7 @@ executables:
135
136
  - snps_between_bams.rb
136
137
  - tag_stats.rb
137
138
  - vcfLineToTable.rb
139
+ - vcfToPolyMarker.rb
138
140
  extensions: []
139
141
  extra_rdoc_files:
140
142
  - README
@@ -161,6 +163,7 @@ files:
161
163
  - bin/mafft_triads.rb
162
164
  - bin/mafft_triads_promoters.rb
163
165
  - bin/map_markers_to_contigs.rb
166
+ - bin/marker_to_vcf.rb
164
167
  - bin/markers_in_region.rb
165
168
  - bin/mask_triads.rb
166
169
  - bin/polymarker.rb
@@ -169,6 +172,7 @@ files:
169
172
  - bin/snps_between_bams.rb
170
173
  - bin/tag_stats.rb
171
174
  - bin/vcfLineToTable.rb
175
+ - bin/vcfToPolyMarker.rb
172
176
  - bio-polyploid-tools.gemspec
173
177
  - conf/defaults.rb
174
178
  - conf/primer3_config/dangle.dh
@@ -320,7 +324,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
320
324
  version: '0'
321
325
  requirements: []
322
326
  rubyforge_project:
323
- rubygems_version: 2.7.7
327
+ rubygems_version: 2.6.14
324
328
  signing_key:
325
329
  specification_version: 4
326
330
  summary: Tool to work with polyploids, NGS and molecular biology