bio-polyploid-tools 0.7.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +17 -0
- data/Gemfile +10 -7
- data/README.md +44 -0
- data/Rakefile +14 -14
- data/VERSION +1 -1
- data/bin/bfr.rb +2 -2
- data/bin/blast_triads.rb +166 -0
- data/bin/blast_triads_promoters.rb +192 -0
- data/bin/find_homoeologue_variations.rb +385 -0
- data/bin/get_longest_hsp_blastx_triads.rb +66 -0
- data/bin/hexaploid_primers.rb +2 -2
- data/bin/homokaryot_primers.rb +2 -2
- data/bin/mafft_triads.rb +120 -0
- data/bin/mafft_triads_promoters.rb +403 -0
- data/bin/polymarker.rb +73 -17
- data/bin/polymarker_capillary.rb +416 -0
- data/bin/snp_position_to_polymarker.rb +5 -3
- data/bin/snps_between_bams.rb +0 -29
- data/bin/vcfLineToTable.rb +56 -0
- data/bio-polyploid-tools.gemspec +74 -32
- data/lib/bio/BFRTools.rb +1 -0
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +2 -6
- data/lib/bio/PolyploidTools/ExonContainer.rb +31 -8
- data/lib/bio/PolyploidTools/NoSNPSequence.rb +286 -0
- data/lib/bio/PolyploidTools/PrimerRegion.rb +9 -1
- data/lib/bio/PolyploidTools/SNP.rb +58 -18
- data/lib/bio/PolyploidTools/SNPMutant.rb +5 -3
- data/lib/bio/db/blast.rb +112 -0
- data/lib/bio/db/exonerate.rb +4 -5
- data/lib/bio/db/primer3.rb +83 -14
- data/test/data/BS00068396_51_blast.tab +4 -0
- data/test/data/BS00068396_51_contigs.nhr +0 -0
- data/test/data/BS00068396_51_contigs.nin +0 -0
- data/test/data/BS00068396_51_contigs.nsq +0 -0
- data/test/data/BS00068396_51_for_polymarker.fa +1 -0
- data/test/data/IWGSC_CSS_1AL_scaff_1455974_aln_contigs.fa.fai +11 -0
- data/test/data/S22380157.vcf +67 -0
- data/test/data/S58861868/LIB1716.bam +0 -0
- data/test/data/S58861868/LIB1716.sam +651 -0
- data/test/data/S58861868/LIB1719.bam +0 -0
- data/test/data/S58861868/LIB1719.sam +805 -0
- data/test/data/S58861868/LIB1721.bam +0 -0
- data/test/data/S58861868/LIB1721.sam +1790 -0
- data/test/data/S58861868/LIB1722.bam +0 -0
- data/test/data/S58861868/LIB1722.sam +1271 -0
- data/test/data/S58861868/S58861868.fa +16 -0
- data/test/data/S58861868/S58861868.fa.fai +1 -0
- data/test/data/S58861868/S58861868.vcf +76 -0
- data/test/data/S58861868/header.txt +9 -0
- data/test/data/S58861868/merged.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam +0 -0
- data/test/data/S58861868/merged_reheader.bam.bai +0 -0
- data/test/data/bfr_out_test.csv +5 -5
- data/test/data/headerMergeed.txt +9 -0
- data/test/data/headerS2238015 +1 -0
- data/test/data/mergedLibs.bam +0 -0
- data/test/data/mergedLibsReheader.bam +0 -0
- data/test/data/mergedLibsSorted.bam +0 -0
- data/test/data/mergedLibsSorted.bam.bai +0 -0
- data/test/test_bfr.rb +26 -34
- data/test/test_blast.rb +47 -0
- data/test/test_exonearate.rb +4 -9
- data/test/test_snp_parsing.rb +42 -22
- metadata +81 -20
- data/Gemfile.lock +0 -67
@@ -0,0 +1,385 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'bio'
|
3
|
+
require 'rubygems'
|
4
|
+
require 'pathname'
|
5
|
+
require 'bio-samtools'
|
6
|
+
require 'optparse'
|
7
|
+
require 'set'
|
8
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
9
|
+
$: << File.expand_path('.')
|
10
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
11
|
+
require path
|
12
|
+
|
13
|
+
# Writes +msg+ to standard output, prefixed with the current local time
# formatted as "YYYY-MM-DD HH:MM:SS.mmm" and a colon.
def log(msg)
  timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S.%L")
  $stdout.puts(format("%s: %s", timestamp, msg))
end
|
17
|
+
|
18
|
+
|
19
|
+
class Bio::PolyploidTools::ExonContainer
  # Reads a file of exonerate alignments (one record per line, parsed with
  # Bio::DB::Exonerate::Alignment.parse_custom) and attaches the matching
  # exon to every SNP registered in @snp_map for the aligned query.
  #
  # Recognised keys in +opts+:
  #   :exonerate_file - path of the alignment file to read.
  #   :min_identity   - alignments with a lower identity are ignored
  #                     (defaults to 90).
  #   :arm_selection  - callable mapping a target (contig) name to the label
  #                     passed to snp.add_exon. Defaults to the first three
  #                     characters of the contig name.
  #
  # Positions for which the alignment cannot provide an exon
  # (ExonerateException) are reported on $stderr and skipped.
  def add_alignments(opts = {})
    opts = { :min_identity => 90 }.merge!(opts)
    arm_selection = opts[:arm_selection] || lambda { |contig_name| contig_name[0, 3] }

    File.open(opts[:exonerate_file]) do |handle|
      handle.each_line do |line|
        record = Bio::DB::Exonerate::Alignment.parse_custom(line)
        next unless record && record.identity >= opts[:min_identity]

        snp_array = @snp_map[record.query_id]
        next if snp_array.nil?

        snp_array.each do |snp|
          next if snp.nil?
          # Only SNPs that fall inside the aligned part of the query are used.
          next unless snp.position.between?(record.query_start + 1, record.query_end)

          begin
            exon = record.exon_on_gene_position(snp.position)
            snp.add_exon(exon, arm_selection.call(record.target_id))
          rescue Bio::DB::Exonerate::ExonerateException
            $stderr.puts "Failed for the range #{record.query_start}-#{record.query_end} for position #{snp.position}"
          end
        end
      end
    end
  end
end
|
54
|
+
|
55
|
+
class Bio::DB::Primer3::SNP
  # Short label for this SNP: the gene and the chromosome of the SNP it was
  # built from, joined by a colon. +gene+ and +snp_from+ are provided by the
  # class definition in the library.
  def to_s
    format("%s:%s", gene, snp_from.chromosome)
  end
end
|
62
|
+
class Bio::DB::Primer3::Primer3Record
  # Returns the preferred primer pair of this record, caching it in
  # @best_pair. While scanning @primerPairs a candidate replaces the current
  # choice when it has fewer upper-case bases in left+right sequence than the
  # count recorded in @total_caps, or when its size is smaller than the
  # current choice. @total_caps ends up holding the upper-case count of the
  # selected pair (100 if there are no pairs).
  def best_pair
    return @best_pair if @best_pair

    @best_pair = nil
    @total_caps = 100
    @primerPairs.each do |candidate|
      caps = "#{candidate.left.sequence}#{candidate.right.sequence}".scan(/[A-Z]/).length
      improves = @best_pair.nil? || caps < @total_caps || candidate.size < @best_pair.size
      next unless improves

      @best_pair = candidate
      @total_caps = caps
    end
    # @best_pair = @primerPairs.min
    @best_pair
  end

  # Splits the space separated sequence_id into, in order: snp, line, type,
  # in, polymorphism, chromosome and orientation, and converts type and
  # orientation to symbols.
  # Example header kept from the original comment:
  #   CL3339Contig1:T509C AvocetS chromosome_specific exon 4D forward
  # NOTE(review): that example has six fields while seven are read here -
  # confirm the current header layout.
  def parse_header
    @snp, @line, @type, @in, @polymorphism, @chromosome, @orientation = sequence_id.split(" ")
    @type = @type.to_sym
    if @in
      @in = (@in.to_sym == :exon)
    else
      # NOTE(review): this branch sets @exon while the other one sets @in.
      @exon = false
    end

    @homoeologous = (@polymorphism.to_sym == :homoeologous)
    @parsed = true
    @orientation = @orientation.to_sym
  end

  # Ranking score of the record: the score configured in @scores for its
  # type, plus the :exon score when exon? is true, minus ten points per
  # upper-case base counted by best_pair, minus the product length.
  def score
    best_pair # makes sure @total_caps has been computed
    points = 0
    points += @scores[type]
    points += @scores[:exon] if exon?
    points - (@total_caps * 10) - product_length
  end

  # Short label: gene and chromosome of the originating SNP.
  def to_s
    format("%s:%s", gene, snp_from.chromosome)
  end

  # Returns a fresh copy of the left primer. +snp+ is accepted but not used:
  # the original code that swapped the last base of the primer between the
  # two SNP alleles (depending on the orientation) is disabled in this
  # script.
  def left_primer_snp(snp)
    String.new(left_primer)
  end
end
|
149
|
+
|
150
|
+
# Strategies to derive a chromosome (arm) label from a contig name. Each
# entry is a lambda taking the contig name and returning the label.
arm_selection_functions = {
  # The first two characters of the name.
  :arm_selection_first_two => lambda { |contig_name| contig_name[0, 2] },

  # Parses names like "IWGSC_CSS_1AL_scaff_110" (first two characters of the
  # third '_' separated field), pseudomolecule headers like "1A" (first two
  # characters of the name) and the two-field names prefixed with "v443",
  # which are reported as "3B". Anything else yields "U".
  :arm_selection_embl => lambda { |contig_name|
    parts = contig_name.split('_')
    case parts.size
    when 0 then "U"
    when 1 then parts[0][0, 2]
    when 2 then parts[0] == "v443" ? "3B" : "U"
    else parts[2][0, 2]
    end
  },

  # The second '_' separated field of whatever precedes the first ':'.
  :arm_selection_morex => lambda { |contig_name|
    contig_name.split(':').first.split("_")[1]
  },

  # The contig name itself, unchanged.
  :scaffold => lambda { |contig_name| contig_name }
}
|
181
|
+
|
182
|
+
# Default settings of the script; the command line switches below override
# :sequences, :reference, :output, :genomes_count and :extract_found_contigs.
options = {}
options[:model] = "est2genome"
options[:min_identity] = 90
options[:extract_found_contigs] = false
options[:arm_selection] = arm_selection_functions[:arm_selection_embl]
options[:genomes_count] = 3

# Settings handed to Bio::DB::Primer3.prepare_input_file.
options[:primer_3_preferences] = {
  :primer_product_size_range => "50-150",
  :primer_max_size => 25,
  :primer_lib_ambiguity_codes_consensus => 1,
  :primer_liberal_base => 1,
  :primer_num_return => 5,
  :primer_explain_flag => 1,
  # conf/primer3_config sits next to the directory holding this script.
  # The path is resolved relative to that directory explicitly: appending
  # '../../conf/...' to the dirname without a separator only worked by
  # accident and broke when the dirname was ".".
  :primer_thermodynamic_parameters_path => File.expand_path('../conf/primer3_config', File.dirname(__FILE__)) + '/'
}

OptionParser.new do |opts|
  opts.banner = "Usage: find_homoeologue_variations.rb [options]"

  opts.on("-c", "--sequences FASTA", "Sequence of the region to search") do |o|
    options[:sequences] = o
  end

  opts.on("-r", "--reference FASTA", "reference with the contigs") do |o|
    options[:reference] = o
  end

  opts.on("-o", "--output DIR", "Directory to write the output") do |o|
    options[:output] = o
  end

  opts.on("-g", "--genomes_count INT", "Number of genomes (default 3, for hexaploid)") do |o|
    options[:genomes_count] = o.to_i
  end

  opts.on("-x", "--extract_found_contigs", "If present, save in a separate file the contigs with matches. Useful to debug.") do |_o|
    options[:extract_found_contigs] = true
  end
end.parse!
|
226
|
+
#reference="/Users/ramirezr/Documents/TGAC/references/Triticum_aestivum.IWGSP1.21.dna_rm.genome.fa"
|
227
|
+
# --- Mandatory arguments ----------------------------------------------------
reference = options[:reference]
raise ArgumentError, "Reference has to be provided" unless reference
sequences = options[:sequences]
raise ArgumentError, "Fasta file with sequences has to be provided" unless sequences
output_folder = options[:output]
raise ArgumentError, "An output directory has to be provided" unless output_folder

model = options[:model]
# Dir.mkdir fails if the folder already exists, so a previous run is never
# overwritten.
Dir.mkdir(output_folder)
min_identity = options[:min_identity]

# --- Files written inside the output folder ---------------------------------
exonerate_file  = "#{output_folder}/exonerate_tmp.tab"
temp_contigs    = "#{output_folder}/contigs_tmp.fa"
primer_3_input  = "#{output_folder}/primer_3_input_temp"
primer_3_output = "#{output_folder}/primer_3_output_temp"
exons_filename  = "#{output_folder}/exons_genes_and_contigs.fa"
output_primers  = "#{output_folder}/primers.csv"
output_to_order = "#{output_folder}/primers_to_order.csv"

fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>reference})
fasta_file.load_fai_entries

# Names of the two lines used by the KASP container and in the CSV header.
original_name = "A"
snp_in = "B"

# Fall back to the first three characters of the contig name when no arm
# selection function was configured.
arm_selection = options[:arm_selection] || lambda { |contig_name| contig_name[0, 3] }

begin
  log "Reading exons"
  exons = Array.new
  Bio::FlatFile.auto(sequences) do |ff|
    ff.each do |entry|
      # NoSNPSequence.parse takes a "name,chromosome,sequence" line.
      fields = [entry.definition, arm_selection.call(entry.definition), entry.seq]
      snp = Bio::PolyploidTools::NoSNPSequence.parse(fields.join(","))
      snp.genomes_count = options[:genomes_count]
      exons << snp
    end
  end

  log "Searching markers in genome"
  found_contigs = Set.new
  exo_f = File.open(exonerate_file, "w")
  contigs_f = nil
  begin
    contigs_f = File.open(temp_contigs, "w") if options[:extract_found_contigs]
    Bio::DB::Exonerate.align({:query=>sequences, :target=>reference, :model=>model}) do |aln|
      # NOTE(review): strictly greater here, while add_alignments uses >=.
      next unless aln.identity > min_identity

      exo_f.puts aln.line
      # We only add once each contig. Should reduce the size of the output file.
      next if found_contigs.include?(aln.target_id)

      found_contigs.add(aln.target_id)
      entry = fasta_file.index.region_for_entry(aln.target_id)
      if entry.nil?
        raise Bio::DB::Exonerate::ExonerateException,
              "Entry not found! #{aln.target_id}. Make sure that the #{reference}.fai was generated properly."
      end
      region = entry.get_full_region
      seq = fasta_file.fetch_sequence(region)
      contigs_f.puts(">#{aln.target_id}\n#{seq}") if contigs_f
    end
  ensure
    # Close the outputs even when the alignment step fails.
    exo_f.close
    contigs_f.close if contigs_f
  end

  log "Reading best alignment on each chromosome"

  container = Bio::PolyploidTools::ExonContainer.new
  # NOTE(review): :flanking_size is never set in this script, so this is nil.
  container.flanking_size = options[:flanking_size]
  container.gene_models(sequences)
  # NOTE(review): temp_contigs is only written with --extract_found_contigs;
  # confirm what chromosomes() expects when the file is absent.
  container.chromosomes(temp_contigs)
  container.add_parental({:name=>"A"})
  container.add_parental({:name=>"B"})
  exons.each do |exon|
    exon.container = container
    exon.flanking_size = 50
    # NOTE(review): :variation_free_region is never set in this script (nil).
    exon.variation_free_region = options[:variation_free_region]
    container.add_snp(exon)
  end
  container.add_alignments({:exonerate_file=>exonerate_file, :arm_selection=>options[:arm_selection] , :min_identity=>min_identity})

  # 4.1 generating primer3 file
  log "Running primer3"
  File.open(exons_filename, "w") { |file| container.print_fasta_snp_exones(file) }

  added_exons = File.open(primer_3_input, "w") do |file|
    Bio::DB::Primer3.prepare_input_file(file, options[:primer_3_preferences])
    container.print_primer_3_exons(file, nil, snp_in)
  end

  Bio::DB::Primer3.run({:in=>primer_3_input, :out=>primer_3_output}) if added_exons > 0

  # 5. Pick the best primer and make the primer3 output
  log "Selecting best primers"
  kasp_container = Bio::DB::Primer3::KASPContainer.new
  kasp_container.line_1 = original_name
  kasp_container.line_2 = snp_in

  # NOTE(review): no command line switch sets :scoring in this script.
  if options[:scoring] == :het_dels
    kasp_container.scores = Hash.new
    kasp_container.scores[:chromosome_specific] = 0
    kasp_container.scores[:chromosome_semispecific] = 1000
    kasp_container.scores[:chromosome_nonspecific] = 100
  end

  exons.each { |snp| kasp_container.add_snp(snp) }

  kasp_container.add_primers_file(primer_3_output) if added_exons > 0
  header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
  File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }

  # One FASTA file per marker with the realigned primers.
  kasp_container.snp_hash.each_pair do |name, kaspSNP|
    out_fasta_products = "#{output_folder}/#{name}.fa"
    File.open(out_fasta_products, 'w') { |f| f.write(kaspSNP.realigned_primers_fasta) }
  end

  File.open(output_to_order, "w") { |io| io.write(kasp_container.print_primers_with_tails()) }

  log "DONE"
rescue Exception => e
  # Deliberately broad: every failure is logged and then re-raised untouched.
  log "ERROR\t#{e.message}"
  $stderr.puts e.backtrace
  raise e
end
|
377
|
+
#puts container.inspect
|
378
|
+
|
379
|
+
#container.snp_map.each do | gene, snp_array|
|
380
|
+
# snp_array.each do |e|
|
381
|
+
# puts e.inspect
|
382
|
+
# puts e.aligned_sequences_fasta
|
383
|
+
# end
|
384
|
+
#end
|
385
|
+
|
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
require 'bio'
|
4
|
+
require 'csv'
|
5
|
+
#$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
6
|
+
#$: << File.expand_path('.')
|
7
|
+
#path= File.expand_path(File.dirname(__FILE__) + '/../lib/bio-polyploid-tools.rb')
|
8
|
+
#require path
|
9
|
+
|
10
|
+
# Default settings; "-" as :blastx means the BLAST report is read from ARGF.
options = {}
options[:identity] = 50
options[:min_bases] = 200
options[:blastx] = "-"

# The parser is kept in a local so the banner names this script correctly
# and the switches can be exercised after construction.
parser = OptionParser.new do |opts|
  opts.banner = "Usage: get_longest_hsp_blastx_triads.rb [options]"

  opts.on("-p", "--blastx FILE", "BLAST XML file") do |o|
    options[:blastx] = o
  end

  opts.on("-i", "--identity FLOAT", "Minimum percentage identity") do |o|
    options[:identity] = o.to_f
  end

  opts.on("-c", "--min_bases int", "Minimum alignment length (default 200)") do |o|
    options[:min_bases] = o.to_i
  end

  opts.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
    options[:triads] = o
  end
end
parser.parse!
|
34
|
+
|
35
|
+
# The triads file is mandatory: without it there is nothing to filter on.
raise ArgumentError, "A CSV file with the triads has to be provided (-t/--triads)" unless options[:triads]

# Lookup tables built from the named columns 'A', 'B' and 'D' of the triads
# file: A->B, A->D and B->D.
valid_pairs_A_B = Hash.new
valid_pairs_A_D = Hash.new
valid_pairs_B_D = Hash.new

CSV.foreach(options[:triads], headers: true) do |row|
  valid_pairs_A_B[row['A']] = row['B']
  valid_pairs_A_D[row['A']] = row['D']
  valid_pairs_B_D[row['B']] = row['D']
end

# "-" means "read the report from ARGF"; anything else is a path. File.open
# is required for a path: IO.open expects a numeric file descriptor.
read_from_file = options[:blastx] != "-"
stream = read_from_file ? File.open(options[:blastx]) : ARGF
puts "Loaded #{valid_pairs_B_D.length} triads"
$stdout.flush

begin
  blast_report = Bio::FlatFile.new(Bio::Blast::Report, stream)

  # NOTE(review): options[:identity] and options[:min_bases] are parsed but
  # not applied anywhere in this script.
  blast_report.each_entry do |report|
    puts "Hits for " + report.query_def + " against " + report.db
    $stdout.flush
    report.each do |hit|
      # Only the part of each id before the first '-' is compared.
      query = hit.query_id.split("-")[0]
      target = hit.target_id.split("-")[0]
      next unless valid_pairs_A_B[query] == target || valid_pairs_A_D[query] == target || valid_pairs_B_D[query] == target

      # One tab separated line per significant hit (puts with several
      # arguments would print each of them on its own line).
      puts "#{hit.target_id}\t#{hit.evalue}" if hit.evalue < 0.001
      puts hit.inspect
    end
  end
ensure
  # Only close what was opened here; ARGF is left alone.
  stream.close if read_from_file
end
|