bio-polyploid-tools 0.4.7 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/bin/polymarker.rb +7 -14
- data/bio-polyploid-tools.gemspec +3 -3
- data/lib/bio/PolyploidTools/ExonContainer.rb +2 -1
- data/lib/bio/db/primer3.rb +21 -22
- data/test/data/test_iselect.csv +1 -5
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b72bd3369d361c595f7709f9afad4e6c7c815dcc
|
4
|
+
data.tar.gz: 9e7feca5269cba60eb49725c1954d55c63bf8f9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5e74d4df3e6f04f6dc884521a5470d97a59013168ab2c3978658d8c498d5a499ae8a72abf014d45939b1f366f387608d52f09dccc0fc2715132f63be9059c51a
|
7
|
+
data.tar.gz: 745042ade686bfa72436c6b9db555e032350ef4f868f95fe548ee0a02f0279628db71e04630b29e2b80c53c8b791f72d3fbfc276af9357036f425eeedf3ed80c
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.7
|
1
|
+
0.5.0
|
data/bin/polymarker.rb
CHANGED
@@ -35,19 +35,17 @@ options[:bucket_size] = 0
|
|
35
35
|
options[:bucket] = 1
|
36
36
|
options[:model] = "est2genome"
|
37
37
|
options[:arm_selection] = arm_selection_functions[:arm_selection_embl] ;
|
38
|
-
options[:flanking_size] =
|
38
|
+
options[:flanking_size] = 150;
|
39
39
|
options[:primer_3_preferences] = {
|
40
40
|
:primer_product_size_range => "50-150" ,
|
41
41
|
:primer_max_size => 25 ,
|
42
42
|
:primer_lib_ambiguity_codes_consensus => 1,
|
43
43
|
:primer_liberal_base => 1,
|
44
44
|
:primer_num_return=>5,
|
45
|
+
:primer_explain_flag => 1,
|
45
46
|
:primer_thermodynamic_parameters_path=>File.expand_path(File.dirname(__FILE__) + '../../conf/primer3_config/') + '/'
|
46
|
-
|
47
47
|
}
|
48
48
|
|
49
|
-
|
50
|
-
|
51
49
|
OptionParser.new do |opts|
|
52
50
|
opts.banner = "Usage: polymarker.rb [options]"
|
53
51
|
|
@@ -81,6 +79,7 @@ OptionParser.new do |opts|
|
|
81
79
|
|
82
80
|
opts.on("-p", "--primer_3_preferences FILE", "file with preferences to be sent to primer3") do |o|
|
83
81
|
options[:primer_3_preferences] = Bio::DB::Primer3.read_primer_preferences(o, options[:primer_3_preferences] )
|
82
|
+
|
84
83
|
end
|
85
84
|
|
86
85
|
end.parse!
|
@@ -214,12 +213,12 @@ target=filename
|
|
214
213
|
fasta_file = Bio::DB::Fasta::FastaFile.new({:fasta=>target})
|
215
214
|
fasta_file.load_fai_entries
|
216
215
|
|
217
|
-
|
216
|
+
found_contigs = Set.new
|
218
217
|
Bio::DB::Exonerate.align({:query=>temp_fasta_query, :target=>target, :model=>model}) do |aln|
|
219
218
|
if aln.identity > min_identity
|
220
219
|
exo_f.puts aln.line
|
221
|
-
unless
|
222
|
-
|
220
|
+
unless found_contigs.include?(aln.target_id) #We only add once each contig. Should reduce the size of the output file.
|
221
|
+
found_contigs.add(aln.target_id)
|
223
222
|
entry = fasta_file.index.region_for_entry(aln.target_id)
|
224
223
|
raise ExonerateException.new, "Entry not found! #{aln.target_id}. Make sure that the #{target_id}.fai was generated properly." if entry == nil
|
225
224
|
region = entry.get_full_region
|
@@ -259,12 +258,6 @@ container.print_fasta_snp_exones(file)
|
|
259
258
|
file.close
|
260
259
|
|
261
260
|
file = File.open(primer_3_input, "w")
|
262
|
-
#file.puts("PRIMER_PRODUCT_SIZE_RANGE=50-150")
|
263
|
-
#file.puts("PRIMER_MAX_SIZE=25")
|
264
|
-
#file.puts("PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS=1")
|
265
|
-
#file.puts("PRIMER_LIBERAL_BASE=1")
|
266
|
-
#file.puts("PRIMER_NUM_RETURN=5")
|
267
|
-
#file.puts("PRIMER_THERMODYNAMIC_PARAMETERS_PATH=#{primer_3_config}/")
|
268
261
|
|
269
262
|
Bio::DB::Primer3.prepare_input_file(file, options[:primer_3_preferences])
|
270
263
|
added_exons = container.print_primer_3_exons(file, nil, snp_in)
|
@@ -284,7 +277,7 @@ snps.each do |snp|
|
|
284
277
|
end
|
285
278
|
|
286
279
|
kasp_container.add_primers_file(primer_3_output) if added_exons > 0
|
287
|
-
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size"
|
280
|
+
header = "Marker,SNP,RegionSize,chromosome,total_contigs,contig_regions,SNP_type,#{original_name},#{snp_in},common,primer_type,orientation,#{original_name}_TM,#{snp_in}_TM,common_TM,selected_from,product_size,errors"
|
288
281
|
File.open(output_primers, 'w') { |f| f.write("#{header}\n#{kasp_container.print_primers}") }
|
289
282
|
|
290
283
|
write_status "DONE"
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.4.7 ruby lib
|
5
|
+
# stub: bio-polyploid-tools 0.5.0 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools"
|
9
|
-
s.version = "0.4.7"
|
9
|
+
s.version = "0.5.0"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez"]
|
14
|
-
s.date = "2014-10-
|
14
|
+
s.date = "2014-10-14"
|
15
15
|
s.description = "Repository of tools developed in TGAC and Crop Genetics in JIC to work with polyploid wheat"
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@tgac.ac.uk"
|
17
17
|
s.executables = ["bfr.rb", "count_variations.rb", "filter_blat_by_target_coverage.rb", "find_best_blat_hit.rb", "find_best_exonerate.rb", "hexaploid_primers.rb", "homokaryot_primers.rb", "map_markers_to_contigs.rb", "markers_in_region.rb", "polymarker.rb", "snp_position_to_polymarker.rb", "snps_between_bams.rb"]
|
data/lib/bio/PolyploidTools/ExonContainer.rb
CHANGED
@@ -136,12 +136,13 @@ module Bio::PolyploidTools
|
|
136
136
|
begin
|
137
137
|
primer_3_min_seq_length
|
138
138
|
string = snp.primer_3_string( snp.chromosome, parental )
|
139
|
+
#puts "print_primer_3_exons: #{string.size}"
|
139
140
|
if string.size > 0
|
140
141
|
file.puts string
|
141
142
|
added += 1
|
142
143
|
end
|
143
144
|
rescue Exception=>e
|
144
|
-
|
145
|
+
@missing_exons << snp.to_s
|
145
146
|
|
146
147
|
$stderr.puts e.to_s
|
147
148
|
end
|
data/lib/bio/db/primer3.rb
CHANGED
@@ -22,9 +22,9 @@ module Bio::DB::Primer3
|
|
22
22
|
:primer_max_size => 25 ,
|
23
23
|
:primer_lib_ambiguity_codes_consensus => 1,
|
24
24
|
:primer_liberal_base => 1,
|
25
|
-
:primer_num_return=>5,
|
26
|
-
:
|
27
|
-
|
25
|
+
:primer_num_return => 5,
|
26
|
+
:primer_explain_flag => 1,
|
27
|
+
:primer_thermodynamic_parameters_path => File.expand_path(File.dirname(__FILE__) + '../../../../conf/primer3_config/') + '/'
|
28
28
|
}.merge(opts2)
|
29
29
|
|
30
30
|
opts.each do |key,value|
|
@@ -67,7 +67,8 @@ module Bio::DB::Primer3
|
|
67
67
|
def initialize
|
68
68
|
@primers_line_1 = SortedSet.new
|
69
69
|
@primers_line_2 = SortedSet.new
|
70
|
-
@
|
70
|
+
@regions = SortedSet.new
|
71
|
+
@primer3_errors = Set.new
|
71
72
|
end
|
72
73
|
|
73
74
|
def line_2_name
|
@@ -112,11 +113,7 @@ module Bio::DB::Primer3
|
|
112
113
|
left_end = 0
|
113
114
|
right_start = 0
|
114
115
|
right_end = 0
|
115
|
-
|
116
|
-
|
117
|
-
# puts "Exons: #{exon_list.size}"
|
118
|
-
|
119
|
-
# puts "It has the following exons: #{snp_in.exon_list.to_s}"
|
116
|
+
total_columns_before_messages=17
|
120
117
|
values = Array.new
|
121
118
|
#values << "#{gene},,#{template_length},"
|
122
119
|
values << gene
|
@@ -242,6 +239,12 @@ module Bio::DB::Primer3
|
|
242
239
|
values << primer3_line_2.best_pair.product_size
|
243
240
|
|
244
241
|
end
|
242
|
+
if values.size < total_columns_before_messages
|
243
|
+
values[total_columns_before_messages] = primer3_errors.to_a.join("|")
|
244
|
+
else
|
245
|
+
values << nil
|
246
|
+
end
|
247
|
+
|
245
248
|
values.join(",")
|
246
249
|
end
|
247
250
|
|
@@ -268,13 +271,14 @@ module Bio::DB::Primer3
|
|
268
271
|
|
269
272
|
|
270
273
|
def add_record(primer3record)
|
271
|
-
@primer3_errors =
|
274
|
+
@primer3_errors = Set.new unless @primer3_errors
|
272
275
|
@template_length = primer3record.sequence_template.size
|
273
276
|
if primer3record.primer_error != nil
|
274
|
-
primer3_errors << primer3record
|
277
|
+
primer3_errors << primer3record.primer_error
|
275
278
|
return
|
276
279
|
end
|
277
280
|
case
|
281
|
+
|
278
282
|
when primer3record.line == @line_1
|
279
283
|
|
280
284
|
@line_1_template = primer3record.sequence_template
|
@@ -297,6 +301,10 @@ module Bio::DB::Primer3
|
|
297
301
|
else
|
298
302
|
raise Primer3Exception.new "#{primer3record.line} is not recognized (#{line_1}, #{line_2})"
|
299
303
|
end
|
304
|
+
else
|
305
|
+
primer3_errors << "#{primer3record.line}(#{primer3record.orientation}):#{primer3record.primer_left_explain}"
|
306
|
+
primer3_errors << "common(#{primer3record.orientation}#{primer3record.type}):#{primer3record.primer_right_explain}"
|
307
|
+
primer3_errors << "pair(#{primer3record.orientation}#{primer3record.type}):#{primer3record.primer_pair_explain}"
|
300
308
|
end
|
301
309
|
end
|
302
310
|
end
|
@@ -327,7 +335,8 @@ module Bio::DB::Primer3
|
|
327
335
|
return @properties[method_name] if @properties[method_name]
|
328
336
|
$stderr.puts "Missing #{method_name}"
|
329
337
|
$stderr.puts @properties.inspect
|
330
|
-
|
338
|
+
return "" #if a property is missing, return blank.
|
339
|
+
#raise NoMethodError.new()
|
331
340
|
end
|
332
341
|
|
333
342
|
def find_left_tm(primer)
|
@@ -665,26 +674,18 @@ module Bio::DB::Primer3
|
|
665
674
|
snp.position = snp_in.position
|
666
675
|
snp.snp = snp_in.snp
|
667
676
|
|
668
|
-
# snp.original.upcase!
|
669
|
-
# snp.snp.upcase!
|
670
677
|
snp.line_1 = @line_1
|
671
678
|
snp.line_2 = @line_2
|
672
679
|
snp.snp_from = snp_in
|
673
|
-
#puts "Kasp container, adding #{snp.to_s} #{snp.class} #{snp_in.class}"
|
674
|
-
#puts "#{snp.regions}"
|
675
680
|
snp.regions = snp_in.exon_list.values.collect { |x| x.target_region.to_s }
|
676
|
-
#puts "#{snp.regions}"
|
677
681
|
@snp_hash[snp.to_s] = snp
|
678
682
|
snp
|
679
683
|
end
|
680
684
|
|
681
685
|
def add_primers_file(filename)
|
682
686
|
Primer3Record.parse_file(filename) do | primer3record |
|
683
|
-
# puts "#{primer3record.snp.to_s}:#{primer3record.chromosome}"
|
684
|
-
# puts @snp_hash.keys.to_s
|
685
687
|
current_snp = @snp_hash["#{primer3record.snp.to_s}:#{primer3record.chromosome}"]
|
686
688
|
current_snp.add_record(primer3record)
|
687
|
-
|
688
689
|
end
|
689
690
|
end
|
690
691
|
|
@@ -695,8 +696,6 @@ module Bio::DB::Primer3
|
|
695
696
|
end
|
696
697
|
return str
|
697
698
|
end
|
698
|
-
|
699
699
|
end
|
700
|
-
|
701
700
|
end
|
702
701
|
|
data/test/data/test_iselect.csv
CHANGED
@@ -98,7 +98,6 @@ RAC875_c9500_1796,2B,TTGCCCATGACACAGCTAGATTCTGTCACCTCAATCTTTGTAATCTCATC[A/G]TCTT
|
|
98
98
|
RAC875_c95103_67,7A,GTGTGCATGTTGGATGGTCACATTCTAGTGTACTAGCTAGCTAGTAAGCT[T/G]GTTTTGCTACCGGATGATTGAATGTCATCTCAGTTTGTACCTCATCTAGC
|
99
99
|
RAC875_c9523_328,2A,TCGGCGTGTTGATCCTGGAGATCGTGTGTGGCCGTAGGAACAGCTCCCTC[A/G]ttgatcacgaatggtcaatgaaccttgtgggccacgcatggacgctgtgg
|
100
100
|
RAC875_c9542_235,7A,TATTTCATCCGAGCAGGGAGAAATGCCAGTAGTTCAATATTGCCAGATGG[T/G]CAACTGTCTTATTAGGATGGAATCCCGTCCGTATAAGATTGGTGGTTCCT
|
101
|
-
RAC875_c9543_99,2A,CGGAGTAAATATATGAAACACAACGGGGAACTTGTGTTTGCTAAAATGAT[T/G]GAGCGACTACTCTGCTTTGAATATATGCAGGGCGGAAGCCTTGAGAAATA
|
102
101
|
RAC875_c9543_99,7B,CGGAGTAAATATATGAAACACAACGGGGAACTTGTGTTTGCTAAAATGAT[T/G]GAGCGACTACTCTGCTTTGAATATATGCAGGGCGGAAGCCTTGAGAAATA
|
103
102
|
RAC875_c95993_265,3B,GCGGCTCCATGGAAGCCGCCATCATGTTGTCCGACTAAATGTGGAGTTAC[T/C]TGTGCAAGTTCCACCATCAACAAACACAAAGTTCTGCATGCTATCATCGC
|
104
103
|
RAC875_c9601_1680,1D,CTGATGAACCAGAATGCCCATTCCTTATGAAGCGTGGGTACTGTAAACTT[A/C]gggcacaatgtaagttttatcacccagaggcttcgagcccaacagatgca
|
@@ -119,7 +118,6 @@ RAC875_c982_710,6B,TGGGAGCCATAGTTGTTTGGGCAGGAGCAAAATGCCTTTGTGATGGTCGA[T/G]AGCTGG
|
|
119
118
|
RAC875_c98608_173,5B,CTACAGAAGATGAGATCGTCGCTGCAGACGGTGGAGGCCCTGCTCGAAGA[T/C]GCTGAGAGGCGGTCCATCACGGATCAGGCGGTGCGTCTGTGGCTGGGGCG
|
120
119
|
RAC875_c98794_101,3D,cataaatcgtcatacatatagagaggggaatccagggcaatctcagggtt[A/G]GGGAGCAATCCAGCAATGATCACCAGCACCTGAAAGTACCATGTCTCAAG
|
121
120
|
RAC875_c99286_60,6A,GCAGATTAAGGTCAGAGGAAGCATCTGCTACGAGGAACATCTTCGTCTTC[A/G]ACCTACAGGTGGAATATGCAGAGCAAGCACGGGCGTGCATCCGTGCCGCC
|
122
|
-
RAC875_c99286_90,2A,CGAGGAACATCTTCGTCTTCAACCTACAGGTGGAATATGCAGAGCAAGCA[T/C]GGGCGTGCATCCGTGCCGCCTTGGGTAGCCACCCCAGATTCGTCGTGCCC
|
123
121
|
RAC875_c99286_90,6A,CGAGGAACATCTTCGTCTTCAACCTACAGGTGGAATATGCAGAGCAAGCA[T/C]GGGCGTGCATCCGTGCCGCCTTGGGTAGCCACCCCAGATTCGTCGTGCCC
|
124
122
|
RAC875_c9932_705,2B,TGCCGATGCAGATGTCCAGCAATGATAAGGTTGCTACGCTCAAAGAACAA[T/C]GGATGGTACATAAGCGAGCACAGGGAATTCCACAACCACTCTCTTACAGA
|
125
123
|
RAC875_c9934_252,5A,TAGAGTCGGACGCGTCGTGATGGCCGCAGCGGCCAAGCATCTGACGCCGG[T/C]CGTCTTGGAGCTCGGTGGCAAATGCCCGGTGGTTGTCGATTCAAACGTCG
|
@@ -184,7 +182,6 @@ RAC875_rep_c106158_701,4A,AATGATGAGAGCTCTAATTTTATCAACTCACTAGACACCATGACCACTCC[T/G
|
|
184
182
|
RAC875_rep_c106170_204,3D,GAAATCTTTTATGTCCCCCAGCGACCATATACAGCTGTCGGAACACTTCG[T/C]GACCAGTTAATCTATCCACTTACAGCAGATCAGGAAACCGAACCACTTAG
|
185
183
|
RAC875_rep_c106177_206,4B,ATTTGCATGGTCTATGGTGGCATCAATTGCACCATCTCTTATGGCCTTGG[T/C]TACGATGCACTCCGCATCAGCAACAGGATTCTCAGTGTCTAGCCTTAGCT
|
186
184
|
RAC875_rep_c106241_304,5A,ATGATCCTTCCTACTGGAGCTGCCTCATTCAAGGAGGCAATGAAGATGGG[T/C]gttgaagtgtaccacaacttgaagtctgttatcaagaagaagtatgggca
|
187
|
-
RAC875_rep_c106241_304,5B,ATGATCCTTCCTACTGGAGCTGCCTCATTCAAGGAGGCAATGAAGATGGG[T/C]gttgaagtgtaccacaacttgaagtctgttatcaagaagaagtatgggca
|
188
185
|
RAC875_rep_c106322_1091,4B,TGAACTTCCCAAGATCACGAAGTGCTATAGTTTTGAGATATATGGTCTCC[A/G]TTACTAGATTAACAGTATGCAAGGACATTGGTGGATAACTGGAATGTGGA
|
189
186
|
RAC875_rep_c106400_276,2B,CCCGATCTTTGCATCGACACGATCACAAAGAACCGAACAGGATTACACGA[A/G]cattccgccctgctgtaaccctttggctttgggaagtgcacgtagggcag
|
190
187
|
RAC875_rep_c106488_130,5A,CTTGCGTTAGTCTCATGGCAGGCCACTGCTTCTGATCCTAGCCCACTCCA[A/G]gacttttgtgtcgccgacatgaattcaccagtccgtgtcaatgggtttgt
|
@@ -196,5 +193,4 @@ RAC875_rep_c106636_232,3A,CTTTAGGCATCGCCCGAGGAACGGAGTACCTGCATCAAGGTTGCAACCAG[T/C
|
|
196
193
|
RAC875_rep_c106876_558,1B,ttatgatcagattaagaagatgaacaccttcggctattttctggtactta[T/C]TGCGCCTTGCATAATCTTCGTGATGAATGTGCTTTGGTTCTCCAAGATCC
|
197
194
|
RAC875_rep_c106976_263,7A,ATCACCATCTCCCTGCTGTGGGTCGCCATCAACCCGCCGTCGCAGAACTC[A/G]CAGATCGGAGGCTCCTTCCAGTTCCCCTGATCGATGAAGCTAGCTTCCTC
|
198
195
|
RAC875_rep_c107031_84,2B,AATTAGGTCTTGCAGCAGGCGCCCCACCTCCAGAATAGGAACTATGAGGT[T/G]GTTGCTGCTGCCACTGTTGGGCTCCTGCATATGGTGCAGGCTGGTAGACC
|
199
|
-
RAC875_rep_c107110_137,3B,AAGAGATAATCACCGTGGCCAAGGCGGCCAACGCTCATGAGTTCATATCG[A/G]GCTTGCCACAGGGATACAACACTAACGTTGGTGAGAGAGGAACACAACTA
|
200
|
-
RAC875_rep_c107110_137,3D,AAGAGATAATCACCGTGGCCAAGGCGGCCAACGCTCATGAGTTCATATCG[A/G]GCTTGCCACAGGGATACAACACTAACGTTGGTGAGAGAGGAACACAACTA
|
196
|
+
RAC875_rep_c107110_137,3B,AAGAGATAATCACCGTGGCCAAGGCGGCCAACGCTCATGAGTTCATATCG[A/G]GCTTGCCACAGGGATACAACACTAACGTTGGTGAGAGAGGAACACAACTA
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-polyploid-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.7
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricardo H. Ramirez-Gonzalez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|