bio-polyploid-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +16 -0
  3. data/Gemfile.lock +67 -0
  4. data/README +21 -0
  5. data/Rakefile +61 -0
  6. data/VERSION +1 -0
  7. data/bin/bfr.rb +133 -0
  8. data/bin/count_variations.rb +36 -0
  9. data/bin/filter_blat_by_target_coverage.rb +15 -0
  10. data/bin/find_best_blat_hit.rb +32 -0
  11. data/bin/hexaploid_primers.rb +168 -0
  12. data/bin/homokaryot_primers.rb +155 -0
  13. data/bin/map_markers_to_contigs.rb +66 -0
  14. data/bin/markers_in_region.rb +42 -0
  15. data/bin/polymarker.rb +219 -0
  16. data/bin/snps_between_bams.rb +106 -0
  17. data/bio-polyploid-tools.gemspec +139 -0
  18. data/conf/defaults.rb +1 -0
  19. data/conf/primer3_config/dangle.dh +128 -0
  20. data/conf/primer3_config/dangle.ds +128 -0
  21. data/conf/primer3_config/interpretations/dangle_i.dh +131 -0
  22. data/conf/primer3_config/interpretations/dangle_i.ds +131 -0
  23. data/conf/primer3_config/interpretations/loops_i.dh +34 -0
  24. data/conf/primer3_config/interpretations/loops_i.ds +31 -0
  25. data/conf/primer3_config/interpretations/stack_i.dh +257 -0
  26. data/conf/primer3_config/interpretations/stack_i.ds +256 -0
  27. data/conf/primer3_config/interpretations/stackmm_i_mm.dh +257 -0
  28. data/conf/primer3_config/interpretations/stackmm_i_mm.ds +256 -0
  29. data/conf/primer3_config/interpretations/tetraloop_i.dh +79 -0
  30. data/conf/primer3_config/interpretations/tetraloop_i.ds +81 -0
  31. data/conf/primer3_config/interpretations/triloop_i.dh +21 -0
  32. data/conf/primer3_config/interpretations/triloop_i.ds +18 -0
  33. data/conf/primer3_config/interpretations/tstack2_i.dh +256 -0
  34. data/conf/primer3_config/interpretations/tstack2_i.ds +256 -0
  35. data/conf/primer3_config/interpretations/tstack_i.dh +256 -0
  36. data/conf/primer3_config/interpretations/tstack_i.ds +256 -0
  37. data/conf/primer3_config/interpretations/tstack_tm_inf_i.dh +256 -0
  38. data/conf/primer3_config/interpretations/tstack_tm_inf_i.ds +256 -0
  39. data/conf/primer3_config/loops.dh +30 -0
  40. data/conf/primer3_config/loops.ds +30 -0
  41. data/conf/primer3_config/stack.dh +256 -0
  42. data/conf/primer3_config/stack.ds +256 -0
  43. data/conf/primer3_config/stackmm.dh +256 -0
  44. data/conf/primer3_config/stackmm.ds +256 -0
  45. data/conf/primer3_config/tetraloop.dh +77 -0
  46. data/conf/primer3_config/tetraloop.ds +77 -0
  47. data/conf/primer3_config/triloop.dh +16 -0
  48. data/conf/primer3_config/triloop.ds +16 -0
  49. data/conf/primer3_config/tstack.dh +256 -0
  50. data/conf/primer3_config/tstack2.dh +256 -0
  51. data/conf/primer3_config/tstack2.ds +256 -0
  52. data/conf/primer3_config/tstack_tm_inf.ds +256 -0
  53. data/lib/bio/BFRTools.rb +698 -0
  54. data/lib/bio/BIOExtensions.rb +186 -0
  55. data/lib/bio/PolyploidTools/ChromosomeArm.rb +52 -0
  56. data/lib/bio/PolyploidTools/ExonContainer.rb +194 -0
  57. data/lib/bio/PolyploidTools/Marker.rb +175 -0
  58. data/lib/bio/PolyploidTools/PrimerRegion.rb +22 -0
  59. data/lib/bio/PolyploidTools/SNP.rb +681 -0
  60. data/lib/bio/PolyploidTools/SNPSequence.rb +56 -0
  61. data/lib/bio/SAMToolsExtensions.rb +284 -0
  62. data/lib/bio/db/exonerate.rb +272 -0
  63. data/lib/bio/db/fastadb.rb +164 -0
  64. data/lib/bio/db/primer3.rb +673 -0
  65. data/lib/bioruby-polyploid-tools.rb +25 -0
  66. data/test/data/BS00068396_51.fa +2 -0
  67. data/test/data/BS00068396_51_contigs.aln +1412 -0
  68. data/test/data/BS00068396_51_contigs.dnd +7 -0
  69. data/test/data/BS00068396_51_contigs.fa +8 -0
  70. data/test/data/BS00068396_51_exonerate.tab +6 -0
  71. data/test/data/BS00068396_51_genes.txt +14 -0
  72. data/test/data/LIB1716.bam +0 -0
  73. data/test/data/LIB1716.bam.bai +0 -0
  74. data/test/data/LIB1719.bam +0 -0
  75. data/test/data/LIB1719.bam.bai +0 -0
  76. data/test/data/LIB1721.bam +0 -0
  77. data/test/data/LIB1721.bam.bai +0 -0
  78. data/test/data/LIB1722.bam +0 -0
  79. data/test/data/LIB1722.bam.bai +0 -0
  80. data/test/data/S22380157.fa +16 -0
  81. data/test/data/S22380157.fa.fai +1 -0
  82. data/test/data/Test3Aspecific.csv +1 -0
  83. data/test/data/Test3Aspecific_contigs.fa +6 -0
  84. data/test/data/patological_cases5D.csv +1 -0
  85. data/test/data/short_primer_design_test.csv +10 -0
  86. data/test/data/test_primer3_error.csv +4 -0
  87. data/test/data/test_primer3_error_contigs.fa +10 -0
  88. data/test/test_bfr.rb +51 -0
  89. data/test/test_exon_container.rb +17 -0
  90. data/test/test_exonearate.rb +53 -0
  91. data/test/test_snp_parsing.rb +40 -0
  92. metadata +201 -0
@@ -0,0 +1,186 @@
1
+
2
# Adds an IUPAC ambiguity lookup table to BioRuby's nucleic acid data module.
module Bio::NucleicAcid::Data
  # Code -> bases direction of the table. Everything is lower case and the
  # bases of each expansion are listed in alphabetical order.
  iupac_expansions = {
    'y' => 'ct',
    'r' => 'ag',
    'w' => 'at',
    's' => 'cg',
    'k' => 'gt',
    'm' => 'ac',

    'b' => 'cgt',
    'd' => 'agt',
    'h' => 'act',
    'v' => 'acg',

    'n' => 'acgt',

    'a' => 'a',
    't' => 't',
    'g' => 'g',
    'c' => 'c',
    'u' => 'u'
  }

  # Two-way table: an ambiguity code maps to the bases it stands for
  # ('y' => 'ct') and a sorted base set maps back to its code ('ct' => 'y').
  # Single bases map to themselves, so inverting them adds no new keys.
  IUPAC_CODES = iupac_expansions.merge(iupac_expansions.invert)
end
42
+
43
# Extensions to BioRuby's Bio::NucleicAcid for working with IUPAC
# ambiguity codes.
class Bio::NucleicAcid

  # Two-way, lower-case IUPAC table: an ambiguity code maps to the bases it
  # stands for ('y' => 'ct') and an alphabetically sorted base set maps back
  # to its code ('ct' => 'y'). Single bases map to themselves.
  IUPAC_CODES = {
    'y' => 'ct',
    'r' => 'ag',
    'w' => 'at',
    's' => 'cg',
    'k' => 'gt',
    'm' => 'ac',

    'b' => 'cgt',
    'd' => 'agt',
    'h' => 'act',
    'v' => 'acg',

    'n' => 'acgt',

    'a' => 'a',
    't' => 't',
    'g' => 'g',
    'c' => 'c',
    'u' => 'u',

    'ct' => 'y',
    'ag' => 'r',
    'at' => 'w',
    'cg' => 's',
    'gt' => 'k',
    'ac' => 'm',

    'cgt' => 'b',
    'agt' => 'd',
    'act' => 'h',
    'acg' => 'v',

    'acgt' => 'n'
  }

  # Tells whether +base+ is one of a, c, g or t (either case).
  #
  # The test is a literal substring check. The previous implementation passed
  # +base+ to String#match, which compiled it as a regular expression: '*' or
  # '[' raised RegexpError and '.' was reported as unambiguous.
  #
  # base:: String holding the base to test.
  # Returns true or false.
  def self.is_unambiguous(base)
    "acgtACGT".include?(base)
  end

  # Collapses a set of bases into its single-letter IUPAC code.
  #
  # bases:: anything whose to_s yields the bases (case and order are ignored,
  #         duplicates are removed before the lookup).
  # Returns the upper-case code; 'N' when the base set is not in IUPAC_CODES,
  # in which case a diagnostic naming the offending input goes to STDERR.
  def self.to_IUAPC(bases)
    lookup_key = bases.to_s.downcase.chars.sort.uniq.join
    code = IUPAC_CODES[lookup_key]
    unless code
      # Report the input (the lookup result is nil here and says nothing).
      $stderr.puts "Invalid base! #{bases}"
      code = 'n' #This is a patch... as one of the scripts failed here.
    end
    code.upcase
  end

  # Tells whether +base+ is one of the characters IUPAC_CODES lists for +code+.
  #
  # code:: String key of IUPAC_CODES (case-insensitive).
  # base:: single-character String (case-insensitive).
  # Returns true or false; false when +code+ is not in the table.
  def self.is_valid(code, base)
    expansion = IUPAC_CODES[code.downcase]
    return false unless expansion
    expansion.chars.include? base.downcase
  end

end
100
+
101
class Bio::Sequence
  # Counts the positions at which two sequences differ.
  #
  # Only the indexes of +seq1+ are visited; each position is compared with
  # the element found at the same index of +seq2+.
  #
  # Returns the number of differing positions as an Integer.
  def self.snps_between(seq1, seq2)
    seq1.size.times.count { |position| seq1[position] != seq2[position] }
  end
end
110
+
111
class String
  # Number of characters in this string that
  # Bio::NucleicAcid.is_unambiguous does not accept.
  def count_ambiguities
    each_char.count { |base| !Bio::NucleicAcid.is_unambiguous(base) }
  end

  # Length of the run of characters at the start of the string that are NOT
  # in A-Z (0 when the string starts with an upper-case letter).
  # NOTE(review): the name suggests a count of upper-case letters, but the
  # pattern measures the leading non-upper-case prefix; confirm with callers
  # before changing it.
  def upper_case_count
    leading_run = self[/[^A-Z]*/]
    leading_run.size
  end
end
126
+
127
class Bio::Blat
  # Runs the external +blat+ program and hands back the hits of its report.
  #
  # database:: value interpolated as the first blat argument.
  # query::    value interpolated as the second blat argument.
  # output::   path blat writes its report to; it is read back afterwards.
  #
  # The command line is echoed to STDERR. With a block, every hit is yielded
  # and nil is returned; without one, an Array with all the hits is returned.
  #
  # Raises Exception when blat exits with a non-zero status.
  #
  # NOTE(review): the arguments are interpolated into a shell command line
  # unescaped, so paths containing spaces or shell metacharacters will break.
  def self.align(database , query , output)
    cmdline = "blat #{database} #{query} #{output}"
    $stderr.puts cmdline
    status, _stdout, stderr = systemu cmdline
    unless status.exitstatus == 0
      raise Exception, "Error running blat. Command line was '#{cmdline}'\nBlat STDERR was:\n#{stderr}"
    end

    alns = Array.new unless block_given?
    report_file = Bio::FlatFile.open(output)
    begin
      blat_aln = Bio::Blat::Report.new(report_file.to_io)
      blat_aln.each_hit() do |hit|
        if block_given?
          yield hit
        else
          alns << hit
        end
      end
    ensure
      # The handle used to be left open for the garbage collector.
      report_file.close
    end
    alns
  end
end
149
+
150
# Helpers to read wheat chromosome information out of the target id of a hit.
class Bio::Blat::Report::Hit

  #Function to parse stuff like: IWGSC_CSS_1AL_scaff_110
  # Returns the third '_'-separated field of target_id ("1AL" in the example),
  # or nil when the id has fewer than three fields. The value is cached.
  def wheat_chr_arm
    @wheat_chr_arm ||= target_id.split('_')[2]
  end

  # First two characters of the chromosome arm ("1A" for "1AL").
  def wheat_chr
    wheat_chr_arm[0,2]
  end

  # First character of the chromosome arm ("1" for "1AL").
  # Raises Exception when target_id does not contain a chromosome arm field.
  def wheat_chr_group
    # Check the arm itself: going through wheat_chr would fail with
    # NoMethodError on nil before this error could be raised.
    raise Exception, "No wheat group for #{target_id} #{self.inspect}" unless wheat_chr_arm
    wheat_chr_arm[0]
  end

  # Second character of the chromosome arm ("A" for "1AL").
  def wheat_genome
    wheat_chr_arm[1]
  end

  # Third character of the chromosome arm ("L" for "1AL").
  def wheat_arm
    wheat_chr_arm[2]
  end

  # (match + mismatch) expressed as a percentage of query_len, as a Float.
  def percentage_covered
    ( match + mismatch ) * 100.0 / query_len.to_f
  end

end
180
+
181
+
182
class Hash
  # Renders the hash as one string: each key/value pair is joined with
  # +keyvaldelim+ and the resulting entries are joined with +entrydelim+.
  # Both delimiters default to the global output field separator ($,).
  def join(keyvaldelim=$,, entrydelim=$,)
    rendered_entries = []
    each_pair do |key, value|
      rendered_entries << [key, value].join(keyvaldelim)
    end
    rendered_entries.join(entrydelim)
  end
end
@@ -0,0 +1,52 @@
1
+ module Bio::PolyploidTools
2
+
3
# A named chromosome arm together with the FASTA file holding its contigs.
class ChromosomeArm
  attr_accessor :name
  attr_reader :genes
  attr_reader :loaded_entries
  attr_reader :fasta_db

  # name::          identifier of the arm.
  # path_to_fasta:: path handed to Bio::DB::Fasta::FastaFile.
  def initialize(name, path_to_fasta)
    @name = name
    @fasta_db = Bio::DB::Fasta::FastaFile.new(path_to_fasta)
    @genes = Hash.new
  end

  # Returns the full sequence of the contig named +contig_id+.
  # The fai entries of the FASTA file are loaded on the first call only.
  def fetch_contig(contig_id)
    unless @loaded_entries
      @fasta_db.load_fai_entries
      @loaded_entries = true
    end
    entry = fasta_db.index.region_for_entry(contig_id)
    @fasta_db.fetch_sequence(entry.get_full_region)
  end

  #Loads all the chromosome arms in a folder.
  #The current version requires that all the references end with .fa.
  #The arm name is the leading run of word characters of the file name, which
  #has to start with a digit (for "1AL.fa" the name is "1AL"; underscores are
  #word characters and therefore stay in the name).
  #Files ending in .fa whose name does not start with a digit are skipped;
  #they used to abort the whole load with a NoMethodError on nil.
  #
  #Returns a Hash of arm name => ChromosomeArm.
  def self.load_from_folder(path_to_contigs)
    chromosome_arms = Hash.new

    Dir.foreach(path_to_contigs) do |filename|
      next unless File.fnmatch("*.fa", filename)

      parsed = /^(?<arm>\d\w+)/.match(filename)
      next unless parsed

      arm = ChromosomeArm.new(parsed[:arm], "#{path_to_contigs}/#{filename}")
      chromosome_arms[arm.name] = arm
    end
    chromosome_arms
  end

end
51
+
52
+ end
@@ -0,0 +1,194 @@
1
+ #puts "Loading ExonCointainer..."
2
+ module Bio::PolyploidTools
3
# Groups SNPs by gene together with the sequence databases and parental
# alignments needed to print them as FASTA / primer3 records.
class ExonContainer
  attr_reader :parental_1_sam, :parental_2_sam
  attr_reader :parental_1_name, :parental_2_name, :gene_models_db
  # No reader is declared for the chromosomes: the #chromosomes(path) method
  # below owns that name (it replaced the generated reader anyway).
  attr_reader :snp_map
  attr_reader :parents
  attr_accessor :flanking_size

  BASES = [:A, :C, :G, :T]

  def initialize
    @parents = Hash.new
    @snp_map = Hash.new
  end

  #Sets the reference file for the gene models
  def gene_models(path)
    @gene_models_db = Bio::DB::Fasta::FastaFile.new(path)
    @gene_models_path = path
  end

  #Returns the sequence for a region in the gene models (exon)
  def gene_model_sequence(region)
    @gene_models_db.fetch_sequence(region)
  end

  #Sets the reference file for the chromosomes
  def chromosomes(path)
    @chromosomes_db = Bio::DB::Fasta::FastaFile.new(path)
    @chromosomes_path = path
  end

  #Returns the sequence for a region in the chromosomes database.
  #When the region starts before 0 its start is moved to 0 (the region object
  #is modified) and the result is left padded with '-'.
  def chromosome_sequence(region)
    left_pad = 0
    #TODO: Padd if it goes to the right
    if region.start < 0
      left_pad = (region.start * -1) + 1
      region.start = 0
    end
    str = "-" * left_pad << @chromosomes_db.fetch_sequence(region)
    #str << "n" * (region.size - str.size + 1) if region.size > str.size
    str
  end

  # Registers the FASTA file of a chromosome arm.
  #
  # opts[:name]::      key under which the arm is stored.
  # opts[:alig_path]:: path handed to Bio::DB::Fasta::FastaFile.
  #
  # NOTE(review): opts[:reference_path] used to be read and then immediately
  # overwritten by opts[:alig_path]; only :alig_path ever took effect, which
  # is what is kept here. Confirm whether a fallback was intended.
  def add_chromosome_arm(opts)
    @chromosomes ||= Hash.new
    name = opts[:name]
    path = opts[:alig_path]
    # Use the instance variable: calling #chromosomes here would invoke the
    # one-argument method above without arguments and raise ArgumentError.
    @chromosomes[name] = Bio::DB::Fasta::FastaFile.new(path)
  end

  # Files +snp+ under its gene in snp_map.
  def add_snp(snp)
    (@snp_map[snp.gene] ||= Array.new) << snp
  end

  # Parses one SNP per line of +filename+ and adds those with a position
  # greater than 0, tagging them with this container and the given
  # +chromosome+, +snp_in+ and +original_name+.
  def add_snp_file(filename, chromosome, snp_in, original_name)
    File.open(filename) do |f|
      f.each_line do |line|
        snp = SNP.parse(line)
        snp.flanking_size = flanking_size
        next unless snp.position > 0

        snp.container = self
        snp.chromosome = chromosome
        snp.snp_in = snp_in
        snp.original_name = original_name
        add_snp(snp)
      end
    end
  end

  # NOTE(review): looks unfinished. It asks the SNP for its covered region
  # and local position and then only prints an empty line.
  def primer_3_input_for_snp(snp)
    gene_region = snp.covered_region
    local_pos_in_gene = snp.local_position
    puts ""
  end

  # Builds the FASTA text for +snp+: one record per parental (consensus over
  # the covered gene region) followed by one record per exon in
  # snp.exon_list (chromosome sequence, left padded with '-' by the distance
  # between the exon start and the gene region start). The base at the local
  # SNP position is upper-cased in every record.
  #
  # The text is printed to STDOUT and returned. Exon records are separated by
  # a newline; they used to be glued together, so the header of the second
  # exon ended up on the sequence line of the first one.
  def fasta_string_for_snp(snp)
    gene_region = snp.covered_region
    local_pos_in_gene = snp.local_position
    ret_str = ""
    @parents.each do |name, bam|
      ret_str << ">#{gene_region.id}_SNP-#{snp.position}_#{name} Overlapping_exons:#{gene_region.to_s} localSNPpo:#{local_pos_in_gene+1}\n"
      to_print = bam.consensus_with_ambiguities({:region=>gene_region}).to_s
      to_print[local_pos_in_gene] = to_print[local_pos_in_gene].upcase
      ret_str << to_print << "\n"
    end

    first_exon = true
    snp.exon_list.each do |chromosome, exon|
      ret_str << "\n" unless first_exon
      first_exon = false

      target_region = exon.target_region
      exon_start_offset = exon.query_region.start - gene_region.start
      chr_local_pos = local_pos_in_gene + target_region.start + 1
      ret_str << ">#{chromosome}_SNP-#{chr_local_pos} #{exon.to_s} #{target_region.orientation}\n"
      to_print = "-" * exon_start_offset
      to_print << chromosome_sequence(exon.target_region).to_s
      to_print[local_pos_in_gene] = to_print[local_pos_in_gene].upcase
      ret_str << to_print
    end
    puts ret_str
    ret_str
  end

  # Writes the aligned sequences of every SNP to +file+. SNPs for which that
  # fails are remembered (as strings) in @missing_exons instead of aborting.
  def print_fasta_snp_exones (file)
    each_snp_recording_failures do |snp|
      #file.puts snp.primer_fasta_string
      file.puts snp.aligned_sequences_fasta
    end
  end

  # Writes the primer3 record of every SNP to +file+ (empty records are
  # skipped). SNPs for which that fails are remembered in @missing_exons.
  # +target_chromosome+ is accepted but not used; the chromosome of each SNP
  # is passed to primer_3_string instead.
  def print_primer_3_exons (file, target_chromosome , parental )
    each_snp_recording_failures do |snp|
      string = snp.primer_3_string( snp.chromosome, parental )
      file.puts string if string.size > 0
    end
  end

  # Reads the exonerate alignments in opts[:exonerate_file] and attaches to
  # each SNP the exons covering its position.
  #
  # opts[:min_identity]::  alignments below this identity are ignored
  #                        (defaults to 90).
  # opts[:arm_selection]:: callable turning a target id into the name the
  #                        exon is filed under; defaults to the first three
  #                        characters of the id.
  def add_alignments(opts=Hash.new)
    opts = { :min_identity=>90 }.merge!(opts)
    exonerate_filename = opts[:exonerate_file]
    arm_selection = opts[:arm_selection] || lambda { |contig_name| contig_name[0,3] }

    File.open(exonerate_filename) do |f|
      f.each_line do |line|
        record = Bio::DB::Exonerate::Alignment.parse_custom(line)
        next unless record && record.identity >= opts[:min_identity]

        snp_array = @snp_map[record.query_id]
        next unless snp_array

        snp_array.each do |snp|
          next unless snp && snp.position.between?( (record.query_start + 1) , record.query_end)
          begin
            exon = record.exon_on_gene_position(snp.position)
            snp.add_exon(exon, arm_selection.call(record.target_id))
          rescue Bio::DB::Exonerate::ExonerateException
            $stderr.puts "Failed for the range #{record.query_start}-#{record.query_end} for position #{snp.position}"
          end
        end
      end
    end
  end

  # Registers a parental line.
  #
  # opts[:name]:: name of the parental. Defaults to the base name of the BAM
  #               file without ".bam", or to "Unknown" when no path is given.
  # opts[:path]:: path to the BAM file (String or Pathname). When present a
  #               Bio::DB::Sam is opened against the gene models file.
  #
  # The entry stored in parents is nil when no path is given.
  def add_parental(opts=Hash.new)
    sam = nil
    name = opts[:name] || "Unknown"
    if opts[:path]
      path = opts[:path]
      # String has no #basename, so plain string paths used to raise
      # NoMethodError here; objects that do respond to it keep their own.
      default_name = path.respond_to?(:basename) ? path.basename(".bam") : File.basename(path, ".bam")
      name = opts[:name] || default_name
      sam = Bio::DB::Sam.new({:fasta=>@gene_models_path, :bam=>opts[:path]})
    end
    @parents[name] = sam
  end

  private

  # Yields every SNP in snp_map. Whatever the block raises is swallowed and
  # the SNP is recorded in @missing_exons (created on first use), except for
  # process-level conditions (exit, signals such as Ctrl-C, out of memory),
  # which are re-raised.
  def each_snp_recording_failures
    @missing_exons ||= Set.new
    @snp_map.each_value do |snp_array|
      snp_array.each do |snp|
        begin
          yield snp
        rescue SystemExit, SignalException, NoMemoryError
          raise
        rescue Exception
          @missing_exons << snp.to_s
        end
      end
    end
  end
end
193
+
194
+ end
@@ -0,0 +1,175 @@
1
+ module Bio::PolyploidTools
2
# One marker of a genetic map, built from a comma separated line with the
# columns
# INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE
# optionally followed by the contig the marker was mapped to.
class Marker
  include Comparable
  #include Virgola
  attr_reader :template_sequence, :original, :snp
  attr_accessor :best_hit
  attr_accessor :index_90k
  attr_accessor :snp_id
  attr_accessor :snp_name
  attr_accessor :chr
  attr_accessor :coordinates_chr
  attr_accessor :map_order
  attr_accessor :chr_arm
  attr_accessor :distance_cm
  attr_accessor :sequence
  attr_writer :contig

  #after_map :parse_sequence_snp

  # FASTA record: the SNP name as header and the template sequence as body.
  def to_fasta
    ">#{self.snp_name}\n#{self.template_sequence}"
  end

  # Contig of the marker. When a best hit is set, its target id replaces
  # whatever contig was stored before.
  def contig
    @contig = best_hit.target_id.chomp if best_hit
    @contig
  end

  # The marker as a comma separated line, in the input column order plus the
  # contig.
  def to_csv
    "#{index_90k},#{snp_id},#{snp_name},#{chr},#{coordinates_chr},#{map_order},#{chr_arm},#{distance_cm},#{sequence},#{contig}"
  end

  # Orders markers by chromosome arm, then by coordinates, then by SNP name.
  # Two markers with the same SNP name always compare as equal.
  # NOTE(review): fields parsed from a line are Strings, so coordinates are
  # compared as text ("100" sorts before "20") - confirm this is intended.
  def <=>(anOter)
    return 0 if anOter.snp_name == @snp_name
    return @chr_arm <=> anOter.chr_arm if anOter.chr_arm != @chr_arm
    return @snp_name <=> anOter.snp_name if anOter.coordinates_chr == @coordinates_chr
    @coordinates_chr <=> anOter.coordinates_chr
  end

  # line:: one comma separated line (see the class comment). It is not
  #        modified, so frozen strings are accepted.
  #
  # template_sequence stays nil when the SEQUENCE column does not contain a
  # SNP written as [X/Y].
  def initialize(line)
    @template_sequence = nil
    #INDEX_90K,SNP_ID,SNP_NAME,CHR,COORDINATES_CHR,MAP_ORDER,CHR_ARM,DISTANCE_CM,SEQUENCE
    @index_90k, @snp_id, @snp_name, @chr, @coordinates_chr, @map_order, @chr_arm, @distance_cm, @sequence, @contig = line.chomp.split(',')
    parse_sequence_snp
  end

  # Yields a Marker for every line of +filename+ that contains a parsable
  # SNP; other lines (header, blank lines) are skipped. The file is read line
  # by line and closed when the iteration ends.
  def self.parse(filename)
    File.foreach(filename) do |line|
      marker = Marker.new(line)
      yield marker if marker.template_sequence
    end
  end

  protected

  # Upper-cases the chromosome name and, when the sequence looks like
  # PREFIX[X/Y]SUFFIX with X and Y in ACGT, stores the 1-based SNP position,
  # both alleles and the template sequence (the SNP replaced by its IUPAC
  # ambiguity code).
  #
  # Returns the template sequence, or nil when the sequence has no SNP.
  def parse_sequence_snp
    # Blank or short lines have no chromosome column at all.
    @chr = @chr.upcase if @chr
    match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence.to_s)
    return nil unless match_data

    @position = match_data[:pre].size + 1
    @original = match_data[:org]
    @snp = match_data[:snp]
    amb_base = Bio::NucleicAcid.to_IUAPC("#{@original}#{@snp}")
    @template_sequence = "#{match_data[:pre]}#{amb_base}#{match_data[:pos]}"
  end
end
75
+
76
+
77
#The map has to come sorted.
# Markers of one chromosome, keyed in a Hash, plus the reference they are
# aligned against.
class ArmMap
  # No readers are declared for the references: #global_reference(path) and
  # #reference(path) below own those names and load a FASTA file instead.
  attr_reader :markers
  attr_accessor :chromosome

  def initialize
    @markers = Hash.new
  end

  # Aligns the markers FASTA (see #print_fasta_markers) against the
  # reference with Bio::Blat.align, writing the report to +output+, and
  # keeps for every marker the hit with the highest score (the first one
  # wins on ties). Hits whose query id is not a key of markers are ignored.
  def align_markers(output)
    Bio::Blat.align(@reference.fasta_path, @fasta_markers, output) do |hit|
      marker = markers[hit.query_id]
      next unless marker

      current_best = marker.best_hit
      marker.best_hit = hit if !current_best || hit.score > current_best.score
    end
  end

  # Writes to +contigs_file+, once each, the reference contigs that are the
  # best hit of at least one marker.
  def print_fasta_contigs_for_markers(contigs_file)
    contig_ids = []
    markers.each do |_key, marker|
      contig_ids << marker.best_hit.target_id if marker.best_hit
    end

    File.open(contigs_file, "w") do |fasta|
      contig_ids.uniq.each do |contig_id|
        reg = @reference.index.region_for_entry(contig_id)
        fasta.puts ">#{contig_id}\n#{@reference.fetch_sequence(reg.get_full_region)}"
      end
    end
  end

  # Writes every marker as FASTA to +filename+ and remembers that path as
  # the query file for #align_markers.
  def print_fasta_markers(filename)
    @fasta_markers = filename
    File.open(filename, "w") do |fasta|
      markers.each do |_key, marker|
        fasta.puts marker.to_fasta
      end
    end
  end

  # Opens the FASTA file at path +reference+ as the global reference and
  # loads its fai entries.
  def global_reference(reference)
    @global_reference = Bio::DB::Fasta::FastaFile.new(reference)
    @global_reference.load_fai_entries
  end

  # Opens the FASTA file at path +reference+ as the reference the markers
  # are aligned against and loads its fai entries.
  def reference(reference)
    @reference = Bio::DB::Fasta::FastaFile.new(reference)
    @reference.load_fai_entries
  end

  # Makes +filename+ the reference of this map. When the file does not exist
  # yet it is first created with the entries of the global reference whose
  # id maps (see #arm_selection_embl) to this map's chromosome.
  def print_fasta_contigs_from_reference(filename)
    if File.exist?(filename)
      reference(filename)
      return
    end

    File.open(filename, "w") do |fasta|
      Bio::FlatFile.auto( @global_reference.fasta_path) do |ff|
        ff.each do |f|
          fasta.puts f.entry if arm_selection_embl(f.entry_id) == chromosome
        end
      end
    end
    reference(filename)
  end

  # Writes the markers as CSV lines to +filename+, sorted by map_order.
  def print_map_with_contigs(filename)
    File.open(filename, "w") do |file|
      markers.values.sort { |x,y| x.map_order <=> y.map_order }.each do |marker|
        file.puts marker.to_csv
      end
    end
  end

  protected

  # First two characters of the third '_'-separated field of +contig_name+.
  def arm_selection_embl(contig_name)
    contig_name.split('_')[2][0,2]
  end
end
175
+ end