bio-polyploid-tools 0.8.3 → 0.8.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e9acfc190624dec8c4d6e1831c71524c330ffdd2
4
- data.tar.gz: 4307bf90f54200f65972ea596f94048d65edbcb1
2
+ SHA256:
3
+ metadata.gz: 728d9fb436e9e7d26698d011179da63ba79fc45c40b0a771cafc5f4dc6d84bc3
4
+ data.tar.gz: 3c62bd8bcfcb5d3f460729f19f8382bd46c7821fcacb83d01b0ff3f336b38f1b
5
5
  SHA512:
6
- metadata.gz: 7b7f9875c56e9395d4c934b456b2332218d3a4e202addd1cab48adcb0c443ad9cef607452ee4ed8ff32994cd4c10bc2cd10b3b24f966de49c02657a961d0d3d0
7
- data.tar.gz: a445e30d15f958c54424300250a481e470c47284f3842ba0a285138527f14cd0baaeb30547abf6f8fdd52c5c742e81b0cc875cbf783c6664fdd72c5d950bdcca
6
+ metadata.gz: 99784b38c37f00e71c3c1fa07899ccca8e32b6ff27fc363bcece989e788c0db745a7db4ae566efb42b55742fd01e5a99adeb211a7d46029f6c148dba8e91e92a
7
+ data.tar.gz: 40f0990a5652374ea3bef3b0e8777882720626e33fdc448ff48c5f71141b87ce153680a0215ad95e13595ddead39db822d276911baf0647723cc7e8bf3195bdb
data/.travis.yml CHANGED
@@ -9,6 +9,7 @@ addons:
9
9
  - exonerate
10
10
  before_install:
11
11
  - gem update --system
12
+ - export RUBYOPT="-W1"
12
13
  rvm:
13
14
  - 2.1.10
14
15
  - 2.2.5
@@ -16,5 +17,4 @@ rvm:
16
17
  - 2.4.2
17
18
  - 2.5.0
18
19
 
19
- before_install:
20
- - export RUBYOPT="-W1"
20
+
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ source "http://rubygems.org"
5
5
 
6
6
  gem "bio", ">= 1.5.1"
7
7
  gem "bio-samtools", ">= 2.6.2"
8
+ gem "descriptive_statistics"
8
9
  #gem "rake"
9
10
 
10
11
  gem "systemu", ">=2.5.2"
data/README.md CHANGED
@@ -128,6 +128,14 @@ To use blast instead of exonerate, use the following command:
128
128
 
129
129
  ## Release Notes
130
130
 
131
+ ### 0.8.4
132
+
133
+ * Added script ```tag_stats.rb``` that gets the descriptive statistics for a tag in a BAM file for each reference.
134
+
135
+ ```bash
136
+ ruby tag_stats.rb -b HI.3206.006.Index_2.CS_125RNA_14d_Leaf8.sorted.bam -r /Users/ramirezr/Dropbox/JIC/expVIPMetadatas/RefSeq1.0/Genes/annotation/IWGSCv1.0_UTR_ALL.cdnas.fasta --tag 'NH'
137
+ ```
138
+
131
139
  ### 0.8.3
132
140
 
133
141
  * BUGFIX: ```ChromosomeArm.rb``` was fixed to conform to the module assumptions for the package.
@@ -171,8 +179,6 @@ To use blast instead of exonerate, use the following command:
171
179
 
172
180
  # Notes
173
181
 
174
-
175
- * BUG: If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
176
182
  * BUG: Blocks with NNNs are picked and treated as semi-specific.
177
183
  * BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
178
184
  * TODO: Add a parameter file to configure the alignments.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.3
1
+ 0.8.4
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+
4
+ require 'csv'
5
+ require 'fileutils'
6
+ require 'tmpdir'
7
+ require 'bio-samtools'
8
+ require 'bio'
9
+
10
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
11
+ $: << File.expand_path('.')
12
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
13
+ require path
14
+ opts = {}
15
+ opts[:identity] = 50
16
+ opts[:min_bases] = 200
17
+ opts[:split_token] = "."
18
+ opts[:tmp_folder] = Dir.mktmpdir
19
+ opts[:random_sample] = 0
20
+ opts[:output_folder] = "."
21
+
22
+ OptionParser.new do |o|
23
+
24
+ o.banner = "Usage: mask_triads.rb [options]"
25
+
26
+ o.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
27
+ opts[:triads] = o
28
+ end
29
+
30
+ o.on("-f", "--fasta FILE" , "FASTA file containing all the possible peptide sequences. ") do |o|
31
+ opts[:fasta] = o
32
+ end
33
+
34
+ o.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |o|
35
+ opts[:split_token] = o
36
+ end
37
+
38
+ o.on("-o", "--output_folder DIR", "Location to save the alignment masks. If the alignment exists, it is recycled to avoid calling MAFFT again") do |o|
39
+ opts[:output_folder] = o
40
+ end
41
+ end.parse!
42
+
43
+
44
+ split_token = opts[:split_token]
45
+ reference_name = File.basename opts[:fasta]
46
+ output_folder = opts[:output_folder]
47
+ @fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta: opts[:fasta])
48
+ @fasta_reference_db.load_fai_entries
49
+ #puts @fasta_reference_db.index.entries
50
+ @cannonical = Hash.new
51
+ @fasta_reference_db.index.entries.each do |e|
52
+ gene = e.id.split(split_token)[0]
53
+ @cannonical[gene] = e unless @cannonical[gene]
54
+ @cannonical[gene] = e if e.length > @cannonical[gene].length
55
+ end
56
+
57
+ $stderr.puts "#Loaded #{@cannonical.length} canonical sequences from #{@fasta_reference_db.index.size} in reference"
58
+
59
+ $stderr.puts "TMP dir: #{opts[:tmp_folder]}"
60
+
61
+ def write_fasta_from_hash(sequences, filename)
62
+ out = File.new(filename, "w")
63
+ sequences.each_pair do | chromosome, exon_seq |
64
+ out.puts ">#{chromosome}\n#{exon_seq}\n"
65
+ end
66
+ out.close
67
+ end
68
+
69
+ def mafft_align(a, b, d)
70
+ to_align = Bio::Alignment::SequenceHash.new
71
+ seq_a = @fasta_reference_db.fetch_sequence(@cannonical[a].get_full_region)
72
+ seq_b = @fasta_reference_db.fetch_sequence(@cannonical[b].get_full_region)
73
+ seq_d = @fasta_reference_db.fetch_sequence(@cannonical[d].get_full_region)
74
+ to_align[a] = seq_a
75
+ to_align[b] = seq_b
76
+ to_align[d] = seq_d
77
+ report = mafft.query_alignment(to_align)
78
+ aln = report.alignment
79
+ aln
80
+ end
81
+
82
+ def read_alignment(path)
83
+ aln = Bio::Alignment::SequenceHash.new
84
+ i = 0
85
+ Bio::FlatFile.open(Bio::FastaFormat, path) do |fasta_file|
86
+ fasta_file.each do |entry|
87
+ aln[entry.entry_id] = entry.seq if i < 3
88
+ i += 1
89
+ end
90
+ end
91
+ aln
92
+ end
93
+
94
+
95
+ mafft_opts = ['--maxiterate', '1000', '--localpair', '--quiet']
96
+ mafft = Bio::MAFFT.new( "mafft" , mafft_opts)
97
+ header_printed = false
98
+ stats = File.open("#{output_folder}/#{reference_name}.identity_stats.csv", "w")
99
+ distances = File.open("#{output_folder}/#{reference_name}.distance_between_snps.csv.gz", "w")
100
+ gz = Zlib::GzipWriter.new(distances)
101
+ gz.write "triad,gene,genome,reference,type,distance\n"
102
+ #gz.close
103
+
104
+ def write_distances(distances, triad, gene, genome, reference, type, out)
105
+ distances.each { |e| out.write "#{triad},#{gene},#{genome},#{reference},#{type},#{e}\n" }
106
+ end
107
+
108
+ i = 0
109
+ CSV.foreach(opts[:triads], headers:true ) do |row|
110
+ next unless row["cardinality_abs"] == "1:1:1" and row["HC.LC"] == "HC-only"
111
+ a = row['A']
112
+ b = row['B']
113
+ d = row['D']
114
+ triad = row['group_id']
115
+ cent_triad = triad.to_i / 100
116
+ folder = "#{output_folder}/alignments/#{reference_name}/#{cent_triad}/"
117
+ save_cds = "#{folder}/#{triad}.fa"
118
+ aligned = File.file?(save_cds)
119
+ aln = aligned ? read_alignment(save_cds) : mafft_align(a,b,d)
120
+ folder = "#{output_folder}/alignments_new/#{reference_name}/#{cent_triad}/" if aligned
121
+ FileUtils.mkdir_p folder
122
+ save_cds = "#{folder}/#{triad}.fa"
123
+
124
+ aln2 = Bio::Alignment.new aln
125
+ seq_start = Bio::PolyploidTools::Mask.find_start(aln)
126
+ seq_end = Bio::PolyploidTools::Mask.find_end(aln)
127
+ #puts "#{triad}: #{seq_start}-#{seq_end}"
128
+
129
+
130
+ aln2.add_seq(Bio::PolyploidTools::Mask.get(aln,seq_start: seq_start, seq_end: seq_end, target: a), "A")
131
+ aln2.add_seq(Bio::PolyploidTools::Mask.get(aln,seq_start: seq_start, seq_end: seq_end, target: b), "B")
132
+ aln2.add_seq(Bio::PolyploidTools::Mask.get(aln,seq_start: seq_start, seq_end: seq_end, target: d), "D")
133
+
134
+ a_stats = Bio::PolyploidTools::Mask.stats(aln2["A"], triad, a, "A", reference_name)
135
+ b_stats = Bio::PolyploidTools::Mask.stats(aln2["B"], triad, b, "B", reference_name)
136
+ d_stats = Bio::PolyploidTools::Mask.stats(aln2["D"], triad, d, "D", reference_name)
137
+
138
+ write_distances(a_stats[:specific], triad, a, "A", reference_name, "specific", gz)
139
+ write_distances(b_stats[:specific], triad, b, "B", reference_name, "specific", gz)
140
+ write_distances(d_stats[:specific], triad, d, "D", reference_name, "specific", gz)
141
+
142
+ write_distances(a_stats[:semispecific], triad, a, "A", reference_name, "semispecific", gz)
143
+ write_distances(b_stats[:semispecific], triad, b, "B", reference_name, "semispecific", gz)
144
+ write_distances(d_stats[:semispecific], triad, d, "D", reference_name, "semispecific", gz)
145
+
146
+ a_stats.delete(:semispecific)
147
+ b_stats.delete(:semispecific)
148
+ d_stats.delete(:semispecific)
149
+
150
+ a_stats.delete(:specific)
151
+ b_stats.delete(:specific)
152
+ d_stats.delete(:specific)
153
+
154
+ a_stats[:length] = @cannonical[a].length
155
+ b_stats[:length] = @cannonical[b].length
156
+ d_stats[:length] = @cannonical[d].length
157
+
158
+ stats.puts a_stats.keys.join(",") unless header_printed
159
+ stats.puts a_stats.values.join(",")
160
+ stats.puts b_stats.values.join(",")
161
+ stats.puts d_stats.values.join(",")
162
+ header_printed = true
163
+
164
+ write_fasta_from_hash(aln2, save_cds)
165
+ i += 1
166
+ end
167
+ gz.close
168
+ distances.close
169
+ stats.close
data/bin/polymarker.rb CHANGED
@@ -350,10 +350,11 @@ container.add_alignments({
350
350
 
351
351
 
352
352
  #4.1 generating primer3 file
353
- write_status "Running primer3"
353
+ write_status "Finding genome-specific positions"
354
354
  file = File.open(exons_filename, "w")
355
355
  container.print_fasta_snp_exones(file)
356
356
  file.close
357
+ write_status "Running primer3"
357
358
 
358
359
  file = File.open(primer_3_input, "w")
359
360
 
data/bin/tag_stats.rb ADDED
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+
4
+ require 'csv'
5
+ require 'fileutils'
6
+ require 'tmpdir'
7
+ require 'bio-samtools'
8
+ require 'bio'
9
+ require 'descriptive_statistics'
10
+
11
+ class Bio::DB::Tag
12
+ def set(str)
13
+ @tag = str[0..1]
14
+ @type = str[3]
15
+ @value = str[5..-1]
16
+ @value = @value.to_i if @type == "i"
17
+ end
18
+ end
19
+
20
+ $: << File.expand_path(File.dirname(__FILE__) + '/../lib')
21
+ $: << File.expand_path('.')
22
+ path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
23
+ require path
24
+ opts = {}
25
+ opts[:tag] = "NH"
26
+ opts[:bam] = nil
27
+ opts[:out] = nil
28
+ opts[:ref] = nil
29
+
30
+ out = $stdout
31
+
32
+ OptionParser.new do |o|
33
+ o.banner = "Usage: tag_stats.rb [options]"
34
+
35
+ o.on("-t", "--tag str", "The tag to extract (default NH)") do |o|
36
+ opts[:tag] = o
37
+ end
38
+
39
+ o.on("-b", "--bam FILE" , "BAM file with the alignments ") do |o|
40
+ opts[:bam] = o
41
+ end
42
+
43
+ o.on("-o", "--out_file CHAR", "File to save the stats") do |o|
44
+ opts[:out] = o
45
+ end
46
+
47
+ o.on("-r", "--reference FILE", "Fasta file with the reference") do |o|
48
+ opts[:ref] = o
49
+ end
50
+ end.parse!
51
+
52
+ bam = Bio::DB::Sam.new(fasta: opts[:ref], bam: opts[:bam])
53
+ tag = opts[:tag]
54
+
55
+ sample = File.basename(opts[:bam], '.sorted.bam')
56
+ last_ref = ""
57
+ values = []
58
+ to_print = [:sum, :min, :max, :mean, :mode, :median, :q1, :q2, :q3]
59
+ percentiles = [90, 95, 97.5, 99]
60
+ #Add the 90, 95, 97.5 and 99 percentiles.
61
+ out = File.open(opts[:out], "w") if opts[:out]
62
+ bam.view do |aln |
63
+ if(last_ref != aln.rname)
64
+
65
+ desc_stats = values.descriptive_statistics
66
+ to_print.each { |e| out.puts [sample, last_ref, e , desc_stats[e] ].join("\t") } if(last_ref != "")
67
+ percentiles.each { |e| out.puts [sample, last_ref, "P#{e}", values.percentile(e)].join("\t") } if(last_ref != "")
68
+ out.puts [sample, last_ref, "N", values.length].join("\t") if(last_ref != "")
69
+ values.clear
70
+ last_ref = aln.rname
71
+ end
72
+ values << aln.tags[tag].value
73
+ end
74
+
75
+ out.close if opts[:out]
@@ -2,19 +2,19 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.8.3 ruby lib
5
+ # stub: bio-polyploid-tools 0.8.4 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools".freeze
9
- s.version = "0.8.3"
9
+ s.version = "0.8.4"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
14
- s.date = "2018-01-23"
14
+ s.date = "2018-02-27"
15
15
  s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
16
16
  s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
17
- s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "vcfLineToTable.rb".freeze]
17
+ s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze]
18
18
  s.extra_rdoc_files = [
19
19
  "README",
20
20
  "README.md"
@@ -42,10 +42,12 @@ Gem::Specification.new do |s|
42
42
  "bin/mafft_triads_promoters.rb",
43
43
  "bin/map_markers_to_contigs.rb",
44
44
  "bin/markers_in_region.rb",
45
+ "bin/mask_triads.rb",
45
46
  "bin/polymarker.rb",
46
47
  "bin/polymarker_capillary.rb",
47
48
  "bin/snp_position_to_polymarker.rb",
48
49
  "bin/snps_between_bams.rb",
50
+ "bin/tag_stats.rb",
49
51
  "bin/vcfLineToTable.rb",
50
52
  "bio-polyploid-tools.gemspec",
51
53
  "conf/defaults.rb",
@@ -88,6 +90,7 @@ Gem::Specification.new do |s|
88
90
  "lib/bio/PolyploidTools/ChromosomeArm.rb",
89
91
  "lib/bio/PolyploidTools/ExonContainer.rb",
90
92
  "lib/bio/PolyploidTools/Marker.rb",
93
+ "lib/bio/PolyploidTools/Mask.rb",
91
94
  "lib/bio/PolyploidTools/NoSNPSequence.rb",
92
95
  "lib/bio/PolyploidTools/PrimerRegion.rb",
93
96
  "lib/bio/PolyploidTools/SNP.rb",
@@ -172,7 +175,7 @@ Gem::Specification.new do |s|
172
175
  ]
173
176
  s.homepage = "http://github.com/tgac/bioruby-polyploid-tools".freeze
174
177
  s.licenses = ["MIT".freeze]
175
- s.rubygems_version = "2.6.14".freeze
178
+ s.rubygems_version = "2.7.4".freeze
176
179
  s.summary = "Tool to work with polyploids, NGS and molecular biology".freeze
177
180
 
178
181
  if s.respond_to? :specification_version then
@@ -181,6 +184,7 @@ Gem::Specification.new do |s|
181
184
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
182
185
  s.add_runtime_dependency(%q<bio>.freeze, [">= 1.5.1"])
183
186
  s.add_runtime_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
187
+ s.add_runtime_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
184
188
  s.add_runtime_dependency(%q<systemu>.freeze, [">= 2.5.2"])
185
189
  s.add_development_dependency(%q<shoulda>.freeze, [">= 2.10"])
186
190
  s.add_development_dependency(%q<test-unit>.freeze, [">= 0"])
@@ -188,6 +192,7 @@ Gem::Specification.new do |s|
188
192
  else
189
193
  s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
190
194
  s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
195
+ s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
191
196
  s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
192
197
  s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
193
198
  s.add_dependency(%q<test-unit>.freeze, [">= 0"])
@@ -196,6 +201,7 @@ Gem::Specification.new do |s|
196
201
  else
197
202
  s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
198
203
  s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
204
+ s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
199
205
  s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
200
206
  s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
201
207
  s.add_dependency(%q<test-unit>.freeze, [">= 0"])
@@ -0,0 +1,114 @@
1
+ class Array
2
+ def sum
3
+ inject(0.0) { |result, el| result + el }
4
+ end
5
+
6
+ def mean
7
+ sum / size
8
+ end
9
+ end
10
+
11
+ module Bio::PolyploidTools::Mask
12
+
13
+ def self.find_end(seqs)
14
+ size = seqs.values[0].size
15
+ names = seqs.keys
16
+ i = size - 1
17
+ gap_count = 3
18
+ while i > 0 and gap_count > 0
19
+ gap_count = names.map { |chr| seqs[chr][i] == "-" ? 1:0 }.inject(0, :+)
20
+ i -= 1
21
+ end
22
+ i + 1
23
+ end
24
+
25
+ def self.find_start(seqs)
26
+ size = seqs.values[0].size
27
+ names = seqs.keys
28
+ i = 0
29
+ gap_count = 3
30
+ while i < size and gap_count > 0
31
+ gap_count = names.map { |chr| seqs[chr][i] == "-" ? 1 : 0 } .inject(0, :+)
32
+
33
+ i += 1
34
+ end
35
+ i - 1
36
+ end
37
+
38
+ def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
39
+ names = seqs.keys
40
+ target = names[0] if target.nil?
41
+ masked_snps = seqs[target].downcase
42
+ i = 0
43
+ while i < masked_snps.size
44
+ different = 0
45
+ cov = 0
46
+ gap = false
47
+ names.each do | chr |
48
+ if seqs[chr][i] != "-" and seqs[chr][i] != "n" and seqs[chr][i] != "N"
49
+ cov += 1
50
+ end
51
+ if chr != target
52
+ different += 1 if masked_snps[i].upcase != seqs[chr][i].upcase
53
+ end
54
+ if seqs[chr][i] == "-" and chr == target
55
+ gap = true
56
+ end
57
+ end
58
+ masked_snps[i] = "." if different == 0
59
+ masked_snps[i] = "." if cov == 1
60
+ masked_snps[i] = "*" if cov == 0
61
+ expected_snps = names.size - 1
62
+ masked_snps[i] = masked_snps[i].upcase if different == expected_snps
63
+ if gap
64
+ masked_snps[i] = different == expected_snps ? "-" : "_"
65
+ end
66
+ masked_snps[i] = "|" if i < seq_start or i > seq_end
67
+ i += 1
68
+ end
69
+ masked_snps
70
+ end
71
+
72
+ def self.stats(mask, triad, gene, genome, reference)
73
+ specific = []
74
+ semispecific = []
75
+ sp_i = 0
76
+ semi = 0
77
+ i = 0
78
+ mask.to_s.each_char do |e|
79
+ case e
80
+ when "n","N"
81
+ i += 1
82
+ when /[[:lower:]]/ then
83
+ semispecific << semi
84
+ semi = 0
85
+ i += 1
86
+ when /[[:upper:]]/ then
87
+ specific << sp_i
88
+ semispecific << semi
89
+ sp_i = 0
90
+ semi = 0
91
+ i += 1
92
+ when "." then
93
+ semi += 1
94
+ sp_i += 1
95
+ i += 1
96
+ end
97
+ end
98
+ {
99
+ reference: reference,
100
+ triad: triad,
101
+ genome: genome,
102
+ gene: gene,
103
+ semispecific_mean: semispecific.mean,
104
+ semispecific_bases: semispecific.size,
105
+ semispecific_identity: (1 - (semispecific.size.to_f / i)) * 100 ,
106
+ specific_mean: specific.mean,
107
+ specific_bases: specific.size,
108
+ specific_identity: (1 - (specific.size.to_f / i )) * 100,
109
+ aligned_length: i,
110
+ specific: specific,
111
+ semispecific: semispecific
112
+ }
113
+ end
114
+ end
@@ -33,10 +33,6 @@ module Bio::PolyploidTools
33
33
  snp
34
34
  end
35
35
 
36
- def parse_snp
37
-
38
- end
39
-
40
36
  def parse_sequence_snp
41
37
  pos = 0
42
38
  match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-polyploid-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricardo H. Ramirez-Gonzalez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-23 00:00:00.000000000 Z
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: 2.6.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: descriptive_statistics
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: systemu
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -114,10 +128,12 @@ executables:
114
128
  - mafft_triads_promoters.rb
115
129
  - map_markers_to_contigs.rb
116
130
  - markers_in_region.rb
131
+ - mask_triads.rb
117
132
  - polymarker.rb
118
133
  - polymarker_capillary.rb
119
134
  - snp_position_to_polymarker.rb
120
135
  - snps_between_bams.rb
136
+ - tag_stats.rb
121
137
  - vcfLineToTable.rb
122
138
  extensions: []
123
139
  extra_rdoc_files:
@@ -146,10 +162,12 @@ files:
146
162
  - bin/mafft_triads_promoters.rb
147
163
  - bin/map_markers_to_contigs.rb
148
164
  - bin/markers_in_region.rb
165
+ - bin/mask_triads.rb
149
166
  - bin/polymarker.rb
150
167
  - bin/polymarker_capillary.rb
151
168
  - bin/snp_position_to_polymarker.rb
152
169
  - bin/snps_between_bams.rb
170
+ - bin/tag_stats.rb
153
171
  - bin/vcfLineToTable.rb
154
172
  - bio-polyploid-tools.gemspec
155
173
  - conf/defaults.rb
@@ -192,6 +210,7 @@ files:
192
210
  - lib/bio/PolyploidTools/ChromosomeArm.rb
193
211
  - lib/bio/PolyploidTools/ExonContainer.rb
194
212
  - lib/bio/PolyploidTools/Marker.rb
213
+ - lib/bio/PolyploidTools/Mask.rb
195
214
  - lib/bio/PolyploidTools/NoSNPSequence.rb
196
215
  - lib/bio/PolyploidTools/PrimerRegion.rb
197
216
  - lib/bio/PolyploidTools/SNP.rb
@@ -293,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
293
312
  version: '0'
294
313
  requirements: []
295
314
  rubyforge_project:
296
- rubygems_version: 2.6.14
315
+ rubygems_version: 2.7.4
297
316
  signing_key:
298
317
  specification_version: 4
299
318
  summary: Tool to work with polyploids, NGS and molecular biology