bio-polyploid-tools 0.8.3 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e9acfc190624dec8c4d6e1831c71524c330ffdd2
4
- data.tar.gz: 4307bf90f54200f65972ea596f94048d65edbcb1
2
+ SHA256:
3
+ metadata.gz: 728d9fb436e9e7d26698d011179da63ba79fc45c40b0a771cafc5f4dc6d84bc3
4
+ data.tar.gz: 3c62bd8bcfcb5d3f460729f19f8382bd46c7821fcacb83d01b0ff3f336b38f1b
5
5
  SHA512:
6
- metadata.gz: 7b7f9875c56e9395d4c934b456b2332218d3a4e202addd1cab48adcb0c443ad9cef607452ee4ed8ff32994cd4c10bc2cd10b3b24f966de49c02657a961d0d3d0
7
- data.tar.gz: a445e30d15f958c54424300250a481e470c47284f3842ba0a285138527f14cd0baaeb30547abf6f8fdd52c5c742e81b0cc875cbf783c6664fdd72c5d950bdcca
6
+ metadata.gz: 99784b38c37f00e71c3c1fa07899ccca8e32b6ff27fc363bcece989e788c0db745a7db4ae566efb42b55742fd01e5a99adeb211a7d46029f6c148dba8e91e92a
7
+ data.tar.gz: 40f0990a5652374ea3bef3b0e8777882720626e33fdc448ff48c5f71141b87ce153680a0215ad95e13595ddead39db822d276911baf0647723cc7e8bf3195bdb
data/.travis.yml CHANGED
@@ -9,6 +9,7 @@ addons:
9
9
  - exonerate
10
10
  before_install:
11
11
  - gem update --system
12
+ - export RUBYOPT="-W1"
12
13
  rvm:
13
14
  - 2.1.10
14
15
  - 2.2.5
@@ -16,5 +17,4 @@ rvm:
16
17
  - 2.4.2
17
18
  - 2.5.0
18
19
 
19
- before_install:
20
- - export RUBYOPT="-W1"
20
+
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ source "http://rubygems.org"
5
5
 
6
6
  gem "bio", ">= 1.5.1"
7
7
  gem "bio-samtools", ">= 2.6.2"
8
+ gem "descriptive_statistics"
8
9
  #gem "rake"
9
10
 
10
11
  gem "systemu", ">=2.5.2"
data/README.md CHANGED
@@ -128,6 +128,14 @@ To use blast instead of exonerate, use the following command:
128
128
 
129
129
  ## Release Notes
130
130
 
131
+ ### 0.8.4
132
+
133
+ * Added script ```tag_stats.rb``` that gets the descriptive statistics for a tag in a BAM file for each reference.
134
+
135
+ ```bash
136
+ ruby tag_stats.rb -b HI.3206.006.Index_2.CS_125RNA_14d_Leaf8.sorted.bam -r /Users/ramirezr/Dropbox/JIC/expVIPMetadatas/RefSeq1.0/Genes/annotation/IWGSCv1.0_UTR_ALL.cdnas.fasta --tag 'NH'
137
+ ```
138
+
131
139
  ### 0.8.3
132
140
 
133
141
  * BUGFIX: ```ChromosomeArm.rb``` was fixed to conform to the module assumptions for the package.
@@ -171,8 +179,6 @@ To use blast instead of exonerate, use the following command:
171
179
 
172
180
  # Notes
173
181
 
174
-
175
- * BUG: If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
176
182
  * BUG: Blocks with NNNs are picked and treated as semi-specific.
177
183
  * BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
178
184
  * TODO: Add a parameter file to configure the alignments.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.3
1
+ 0.8.4
@@ -0,0 +1,169 @@
1
#!/usr/bin/env ruby
# For every triad row of a CSV that is flagged "1:1:1" and "HC-only", this script:
#   * aligns the longest ("canonical") A, B and D sequences with MAFFT, or
#     reloads the alignment if it was already saved in the output folder,
#   * appends one Bio::PolyploidTools::Mask line per genome to the alignment,
#   * writes the masked alignment as FASTA, and
#   * appends per-gene summary rows and SNP-distance rows to two CSV files.
require 'optparse'

require 'csv'
require 'fileutils'
require 'tmpdir'
require 'zlib' # Zlib::GzipWriter is used for the distances file
require 'bio-samtools'
require 'bio'

$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path = File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
require path

opts = {}
opts[:split_token] = "."
opts[:tmp_folder] = Dir.mktmpdir
opts[:output_folder] = "."

parser = OptionParser.new do |o|

  o.banner = "Usage: mask_triads.rb [options]"

  o.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |value|
    opts[:triads] = value
  end

  o.on("-f", "--fasta FILE" , "FASTA file containing all the possible peptide sequences. ") do |value|
    opts[:fasta] = value
  end

  o.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |value|
    opts[:split_token] = value
  end

  o.on("-o", "--output_folder DIR", "Location to save the alignment masks. If the alignment exists, it is recycled to avoid calling MAFFT again") do |value|
    opts[:output_folder] = value
  end
end
parser.parse!

# Both input files are mandatory; fail with the usage text instead of a
# TypeError from File.basename(nil) / CSV.foreach(nil) further down.
missing = { triads: "--triads", fasta: "--fasta" }.reject { |key, _flag| opts[key] }
unless missing.empty?
  $stderr.puts "Missing required option(s): #{missing.values.join(', ')}"
  $stderr.puts parser
  exit 1
end

split_token = opts[:split_token]
reference_name = File.basename opts[:fasta]
output_folder = opts[:output_folder]
@fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta: opts[:fasta])
@fasta_reference_db.load_fai_entries

# Keep, for every gene (the sequence id up to the split token), the longest
# entry of the index; the first entry wins when lengths tie.
@cannonical = {}
@fasta_reference_db.index.entries.each do |entry|
  gene = entry.id.split(split_token)[0]
  current = @cannonical[gene]
  @cannonical[gene] = entry if current.nil? || entry.length > current.length
end

$stderr.puts "#Loaded #{@cannonical.length} canonical sequences from #{@fasta_reference_db.index.size} in reference"

$stderr.puts "TMP dir: #{opts[:tmp_folder]}"

# Writes every name/sequence pair of +sequences+ to +filename+ as FASTA.
# The file is closed even if writing raises.
def write_fasta_from_hash(sequences, filename)
  File.open(filename, "w") do |out|
    sequences.each_pair do |name, sequence|
      out.puts ">#{name}\n#{sequence}\n"
    end
  end
end

# Fetches the canonical sequences of genes +a+, +b+ and +d+ from the reference
# and returns their MAFFT alignment.
# Uses @mafft (set below, before the first call): a top-level local variable
# is not visible inside a method body, an instance variable of main is.
def mafft_align(a, b, d)
  to_align = Bio::Alignment::SequenceHash.new
  [a, b, d].each do |gene|
    to_align[gene] = @fasta_reference_db.fetch_sequence(@cannonical[gene].get_full_region)
  end
  @mafft.query_alignment(to_align).alignment
end

# Reloads a previously saved alignment from the FASTA file at +path+.
# Only the first three entries are kept; any further entries in the file are
# read and ignored.
def read_alignment(path)
  aln = Bio::Alignment::SequenceHash.new
  Bio::FlatFile.open(Bio::FastaFormat, path) do |fasta_file|
    fasta_file.each_with_index do |entry, index|
      aln[entry.entry_id] = entry.seq if index < 3
    end
  end
  aln
end

# Writes one CSV row per value in +distances+ to +out+.
def write_distances(distances, triad, gene, genome, reference, type, out)
  distances.each { |e| out.write "#{triad},#{gene},#{genome},#{reference},#{type},#{e}\n" }
end

mafft_opts = ['--maxiterate', '1000', '--localpair', '--quiet']
@mafft = Bio::MAFFT.new("mafft", mafft_opts)
header_printed = false

FileUtils.mkdir_p output_folder
stats = File.open("#{output_folder}/#{reference_name}.identity_stats.csv", "w")
distances = File.open("#{output_folder}/#{reference_name}.distance_between_snps.csv.gz", "w")
gz = Zlib::GzipWriter.new(distances)

begin
  gz.write "triad,gene,genome,reference,type,distance\n"

  CSV.foreach(opts[:triads], headers: true) do |row|
    next unless row["cardinality_abs"] == "1:1:1" && row["HC.LC"] == "HC-only"

    genes = { "A" => row['A'], "B" => row['B'], "D" => row['D'] }
    triad = row['group_id']
    cent_triad = triad.to_i / 100

    # Reuse a saved alignment when there is one; results derived from a reused
    # alignment are saved under "alignments_new" instead of "alignments".
    folder = "#{output_folder}/alignments/#{reference_name}/#{cent_triad}/"
    save_cds = "#{folder}/#{triad}.fa"
    aligned = File.file?(save_cds)
    aln = aligned ? read_alignment(save_cds) : mafft_align(genes["A"], genes["B"], genes["D"])
    folder = "#{output_folder}/alignments_new/#{reference_name}/#{cent_triad}/" if aligned
    FileUtils.mkdir_p folder
    save_cds = "#{folder}/#{triad}.fa"

    aln2 = Bio::Alignment.new aln
    seq_start = Bio::PolyploidTools::Mask.find_start(aln)
    seq_end = Bio::PolyploidTools::Mask.find_end(aln)

    # The masks are computed from the untouched alignment (aln) and appended
    # to the copy (aln2) under the genome letter.
    genes.each do |genome, gene|
      mask = Bio::PolyploidTools::Mask.get(aln, seq_start: seq_start, seq_end: seq_end, target: gene)
      aln2.add_seq(mask, genome)
    end

    triad_stats = genes.map do |genome, gene|
      [genome, gene, Bio::PolyploidTools::Mask.stats(aln2[genome], triad, gene, genome, reference_name)]
    end

    # Same row order as before: all "specific" rows (A, B, D), then all
    # "semispecific" rows (A, B, D).
    [:specific, :semispecific].each do |type|
      triad_stats.each do |genome, gene, gene_stats|
        write_distances(gene_stats[type], triad, gene, genome, reference_name, type.to_s, gz)
      end
    end

    # The distance lists are not part of the summary row; the canonical
    # sequence length is.
    triad_stats.each do |_genome, gene, gene_stats|
      gene_stats.delete(:semispecific)
      gene_stats.delete(:specific)
      gene_stats[:length] = @cannonical[gene].length
    end

    unless header_printed
      stats.puts triad_stats.first.last.keys.join(",")
      header_printed = true
    end
    triad_stats.each { |_genome, _gene, gene_stats| stats.puts gene_stats.values.join(",") }

    write_fasta_from_hash(aln2, save_cds)
  end
ensure
  # GzipWriter#close finishes the gzip stream and closes the wrapped file.
  gz.close
  distances.close unless distances.closed?
  stats.close
end
data/bin/polymarker.rb CHANGED
@@ -350,10 +350,11 @@ container.add_alignments({
350
350
 
351
351
 
352
352
  #4.1 generating primer3 file
353
- write_status "Running primer3"
353
+ write_status "Finding genome-specific positions"
354
354
  file = File.open(exons_filename, "w")
355
355
  container.print_fasta_snp_exones(file)
356
356
  file.close
357
+ write_status "Running primer3"
357
358
 
358
359
  file = File.open(primer_3_input, "w")
359
360
 
data/bin/tag_stats.rb ADDED
@@ -0,0 +1,75 @@
1
#!/usr/bin/env ruby
# Prints descriptive statistics of one alignment tag (default "NH") for every
# reference name found in a BAM file, as tab-separated rows:
#   sample <TAB> reference <TAB> statistic <TAB> value
# A reference's rows are written when the reference name of the reads changes,
# and once more after the last read.
require 'optparse'

require 'csv'
require 'fileutils'
require 'tmpdir'
require 'bio-samtools'
require 'bio'
require 'descriptive_statistics'

# Reopens Bio::DB::Tag so that values of integer-typed ("i") tags are stored
# as Integers; for any other type the raw text after the "TG:t:" prefix is kept.
class Bio::DB::Tag
  def set(str)
    @tag = str[0..1]
    @type = str[3]
    @value = str[5..-1]
    @value = @value.to_i if @type == "i"
  end
end

$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path = File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
require path

opts = {}
opts[:tag] = "NH"
opts[:bam] = nil
opts[:out] = nil
opts[:ref] = nil

parser = OptionParser.new do |o|
  o.banner = "Usage: tag_stats.rb [options]"

  o.on("-t", "--tag str", "The tag to extract (default NH)") do |value|
    opts[:tag] = value
  end

  o.on("-b", "--bam FILE" , "BAM file with the alignments ") do |value|
    opts[:bam] = value
  end

  o.on("-o", "--out_file CHAR", "File to save the stats") do |value|
    opts[:out] = value
  end

  o.on("-r", "--reference FILE", "Fasta file with the reference") do |value|
    opts[:ref] = value
  end
end
parser.parse!

# Without a BAM file there is nothing to read; show the usage instead of
# failing later inside File.basename(nil).
unless opts[:bam]
  $stderr.puts "Missing required option --bam"
  $stderr.puts parser
  exit 1
end

bam = Bio::DB::Sam.new(fasta: opts[:ref], bam: opts[:bam])
tag = opts[:tag]

sample = File.basename(opts[:bam], '.sorted.bam')
to_print = [:sum, :min, :max, :mean, :mode, :median, :q1, :q2, :q3]
percentiles = [90, 95, 97.5, 99]

out = opts[:out] ? File.open(opts[:out], "w") : $stdout

# Writes all the rows of one reference: the summary statistics, the
# percentiles (as P90, P95, ...) and the number of values (N).
write_reference_stats = lambda do |reference, tag_values|
  desc_stats = tag_values.descriptive_statistics
  to_print.each { |stat| out.puts([sample, reference, stat, desc_stats[stat]].join("\t")) }
  percentiles.each { |pct| out.puts([sample, reference, "P#{pct}", tag_values.percentile(pct)].join("\t")) }
  out.puts([sample, reference, "N", tag_values.length].join("\t"))
end

last_ref = ""
values = []
without_tag = 0

begin
  bam.view do |aln|
    if last_ref != aln.rname
      # "" only marks "no read seen yet"; it is never reported.
      write_reference_stats.call(last_ref, values) unless last_ref == ""
      values.clear
      last_ref = aln.rname
    end

    tag_entry = aln.tags[tag]
    if tag_entry.nil?
      # A read without the requested tag has no value to add.
      without_tag += 1
      next
    end
    values << tag_entry.value
  end

  # The loop only reports a reference when the next one starts, so the last
  # reference has to be written here.
  write_reference_stats.call(last_ref, values) unless last_ref == ""
ensure
  out.close if opts[:out]
end

$stderr.puts "Skipped #{without_tag} alignments without the #{tag} tag" if without_tag > 0
@@ -2,19 +2,19 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: bio-polyploid-tools 0.8.3 ruby lib
5
+ # stub: bio-polyploid-tools 0.8.4 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "bio-polyploid-tools".freeze
9
- s.version = "0.8.3"
9
+ s.version = "0.8.4"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
14
- s.date = "2018-01-23"
14
+ s.date = "2018-02-27"
15
15
  s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
16
16
  s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
17
- s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "vcfLineToTable.rb".freeze]
17
+ s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze]
18
18
  s.extra_rdoc_files = [
19
19
  "README",
20
20
  "README.md"
@@ -42,10 +42,12 @@ Gem::Specification.new do |s|
42
42
  "bin/mafft_triads_promoters.rb",
43
43
  "bin/map_markers_to_contigs.rb",
44
44
  "bin/markers_in_region.rb",
45
+ "bin/mask_triads.rb",
45
46
  "bin/polymarker.rb",
46
47
  "bin/polymarker_capillary.rb",
47
48
  "bin/snp_position_to_polymarker.rb",
48
49
  "bin/snps_between_bams.rb",
50
+ "bin/tag_stats.rb",
49
51
  "bin/vcfLineToTable.rb",
50
52
  "bio-polyploid-tools.gemspec",
51
53
  "conf/defaults.rb",
@@ -88,6 +90,7 @@ Gem::Specification.new do |s|
88
90
  "lib/bio/PolyploidTools/ChromosomeArm.rb",
89
91
  "lib/bio/PolyploidTools/ExonContainer.rb",
90
92
  "lib/bio/PolyploidTools/Marker.rb",
93
+ "lib/bio/PolyploidTools/Mask.rb",
91
94
  "lib/bio/PolyploidTools/NoSNPSequence.rb",
92
95
  "lib/bio/PolyploidTools/PrimerRegion.rb",
93
96
  "lib/bio/PolyploidTools/SNP.rb",
@@ -172,7 +175,7 @@ Gem::Specification.new do |s|
172
175
  ]
173
176
  s.homepage = "http://github.com/tgac/bioruby-polyploid-tools".freeze
174
177
  s.licenses = ["MIT".freeze]
175
- s.rubygems_version = "2.6.14".freeze
178
+ s.rubygems_version = "2.7.4".freeze
176
179
  s.summary = "Tool to work with polyploids, NGS and molecular biology".freeze
177
180
 
178
181
  if s.respond_to? :specification_version then
@@ -181,6 +184,7 @@ Gem::Specification.new do |s|
181
184
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
182
185
  s.add_runtime_dependency(%q<bio>.freeze, [">= 1.5.1"])
183
186
  s.add_runtime_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
187
+ s.add_runtime_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
184
188
  s.add_runtime_dependency(%q<systemu>.freeze, [">= 2.5.2"])
185
189
  s.add_development_dependency(%q<shoulda>.freeze, [">= 2.10"])
186
190
  s.add_development_dependency(%q<test-unit>.freeze, [">= 0"])
@@ -188,6 +192,7 @@ Gem::Specification.new do |s|
188
192
  else
189
193
  s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
190
194
  s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
195
+ s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
191
196
  s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
192
197
  s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
193
198
  s.add_dependency(%q<test-unit>.freeze, [">= 0"])
@@ -196,6 +201,7 @@ Gem::Specification.new do |s|
196
201
  else
197
202
  s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
198
203
  s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
204
+ s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
199
205
  s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
200
206
  s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
201
207
  s.add_dependency(%q<test-unit>.freeze, [">= 0"])
@@ -0,0 +1,114 @@
1
# Reopens the core Array class. These definitions replace Array#sum for the
# whole process, so +sum+ keeps accepting the optional start value and the
# block that the core method accepts.
class Array
  # Adds up the elements, starting from +init+.
  #
  # init - start value of the accumulation. It defaults to 0.0, so a call
  #        without arguments returns a Float even for Integer elements.
  #
  # When a block is given, the block's result for each element is added
  # instead of the element itself.
  def sum(init = 0.0)
    if block_given?
      inject(init) { |result, el| result + yield(el) }
    else
      inject(init) { |result, el| result + el }
    end
  end

  # Arithmetic mean of the elements, as a Float.
  # For an empty array this is 0.0 / 0, i.e. Float::NAN (no exception).
  def mean
    sum / size
  end
end
10
+
11
# The namespaces are opened one by one so that this file also loads when
# Bio::PolyploidTools has not been defined yet.
module Bio
  module PolyploidTools
    # Helpers to summarise an alignment (a hash of name => aligned sequence)
    # as a per-column mask string for one of its sequences.
    module Mask

      # True when none of the sequences has a "-" at +column+.
      def self.gap_free_column?(seqs, column)
        seqs.keys.none? { |name| seqs[name][column] == "-" }
      end
      private_class_method :gap_free_column?

      # Index of the last column in which no sequence has a gap.
      # Every column, including column 0, is inspected; 0 is returned when
      # no column is gap-free.
      def self.find_end(seqs)
        last = seqs.values[0].size - 1
        last.downto(0) { |i| return i if gap_free_column?(seqs, i) }
        0
      end

      # Index of the first column in which no sequence has a gap.
      # Returns the last index (size - 1) when no column is gap-free.
      def self.find_start(seqs)
        size = seqs.values[0].size
        (0...size).each { |i| return i if gap_free_column?(seqs, i) }
        size - 1
      end

      # Builds the mask of +target+ (the first sequence when nil) against the
      # other sequences in +seqs+. The result starts as the lowercased target
      # sequence; each column is then rewritten by these rules, later rules
      # overriding earlier ones:
      #   "."       - the target equals every other sequence, or only one
      #               sequence has a base other than "-", "n" or "N"
      #   "*"       - no sequence has such a base
      #   uppercase - the target differs from every other sequence
      #   "-" / "_" - the target has a gap; "-" when it differs from every
      #               other sequence, "_" otherwise
      #   "|"       - the column is before +seq_start+ or after +seq_end+
      # A column where the target differs from only some of the other
      # sequences keeps its lowercase base.
      def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
        names = seqs.keys
        target = names[0] if target.nil?
        masked_snps = seqs[target].downcase
        expected_snps = names.size - 1 # differences needed to differ from all others

        masked_snps.size.times do |i|
          different = 0
          cov = 0
          gap = false
          names.each do |chr|
            base = seqs[chr][i]
            cov += 1 if base != "-" && base != "n" && base != "N"
            different += 1 if chr != target && masked_snps[i].upcase != base.upcase
            gap = true if base == "-" && chr == target
          end

          masked_snps[i] = "." if different == 0
          masked_snps[i] = "." if cov == 1
          masked_snps[i] = "*" if cov == 0
          masked_snps[i] = masked_snps[i].upcase if different == expected_snps
          masked_snps[i] = (different == expected_snps ? "-" : "_") if gap
          masked_snps[i] = "|" if i < seq_start || i > seq_end
        end
        masked_snps
      end

      # Summarises a mask string.
      #
      # "n"/"N", letters and "." each count as one aligned position; any other
      # character is ignored. Every uppercase letter appends to :specific the
      # number of "." seen since the previous uppercase letter. Every letter
      # (lower or upper case, except n/N) appends to :semispecific the number
      # of "." seen since the previous such letter.
      #
      # +triad+, +gene+, +genome+ and +reference+ are copied into the result.
      # The means and identities are Float::NAN when their list, or the mask,
      # has no counted positions (Float division by zero, no exception).
      def self.stats(mask, triad, gene, genome, reference)
        specific = []
        semispecific = []
        sp_i = 0
        semi = 0
        i = 0
        mask.to_s.each_char do |e|
          case e
          when "n", "N"
            i += 1
          when /[[:lower:]]/
            semispecific << semi
            semi = 0
            i += 1
          when /[[:upper:]]/
            specific << sp_i
            semispecific << semi
            sp_i = 0
            semi = 0
            i += 1
          when "."
            semi += 1
            sp_i += 1
            i += 1
          end
        end

        # Float mean computed here, so that the result does not depend on
        # Array#mean being patched in elsewhere.
        mean = ->(list) { list.inject(0.0) { |acc, el| acc + el } / list.size }

        {
          reference: reference,
          triad: triad,
          genome: genome,
          gene: gene,
          semispecific_mean: mean.call(semispecific),
          semispecific_bases: semispecific.size,
          semispecific_identity: (1 - (semispecific.size.to_f / i)) * 100,
          specific_mean: mean.call(specific),
          specific_bases: specific.size,
          specific_identity: (1 - (specific.size.to_f / i)) * 100,
          aligned_length: i,
          specific: specific,
          semispecific: semispecific
        }
      end
    end
  end
end
@@ -33,10 +33,6 @@ module Bio::PolyploidTools
33
33
  snp
34
34
  end
35
35
 
36
- def parse_snp
37
-
38
- end
39
-
40
36
  def parse_sequence_snp
41
37
  pos = 0
42
38
  match_data = /(?<pre>\w*)\[(?<org>[ACGT])\/(?<snp>[ACGT])\](?<pos>\w*)/.match(sequence_original.strip)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-polyploid-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ricardo H. Ramirez-Gonzalez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-23 00:00:00.000000000 Z
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: 2.6.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: descriptive_statistics
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: systemu
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -114,10 +128,12 @@ executables:
114
128
  - mafft_triads_promoters.rb
115
129
  - map_markers_to_contigs.rb
116
130
  - markers_in_region.rb
131
+ - mask_triads.rb
117
132
  - polymarker.rb
118
133
  - polymarker_capillary.rb
119
134
  - snp_position_to_polymarker.rb
120
135
  - snps_between_bams.rb
136
+ - tag_stats.rb
121
137
  - vcfLineToTable.rb
122
138
  extensions: []
123
139
  extra_rdoc_files:
@@ -146,10 +162,12 @@ files:
146
162
  - bin/mafft_triads_promoters.rb
147
163
  - bin/map_markers_to_contigs.rb
148
164
  - bin/markers_in_region.rb
165
+ - bin/mask_triads.rb
149
166
  - bin/polymarker.rb
150
167
  - bin/polymarker_capillary.rb
151
168
  - bin/snp_position_to_polymarker.rb
152
169
  - bin/snps_between_bams.rb
170
+ - bin/tag_stats.rb
153
171
  - bin/vcfLineToTable.rb
154
172
  - bio-polyploid-tools.gemspec
155
173
  - conf/defaults.rb
@@ -192,6 +210,7 @@ files:
192
210
  - lib/bio/PolyploidTools/ChromosomeArm.rb
193
211
  - lib/bio/PolyploidTools/ExonContainer.rb
194
212
  - lib/bio/PolyploidTools/Marker.rb
213
+ - lib/bio/PolyploidTools/Mask.rb
195
214
  - lib/bio/PolyploidTools/NoSNPSequence.rb
196
215
  - lib/bio/PolyploidTools/PrimerRegion.rb
197
216
  - lib/bio/PolyploidTools/SNP.rb
@@ -293,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
293
312
  version: '0'
294
313
  requirements: []
295
314
  rubyforge_project:
296
- rubygems_version: 2.6.14
315
+ rubygems_version: 2.7.4
297
316
  signing_key:
298
317
  specification_version: 4
299
318
  summary: Tool to work with polyploids, NGS and molecular biology