bio-polyploid-tools 0.8.3 → 0.8.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +2 -2
- data/Gemfile +1 -0
- data/README.md +8 -2
- data/VERSION +1 -1
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +2 -1
- data/bin/tag_stats.rb +75 -0
- data/bio-polyploid-tools.gemspec +11 -5
- data/lib/bio/PolyploidTools/Mask.rb +114 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +0 -4
- metadata +22 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 728d9fb436e9e7d26698d011179da63ba79fc45c40b0a771cafc5f4dc6d84bc3
|
4
|
+
data.tar.gz: 3c62bd8bcfcb5d3f460729f19f8382bd46c7821fcacb83d01b0ff3f336b38f1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99784b38c37f00e71c3c1fa07899ccca8e32b6ff27fc363bcece989e788c0db745a7db4ae566efb42b55742fd01e5a99adeb211a7d46029f6c148dba8e91e92a
|
7
|
+
data.tar.gz: 40f0990a5652374ea3bef3b0e8777882720626e33fdc448ff48c5f71141b87ce153680a0215ad95e13595ddead39db822d276911baf0647723cc7e8bf3195bdb
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -128,6 +128,14 @@ To use blast instead of exonerate, use the following command:
|
|
128
128
|
|
129
129
|
## Release Notes
|
130
130
|
|
131
|
+
### 0.8.4
|
132
|
+
|
133
|
+
* Added script ```tag_stats.rb``` that gets the descriptive statistics for a tag in a BAM file for each reference.
|
134
|
+
|
135
|
+
```bash
|
136
|
+
ruby tag_stats.rb -b HI.3206.006.Index_2.CS_125RNA_14d_Leaf8.sorted.bam -r /Users/ramirezr/Dropbox/JIC/expVIPMetadatas/RefSeq1.0/Genes/annotation/IWGSCv1.0_UTR_ALL.cdnas.fasta --tag 'NH'
|
137
|
+
```
|
138
|
+
|
131
139
|
### 0.8.3
|
132
140
|
|
133
141
|
* BUGFIX: ```ChromosomeArm.rb``` was fixed to conform to the module assumptions for the package.
|
@@ -171,8 +179,6 @@ To use blast instead of exonerate, use the following command:
|
|
171
179
|
|
172
180
|
# Notes
|
173
181
|
|
174
|
-
|
175
|
-
* BUG: If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
|
176
182
|
* BUG: Blocks with NNNs are picked and treated as semi-specific.
|
177
183
|
* BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
|
178
184
|
* TODO: Add a parameter file to configure the alignments.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.
|
1
|
+
0.8.4
|
data/bin/mask_triads.rb
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
require 'csv'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'tmpdir'
|
7
|
+
require 'bio-samtools'
|
8
|
+
require 'bio'
|
9
|
+
|
10
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
11
|
+
$: << File.expand_path('.')
|
12
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
13
|
+
require path
|
14
|
+
# Default settings; the command-line switches below override a subset of them.
# NOTE(review): :identity, :min_bases, :random_sample and :tmp_folder are not
# read anywhere in the visible part of this script (the temp dir is only
# reported on stderr) -- confirm whether they are still needed.
opts = {}
opts[:identity] = 50
opts[:min_bases] = 200
opts[:split_token] = "."
opts[:tmp_folder]  = Dir.mktmpdir
opts[:random_sample] = 0
opts[:output_folder] = "."

parser = OptionParser.new do |o|

  o.banner = "Usage: mask_triads.rb [options]"

  # Block parameters are named `arg` so they do not shadow the parser `o`.
  o.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |arg|
    opts[:triads] = arg
  end

  o.on("-f", "--fasta FILE" , "FASTA file containing all the possible peptide sequences. ") do |arg|
    opts[:fasta] = arg
  end

  o.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |arg|
    opts[:split_token] = arg
  end

  o.on("-o", "--output_folder DIR", "Location to save the alignment masks. If the alignment exists, it is recycled to avoid calling MAFFT again") do |arg|
    opts[:output_folder] = arg
  end
end
parser.parse!

# Both inputs are mandatory: fail early with the usage text instead of a
# TypeError from File.basename(nil) further down.
missing = [:triads, :fasta].reject { |key| opts[key] }
unless missing.empty?
  abort "Missing required option(s): #{missing.map { |key| "--#{key}" }.join(', ')}\n#{parser}"
end

split_token = opts[:split_token]
reference_name = File.basename opts[:fasta]
output_folder = opts[:output_folder]
@fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta: opts[:fasta])
@fasta_reference_db.load_fai_entries

# For every gene (sequence id up to the first split token) keep the longest
# indexed entry; on ties the first entry seen wins.
@cannonical = Hash.new
@fasta_reference_db.index.entries.each do |e|
  gene = e.id.split(split_token)[0]
  current = @cannonical[gene]
  @cannonical[gene] = e if current.nil? || e.length > current.length
end

$stderr.puts "#Loaded #{@cannonical.length} canonical sequences from #{@fasta_reference_db.index.size} in reference"

$stderr.puts "TMP dir: #{opts[:tmp_folder]}"
|
60
|
+
|
61
|
+
# Writes every name/sequence pair of +sequences+ to +filename+ as FASTA
# (a ">name" header line followed by the sequence on one line).
#
# sequences - any object responding to #each_pair that yields (name, sequence).
# filename  - path of the file to create or overwrite.
#
# The block form of File.open guarantees the handle is closed even when
# iterating or writing raises. Returns nil.
def write_fasta_from_hash(sequences, filename)
  File.open(filename, "w") do |out|
    sequences.each_pair do |name, seq|
      out.puts ">#{name}\n#{seq}\n"
    end
  end
  nil
end
|
68
|
+
|
69
|
+
# Aligns the canonical sequences of the three homoeologues with MAFFT and
# returns the resulting alignment.
#
# a, b, d - gene names used as keys into @cannonical.
#
# The aligner is created lazily and cached in @mafft: a local variable defined
# at the top level of the script is not visible inside a method body, so the
# previous reference to a bare `mafft` raised NameError on the first call.
def mafft_align(a, b, d)
  @mafft ||= Bio::MAFFT.new("mafft", ['--maxiterate', '1000', '--localpair', '--quiet'])
  to_align = Bio::Alignment::SequenceHash.new
  [a, b, d].each do |gene|
    to_align[gene] = @fasta_reference_db.fetch_sequence(@cannonical[gene].get_full_region)
  end
  @mafft.query_alignment(to_align).alignment
end
|
81
|
+
|
82
|
+
# Loads a previously saved alignment from the FASTA file at +path+.
# Only the first three records are stored (keyed by entry id); any later
# records in the file are read but ignored.
def read_alignment(path)
  alignment = Bio::Alignment::SequenceHash.new
  seen = 0
  Bio::FlatFile.open(Bio::FastaFormat, path) do |records|
    records.each do |record|
      alignment[record.entry_id] = record.seq if seen < 3
      seen += 1
    end
  end
  alignment
end
|
93
|
+
|
94
|
+
|
95
|
+
# Zlib::GzipWriter is used below; require it explicitly instead of relying on
# another library having loaded it.
require 'zlib'

mafft_opts = ['--maxiterate', '1000', '--localpair', '--quiet']
mafft = Bio::MAFFT.new( "mafft" , mafft_opts)
header_printed = false

# Make sure the destination exists before opening the report files in it.
FileUtils.mkdir_p output_folder
stats = File.open("#{output_folder}/#{reference_name}.identity_stats.csv", "w")
# Binary mode: gzip output must not go through newline translation.
distances = File.open("#{output_folder}/#{reference_name}.distance_between_snps.csv.gz", "wb")
gz = Zlib::GzipWriter.new(distances)
gz.write "triad,gene,genome,reference,type,distance\n"
|
103
|
+
|
104
|
+
# Emits one CSV row per value in +distances+ to +out+; every row carries the
# same triad, gene, genome, reference and type columns followed by the value.
# Returns +distances+ (the result of #each).
def write_distances(distances, triad, gene, genome, reference, type, out)
  row_prefix = "#{triad},#{gene},#{genome},#{reference},#{type},"
  distances.each do |distance|
    out.write "#{row_prefix}#{distance}\n"
  end
end
|
107
|
+
|
108
|
+
# Main loop: for every high-confidence 1:1:1 triad, obtain (or reuse) the
# alignment of its three homoeologues, build one mask per genome, and write
#   * per-gene identity statistics to the stats CSV,
#   * the distances between specific / semi-specific positions to the gzip CSV,
#   * the alignment plus the three masks to a FASTA file.
# The begin/ensure makes sure the report files are flushed and closed even if
# a triad fails, so the gzip stream is not left truncated.
begin
  CSV.foreach(opts[:triads], headers: true) do |row|
    next unless row["cardinality_abs"] == "1:1:1" && row["HC.LC"] == "HC-only"

    # Genome label => gene name, in the A, B, D order every output relies on.
    genes = { "A" => row['A'], "B" => row['B'], "D" => row['D'] }
    triad = row['group_id']
    cent_triad = triad.to_i / 100 # buckets the alignments into folders of 100 triads
    folder = "#{output_folder}/alignments/#{reference_name}/#{cent_triad}/"
    save_cds = "#{folder}/#{triad}.fa"
    aligned = File.file?(save_cds)
    aln = aligned ? read_alignment(save_cds) : mafft_align(genes["A"], genes["B"], genes["D"])
    # A recycled alignment is re-written under alignments_new/ rather than
    # over the file it was read from.
    folder = "#{output_folder}/alignments_new/#{reference_name}/#{cent_triad}/" if aligned
    FileUtils.mkdir_p folder
    save_cds = "#{folder}/#{triad}.fa"

    aln2 = Bio::Alignment.new aln
    seq_start = Bio::PolyploidTools::Mask.find_start(aln)
    seq_end = Bio::PolyploidTools::Mask.find_end(aln)

    genes.each do |genome, gene|
      mask = Bio::PolyploidTools::Mask.get(aln, seq_start: seq_start, seq_end: seq_end, target: gene)
      aln2.add_seq(mask, genome)
    end

    triad_stats = genes.map do |genome, gene|
      [genome, gene, Bio::PolyploidTools::Mask.stats(aln2[genome], triad, gene, genome, reference_name)]
    end

    # All "specific" rows (A, B, D) are written before the "semispecific" ones.
    ["specific", "semispecific"].each do |type|
      triad_stats.each do |genome, gene, gene_stats|
        write_distances(gene_stats[type.to_sym], triad, gene, genome, reference_name, type, gz)
      end
    end

    # The raw distance arrays do not belong in the summary CSV; replace them
    # with the length of the canonical sequence.
    triad_stats.each do |_genome, gene, gene_stats|
      gene_stats.delete(:semispecific)
      gene_stats.delete(:specific)
      gene_stats[:length] = @cannonical[gene].length
    end

    unless header_printed
      stats.puts triad_stats.first[2].keys.join(",")
      header_printed = true
    end
    triad_stats.each { |_genome, _gene, gene_stats| stats.puts gene_stats.values.join(",") }

    write_fasta_from_hash(aln2, save_cds)
  end
ensure
  gz.close
  distances.close unless distances.closed?
  stats.close
end
|
data/bin/polymarker.rb
CHANGED
@@ -350,10 +350,11 @@ container.add_alignments({
|
|
350
350
|
|
351
351
|
|
352
352
|
#4.1 generating primer3 file
|
353
|
-
write_status "
|
353
|
+
write_status "Finding genome-specific positions"
|
354
354
|
file = File.open(exons_filename, "w")
|
355
355
|
container.print_fasta_snp_exones(file)
|
356
356
|
file.close
|
357
|
+
write_status "Running primer3"
|
357
358
|
|
358
359
|
file = File.open(primer_3_input, "w")
|
359
360
|
|
data/bin/tag_stats.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
require 'csv'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'tmpdir'
|
7
|
+
require 'bio-samtools'
|
8
|
+
require 'bio'
|
9
|
+
require 'descriptive_statistics'
|
10
|
+
|
11
|
+
# Re-opens Bio::DB::Tag to replace its #set parser.
class Bio::DB::Tag
  # Splits a tag string by fixed character positions: characters 0-1 are the
  # tag name, character 3 is the type code and everything from character 5 on
  # is the value. Values of type "i" are converted with to_i; all other types
  # keep the raw string.
  def set(str)
    @tag, @type = str[0, 2], str[3]
    @value = str[5..-1]
    @value = @value.to_i if @type == "i"
  end
end
|
19
|
+
|
20
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
$: << File.expand_path('.')
path = File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
require path

opts = {}
opts[:tag] = "NH"
opts[:bam] = nil
opts[:out] = nil
opts[:ref] = nil

out = $stdout

parser = OptionParser.new do |o|
  o.banner = "Usage: tag_stats.rb [options]"

  # Block parameters are named `arg` so they do not shadow the parser `o`.
  o.on("-t", "--tag str", "The tag to extract (default NH)") do |arg|
    opts[:tag] = arg
  end

  o.on("-b", "--bam FILE" , "BAM file with the alignments ") do |arg|
    opts[:bam] = arg
  end

  o.on("-o", "--out_file CHAR", "File to save the stats") do |arg|
    opts[:out] = arg
  end

  o.on("-r", "--reference FILE", "Fasta file with the reference") do |arg|
    opts[:ref] = arg
  end
end
parser.parse!

# The sample name is derived from the BAM path, so the option is mandatory.
abort "Missing required option: --bam\n#{parser}" unless opts[:bam]

bam = Bio::DB::Sam.new(fasta: opts[:ref], bam: opts[:bam])
tag = opts[:tag]

sample = File.basename(opts[:bam], '.sorted.bam')
last_ref = ""
values = []
to_print = [:sum, :min, :max, :mean, :mode, :median, :q1, :q2, :q3]
# Percentiles reported in addition to the summary statistics above.
percentiles = [90, 95, 97.5, 99]
out = File.open(opts[:out], "w") if opts[:out]

# Prints one tab-separated line per statistic for the values collected for
# +ref+. Nothing is printed for the initial empty reference name.
print_stats = lambda do |ref|
  unless ref == ""
    desc_stats = values.descriptive_statistics
    to_print.each { |e| out.puts [sample, ref, e, desc_stats[e]].join("\t") }
    percentiles.each { |e| out.puts [sample, ref, "P#{e}", values.percentile(e)].join("\t") }
    out.puts [sample, ref, "N", values.length].join("\t")
  end
end

begin
  # Alignments are grouped by consecutive reference name, so the input is
  # expected to be sorted by reference.
  bam.view do |aln|
    if last_ref != aln.rname
      print_stats.call(last_ref)
      values.clear
      last_ref = aln.rname
    end
    # Alignments that do not carry the tag are skipped instead of raising on nil.
    # NOTE(review): assumes aln.tags returns nil for a missing key -- confirm.
    tag_entry = aln.tags[tag]
    values << tag_entry.value if tag_entry
  end
  # Flush the final reference: no later alignment triggers its report.
  print_stats.call(last_ref)
ensure
  out.close if opts[:out]
end
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,19 +2,19 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.8.
|
5
|
+
# stub: bio-polyploid-tools 0.8.4 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools".freeze
|
9
|
-
s.version = "0.8.
|
9
|
+
s.version = "0.8.4"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
|
14
|
-
s.date = "2018-
|
14
|
+
s.date = "2018-02-27"
|
15
15
|
s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
|
17
|
-
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "vcfLineToTable.rb".freeze]
|
17
|
+
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze]
|
18
18
|
s.extra_rdoc_files = [
|
19
19
|
"README",
|
20
20
|
"README.md"
|
@@ -42,10 +42,12 @@ Gem::Specification.new do |s|
|
|
42
42
|
"bin/mafft_triads_promoters.rb",
|
43
43
|
"bin/map_markers_to_contigs.rb",
|
44
44
|
"bin/markers_in_region.rb",
|
45
|
+
"bin/mask_triads.rb",
|
45
46
|
"bin/polymarker.rb",
|
46
47
|
"bin/polymarker_capillary.rb",
|
47
48
|
"bin/snp_position_to_polymarker.rb",
|
48
49
|
"bin/snps_between_bams.rb",
|
50
|
+
"bin/tag_stats.rb",
|
49
51
|
"bin/vcfLineToTable.rb",
|
50
52
|
"bio-polyploid-tools.gemspec",
|
51
53
|
"conf/defaults.rb",
|
@@ -88,6 +90,7 @@ Gem::Specification.new do |s|
|
|
88
90
|
"lib/bio/PolyploidTools/ChromosomeArm.rb",
|
89
91
|
"lib/bio/PolyploidTools/ExonContainer.rb",
|
90
92
|
"lib/bio/PolyploidTools/Marker.rb",
|
93
|
+
"lib/bio/PolyploidTools/Mask.rb",
|
91
94
|
"lib/bio/PolyploidTools/NoSNPSequence.rb",
|
92
95
|
"lib/bio/PolyploidTools/PrimerRegion.rb",
|
93
96
|
"lib/bio/PolyploidTools/SNP.rb",
|
@@ -172,7 +175,7 @@ Gem::Specification.new do |s|
|
|
172
175
|
]
|
173
176
|
s.homepage = "http://github.com/tgac/bioruby-polyploid-tools".freeze
|
174
177
|
s.licenses = ["MIT".freeze]
|
175
|
-
s.rubygems_version = "2.
|
178
|
+
s.rubygems_version = "2.7.4".freeze
|
176
179
|
s.summary = "Tool to work with polyploids, NGS and molecular biology".freeze
|
177
180
|
|
178
181
|
if s.respond_to? :specification_version then
|
@@ -181,6 +184,7 @@ Gem::Specification.new do |s|
|
|
181
184
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
182
185
|
s.add_runtime_dependency(%q<bio>.freeze, [">= 1.5.1"])
|
183
186
|
s.add_runtime_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
|
187
|
+
s.add_runtime_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
|
184
188
|
s.add_runtime_dependency(%q<systemu>.freeze, [">= 2.5.2"])
|
185
189
|
s.add_development_dependency(%q<shoulda>.freeze, [">= 2.10"])
|
186
190
|
s.add_development_dependency(%q<test-unit>.freeze, [">= 0"])
|
@@ -188,6 +192,7 @@ Gem::Specification.new do |s|
|
|
188
192
|
else
|
189
193
|
s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
|
190
194
|
s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
|
195
|
+
s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
|
191
196
|
s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
|
192
197
|
s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
|
193
198
|
s.add_dependency(%q<test-unit>.freeze, [">= 0"])
|
@@ -196,6 +201,7 @@ Gem::Specification.new do |s|
|
|
196
201
|
else
|
197
202
|
s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
|
198
203
|
s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
|
204
|
+
s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
|
199
205
|
s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
|
200
206
|
s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
|
201
207
|
s.add_dependency(%q<test-unit>.freeze, [">= 0"])
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# Core extension used by the mask statistics.
#
# NOTE: this re-opens Array and replaces the built-in Array#sum for the whole
# process. To avoid breaking code that calls sum the core way, the override
# accepts an optional initial value and an optional block; only the default
# initial value differs (0.0 instead of 0), so a plain `sum` keeps returning
# a Float as before.
class Array
  # Adds up the elements (or the block's result for each element), starting
  # from +init+.
  def sum(init = 0.0)
    if block_given?
      inject(init) { |result, el| result + yield(el) }
    else
      inject(init) { |result, el| result + el }
    end
  end

  # Arithmetic mean as a Float; NaN for an empty array (0.0 / 0).
  def mean
    sum / size
  end
end
|
10
|
+
|
11
|
+
# Helpers that turn a multiple alignment (name => aligned sequence) into a
# per-position mask for one target sequence and summarise that mask.
module Bio::PolyploidTools::Mask

  # Scans the alignment columns from the right (column 0 is never inspected)
  # and stops after the first column in which no sequence has a "-".
  # Returns the index of that column, or 1 when no such column was found.
  def self.find_end(seqs)
    labels = seqs.keys
    pos = seqs.values[0].size - 1
    gaps = 3 # any positive value, so the loop inspects at least one column
    while pos > 0 && gaps > 0
      gaps = labels.count { |label| seqs[label][pos] == "-" }
      pos -= 1
    end
    pos + 1
  end

  # Scans the alignment columns from the left and stops after the first
  # column in which no sequence has a "-". Returns the index of that column,
  # or the last index when every column contains a gap.
  def self.find_start(seqs)
    width = seqs.values[0].size
    labels = seqs.keys
    pos = 0
    gaps = 3 # any positive value, so the loop inspects at least one column
    while pos < width && gaps > 0
      gaps = labels.count { |label| seqs[label][pos] == "-" }
      pos += 1
    end
    pos - 1
  end

  # Builds the mask of +target+ (default: the first sequence) against the
  # other sequences. One symbol per alignment column:
  #   "."        target equals every other sequence, or only one sequence is covered
  #   "*"        no sequence is covered (every one is "-", "n" or "N")
  #   upper case target differs from every other sequence
  #   lower case target differs from some, but not all, other sequences
  #   "-" / "_"  target has a gap and differs from all / not all other sequences
  #   "|"        column lies before seq_start or after seq_end
  # Later rules in this list override earlier ones.
  def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
    names = seqs.keys
    target = names[0] if target.nil?
    masked_snps = seqs[target].downcase
    expected_snps = names.size - 1
    uncovered = ["-", "n", "N"]
    pos = 0
    while pos < masked_snps.size
      own_base = masked_snps[pos].upcase
      cov = names.count { |name| !uncovered.include?(seqs[name][pos]) }
      different = names.count { |name| name != target && own_base != seqs[name][pos].upcase }
      gap = seqs[target][pos] == "-"

      symbol = masked_snps[pos]
      symbol = "." if different.zero? || cov == 1
      symbol = "*" if cov.zero?
      symbol = symbol.upcase if different == expected_snps
      symbol = (different == expected_snps ? "-" : "_") if gap
      symbol = "|" if pos < seq_start || pos > seq_end
      masked_snps[pos] = symbol
      pos += 1
    end
    masked_snps
  end

  # Summarises a mask produced by .get. Walks the mask and records, for every
  # specific (upper case) and semi-specific (upper or lower case) position,
  # the number of "." symbols seen since the previous one. "n"/"N", letters
  # and "." count towards the aligned length; any other symbol is ignored.
  # Returns a Hash with the identifying columns, the summary values and the
  # two raw distance arrays (:specific, :semispecific).
  def self.stats(mask, triad, gene, genome, reference)
    specific = []
    semispecific = []
    since_specific = 0
    since_semispecific = 0
    aligned = 0
    mask.to_s.each_char do |symbol|
      if symbol == "n" || symbol == "N"
        aligned += 1
      elsif symbol =~ /[[:lower:]]/
        semispecific << since_semispecific
        since_semispecific = 0
        aligned += 1
      elsif symbol =~ /[[:upper:]]/
        specific << since_specific
        semispecific << since_semispecific
        since_specific = 0
        since_semispecific = 0
        aligned += 1
      elsif symbol == "."
        since_semispecific += 1
        since_specific += 1
        aligned += 1
      end
    end
    {
      reference: reference,
      triad: triad,
      genome: genome,
      gene: gene,
      semispecific_mean: semispecific.mean,
      semispecific_bases: semispecific.size,
      semispecific_identity: (1 - (semispecific.size.to_f / aligned)) * 100 ,
      specific_mean: specific.mean,
      specific_bases: specific.size,
      specific_identity: (1 - (specific.size.to_f / aligned )) * 100,
      aligned_length: aligned,
      specific: specific,
      semispecific: semispecific
    }
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-polyploid-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricardo H. Ramirez-Gonzalez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 2.6.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: descriptive_statistics
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: systemu
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,10 +128,12 @@ executables:
|
|
114
128
|
- mafft_triads_promoters.rb
|
115
129
|
- map_markers_to_contigs.rb
|
116
130
|
- markers_in_region.rb
|
131
|
+
- mask_triads.rb
|
117
132
|
- polymarker.rb
|
118
133
|
- polymarker_capillary.rb
|
119
134
|
- snp_position_to_polymarker.rb
|
120
135
|
- snps_between_bams.rb
|
136
|
+
- tag_stats.rb
|
121
137
|
- vcfLineToTable.rb
|
122
138
|
extensions: []
|
123
139
|
extra_rdoc_files:
|
@@ -146,10 +162,12 @@ files:
|
|
146
162
|
- bin/mafft_triads_promoters.rb
|
147
163
|
- bin/map_markers_to_contigs.rb
|
148
164
|
- bin/markers_in_region.rb
|
165
|
+
- bin/mask_triads.rb
|
149
166
|
- bin/polymarker.rb
|
150
167
|
- bin/polymarker_capillary.rb
|
151
168
|
- bin/snp_position_to_polymarker.rb
|
152
169
|
- bin/snps_between_bams.rb
|
170
|
+
- bin/tag_stats.rb
|
153
171
|
- bin/vcfLineToTable.rb
|
154
172
|
- bio-polyploid-tools.gemspec
|
155
173
|
- conf/defaults.rb
|
@@ -192,6 +210,7 @@ files:
|
|
192
210
|
- lib/bio/PolyploidTools/ChromosomeArm.rb
|
193
211
|
- lib/bio/PolyploidTools/ExonContainer.rb
|
194
212
|
- lib/bio/PolyploidTools/Marker.rb
|
213
|
+
- lib/bio/PolyploidTools/Mask.rb
|
195
214
|
- lib/bio/PolyploidTools/NoSNPSequence.rb
|
196
215
|
- lib/bio/PolyploidTools/PrimerRegion.rb
|
197
216
|
- lib/bio/PolyploidTools/SNP.rb
|
@@ -293,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
293
312
|
version: '0'
|
294
313
|
requirements: []
|
295
314
|
rubyforge_project:
|
296
|
-
rubygems_version: 2.
|
315
|
+
rubygems_version: 2.7.4
|
297
316
|
signing_key:
|
298
317
|
specification_version: 4
|
299
318
|
summary: Tool to work with polyploids, NGS and molecular biology
|