bio-polyploid-tools 0.8.3 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +2 -2
- data/Gemfile +1 -0
- data/README.md +8 -2
- data/VERSION +1 -1
- data/bin/mask_triads.rb +169 -0
- data/bin/polymarker.rb +2 -1
- data/bin/tag_stats.rb +75 -0
- data/bio-polyploid-tools.gemspec +11 -5
- data/lib/bio/PolyploidTools/Mask.rb +114 -0
- data/lib/bio/PolyploidTools/SNPSequence.rb +0 -4
- metadata +22 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 728d9fb436e9e7d26698d011179da63ba79fc45c40b0a771cafc5f4dc6d84bc3
|
4
|
+
data.tar.gz: 3c62bd8bcfcb5d3f460729f19f8382bd46c7821fcacb83d01b0ff3f336b38f1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 99784b38c37f00e71c3c1fa07899ccca8e32b6ff27fc363bcece989e788c0db745a7db4ae566efb42b55742fd01e5a99adeb211a7d46029f6c148dba8e91e92a
|
7
|
+
data.tar.gz: 40f0990a5652374ea3bef3b0e8777882720626e33fdc448ff48c5f71141b87ce153680a0215ad95e13595ddead39db822d276911baf0647723cc7e8bf3195bdb
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -128,6 +128,14 @@ To use blast instead of exonerate, use the following command:
|
|
128
128
|
|
129
129
|
## Release Notes
|
130
130
|
|
131
|
+
### 0.8.4
|
132
|
+
|
133
|
+
* Added script ```tag_stats.rb``` that gets the descriptive statistics for a tag in a BAM file for each reference.
|
134
|
+
|
135
|
+
```bash
|
136
|
+
ruby tag_stats.rb -b HI.3206.006.Index_2.CS_125RNA_14d_Leaf8.sorted.bam -r /Users/ramirezr/Dropbox/JIC/expVIPMetadatas/RefSeq1.0/Genes/annotation/IWGSCv1.0_UTR_ALL.cdnas.fasta --tag 'NH'
|
137
|
+
```
|
138
|
+
|
131
139
|
### 0.8.3
|
132
140
|
|
133
141
|
* BUGFIX: ```ChromosomeArm.rb``` was fixed to conform the module assumptions for the package.
|
@@ -171,8 +179,6 @@ To use blast instead of exonerate, use the following command:
|
|
171
179
|
|
172
180
|
# Notes
|
173
181
|
|
174
|
-
|
175
|
-
* BUG: If the SNP is in a gap in the alignment to the chromosomes, it is ignored.
|
176
182
|
* BUG: Blocks with NNNs are picked and treated as semi-specific.
|
177
183
|
* BUG: If the name of the reference has a space, the ID is not chopped. ">gene_1 (G12A)" should be treated as ">gene_1".
|
178
184
|
* TODO: Add a parameter file to configure the alignments.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.
|
1
|
+
0.8.4
|
data/bin/mask_triads.rb
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
require 'csv'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'tmpdir'
|
7
|
+
require 'bio-samtools'
|
8
|
+
require 'bio'
|
9
|
+
|
10
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
11
|
+
$: << File.expand_path('.')
|
12
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
13
|
+
require path
|
14
|
+
opts = {}
|
15
|
+
opts[:identity] = 50
|
16
|
+
opts[:min_bases] = 200
|
17
|
+
opts[:split_token] = "."
|
18
|
+
opts[:tmp_folder] = Dir.mktmpdir
|
19
|
+
opts[:random_sample] = 0
|
20
|
+
opts[:output_folder] = "."
|
21
|
+
|
22
|
+
OptionParser.new do |o|
|
23
|
+
|
24
|
+
o.banner = "Usage: mask_triads.rb [options]"
|
25
|
+
|
26
|
+
o.on("-t", "--triads FILE", "CSV file with the gene triad names in the named columns 'A','B' and 'D' ") do |o|
|
27
|
+
opts[:triads] = o
|
28
|
+
end
|
29
|
+
|
30
|
+
o.on("-f", "--fasta FILE" , "FASTA file containing all the possible peptide sequences. ") do |o|
|
31
|
+
opts[:fasta] = o
|
32
|
+
end
|
33
|
+
|
34
|
+
o.on("-s", "--split_token CHAR", "Character used to split the sequence name. The name will be evarything before this token on the name of the sequences") do |o|
|
35
|
+
opts[:split_token] = o
|
36
|
+
end
|
37
|
+
|
38
|
+
o.on("-o", "--output_folder DIR", "Location to save the alignment masks. If the alignment exists, it is recycled to avoid calling MAFFT again") do |o|
|
39
|
+
opts[:output_folder] = o
|
40
|
+
end
|
41
|
+
end.parse!
|
42
|
+
|
43
|
+
|
44
|
+
split_token = opts[:split_token]
|
45
|
+
reference_name = File.basename opts[:fasta]
|
46
|
+
output_folder = opts[:output_folder]
|
47
|
+
@fasta_reference_db = Bio::DB::Fasta::FastaFile.new(fasta: opts[:fasta])
|
48
|
+
@fasta_reference_db.load_fai_entries
|
49
|
+
#puts @fasta_reference_db.index.entries
|
50
|
+
@cannonical = Hash.new
|
51
|
+
@fasta_reference_db.index.entries.each do |e|
|
52
|
+
gene = e.id.split(split_token)[0]
|
53
|
+
@cannonical[gene] = e unless @cannonical[gene]
|
54
|
+
@cannonical[gene] = e if e.length > @cannonical[gene].length
|
55
|
+
end
|
56
|
+
|
57
|
+
$stderr.puts "#Loaded #{@cannonical.length} canonical sequences from #{@fasta_reference_db.index.size} in reference"
|
58
|
+
|
59
|
+
$stderr.puts "TMP dir: #{opts[:tmp_folder]}"
|
60
|
+
|
61
|
+
# Writes every name/sequence pair of +sequences+ to +filename+ in FASTA format.
#
# Each pair becomes a ">name" header line followed by the sequence on one line.
# The file is opened with the block form of File.open so the handle is closed
# even when writing raises half-way through.
#
# @param sequences [#each_pair] mapping of sequence name to sequence text
# @param filename [String] path of the file to create (or overwrite)
# @return [nil]
def write_fasta_from_hash(sequences, filename)
  File.open(filename, "w") do |out|
    sequences.each_pair do |chromosome, exon_seq|
      out.puts ">#{chromosome}\n#{exon_seq}\n"
    end
  end
  nil
end
|
68
|
+
|
69
|
+
# Aligns the canonical sequences of the three genes of a triad with MAFFT.
#
# For each gene name the entry stored in @cannonical is looked up, its full
# region is fetched from @fasta_reference_db, and the three sequences are
# passed to the aligner keyed by gene name (in the order a, b, d).
#
# The original body called a bare +mafft+, which is a local variable of the
# top-level script and therefore not visible inside a +def+; the aligner is now
# an optional keyword argument. Its default is built with the same options the
# script uses for its own MAFFT instance.
#
# @param a [String] name of the first gene (key of @cannonical)
# @param b [String] name of the second gene (key of @cannonical)
# @param d [String] name of the third gene (key of @cannonical)
# @param mafft [#query_alignment] aligner to use
# @return the +alignment+ of the report returned by the aligner
def mafft_align(a, b, d, mafft: Bio::MAFFT.new("mafft", ['--maxiterate', '1000', '--localpair', '--quiet']))
  to_align = Bio::Alignment::SequenceHash.new
  [a, b, d].each do |gene|
    to_align[gene] = @fasta_reference_db.fetch_sequence(@cannonical[gene].get_full_region)
  end
  mafft.query_alignment(to_align).alignment
end
|
81
|
+
|
82
|
+
# Loads a previously saved alignment from a FASTA file.
#
# Only the first three records of the file are kept; any later records are
# read but not stored.
#
# @param path [String] FASTA file to read
# @return [Bio::Alignment::SequenceHash] entry id => sequence for the kept records
def read_alignment(path)
  alignment = Bio::Alignment::SequenceHash.new
  seen = 0
  Bio::FlatFile.open(Bio::FastaFormat, path) do |records|
    records.each do |record|
      alignment[record.entry_id] = record.seq unless seen >= 3
      seen += 1
    end
  end
  alignment
end
|
93
|
+
|
94
|
+
|
95
|
+
mafft_opts = ['--maxiterate', '1000', '--localpair', '--quiet']
|
96
|
+
mafft = Bio::MAFFT.new( "mafft" , mafft_opts)
|
97
|
+
header_printed = false
|
98
|
+
stats = File.open("#{output_folder}/#{reference_name}.identity_stats.csv", "w")
|
99
|
+
distances = File.open("#{output_folder}/#{reference_name}.distance_between_snps.csv.gz", "w")
|
100
|
+
gz = Zlib::GzipWriter.new(distances)
|
101
|
+
gz.write "triad,gene,genome,reference,type,distance\n"
|
102
|
+
#gz.close
|
103
|
+
|
104
|
+
# Writes one CSV row per distance value to +out+.
#
# Every row has the form "triad,gene,genome,reference,type,distance".
#
# @param distances [Array] values to emit, one row each
# @param triad identifier written in the first column
# @param gene gene name written in the second column
# @param genome genome label written in the third column
# @param reference reference name written in the fourth column
# @param type label written in the fifth column
# @param out [#write] destination stream
# @return [Array] the +distances+ argument
def write_distances(distances, triad, gene, genome, reference, type, out)
  row_prefix = "#{triad},#{gene},#{genome},#{reference},#{type}"
  distances.each do |distance|
    out.write "#{row_prefix},#{distance}\n"
  end
end
|
107
|
+
|
108
|
+
i = 0
|
109
|
+
CSV.foreach(opts[:triads], headers:true ) do |row|
|
110
|
+
next unless row["cardinality_abs"] == "1:1:1" and row["HC.LC"] == "HC-only"
|
111
|
+
a = row['A']
|
112
|
+
b = row['B']
|
113
|
+
d = row['D']
|
114
|
+
triad = row['group_id']
|
115
|
+
cent_triad = triad.to_i / 100
|
116
|
+
folder = "#{output_folder}/alignments/#{reference_name}/#{cent_triad}/"
|
117
|
+
save_cds = "#{folder}/#{triad}.fa"
|
118
|
+
aligned = File.file?(save_cds)
|
119
|
+
aln = aligned ? read_alignment(save_cds) : mafft_align(a,b,d)
|
120
|
+
folder = "#{output_folder}/alignments_new/#{reference_name}/#{cent_triad}/" if aligned
|
121
|
+
FileUtils.mkdir_p folder
|
122
|
+
save_cds = "#{folder}/#{triad}.fa"
|
123
|
+
|
124
|
+
aln2 = Bio::Alignment.new aln
|
125
|
+
seq_start = Bio::PolyploidTools::Mask.find_start(aln)
|
126
|
+
seq_end = Bio::PolyploidTools::Mask.find_end(aln)
|
127
|
+
#puts "#{triad}: #{seq_start}-#{seq_end}"
|
128
|
+
|
129
|
+
|
130
|
+
aln2.add_seq(Bio::PolyploidTools::Mask.get(aln,seq_start: seq_start, seq_end: seq_end, target: a), "A")
|
131
|
+
aln2.add_seq(Bio::PolyploidTools::Mask.get(aln,seq_start: seq_start, seq_end: seq_end, target: b), "B")
|
132
|
+
aln2.add_seq(Bio::PolyploidTools::Mask.get(aln,seq_start: seq_start, seq_end: seq_end, target: d), "D")
|
133
|
+
|
134
|
+
a_stats = Bio::PolyploidTools::Mask.stats(aln2["A"], triad, a, "A", reference_name)
|
135
|
+
b_stats = Bio::PolyploidTools::Mask.stats(aln2["B"], triad, b, "B", reference_name)
|
136
|
+
d_stats = Bio::PolyploidTools::Mask.stats(aln2["D"], triad, d, "D", reference_name)
|
137
|
+
|
138
|
+
write_distances(a_stats[:specific], triad, a, "A", reference_name, "specific", gz)
|
139
|
+
write_distances(b_stats[:specific], triad, b, "B", reference_name, "specific", gz)
|
140
|
+
write_distances(d_stats[:specific], triad, d, "D", reference_name, "specific", gz)
|
141
|
+
|
142
|
+
write_distances(a_stats[:semispecific], triad, a, "A", reference_name, "semispecific", gz)
|
143
|
+
write_distances(b_stats[:semispecific], triad, b, "B", reference_name, "semispecific", gz)
|
144
|
+
write_distances(d_stats[:semispecific], triad, d, "D", reference_name, "semispecific", gz)
|
145
|
+
|
146
|
+
a_stats.delete(:semispecific)
|
147
|
+
b_stats.delete(:semispecific)
|
148
|
+
d_stats.delete(:semispecific)
|
149
|
+
|
150
|
+
a_stats.delete(:specific)
|
151
|
+
b_stats.delete(:specific)
|
152
|
+
d_stats.delete(:specific)
|
153
|
+
|
154
|
+
a_stats[:length] = @cannonical[a].length
|
155
|
+
b_stats[:length] = @cannonical[b].length
|
156
|
+
d_stats[:length] = @cannonical[d].length
|
157
|
+
|
158
|
+
stats.puts a_stats.keys.join(",") unless header_printed
|
159
|
+
stats.puts a_stats.values.join(",")
|
160
|
+
stats.puts b_stats.values.join(",")
|
161
|
+
stats.puts d_stats.values.join(",")
|
162
|
+
header_printed = true
|
163
|
+
|
164
|
+
write_fasta_from_hash(aln2, save_cds)
|
165
|
+
i += 1
|
166
|
+
end
|
167
|
+
gz.close
|
168
|
+
distances.close
|
169
|
+
stats.close
|
data/bin/polymarker.rb
CHANGED
@@ -350,10 +350,11 @@ container.add_alignments({
|
|
350
350
|
|
351
351
|
|
352
352
|
#4.1 generating primer3 file
|
353
|
-
write_status "
|
353
|
+
write_status "Finding genome-specific positions"
|
354
354
|
file = File.open(exons_filename, "w")
|
355
355
|
container.print_fasta_snp_exones(file)
|
356
356
|
file.close
|
357
|
+
write_status "Running primer3"
|
357
358
|
|
358
359
|
file = File.open(primer_3_input, "w")
|
359
360
|
|
data/bin/tag_stats.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
require 'csv'
|
5
|
+
require 'fileutils'
|
6
|
+
require 'tmpdir'
|
7
|
+
require 'bio-samtools'
|
8
|
+
require 'bio'
|
9
|
+
require 'descriptive_statistics'
|
10
|
+
|
11
|
+
# Reopens Bio::DB::Tag to replace how a tag is parsed from its text form.
class Bio::DB::Tag
  # Parses a tag string by fixed character positions: characters 0-1 are the
  # tag name, character 3 is the type and everything from character 5 onwards
  # is the value (i.e. a "TG:t:value" layout).
  #
  # The value is converted to an Integer when the type is "i"; otherwise it is
  # kept as the raw text.
  #
  # @param str [String] textual tag
  def set(str)
    @tag, @type, @value = str[0, 2], str[3], str[5..-1]
    @value = @value.to_i if @type == "i"
  end
end
|
19
|
+
|
20
|
+
$: << File.expand_path(File.dirname(__FILE__) + '/../lib')
|
21
|
+
$: << File.expand_path('.')
|
22
|
+
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
23
|
+
require path
|
24
|
+
opts = {}
|
25
|
+
opts[:tag] = "NH"
|
26
|
+
opts[:bam] = nil
|
27
|
+
opts[:out] = nil
|
28
|
+
opts[:ref] = nil
|
29
|
+
|
30
|
+
out = $stdout
|
31
|
+
|
32
|
+
OptionParser.new do |o|
|
33
|
+
o.banner = "Usage: tag_stats.rb [options]"
|
34
|
+
|
35
|
+
o.on("-t", "--tag str", "The tag to extract (default NH)") do |o|
|
36
|
+
opts[:tag] = o
|
37
|
+
end
|
38
|
+
|
39
|
+
o.on("-b", "--bam FILE" , "BAM file with the alignments ") do |o|
|
40
|
+
opts[:bam] = o
|
41
|
+
end
|
42
|
+
|
43
|
+
o.on("-o", "--out_file CHAR", "File to save the stats") do |o|
|
44
|
+
opts[:out] = o
|
45
|
+
end
|
46
|
+
|
47
|
+
o.on("-r", "--reference FILE", "Fasta file with the reference") do |o|
|
48
|
+
opts[:ref] = o
|
49
|
+
end
|
50
|
+
end.parse!
|
51
|
+
|
52
|
+
bam = Bio::DB::Sam.new(fasta: opts[:ref], bam: opts[:bam])
|
53
|
+
tag = opts[:tag]
|
54
|
+
|
55
|
+
sample = File.basename(opts[:bam], '.sorted.bam')
|
56
|
+
last_ref = ""
|
57
|
+
values = []
|
58
|
+
to_print = [:sum, :min, :max, :mean, :mode, :median, :q1, :q2, :q3]
|
59
|
+
percentiles = [90, 95, 97.5, 99]
|
60
|
+
#Add the 90, 95, 97.5 and 99 percentiles.
|
61
|
+
out = File.open(opts[:out], "w") if opts[:out]
|
62
|
+
# Prints the statistics gathered in +values+ for reference +ref+: one
# tab-separated row per entry of +to_print+, one per percentile, and a final
# "N" row with the number of values. Nothing is printed for the initial empty
# reference name.
print_stats = lambda do |ref|
  unless ref == ""
    desc_stats = values.descriptive_statistics
    to_print.each { |e| out.puts [sample, ref, e, desc_stats[e]].join("\t") }
    percentiles.each { |e| out.puts [sample, ref, "P#{e}", values.percentile(e)].join("\t") }
    out.puts [sample, ref, "N", values.length].join("\t")
  end
end

# Alignments are consumed in the order yielded by bam.view; the tag values of
# consecutive alignments with the same reference name are accumulated and
# reported when the reference name changes.
bam.view do |aln|
  if last_ref != aln.rname
    print_stats.call(last_ref)
    values.clear
    last_ref = aln.rname
  end
  values << aln.tags[tag].value
end

# The loop only reports a reference once the next one starts, so the values of
# the final reference are still pending here and must be flushed explicitly.
print_stats.call(last_ref)

out.close if opts[:out]
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,19 +2,19 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.8.
|
5
|
+
# stub: bio-polyploid-tools 0.8.4 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools".freeze
|
9
|
-
s.version = "0.8.
|
9
|
+
s.version = "0.8.4"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
|
14
|
-
s.date = "2018-
|
14
|
+
s.date = "2018-02-27"
|
15
15
|
s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
|
17
|
-
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "vcfLineToTable.rb".freeze]
|
17
|
+
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "mask_triads.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "tag_stats.rb".freeze, "vcfLineToTable.rb".freeze]
|
18
18
|
s.extra_rdoc_files = [
|
19
19
|
"README",
|
20
20
|
"README.md"
|
@@ -42,10 +42,12 @@ Gem::Specification.new do |s|
|
|
42
42
|
"bin/mafft_triads_promoters.rb",
|
43
43
|
"bin/map_markers_to_contigs.rb",
|
44
44
|
"bin/markers_in_region.rb",
|
45
|
+
"bin/mask_triads.rb",
|
45
46
|
"bin/polymarker.rb",
|
46
47
|
"bin/polymarker_capillary.rb",
|
47
48
|
"bin/snp_position_to_polymarker.rb",
|
48
49
|
"bin/snps_between_bams.rb",
|
50
|
+
"bin/tag_stats.rb",
|
49
51
|
"bin/vcfLineToTable.rb",
|
50
52
|
"bio-polyploid-tools.gemspec",
|
51
53
|
"conf/defaults.rb",
|
@@ -88,6 +90,7 @@ Gem::Specification.new do |s|
|
|
88
90
|
"lib/bio/PolyploidTools/ChromosomeArm.rb",
|
89
91
|
"lib/bio/PolyploidTools/ExonContainer.rb",
|
90
92
|
"lib/bio/PolyploidTools/Marker.rb",
|
93
|
+
"lib/bio/PolyploidTools/Mask.rb",
|
91
94
|
"lib/bio/PolyploidTools/NoSNPSequence.rb",
|
92
95
|
"lib/bio/PolyploidTools/PrimerRegion.rb",
|
93
96
|
"lib/bio/PolyploidTools/SNP.rb",
|
@@ -172,7 +175,7 @@ Gem::Specification.new do |s|
|
|
172
175
|
]
|
173
176
|
s.homepage = "http://github.com/tgac/bioruby-polyploid-tools".freeze
|
174
177
|
s.licenses = ["MIT".freeze]
|
175
|
-
s.rubygems_version = "2.
|
178
|
+
s.rubygems_version = "2.7.4".freeze
|
176
179
|
s.summary = "Tool to work with polyploids, NGS and molecular biology".freeze
|
177
180
|
|
178
181
|
if s.respond_to? :specification_version then
|
@@ -181,6 +184,7 @@ Gem::Specification.new do |s|
|
|
181
184
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
182
185
|
s.add_runtime_dependency(%q<bio>.freeze, [">= 1.5.1"])
|
183
186
|
s.add_runtime_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
|
187
|
+
s.add_runtime_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
|
184
188
|
s.add_runtime_dependency(%q<systemu>.freeze, [">= 2.5.2"])
|
185
189
|
s.add_development_dependency(%q<shoulda>.freeze, [">= 2.10"])
|
186
190
|
s.add_development_dependency(%q<test-unit>.freeze, [">= 0"])
|
@@ -188,6 +192,7 @@ Gem::Specification.new do |s|
|
|
188
192
|
else
|
189
193
|
s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
|
190
194
|
s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
|
195
|
+
s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
|
191
196
|
s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
|
192
197
|
s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
|
193
198
|
s.add_dependency(%q<test-unit>.freeze, [">= 0"])
|
@@ -196,6 +201,7 @@ Gem::Specification.new do |s|
|
|
196
201
|
else
|
197
202
|
s.add_dependency(%q<bio>.freeze, [">= 1.5.1"])
|
198
203
|
s.add_dependency(%q<bio-samtools>.freeze, [">= 2.6.2"])
|
204
|
+
s.add_dependency(%q<descriptive_statistics>.freeze, [">= 0"])
|
199
205
|
s.add_dependency(%q<systemu>.freeze, [">= 2.5.2"])
|
200
206
|
s.add_dependency(%q<shoulda>.freeze, [">= 2.10"])
|
201
207
|
s.add_dependency(%q<test-unit>.freeze, [">= 0"])
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# Numeric helpers added to every Array.
class Array
  # Adds up all elements starting from 0.0, so the result of summing plain
  # numbers is a Float. Takes no arguments and no block.
  def sum
    reduce(0.0, :+)
  end

  # Arithmetic mean of the elements: the sum divided by the element count.
  # For an empty array this is 0.0 / 0, i.e. NaN.
  def mean
    sum / length
  end
end
|
10
|
+
|
11
|
+
# Helpers to build and summarise a per-column "mask" of one sequence of an
# alignment against the other sequences of the same alignment.
module Bio::PolyploidTools::Mask

  # Scans the alignment columns from the last one towards the first and stops
  # at the first column in which no sequence has a gap ("-"). Column 0 is never
  # examined.
  #
  # @param seqs hash-like alignment, name => aligned sequence
  # @return [Integer] index of the column where the scan stopped
  def self.find_end(seqs)
    names = seqs.keys
    column = seqs.values[0].size - 1
    gaps = 3 # any positive value, so that the first column is always examined
    while column > 0 && gaps > 0
      gaps = names.count { |chr| seqs[chr][column] == "-" }
      column -= 1
    end
    column + 1
  end

  # Scans the alignment columns from the first one onwards and stops at the
  # first column in which no sequence has a gap ("-").
  #
  # @param seqs hash-like alignment, name => aligned sequence
  # @return [Integer] index of the column where the scan stopped
  def self.find_start(seqs)
    names = seqs.keys
    width = seqs.values[0].size
    column = 0
    gaps = 3 # any positive value, so that the first column is always examined
    while column < width && gaps > 0
      gaps = names.count { |chr| seqs[chr][column] == "-" }
      column += 1
    end
    column - 1
  end

  # Builds the mask of +target+ against the other sequences in +seqs+.
  #
  # Each column of the (downcased) target is replaced as follows, later rules
  # overriding earlier ones:
  # * "." when the target matches every other sequence, or only one sequence
  #   has a base that is not "-", "n" or "N";
  # * "*" when no sequence has such a base;
  # * the upper-case base when the target differs from all other sequences;
  # * "-" / "_" when the target itself has a gap: "-" if it differs from all
  #   other sequences, "_" otherwise;
  # * "|" when the column lies before +seq_start+ or after +seq_end+.
  # Columns matching none of the rules keep the lower-case base.
  #
  # @param seqs hash-like alignment, name => aligned sequence
  # @param target name of the sequence to mask; defaults to the first key
  # @param seq_start [Integer] first column kept inside the mask
  # @param seq_end [Integer] last column kept inside the mask
  # @return the masked copy of the target sequence
  def self.get(seqs, target: nil, seq_start: 0, seq_end: 0)
    names = seqs.keys
    target = names[0] if target.nil?
    masked_snps = seqs[target].downcase
    expected_snps = names.size - 1
    i = 0
    while i < masked_snps.size
      # Sequences with a real base (not a gap and not N) in this column.
      cov = names.count { |chr| !["-", "n", "N"].include?(seqs[chr][i]) }
      # Other sequences whose base differs from the target, ignoring case.
      different = names.count do |chr|
        chr != target && masked_snps[i].upcase != seqs[chr][i].upcase
      end
      gap = names.any? { |chr| chr == target && seqs[chr][i] == "-" }

      mark = masked_snps[i]
      mark = "." if different == 0 || cov == 1
      mark = "*" if cov == 0
      mark = mark.upcase if different == expected_snps
      mark = (different == expected_snps ? "-" : "_") if gap
      mark = "|" if i < seq_start || i > seq_end
      masked_snps[i] = mark
      i += 1
    end
    masked_snps
  end

  # Summarises a mask produced by +get+.
  #
  # Walks the mask character by character. "n"/"N", letters and "." count
  # towards the aligned length; every other character is skipped. For each
  # lower-case letter the number of "." seen since the previous letter is
  # appended to the semispecific list; for each upper-case letter a distance is
  # appended to both lists.
  # NOTE(review): a lower-case letter neither increments nor resets the
  # counter used for the specific list, so those distances count only "."
  # columns -- confirm this is intended.
  #
  # @param mask mask text (converted with to_s)
  # @param triad value copied to the :triad field
  # @param gene value copied to the :gene field
  # @param genome value copied to the :genome field
  # @param reference value copied to the :reference field
  # @return [Hash] summary fields followed by the raw :specific and
  #   :semispecific distance arrays
  def self.stats(mask, triad, gene, genome, reference)
    specific = []
    semispecific = []
    since_specific = 0
    since_semispecific = 0
    aligned = 0
    mask.to_s.each_char do |symbol|
      if symbol == "n" || symbol == "N"
        aligned += 1
      elsif symbol =~ /[[:lower:]]/
        semispecific << since_semispecific
        since_semispecific = 0
        aligned += 1
      elsif symbol =~ /[[:upper:]]/
        specific << since_specific
        semispecific << since_semispecific
        since_specific = 0
        since_semispecific = 0
        aligned += 1
      elsif symbol == "."
        since_semispecific += 1
        since_specific += 1
        aligned += 1
      end
    end

    # Percentage of aligned columns that are not one of the +hits+ positions.
    identity = ->(hits) { (1 - (hits.to_f / aligned)) * 100 }

    {
      reference: reference,
      triad: triad,
      genome: genome,
      gene: gene,
      semispecific_mean: semispecific.mean,
      semispecific_bases: semispecific.size,
      semispecific_identity: identity.call(semispecific.size),
      specific_mean: specific.mean,
      specific_bases: specific.size,
      specific_identity: identity.call(specific.size),
      aligned_length: aligned,
      specific: specific,
      semispecific: semispecific
    }
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-polyploid-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricardo H. Ramirez-Gonzalez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 2.6.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: descriptive_statistics
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: systemu
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,10 +128,12 @@ executables:
|
|
114
128
|
- mafft_triads_promoters.rb
|
115
129
|
- map_markers_to_contigs.rb
|
116
130
|
- markers_in_region.rb
|
131
|
+
- mask_triads.rb
|
117
132
|
- polymarker.rb
|
118
133
|
- polymarker_capillary.rb
|
119
134
|
- snp_position_to_polymarker.rb
|
120
135
|
- snps_between_bams.rb
|
136
|
+
- tag_stats.rb
|
121
137
|
- vcfLineToTable.rb
|
122
138
|
extensions: []
|
123
139
|
extra_rdoc_files:
|
@@ -146,10 +162,12 @@ files:
|
|
146
162
|
- bin/mafft_triads_promoters.rb
|
147
163
|
- bin/map_markers_to_contigs.rb
|
148
164
|
- bin/markers_in_region.rb
|
165
|
+
- bin/mask_triads.rb
|
149
166
|
- bin/polymarker.rb
|
150
167
|
- bin/polymarker_capillary.rb
|
151
168
|
- bin/snp_position_to_polymarker.rb
|
152
169
|
- bin/snps_between_bams.rb
|
170
|
+
- bin/tag_stats.rb
|
153
171
|
- bin/vcfLineToTable.rb
|
154
172
|
- bio-polyploid-tools.gemspec
|
155
173
|
- conf/defaults.rb
|
@@ -192,6 +210,7 @@ files:
|
|
192
210
|
- lib/bio/PolyploidTools/ChromosomeArm.rb
|
193
211
|
- lib/bio/PolyploidTools/ExonContainer.rb
|
194
212
|
- lib/bio/PolyploidTools/Marker.rb
|
213
|
+
- lib/bio/PolyploidTools/Mask.rb
|
195
214
|
- lib/bio/PolyploidTools/NoSNPSequence.rb
|
196
215
|
- lib/bio/PolyploidTools/PrimerRegion.rb
|
197
216
|
- lib/bio/PolyploidTools/SNP.rb
|
@@ -293,7 +312,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
293
312
|
version: '0'
|
294
313
|
requirements: []
|
295
314
|
rubyforge_project:
|
296
|
-
rubygems_version: 2.
|
315
|
+
rubygems_version: 2.7.4
|
297
316
|
signing_key:
|
298
317
|
specification_version: 4
|
299
318
|
summary: Tool to work with polyploids, NGS and molecular biology
|