bio-polyploid-tools 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +1 -0
- data/README.md +14 -4
- data/VERSION +1 -1
- data/bin/polymarker.rb +16 -40
- data/bio-polyploid-tools.gemspec +4 -4
- data/lib/bio/PolyploidTools/ChromosomeArm.rb +48 -45
- data/lib/bio/PolyploidTools/ExonContainer.rb +3 -2
- data/lib/bio/PolyploidTools/SNP.rb +8 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 28167dfdf75d85d33f970351d2f9a1b166d179c7
|
4
|
+
data.tar.gz: f1635243148bb245ff2af217eb333ace9087a011
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1cfb4d6a3f49874f430da5bc1342b2bef31ca57cc73e4108272b8e4606426acfc303d21d9035dfcc96569988fcd6f74e35c8e60e5bcc35348ab8d6014a044e7b
|
7
|
+
data.tar.gz: 632d1e2488f566adb856745c9b85999b5fc8cb3e4f7326999f94fd70db7c30555e13963282785b2465ac8e6cab5d2644a06eb54a5fa1367294c99d49c30ffd60
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -17,13 +17,13 @@ You need to have in your ```$PATH``` the following programs:
|
|
17
17
|
* [exonerate](http://www.ebi.ac.uk/~guy/exonerate/)
|
18
18
|
* [blast](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE%3DBlastDocs&DOC_TYPE%3DDownload)
|
19
19
|
|
20
|
-
The code was originally developed on ruby 2.1
|
20
|
+
The code was originally developed on ruby 2.1, 2.3 and 2.5. It may work on older versions. However, it is only actively tested on the currently supported ruby versions:
|
21
21
|
|
22
22
|
* 2.1.10
|
23
23
|
* 2.2.5
|
24
24
|
* 2.3.5
|
25
25
|
* 2.4.2
|
26
|
-
|
26
|
+
* 2.5.0
|
27
27
|
|
28
28
|
# PolyMarker
|
29
29
|
|
@@ -102,10 +102,10 @@ This file format can be used with ```snp_positions_to_polymarker.rb``` to produc
|
|
102
102
|
By default, the contigs and pseudomolecules from [ensembl](ftp://ftp.ensemblgenomes.org/pub/release-25/plants/fasta/triticum_aestivum/dna/Triticum_aestivum.IWGSC2.25.dna.genome.fa.gz
|
103
103
|
) are used. However, it is possible to use a custom reference. To define the chromosome where each contig belongs, the argument ```arm_selection``` is used. The default uses ids like: ```IWGSC_CSS_1AL_scaff_110```, where the third field, separated by underscores, is used. A simple way to add custom references is to rename the fasta file to follow that convention. Another way is to use the option ```--arm_selection arm_selection_first_two```, where only the first two characters in each contig name are used as the identifier, which is useful when pseudomolecules are named after the chromosomes (i.e. ">1A" in the fasta file).
|
104
104
|
|
105
|
-
If your contigs follow a different convention, in the file ```
|
105
|
+
If your contigs follow a different convention, it is possible to define new parsers in the file ```ChromosomeArm.rb``` by adding a new lambda at the beginning, alongside the rest of the parsers, like:
|
106
106
|
|
107
107
|
```rb
|
108
|
-
arm_selection_functions[:
|
108
|
+
@@arm_selection_functions[:embl] = lambda do | contig_name|
|
109
109
|
arr = contig_name.split('_')
|
110
110
|
ret = "U"
|
111
111
|
ret = arr[2][0,2] if arr.size >= 3
|
@@ -128,6 +128,16 @@ To use blast instead of exonerate, use the following command:
|
|
128
128
|
|
129
129
|
## Release Notes
|
130
130
|
|
131
|
+
### 0.8.2
|
132
|
+
|
133
|
+
* FEATURE: The functions to select the chromosome arm are now in ```lib/bio/PolyploidTools/ChromosomeArm.rb``` and the help message is updated automatically with the valid options.
|
134
|
+
* FEATURE: Added option ```filter_best``` to replicate the original behaviour of selecting the best hit of each chromosome. This is still useful for assemblies that contain synthetic duplications.
|
135
|
+
|
136
|
+
### 0.8.1
|
137
|
+
|
138
|
+
* BUGFIX: There was an error which prevented the correct localisation of the SNP in markers with gaps in the local alignment before the position with the SNP.
|
139
|
+
* FEATURE: PolyMarker now selects the best hit of the target chromosome. This improves the specificity in regions with a recent duplication. The drawback is that if your assembly has artificial repetitions, the primers won't be marked as 'chromosome specific', but as 'chromosome semi-specific '. In a future version this will be addressed.
|
140
|
+
|
131
141
|
### 0.8
|
132
142
|
|
133
143
|
* FEATURE: ```polymarker.rb``` added the flag ```--aligner blast|exonerate ``` which lets you pick between ```blast``` or ```exonerate``` as the aligner. For blast the default is to have the database with the same name as the ```--contigs``` file. However, it is possible to use a different name via the option ```--database```.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.
|
1
|
+
0.8.2
|
data/bin/polymarker.rb
CHANGED
@@ -10,43 +10,7 @@ $: << File.expand_path('.')
|
|
10
10
|
path= File.expand_path(File.dirname(__FILE__) + '/../lib/bioruby-polyploid-tools.rb')
|
11
11
|
require path
|
12
12
|
|
13
|
-
arm_selection_functions = Hash.new;
|
14
13
|
|
15
|
-
arm_selection_functions[:arm_selection_nrgenes] = lambda do | contig_name |
|
16
|
-
#example format: chr2A
|
17
|
-
ret = contig_name[3,2]
|
18
|
-
return ret
|
19
|
-
end
|
20
|
-
|
21
|
-
arm_selection_functions[:arm_selection_first_two] = lambda do | contig_name |
|
22
|
-
contig_name.gsub!(/chr/,"")
|
23
|
-
ret = contig_name[0,2]
|
24
|
-
return ret
|
25
|
-
end
|
26
|
-
|
27
|
-
#Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
|
28
|
-
#Or the first two characters in the contig name, to deal with
|
29
|
-
#pseudomolecules that start with headers like: "1A"
|
30
|
-
#And with the cases when 3B is named with the prefix: v443
|
31
|
-
arm_selection_functions[:arm_selection_embl] = lambda do | contig_name|
|
32
|
-
|
33
|
-
arr = contig_name.split('_')
|
34
|
-
ret = "U"
|
35
|
-
ret = arr[2][0,2] if arr.size >= 3
|
36
|
-
ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
37
|
-
ret = arr[0][0,2] if arr.size == 1
|
38
|
-
return ret
|
39
|
-
end
|
40
|
-
|
41
|
-
arm_selection_functions[:arm_selection_morex] = lambda do | contig_name |
|
42
|
-
ret = contig_name.split(':')[0].split("_")[1];
|
43
|
-
return ret
|
44
|
-
end
|
45
|
-
|
46
|
-
arm_selection_functions[:scaffold] = lambda do | contig_name |
|
47
|
-
ret = contig_name;
|
48
|
-
return ret
|
49
|
-
end
|
50
14
|
|
51
15
|
def validate_files(o)
|
52
16
|
[
|
@@ -66,7 +30,7 @@ options[:chunks] = 1
|
|
66
30
|
options[:bucket_size] = 0
|
67
31
|
options[:bucket] = 1
|
68
32
|
options[:model] = "est2genome"
|
69
|
-
options[:arm_selection] =
|
33
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection("nrgene");
|
70
34
|
options[:flanking_size] = 150;
|
71
35
|
options[:variation_free_region] = 0
|
72
36
|
options[:extract_found_contigs] = false
|
@@ -74,6 +38,7 @@ options[:genomes_count] = 3
|
|
74
38
|
options[:min_identity] = 90
|
75
39
|
options[:scoring] = :genome_specific
|
76
40
|
options[:database] = false
|
41
|
+
options[:filter_best] = false
|
77
42
|
options[:aligner] = :exonerate
|
78
43
|
|
79
44
|
|
@@ -87,6 +52,8 @@ options[:primer_3_preferences] = {
|
|
87
52
|
:primer_thermodynamic_parameters_path=>File.expand_path(File.dirname(__FILE__) + '../../conf/primer3_config/') + '/'
|
88
53
|
}
|
89
54
|
|
55
|
+
|
56
|
+
|
90
57
|
OptionParser.new do |opts|
|
91
58
|
opts.banner = "Usage: polymarker.rb [options]"
|
92
59
|
|
@@ -102,6 +69,11 @@ OptionParser.new do |opts|
|
|
102
69
|
options[:genomes_count] = o.to_i
|
103
70
|
end
|
104
71
|
|
72
|
+
opts.on("-b", "--filter_best", "If set, only keep the best alignment for each chromosome") do
|
73
|
+
options[:filter_best] = true
|
74
|
+
end
|
75
|
+
|
76
|
+
|
105
77
|
opts.on("-s", "--snp_list FILE", "File with the list of snps to search from, requires --reference to get the sequence using a position") do |o|
|
106
78
|
options[:snp_list] = o
|
107
79
|
end
|
@@ -127,7 +99,7 @@ OptionParser.new do |opts|
|
|
127
99
|
options[:model] = o
|
128
100
|
end
|
129
101
|
|
130
|
-
opts.on("-a", "--arm_selection
|
102
|
+
opts.on("-a", "--arm_selection #{Bio::PolyploidTools::ChromosomeArm.getValidFunctions.join('|')}", "Function to decide the chromome arm") do |o|
|
131
103
|
tmp_str = o
|
132
104
|
arr = o.split(",")
|
133
105
|
if arr.size == 2
|
@@ -138,7 +110,7 @@ OptionParser.new do |opts|
|
|
138
110
|
return ret
|
139
111
|
end
|
140
112
|
else
|
141
|
-
options[:arm_selection] =
|
113
|
+
options[:arm_selection] = Bio::PolyploidTools::ChromosomeArm.getArmSelection(o)
|
142
114
|
end
|
143
115
|
|
144
116
|
end
|
@@ -370,7 +342,11 @@ snps.each do |snp|
|
|
370
342
|
snp.variation_free_region = options[:variation_free_region]
|
371
343
|
container.add_snp(snp)
|
372
344
|
end
|
373
|
-
container.add_alignments({
|
345
|
+
container.add_alignments({
|
346
|
+
:exonerate_file=>exonerate_file,
|
347
|
+
:arm_selection=>options[:arm_selection],
|
348
|
+
:min_identity=>min_identity,
|
349
|
+
:filter_best=>options[:filter_best]})
|
374
350
|
|
375
351
|
|
376
352
|
#4.1 generating primer3 file
|
data/bio-polyploid-tools.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: bio-polyploid-tools 0.8.
|
5
|
+
# stub: bio-polyploid-tools 0.8.2 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "bio-polyploid-tools".freeze
|
9
|
-
s.version = "0.8.
|
9
|
+
s.version = "0.8.2"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Ricardo H. Ramirez-Gonzalez".freeze]
|
14
|
-
s.date = "2018-01-
|
14
|
+
s.date = "2018-01-23"
|
15
15
|
s.description = "Repository of tools developed at Crop Genetics in JIC to work with polyploid wheat".freeze
|
16
16
|
s.email = "ricardo.ramirez-gonzalez@jic.ac.uk".freeze
|
17
17
|
s.executables = ["bfr.rb".freeze, "blast_triads.rb".freeze, "blast_triads_promoters.rb".freeze, "count_variations.rb".freeze, "filter_blat_by_target_coverage.rb".freeze, "filter_exonerate_by_identity.rb".freeze, "find_best_blat_hit.rb".freeze, "find_best_exonerate.rb".freeze, "find_homoeologue_variations.rb".freeze, "get_longest_hsp_blastx_triads.rb".freeze, "hexaploid_primers.rb".freeze, "homokaryot_primers.rb".freeze, "mafft_triads.rb".freeze, "mafft_triads_promoters.rb".freeze, "map_markers_to_contigs.rb".freeze, "markers_in_region.rb".freeze, "polymarker.rb".freeze, "polymarker_capillary.rb".freeze, "snp_position_to_polymarker.rb".freeze, "snps_between_bams.rb".freeze, "vcfLineToTable.rb".freeze]
|
@@ -172,7 +172,7 @@ Gem::Specification.new do |s|
|
|
172
172
|
]
|
173
173
|
s.homepage = "http://github.com/tgac/bioruby-polyploid-tools".freeze
|
174
174
|
s.licenses = ["MIT".freeze]
|
175
|
-
s.rubygems_version = "2.
|
175
|
+
s.rubygems_version = "2.6.14".freeze
|
176
176
|
s.summary = "Tool to work with polyploids, NGS and molecular biology".freeze
|
177
177
|
|
178
178
|
if s.respond_to? :specification_version then
|
@@ -1,48 +1,51 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
1
|
+
class Bio::PolyploidTools::ChromosomeArm
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
@@arm_selection_functions = Hash.new;
|
6
|
+
|
7
|
+
#example format: chr2A
|
8
|
+
@@arm_selection_functions[:nrgene] = lambda do | contig_name |
|
9
|
+
ret = contig_name[3,2]
|
10
|
+
return ret
|
11
|
+
end
|
12
|
+
|
13
|
+
@@arm_selection_functions[:first_two] = lambda do | contig_name |
|
14
|
+
contig_name.gsub!(/chr/,"")
|
15
|
+
ret = contig_name[0,2]
|
16
|
+
return ret
|
17
|
+
end
|
18
|
+
|
19
|
+
#Function to parse stuff like: "IWGSC_CSS_1AL_scaff_110"
|
20
|
+
#Or the first two characters in the contig name, to deal with
|
21
|
+
#pseudomolecules that start with headers like: "1A"
|
22
|
+
#And with the cases when 3B is named with the prefix: v443
|
23
|
+
@@arm_selection_functions[:embl] = lambda do | contig_name|
|
24
|
+
|
25
|
+
arr = contig_name.split('_')
|
26
|
+
ret = "U"
|
27
|
+
ret = arr[2][0,2] if arr.size >= 3
|
28
|
+
ret = "3B" if arr.size == 2 and arr[0] == "v443"
|
29
|
+
ret = arr[0][0,2] if arr.size == 1
|
30
|
+
return ret
|
31
|
+
end
|
32
|
+
|
33
|
+
@@arm_selection_functions[:morex] = lambda do | contig_name |
|
34
|
+
ret = contig_name.split(':')[0].split("_")[1];
|
35
|
+
return ret
|
36
|
+
end
|
37
|
+
|
38
|
+
@@arm_selection_functions[:scaffold] = lambda do | contig_name |
|
39
|
+
ret = contig_name;
|
40
|
+
return ret
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.getArmSelection(name)
|
44
|
+
@@arm_selection_functions[name.to_sym]
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.getValidFunctions
|
48
|
+
@@arm_selection_functions.keys.map { |e| e.to_s }
|
46
49
|
end
|
47
50
|
|
48
51
|
end
|
@@ -175,9 +175,10 @@ module Bio::PolyploidTools
|
|
175
175
|
end
|
176
176
|
|
177
177
|
def add_alignments(opts=Hash.new)
|
178
|
-
opts = { :min_identity=>90 }.merge!(opts)
|
178
|
+
opts = { :min_identity=>90, filter_best:false }.merge!(opts)
|
179
179
|
exonerate_filename = opts[:exonerate_file]
|
180
180
|
arm_selection = opts[:arm_selection]
|
181
|
+
filter_best = opts[:filter_best]
|
181
182
|
|
182
183
|
unless arm_selection
|
183
184
|
arm_selection = lambda do | contig_name |
|
@@ -197,7 +198,7 @@ module Bio::PolyploidTools
|
|
197
198
|
if snp != nil and snp.position.between?( (record.query_start + 1) , record.query_end)
|
198
199
|
begin
|
199
200
|
exon = record.exon_on_gene_position(snp.position)
|
200
|
-
snp.add_exon(exon, arm_selection.call(record.target_id))
|
201
|
+
snp.add_exon(exon, arm_selection.call(record.target_id), filter_best:filter_best)
|
201
202
|
rescue Bio::DB::Exonerate::ExonerateException
|
202
203
|
$stderr.puts "Failed for the range #{record.query_start}-#{record.query_end} for position #{snp.position}"
|
203
204
|
end
|
@@ -114,8 +114,14 @@ module Bio::PolyploidTools
|
|
114
114
|
return ">#{self.gene}\n#{self.template_sequence}\n"
|
115
115
|
end
|
116
116
|
|
117
|
-
def add_exon(exon, arm)
|
118
|
-
exon_list[arm]
|
117
|
+
def add_exon(exon, arm, filter_best: true)
|
118
|
+
if filter_best and exon_list[arm].size > 0
|
119
|
+
current = exon_list[arm].first
|
120
|
+
exon_list[arm] = [exon] if exon.record.score > current.record.score
|
121
|
+
else
|
122
|
+
exon_list[arm] << exon
|
123
|
+
end
|
124
|
+
|
119
125
|
end
|
120
126
|
|
121
127
|
def covered_region
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-polyploid-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ricardo H. Ramirez-Gonzalez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-
|
11
|
+
date: 2018-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bio
|
@@ -293,7 +293,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
293
293
|
version: '0'
|
294
294
|
requirements: []
|
295
295
|
rubyforge_project:
|
296
|
-
rubygems_version: 2.
|
296
|
+
rubygems_version: 2.6.14
|
297
297
|
signing_key:
|
298
298
|
specification_version: 4
|
299
299
|
summary: Tool to work with polyploids, NGS and molecular biology
|