miga-base 1.2.17.1 → 1.2.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +2 -2
- data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
- data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
- data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
- data/utils/enveomics/Scripts/SRA.download.bash +14 -2
- data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
- data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
- data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
- data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
- data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
- data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
- data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
- data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
- data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
- data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
- data/utils/enveomics/enveomics.R/R/utils.R +30 -20
- data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
- data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
- data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
- data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
- data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
- data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
- data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
- data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
- data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
- data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
- data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
- data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
- data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
- metadata +3 -37
- data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
- data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
- data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
- data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
- data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
- data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
- data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
- data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
- data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
- data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
- data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
- data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
- data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
- data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
- data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
- data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
- data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
- data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
- data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
- data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
- data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
- data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
- data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
- data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
- data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
- data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
- data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
- data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 50f8bda6b07f6b7da95f3342e96b290c401a8810c35e2406309d28559111de2d
|
|
4
|
+
data.tar.gz: 99dd4709f330f90fc71b213c42ce60bade2ac32127fb123ae450cb0a54885176
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 18666049e37b300ceaf367cd1fa1847556a1a84a6878095b9e931c6665f1559a427e00a844ec764de470ab25b92ebd1c48753b782a89c779a4974f2eb63b5385
|
|
7
|
+
data.tar.gz: c2b4d46070dc7fa7bef02dc7a4147472a32d1863dd86871a2c29c9fde4a1ea906f3d29659bbadab24ebcd57095d3db826120bff8bb831994d6dc6f372d6b569e
|
data/lib/miga/version.rb
CHANGED
|
@@ -12,7 +12,7 @@ module MiGA
|
|
|
12
12
|
# - String indicating release status:
|
|
13
13
|
# - rc* release candidate, not released as gem
|
|
14
14
|
# - [0-9]+ stable release, released as gem
|
|
15
|
-
VERSION = [1.2, 17,
|
|
15
|
+
VERSION = [1.2, 17, 2].freeze
|
|
16
16
|
|
|
17
17
|
##
|
|
18
18
|
# Nickname for the current major.minor version.
|
|
@@ -20,7 +20,7 @@ module MiGA
|
|
|
20
20
|
|
|
21
21
|
##
|
|
22
22
|
# Date of the current gem release.
|
|
23
|
-
VERSION_DATE = Date.new(2023, 2,
|
|
23
|
+
VERSION_DATE = Date.new(2023, 2, 21)
|
|
24
24
|
|
|
25
25
|
##
|
|
26
26
|
# References of MiGA
|
|
@@ -2,33 +2,61 @@
|
|
|
2
2
|
"tasks": [
|
|
3
3
|
{
|
|
4
4
|
"task": "BedGraph.tad.rb",
|
|
5
|
-
"description": [
|
|
6
|
-
"
|
|
7
|
-
|
|
5
|
+
"description": [
|
|
6
|
+
"Estimates the truncated average sequencing depth (TAD)",
|
|
7
|
+
"from a BedGraph file."
|
|
8
|
+
],
|
|
9
|
+
"warn": [
|
|
10
|
+
"This script doesn't consider zero-coverage positions if",
|
|
8
11
|
"missing from the file. If you produce your BedGraph file with",
|
|
9
12
|
"bedtools genomecov and want to consider zero-coverage position, be",
|
|
10
|
-
"sure to use -bga (not -bg)."
|
|
11
|
-
|
|
12
|
-
|
|
13
|
+
"sure to use -bga (not -bg)."
|
|
14
|
+
],
|
|
15
|
+
"see_also": [
|
|
16
|
+
"BedGraph.window.rb", "BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"
|
|
17
|
+
],
|
|
13
18
|
"help_arg": "--help",
|
|
14
19
|
"options": [
|
|
15
20
|
{
|
|
16
21
|
"opt": "--input",
|
|
17
22
|
"arg": "in_file",
|
|
18
23
|
"mandatory": true,
|
|
19
|
-
"description":
|
|
24
|
+
"description": [
|
|
25
|
+
"Input BedGraph file.",
|
|
26
|
+
"Supports compression with .gz extension, use - for STDIN."
|
|
27
|
+
]
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"opt": "--output",
|
|
31
|
+
"arg": "out_file",
|
|
32
|
+
"default": "-",
|
|
33
|
+
"description": [
|
|
34
|
+
"Output tab-delimited values (by default, STDOUT).",
|
|
35
|
+
"Supports compression with .gz extension, use - for STDOUT."
|
|
36
|
+
]
|
|
20
37
|
},
|
|
21
38
|
{
|
|
22
39
|
"opt": "--range",
|
|
23
40
|
"arg": "float",
|
|
24
41
|
"default": 0.5,
|
|
25
|
-
"description": [
|
|
26
|
-
"
|
|
42
|
+
"description": [
|
|
43
|
+
"Central range to consider, between 0 and 1.",
|
|
44
|
+
"By default: inter-quartile range (0.5)."
|
|
45
|
+
]
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"opt": "--name",
|
|
49
|
+
"arg": "string",
|
|
50
|
+
"description": [
|
|
51
|
+
"Name (ID) of the sequence, added as first column."
|
|
52
|
+
]
|
|
27
53
|
},
|
|
28
54
|
{
|
|
29
55
|
"opt": "--per-seq",
|
|
30
|
-
"description": [
|
|
31
|
-
"
|
|
56
|
+
"description": [
|
|
57
|
+
"Calculate averages per reference sequence, not total.",
|
|
58
|
+
"Assumes a sorted BedGraph file."
|
|
59
|
+
]
|
|
32
60
|
},
|
|
33
61
|
{
|
|
34
62
|
"opt": "--length",
|
|
@@ -184,7 +184,8 @@
|
|
|
184
184
|
{
|
|
185
185
|
"task": "SRA.download.bash",
|
|
186
186
|
"description": ["Downloads the set of runs from a project, sample, or",
|
|
187
|
-
"experiment in SRA."
|
|
187
|
+
"experiment in SRA. If the expected file already exists, skips the",
|
|
188
|
+
"file if the MD5 hash matches."],
|
|
188
189
|
"help_arg": "",
|
|
189
190
|
"requires": [
|
|
190
191
|
{
|
|
@@ -1,93 +1,138 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
$VERSION = 1.01
|
|
4
|
+
$:.push File.expand_path('../lib', __FILE__)
|
|
5
|
+
require 'enveomics_rb/enveomics'
|
|
4
6
|
|
|
5
|
-
o = {range: 0.5, perseq: false, length: false}
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
o = { range: 0.5, perseq: false, length: false, o: '-' }
|
|
8
|
+
OptionParser.new do |opts|
|
|
9
|
+
opts.version = $VERSION
|
|
10
|
+
banner = <<~BANNER
|
|
11
|
+
Estimates the truncated average sequencing depth (TAD) from a BedGraph file
|
|
10
12
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
IMPORTANT: This script doesn't consider zero-coverage positions if missing
|
|
14
|
+
from the file. If you produce your BedGraph file with bedtools genomecov and
|
|
15
|
+
want to consider zero-coverage position, be sure to use -bga (not -bg).
|
|
16
|
+
BANNER
|
|
17
|
+
Enveomics.opt_banner(opts, banner, "#{File.basename($0)} [options]")
|
|
14
18
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
'Input BedGraph file
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
'
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
19
|
+
opts.separator 'Mandatory'
|
|
20
|
+
opts.on(
|
|
21
|
+
'-i', '--input PATH',
|
|
22
|
+
'Input BedGraph file',
|
|
23
|
+
'Supports compression with .gz extension, use - for STDIN'
|
|
24
|
+
) { |v| o[:i] = v }
|
|
25
|
+
|
|
26
|
+
opts.separator ''
|
|
27
|
+
opts.separator 'Other Options'
|
|
28
|
+
opts.on(
|
|
29
|
+
'-o', '--out PATH',
|
|
30
|
+
'Output tab-delimited values (by default, STDOUT)',
|
|
31
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
|
32
|
+
) { |v| o[:o] = v }
|
|
33
|
+
opts.on(
|
|
34
|
+
'-r', '--range FLOAT', Float,
|
|
35
|
+
'Central range to consider, between 0 and 1',
|
|
36
|
+
"By default: #{o[:range]} (inter-quartile range)"
|
|
37
|
+
) { |v| o[:range] = v }
|
|
38
|
+
opts.on(
|
|
39
|
+
'-n', '--name STRING',
|
|
40
|
+
'Name (ID) of the sequence (added as first column)'
|
|
41
|
+
) { |v| o[:name] = v }
|
|
42
|
+
opts.on(
|
|
43
|
+
'-s', '--per-seq',
|
|
44
|
+
'Calculate averages per reference sequence, not total',
|
|
45
|
+
'Assumes a sorted BedGraph file'
|
|
46
|
+
) { |v| o[:perseq] = v }
|
|
47
|
+
opts.on(
|
|
48
|
+
'-l', '--length',
|
|
49
|
+
'Add sequence length to the output'
|
|
50
|
+
) { |v| o[:length] = v }
|
|
51
|
+
opts.on(
|
|
52
|
+
'-b', '--breadth',
|
|
53
|
+
'Add sequencing breadth to the output'
|
|
54
|
+
) { |v| o[:breadth] = v }
|
|
55
|
+
opts.on('-h', '--help', 'Display this screen') do
|
|
56
|
+
puts opts
|
|
31
57
|
exit
|
|
32
58
|
end
|
|
33
|
-
|
|
59
|
+
opts.separator ''
|
|
34
60
|
end.parse!
|
|
35
|
-
|
|
61
|
+
raise Enveomics::OptionError.new('-i is mandatory') if o[:i].nil?
|
|
36
62
|
|
|
63
|
+
##
|
|
64
|
+
# Pad an array to include all index values up to +r+ entries:
|
|
65
|
+
# - d: Array of [ depth => counts ]
|
|
66
|
+
# - idx: Array of [ depth, depth, ... ]
|
|
67
|
+
# - r: Expected number of entries in the array
|
|
37
68
|
def pad(d, idx, r)
|
|
38
69
|
idx.each do |i|
|
|
39
70
|
next if d[i].nil?
|
|
71
|
+
|
|
40
72
|
d[i] -= r
|
|
41
73
|
break unless d[i] < 0
|
|
74
|
+
|
|
42
75
|
r = -d[i]
|
|
43
76
|
d[i] = nil
|
|
44
77
|
end
|
|
45
78
|
d
|
|
46
79
|
end
|
|
47
80
|
|
|
81
|
+
##
|
|
82
|
+
# Report the results for:
|
|
83
|
+
# - sq: Contig ID
|
|
84
|
+
# - d: Array of [ depth => counts ]
|
|
85
|
+
# - ln: Length of the sequence
|
|
86
|
+
# - o: CLI Options
|
|
48
87
|
def report(sq, d, ln, o)
|
|
49
88
|
# Estimate padding ranges
|
|
50
|
-
pad = (1.0-o[:range])/2.0
|
|
51
|
-
r = (pad*ln).round
|
|
89
|
+
pad = (1.0 - o[:range]) / 2.0
|
|
90
|
+
r = (pad * ln).round
|
|
91
|
+
zeroes = d[0].to_i
|
|
52
92
|
|
|
53
|
-
# Pad
|
|
54
|
-
d = pad(d, d.each_index.to_a, r+0)
|
|
55
|
-
d = pad(d, d.each_index.to_a.reverse, r+0)
|
|
93
|
+
# Pad (truncation)
|
|
94
|
+
d = pad(d, d.each_index.to_a, r + 0)
|
|
95
|
+
d = pad(d, d.each_index.to_a.reverse, r + 0)
|
|
56
96
|
|
|
57
97
|
# Average
|
|
58
98
|
y = [0.0]
|
|
59
99
|
unless d.compact.empty?
|
|
60
|
-
s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0
|
|
61
|
-
y[0] = s.to_f/d.compact.inject(:+)
|
|
100
|
+
s = d.each_with_index.to_a.map { |v, i| v.nil? ? 0 : i * v }.inject(0, :+)
|
|
101
|
+
y[0] = s.to_f / d.compact.inject(:+)
|
|
62
102
|
end
|
|
63
103
|
|
|
64
104
|
# Report
|
|
65
105
|
y.unshift(sq) if o[:perseq]
|
|
106
|
+
y.unshift(o[:name]) if o[:name]
|
|
66
107
|
y << ln if o[:length]
|
|
67
|
-
|
|
108
|
+
y << (ln - zeroes).to_f / ln if o[:breadth]
|
|
109
|
+
y.join("\t")
|
|
68
110
|
end
|
|
69
111
|
|
|
70
112
|
# Read BedGraph
|
|
71
|
-
d = []
|
|
113
|
+
d = [] # [ depth => count ]
|
|
72
114
|
ln = 0
|
|
73
115
|
pre_sq = nil
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
r.map! { |j| j.to_i }
|
|
85
|
-
l = r[1]-r[0]
|
|
86
|
-
d[ r[2] ] ||= 0
|
|
87
|
-
d[ r[2] ] += l
|
|
88
|
-
ln += l
|
|
89
|
-
pre_sq = sq
|
|
116
|
+
ifh = reader(o[:i])
|
|
117
|
+
ofh = writer(o[:o])
|
|
118
|
+
ifh.each_line do |i|
|
|
119
|
+
next if i =~ /^#/
|
|
120
|
+
r = i.chomp.split("\t")
|
|
121
|
+
sq = r.shift # Contig ID
|
|
122
|
+
if o[:perseq] && !pre_sq.nil? && pre_sq != sq
|
|
123
|
+
ofh.puts(report(pre_sq, d, ln, o))
|
|
124
|
+
d = []
|
|
125
|
+
ln = 0
|
|
90
126
|
end
|
|
127
|
+
r.map!(&:to_i) # From, To, Depth
|
|
128
|
+
l = r[1] - r[0] # Window length: To - From
|
|
129
|
+
d[ r[2] ] ||= 0
|
|
130
|
+
d[ r[2] ] += l # Add these "l" positions with depth "Depth"
|
|
131
|
+
ln += l
|
|
132
|
+
pre_sq = sq
|
|
91
133
|
end
|
|
92
|
-
report(pre_sq, d, ln, o)
|
|
134
|
+
ofh.puts(report(pre_sq, d, ln, o))
|
|
135
|
+
|
|
136
|
+
ifh.close
|
|
137
|
+
ofh.close
|
|
93
138
|
|
|
@@ -4,13 +4,14 @@ DATA_LINK="https://www.ebi.ac.uk/ena/portal/api/filereport"
|
|
|
4
4
|
DATA_OPS="result=read_run&fields=run_accession,fastq_ftp,fastq_md5"
|
|
5
5
|
SRX=$1
|
|
6
6
|
DIR=${2:-$SRX}
|
|
7
|
-
VERSION=
|
|
7
|
+
VERSION=2.0
|
|
8
8
|
|
|
9
9
|
if [[ "$SRX" == "" ]] ; then
|
|
10
10
|
echo "
|
|
11
11
|
[Enveomics Collection: $(basename "$0" .bash) $VERSION]
|
|
12
12
|
|
|
13
13
|
Downloads the set of runs from a project, sample, or experiment in SRA.
|
|
14
|
+
If the expected file already exists, skips the file if the MD5 hash matches.
|
|
14
15
|
|
|
15
16
|
Usage:
|
|
16
17
|
$(basename "$0") <SRA-ID>[ <dir>]
|
|
@@ -42,9 +43,20 @@ tail -n +2 "$DIR/srr_list.txt" | while read ln ; do
|
|
|
42
43
|
echo "o $srr" >&2
|
|
43
44
|
for uri in $(echo "$ftp" | tr ";" " ") ; do
|
|
44
45
|
file="$dir/$(basename $uri)"
|
|
46
|
+
|
|
47
|
+
# Check if it exists and it's complete
|
|
48
|
+
if [[ -s "$file" ]] ; then
|
|
49
|
+
md5obs=$(md5value "$file" 2> /dev/null)
|
|
50
|
+
if [[ "$md5;" == "$md5obs;"* ]] ; then
|
|
51
|
+
md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
|
|
52
|
+
continue
|
|
53
|
+
fi
|
|
54
|
+
fi
|
|
55
|
+
|
|
56
|
+
# Otherwise, download and check MD5
|
|
45
57
|
curl "$uri" -o "$file"
|
|
46
58
|
md5obs=$(md5value "$file" 2> /dev/null)
|
|
47
|
-
if [[ "$md5" == "$md5obs"* ]] ; then
|
|
59
|
+
if [[ "$md5;" == "$md5obs;"* ]] ; then
|
|
48
60
|
md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
|
|
49
61
|
else
|
|
50
62
|
echo "Corrupt file: $file" >&2
|
|
Binary file
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Package: enveomics.R
|
|
2
|
-
Version: 1.
|
|
3
|
-
Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
|
|
4
|
-
email="lmrodriguezr@gmail.com"))
|
|
2
|
+
Version: 1.9.0
|
|
3
|
+
Authors@R: c(person("Luis M.", "Rodriguez-R", role = c("aut", "cre"),
|
|
4
|
+
email = "lmrodriguezr@gmail.com"))
|
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
|
6
6
|
Description: A collection of functions for microbial ecology and other
|
|
7
7
|
applications of genomics and metagenomics. Companion package for the
|
|
@@ -9,7 +9,7 @@ Description: A collection of functions for microbial ecology and other
|
|
|
9
9
|
<DOI:10.7287/peerj.preprints.1900v1>).
|
|
10
10
|
Author: Luis M. Rodriguez-R [aut, cre]
|
|
11
11
|
Maintainer: Luis M. Rodriguez-R <lmrodriguezr@gmail.com>
|
|
12
|
-
URL: http://enve-omics.ce.gatech.edu/enveomics
|
|
12
|
+
URL: http://enve-omics.ce.gatech.edu/enveomics/
|
|
13
13
|
Depends:
|
|
14
14
|
R (>= 2.9),
|
|
15
15
|
stats,
|
|
@@ -28,4 +28,4 @@ Suggests:
|
|
|
28
28
|
License: Artistic-2.0
|
|
29
29
|
LazyData: yes
|
|
30
30
|
Encoding: UTF-8
|
|
31
|
-
RoxygenNote: 7.
|
|
31
|
+
RoxygenNote: 7.1.2
|
|
@@ -22,134 +22,146 @@
|
|
|
22
22
|
#'
|
|
23
23
|
#' @export
|
|
24
24
|
|
|
25
|
-
enve.prune.dist <- function
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
){
|
|
34
|
-
if(!requireNamespace("ape", quietly=TRUE))
|
|
25
|
+
enve.prune.dist <- function(
|
|
26
|
+
t,
|
|
27
|
+
dist.quantile = 0.25,
|
|
28
|
+
min_dist,
|
|
29
|
+
quiet = FALSE,
|
|
30
|
+
max_iters = 100,
|
|
31
|
+
min_nodes_random = 4e4,
|
|
32
|
+
random_nodes_frx = 1
|
|
33
|
+
) {
|
|
34
|
+
if (!requireNamespace("ape", quietly = TRUE))
|
|
35
35
|
stop('Unavailable ape library.');
|
|
36
|
-
if(is.character(t)) t <- ape::read.tree(t)
|
|
37
|
-
if(missing(min_dist)){
|
|
38
|
-
if(dist.quantile>0){
|
|
39
|
-
min_dist <- as.numeric(quantile(t$edge.length, dist.quantile))
|
|
40
|
-
}else{
|
|
41
|
-
min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]))
|
|
36
|
+
if (is.character(t)) t <- ape::read.tree(t)
|
|
37
|
+
if (missing(min_dist)) {
|
|
38
|
+
if (dist.quantile > 0) {
|
|
39
|
+
min_dist <- as.numeric(quantile(t$edge.length, dist.quantile))
|
|
40
|
+
} else {
|
|
41
|
+
min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]))
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
|
-
if(!quiet)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
44
|
+
if (!quiet)
|
|
45
|
+
cat("\nObjective minimum distance: ", min_dist, ", initial tips: ",
|
|
46
|
+
length(t$tip.label), "\n", sep = "")
|
|
47
|
+
|
|
48
|
+
round <- 1
|
|
49
|
+
while (round <= max_iters) {
|
|
50
|
+
if (length(t$tip.label) > min_nodes_random) {
|
|
51
|
+
if (!quiet)
|
|
52
|
+
cat(" | Iter: ", round - 1, ", Tips: ", length(t$tip.label),
|
|
53
|
+
", reducing tip-pairs.\n", sep = "")
|
|
54
|
+
rnd.nodes <- sample(t$tip.label, length(t$tip.label) * random_nodes_frx)
|
|
55
|
+
t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet)
|
|
56
|
+
} else {
|
|
57
|
+
if (!quiet) cat(" Gathering distances...\r")
|
|
58
|
+
d <- cophenetic(t)
|
|
59
|
+
diag(d) <- NA
|
|
60
|
+
if(!quiet)
|
|
61
|
+
cat(" | Iter: ", round - 1, ", Tips: ", length(t$tip.label),
|
|
62
|
+
", Median distance: ", median(d, na.rm = TRUE),
|
|
63
|
+
", Minimum distance: ", min(d, na.rm = TRUE), "\n", sep = "")
|
|
60
64
|
# Run iteration
|
|
61
|
-
if(min(d, na.rm=TRUE) < min_dist){
|
|
62
|
-
t <- enve.__prune.iter(t, d, min_dist, quiet)
|
|
63
|
-
}else{
|
|
64
|
-
break
|
|
65
|
+
if (min(d, na.rm = TRUE) < min_dist) {
|
|
66
|
+
t <- enve.__prune.iter(t, d, min_dist, quiet)
|
|
67
|
+
} else {
|
|
68
|
+
break
|
|
65
69
|
}
|
|
66
70
|
}
|
|
67
|
-
round <- round + 1
|
|
71
|
+
round <- round + 1
|
|
68
72
|
}
|
|
69
|
-
return(t)
|
|
73
|
+
return(t)
|
|
70
74
|
}
|
|
71
75
|
|
|
72
76
|
#' Enveomics: Prune Reduce (Internal Function)
|
|
73
77
|
#'
|
|
74
78
|
#' Internal function for \code{\link{enve.prune.dist}}.
|
|
75
79
|
#'
|
|
76
|
-
#' @param t A \strong{phylo} object
|
|
77
|
-
#' @param nodes Vector of nodes
|
|
78
|
-
#' @param min_dist Minimum distance
|
|
79
|
-
#' @param quiet If running quietly
|
|
80
|
+
#' @param t A \strong{phylo} object.
|
|
81
|
+
#' @param nodes Vector of nodes.
|
|
82
|
+
#' @param min_dist Minimum distance.
|
|
83
|
+
#' @param quiet If running quietly.
|
|
84
|
+
#'
|
|
85
|
+
#' @return A \strong{phylo} object.
|
|
80
86
|
#'
|
|
81
87
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
|
82
88
|
#'
|
|
83
89
|
#' @export
|
|
84
90
|
|
|
85
|
-
enve.__prune.reduce <- function
|
|
86
|
-
(
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
+
enve.__prune.reduce <- function(t, nodes, min_dist, quiet) {
|
|
92
|
+
if (!quiet) pb <- txtProgressBar(1, length(nodes), style = 3)
|
|
93
|
+
for (i in 1:length(nodes)) {
|
|
94
|
+
node.name <- nodes[i]
|
|
95
|
+
if (!quiet) setTxtProgressBar(pb, i)
|
|
96
|
+
|
|
91
97
|
# Get node ID
|
|
92
|
-
node <- which(t$tip.label==node.name)
|
|
93
|
-
if(length(node)==0) next
|
|
98
|
+
node <- which(t$tip.label == node.name)
|
|
99
|
+
if (length(node) == 0) next
|
|
100
|
+
|
|
94
101
|
# Get parent and distance to parent
|
|
95
|
-
parent.node <- t$edge[
|
|
102
|
+
parent.node <- t$edge[t$edge[, 2] == node, 1]
|
|
103
|
+
|
|
96
104
|
# Get edges to parent
|
|
97
|
-
parent.edges <- which(t$edge[,1]==parent.node)
|
|
98
|
-
stopit <- FALSE
|
|
99
|
-
for(j in parent.edges){
|
|
100
|
-
for(k in parent.edges){
|
|
101
|
-
if(j != k & t$edge[j,2]<length(t$tip.label) &
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
+
parent.edges <- which(t$edge[, 1] == parent.node)
|
|
106
|
+
stopit <- FALSE
|
|
107
|
+
for (j in parent.edges) {
|
|
108
|
+
for (k in parent.edges) {
|
|
109
|
+
if (j != k & t$edge[j,2]<length(t$tip.label) &
|
|
110
|
+
t$edge[k,2]<length(t$tip.label) &
|
|
111
|
+
sum(t$edge.length[c(j,k)]) < min_dist) {
|
|
112
|
+
t <- ape::drop.tip(t, t$edge[k,2])
|
|
113
|
+
stopit <- TRUE
|
|
114
|
+
break
|
|
105
115
|
}
|
|
106
116
|
}
|
|
107
|
-
if(stopit) break
|
|
117
|
+
if (stopit) break
|
|
108
118
|
}
|
|
109
119
|
}
|
|
110
|
-
if(!quiet) cat(
|
|
111
|
-
return(t)
|
|
120
|
+
if (!quiet) cat("\n")
|
|
121
|
+
return(t)
|
|
112
122
|
}
|
|
113
123
|
|
|
114
124
|
#' Enveomics: Prune Iter (Internal Function)
|
|
115
125
|
#'
|
|
116
126
|
#' Internal function for \code{\link{enve.prune.dist}}.
|
|
117
127
|
#'
|
|
118
|
-
#' @param t A \strong{phylo} object
|
|
119
|
-
#' @param dist Cophenetic distance matrix
|
|
120
|
-
#' @param min_dist Minimum distance
|
|
121
|
-
#' @param quiet If running quietly
|
|
128
|
+
#' @param t A \strong{phylo} object.
|
|
129
|
+
#' @param dist Cophenetic distance matrix.
|
|
130
|
+
#' @param min_dist Minimum distance.
|
|
131
|
+
#' @param quiet If running quietly.
|
|
132
|
+
#'
|
|
133
|
+
#' @return Returns a \strong{phylo} object.
|
|
122
134
|
#'
|
|
123
135
|
#' @author Luis M. Rodriguez-R [aut, cre]
|
|
124
136
|
#'
|
|
125
137
|
#' @export
|
|
126
138
|
|
|
127
|
-
enve.__prune.iter <- function
|
|
128
|
-
(t
|
|
129
|
-
|
|
130
|
-
min_dist,
|
|
131
|
-
quiet){
|
|
132
|
-
ori_len <- length(t$tip.label);
|
|
139
|
+
enve.__prune.iter <- function(t, dist, min_dist, quiet) {
|
|
140
|
+
ori_len <- length(t$tip.label)
|
|
141
|
+
|
|
133
142
|
# Prune
|
|
134
|
-
if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3)
|
|
135
|
-
ignore <- c()
|
|
136
|
-
for(i in 1:(ncol(dist)-1)){
|
|
137
|
-
if(i %in% ignore) next
|
|
138
|
-
for(j in (i+1):nrow(dist)){
|
|
139
|
-
if(dist[j, i]<min_dist){
|
|
140
|
-
t <- ape::drop.tip(t, rownames(dist)[j])
|
|
141
|
-
ignore <- c(ignore, j)
|
|
142
|
-
break
|
|
143
|
+
if (!quiet) pb <- txtProgressBar(1, ncol(dist) - 1, style = 3)
|
|
144
|
+
ignore <- c()
|
|
145
|
+
for (i in 1:(ncol(dist) - 1)) {
|
|
146
|
+
if (i %in% ignore) next
|
|
147
|
+
for (j in (i + 1):nrow(dist)) {
|
|
148
|
+
if (dist[j, i] < min_dist) {
|
|
149
|
+
t <- ape::drop.tip(t, rownames(dist)[j])
|
|
150
|
+
ignore <- c(ignore, j)
|
|
151
|
+
break
|
|
143
152
|
}
|
|
144
153
|
}
|
|
145
|
-
if(!quiet) setTxtProgressBar(pb, i)
|
|
154
|
+
if (!quiet) setTxtProgressBar(pb, i)
|
|
146
155
|
}
|
|
147
|
-
if(!quiet) cat(
|
|
156
|
+
if(!quiet) cat("\n")
|
|
157
|
+
|
|
148
158
|
# Check if it dropped tips
|
|
149
|
-
cur_len <- length(t$tip.label)
|
|
150
|
-
if(cur_len == ori_len){
|
|
151
|
-
stop(
|
|
159
|
+
cur_len <- length(t$tip.label)
|
|
160
|
+
if (cur_len == ori_len){
|
|
161
|
+
stop(
|
|
162
|
+
"Internal error: small edge found in tree, with no equivalent in distance matrix.\n"
|
|
163
|
+
)
|
|
152
164
|
}
|
|
153
|
-
return(t)
|
|
165
|
+
return(t)
|
|
154
166
|
}
|
|
155
167
|
|