miga-base 1.2.17.1 → 1.2.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/version.rb +2 -2
  3. data/utils/enveomics/Manifest/Tasks/mapping.json +39 -11
  4. data/utils/enveomics/Manifest/Tasks/remote.json +2 -1
  5. data/utils/enveomics/Scripts/BedGraph.tad.rb +98 -53
  6. data/utils/enveomics/Scripts/SRA.download.bash +14 -2
  7. data/utils/enveomics/Tests/low-cov.bg.gz +0 -0
  8. data/utils/enveomics/enveomics.R/DESCRIPTION +5 -5
  9. data/utils/enveomics/enveomics.R/R/autoprune.R +99 -87
  10. data/utils/enveomics/enveomics.R/R/barplot.R +116 -97
  11. data/utils/enveomics/enveomics.R/R/cliopts.R +65 -59
  12. data/utils/enveomics/enveomics.R/R/df2dist.R +96 -58
  13. data/utils/enveomics/enveomics.R/R/growthcurve.R +166 -148
  14. data/utils/enveomics/enveomics.R/R/recplot.R +201 -136
  15. data/utils/enveomics/enveomics.R/R/recplot2.R +371 -304
  16. data/utils/enveomics/enveomics.R/R/tribs.R +318 -263
  17. data/utils/enveomics/enveomics.R/R/utils.R +30 -20
  18. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +4 -3
  19. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +2 -2
  20. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +3 -3
  21. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +7 -4
  22. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +7 -4
  23. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +4 -0
  24. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +25 -17
  25. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +10 -0
  26. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +8 -2
  27. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +14 -0
  28. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +20 -1
  29. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +2 -3
  30. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +5 -2
  31. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +50 -42
  32. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +5 -2
  33. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +3 -0
  34. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +3 -0
  35. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +3 -0
  36. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +3 -0
  37. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +9 -4
  38. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +3 -0
  39. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +3 -3
  40. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +0 -2
  41. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +4 -0
  42. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +5 -0
  43. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +11 -7
  44. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +5 -1
  45. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +3 -0
  46. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
  47. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +3 -3
  48. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +2 -2
  49. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +3 -0
  50. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +3 -0
  51. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +6 -3
  52. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
  53. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +3 -0
  54. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +3 -0
  55. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +3 -0
  56. metadata +3 -37
  57. data/utils/enveomics/Pipelines/assembly.pbs/CONFIG.mock.bash +0 -69
  58. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +0 -1
  59. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +0 -1
  60. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +0 -1
  61. data/utils/enveomics/Pipelines/assembly.pbs/README.md +0 -189
  62. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-2.bash +0 -112
  63. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-3.bash +0 -23
  64. data/utils/enveomics/Pipelines/assembly.pbs/RUNME-4.bash +0 -44
  65. data/utils/enveomics/Pipelines/assembly.pbs/RUNME.bash +0 -50
  66. data/utils/enveomics/Pipelines/assembly.pbs/kSelector.R +0 -37
  67. data/utils/enveomics/Pipelines/assembly.pbs/newbler.pbs +0 -68
  68. data/utils/enveomics/Pipelines/assembly.pbs/newbler_preparator.pl +0 -49
  69. data/utils/enveomics/Pipelines/assembly.pbs/soap.pbs +0 -80
  70. data/utils/enveomics/Pipelines/assembly.pbs/stats.pbs +0 -57
  71. data/utils/enveomics/Pipelines/assembly.pbs/velvet.pbs +0 -63
  72. data/utils/enveomics/Pipelines/blast.pbs/01.pbs.bash +0 -38
  73. data/utils/enveomics/Pipelines/blast.pbs/02.pbs.bash +0 -73
  74. data/utils/enveomics/Pipelines/blast.pbs/03.pbs.bash +0 -21
  75. data/utils/enveomics/Pipelines/blast.pbs/BlastTab.recover_job.pl +0 -72
  76. data/utils/enveomics/Pipelines/blast.pbs/CONFIG.mock.bash +0 -98
  77. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +0 -1
  78. data/utils/enveomics/Pipelines/blast.pbs/README.md +0 -127
  79. data/utils/enveomics/Pipelines/blast.pbs/RUNME.bash +0 -109
  80. data/utils/enveomics/Pipelines/blast.pbs/TASK.check.bash +0 -128
  81. data/utils/enveomics/Pipelines/blast.pbs/TASK.dry.bash +0 -16
  82. data/utils/enveomics/Pipelines/blast.pbs/TASK.eo.bash +0 -22
  83. data/utils/enveomics/Pipelines/blast.pbs/TASK.pause.bash +0 -26
  84. data/utils/enveomics/Pipelines/blast.pbs/TASK.run.bash +0 -89
  85. data/utils/enveomics/Pipelines/blast.pbs/sentinel.pbs.bash +0 -29
  86. data/utils/enveomics/Pipelines/idba.pbs/README.md +0 -49
  87. data/utils/enveomics/Pipelines/idba.pbs/RUNME.bash +0 -95
  88. data/utils/enveomics/Pipelines/idba.pbs/run.pbs +0 -56
  89. data/utils/enveomics/Pipelines/trim.pbs/README.md +0 -54
  90. data/utils/enveomics/Pipelines/trim.pbs/RUNME.bash +0 -70
  91. data/utils/enveomics/Pipelines/trim.pbs/run.pbs +0 -130
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 608607327562dd08edc9f866aeeb566407eb85f0adcf8538c37789962c387f72
4
- data.tar.gz: b21cb37dcae1eab3551d2058f21543221a9b6b9a5b6c834074a4d7c6c60a7102
3
+ metadata.gz: 50f8bda6b07f6b7da95f3342e96b290c401a8810c35e2406309d28559111de2d
4
+ data.tar.gz: 99dd4709f330f90fc71b213c42ce60bade2ac32127fb123ae450cb0a54885176
5
5
  SHA512:
6
- metadata.gz: 6708285348840ed44251d64d003c477a0887db497ccfaf97ce17f398b650156420897fc07036cbc395b4107c71bdb6638ad756442e364e530363a36aca8ec9a3
7
- data.tar.gz: fce107bebd89fd53f07d0ca5b814564b76c00b6faacbb0c89b91c0a114b2c5d5911b446fa27f1db5cecab3d74dee2ab3c2d6b10538a3e33056ab2b2ba766c2e9
6
+ metadata.gz: 18666049e37b300ceaf367cd1fa1847556a1a84a6878095b9e931c6665f1559a427e00a844ec764de470ab25b92ebd1c48753b782a89c779a4974f2eb63b5385
7
+ data.tar.gz: c2b4d46070dc7fa7bef02dc7a4147472a32d1863dd86871a2c29c9fde4a1ea906f3d29659bbadab24ebcd57095d3db826120bff8bb831994d6dc6f372d6b569e
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.2, 17, 1].freeze
15
+ VERSION = [1.2, 17, 2].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2023, 2, 14)
23
+ VERSION_DATE = Date.new(2023, 2, 21)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -2,33 +2,61 @@
2
2
  "tasks": [
3
3
  {
4
4
  "task": "BedGraph.tad.rb",
5
- "description": ["Estimates the truncated average sequencing depth (TAD)",
6
- "from a BedGraph file."],
7
- "warn": ["This script doesn't consider zero-coverage positions if",
5
+ "description": [
6
+ "Estimates the truncated average sequencing depth (TAD)",
7
+ "from a BedGraph file."
8
+ ],
9
+ "warn": [
10
+ "This script doesn't consider zero-coverage positions if",
8
11
  "missing from the file. If you produce your BedGraph file with",
9
12
  "bedtools genomecov and want to consider zero-coverage position, be",
10
- "sure to use -bga (not -bg)."],
11
- "see_also": ["BedGraph.window.rb",
12
- "BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"],
13
+ "sure to use -bga (not -bg)."
14
+ ],
15
+ "see_also": [
16
+ "BedGraph.window.rb", "BlastTab.seqdepth.pl", "BlastTab.seqdepth_ZIP.pl"
17
+ ],
13
18
  "help_arg": "--help",
14
19
  "options": [
15
20
  {
16
21
  "opt": "--input",
17
22
  "arg": "in_file",
18
23
  "mandatory": true,
19
- "description": "Input BedGraph file."
24
+ "description": [
25
+ "Input BedGraph file.",
26
+ "Supports compression with .gz extension, use - for STDIN."
27
+ ]
28
+ },
29
+ {
30
+ "opt": "--output",
31
+ "arg": "out_file",
32
+ "default": "-",
33
+ "description": [
34
+ "Output tab-delimited values (by default, STDOUT).",
35
+ "Supports compression with .gz extension, use - for STDOUT."
36
+ ]
20
37
  },
21
38
  {
22
39
  "opt": "--range",
23
40
  "arg": "float",
24
41
  "default": 0.5,
25
- "description": ["Central range to consider, between 0 and 1. By",
26
- "default: inter-quartile range (0.5)."]
42
+ "description": [
43
+ "Central range to consider, between 0 and 1.",
44
+ "By default: inter-quartile range (0.5)."
45
+ ]
46
+ },
47
+ {
48
+ "opt": "--name",
49
+ "arg": "string",
50
+ "description": [
51
+ "Name (ID) of the sequence, added as first column."
52
+ ]
27
53
  },
28
54
  {
29
55
  "opt": "--per-seq",
30
- "description": ["Calculate averages per reference sequence, not",
31
- "total. Assumes a sorted BedGraph file."]
56
+ "description": [
57
+ "Calculate averages per reference sequence, not total.",
58
+ "Assumes a sorted BedGraph file."
59
+ ]
32
60
  },
33
61
  {
34
62
  "opt": "--length",
@@ -184,7 +184,8 @@
184
184
  {
185
185
  "task": "SRA.download.bash",
186
186
  "description": ["Downloads the set of runs from a project, sample, or",
187
- "experiment in SRA."],
187
+ "experiment in SRA. If the expected file already exists, skips the",
188
+ "file if the MD5 hash matches."],
188
189
  "help_arg": "",
189
190
  "requires": [
190
191
  {
@@ -1,93 +1,138 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'optparse'
3
+ $VERSION = 1.01
4
+ $:.push File.expand_path('../lib', __FILE__)
5
+ require 'enveomics_rb/enveomics'
4
6
 
5
- o = {range: 0.5, perseq: false, length: false}
6
- ARGV << '-h' if ARGV.empty?
7
- OptionParser.new do |opt|
8
- opt.banner = "
9
- Estimates the truncated average sequencing depth (TAD) from a BedGraph file.
7
+ o = { range: 0.5, perseq: false, length: false, o: '-' }
8
+ OptionParser.new do |opts|
9
+ opts.version = $VERSION
10
+ banner = <<~BANNER
11
+ Estimates the truncated average sequencing depth (TAD) from a BedGraph file
10
12
 
11
- IMPORTANT: This script doesn't consider zero-coverage positions if missing
12
- from the file. If you produce your BedGraph file with bedtools genomecov and
13
- want to consider zero-coverage position, be sure to use -bga (not -bg).
13
+ IMPORTANT: This script doesn't consider zero-coverage positions if missing
14
+ from the file. If you produce your BedGraph file with bedtools genomecov and
15
+ want to consider zero-coverage position, be sure to use -bga (not -bg).
16
+ BANNER
17
+ Enveomics.opt_banner(opts, banner, "#{File.basename($0)} [options]")
14
18
 
15
- Usage: #{$0} [options]"
16
- opt.separator ''
17
- opt.on('-i', '--input PATH',
18
- 'Input BedGraph file (mandatory).'){ |v| o[:i]=v }
19
- opt.on('-r', '--range FLOAT',
20
- 'Central range to consider, between 0 and 1.',
21
- "By default: #{o[:range]} (inter-quartile range)."
22
- ){ |v| o[:range]=v.to_f }
23
- opt.on('-s', '--per-seq',
24
- 'Calculate averages per reference sequence, not total.',
25
- 'Assumes a sorted BedGraph file.'
26
- ){ |v| o[:perseq] = v }
27
- opt.on('-l', '--length',
28
- 'Add sequence length to the output.'){ |v| o[:length] = v }
29
- opt.on('-h', '--help', 'Display this screen.') do
30
- puts opt
19
+ opts.separator 'Mandatory'
20
+ opts.on(
21
+ '-i', '--input PATH',
22
+ 'Input BedGraph file',
23
+ 'Supports compression with .gz extension, use - for STDIN'
24
+ ) { |v| o[:i] = v }
25
+
26
+ opts.separator ''
27
+ opts.separator 'Other Options'
28
+ opts.on(
29
+ '-o', '--out PATH',
30
+ 'Output tab-delimited values (by default, STDOUT)',
31
+ 'Supports compression with .gz extension, use - for STDOUT'
32
+ ) { |v| o[:o] = v }
33
+ opts.on(
34
+ '-r', '--range FLOAT', Float,
35
+ 'Central range to consider, between 0 and 1',
36
+ "By default: #{o[:range]} (inter-quartile range)"
37
+ ) { |v| o[:range] = v }
38
+ opts.on(
39
+ '-n', '--name STRING',
40
+ 'Name (ID) of the sequence (added as first column)'
41
+ ) { |v| o[:name] = v }
42
+ opts.on(
43
+ '-s', '--per-seq',
44
+ 'Calculate averages per reference sequence, not total',
45
+ 'Assumes a sorted BedGraph file'
46
+ ) { |v| o[:perseq] = v }
47
+ opts.on(
48
+ '-l', '--length',
49
+ 'Add sequence length to the output'
50
+ ) { |v| o[:length] = v }
51
+ opts.on(
52
+ '-b', '--breadth',
53
+ 'Add sequencing breadth to the output'
54
+ ) { |v| o[:breadth] = v }
55
+ opts.on('-h', '--help', 'Display this screen') do
56
+ puts opts
31
57
  exit
32
58
  end
33
- opt.separator ''
59
+ opts.separator ''
34
60
  end.parse!
35
- abort '-i is mandatory.' if o[:i].nil?
61
+ raise Enveomics::OptionError.new('-i is mandatory') if o[:i].nil?
36
62
 
63
+ ##
64
+ # Pad an array to include all index values up to +r+ entries:
65
+ # - d: Array of [ depth => counts ]
66
+ # - idx: Array of [ depth, depth, ... ]
67
+ # - r: Expected number of entries in the array
37
68
  def pad(d, idx, r)
38
69
  idx.each do |i|
39
70
  next if d[i].nil?
71
+
40
72
  d[i] -= r
41
73
  break unless d[i] < 0
74
+
42
75
  r = -d[i]
43
76
  d[i] = nil
44
77
  end
45
78
  d
46
79
  end
47
80
 
81
+ ##
82
+ # Report the results for:
83
+ # - sq: Contig ID
84
+ # - d: Array of [ depth => counts ]
85
+ # - ln: Length of the sequence
86
+ # - o: CLI Options
48
87
  def report(sq, d, ln, o)
49
88
  # Estimate padding ranges
50
- pad = (1.0-o[:range])/2.0
51
- r = (pad*ln).round
89
+ pad = (1.0 - o[:range]) / 2.0
90
+ r = (pad * ln).round
91
+ zeroes = d[0].to_i
52
92
 
53
- # Pad
54
- d = pad(d, d.each_index.to_a, r+0)
55
- d = pad(d, d.each_index.to_a.reverse, r+0)
93
+ # Pad (truncation)
94
+ d = pad(d, d.each_index.to_a, r + 0)
95
+ d = pad(d, d.each_index.to_a.reverse, r + 0)
56
96
 
57
97
  # Average
58
98
  y = [0.0]
59
99
  unless d.compact.empty?
60
- s = d.each_with_index.to_a.map{ |v,i| v.nil? ? 0 : i*v }.inject(0,:+)
61
- y[0] = s.to_f/d.compact.inject(:+)
100
+ s = d.each_with_index.to_a.map { |v, i| v.nil? ? 0 : i * v }.inject(0, :+)
101
+ y[0] = s.to_f / d.compact.inject(:+)
62
102
  end
63
103
 
64
104
  # Report
65
105
  y.unshift(sq) if o[:perseq]
106
+ y.unshift(o[:name]) if o[:name]
66
107
  y << ln if o[:length]
67
- puts y.join("\t")
108
+ y << (ln - zeroes).to_f / ln if o[:breadth]
109
+ y.join("\t")
68
110
  end
69
111
 
70
112
  # Read BedGraph
71
- d = []
113
+ d = [] # [ depth => count ]
72
114
  ln = 0
73
115
  pre_sq = nil
74
- File.open(o[:i], "r") do |ifh|
75
- ifh.each_line do |i|
76
- next if i =~ /^#/
77
- r = i.chomp.split("\t")
78
- sq = r.shift
79
- if o[:perseq] and !pre_sq.nil? and pre_sq!=sq
80
- report(pre_sq, d, ln, o)
81
- d = []
82
- ln = 0
83
- end
84
- r.map! { |j| j.to_i }
85
- l = r[1]-r[0]
86
- d[ r[2] ] ||= 0
87
- d[ r[2] ] += l
88
- ln += l
89
- pre_sq = sq
116
+ ifh = reader(o[:i])
117
+ ofh = writer(o[:o])
118
+ ifh.each_line do |i|
119
+ next if i =~ /^#/
120
+ r = i.chomp.split("\t")
121
+ sq = r.shift # Contig ID
122
+ if o[:perseq] && !pre_sq.nil? && pre_sq != sq
123
+ ofh.puts(report(pre_sq, d, ln, o))
124
+ d = []
125
+ ln = 0
90
126
  end
127
+ r.map!(&:to_i) # From, To, Depth
128
+ l = r[1] - r[0] # Window length: To - From
129
+ d[ r[2] ] ||= 0
130
+ d[ r[2] ] += l # Add these "l" positions with depth "Depth"
131
+ ln += l
132
+ pre_sq = sq
91
133
  end
92
- report(pre_sq, d, ln, o)
134
+ ofh.puts(report(pre_sq, d, ln, o))
135
+
136
+ ifh.close
137
+ ofh.close
93
138
 
@@ -4,13 +4,14 @@ DATA_LINK="https://www.ebi.ac.uk/ena/portal/api/filereport"
4
4
  DATA_OPS="result=read_run&fields=run_accession,fastq_ftp,fastq_md5"
5
5
  SRX=$1
6
6
  DIR=${2:-$SRX}
7
- VERSION=1.0
7
+ VERSION=2.0
8
8
 
9
9
  if [[ "$SRX" == "" ]] ; then
10
10
  echo "
11
11
  [Enveomics Collection: $(basename "$0" .bash) $VERSION]
12
12
 
13
13
  Downloads the set of runs from a project, sample, or experiment in SRA.
14
+ If the expected file already exists, skips the file if the MD5 hash matches.
14
15
 
15
16
  Usage:
16
17
  $(basename "$0") <SRA-ID>[ <dir>]
@@ -42,9 +43,20 @@ tail -n +2 "$DIR/srr_list.txt" | while read ln ; do
42
43
  echo "o $srr" >&2
43
44
  for uri in $(echo "$ftp" | tr ";" " ") ; do
44
45
  file="$dir/$(basename $uri)"
46
+
47
+ # Check if it exists and it's complete
48
+ if [[ -s "$file" ]] ; then
49
+ md5obs=$(md5value "$file" 2> /dev/null)
50
+ if [[ "$md5;" == "$md5obs;"* ]] ; then
51
+ md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
52
+ continue
53
+ fi
54
+ fi
55
+
56
+ # Otherwise, download and check MD5
45
57
  curl "$uri" -o "$file"
46
58
  md5obs=$(md5value "$file" 2> /dev/null)
47
- if [[ "$md5" == "$md5obs"* ]] ; then
59
+ if [[ "$md5;" == "$md5obs;"* ]] ; then
48
60
  md5=$(echo "$md5" | perl -pe 's/^[^;]+;//')
49
61
  else
50
62
  echo "Corrupt file: $file" >&2
Binary file
@@ -1,7 +1,7 @@
1
1
  Package: enveomics.R
2
- Version: 1.8.0
3
- Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
4
- email="lmrodriguezr@gmail.com"))
2
+ Version: 1.9.0
3
+ Authors@R: c(person("Luis M.", "Rodriguez-R", role = c("aut", "cre"),
4
+ email = "lmrodriguezr@gmail.com"))
5
5
  Title: Various Utilities for Microbial Genomics and Metagenomics
6
6
  Description: A collection of functions for microbial ecology and other
7
7
  applications of genomics and metagenomics. Companion package for the
@@ -9,7 +9,7 @@ Description: A collection of functions for microbial ecology and other
9
9
  <DOI:10.7287/peerj.preprints.1900v1>).
10
10
  Author: Luis M. Rodriguez-R [aut, cre]
11
11
  Maintainer: Luis M. Rodriguez-R <lmrodriguezr@gmail.com>
12
- URL: http://enve-omics.ce.gatech.edu/enveomics
12
+ URL: http://enve-omics.ce.gatech.edu/enveomics/
13
13
  Depends:
14
14
  R (>= 2.9),
15
15
  stats,
@@ -28,4 +28,4 @@ Suggests:
28
28
  License: Artistic-2.0
29
29
  LazyData: yes
30
30
  Encoding: UTF-8
31
- RoxygenNote: 7.0.2
31
+ RoxygenNote: 7.1.2
@@ -22,134 +22,146 @@
22
22
  #'
23
23
  #' @export
24
24
 
25
- enve.prune.dist <- function
26
- (t,
27
- dist.quantile=0.25,
28
- min_dist,
29
- quiet=FALSE,
30
- max_iters=100,
31
- min_nodes_random=4e4,
32
- random_nodes_frx=1
33
- ){
34
- if(!requireNamespace("ape", quietly=TRUE))
25
+ enve.prune.dist <- function(
26
+ t,
27
+ dist.quantile = 0.25,
28
+ min_dist,
29
+ quiet = FALSE,
30
+ max_iters = 100,
31
+ min_nodes_random = 4e4,
32
+ random_nodes_frx = 1
33
+ ) {
34
+ if (!requireNamespace("ape", quietly = TRUE))
35
35
  stop('Unavailable ape library.');
36
- if(is.character(t)) t <- ape::read.tree(t)
37
- if(missing(min_dist)){
38
- if(dist.quantile>0){
39
- min_dist <- as.numeric(quantile(t$edge.length, dist.quantile));
40
- }else{
41
- min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]));
36
+ if (is.character(t)) t <- ape::read.tree(t)
37
+ if (missing(min_dist)) {
38
+ if (dist.quantile > 0) {
39
+ min_dist <- as.numeric(quantile(t$edge.length, dist.quantile))
40
+ } else {
41
+ min_dist <- as.numeric(min(t$edge.length[t$edge.length>0]))
42
42
  }
43
43
  }
44
- if(!quiet) cat('\nObjective minimum distance: ',min_dist,', initial tips: ',length(t$tip.label),'\n', sep='');
45
- round=1;
46
- while(round <= max_iters){
47
- if(length(t$tip.label) > min_nodes_random){
48
- if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
49
- ', reducing tip-pairs.\n', sep='');
50
- rnd.nodes <- sample(t$tip.label, length(t$tip.label)*random_nodes_frx);
51
- t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet);
52
- }else{
53
- if(!quiet) cat(' Gathering distances...\r');
54
- d <- cophenetic(t);
55
- diag(d) <- NA;
56
- if(!quiet) cat(' | Iter: ',round-1,', Tips: ', length(t$tip.label),
57
- ', Median distance: ', median(d, na.rm=TRUE),
58
- ', Minimum distance: ', min(d, na.rm=TRUE),
59
- '\n', sep='');
44
+ if (!quiet)
45
+ cat("\nObjective minimum distance: ", min_dist, ", initial tips: ",
46
+ length(t$tip.label), "\n", sep = "")
47
+
48
+ round <- 1
49
+ while (round <= max_iters) {
50
+ if (length(t$tip.label) > min_nodes_random) {
51
+ if (!quiet)
52
+ cat(" | Iter: ", round - 1, ", Tips: ", length(t$tip.label),
53
+ ", reducing tip-pairs.\n", sep = "")
54
+ rnd.nodes <- sample(t$tip.label, length(t$tip.label) * random_nodes_frx)
55
+ t <- enve.__prune.reduce(t, rnd.nodes, min_dist, quiet)
56
+ } else {
57
+ if (!quiet) cat(" Gathering distances...\r")
58
+ d <- cophenetic(t)
59
+ diag(d) <- NA
60
+ if(!quiet)
61
+ cat(" | Iter: ", round - 1, ", Tips: ", length(t$tip.label),
62
+ ", Median distance: ", median(d, na.rm = TRUE),
63
+ ", Minimum distance: ", min(d, na.rm = TRUE), "\n", sep = "")
60
64
  # Run iteration
61
- if(min(d, na.rm=TRUE) < min_dist){
62
- t <- enve.__prune.iter(t, d, min_dist, quiet);
63
- }else{
64
- break;
65
+ if (min(d, na.rm = TRUE) < min_dist) {
66
+ t <- enve.__prune.iter(t, d, min_dist, quiet)
67
+ } else {
68
+ break
65
69
  }
66
70
  }
67
- round <- round + 1;
71
+ round <- round + 1
68
72
  }
69
- return(t);
73
+ return(t)
70
74
  }
71
75
 
72
76
  #' Enveomics: Prune Reduce (Internal Function)
73
77
  #'
74
78
  #' Internal function for \code{\link{enve.prune.dist}}.
75
79
  #'
76
- #' @param t A \strong{phylo} object
77
- #' @param nodes Vector of nodes
78
- #' @param min_dist Minimum distance
79
- #' @param quiet If running quietly
80
+ #' @param t A \strong{phylo} object.
81
+ #' @param nodes Vector of nodes.
82
+ #' @param min_dist Minimum distance.
83
+ #' @param quiet If running quietly.
84
+ #'
85
+ #' @return A \strong{phylo} object.
80
86
  #'
81
87
  #' @author Luis M. Rodriguez-R [aut, cre]
82
88
  #'
83
89
  #' @export
84
90
 
85
- enve.__prune.reduce <- function
86
- (t, nodes, min_dist, quiet){
87
- if(!quiet) pb <- txtProgressBar(1, length(nodes), style=3);
88
- for(i in 1:length(nodes)){
89
- node.name <- nodes[i];
90
- if(!quiet) setTxtProgressBar(pb, i);
91
+ enve.__prune.reduce <- function(t, nodes, min_dist, quiet) {
92
+ if (!quiet) pb <- txtProgressBar(1, length(nodes), style = 3)
93
+ for (i in 1:length(nodes)) {
94
+ node.name <- nodes[i]
95
+ if (!quiet) setTxtProgressBar(pb, i)
96
+
91
97
  # Get node ID
92
- node <- which(t$tip.label==node.name);
93
- if(length(node)==0) next;
98
+ node <- which(t$tip.label == node.name)
99
+ if (length(node) == 0) next
100
+
94
101
  # Get parent and distance to parent
95
- parent.node <- t$edge[ t$edge[,2]==node, 1];
102
+ parent.node <- t$edge[t$edge[, 2] == node, 1]
103
+
96
104
  # Get edges to parent
97
- parent.edges <- which(t$edge[,1]==parent.node);
98
- stopit <- FALSE;
99
- for(j in parent.edges){
100
- for(k in parent.edges){
101
- if(j != k & t$edge[j,2]<length(t$tip.label) & t$edge[k,2]<length(t$tip.label) & sum(t$edge.length[c(j,k)]) < min_dist){
102
- t <- ape::drop.tip(t, t$edge[k,2]);
103
- stopit <- TRUE;
104
- break;
105
+ parent.edges <- which(t$edge[, 1] == parent.node)
106
+ stopit <- FALSE
107
+ for (j in parent.edges) {
108
+ for (k in parent.edges) {
109
+ if (j != k & t$edge[j,2]<length(t$tip.label) &
110
+ t$edge[k,2]<length(t$tip.label) &
111
+ sum(t$edge.length[c(j,k)]) < min_dist) {
112
+ t <- ape::drop.tip(t, t$edge[k,2])
113
+ stopit <- TRUE
114
+ break
105
115
  }
106
116
  }
107
- if(stopit) break;
117
+ if (stopit) break
108
118
  }
109
119
  }
110
- if(!quiet) cat('\n');
111
- return(t);
120
+ if (!quiet) cat("\n")
121
+ return(t)
112
122
  }
113
123
 
114
124
  #' Enveomics: Prune Iter (Internal Function)
115
125
  #'
116
126
  #' Internal function for \code{\link{enve.prune.dist}}.
117
127
  #'
118
- #' @param t A \strong{phylo} object
119
- #' @param dist Cophenetic distance matrix
120
- #' @param min_dist Minimum distance
121
- #' @param quiet If running quietly
128
+ #' @param t A \strong{phylo} object.
129
+ #' @param dist Cophenetic distance matrix.
130
+ #' @param min_dist Minimum distance.
131
+ #' @param quiet If running quietly.
132
+ #'
133
+ #' @return Returns a \strong{phylo} object.
122
134
  #'
123
135
  #' @author Luis M. Rodriguez-R [aut, cre]
124
136
  #'
125
137
  #' @export
126
138
 
127
- enve.__prune.iter <- function
128
- (t,
129
- dist,
130
- min_dist,
131
- quiet){
132
- ori_len <- length(t$tip.label);
139
+ enve.__prune.iter <- function(t, dist, min_dist, quiet) {
140
+ ori_len <- length(t$tip.label)
141
+
133
142
  # Prune
134
- if(!quiet) pb <- txtProgressBar(1, ncol(dist)-1, style=3);
135
- ignore <- c();
136
- for(i in 1:(ncol(dist)-1)){
137
- if(i %in% ignore) next;
138
- for(j in (i+1):nrow(dist)){
139
- if(dist[j, i]<min_dist){
140
- t <- ape::drop.tip(t, rownames(dist)[j]);
141
- ignore <- c(ignore, j);
142
- break;
143
+ if (!quiet) pb <- txtProgressBar(1, ncol(dist) - 1, style = 3)
144
+ ignore <- c()
145
+ for (i in 1:(ncol(dist) - 1)) {
146
+ if (i %in% ignore) next
147
+ for (j in (i + 1):nrow(dist)) {
148
+ if (dist[j, i] < min_dist) {
149
+ t <- ape::drop.tip(t, rownames(dist)[j])
150
+ ignore <- c(ignore, j)
151
+ break
143
152
  }
144
153
  }
145
- if(!quiet) setTxtProgressBar(pb, i);
154
+ if (!quiet) setTxtProgressBar(pb, i)
146
155
  }
147
- if(!quiet) cat('\n');
156
+ if(!quiet) cat("\n")
157
+
148
158
  # Check if it dropped tips
149
- cur_len <- length(t$tip.label);
150
- if(cur_len == ori_len){
151
- stop("Internal error: small edge found in tree, with no equivalent in distance matrix.\n");
159
+ cur_len <- length(t$tip.label)
160
+ if (cur_len == ori_len){
161
+ stop(
162
+ "Internal error: small edge found in tree, with no equivalent in distance matrix.\n"
163
+ )
152
164
  }
153
- return(t);
165
+ return(t)
154
166
  }
155
167