miga-base 0.3.9.0 → 0.3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/actions/add.rb +33 -33
  3. data/actions/edit.rb +33 -0
  4. data/actions/new.rb +17 -18
  5. data/actions/next_step.rb +33 -0
  6. data/actions/run.rb +15 -12
  7. data/bin/miga +43 -37
  8. data/lib/miga/daemon.rb +2 -2
  9. data/lib/miga/project/result.rb +16 -1
  10. data/lib/miga/version.rb +2 -2
  11. data/scripts/aai_distances.bash +1 -3
  12. data/scripts/ani_distances.bash +1 -3
  13. data/scripts/assembly.bash +1 -3
  14. data/scripts/cds.bash +1 -3
  15. data/scripts/clade_finding.bash +1 -3
  16. data/scripts/d.bash +13 -0
  17. data/scripts/distances.bash +1 -3
  18. data/scripts/essential_genes.bash +1 -3
  19. data/scripts/haai_distances.bash +1 -3
  20. data/scripts/miga.bash +12 -9
  21. data/scripts/mytaxa.bash +1 -3
  22. data/scripts/mytaxa_scan.bash +1 -3
  23. data/scripts/ogs.bash +36 -33
  24. data/scripts/p.bash +23 -0
  25. data/scripts/project_stats.bash +1 -3
  26. data/scripts/read_quality.bash +1 -3
  27. data/scripts/ssu.bash +1 -3
  28. data/scripts/stats.bash +1 -3
  29. data/scripts/subclades.bash +1 -3
  30. data/scripts/taxonomy.bash +1 -3
  31. data/scripts/trimmed_fasta.bash +1 -3
  32. data/scripts/trimmed_reads.bash +1 -3
  33. data/test/daemon_test.rb +3 -3
  34. data/utils/distance/runner.rb +1 -1
  35. data/utils/enveomics/Docs/recplot2.md +13 -2
  36. data/utils/enveomics/Examples/aai-matrix.bash +3 -3
  37. data/utils/enveomics/Examples/ani-matrix.bash +3 -3
  38. data/utils/enveomics/Makefile +2 -2
  39. data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
  40. data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
  41. data/utils/enveomics/Manifest/Tasks/other.json +49 -0
  42. data/utils/enveomics/Manifest/categories.json +4 -0
  43. data/utils/enveomics/Manifest/examples.json +1 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
  47. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
  48. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
  49. data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
  50. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  51. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  52. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  53. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  54. data/utils/enveomics/Scripts/aai.rb +4 -3
  55. data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
  56. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
  57. data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
  58. data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
  59. data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
  60. data/utils/enveomics/enveomics.R/R/utils.R +19 -1
  61. data/utils/enveomics/enveomics.R/README.md +11 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
  64. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
  65. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
  66. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
  68. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
  69. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
  70. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
  71. data/utils/subclade/runner.rb +4 -0
  72. metadata +14 -3
@@ -0,0 +1,127 @@
#!/usr/bin/env perl

# @author Luis M. Rodriguez-R
# @license Artistic-2.0

use warnings;
use strict;
use List::Util qw/min max/;
use Getopt::Std;

# Prints the usage message through die (so it goes to STDERR and the script
# exits with a non-zero status). Getopt::Std also calls a sub with this name
# when --help is passed.
sub HELP_MESSAGE { die "

Description:
Generates a list of coordinates from a GFF table concatenating the subject
sequences.

See also: BlastTab.recplot2.R and BlastTab.catsbj.pl

Usage:
$0 [options] seq.fa map.gff > abs-coords.tsv

seq.fa Subject sequences (contigs) in FastA format.
map.gff Features to map in GFF.

Options:
-L path Generate a file with the absolute coordinates of the
concatenated contigs. This is identical to the .lim file
generated by BlastTab.catsbj.pl.
-i Preserve exact coordinates and include inter-feature windows as
separate bins. By default, the coordinates are set in the
midpoint between features when non-contiguous.
-s The FastA provided is to be treated as a subset of the subject.
By default, it expects all the contigs to be present in the
GFF.
-q Run quietly.
-h Display this message and exit.

"; }

# Command-line options (see HELP_MESSAGE) and the two mandatory arguments.
my %o;
getopts('L:isqh', \%o);
my($fa, $map) = @ARGV;
($fa and $map) or HELP_MESSAGE();
$o{h} and HELP_MESSAGE();

# %seq is keyed by contig ID. While the FastA is read it accumulates the end
# coordinate of each contig in the concatenation; once the limits are written
# it holds the start coordinate of each contig instead.
# @seq keeps the contig IDs in the order found in the FastA.
my %seq = ();
my @seq = ();

SEQ:{
    print STDERR "== Reading reference sequences\n" unless $o{q};
    open my $fa_fh, "<", $fa or die "Cannot read the file: $fa: $!\n";
    my $cur_seq = '';
    while(my $line = <$fa_fh>){
        chomp $line;
        if($line =~ m/^>(\S+)/){
            my $c = $1;
            # Continue counting one position after the previous contig
            $seq{$c} = exists $seq{$cur_seq} ? $seq{$cur_seq}+1 : 1;
            push @seq, $c;
            $cur_seq = $c;
        }else{
            # Only letters count towards the sequence length
            $seq{$cur_seq} += ($line =~ tr/A-Za-z//);
        }
    }
    close $fa_fh;
    print STDERR " Found ".(scalar @seq)." sequences.\n" unless $o{q};
}

# Write the limits (ID, start, end) of each contig in the concatenation, and
# turn %seq into the per-contig start coordinate used to place the features.
$o{L} ||= '/dev/null';
open my $lim_fh, ">", $o{L} or die "Cannot create the file: $o{L}: $!\n";
my $concat_end = 0; # End coordinate of the last contig processed
for my $s (@seq){
    my $contig_start = $concat_end + 1;
    my $contig_end = $seq{$s};
    print $lim_fh "$s\t$contig_start\t$contig_end\n";
    $seq{$s} = $contig_start;
    $concat_end = $contig_end;
}
# Buffered write errors only surface at close
close $lim_fh or die "Cannot write the file: $o{L}: $!\n";

MAP: {
    print STDERR "== Reading mapping\n" unless $o{q};
    open my $gff_fh, "<", $map or die "Cannot read the file: $map: $!\n";
    my $last_end = 1;     # Translated end of the previous feature (1: none yet)
    my $last_name = "NA"; # Name of the previous feature
    print "1\tNA\tNA\n";
    my $i = 0;            # Number of features mapped so far
    FEATURE: while(my $line = <$gff_fh>){
        next FEATURE if $line =~ /^\s*(#.*)?$/; # Blank or comment lines
        chomp $line;
        my @ln = split /\t/, $line;
        $ln[4] or die "Cannot parse line $map:$.: $line\n";
        unless(exists $seq{$ln[0]}){
            die "Cannot find the subject sequence: $ln[0]\n" unless $o{s};
            next FEATURE;
        }
        $i++;
        # Translate contig-relative coordinates into the concatenation
        my $start = $seq{$ln[0]}+$ln[3];
        my $end = $seq{$ln[0]}+$ln[4];
        my $name = "feat_$i";
        # The attributes column may be missing in truncated lines
        my $attr = defined $ln[8] ? $ln[8] : '';
        if($attr =~ /^gene_id=(\d+)/){ # <- GeneMark style
            $name = "gene_id_$1";
        }elsif($attr =~ /^ID=\d+_(\d+)/){ # <- Prodigal style
            $name = $ln[0]."_".$1;
        }elsif($attr =~ /^ID=([^;]+)/){
            $name = $1;
        }
        if($o{i}){
            # Exact coordinates: emit the gap (if any) and then the feature
            $start = $last_end if $start < $last_end;
            print "$start\t$last_name~$name\tGAP\n" unless $start==$last_end;
            print "$end\t$name\tFEAT\n";
        }else{
            # Close the previous feature's bin at the midpoint between its end
            # and the start of the current feature
            my $midpoint = int(($last_end + $start)/2);
            print "$midpoint\t$last_name\tFEAT\n" unless $last_end==1;
        }
        $last_name = $name;
        $last_end = $end;
    }
    # Close the last bin at the end of the concatenated sequence
    if($last_end > 1){
        if($o{i}){
            print "$concat_end\t$last_name~NA\tGAP\n"
                unless $last_end==$concat_end;
        }else{
            print "$concat_end\t$last_name\tFEAT\n";
        }
    }
    close $gff_fh;
    print STDERR " done.\n" unless $o{q};
}

@@ -261,7 +261,8 @@ Dir.mktmpdir do |dir|
261
261
  # Nothing to do
262
262
  when "diamond"
263
263
  `"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
264
- --db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"`
264
+ --db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}" \
265
+ --quiet`
265
266
  else
266
267
  abort "Unsupported program: #{o[:program]}."
267
268
  end
@@ -297,9 +298,9 @@ Dir.mktmpdir do |dir|
297
298
  `sort -k 1 "#{dir}/#{i}.tab.uns" > "#{dir}/#{i}.tab"`
298
299
  when "diamond"
299
300
  `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
300
- --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" \
301
+ --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
301
302
  && "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt 6 \
302
- --out "#{dir}/#{i}.tab"`
303
+ --out "#{dir}/#{i}.tab" --quiet`
303
304
  else
304
305
  abort "Unsupported program: #{o[:program]}."
305
306
  end
@@ -1 +1 @@
1
- ../../enveomics.R
1
+ utils/enveomics/Scripts/lib/../../enveomics.R
@@ -1,5 +1,5 @@
1
1
  Package: enveomics.R
2
- Version: 1.3
2
+ Version: 1.4.4
3
3
  Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
4
4
  email="lmrodriguezr@gmail.com"))
5
5
  Title: Various Utilities for Microbial Genomics and Metagenomics
@@ -15,7 +15,6 @@ Depends:
15
15
  stats,
16
16
  methods,
17
17
  parallel,
18
- modeest,
19
18
  fitdistrplus,
20
19
  sn,
21
20
  investr
@@ -1,5 +1,4 @@
1
1
  import(methods)
2
- import(modeest)
3
2
  import(parallel)
4
3
  import(fitdistrplus)
5
4
  import(sn)
@@ -33,7 +32,8 @@ export(
33
32
  enve.recplot2.findPeaks.emauto, enve.recplot2.findPeaks.em,
34
33
  enve.recplot2.findPeaks.mower,
35
34
  enve.recplot2.corePeak, enve.recplot2.changeCutoff,
36
- enve.recplot2.extractWindows, enve.recplot2.compareIdentities,
35
+ enve.recplot2.windowDepthThreshold, enve.recplot2.extractWindows,
36
+ enve.recplot2.compareIdentities,
37
37
  enve.recplot2.coordinates, enve.recplot2.seqdepth, enve.recplot2.ANIr,
38
38
  enve.growthcurve, plot.enve.GrowthCurve, summary.enve.GrowthCurve,
39
- enve.col2alpha)
39
+ enve.col2alpha, enve.truncate)
@@ -55,7 +55,7 @@ enve.recplot <- structure(function(
55
55
  ret.recplot=FALSE,
56
56
  ### Indicates if the matrix of the recruitment plot is to be returned.
57
57
  ret.hist=FALSE,
58
- ### Indicates if the vectors of the identity and position histograms are to be returned.
58
+ ### Ignored, for backwards compatibility.
59
59
  ret.mode=FALSE,
60
60
  ### Indicates if the mode of the identity is to be computed. It requires the modeest
61
61
  ### package.
@@ -205,7 +205,6 @@ enve.recplot <- structure(function(
205
205
  legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
206
206
  out <- c(out, list(id.mean=mean(rec[, id.reccol])));
207
207
  out <- c(out, list(id.median=median(rec[, id.reccol])));
208
- if(ret.mode) out <- c(out, list(id.mode=modeest::mlv(rec[, id.reccol], method='mfv')$M));
209
208
  if(ret.hist) out <- c(out, list(id.hist=id.hist));
210
209
 
211
210
  # Position histogram
@@ -287,7 +286,7 @@ enve.recplot <- structure(function(
287
286
  ###
288
287
  ### id.median: Median identity.
289
288
  ###
290
- ### id.mode (if ret.mode=TRUE): Mode of the identity.
289
+ ### id.mode (if ret.mode=TRUE): Mode of the identity. Deprecated.
291
290
  ###
292
291
  ### id.hist (if ret.hist=TRUE): Values of the identity histogram.
293
292
  ###