miga-base 0.3.9.0 → 0.3.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/actions/add.rb +33 -33
  3. data/actions/edit.rb +33 -0
  4. data/actions/new.rb +17 -18
  5. data/actions/next_step.rb +33 -0
  6. data/actions/run.rb +15 -12
  7. data/bin/miga +43 -37
  8. data/lib/miga/daemon.rb +2 -2
  9. data/lib/miga/project/result.rb +16 -1
  10. data/lib/miga/version.rb +2 -2
  11. data/scripts/aai_distances.bash +1 -3
  12. data/scripts/ani_distances.bash +1 -3
  13. data/scripts/assembly.bash +1 -3
  14. data/scripts/cds.bash +1 -3
  15. data/scripts/clade_finding.bash +1 -3
  16. data/scripts/d.bash +13 -0
  17. data/scripts/distances.bash +1 -3
  18. data/scripts/essential_genes.bash +1 -3
  19. data/scripts/haai_distances.bash +1 -3
  20. data/scripts/miga.bash +12 -9
  21. data/scripts/mytaxa.bash +1 -3
  22. data/scripts/mytaxa_scan.bash +1 -3
  23. data/scripts/ogs.bash +36 -33
  24. data/scripts/p.bash +23 -0
  25. data/scripts/project_stats.bash +1 -3
  26. data/scripts/read_quality.bash +1 -3
  27. data/scripts/ssu.bash +1 -3
  28. data/scripts/stats.bash +1 -3
  29. data/scripts/subclades.bash +1 -3
  30. data/scripts/taxonomy.bash +1 -3
  31. data/scripts/trimmed_fasta.bash +1 -3
  32. data/scripts/trimmed_reads.bash +1 -3
  33. data/test/daemon_test.rb +3 -3
  34. data/utils/distance/runner.rb +1 -1
  35. data/utils/enveomics/Docs/recplot2.md +13 -2
  36. data/utils/enveomics/Examples/aai-matrix.bash +3 -3
  37. data/utils/enveomics/Examples/ani-matrix.bash +3 -3
  38. data/utils/enveomics/Makefile +2 -2
  39. data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
  40. data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
  41. data/utils/enveomics/Manifest/Tasks/other.json +49 -0
  42. data/utils/enveomics/Manifest/categories.json +4 -0
  43. data/utils/enveomics/Manifest/examples.json +1 -1
  44. data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
  45. data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
  46. data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
  47. data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
  48. data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
  49. data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
  50. data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
  51. data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
  52. data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
  53. data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
  54. data/utils/enveomics/Scripts/aai.rb +4 -3
  55. data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
  56. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
  57. data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
  58. data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
  59. data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
  60. data/utils/enveomics/enveomics.R/R/utils.R +19 -1
  61. data/utils/enveomics/enveomics.R/README.md +11 -0
  62. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
  63. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
  64. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
  65. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
  66. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
  67. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
  68. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
  69. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
  70. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
  71. data/utils/subclade/runner.rb +4 -0
  72. metadata +14 -3
@@ -0,0 +1,127 @@
#!/usr/bin/env perl

# @author Luis M. Rodriguez-R
# @license Artistic-2.0

# Translates the features of a GFF table into coordinates on the
# concatenation of the sequences of a FastA file, and prints one bin limit
# per line to STDOUT as: coordinate <tab> name <tab> type.

use warnings;
use strict;
use List::Util qw/min max/;
use Getopt::Std;

# Aborts the script (via die) with the usage message. The message ends in a
# newline, so Perl does not append file/line information to it.
# Getopt::Std also looks for a sub with this name in main:: to serve --help.
sub HELP_MESSAGE { die "

Description:
Generates a list of coordinates from a GFF table concatenating the subject
sequences.

See also: BlastTab.recplot2.R and BlastTab.catsbj.pl

Usage:
$0 [options] seq.fa map.gff > abs-coords.tsv

seq.fa Subject sequences (contigs) in FastA format.
map.gff Features to map in GFF.

Options:
-L path Generate a file with the absolute coordinates of the
concatenated contigs. This is identical to the .lim file
generated by BlastTab.catsbj.pl.
-i Preserve exact coordinates and include inter-feature windows as
separate bins. By default, the coordinates are set in the
midpoint between features when non-contiguous.
-s The FastA provided is to be treated as a subset of the subject.
By default, it expects all the contigs to be present in the
BLAST.
-q Run quietly.
-h Display this message and exit.

"; }

# Command-line handling: both positional arguments are mandatory.
my %o;
getopts('L:isqh', \%o);
my($fa, $map) = @ARGV;
($fa and $map) or HELP_MESSAGE();
$o{h} and HELP_MESSAGE();

my %seq = (); # Per-sequence coordinate (running total, then start; see below)
my @seq = (); # Sequence IDs in the order they appear in the FastA

SEQ:{
   print STDERR "== Reading reference sequences\n" unless $o{q};
   open my $fa_fh, "<", $fa or die "Cannot read the file: $fa: $!\n";
   my $cur_seq = '';
   while(<$fa_fh>){
      chomp;
      if(m/^>(\S+)/){
         # New record: continue the running total from the previous record
         # plus one, or start at 1 if this is the first record.
         my $c = $1;
         $seq{$c} = exists $seq{$cur_seq} ? $seq{$cur_seq}+1 : 1;
         push @seq, $c;
         $cur_seq = $c;
      }else{
         # Sequence line: only alphabetic characters count as residues.
         s/[^A-Za-z]//g;
         $seq{$cur_seq} += length $_;
      }
   }
   close $fa_fh;
   print STDERR " Found ".(scalar @seq)." sequences.\n" unless $o{q};
}

# Write the limits of each sequence (to /dev/null unless -L was given).
$o{L} ||= '/dev/null';
open my $lim_fh, ">", $o{L} or die "Cannot create the file: $o{L}: $!\n";
my $l = 0;
for my $s (@seq){
   print {$lim_fh} "$s\t".(++$l)."\t$seq{$s}\n";
   # Swap: %seq now keeps the first coordinate just printed for this sequence
   # (used below as the offset of its features), and $l carries the last
   # coordinate forward. After the loop, $l is the last coordinate overall.
   ($l, $seq{$s}) = ($seq{$s}, $l);
}
close $lim_fh;

MAP: {
   print STDERR "== Reading mapping\n" unless $o{q};
   open my $gff_fh, "<", $map or die "Cannot read the file: $map: $!\n";
   my $last_end = 1;     # Translated end of the previous feature
   my $last_name = "NA"; # Name of the previous feature
   print "1\tNA\tNA\n";
   my $i = 0;            # Number of features accepted so far
   FEATURE: while(<$gff_fh>){
      next if /^\s*(#.*)?$/; # Blank or comment lines
      chomp;
      my @ln = split /\t/;
      $ln[4] or die "Cannot parse line $map:$.: $_\n";
      unless(exists $seq{$ln[0]}){
         # Features on unknown sequences are fatal unless -s was given.
         die "Cannot find the subject sequence: $ln[0]\n" unless $o{s};
         next FEATURE;
      }
      $i++;
      my $start = $seq{$ln[0]}+$ln[3];
      my $end = $seq{$ln[0]}+$ln[4];
      my $name = "feat_$i";
      # Rows without a 9th column keep the default name.
      my $attr = defined $ln[8] ? $ln[8] : '';
      if($attr =~ /^gene_id=(\d+)/){ # <- GeneMark style
         $name = "gene_id_$1";
      }elsif($attr =~ /^ID=\d+_(\d+)/){ # <- Prodigal style
         $name = $ln[0]."_".$1;
      }elsif($attr =~ /^ID=([^;]+)/){
         $name = $1;
      }
      if($o{i}){
         # Exact coordinates: emit the inter-feature window (if any) as its
         # own GAP bin, clamping starts that fall before the previous end.
         $start = $last_end if $start < $last_end;
         print "$start\t$last_name~$name\tGAP\n" unless $start==$last_end;
         print "$end\t$name\tFEAT\n";
      }else{
         # Default: close the previous feature's bin at the midpoint between
         # its end and the start of the current feature, as documented in the
         # usage message. Nothing is closed before the first feature.
         my $midpoint = int(($last_end + $start)/2);
         print "$midpoint\t$last_name\tFEAT\n" unless $last_end==1;
      }
      $last_name = $name;
      $last_end = $end;
   }
   # Close the final bin at the last coordinate, if any feature was read.
   if($last_end > 1){
      if($o{i}){
         print "$l\t$last_name~NA\tGAP\n" unless $last_end==$l;
      }else{
         print "$l\t$last_name\tFEAT\n";
      }
   }
   close $gff_fh;
   print STDERR " done.\n" unless $o{q};
}
@@ -261,7 +261,8 @@ Dir.mktmpdir do |dir|
261
261
  # Nothing to do
262
262
  when "diamond"
263
263
  `"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
264
- --db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"`
264
+ --db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}" \
265
+ --quiet`
265
266
  else
266
267
  abort "Unsupported program: #{o[:program]}."
267
268
  end
@@ -297,9 +298,9 @@ Dir.mktmpdir do |dir|
297
298
  `sort -k 1 "#{dir}/#{i}.tab.uns" > "#{dir}/#{i}.tab"`
298
299
  when "diamond"
299
300
  `"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
300
- --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" \
301
+ --query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
301
302
  && "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt 6 \
302
- --out "#{dir}/#{i}.tab"`
303
+ --out "#{dir}/#{i}.tab" --quiet`
303
304
  else
304
305
  abort "Unsupported program: #{o[:program]}."
305
306
  end
@@ -1 +1 @@
1
- ../../enveomics.R
1
+ utils/enveomics/Scripts/lib/../../enveomics.R
@@ -1,5 +1,5 @@
1
1
  Package: enveomics.R
2
- Version: 1.3
2
+ Version: 1.4.4
3
3
  Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
4
4
  email="lmrodriguezr@gmail.com"))
5
5
  Title: Various Utilities for Microbial Genomics and Metagenomics
@@ -15,7 +15,6 @@ Depends:
15
15
  stats,
16
16
  methods,
17
17
  parallel,
18
- modeest,
19
18
  fitdistrplus,
20
19
  sn,
21
20
  investr
@@ -1,5 +1,4 @@
1
1
  import(methods)
2
- import(modeest)
3
2
  import(parallel)
4
3
  import(fitdistrplus)
5
4
  import(sn)
@@ -33,7 +32,8 @@ export(
33
32
  enve.recplot2.findPeaks.emauto, enve.recplot2.findPeaks.em,
34
33
  enve.recplot2.findPeaks.mower,
35
34
  enve.recplot2.corePeak, enve.recplot2.changeCutoff,
36
- enve.recplot2.extractWindows, enve.recplot2.compareIdentities,
35
+ enve.recplot2.windowDepthThreshold, enve.recplot2.extractWindows,
36
+ enve.recplot2.compareIdentities,
37
37
  enve.recplot2.coordinates, enve.recplot2.seqdepth, enve.recplot2.ANIr,
38
38
  enve.growthcurve, plot.enve.GrowthCurve, summary.enve.GrowthCurve,
39
- enve.col2alpha)
39
+ enve.col2alpha, enve.truncate)
@@ -55,7 +55,7 @@ enve.recplot <- structure(function(
55
55
  ret.recplot=FALSE,
56
56
  ### Indicates if the matrix of the recruitment plot is to be returned.
57
57
  ret.hist=FALSE,
58
- ### Indicates if the vectors of the identity and position histograms are to be returned.
58
+ ### Ignored, for backwards compatibility.
59
59
  ret.mode=FALSE,
60
60
  ### Indicates if the mode of the identity is to be computed. It requires the modeest
61
61
  ### package.
@@ -205,7 +205,6 @@ enve.recplot <- structure(function(
205
205
  legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
206
206
  out <- c(out, list(id.mean=mean(rec[, id.reccol])));
207
207
  out <- c(out, list(id.median=median(rec[, id.reccol])));
208
- if(ret.mode) out <- c(out, list(id.mode=modeest::mlv(rec[, id.reccol], method='mfv')$M));
209
208
  if(ret.hist) out <- c(out, list(id.hist=id.hist));
210
209
 
211
210
  # Position histogram
@@ -287,7 +286,7 @@ enve.recplot <- structure(function(
287
286
  ###
288
287
  ### id.median: Median identity.
289
288
  ###
290
- ### id.mode (if ret.mode=TRUE): Mode of the identity.
289
+ ### id.mode (if ret.mode=TRUE): Mode of the identity. Deprecated.
291
290
  ###
292
291
  ### id.hist (if ret.hist=TRUE): Values of the identity histogram.
293
292
  ###