miga-base 0.3.9.0 → 0.3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/add.rb +33 -33
- data/actions/edit.rb +33 -0
- data/actions/new.rb +17 -18
- data/actions/next_step.rb +33 -0
- data/actions/run.rb +15 -12
- data/bin/miga +43 -37
- data/lib/miga/daemon.rb +2 -2
- data/lib/miga/project/result.rb +16 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +1 -3
- data/scripts/ani_distances.bash +1 -3
- data/scripts/assembly.bash +1 -3
- data/scripts/cds.bash +1 -3
- data/scripts/clade_finding.bash +1 -3
- data/scripts/d.bash +13 -0
- data/scripts/distances.bash +1 -3
- data/scripts/essential_genes.bash +1 -3
- data/scripts/haai_distances.bash +1 -3
- data/scripts/miga.bash +12 -9
- data/scripts/mytaxa.bash +1 -3
- data/scripts/mytaxa_scan.bash +1 -3
- data/scripts/ogs.bash +36 -33
- data/scripts/p.bash +23 -0
- data/scripts/project_stats.bash +1 -3
- data/scripts/read_quality.bash +1 -3
- data/scripts/ssu.bash +1 -3
- data/scripts/stats.bash +1 -3
- data/scripts/subclades.bash +1 -3
- data/scripts/taxonomy.bash +1 -3
- data/scripts/trimmed_fasta.bash +1 -3
- data/scripts/trimmed_reads.bash +1 -3
- data/test/daemon_test.rb +3 -3
- data/utils/distance/runner.rb +1 -1
- data/utils/enveomics/Docs/recplot2.md +13 -2
- data/utils/enveomics/Examples/aai-matrix.bash +3 -3
- data/utils/enveomics/Examples/ani-matrix.bash +3 -3
- data/utils/enveomics/Makefile +2 -2
- data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
- data/utils/enveomics/Manifest/Tasks/other.json +49 -0
- data/utils/enveomics/Manifest/categories.json +4 -0
- data/utils/enveomics/Manifest/examples.json +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/aai.rb +4 -3
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
- data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
- data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
- data/utils/enveomics/enveomics.R/R/utils.R +19 -1
- data/utils/enveomics/enveomics.R/README.md +11 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
- data/utils/subclade/runner.rb +4 -0
- metadata +14 -3
@@ -0,0 +1,127 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
use warnings;
|
7
|
+
use strict;
|
8
|
+
use List::Util qw/min max/;
|
9
|
+
use Getopt::Std;
|
10
|
+
|
11
|
+
# Prints the usage text and terminates the program through die(), so the text
# goes to STDERR and the exit status is non-zero. Called when -h is given or
# when the two positional arguments (seq.fa, map.gff) are missing.
# The -s paragraph describes what the script actually checks: every sequence ID
# in the GFF must exist in the FastA unless -s is set.
sub HELP_MESSAGE { die "

Description:
Generates a list of coordinates from a GFF table concatenating the subject
sequences.

See also: BlastTab.recplot2.R and BlastTab.catsbj.pl

Usage:
$0 [options] seq.fa map.gff > abs-coords.tsv

seq.fa Subject sequences (contigs) in FastA format.
map.gff Features to map in GFF.

Options:
-L path Generate a file with the absolute coordinates of the
concatenated contigs. This is identical to the .lim file
generated by BlastTab.catsbj.pl.
-i Preserve exact coordinates and include inter-feature windows as
separate bins. By default, the coordinates are set in the
midpoint between features when non-contiguous.
-s The FastA provided is to be treated as a subset of the subject.
By default, it expects all the contigs in the GFF to be present in
the FastA.
-q Run quietly.
-h Display this message and exit.

"; }
|
39
|
+
|
40
|
+
# Command-line handling.
#   %o    switches parsed by getopts: -L <path>, -i, -s, -q, -h.
#   $fa   first positional argument: FastA file with the subject sequences.
#   $map  second positional argument: GFF file with the features.
# The usage text is shown (and the program dies) when an unknown switch is
# given, when -h is requested, or when either positional argument is missing.
my %o;
getopts('L:isqh', \%o) or HELP_MESSAGE();
HELP_MESSAGE() if $o{h};
my($fa, $map) = @ARGV;
# Test definedness rather than truth so that a file literally named "0" works.
HELP_MESSAGE() unless defined $fa and defined $map;
|
45
|
+
|
46
|
+
# %seq  contig ID => running coordinate in the concatenated subject. After this
#       block each value is the cumulative position at which the contig ends;
#       each contig starts one position after the previous one ends.
# @seq  contig IDs in the order they appear in the FastA.
my %seq = ();
my @seq = ();

SEQ:{
    print STDERR "== Reading reference sequences\n" unless $o{q};
    open my $fa_fh, "<", $fa or die "Cannot read the file: $fa: $!\n";
    my $cur_seq = '';
    while(my $line = <$fa_fh>){
        chomp $line;
        if($line =~ m/^>(\S+)/){
            my $c = $1;
            # Start right after the previous contig, or at 1 for the first one.
            $seq{$c} = exists $seq{$cur_seq} ? $seq{$cur_seq}+1 : 1;
            push @seq, $c;
            $cur_seq = $c;
        }else{
            # Ignore residues that appear before any header; counting them
            # under the empty key would shift the offset of every contig.
            next unless length $cur_seq;
            # Only letters count towards the contig length.
            $line =~ s/[^A-Za-z]//g;
            $seq{$cur_seq} += length $line;
        }
    }
    close $fa_fh;
    print STDERR " Found ".(scalar @seq)." sequences.\n" unless $o{q};
}
|
69
|
+
|
70
|
+
# Write the optional limits file (-L) and, in the same pass, turn %seq from
# "cumulative end of the contig" into "first coordinate of the contig".
# Each .lim row is: contig ID, first coordinate, last coordinate.
# The file is only opened when -L was given, so no platform-specific null
# device is needed. After the loop $l holds the last coordinate of the whole
# concatenation, which the mapping step uses as the final boundary.
my $lim_fh;
if(defined $o{L} and length $o{L}){
    open $lim_fh, ">", $o{L} or die "Cannot create the file: $o{L}: $!\n";
}
my $l = 0;
for my $s (@seq){
    my $first = ++$l;
    print {$lim_fh} "$s\t$first\t$seq{$s}\n" if $lim_fh;
    # Carry the end of this contig forward and keep its start as the offset.
    ($l, $seq{$s}) = ($seq{$s}, $first);
}
if($lim_fh){
    # Buffered write errors only surface at close time.
    close $lim_fh or die "Cannot write the file: $o{L}: $!\n";
}
|
78
|
+
|
79
|
+
# Read the GFF and print one boundary per line to STDOUT as
# "<coordinate>\t<name>\t<type>", starting with the fixed row "1 NA NA".
# Feature coordinates are shifted by the contig offset stored in %seq.
#   -i  exact mode: emit a GAP row where consecutive features do not touch,
#       then a FEAT row at the end of each feature; a trailing GAP row closes
#       the concatenation if the last feature does not reach $l.
#   default: emit a FEAT row for the previous feature at the midpoint between
#       its end and the start of the current one; the last feature is closed
#       at $l.
# Features on contigs absent from the FastA are fatal unless -s is set, in
# which case they are skipped.
MAP: {
    print STDERR "== Reading mapping\n" unless $o{q};
    open my $gff_fh, "<", $map or die "Cannot read the file: $map: $!\n";
    my $last_end = 1;       # 1 doubles as the "no feature seen yet" marker
    my $last_name = "NA";
    print "1\tNA\tNA\n";
    my $i = 0;
    FEATURE: while(my $line = <$gff_fh>){
        next FEATURE if $line =~ /^\s*(#.*)?$/; # Blank or comment lines
        chomp $line;
        my @ln = split /\t/, $line;
        $ln[4] or die "Cannot parse line $map:$.: $line\n";
        unless(exists $seq{$ln[0]}){
            die "Cannot find the subject sequence: $ln[0]\n" unless $o{s};
            next FEATURE;
        }
        $i++;
        my $start = $seq{$ln[0]}+$ln[3];
        my $end = $seq{$ln[0]}+$ln[4];
        my $name = "feat_$i";
        # Rows with fewer than nine columns have no attributes to inspect.
        my $attr = defined $ln[8] ? $ln[8] : '';
        if($attr =~ /^gene_id=(\d+)/){ # <- GeneMark style
            $name = "gene_id_$1";
        }elsif($attr =~ /^ID=\d+_(\d+)/){ # <- Prodigal style
            $name = $ln[0]."_".$1;
        }elsif($attr =~ /^ID=([^;]+)/){
            $name = $1;
        }
        if($o{i}){
            # Overlapping features are clipped to the end of the previous one.
            $start = $last_end if $start < $last_end;
            print "$start\t$last_name~$name\tGAP\n" unless $start==$last_end;
            print "$end\t$name\tFEAT\n";
        }else{
            # Close the previous feature halfway to the current one.
            my $midpoint = int(($last_end + $start)/2);
            print "$midpoint\t$last_name\tFEAT\n" unless $last_end==1;
        }
        $last_name = $name;
        $last_end = $end;
    }
    if($last_end > 1){
        if($o{i}){
            print "$l\t$last_name~NA\tGAP\n" unless $last_end==$l;
        }else{
            print "$l\t$last_name\tFEAT\n";
        }
    }
    close $gff_fh;
    print STDERR " done.\n" unless $o{q};
}
|
127
|
+
|
@@ -261,7 +261,8 @@ Dir.mktmpdir do |dir|
|
|
261
261
|
# Nothing to do
|
262
262
|
when "diamond"
|
263
263
|
`"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
|
264
|
-
--db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"
|
264
|
+
--db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}" \
|
265
|
+
--quiet`
|
265
266
|
else
|
266
267
|
abort "Unsupported program: #{o[:program]}."
|
267
268
|
end
|
@@ -297,9 +298,9 @@ Dir.mktmpdir do |dir|
|
|
297
298
|
`sort -k 1 "#{dir}/#{i}.tab.uns" > "#{dir}/#{i}.tab"`
|
298
299
|
when "diamond"
|
299
300
|
`"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
|
300
|
-
--query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" \
|
301
|
+
--query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
|
301
302
|
&& "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt 6 \
|
302
|
-
--out "#{dir}/#{i}.tab"`
|
303
|
+
--out "#{dir}/#{i}.tab" --quiet`
|
303
304
|
else
|
304
305
|
abort "Unsupported program: #{o[:program]}."
|
305
306
|
end
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Scripts/lib/../../enveomics.R
|
@@ -1,5 +1,5 @@
|
|
1
1
|
Package: enveomics.R
|
2
|
-
Version: 1.
|
2
|
+
Version: 1.4.4
|
3
3
|
Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
|
4
4
|
email="lmrodriguezr@gmail.com"))
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
@@ -15,7 +15,6 @@ Depends:
|
|
15
15
|
stats,
|
16
16
|
methods,
|
17
17
|
parallel,
|
18
|
-
modeest,
|
19
18
|
fitdistrplus,
|
20
19
|
sn,
|
21
20
|
investr
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import(methods)
|
2
|
-
import(modeest)
|
3
2
|
import(parallel)
|
4
3
|
import(fitdistrplus)
|
5
4
|
import(sn)
|
@@ -33,7 +32,8 @@ export(
|
|
33
32
|
enve.recplot2.findPeaks.emauto, enve.recplot2.findPeaks.em,
|
34
33
|
enve.recplot2.findPeaks.mower,
|
35
34
|
enve.recplot2.corePeak, enve.recplot2.changeCutoff,
|
36
|
-
enve.recplot2.
|
35
|
+
enve.recplot2.windowDepthThreshold, enve.recplot2.extractWindows,
|
36
|
+
enve.recplot2.compareIdentities,
|
37
37
|
enve.recplot2.coordinates, enve.recplot2.seqdepth, enve.recplot2.ANIr,
|
38
38
|
enve.growthcurve, plot.enve.GrowthCurve, summary.enve.GrowthCurve,
|
39
|
-
enve.col2alpha)
|
39
|
+
enve.col2alpha, enve.truncate)
|
@@ -55,7 +55,7 @@ enve.recplot <- structure(function(
|
|
55
55
|
ret.recplot=FALSE,
|
56
56
|
### Indicates if the matrix of the recruitment plot is to be returned.
|
57
57
|
ret.hist=FALSE,
|
58
|
-
###
|
58
|
+
### Ignored, for backwards compatibility.
|
59
59
|
ret.mode=FALSE,
|
60
60
|
### Indicates if the mode of the identity is to be computed. It requires the modeest
|
61
61
|
### package.
|
@@ -205,7 +205,6 @@ enve.recplot <- structure(function(
|
|
205
205
|
legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
|
206
206
|
out <- c(out, list(id.mean=mean(rec[, id.reccol])));
|
207
207
|
out <- c(out, list(id.median=median(rec[, id.reccol])));
|
208
|
-
if(ret.mode) out <- c(out, list(id.mode=modeest::mlv(rec[, id.reccol], method='mfv')$M));
|
209
208
|
if(ret.hist) out <- c(out, list(id.hist=id.hist));
|
210
209
|
|
211
210
|
# Position histogram
|
@@ -287,7 +286,7 @@ enve.recplot <- structure(function(
|
|
287
286
|
###
|
288
287
|
### id.median: Median identity.
|
289
288
|
###
|
290
|
-
### id.mode (if ret.mode=TRUE): Mode of the identity.
|
289
|
+
### id.mode (if ret.mode=TRUE): Mode of the identity. Deprecated.
|
291
290
|
###
|
292
291
|
### id.hist (if ret.hist=TRUE): Values of the identity histogram.
|
293
292
|
###
|