miga-base 0.3.9.0 → 0.3.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/add.rb +33 -33
- data/actions/edit.rb +33 -0
- data/actions/new.rb +17 -18
- data/actions/next_step.rb +33 -0
- data/actions/run.rb +15 -12
- data/bin/miga +43 -37
- data/lib/miga/daemon.rb +2 -2
- data/lib/miga/project/result.rb +16 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +1 -3
- data/scripts/ani_distances.bash +1 -3
- data/scripts/assembly.bash +1 -3
- data/scripts/cds.bash +1 -3
- data/scripts/clade_finding.bash +1 -3
- data/scripts/d.bash +13 -0
- data/scripts/distances.bash +1 -3
- data/scripts/essential_genes.bash +1 -3
- data/scripts/haai_distances.bash +1 -3
- data/scripts/miga.bash +12 -9
- data/scripts/mytaxa.bash +1 -3
- data/scripts/mytaxa_scan.bash +1 -3
- data/scripts/ogs.bash +36 -33
- data/scripts/p.bash +23 -0
- data/scripts/project_stats.bash +1 -3
- data/scripts/read_quality.bash +1 -3
- data/scripts/ssu.bash +1 -3
- data/scripts/stats.bash +1 -3
- data/scripts/subclades.bash +1 -3
- data/scripts/taxonomy.bash +1 -3
- data/scripts/trimmed_fasta.bash +1 -3
- data/scripts/trimmed_reads.bash +1 -3
- data/test/daemon_test.rb +3 -3
- data/utils/distance/runner.rb +1 -1
- data/utils/enveomics/Docs/recplot2.md +13 -2
- data/utils/enveomics/Examples/aai-matrix.bash +3 -3
- data/utils/enveomics/Examples/ani-matrix.bash +3 -3
- data/utils/enveomics/Makefile +2 -2
- data/utils/enveomics/Manifest/Tasks/blasttab.json +12 -4
- data/utils/enveomics/Manifest/Tasks/fasta.json +135 -0
- data/utils/enveomics/Manifest/Tasks/other.json +49 -0
- data/utils/enveomics/Manifest/categories.json +4 -0
- data/utils/enveomics/Manifest/examples.json +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/BlastTab.catsbj.pl +63 -65
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -2
- data/utils/enveomics/Scripts/FastA.extract.rb +152 -0
- data/utils/enveomics/Scripts/FastA.mask.rb +89 -0
- data/utils/enveomics/Scripts/FastA.sample.rb +83 -0
- data/utils/enveomics/Scripts/GFF.catsbj.pl +127 -0
- data/utils/enveomics/Scripts/aai.rb +4 -3
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- data/utils/enveomics/enveomics.R/DESCRIPTION +1 -2
- data/utils/enveomics/enveomics.R/NAMESPACE +3 -3
- data/utils/enveomics/enveomics.R/R/recplot.R +2 -3
- data/utils/enveomics/enveomics.R/R/recplot2.R +221 -160
- data/utils/enveomics/enveomics.R/R/utils.R +19 -1
- data/utils/enveomics/enveomics.R/README.md +11 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +1 -0
- data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +13 -5
- data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +10 -8
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +1 -1
- data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +26 -0
- data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +22 -0
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +13 -7
- data/utils/enveomics/enveomics.R/man/z$-methods.Rd +3 -4
- data/utils/subclade/runner.rb +4 -0
- metadata +14 -3
@@ -0,0 +1,127 @@
|
|
1
|
+
#!/usr/bin/env perl
|
2
|
+
|
3
|
+
# @author Luis M. Rodriguez-R
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
use warnings;
|
7
|
+
use strict;
|
8
|
+
use List::Util qw/min max/;
|
9
|
+
use Getopt::Std;
|
10
|
+
|
11
|
+
# Prints the usage text on STDERR and terminates the script (via die).
# Called explicitly when -h is given or when a positional argument is
# missing.
#
# The description of -s states what the mapping code actually enforces:
# without -s, a GFF feature on a contig that is absent from the FastA is a
# fatal error.
sub HELP_MESSAGE { die "

Description:
Generates a list of coordinates from a GFF table concatenating the subject
sequences.

See also: BlastTab.recplot2.R and BlastTab.catsbj.pl

Usage:
$0 [options] seq.fa map.gff > abs-coords.tsv

seq.fa Subject sequences (contigs) in FastA format.
map.gff Features to map in GFF.

Options:
-L path Generate a file with the absolute coordinates of the
concatenated contigs. This is identical to the .lim file
generated by BlastTab.catsbj.pl.
-i Preserve exact coordinates and include inter-feature windows as
separate bins. By default, the coordinates are set in the
midpoint between features when non-contiguous.
-s The FastA provided is to be treated as a subset of the subject.
By default, it expects all the contigs in the GFF to be present in the
FastA.
-q Run quietly.
-h Display this message and exit.

"; }
|
39
|
+
|
40
|
+
# Command-line handling.
#   %o    Switches collected by getopts: -L takes a value; -i, -s, -q and -h
#         are flags.
#   $fa   First positional argument (FastA with the subject sequences).
#   $map  Second positional argument (GFF with the features to map).
# The usage text is shown (and the script dies) when either positional
# argument is missing or when -h is requested.
my %o;
getopts('L:isqh', \%o);
my $fa  = $ARGV[0];
my $map = $ARGV[1];
HELP_MESSAGE() unless $fa and $map;
HELP_MESSAGE() if $o{h};
|
45
|
+
|
46
|
+
# State filled while reading the FastA:
#   %seq  Per contig ID, a running coordinate: it starts at one more than the
#         value accumulated for the previous contig (or at 1 for the first
#         contig) and grows by the number of letters in the contig.
#   @seq  Contig IDs in the order they appear in the file.
#   $tot  Initialised here; not used by this block.
my %seq = ();
my @seq = ();
my $tot = 0;

SEQ: {
    print STDERR "== Reading reference sequences\n" unless $o{q};
    open my $fa_fh, "<", $fa or die "Cannot read the file: $fa: $!\n";
    my $current = '';
    while (my $line = <$fa_fh>) {
        chomp $line;
        if ($line =~ m/^>(\S+)/) {
            # Header line: the ID is the first run of non-blank characters.
            my $id = $1;
            if (exists $seq{$current}) {
                $seq{$id} = $seq{$current} + 1;
            }
            else {
                $seq{$id} = 1;
            }
            push @seq, $id;
            $current = $id;
        }
        else {
            # Sequence line: only letters count towards the length.
            $line =~ s/[^A-Za-z]//g;
            $seq{$current} += length $line;
        }
    }
    close $fa_fh;
    unless ($o{q}) {
        print STDERR " Found ".(scalar @seq)." sequences.\n";
    }
}
|
69
|
+
|
70
|
+
# Writes the contig limits table (one line per contig: ID, first coordinate,
# last coordinate) to the path given with -L, or to /dev/null when -L was
# not given.
#
# Side effects relied upon by the code that follows:
#   - $seq{$id} is rewritten from the contig's last coordinate to its first
#     coordinate in the concatenated layout.
#   - $l ends up holding the last coordinate of the final contig.
$o{L} ||= '/dev/null';
open my $lim_fh, ">", $o{L} or die "Cannot create the file: $o{L}: $!\n";
my $l = 0;
for my $s (@seq) {
    my $first = $l + 1;       # Starts right after the previous contig.
    my $last  = $seq{$s};
    print {$lim_fh} "$s\t$first\t$last\n";
    $seq{$s} = $first;
    $l       = $last;
}
# Buffered write errors only surface at close; do not ignore them, or the
# limits file could be silently truncated.
close $lim_fh or die "Cannot write the file: $o{L}: $!\n";
|
78
|
+
|
79
|
+
# Reads the GFF and prints, on STDOUT, one tab-separated row per bin edge:
# coordinate, name and type (NA, FEAT or GAP). The first row is always
# "1 NA NA".
#
# Feature coordinates (columns 4 and 5 of the GFF) are shifted by the value
# stored in %seq for the feature's contig (column 1). Features on contigs
# missing from %seq are fatal unless -s was given, in which case they are
# skipped.
#
# With -i, each feature contributes its exact end (FEAT), preceded by a GAP
# row when it does not start where the previous feature ended. Without -i,
# the previous feature is closed at the midpoint between its end and the
# start of the current feature, as described in the usage text.
#
# NOTE(review): $l is defined outside this block; it is used here as the
# final coordinate of the concatenated sequences. Confirm against the code
# that sets it.
MAP: {
    print STDERR "== Reading mapping\n" unless $o{q};
    open my $gff_fh, "<", $map or die "Cannot read the file: $map: $!\n";
    my $last_end  = 1;     # 1 doubles as the "no feature seen yet" marker.
    my $last_name = "NA";
    print "1\tNA\tNA\n";
    my $i = 0;
    FEATURE: while (my $line = <$gff_fh>) {
        next FEATURE if $line =~ /^\s*(#.*)?$/; # Blank or comment lines
        chomp $line;
        my @ln = split /\t/, $line;
        $ln[4] or die "Cannot parse line $map:$.: $line\n";
        unless (exists $seq{$ln[0]}) {
            die "Cannot find the subject sequence: $ln[0]\n" unless $o{s};
            next FEATURE;
        }
        $i++;
        my $start = $seq{$ln[0]} + $ln[3];
        my $end   = $seq{$ln[0]} + $ln[4];

        # Column 9 is optional in practice; treat a missing one as empty so
        # that short lines fall back to the generic name without warnings.
        my $attr = defined $ln[8] ? $ln[8] : '';
        my $name = "feat_$i";
        if ($attr =~ /^gene_id=(\d+)/) {       # <- GeneMark style
            $name = "gene_id_$1";
        }
        elsif ($attr =~ /^ID=\d+_(\d+)/) {     # <- Prodigal style
            $name = $ln[0]."_".$1;
        }
        elsif ($attr =~ /^ID=([^;]+)/) {
            $name = $1;
        }

        if ($o{i}) {
            # Overlapping features: never move backwards.
            $start = $last_end if $start < $last_end;
            print "$start\t$last_name~$name\tGAP\n" unless $start == $last_end;
            print "$end\t$name\tFEAT\n";
        }
        else {
            # Close the previous feature halfway to the current one.
            my $midpoint = int(($last_end + $start)/2);
            print "$midpoint\t$last_name\tFEAT\n" unless $last_end == 1;
        }
        $last_name = $name;
        $last_end  = $end;
    }
    # Close the last bin at the end of the concatenated sequences, but only
    # if at least one feature was reported.
    if ($last_end > 1) {
        if ($o{i}) {
            print "$l\t$last_name~NA\tGAP\n" unless $last_end == $l;
        }
        else {
            print "$l\t$last_name\tFEAT\n";
        }
    }
    close $gff_fh;
    print STDERR " done.\n" unless $o{q};
}
|
127
|
+
|
@@ -261,7 +261,8 @@ Dir.mktmpdir do |dir|
|
|
261
261
|
# Nothing to do
|
262
262
|
when "diamond"
|
263
263
|
`"#{o[:bin]}diamond" makedb --in "#{dir}/#{seq}.fa" \
|
264
|
-
--db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}"
|
264
|
+
--db "#{dir}/#{seq}.fa.dmnd" --threads "#{o[:thr]}" \
|
265
|
+
--quiet`
|
265
266
|
else
|
266
267
|
abort "Unsupported program: #{o[:program]}."
|
267
268
|
end
|
@@ -297,9 +298,9 @@ Dir.mktmpdir do |dir|
|
|
297
298
|
`sort -k 1 "#{dir}/#{i}.tab.uns" > "#{dir}/#{i}.tab"`
|
298
299
|
when "diamond"
|
299
300
|
`"#{o[:bin]}diamond" blastp --threads "#{o[:thr]}" --db "#{s}.dmnd" \
|
300
|
-
--query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" \
|
301
|
+
--query "#{q}" --sensitive --daa "#{dir}/#{i}.daa" --quiet \
|
301
302
|
&& "#{o[:bin]}diamond" view --daa "#{dir}/#{i}.daa" --outfmt 6 \
|
302
|
-
--out "#{dir}/#{i}.tab"`
|
303
|
+
--out "#{dir}/#{i}.tab" --quiet`
|
303
304
|
else
|
304
305
|
abort "Unsupported program: #{o[:program]}."
|
305
306
|
end
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Scripts/lib/../../enveomics.R
|
@@ -1,5 +1,5 @@
|
|
1
1
|
Package: enveomics.R
|
2
|
-
Version: 1.
|
2
|
+
Version: 1.4.4
|
3
3
|
Authors@R: c(person("Luis M.","Rodriguez-R",role=c("aut","cre"),
|
4
4
|
email="lmrodriguezr@gmail.com"))
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
@@ -15,7 +15,6 @@ Depends:
|
|
15
15
|
stats,
|
16
16
|
methods,
|
17
17
|
parallel,
|
18
|
-
modeest,
|
19
18
|
fitdistrplus,
|
20
19
|
sn,
|
21
20
|
investr
|
@@ -1,5 +1,4 @@
|
|
1
1
|
import(methods)
|
2
|
-
import(modeest)
|
3
2
|
import(parallel)
|
4
3
|
import(fitdistrplus)
|
5
4
|
import(sn)
|
@@ -33,7 +32,8 @@ export(
|
|
33
32
|
enve.recplot2.findPeaks.emauto, enve.recplot2.findPeaks.em,
|
34
33
|
enve.recplot2.findPeaks.mower,
|
35
34
|
enve.recplot2.corePeak, enve.recplot2.changeCutoff,
|
36
|
-
enve.recplot2.
|
35
|
+
enve.recplot2.windowDepthThreshold, enve.recplot2.extractWindows,
|
36
|
+
enve.recplot2.compareIdentities,
|
37
37
|
enve.recplot2.coordinates, enve.recplot2.seqdepth, enve.recplot2.ANIr,
|
38
38
|
enve.growthcurve, plot.enve.GrowthCurve, summary.enve.GrowthCurve,
|
39
|
-
enve.col2alpha)
|
39
|
+
enve.col2alpha, enve.truncate)
|
@@ -55,7 +55,7 @@ enve.recplot <- structure(function(
|
|
55
55
|
ret.recplot=FALSE,
|
56
56
|
### Indicates if the matrix of the recruitment plot is to be returned.
|
57
57
|
ret.hist=FALSE,
|
58
|
-
###
|
58
|
+
### Ignored, for backwards compatibility.
|
59
59
|
ret.mode=FALSE,
|
60
60
|
### Indicates if the mode of the identity is to be computed. It requires the modeest
|
61
61
|
### package.
|
@@ -205,7 +205,6 @@ enve.recplot <- structure(function(
|
|
205
205
|
legend('bottomright', paste(id.shortname, 'histogram'), bg=rgb(1,1,1,2/3));
|
206
206
|
out <- c(out, list(id.mean=mean(rec[, id.reccol])));
|
207
207
|
out <- c(out, list(id.median=median(rec[, id.reccol])));
|
208
|
-
if(ret.mode) out <- c(out, list(id.mode=modeest::mlv(rec[, id.reccol], method='mfv')$M));
|
209
208
|
if(ret.hist) out <- c(out, list(id.hist=id.hist));
|
210
209
|
|
211
210
|
# Position histogram
|
@@ -287,7 +286,7 @@ enve.recplot <- structure(function(
|
|
287
286
|
###
|
288
287
|
### id.median: Median identity.
|
289
288
|
###
|
290
|
-
### id.mode (if ret.mode=TRUE): Mode of the identity.
|
289
|
+
### id.mode (if ret.mode=TRUE): Mode of the identity. Deprecated.
|
291
290
|
###
|
292
291
|
### id.hist (if ret.hist=TRUE): Values of the identity histogram.
|
293
292
|
###
|