miga-base 1.3.9.7 → 1.3.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +5 -4
- data/lib/miga/cli/action/ncbi_get.rb +7 -8
- data/lib/miga/remote_dataset.rb +53 -0
- data/lib/miga/version.rb +2 -2
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -3
- data/utils/enveomics/Scripts/Newick.autoprune.R +4 -3
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +23 -9
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +4 -3
- data/utils/enveomics/Scripts/TRIBS.test.R +4 -3
- data/utils/enveomics/Scripts/Table.barplot.R +20 -10
- data/utils/enveomics/Scripts/Table.df2dist.R +4 -3
- data/utils/enveomics/Scripts/Table.prefScore.R +13 -13
- data/utils/enveomics/Scripts/ogs.stats.rb +74 -63
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/R/cliopts.R +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
- metadata +2 -3
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0239e39a0588b73d042da7d970925d2d93a5334c858453e032d51b0af760fa27'
|
4
|
+
data.tar.gz: 81e6903e1feba6571d76fe5d113a60414bd0d3b1b3090d6e26367a93cf8d0da7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d68e55d5335f3da03eb9cea737aad5fa21a7a272e3958db6130e7260387844c1bed92b0b2f655a5a5133772797212b717559da9701723c9885cc9ee7cffc962f
|
7
|
+
data.tar.gz: 8d27c2f580106c0d1f74e6daaf1cb81ffd7c6fbabf86e7569342027ff9e09b1810c245ad560eff0179c3d06a88b2d1a087344c56e34a2d54c799004e2f6370c0
|
@@ -17,10 +17,7 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
17
17
|
cli.opt_flag(opt, 'chromosome', 'Download complete chromosomes')
|
18
18
|
cli.opt_flag(opt, 'scaffold', 'Download genomes in scaffolds')
|
19
19
|
cli.opt_flag(opt, 'contig', 'Download genomes in contigs')
|
20
|
-
opt.on(
|
21
|
-
'--all',
|
22
|
-
'Download all genomes (in any status)'
|
23
|
-
) do
|
20
|
+
opt.on('--all', 'Download all genomes (in any status)') do
|
24
21
|
cli[:complete] = true
|
25
22
|
cli[:chromosome] = true
|
26
23
|
cli[:scaffold] = true
|
@@ -29,6 +26,10 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
29
26
|
opt.on('--ncbi-list-json STRING', '::HIDE::') do |v|
|
30
27
|
cli[:ncbi_list_json] = v
|
31
28
|
end
|
29
|
+
opt.on(
|
30
|
+
'--ncbi-taxonomy-dump STRING',
|
31
|
+
'Path to an NCBI Taxonomy dump directory to query instead of API calls'
|
32
|
+
) { |v| MiGA::RemoteDataset.use_ncbi_taxonomy_dump(v) }
|
32
33
|
end
|
33
34
|
|
34
35
|
def cli_name_modifiers(opt)
|
@@ -24,14 +24,13 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
24
24
|
cli_name_modifiers(opt)
|
25
25
|
cli_filters(opt)
|
26
26
|
cli_save_actions(opt)
|
27
|
-
opt.on(
|
28
|
-
|
29
|
-
'
|
30
|
-
|
31
|
-
opt.on(
|
32
|
-
'
|
33
|
-
|
34
|
-
) { |v| ENV['NCBI_API_KEY'] = v }
|
27
|
+
opt.on('--api-key STRING', '::HIDE::') do |v|
|
28
|
+
warn "The use of --api-key is deprecated, please use --ncbi-api-key"
|
29
|
+
ENV['NCBI_API_KEY'] = v
|
30
|
+
end
|
31
|
+
opt.on('--ncbi-api-key STRING', 'NCBI API key') do |v|
|
32
|
+
ENV['NCBI_API_KEY'] = v
|
33
|
+
end
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -12,6 +12,55 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
12
12
|
# Class-level
|
13
13
|
|
14
14
|
class << self
|
15
|
+
##
|
16
|
+
# Path to a directory with a recent NCBI Taxonomy dump to use instead of
|
17
|
+
# making API calls to NCBI servers, which can be obtained at:
|
18
|
+
# https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
|
19
|
+
def use_ncbi_taxonomy_dump(path)
|
20
|
+
raise "Directory doesn't exist: #{path}" unless File.directory?(path)
|
21
|
+
|
22
|
+
# Structure: { TaxID => ["name", "rank", parent TaxID] }
|
23
|
+
@ncbi_taxonomy_names = {}
|
24
|
+
|
25
|
+
# Read names.dmp
|
26
|
+
File.open(File.join(path, 'names.dmp')) do |fh|
|
27
|
+
fh.each do |ln|
|
28
|
+
row = ln.split(/\t\|\t?/)
|
29
|
+
next unless row[3] == 'scientific name'
|
30
|
+
@ncbi_taxonomy_names[row[0].to_i] = [row[1].strip]
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Read nodes.dmp
|
35
|
+
File.open(File.join(path, 'nodes.dmp')) do |fh|
|
36
|
+
fh.each do |ln|
|
37
|
+
row = ln.split(/\t\|\t?/)
|
38
|
+
child = row[0].to_i
|
39
|
+
parent = row[1].to_i
|
40
|
+
@ncbi_taxonomy_names[child][1] = row[2]
|
41
|
+
@ncbi_taxonomy_names[child][2] = parent unless parent == child
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
##
|
47
|
+
# Is a local NCBI Taxonomy dump available?
|
48
|
+
def ncbi_taxonomy_dump?
|
49
|
+
(@ncbi_taxonomy_names ||= nil) ? true : false
|
50
|
+
end
|
51
|
+
|
52
|
+
##
|
53
|
+
# Get the MiGA::Taxonomy object for the lineage of the taxon with TaxID
|
54
|
+
# +id+ using the local NCBI Taxonomy dump.
|
55
|
+
def taxonomy_from_ncbi_dump(id)
|
56
|
+
MiGA::Taxonomy.new(ns: 'ncbi').tap do |tax|
|
57
|
+
while @ncbi_taxonomy_names[id]
|
58
|
+
tax << { @ncbi_taxonomy_names[id][1] => @ncbi_taxonomy_names[id][0] }
|
59
|
+
id = @ncbi_taxonomy_names[id][2]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
15
64
|
##
|
16
65
|
# Translate an NCBI Assembly Accession (+acc+) to corresponding internal
|
17
66
|
# NCBI ID, with up to +retrials+ retrials if the returned JSON document
|
@@ -173,6 +222,10 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
173
222
|
def get_ncbi_taxonomy
|
174
223
|
tax_id = get_ncbi_taxid or return
|
175
224
|
|
225
|
+
if self.class.ncbi_taxonomy_dump?
|
226
|
+
return self.class.taxonomy_from_ncbi_dump(tax_id)
|
227
|
+
end
|
228
|
+
|
176
229
|
lineage = { ns: 'ncbi' }
|
177
230
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
178
231
|
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3,
|
15
|
+
VERSION = [1.3, 10, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 1,
|
23
|
+
VERSION_DATE = Date.new(2024, 1, 31)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
@@ -6,9 +6,10 @@
|
|
6
6
|
#= Load stuff
|
7
7
|
suppressPackageStartupMessages(library(enveomics.R))
|
8
8
|
args <- commandArgs(trailingOnly = FALSE)
|
9
|
-
enveomics_R <- file.path(
|
10
|
-
sub("^--file=", "", args[grep("^--file=", args)])),
|
11
|
-
"
|
9
|
+
enveomics_R <- file.path(
|
10
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
11
|
+
"..", "enveomics.R"
|
12
|
+
)
|
12
13
|
|
13
14
|
#= Generate interface
|
14
15
|
opt <- enve.cliopts(enve.recplot2,
|
@@ -7,9 +7,10 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
14
15
|
source(file.path(enveomics_R, "R", "autoprune.R"))
|
15
16
|
|
@@ -7,20 +7,34 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
library(methods)
|
14
15
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
15
16
|
source(file.path(enveomics_R, "R", "recplot2.R"))
|
16
17
|
|
17
18
|
#= Generate interface
|
18
|
-
opt <- enve.cliopts(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
opt <- enve.cliopts(
|
20
|
+
enve.recplot2.compareIdentities,
|
21
|
+
file.path(enveomics_R, "man", "enve.recplot2.compareIdentities.Rd"),
|
22
|
+
positional_arguments = 2,
|
23
|
+
usage = "usage: %prog [options] recplot-A.Rdata recplot-B.Rdata",
|
24
|
+
number = c("pseudocounts", "max.deviation"),
|
25
|
+
ignore = c("x", "y"),
|
26
|
+
p_desc = "Calculates the difference between identity distributions of two recruitment plots.",
|
27
|
+
o_desc = list(
|
28
|
+
method = paste(
|
29
|
+
"Distance method to use. This should be (an unambiguous abbreviation of)",
|
30
|
+
"one of: \"hellinger\" (Hellinger, 1909, doi:10.1515/crll.1909.136.210),",
|
31
|
+
"\"bhattacharyya\" (Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35),",
|
32
|
+
"\"kl\" or \"kullback-leibler\" (Kullback & Leibler, 1951,",
|
33
|
+
"doi:10.1214/aoms/1177729694), \"euclidean\"",
|
34
|
+
sep = "\n "
|
35
|
+
)
|
36
|
+
)
|
37
|
+
)
|
24
38
|
|
25
39
|
#= Run it!
|
26
40
|
load(opt$args[1])
|
@@ -8,9 +8,10 @@
|
|
8
8
|
#= Load stuff
|
9
9
|
suppressPackageStartupMessages(library(enveomics.R))
|
10
10
|
args <- commandArgs(trailingOnly = F)
|
11
|
-
enveomics_R <- file.path(
|
12
|
-
|
13
|
-
|
11
|
+
enveomics_R <- file.path(
|
12
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
13
|
+
"..", "enveomics.R"
|
14
|
+
)
|
14
15
|
|
15
16
|
#= Generate interface
|
16
17
|
opt <- enve.cliopts(plot.enve.TRIBStest,
|
@@ -8,9 +8,10 @@
|
|
8
8
|
#= Load stuff
|
9
9
|
suppressPackageStartupMessages(library(enveomics.R))
|
10
10
|
args <- commandArgs(trailingOnly = F)
|
11
|
-
enveomics_R <- file.path(
|
12
|
-
|
13
|
-
|
11
|
+
enveomics_R <- file.path(
|
12
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
13
|
+
"..", "enveomics.R"
|
14
|
+
)
|
14
15
|
|
15
16
|
#= Generate interface
|
16
17
|
opt <- suppressWarnings(enve.cliopts(enve.tribs,
|
@@ -7,21 +7,31 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
14
15
|
source(file.path(enveomics_R, "R", "utils.R"))
|
15
16
|
source(file.path(enveomics_R, "R", "barplot.R"))
|
16
17
|
|
17
18
|
#= Generate interface
|
18
|
-
opt <- enve.cliopts(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
opt <- enve.cliopts(
|
20
|
+
enve.barplot,
|
21
|
+
file.path(enveomics_R, "man", "enve.barplot.Rd"),
|
22
|
+
positional_arguments = c(1, 3),
|
23
|
+
usage = "usage: %prog [options] output.pdf [width height]",
|
24
|
+
mandatory = c("x"),
|
25
|
+
vectorize = c("sizes", "order", "col"),
|
26
|
+
number = c("sizes", "order"),
|
27
|
+
o_desc = list(
|
28
|
+
x = paste(
|
29
|
+
"A tab-delimited file containing header (first row) and row names",
|
30
|
+
"(first column)."
|
31
|
+
),
|
32
|
+
order = "If passed, the custom order to be used (as row indexes)"
|
33
|
+
)
|
34
|
+
)
|
25
35
|
|
26
36
|
#= Run it!
|
27
37
|
args = as.list(opt$args)
|
@@ -7,9 +7,10 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
14
15
|
source(file.path(enveomics_R, "R", "df2dist.R"))
|
15
16
|
|
@@ -3,26 +3,26 @@
|
|
3
3
|
#= Load stuff
|
4
4
|
args <- commandArgs(trailingOnly = FALSE)
|
5
5
|
enveomics_R <- file.path(
|
6
|
-
dirname(sub(
|
7
|
-
|
8
|
-
'enveomics.R'
|
6
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
7
|
+
"..", "enveomics.R"
|
9
8
|
)
|
10
|
-
for(file in c(
|
11
|
-
source(file.path(enveomics_R,
|
9
|
+
for(file in c("cliopts.R", "utils.R", "prefscore.R")) {
|
10
|
+
source(file.path(enveomics_R, "R", file))
|
11
|
+
}
|
12
12
|
|
13
13
|
#= Generate interface
|
14
14
|
opt <- enve.cliopts(
|
15
15
|
enve.prefscore,
|
16
|
-
file.path(enveomics_R,
|
16
|
+
file.path(enveomics_R, "man", "enve.prefscore.Rd"),
|
17
17
|
positional_arguments = c(1, 4),
|
18
|
-
usage =
|
19
|
-
mandatory = c(
|
20
|
-
number = c(
|
21
|
-
ignore = c(
|
18
|
+
usage = "usage: %prog [options] output.tsv [output.pdf [width height]]",
|
19
|
+
mandatory = c("x", "set"),
|
20
|
+
number = c("signif.thr"),
|
21
|
+
ignore = c("plot"),
|
22
22
|
o_desc = list(
|
23
|
-
x =
|
24
|
-
set =
|
25
|
-
ignore =
|
23
|
+
x = "A tab-delimited table of presence/absence (1/0) with species as rows and samples as columns.",
|
24
|
+
set = "A list of sample names that constitute the test set, one per line",
|
25
|
+
ignore = "A list of species to exclude from the analysis, one per line"
|
26
26
|
)
|
27
27
|
)
|
28
28
|
|
@@ -1,127 +1,138 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
# @author: Luis M. Rodriguez-R
|
5
|
-
# @license: Artistic-2.0
|
6
|
-
#
|
3
|
+
$:.push File.expand_path('../lib', __FILE__)
|
7
4
|
|
8
|
-
|
5
|
+
require 'enveomics_rb/enveomics'
|
9
6
|
require 'enveomics_rb/og'
|
10
|
-
require 'optparse'
|
11
7
|
require 'json'
|
8
|
+
$VERSION = 1.1
|
12
9
|
|
13
|
-
o = {q:false, a:false}
|
14
|
-
ARGV << '-h' if ARGV.size==0
|
10
|
+
o = { q: false, a: false }
|
15
11
|
OptionParser.new do |opts|
|
16
|
-
opts.
|
17
|
-
|
12
|
+
opts.version = $VERSION
|
13
|
+
cmd = File.basename($0)
|
14
|
+
Enveomics.opt_banner(
|
15
|
+
opts,
|
16
|
+
'Estimates some descriptive statistics on a set of Orthology Groups (OGs)',
|
17
|
+
"#{cmd} -o file.ogs [options]"
|
18
|
+
)
|
18
19
|
|
19
|
-
|
20
|
-
opts.
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
opts.separator
|
26
|
-
opts.
|
27
|
-
opts.on(
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
opts.separator 'Mandatory'
|
21
|
+
opts.on(
|
22
|
+
'-o', '--ogs FILE',
|
23
|
+
'Input file containing the precomputed OGs',
|
24
|
+
'Supports compression with .gz extension, use - for STDIN'
|
25
|
+
) { |v| o[:ogs] = v }
|
26
|
+
opts.separator ''
|
27
|
+
opts.separator 'Other Options'
|
28
|
+
opts.on(
|
29
|
+
'-j', '--json FILE', 'Output file in JSON format',
|
30
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
31
|
+
) { |v| o[:json] = v }
|
32
|
+
opts.on(
|
33
|
+
'-t', '--tab FILE', 'Output file in tabular format',
|
34
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
35
|
+
) { |v| o[:tab] = v }
|
36
|
+
opts.on(
|
37
|
+
'-T', '--transposed-tab FILE',
|
38
|
+
'Output file in transposed tabular format',
|
39
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
40
|
+
){ |v| o[:ttab] = v }
|
41
|
+
opts.on('-a', '--auto', 'Run completely quietly (no STDERR or STDOUT)') do
|
31
42
|
o[:q] = true
|
32
43
|
o[:a] = true
|
33
44
|
end
|
34
|
-
opts.on(
|
35
|
-
opts.on(
|
45
|
+
opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
|
46
|
+
opts.on('-h', '--help', 'Display this screen') do
|
36
47
|
puts opts
|
37
48
|
exit
|
38
49
|
end
|
39
|
-
opts.separator
|
50
|
+
opts.separator ''
|
40
51
|
end.parse!
|
41
|
-
abort "-o is mandatory" if o[:ogs].nil?
|
42
52
|
|
43
|
-
|
53
|
+
raise Enveomics::OptionError.new('-o is mandatory') if o[:ogs].nil?
|
54
|
+
|
44
55
|
begin
|
45
56
|
# Initialize the collection of OGs.
|
46
57
|
collection = OGCollection.new
|
47
|
-
|
58
|
+
|
48
59
|
# Read the pre-computed OGs
|
49
60
|
$stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
|
50
|
-
f =
|
61
|
+
f = reader(o[:ogs])
|
51
62
|
h = f.gets.chomp.split /\t/
|
52
63
|
while ln = f.gets
|
53
64
|
collection << OG.new(h, ln.chomp.split(/\t/))
|
54
65
|
end
|
55
66
|
f.close
|
56
67
|
$stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]
|
57
|
-
|
68
|
+
|
58
69
|
# Estimate descriptive stats
|
59
70
|
stat_name = {
|
60
|
-
genomes:
|
61
|
-
pan:
|
62
|
-
core:
|
63
|
-
core90pc:
|
64
|
-
core80pc:
|
65
|
-
unus:
|
66
|
-
avg:
|
67
|
-
avg_pan:
|
68
|
-
core_avg:
|
69
|
-
core_pan:
|
70
|
-
ogs_shannon:
|
71
|
+
genomes: 'Number of genomes',
|
72
|
+
pan: 'Pangenome (OGs)',
|
73
|
+
core: 'Core genome (OGs)',
|
74
|
+
core90pc: 'OGs in 90% of the genomes',
|
75
|
+
core80pc: 'OGs in 80% of the genomes',
|
76
|
+
unus: 'Unus genome, core genome discarding paralogs (OGs)',
|
77
|
+
avg: 'Average number of OGs in a genome',
|
78
|
+
avg_pan: 'Average genome (OGs) / Pangenome (OGs)',
|
79
|
+
core_avg: 'Core genome (OGs) / Average genome (OGs)',
|
80
|
+
core_pan: 'Core genome (OGs) / Pangenome (OGs)',
|
81
|
+
ogs_shannon: 'Entropy of the OG frequencies (bits)'
|
71
82
|
}
|
72
83
|
stats = {}
|
73
84
|
stats[:genomes] = Gene.genomes.length
|
74
85
|
stats[:pan] = collection.ogs.length
|
75
86
|
stats[:core] = collection.ogs.map do |og|
|
76
87
|
(og.genomes.length == Gene.genomes.length) ? 1 : 0
|
77
|
-
end.inject(0
|
88
|
+
end.inject(0, :+)
|
78
89
|
stats[:core90pc] = collection.ogs.map do |og|
|
79
|
-
(og.genomes.length >= 0.9*Gene.genomes.length) ? 1 : 0
|
80
|
-
end.inject(0
|
90
|
+
(og.genomes.length >= 0.9 * Gene.genomes.length) ? 1 : 0
|
91
|
+
end.inject(0, :+)
|
81
92
|
stats[:core80pc] = collection.ogs.map do |og|
|
82
|
-
(og.genomes.length >= 0.8*Gene.genomes.length) ? 1 : 0
|
83
|
-
end.inject(0
|
93
|
+
(og.genomes.length >= 0.8 * Gene.genomes.length) ? 1 : 0
|
94
|
+
end.inject(0, :+)
|
84
95
|
stats[:unus] = collection.ogs.map do |og|
|
85
96
|
(og.genomes.length != Gene.genomes.length) ? 0 :
|
86
|
-
(og.genes.all?{ |i| i.size==1 }) ? 1 : 0
|
87
|
-
end.inject(0
|
88
|
-
og_genomes
|
89
|
-
stats[:avg]
|
90
|
-
stats[:avg_pan]
|
91
|
-
stats[:core_avg] = stats[:core].to_f/stats[:avg]
|
92
|
-
stats[:core_pan] = stats[:core].to_f/stats[:pan]
|
97
|
+
(og.genes.all? { |i| i.size == 1 }) ? 1 : 0
|
98
|
+
end.inject(0, :+)
|
99
|
+
og_genomes = collection.ogs.map { |og| og.genomes.length }.inject(0, :+)
|
100
|
+
stats[:avg] = og_genomes.to_f / Gene.genomes.length
|
101
|
+
stats[:avg_pan] = stats[:avg] / stats[:pan]
|
102
|
+
stats[:core_avg] = stats[:core].to_f / stats[:avg]
|
103
|
+
stats[:core_pan] = stats[:core].to_f / stats[:pan]
|
93
104
|
stats[:ogs_shannon] = -1 * collection.ogs.map do |og|
|
94
|
-
pi = og.genomes.length.to_f/Gene.genomes.length
|
105
|
+
pi = og.genomes.length.to_f / Gene.genomes.length
|
95
106
|
pi * Math.log(pi)
|
96
|
-
end.inject(0.0
|
107
|
+
end.inject(0.0, :+)
|
97
108
|
|
98
109
|
# Show result
|
99
|
-
$stderr.puts
|
100
|
-
stats.each_pair{ |k,v| puts " #{stat_name[k]}: #{v}" } unless o[:a]
|
110
|
+
$stderr.puts 'Generating reports' unless o[:q]
|
111
|
+
stats.each_pair { |k, v| puts " #{stat_name[k]}: #{v}" } unless o[:a]
|
101
112
|
|
102
113
|
# Save results in JSON
|
103
114
|
unless o[:json].nil?
|
104
|
-
ohf =
|
105
|
-
ohf.puts
|
115
|
+
ohf = writer(o[:json])
|
116
|
+
ohf.puts(JSON.pretty_generate(stats))
|
106
117
|
ohf.close
|
107
118
|
end
|
108
119
|
|
109
120
|
# Save results in tab
|
110
121
|
unless o[:tab].nil?
|
111
|
-
ohf =
|
112
|
-
stats.each_pair{ |k,v| ohf.puts "#{k}\t#{v}" }
|
122
|
+
ohf = writer(o[:tab])
|
123
|
+
stats.each_pair { |k, v| ohf.puts "#{k}\t#{v}" }
|
113
124
|
ohf.close
|
114
125
|
end
|
115
126
|
|
116
127
|
# Save results in T(tab)
|
117
128
|
unless o[:ttab].nil?
|
118
|
-
ohf =
|
129
|
+
ohf = writer(o[:ttab])
|
119
130
|
ohf.puts stats.keys.join("\t")
|
120
131
|
ohf.puts stats.values.join("\t")
|
121
132
|
ohf.close
|
122
133
|
end
|
123
134
|
|
124
|
-
$stderr.puts
|
135
|
+
$stderr.puts 'Done' unless o[:q]
|
125
136
|
rescue => err
|
126
137
|
$stderr.puts "Exception: #{err}\n\n"
|
127
138
|
err.backtrace.each { |l| $stderr.puts l + "\n" }
|
@@ -1,5 +1,5 @@
|
|
1
1
|
Package: enveomics.R
|
2
|
-
Version: 1.9.
|
2
|
+
Version: 1.9.1
|
3
3
|
Authors@R: c(person("Luis M.", "Rodriguez-R", role = c("aut", "cre"),
|
4
4
|
email = "lmrodriguezr@gmail.com"))
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
@@ -28,4 +28,4 @@ Suggests:
|
|
28
28
|
License: Artistic-2.0
|
29
29
|
LazyData: yes
|
30
30
|
Encoding: UTF-8
|
31
|
-
RoxygenNote: 7.1
|
31
|
+
RoxygenNote: 7.3.1
|
@@ -90,8 +90,8 @@ enve.cliopts <- function(
|
|
90
90
|
|
91
91
|
optopt <- list(help = "")
|
92
92
|
if (length(o_desc[[i]]) == 1) optopt$help <- o_desc[[i]]
|
93
|
-
if (!is.null(f[[i]]) && !suppressWarnings(is.na(f[[i]])) &&
|
94
|
-
is.logical(f[[i]])){
|
93
|
+
if (!is.null(f[[i]])[1] && !suppressWarnings(is.na(f[[i]]))[1] &&
|
94
|
+
is.logical(f[[i]])[1]){
|
95
95
|
optopt$opt_str <- paste(ifelse(f[[i]], "--no-", "--"), flag, sep = "")
|
96
96
|
optopt$action <- ifelse(f[[i]], "store_false", "store_true")
|
97
97
|
} else {
|
@@ -9,8 +9,8 @@ enve.recplot2.findPeaks.mower(
|
|
9
9
|
min.points = 10,
|
10
10
|
quant.est = c(0.002, 0.998),
|
11
11
|
mlv.opts = list(method = "parzen"),
|
12
|
-
fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start
|
13
|
-
|
12
|
+
fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start =
|
13
|
+
list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
|
14
14
|
fitdist.opts.norm = list(distr = "norm", method = "qme", probs = c(0.4, 0.6), start =
|
15
15
|
list(sd = 1), lower = c(0, -Inf)),
|
16
16
|
rm.top = 0.05,
|
@@ -17,8 +17,8 @@
|
|
17
17
|
id.lim = range(x$id.breaks),
|
18
18
|
pos.lim = range(x$pos.breaks),
|
19
19
|
pos.units = c("Mbp", "Kbp", "bp"),
|
20
|
-
mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4,
|
21
|
-
|
20
|
+
mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1)
|
21
|
+
+ 0.1, `3` = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1, `4` =
|
22
22
|
c(ifelse(any(layout == 1), 1, 5), ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1, `5` =
|
23
23
|
c(5, 3, 4, 1) + 0.1, `6` = c(5, 4, 4, 2) + 0.1),
|
24
24
|
pos.splines = 0,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-
|
11
|
+
date: 2024-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -472,7 +472,6 @@ files:
|
|
472
472
|
- utils/enveomics/Scripts/in_silico_GA_GI.pl
|
473
473
|
- utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz
|
474
474
|
- utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz
|
475
|
-
- utils/enveomics/Scripts/lib/enveomics.R
|
476
475
|
- utils/enveomics/Scripts/lib/enveomics_rb/anir.rb
|
477
476
|
- utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb
|
478
477
|
- utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb
|
@@ -1 +0,0 @@
|
|
1
|
-
../../enveomics.R
|