miga-base 1.3.9.7 → 1.3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +5 -4
- data/lib/miga/cli/action/ncbi_get.rb +7 -8
- data/lib/miga/remote_dataset.rb +53 -0
- data/lib/miga/version.rb +2 -2
- data/utils/enveomics/Scripts/BlastTab.recplot2.R +4 -3
- data/utils/enveomics/Scripts/Newick.autoprune.R +4 -3
- data/utils/enveomics/Scripts/RecPlot2.compareIdentities.R +23 -9
- data/utils/enveomics/Scripts/TRIBS.plot-test.R +4 -3
- data/utils/enveomics/Scripts/TRIBS.test.R +4 -3
- data/utils/enveomics/Scripts/Table.barplot.R +20 -10
- data/utils/enveomics/Scripts/Table.df2dist.R +4 -3
- data/utils/enveomics/Scripts/Table.prefScore.R +13 -13
- data/utils/enveomics/Scripts/ogs.stats.rb +74 -63
- data/utils/enveomics/enveomics.R/DESCRIPTION +2 -2
- data/utils/enveomics/enveomics.R/R/cliopts.R +2 -2
- data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +2 -2
- data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +2 -2
- metadata +2 -3
- data/utils/enveomics/Scripts/lib/enveomics.R +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0239e39a0588b73d042da7d970925d2d93a5334c858453e032d51b0af760fa27'
|
4
|
+
data.tar.gz: 81e6903e1feba6571d76fe5d113a60414bd0d3b1b3090d6e26367a93cf8d0da7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d68e55d5335f3da03eb9cea737aad5fa21a7a272e3958db6130e7260387844c1bed92b0b2f655a5a5133772797212b717559da9701723c9885cc9ee7cffc962f
|
7
|
+
data.tar.gz: 8d27c2f580106c0d1f74e6daaf1cb81ffd7c6fbabf86e7569342027ff9e09b1810c245ad560eff0179c3d06a88b2d1a087344c56e34a2d54c799004e2f6370c0
|
@@ -17,10 +17,7 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
17
17
|
cli.opt_flag(opt, 'chromosome', 'Download complete chromosomes')
|
18
18
|
cli.opt_flag(opt, 'scaffold', 'Download genomes in scaffolds')
|
19
19
|
cli.opt_flag(opt, 'contig', 'Download genomes in contigs')
|
20
|
-
opt.on(
|
21
|
-
'--all',
|
22
|
-
'Download all genomes (in any status)'
|
23
|
-
) do
|
20
|
+
opt.on('--all', 'Download all genomes (in any status)') do
|
24
21
|
cli[:complete] = true
|
25
22
|
cli[:chromosome] = true
|
26
23
|
cli[:scaffold] = true
|
@@ -29,6 +26,10 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
29
26
|
opt.on('--ncbi-list-json STRING', '::HIDE::') do |v|
|
30
27
|
cli[:ncbi_list_json] = v
|
31
28
|
end
|
29
|
+
opt.on(
|
30
|
+
'--ncbi-taxonomy-dump STRING',
|
31
|
+
'Path to an NCBI Taxonomy dump directory to query instead of API calls'
|
32
|
+
) { |v| MiGA::RemoteDataset.use_ncbi_taxonomy_dump(v) }
|
32
33
|
end
|
33
34
|
|
34
35
|
def cli_name_modifiers(opt)
|
@@ -24,14 +24,13 @@ class MiGA::Cli::Action::NcbiGet < MiGA::Cli::Action
|
|
24
24
|
cli_name_modifiers(opt)
|
25
25
|
cli_filters(opt)
|
26
26
|
cli_save_actions(opt)
|
27
|
-
opt.on(
|
28
|
-
|
29
|
-
'
|
30
|
-
|
31
|
-
opt.on(
|
32
|
-
'
|
33
|
-
|
34
|
-
) { |v| ENV['NCBI_API_KEY'] = v }
|
27
|
+
opt.on('--api-key STRING', '::HIDE::') do |v|
|
28
|
+
warn "The use of --api-key is deprecated, please use --ncbi-api-key"
|
29
|
+
ENV['NCBI_API_KEY'] = v
|
30
|
+
end
|
31
|
+
opt.on('--ncbi-api-key STRING', 'NCBI API key') do |v|
|
32
|
+
ENV['NCBI_API_KEY'] = v
|
33
|
+
end
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -12,6 +12,55 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
12
12
|
# Class-level
|
13
13
|
|
14
14
|
class << self
|
15
|
+
##
|
16
|
+
# Path to a directory with a recent NCBI Taxonomy dump to use instead of
|
17
|
+
# making API calls to NCBI servers, which can be obtained at:
|
18
|
+
# https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
|
19
|
+
def use_ncbi_taxonomy_dump(path)
|
20
|
+
raise "Directory doesn't exist: #{path}" unless File.directory?(path)
|
21
|
+
|
22
|
+
# Structure: { TaxID => ["name", "rank", parent TaxID] }
|
23
|
+
@ncbi_taxonomy_names = {}
|
24
|
+
|
25
|
+
# Read names.dmp
|
26
|
+
File.open(File.join(path, 'names.dmp')) do |fh|
|
27
|
+
fh.each do |ln|
|
28
|
+
row = ln.split(/\t\|\t?/)
|
29
|
+
next unless row[3] == 'scientific name'
|
30
|
+
@ncbi_taxonomy_names[row[0].to_i] = [row[1].strip]
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Read nodes.dmp
|
35
|
+
File.open(File.join(path, 'nodes.dmp')) do |fh|
|
36
|
+
fh.each do |ln|
|
37
|
+
row = ln.split(/\t\|\t?/)
|
38
|
+
child = row[0].to_i
|
39
|
+
parent = row[1].to_i
|
40
|
+
@ncbi_taxonomy_names[child][1] = row[2]
|
41
|
+
@ncbi_taxonomy_names[child][2] = parent unless parent == child
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
##
|
47
|
+
# Is a local NCBI Taxonomy dump available?
|
48
|
+
def ncbi_taxonomy_dump?
|
49
|
+
(@ncbi_taxonomy_names ||= nil) ? true : false
|
50
|
+
end
|
51
|
+
|
52
|
+
##
|
53
|
+
# Get the MiGA::Taxonomy object for the lineage of the taxon with TaxID
|
54
|
+
# +id+ using the local NCBI Taxonomy dump.
|
55
|
+
def taxonomy_from_ncbi_dump(id)
|
56
|
+
MiGA::Taxonomy.new(ns: 'ncbi').tap do |tax|
|
57
|
+
while @ncbi_taxonomy_names[id]
|
58
|
+
tax << { @ncbi_taxonomy_names[id][1] => @ncbi_taxonomy_names[id][0] }
|
59
|
+
id = @ncbi_taxonomy_names[id][2]
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
15
64
|
##
|
16
65
|
# Translate an NCBI Assembly Accession (+acc+) to corresponding internal
|
17
66
|
# NCBI ID, with up to +retrials+ retrials if the returned JSON document
|
@@ -173,6 +222,10 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
173
222
|
def get_ncbi_taxonomy
|
174
223
|
tax_id = get_ncbi_taxid or return
|
175
224
|
|
225
|
+
if self.class.ncbi_taxonomy_dump?
|
226
|
+
return self.class.taxonomy_from_ncbi_dump(tax_id)
|
227
|
+
end
|
228
|
+
|
176
229
|
lineage = { ns: 'ncbi' }
|
177
230
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
178
231
|
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3,
|
15
|
+
VERSION = [1.3, 10, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 1,
|
23
|
+
VERSION_DATE = Date.new(2024, 1, 31)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
@@ -6,9 +6,10 @@
|
|
6
6
|
#= Load stuff
|
7
7
|
suppressPackageStartupMessages(library(enveomics.R))
|
8
8
|
args <- commandArgs(trailingOnly = FALSE)
|
9
|
-
enveomics_R <- file.path(
|
10
|
-
sub("^--file=", "", args[grep("^--file=", args)])),
|
11
|
-
"
|
9
|
+
enveomics_R <- file.path(
|
10
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
11
|
+
"..", "enveomics.R"
|
12
|
+
)
|
12
13
|
|
13
14
|
#= Generate interface
|
14
15
|
opt <- enve.cliopts(enve.recplot2,
|
@@ -7,9 +7,10 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
14
15
|
source(file.path(enveomics_R, "R", "autoprune.R"))
|
15
16
|
|
@@ -7,20 +7,34 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
library(methods)
|
14
15
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
15
16
|
source(file.path(enveomics_R, "R", "recplot2.R"))
|
16
17
|
|
17
18
|
#= Generate interface
|
18
|
-
opt <- enve.cliopts(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
opt <- enve.cliopts(
|
20
|
+
enve.recplot2.compareIdentities,
|
21
|
+
file.path(enveomics_R, "man", "enve.recplot2.compareIdentities.Rd"),
|
22
|
+
positional_arguments = 2,
|
23
|
+
usage = "usage: %prog [options] recplot-A.Rdata recplot-B.Rdata",
|
24
|
+
number = c("pseudocounts", "max.deviation"),
|
25
|
+
ignore = c("x", "y"),
|
26
|
+
p_desc = "Calculates the difference between identity distributions of two recruitment plots.",
|
27
|
+
o_desc = list(
|
28
|
+
method = paste(
|
29
|
+
"Distance method to use. This should be (an unambiguous abbreviation of)",
|
30
|
+
"one of: \"hellinger\" (Hellinger, 1909, doi:10.1515/crll.1909.136.210),",
|
31
|
+
"\"bhattacharyya\" (Bhattacharyya, 1943, Bull. Calcutta Math. Soc. 35),",
|
32
|
+
"\"kl\" or \"kullback-leibler\" (Kullback & Leibler, 1951,",
|
33
|
+
"doi:10.1214/aoms/1177729694), \"euclidean\"",
|
34
|
+
sep = "\n "
|
35
|
+
)
|
36
|
+
)
|
37
|
+
)
|
24
38
|
|
25
39
|
#= Run it!
|
26
40
|
load(opt$args[1])
|
@@ -8,9 +8,10 @@
|
|
8
8
|
#= Load stuff
|
9
9
|
suppressPackageStartupMessages(library(enveomics.R))
|
10
10
|
args <- commandArgs(trailingOnly = F)
|
11
|
-
enveomics_R <- file.path(
|
12
|
-
|
13
|
-
|
11
|
+
enveomics_R <- file.path(
|
12
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
13
|
+
"..", "enveomics.R"
|
14
|
+
)
|
14
15
|
|
15
16
|
#= Generate interface
|
16
17
|
opt <- enve.cliopts(plot.enve.TRIBStest,
|
@@ -8,9 +8,10 @@
|
|
8
8
|
#= Load stuff
|
9
9
|
suppressPackageStartupMessages(library(enveomics.R))
|
10
10
|
args <- commandArgs(trailingOnly = F)
|
11
|
-
enveomics_R <- file.path(
|
12
|
-
|
13
|
-
|
11
|
+
enveomics_R <- file.path(
|
12
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
13
|
+
"..", "enveomics.R"
|
14
|
+
)
|
14
15
|
|
15
16
|
#= Generate interface
|
16
17
|
opt <- suppressWarnings(enve.cliopts(enve.tribs,
|
@@ -7,21 +7,31 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
14
15
|
source(file.path(enveomics_R, "R", "utils.R"))
|
15
16
|
source(file.path(enveomics_R, "R", "barplot.R"))
|
16
17
|
|
17
18
|
#= Generate interface
|
18
|
-
opt <- enve.cliopts(
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
19
|
+
opt <- enve.cliopts(
|
20
|
+
enve.barplot,
|
21
|
+
file.path(enveomics_R, "man", "enve.barplot.Rd"),
|
22
|
+
positional_arguments = c(1, 3),
|
23
|
+
usage = "usage: %prog [options] output.pdf [width height]",
|
24
|
+
mandatory = c("x"),
|
25
|
+
vectorize = c("sizes", "order", "col"),
|
26
|
+
number = c("sizes", "order"),
|
27
|
+
o_desc = list(
|
28
|
+
x = paste(
|
29
|
+
"A tab-delimited file containing header (first row) and row names",
|
30
|
+
"(first column)."
|
31
|
+
),
|
32
|
+
order = "If passed, the custom order to be used (as row indexes)"
|
33
|
+
)
|
34
|
+
)
|
25
35
|
|
26
36
|
#= Run it!
|
27
37
|
args = as.list(opt$args)
|
@@ -7,9 +7,10 @@
|
|
7
7
|
|
8
8
|
#= Load stuff
|
9
9
|
args <- commandArgs(trailingOnly = F)
|
10
|
-
enveomics_R <- file.path(
|
11
|
-
|
12
|
-
|
10
|
+
enveomics_R <- file.path(
|
11
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
12
|
+
"..", "enveomics.R"
|
13
|
+
)
|
13
14
|
source(file.path(enveomics_R, "R", "cliopts.R"))
|
14
15
|
source(file.path(enveomics_R, "R", "df2dist.R"))
|
15
16
|
|
@@ -3,26 +3,26 @@
|
|
3
3
|
#= Load stuff
|
4
4
|
args <- commandArgs(trailingOnly = FALSE)
|
5
5
|
enveomics_R <- file.path(
|
6
|
-
dirname(sub(
|
7
|
-
|
8
|
-
'enveomics.R'
|
6
|
+
dirname(sub("^--file=", "", args[grep("^--file=", args)])),
|
7
|
+
"..", "enveomics.R"
|
9
8
|
)
|
10
|
-
for(file in c(
|
11
|
-
source(file.path(enveomics_R,
|
9
|
+
for(file in c("cliopts.R", "utils.R", "prefscore.R")) {
|
10
|
+
source(file.path(enveomics_R, "R", file))
|
11
|
+
}
|
12
12
|
|
13
13
|
#= Generate interface
|
14
14
|
opt <- enve.cliopts(
|
15
15
|
enve.prefscore,
|
16
|
-
file.path(enveomics_R,
|
16
|
+
file.path(enveomics_R, "man", "enve.prefscore.Rd"),
|
17
17
|
positional_arguments = c(1, 4),
|
18
|
-
usage =
|
19
|
-
mandatory = c(
|
20
|
-
number = c(
|
21
|
-
ignore = c(
|
18
|
+
usage = "usage: %prog [options] output.tsv [output.pdf [width height]]",
|
19
|
+
mandatory = c("x", "set"),
|
20
|
+
number = c("signif.thr"),
|
21
|
+
ignore = c("plot"),
|
22
22
|
o_desc = list(
|
23
|
-
x =
|
24
|
-
set =
|
25
|
-
ignore =
|
23
|
+
x = "A tab-delimited table of presence/absence (1/0) with species as rows and samples as columns.",
|
24
|
+
set = "A list of sample names that constitute the test set, one per line",
|
25
|
+
ignore = "A list of species to exclude from the analysis, one per line"
|
26
26
|
)
|
27
27
|
)
|
28
28
|
|
@@ -1,127 +1,138 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
# @author: Luis M. Rodriguez-R
|
5
|
-
# @license: Artistic-2.0
|
6
|
-
#
|
3
|
+
$:.push File.expand_path('../lib', __FILE__)
|
7
4
|
|
8
|
-
|
5
|
+
require 'enveomics_rb/enveomics'
|
9
6
|
require 'enveomics_rb/og'
|
10
|
-
require 'optparse'
|
11
7
|
require 'json'
|
8
|
+
$VERSION = 1.1
|
12
9
|
|
13
|
-
o = {q:false, a:false}
|
14
|
-
ARGV << '-h' if ARGV.size==0
|
10
|
+
o = { q: false, a: false }
|
15
11
|
OptionParser.new do |opts|
|
16
|
-
opts.
|
17
|
-
|
12
|
+
opts.version = $VERSION
|
13
|
+
cmd = File.basename($0)
|
14
|
+
Enveomics.opt_banner(
|
15
|
+
opts,
|
16
|
+
'Estimates some descriptive statistics on a set of Orthology Groups (OGs)',
|
17
|
+
"#{cmd} -o file.ogs [options]"
|
18
|
+
)
|
18
19
|
|
19
|
-
|
20
|
-
opts.
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
opts.separator
|
26
|
-
opts.
|
27
|
-
opts.on(
|
28
|
-
|
29
|
-
|
30
|
-
|
20
|
+
opts.separator 'Mandatory'
|
21
|
+
opts.on(
|
22
|
+
'-o', '--ogs FILE',
|
23
|
+
'Input file containing the precomputed OGs',
|
24
|
+
'Supports compression with .gz extension, use - for STDIN'
|
25
|
+
) { |v| o[:ogs] = v }
|
26
|
+
opts.separator ''
|
27
|
+
opts.separator 'Other Options'
|
28
|
+
opts.on(
|
29
|
+
'-j', '--json FILE', 'Output file in JSON format',
|
30
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
31
|
+
) { |v| o[:json] = v }
|
32
|
+
opts.on(
|
33
|
+
'-t', '--tab FILE', 'Output file in tabular format',
|
34
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
35
|
+
) { |v| o[:tab] = v }
|
36
|
+
opts.on(
|
37
|
+
'-T', '--transposed-tab FILE',
|
38
|
+
'Output file in transposed tabular format',
|
39
|
+
'Supports compression with .gz extension, use - for STDOUT'
|
40
|
+
){ |v| o[:ttab] = v }
|
41
|
+
opts.on('-a', '--auto', 'Run completely quietly (no STDERR or STDOUT)') do
|
31
42
|
o[:q] = true
|
32
43
|
o[:a] = true
|
33
44
|
end
|
34
|
-
opts.on(
|
35
|
-
opts.on(
|
45
|
+
opts.on('-q', '--quiet', 'Run quietly (no STDERR output)') { o[:q] = true }
|
46
|
+
opts.on('-h', '--help', 'Display this screen') do
|
36
47
|
puts opts
|
37
48
|
exit
|
38
49
|
end
|
39
|
-
opts.separator
|
50
|
+
opts.separator ''
|
40
51
|
end.parse!
|
41
|
-
abort "-o is mandatory" if o[:ogs].nil?
|
42
52
|
|
43
|
-
|
53
|
+
raise Enveomics::OptionError.new('-o is mandatory') if o[:ogs].nil?
|
54
|
+
|
44
55
|
begin
|
45
56
|
# Initialize the collection of OGs.
|
46
57
|
collection = OGCollection.new
|
47
|
-
|
58
|
+
|
48
59
|
# Read the pre-computed OGs
|
49
60
|
$stderr.puts "Reading pre-computed OGs in '#{o[:ogs]}'." unless o[:q]
|
50
|
-
f =
|
61
|
+
f = reader(o[:ogs])
|
51
62
|
h = f.gets.chomp.split /\t/
|
52
63
|
while ln = f.gets
|
53
64
|
collection << OG.new(h, ln.chomp.split(/\t/))
|
54
65
|
end
|
55
66
|
f.close
|
56
67
|
$stderr.puts " Loaded OGs: #{collection.ogs.length}." unless o[:q]
|
57
|
-
|
68
|
+
|
58
69
|
# Estimate descriptive stats
|
59
70
|
stat_name = {
|
60
|
-
genomes:
|
61
|
-
pan:
|
62
|
-
core:
|
63
|
-
core90pc:
|
64
|
-
core80pc:
|
65
|
-
unus:
|
66
|
-
avg:
|
67
|
-
avg_pan:
|
68
|
-
core_avg:
|
69
|
-
core_pan:
|
70
|
-
ogs_shannon:
|
71
|
+
genomes: 'Number of genomes',
|
72
|
+
pan: 'Pangenome (OGs)',
|
73
|
+
core: 'Core genome (OGs)',
|
74
|
+
core90pc: 'OGs in 90% of the genomes',
|
75
|
+
core80pc: 'OGs in 80% of the genomes',
|
76
|
+
unus: 'Unus genome, core genome discarding paralogs (OGs)',
|
77
|
+
avg: 'Average number of OGs in a genome',
|
78
|
+
avg_pan: 'Average genome (OGs) / Pangenome (OGs)',
|
79
|
+
core_avg: 'Core genome (OGs) / Average genome (OGs)',
|
80
|
+
core_pan: 'Core genome (OGs) / Pangenome (OGs)',
|
81
|
+
ogs_shannon: 'Entropy of the OG frequencies (bits)'
|
71
82
|
}
|
72
83
|
stats = {}
|
73
84
|
stats[:genomes] = Gene.genomes.length
|
74
85
|
stats[:pan] = collection.ogs.length
|
75
86
|
stats[:core] = collection.ogs.map do |og|
|
76
87
|
(og.genomes.length == Gene.genomes.length) ? 1 : 0
|
77
|
-
end.inject(0
|
88
|
+
end.inject(0, :+)
|
78
89
|
stats[:core90pc] = collection.ogs.map do |og|
|
79
|
-
(og.genomes.length >= 0.9*Gene.genomes.length) ? 1 : 0
|
80
|
-
end.inject(0
|
90
|
+
(og.genomes.length >= 0.9 * Gene.genomes.length) ? 1 : 0
|
91
|
+
end.inject(0, :+)
|
81
92
|
stats[:core80pc] = collection.ogs.map do |og|
|
82
|
-
(og.genomes.length >= 0.8*Gene.genomes.length) ? 1 : 0
|
83
|
-
end.inject(0
|
93
|
+
(og.genomes.length >= 0.8 * Gene.genomes.length) ? 1 : 0
|
94
|
+
end.inject(0, :+)
|
84
95
|
stats[:unus] = collection.ogs.map do |og|
|
85
96
|
(og.genomes.length != Gene.genomes.length) ? 0 :
|
86
|
-
(og.genes.all?{ |i| i.size==1 }) ? 1 : 0
|
87
|
-
end.inject(0
|
88
|
-
og_genomes
|
89
|
-
stats[:avg]
|
90
|
-
stats[:avg_pan]
|
91
|
-
stats[:core_avg] = stats[:core].to_f/stats[:avg]
|
92
|
-
stats[:core_pan] = stats[:core].to_f/stats[:pan]
|
97
|
+
(og.genes.all? { |i| i.size == 1 }) ? 1 : 0
|
98
|
+
end.inject(0, :+)
|
99
|
+
og_genomes = collection.ogs.map { |og| og.genomes.length }.inject(0, :+)
|
100
|
+
stats[:avg] = og_genomes.to_f / Gene.genomes.length
|
101
|
+
stats[:avg_pan] = stats[:avg] / stats[:pan]
|
102
|
+
stats[:core_avg] = stats[:core].to_f / stats[:avg]
|
103
|
+
stats[:core_pan] = stats[:core].to_f / stats[:pan]
|
93
104
|
stats[:ogs_shannon] = -1 * collection.ogs.map do |og|
|
94
|
-
pi = og.genomes.length.to_f/Gene.genomes.length
|
105
|
+
pi = og.genomes.length.to_f / Gene.genomes.length
|
95
106
|
pi * Math.log(pi)
|
96
|
-
end.inject(0.0
|
107
|
+
end.inject(0.0, :+)
|
97
108
|
|
98
109
|
# Show result
|
99
|
-
$stderr.puts
|
100
|
-
stats.each_pair{ |k,v| puts " #{stat_name[k]}: #{v}" } unless o[:a]
|
110
|
+
$stderr.puts 'Generating reports' unless o[:q]
|
111
|
+
stats.each_pair { |k, v| puts " #{stat_name[k]}: #{v}" } unless o[:a]
|
101
112
|
|
102
113
|
# Save results in JSON
|
103
114
|
unless o[:json].nil?
|
104
|
-
ohf =
|
105
|
-
ohf.puts
|
115
|
+
ohf = writer(o[:json])
|
116
|
+
ohf.puts(JSON.pretty_generate(stats))
|
106
117
|
ohf.close
|
107
118
|
end
|
108
119
|
|
109
120
|
# Save results in tab
|
110
121
|
unless o[:tab].nil?
|
111
|
-
ohf =
|
112
|
-
stats.each_pair{ |k,v| ohf.puts "#{k}\t#{v}" }
|
122
|
+
ohf = writer(o[:tab])
|
123
|
+
stats.each_pair { |k, v| ohf.puts "#{k}\t#{v}" }
|
113
124
|
ohf.close
|
114
125
|
end
|
115
126
|
|
116
127
|
# Save results in T(tab)
|
117
128
|
unless o[:ttab].nil?
|
118
|
-
ohf =
|
129
|
+
ohf = writer(o[:ttab])
|
119
130
|
ohf.puts stats.keys.join("\t")
|
120
131
|
ohf.puts stats.values.join("\t")
|
121
132
|
ohf.close
|
122
133
|
end
|
123
134
|
|
124
|
-
$stderr.puts
|
135
|
+
$stderr.puts 'Done' unless o[:q]
|
125
136
|
rescue => err
|
126
137
|
$stderr.puts "Exception: #{err}\n\n"
|
127
138
|
err.backtrace.each { |l| $stderr.puts l + "\n" }
|
@@ -1,5 +1,5 @@
|
|
1
1
|
Package: enveomics.R
|
2
|
-
Version: 1.9.
|
2
|
+
Version: 1.9.1
|
3
3
|
Authors@R: c(person("Luis M.", "Rodriguez-R", role = c("aut", "cre"),
|
4
4
|
email = "lmrodriguezr@gmail.com"))
|
5
5
|
Title: Various Utilities for Microbial Genomics and Metagenomics
|
@@ -28,4 +28,4 @@ Suggests:
|
|
28
28
|
License: Artistic-2.0
|
29
29
|
LazyData: yes
|
30
30
|
Encoding: UTF-8
|
31
|
-
RoxygenNote: 7.1
|
31
|
+
RoxygenNote: 7.3.1
|
@@ -90,8 +90,8 @@ enve.cliopts <- function(
|
|
90
90
|
|
91
91
|
optopt <- list(help = "")
|
92
92
|
if (length(o_desc[[i]]) == 1) optopt$help <- o_desc[[i]]
|
93
|
-
if (!is.null(f[[i]]) && !suppressWarnings(is.na(f[[i]])) &&
|
94
|
-
is.logical(f[[i]])){
|
93
|
+
if (!is.null(f[[i]])[1] && !suppressWarnings(is.na(f[[i]]))[1] &&
|
94
|
+
is.logical(f[[i]])[1]){
|
95
95
|
optopt$opt_str <- paste(ifelse(f[[i]], "--no-", "--"), flag, sep = "")
|
96
96
|
optopt$action <- ifelse(f[[i]], "store_false", "store_true")
|
97
97
|
} else {
|
@@ -9,8 +9,8 @@ enve.recplot2.findPeaks.mower(
|
|
9
9
|
min.points = 10,
|
10
10
|
quant.est = c(0.002, 0.998),
|
11
11
|
mlv.opts = list(method = "parzen"),
|
12
|
-
fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start
|
13
|
-
|
12
|
+
fitdist.opts.sn = list(distr = "sn", method = "qme", probs = c(0.1, 0.5, 0.8), start =
|
13
|
+
list(omega = 1, alpha = -1), lower = c(0, -Inf, -Inf)),
|
14
14
|
fitdist.opts.norm = list(distr = "norm", method = "qme", probs = c(0.4, 0.6), start =
|
15
15
|
list(sd = 1), lower = c(0, -Inf)),
|
16
16
|
rm.top = 0.05,
|
@@ -17,8 +17,8 @@
|
|
17
17
|
id.lim = range(x$id.breaks),
|
18
18
|
pos.lim = range(x$pos.breaks),
|
19
19
|
pos.units = c("Mbp", "Kbp", "bp"),
|
20
|
-
mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4,
|
21
|
-
|
20
|
+
mar = list(`1` = c(5, 4, 1, 1) + 0.1, `2` = c(ifelse(any(layout == 1), 1, 5), 4, 4, 1)
|
21
|
+
+ 0.1, `3` = c(5, ifelse(any(layout == 1), 1, 4), 1, 2) + 0.1, `4` =
|
22
22
|
c(ifelse(any(layout == 1), 1, 5), ifelse(any(layout == 2), 1, 4), 4, 2) + 0.1, `5` =
|
23
23
|
c(5, 3, 4, 1) + 0.1, `6` = c(5, 4, 4, 2) + 0.1),
|
24
24
|
pos.splines = 0,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-01-
|
11
|
+
date: 2024-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -472,7 +472,6 @@ files:
|
|
472
472
|
- utils/enveomics/Scripts/in_silico_GA_GI.pl
|
473
473
|
- utils/enveomics/Scripts/lib/data/dupont_2012_essential.hmm.gz
|
474
474
|
- utils/enveomics/Scripts/lib/data/lee_2019_essential.hmm.gz
|
475
|
-
- utils/enveomics/Scripts/lib/enveomics.R
|
476
475
|
- utils/enveomics/Scripts/lib/enveomics_rb/anir.rb
|
477
476
|
- utils/enveomics/Scripts/lib/enveomics_rb/bm_set.rb
|
478
477
|
- utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb
|
@@ -1 +0,0 @@
|
|
1
|
-
../../enveomics.R
|