miga-base 1.3.13.2 → 1.3.13.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/doctor/base.rb +6 -12
- data/lib/miga/cli/action/get.rb +6 -2
- data/lib/miga/cli.rb +6 -1
- data/lib/miga/dataset/result.rb +5 -6
- data/lib/miga/project/dataset.rb +20 -0
- data/lib/miga/remote_dataset/download.rb +0 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/aai_distances.bash +2 -4
- data/scripts/haai_distances.bash +3 -0
- data/scripts/project_stats.bash +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bdf054cb57b5a84d9ffb450b440a4dc6310e8aaf826d9b2160f0eecabce16385
|
4
|
+
data.tar.gz: 237517417fa21ad566df29aa9da67a8307b8b6a3988b0fae16fd4b594d2cc6f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aae8695fab93405b5bb96063c35b12218eed2c78fcea1b2f308589bf29e600c7fbd89515cff59fc1b378b090b0786d55d627444d88810c7c25b202c0d772a310
|
7
|
+
data.tar.gz: 923d60bb47caeab98c9b062dc8af575e8dbe2950cf0602e123cfe8e7533ccdb0aaa5034d1bf0006d5e5f418f7004777c08f861a1e2eb9d143a79044ece69a2d3
|
@@ -12,8 +12,7 @@ module MiGA::Cli::Action::Doctor::Base
|
|
12
12
|
# list, and therefore the databases need to be cleaned.
|
13
13
|
# This is a subtask of +check_dist+
|
14
14
|
def check_dist_eval(cli, p, res)
|
15
|
-
|
16
|
-
fix = {}
|
15
|
+
y = { notok: Set.new, fix: Set.new }
|
17
16
|
Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
|
18
17
|
lineno = 0
|
19
18
|
fh.each_line do |ln|
|
@@ -23,16 +22,11 @@ module MiGA::Cli::Action::Doctor::Base
|
|
23
22
|
names = [r[0], r[1]]
|
24
23
|
next unless names.any? { |i| p.dataset(i).nil? }
|
25
24
|
|
26
|
-
names.each
|
27
|
-
if p.dataset(i).nil? || !p.dataset(i).active?
|
28
|
-
notok[i] = true
|
29
|
-
else
|
30
|
-
fix[i] = true
|
31
|
-
end
|
32
|
-
end
|
25
|
+
names.each { |i| y[p.dataset(i)&.active? ? :fix : :notok] << i }
|
33
26
|
end
|
34
27
|
end
|
35
|
-
|
28
|
+
# The code below is more readable than `y.values.map(&:to_a)`
|
29
|
+
[y[:notok].to_a, y[:fix].to_a]
|
36
30
|
end
|
37
31
|
|
38
32
|
##
|
@@ -43,8 +37,8 @@ module MiGA::Cli::Action::Doctor::Base
|
|
43
37
|
return if fix.empty?
|
44
38
|
|
45
39
|
cli.say("- Fixing #{fix.size} datasets")
|
46
|
-
fix.
|
47
|
-
cli.
|
40
|
+
fix.each_with_index do |d_n, k|
|
41
|
+
cli.advance(' > Fixing', k + 1, fix.size, false)
|
48
42
|
p.dataset(d_n).cleanup_distances!
|
49
43
|
end
|
50
44
|
end
|
data/lib/miga/cli/action/get.rb
CHANGED
@@ -77,6 +77,9 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
77
77
|
'Path to an NCBI Taxonomy dump directory to query instead of API calls',
|
78
78
|
'If the path is not passed, the dump is automatically downloaded'
|
79
79
|
) { |v| cli[:ncbi_taxonomy_dump] = v || true }
|
80
|
+
opt.on(
|
81
|
+
'--ignore-file', '::HIDE::' # Only for internal use
|
82
|
+
) { |v| cli[:ignore_file] = v }
|
80
83
|
end
|
81
84
|
end
|
82
85
|
|
@@ -100,9 +103,10 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
100
103
|
private
|
101
104
|
|
102
105
|
def get_sub_cli
|
103
|
-
return [cli] if cli[:file].nil?
|
106
|
+
return [cli] if cli[:file].nil? || cli[:ignore_file]
|
104
107
|
|
105
108
|
glob = []
|
109
|
+
cli_default = cli.original_argv + ['--ignore-file']
|
106
110
|
File.open(cli[:file], 'r') do |fh|
|
107
111
|
h = nil
|
108
112
|
fh.each do |ln|
|
@@ -110,7 +114,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
|
|
110
114
|
if h.nil?
|
111
115
|
h = r
|
112
116
|
else
|
113
|
-
argv_i =
|
117
|
+
argv_i = cli_default.dup
|
114
118
|
h.each_with_index do |field, k|
|
115
119
|
case field.downcase
|
116
120
|
when *%w[query ignore-dup get-metadata only-metadata]
|
data/lib/miga/cli.rb
CHANGED
@@ -19,9 +19,13 @@ class MiGA::Cli < MiGA::MiGA
|
|
19
19
|
attr_accessor :task
|
20
20
|
|
21
21
|
##
|
22
|
-
# The CLI parameters (except the task),
|
22
|
+
# The unparsed CLI parameters (except the task), an Array of String
|
23
23
|
attr_accessor :argv
|
24
24
|
|
25
|
+
##
|
26
|
+
# The original ARGV passed to the CLI, an Array of String
|
27
|
+
attr_accessor :original_argv
|
28
|
+
|
25
29
|
##
|
26
30
|
# Action to launch, an object inheriting from MiGA::Cli::Action
|
27
31
|
attr_accessor :action
|
@@ -62,6 +66,7 @@ class MiGA::Cli < MiGA::MiGA
|
|
62
66
|
@data = {}
|
63
67
|
@defaults = { verbose: false, tabular: false }
|
64
68
|
@opt_common = true
|
69
|
+
@original_argv = argv.dup
|
65
70
|
@objects = {}
|
66
71
|
if argv[0].nil? or argv[0].to_s[0] == '-'
|
67
72
|
@task = :generic
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -97,15 +97,14 @@ module MiGA::Dataset::Result
|
|
97
97
|
|
98
98
|
##
|
99
99
|
# Clean-up all the stored distances, removing values for datasets no longer in
|
100
|
-
# the project as reference datasets.
|
101
|
-
|
100
|
+
# the project as reference datasets. All metrics are processed unless +metrics+
|
101
|
+
# is passed (Array[Symbol], including :haai, :aai, :ani)
|
102
|
+
def cleanup_distances!(metrics = %i[haai aai ani])
|
102
103
|
return if get_result(:distances).nil?
|
103
104
|
|
104
105
|
require 'miga/sqlite'
|
105
|
-
ref = project.
|
106
|
-
|
107
|
-
cleanup_distances_by_metric!(ref, metric)
|
108
|
-
end
|
106
|
+
ref = project.dataset_ref_active.map(&:name)
|
107
|
+
metrics.each { |metric| cleanup_distances_by_metric!(ref, metric) }
|
109
108
|
end
|
110
109
|
|
111
110
|
private
|
data/lib/miga/project/dataset.rb
CHANGED
@@ -31,6 +31,22 @@ module MiGA::Project::Dataset
|
|
31
31
|
@dataset_names_set ||= Set.new(dataset_names)
|
32
32
|
end
|
33
33
|
|
34
|
+
##
|
35
|
+
# Cache for the special set of datasets which are both reference and
|
36
|
+
# active, returned as an Array. Use carefully, as it doesn't get
|
37
|
+
# recalculated upon dataset (in)activation once loaded. To force
|
38
|
+
# recalculating, use +dataset_ref_active!+
|
39
|
+
def dataset_ref_active
|
40
|
+
@dataset_ref_active ||= dataset_ref_active!
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Forces recalculation of +dataset_ref_active+ and returns the Array
|
45
|
+
# of MiGA::Dataset objects
|
46
|
+
def dataset_ref_active!
|
47
|
+
@dataset_ref_active = datasets.select(&:ref?).select(&:active?)
|
48
|
+
end
|
49
|
+
|
34
50
|
##
|
35
51
|
# Returns MiGA::Dataset
|
36
52
|
def dataset(name)
|
@@ -60,6 +76,7 @@ module MiGA::Project::Dataset
|
|
60
76
|
@metadata[:datasets] << name
|
61
77
|
@dataset_names_hash[name] = true if @dataset_names_hash
|
62
78
|
@dataset_names_set << name if @dataset_names_set
|
79
|
+
@dataset_ref_active = nil
|
63
80
|
save
|
64
81
|
if d.ref? && d.active?
|
65
82
|
recalculate_tasks("Reference dataset added: #{d.name}")
|
@@ -75,6 +92,9 @@ module MiGA::Project::Dataset
|
|
75
92
|
d = dataset(name)
|
76
93
|
return nil if d.nil?
|
77
94
|
|
95
|
+
@dataset_names_hash = nil
|
96
|
+
@dataset_names_set = nil
|
97
|
+
@dataset_ref_active = nil
|
78
98
|
self.metadata[:datasets].delete(name)
|
79
99
|
save
|
80
100
|
if d.ref? && d.active?
|
@@ -186,7 +186,6 @@ class MiGA::RemoteDataset
|
|
186
186
|
# Looks for the entry +id+ in +dbfrom+, and returns the linked
|
187
187
|
# identifier in +db+ (or nil).
|
188
188
|
def ncbi_map(id, dbfrom, db)
|
189
|
-
attempts = 0
|
190
189
|
doc = download(:ncbi_map, dbfrom, id, :json, nil, db: db)
|
191
190
|
return if doc.empty?
|
192
191
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 13,
|
15
|
+
VERSION = [1.3, 13, 4].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024, 3,
|
23
|
+
VERSION_DATE = Date.new(2024, 3, 20)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/aai_distances.bash
CHANGED
@@ -24,10 +24,8 @@ function aai_tsv {
|
|
24
24
|
echo "a b value sd n omega" | tr " " "\\t"
|
25
25
|
if [[ ${#DS[@]} -gt 40000 ]] ; then
|
26
26
|
# Use comparisons in strictly one direction only for huge projects
|
27
|
-
|
28
|
-
|
29
|
-
{ if ($1 > $2) { a=$1; $1=$2; $2=a; } } { print $0 }' \
|
30
|
-
| sort -k 1,2 -u
|
27
|
+
# (assuming the distances are truly bidirectional!)
|
28
|
+
foreach_database_aai | awk -F"\t" '$1 >= $2'
|
31
29
|
else
|
32
30
|
foreach_database_aai
|
33
31
|
fi
|
data/scripts/haai_distances.bash
CHANGED
@@ -12,6 +12,9 @@ miga_start_project_step "$DIR"
|
|
12
12
|
# Cleanup databases
|
13
13
|
ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
|
14
14
|
|
15
|
+
# Ensure bidirectional reference distances
|
16
|
+
miga doctor -P "$PROJECT" --only bidirectional -v -t "$CORES"
|
17
|
+
|
15
18
|
# No real need for hAAI distributions at all
|
16
19
|
echo -n "" > miga-project.log
|
17
20
|
echo -n "" > miga-project.txt
|
data/scripts/project_stats.bash
CHANGED
@@ -9,9 +9,9 @@ DIR="$PROJECT/data/90.stats"
|
|
9
9
|
# Initialize
|
10
10
|
miga_start_project_step "$DIR"
|
11
11
|
|
12
|
-
# Execute doctor
|
12
|
+
# Execute doctor (bidirectional is checked in haai_distances)
|
13
13
|
echo "# Doctor"
|
14
|
-
miga doctor -P "$PROJECT" -t "$CORES" -v
|
14
|
+
miga doctor -P "$PROJECT" -t "$CORES" -v --ignore bidirectional
|
15
15
|
|
16
16
|
# Index taxonomy
|
17
17
|
echo "# Index taxonomy"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.13.
|
4
|
+
version: 1.3.13.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|