miga-base 1.3.13.2 → 1.3.13.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7452e6019471294f9f1b21ef675ff65a4b582eb51df3f0d59c4ee8a084f79540
4
- data.tar.gz: 1fb9ba522229fa36265c9f5b261c57c44f98e0d31275acd7903c0e4e79f9791a
3
+ metadata.gz: bdf054cb57b5a84d9ffb450b440a4dc6310e8aaf826d9b2160f0eecabce16385
4
+ data.tar.gz: 237517417fa21ad566df29aa9da67a8307b8b6a3988b0fae16fd4b594d2cc6f8
5
5
  SHA512:
6
- metadata.gz: 6519c3bdcf8ce9bfb252a023f9a0dc19ab6534e44f21e8db3199148ae7c2d9588848e18739111b6d444287cf0c6e383ace2b75f77de5bef5da55ea7b7979450e
7
- data.tar.gz: cb8b25241a73c301c8ceef0c2e08286348047df7a47576a46687444c89d7716f057c87cb5bfa9ed9016a44bcc63aeb5ed96cac1bb46442fe3b574a9923254b69
6
+ metadata.gz: aae8695fab93405b5bb96063c35b12218eed2c78fcea1b2f308589bf29e600c7fbd89515cff59fc1b378b090b0786d55d627444d88810c7c25b202c0d772a310
7
+ data.tar.gz: 923d60bb47caeab98c9b062dc8af575e8dbe2950cf0602e123cfe8e7533ccdb0aaa5034d1bf0006d5e5f418f7004777c08f861a1e2eb9d143a79044ece69a2d3
@@ -12,8 +12,7 @@ module MiGA::Cli::Action::Doctor::Base
12
12
  # list, and therefore the databases need to be cleaned.
13
13
  # This is a subtask of +check_dist+
14
14
  def check_dist_eval(cli, p, res)
15
- notok = {}
16
- fix = {}
15
+ y = { notok: Set.new, fix: Set.new }
17
16
  Zlib::GzipReader.open(res.file_path(:matrix)) do |fh|
18
17
  lineno = 0
19
18
  fh.each_line do |ln|
@@ -23,16 +22,11 @@ module MiGA::Cli::Action::Doctor::Base
23
22
  names = [r[0], r[1]]
24
23
  next unless names.any? { |i| p.dataset(i).nil? }
25
24
 
26
- names.each do |i|
27
- if p.dataset(i).nil? || !p.dataset(i).active?
28
- notok[i] = true
29
- else
30
- fix[i] = true
31
- end
32
- end
25
+ names.each { |i| y[p.dataset(i)&.active? ? :fix : :notok] << i }
33
26
  end
34
27
  end
35
- [notok.keys, fix.keys]
28
+ # The code below is more readable than `y.values.map(&:to_a)`
29
+ [y[:notok].to_a, y[:fix].to_a]
36
30
  end
37
31
 
38
32
  ##
@@ -43,8 +37,8 @@ module MiGA::Cli::Action::Doctor::Base
43
37
  return if fix.empty?
44
38
 
45
39
  cli.say("- Fixing #{fix.size} datasets")
46
- fix.each do |d_n|
47
- cli.say " > Fixing #{d_n}."
40
+ fix.each_with_index do |d_n, k|
41
+ cli.advance(' > Fixing', k + 1, fix.size, false)
48
42
  p.dataset(d_n).cleanup_distances!
49
43
  end
50
44
  end
@@ -77,6 +77,9 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
77
77
  'Path to an NCBI Taxonomy dump directory to query instead of API calls',
78
78
  'If the path is not passed, the dump is automatically downloaded'
79
79
  ) { |v| cli[:ncbi_taxonomy_dump] = v || true }
80
+ opt.on(
81
+ '--ignore-file', '::HIDE::' # Only for internal use
82
+ ) { |v| cli[:ignore_file] = v }
80
83
  end
81
84
  end
82
85
 
@@ -100,9 +103,10 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
100
103
  private
101
104
 
102
105
  def get_sub_cli
103
- return [cli] if cli[:file].nil?
106
+ return [cli] if cli[:file].nil? || cli[:ignore_file]
104
107
 
105
108
  glob = []
109
+ cli_default = cli.original_argv + ['--ignore-file']
106
110
  File.open(cli[:file], 'r') do |fh|
107
111
  h = nil
108
112
  fh.each do |ln|
@@ -110,7 +114,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
110
114
  if h.nil?
111
115
  h = r
112
116
  else
113
- argv_i = [self.name]
117
+ argv_i = cli_default.dup
114
118
  h.each_with_index do |field, k|
115
119
  case field.downcase
116
120
  when *%w[query ignore-dup get-metadata only-metadata]
data/lib/miga/cli.rb CHANGED
@@ -19,9 +19,13 @@ class MiGA::Cli < MiGA::MiGA
19
19
  attr_accessor :task
20
20
 
21
21
  ##
22
- # The CLI parameters (except the task), and Array of String
22
+ # The unparsed CLI parameters (except the task), an Array of String
23
23
  attr_accessor :argv
24
24
 
25
+ ##
26
+ # The original ARGV passed to the CLI, an Array of String
27
+ attr_accessor :original_argv
28
+
25
29
  ##
26
30
  # Action to launch, an object inheriting from MiGA::Cli::Action
27
31
  attr_accessor :action
@@ -62,6 +66,7 @@ class MiGA::Cli < MiGA::MiGA
62
66
  @data = {}
63
67
  @defaults = { verbose: false, tabular: false }
64
68
  @opt_common = true
69
+ @original_argv = argv.dup
65
70
  @objects = {}
66
71
  if argv[0].nil? or argv[0].to_s[0] == '-'
67
72
  @task = :generic
@@ -97,15 +97,14 @@ module MiGA::Dataset::Result
97
97
 
98
98
  ##
99
99
  # Clean-up all the stored distances, removing values for datasets no longer in
100
- # the project as reference datasets.
101
- def cleanup_distances!
100
+ # the project as reference datasets. All metrics are processed unless +metrics+
101
+ # is passed (Array[Symbol], including :haai, :aai, :ani)
102
+ def cleanup_distances!(metrics = %i[haai aai ani])
102
103
  return if get_result(:distances).nil?
103
104
 
104
105
  require 'miga/sqlite'
105
- ref = project.datasets.select(&:ref?).select(&:active?).map(&:name)
106
- %i[haai aai ani].each do |metric|
107
- cleanup_distances_by_metric!(ref, metric)
108
- end
106
+ ref = project.dataset_ref_active.map(&:name)
107
+ metrics.each { |metric| cleanup_distances_by_metric!(ref, metric) }
109
108
  end
110
109
 
111
110
  private
@@ -31,6 +31,22 @@ module MiGA::Project::Dataset
31
31
  @dataset_names_set ||= Set.new(dataset_names)
32
32
  end
33
33
 
34
+ ##
35
+ # Cache for the special set of datasets which are both reference and
36
+ # active, returned as an Array. Use carefully, as it doesn't get
37
+ # recalculated upon dataset (in)activation once loaded. To force
38
+ # recalculating, use +dataset_ref_active!+
39
+ def dataset_ref_active
40
+ @dataset_ref_active ||= dataset_ref_active!
41
+ end
42
+
43
+ ##
44
+ # Force recalculation of +dataset_ref_active+ and returns the Array
45
+ # of MiGA::Dataset objects
46
+ def dataset_ref_active!
47
+ @dataset_ref_active = datasets.select(&:ref?).select(&:active?)
48
+ end
49
+
34
50
  ##
35
51
  # Returns MiGA::Dataset
36
52
  def dataset(name)
@@ -60,6 +76,7 @@ module MiGA::Project::Dataset
60
76
  @metadata[:datasets] << name
61
77
  @dataset_names_hash[name] = true if @dataset_names_hash
62
78
  @dataset_names_set << name if @dataset_names_set
79
+ @dataset_ref_active = nil
63
80
  save
64
81
  if d.ref? && d.active?
65
82
  recalculate_tasks("Reference dataset added: #{d.name}")
@@ -75,6 +92,9 @@ module MiGA::Project::Dataset
75
92
  d = dataset(name)
76
93
  return nil if d.nil?
77
94
 
95
+ @dataset_names_hash = nil
96
+ @dataset_names_set = nil
97
+ @dataset_ref_active = nil
78
98
  self.metadata[:datasets].delete(name)
79
99
  save
80
100
  if d.ref? && d.active?
@@ -186,7 +186,6 @@ class MiGA::RemoteDataset
186
186
  # Looks for the entry +id+ in +dbfrom+, and returns the linked
187
187
  # identifier in +db+ (or nil).
188
188
  def ncbi_map(id, dbfrom, db)
189
- attempts = 0
190
189
  doc = download(:ncbi_map, dbfrom, id, :json, nil, db: db)
191
190
  return if doc.empty?
192
191
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 13, 2].freeze
15
+ VERSION = [1.3, 13, 4].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 3, 15)
23
+ VERSION_DATE = Date.new(2024, 3, 20)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -24,10 +24,8 @@ function aai_tsv {
24
24
  echo "a b value sd n omega" | tr " " "\\t"
25
25
  if [[ ${#DS[@]} -gt 40000 ]] ; then
26
26
  # Use comparisons in strictly one direction only for huge projects
27
- foreach_database_aai \
28
- | awk -F"\t" 'BEGIN { OFS="\t" }
29
- { if ($1 > $2) { a=$1; $1=$2; $2=a; } } { print $0 }' \
30
- | sort -k 1,2 -u
27
+ # (assuming the distances are truly bidirectional!)
28
+ foreach_database_aai | awk -F"\t" '$1 >= $2'
31
29
  else
32
30
  foreach_database_aai
33
31
  fi
@@ -12,6 +12,9 @@ miga_start_project_step "$DIR"
12
12
  # Cleanup databases
13
13
  ruby -I "$MIGA/lib" "$MIGA/utils/cleanup-databases.rb" "$PROJECT" "$CORES"
14
14
 
15
+ # Ensure bidirectional reference distances
16
+ miga doctor -P "$PROJECT" --only bidirectional -v -t "$CORES"
17
+
15
18
  # No real need for hAAI distributions at all
16
19
  echo -n "" > miga-project.log
17
20
  echo -n "" > miga-project.txt
@@ -9,9 +9,9 @@ DIR="$PROJECT/data/90.stats"
9
9
  # Initialize
10
10
  miga_start_project_step "$DIR"
11
11
 
12
- # Execute doctor
12
+ # Execute doctor (bidirectional is checked in haai_distances)
13
13
  echo "# Doctor"
14
- miga doctor -P "$PROJECT" -t "$CORES" -v
14
+ miga doctor -P "$PROJECT" -t "$CORES" -v --ignore bidirectional
15
15
 
16
16
  # Index taxonomy
17
17
  echo "# Index taxonomy"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.13.2
4
+ version: 1.3.13.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-15 00:00:00.000000000 Z
11
+ date: 2024-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons