miga-base 1.3.10.0 → 1.3.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0239e39a0588b73d042da7d970925d2d93a5334c858453e032d51b0af760fa27'
4
- data.tar.gz: 81e6903e1feba6571d76fe5d113a60414bd0d3b1b3090d6e26367a93cf8d0da7
3
+ metadata.gz: 601e480270ea7f04ff3f5057fa5f031146ac2e73247a7c191ea391a17e971bfc
4
+ data.tar.gz: 1577f19ec6b7798305a847da42556a16ff9e6a144c2e317e3349a8b49a0069fd
5
5
  SHA512:
6
- metadata.gz: d68e55d5335f3da03eb9cea737aad5fa21a7a272e3958db6130e7260387844c1bed92b0b2f655a5a5133772797212b717559da9701723c9885cc9ee7cffc962f
7
- data.tar.gz: 8d27c2f580106c0d1f74e6daaf1cb81ffd7c6fbabf86e7569342027ff9e09b1810c245ad560eff0179c3d06a88b2d1a087344c56e34a2d54c799004e2f6370c0
6
+ metadata.gz: '0293ffeea41a27ddef7a7b077d8cd93f86f5b9e95537576f8ce87120242cf3ced5f9554966cf648dcb817134977fbe6ee13a45b0815069cf2471014d780ce8c4'
7
+ data.tar.gz: b7ab71aa3a78d32861e1e9f6ff685424530093fd485185be722c6cc139a81d30692e00ccb6cbad4799bbcbd077ce32c7693db8336c98e8dfe0b18196748af174
@@ -29,7 +29,7 @@ module MiGA::Cli::Action::Download::Ncbi
29
29
  opt.on(
30
30
  '--ncbi-taxonomy-dump STRING',
31
31
  'Path to an NCBI Taxonomy dump directory to query instead of API calls'
32
- ) { |v| MiGA::RemoteDataset.use_ncbi_taxonomy_dump(v) }
32
+ ) { |v| cli[:ncbi_taxonomy_dump] = v }
33
33
  end
34
34
 
35
35
  def cli_name_modifiers(opt)
@@ -55,11 +55,16 @@ module MiGA::Cli::Action::Download::Ncbi
55
55
  end
56
56
 
57
57
  def remote_list
58
+ if cli[:ncbi_taxonomy_dump]
59
+ cli.say "Reading NCBI Taxonomy dump: #{cli[:ncbi_taxonomy_dump]}"
60
+ MiGA::RemoteDataset.use_ncbi_taxonomy_dump(cli[:ncbi_taxonomy_dump], cli)
61
+ end
62
+
58
63
  if cli[:ncbi_list_json] && File.size?(cli[:ncbi_list_json])
59
- cli.say "Reusing remote list: #{cli[:ncbi_list_json]}"
60
- return MiGA::Json.parse(cli[:ncbi_list_json])
64
+ return read_ncbi_list_json(cli[:ncbi_list_json])
61
65
  end
62
66
 
67
+ cli.say "Obtaining remote list of datasets"
63
68
  list = {}
64
69
  query = remote_list_query
65
70
  loop do
@@ -74,18 +79,45 @@ module MiGA::Cli::Action::Download::Ncbi
74
79
  list.merge!(parse_reports_as_datasets(page[:reports]))
75
80
 
76
81
  # Next page
82
+ cli.advance('Datasets:', list.size, page[:total_count])
77
83
  break unless page[:next_page_token]
78
84
  query[:page_token] = page[:next_page_token]
79
85
  end
86
+ cli.say
87
+
88
+ write_ncbi_list_json(cli[:ncbi_list_json], list) if cli[:ncbi_list_json]
89
+ list
90
+ end
80
91
 
81
- if cli[:ncbi_list_json]
82
- cli.say "Saving remote list: #{cli[:ncbi_list_json]}"
83
- MiGA::Json.generate_fast(list, cli[:ncbi_list_json])
92
+ def read_ncbi_list_json(file)
93
+ cli.say "Reusing remote list: #{file}"
94
+ list = {}
95
+ n_tot = nil
96
+ File.open(file, 'r') do |fh|
97
+ n_tot = fh.gets.chomp.sub(/^# /, '').to_i
98
+ fh.each_with_index do |ln, k|
99
+ row = ln.chomp.split("\t", 2)
100
+ list[row[0]] = MiGA::Json.parse(row[1], contents: true)
101
+ cli.advance('Lines:', k, n_tot)
102
+ end
103
+ cli.say
84
104
  end
105
+ return list
106
+ end
85
107
 
86
- list
108
+ def write_ncbi_list_json(file, list)
109
+ cli.say "Saving remote list: #{file}"
110
+ File.open(file, 'w') do |fh|
111
+ fh.puts('# %i' % list.size)
112
+ kk = 0
113
+ list.each do |k, v|
114
+ fh.puts([k, MiGA::Json.generate_fast(v)].join("\t"))
115
+ cli.advance('Datasets:', kk += 1, list.size)
116
+ end
117
+ cli.say
118
+ end
87
119
  end
88
-
120
+
89
121
  def parse_reports_as_datasets(reports)
90
122
  ds = {}
91
123
  reports.each do |r|
@@ -20,7 +20,10 @@ module MiGA::Dataset::Status
20
20
  old_status = metadata[:status]
21
21
  metadata[:status] =
22
22
  !active? ? 'inactive' : done_preprocessing? ? 'complete' : 'incomplete'
23
- self.save if save && (old_status.nil? || old_status != metadata[:status])
23
+ if save && (old_status.nil? || old_status != metadata[:status])
24
+ self.save
25
+ MiGA::MiGA.DEBUG "Status changed: #{old_status} -> #{metadata[:status]}"
26
+ end
24
27
  metadata[:status].to_sym
25
28
  end
26
29
  end
data/lib/miga/dataset.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
+ require'set'
6
7
  require 'miga/metadata'
7
8
  require 'miga/dataset/result'
8
9
  require 'miga/dataset/status'
@@ -27,7 +28,7 @@ class MiGA::Dataset < MiGA::MiGA
27
28
  ##
28
29
  # Does the +project+ already have a dataset with that +name+?
29
30
  def exist?(project, name)
30
- !project.dataset_names_hash[name].nil?
31
+ project.dataset_names_set.include? name
31
32
  end
32
33
 
33
34
  ##
@@ -61,6 +62,7 @@ class MiGA::Dataset < MiGA::MiGA
61
62
  @project, @name, @metadata = project, name, nil
62
63
  metadata[:ref] = is_ref
63
64
  metadata[:type] ||= :empty
65
+ metadata[:status] ||= 'incomplete'
64
66
  @metadata_future = [
65
67
  File.join(project.path, 'metadata', "#{name}.json"),
66
68
  metadata
@@ -84,15 +86,18 @@ class MiGA::Dataset < MiGA::MiGA
84
86
  ##
85
87
  # Save any changes you've made in the dataset
86
88
  def save
87
- MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
89
+ MiGA.DEBUG "Dataset.save: #{name}"
88
90
  metadata.save
89
91
  pull_hook :on_save
90
92
  end
91
93
 
92
94
  ##
93
- # Currently +save!+ is simply an alias of +save+, for compatibility with the
94
- # +Project+ interface
95
- alias :save! :save
95
+ # Forces a save even if nothing has changed in the metadata
96
+ def save!
97
+ MiGA.DEBUG "Dataset.save!: #{name}"
98
+ metadata.save!
99
+ pull_hook :on_save
100
+ end
96
101
 
97
102
  ##
98
103
  # Delete the dataset with all its contents (including results) and returns
@@ -148,7 +153,7 @@ class MiGA::Dataset < MiGA::MiGA
148
153
  ##
149
154
  # Is this dataset active?
150
155
  def active?
151
- metadata[:inactive].nil? or !metadata[:inactive]
156
+ metadata[:inactive].nil? || !metadata[:inactive]
152
157
  end
153
158
 
154
159
  ##
data/lib/miga/json.rb CHANGED
@@ -15,6 +15,8 @@ class MiGA::Json < MiGA::MiGA
15
15
  # - +:symbolize+: If names should be symbolized. By default it's true if
16
16
  # additions is false, or false otherwise. They can both be false, but an
17
17
  # exception will be raised if both are true
18
+ # - +:large_file+: If passed, the file is treated as a file with very long
19
+ # lines (possibly a single long line)
18
20
  def default_opts(opts = {})
19
21
  opts[:contents] ||= false
20
22
  opts[:additions] ||= false
@@ -36,11 +38,18 @@ class MiGA::Json < MiGA::MiGA
36
38
 
37
39
  # Read JSON
38
40
  cont = path
39
- 12.times do
40
- cont = File.read(path)
41
- break unless cont.empty?
42
- sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
43
- end unless opts[:contents]
41
+ if opts[:large_file]
42
+ cont = ''
43
+ File.open(path, 'r') do |fh|
44
+ cont += fh.read(2 ** 16) until fh.eof?
45
+ end
46
+ elsif !opts[:contents]
47
+ 12.times do
48
+ cont = File.read(path)
49
+ break unless cont.empty?
50
+ sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
51
+ end
52
+ end
44
53
  raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
45
54
 
46
55
  # Parse JSON
data/lib/miga/metadata.rb CHANGED
@@ -26,12 +26,17 @@ class MiGA::Metadata < MiGA::MiGA
26
26
  # Path to the JSON file describing the metadata
27
27
  attr_reader :path
28
28
 
29
+ ##
30
+ # Hash (Integer) of the last saved data Hash (object)
31
+ attr_reader :saved_hash
32
+
29
33
  ##
30
34
  # Initiate a MiGA::Metadata object with description in +path+.
31
35
  # It will create it if it doesn't exist.
32
36
  def initialize(path, defaults = {})
33
37
  @data = nil
34
38
  @path = File.absolute_path(path)
39
+ @saved_hash = nil
35
40
  unless File.exist? path
36
41
  @data = {}
37
42
  defaults.each { |k, v| self[k] = v }
@@ -57,35 +62,41 @@ class MiGA::Metadata < MiGA::MiGA
57
62
  # Save the metadata into #path
58
63
  def save
59
64
  return if self[:never_save]
65
+ return if !saved_hash.nil? && saved_hash == data.hash
60
66
 
61
67
  MiGA::MiGA.DEBUG "Metadata.save #{path}"
68
+ path_tmp = "#{path}.tmp"
62
69
  self[:updated] = Time.now.to_s
70
+ @saved_hash = data.hash
63
71
  json = to_json
64
72
  wait_for_lock
65
73
  FileUtils.touch(lock_file)
66
- ofh = File.open("#{path}.tmp", 'w')
67
- ofh.puts json
68
- ofh.close
74
+ File.open(path_tmp, 'w') { |ofh| ofh.puts json }
69
75
 
70
- unless File.exist?("#{path}.tmp") && File.exist?(lock_file)
76
+ unless File.exist?(path_tmp) && File.exist?(lock_file)
71
77
  raise "Lock-racing detected for #{path}"
72
78
  end
73
79
 
74
- File.rename("#{path}.tmp", path)
80
+ File.rename(path_tmp, path)
75
81
  File.unlink(lock_file)
76
82
  end
77
83
 
84
+ ##
85
+ # Force +save+ even if nothing has changed since the last save
86
+ # or load. However, it doesn't save if +:never_save+ is true.
87
+ def save!
88
+ @saved_hash = nil
89
+ save
90
+ end
91
+
78
92
  ##
79
93
  # (Re-)load metadata stored in #path
80
94
  def load
81
- sleeper = 0.0
82
- while File.exist? lock_file
83
- sleeper += 0.1 if sleeper <= 10.0
84
- sleep(sleeper.to_i)
85
- end
95
+ wait_for_lock
86
96
  tmp = MiGA::Json.parse(path, additions: true)
87
97
  @data = {}
88
98
  tmp.each { |k, v| self[k] = v }
99
+ @saved_hash = data.hash
89
100
  end
90
101
 
91
102
  ##
@@ -105,7 +116,7 @@ class MiGA::Metadata < MiGA::MiGA
105
116
  ##
106
117
  # Return the value of +k+ in #data
107
118
  def [](k)
108
- if k.to_s =~ /(.+):(.+)/
119
+ if k.to_s =~ /^([^:]+):(.+)$/
109
120
  data[$1.to_sym]&.fetch($2)
110
121
  else
111
122
  data[k.to_sym]
@@ -5,24 +5,32 @@
5
5
  # Helper module including specific functions handle datasets.
6
6
  module MiGA::Project::Dataset
7
7
  ##
8
- # Returns Array of MiGA::Dataset.
8
+ # Returns Array of MiGA::Dataset
9
9
  def datasets
10
10
  metadata[:datasets].map { |name| dataset(name) }
11
11
  end
12
12
 
13
13
  ##
14
- # Returns Array of String (without evaluating dataset objects).
14
+ # Returns Array of String (without evaluating dataset objects)
15
15
  def dataset_names
16
16
  metadata[:datasets]
17
17
  end
18
18
 
19
19
  ##
20
- # Returns Hash of Strings => true. Similar to +dataset_names+ but as
21
- # Hash for efficiency.
20
+ # Returns Hash of +{ String => true }+. Similar to +dataset_names+ but as
21
+ # Hash for efficiency
22
22
  def dataset_names_hash
23
+ warn 'The Project#dataset_names_hash method will be deprecated soon'
23
24
  @dataset_names_hash ||= Hash[dataset_names.map { |i| [i, true] }]
24
25
  end
25
26
 
27
+ ##
28
+ # Returns Set of Strings. Similar to +dataset_names+ but as Set for
29
+ # efficiency
30
+ def dataset_names_set
31
+ @dataset_names_set ||= Set.new(dataset_names)
32
+ end
33
+
26
34
  ##
27
35
  # Returns MiGA::Dataset
28
36
  def dataset(name)
@@ -50,7 +58,8 @@ module MiGA::Project::Dataset
50
58
  unless metadata[:datasets].include? name
51
59
  d = MiGA::Dataset.new(self, name)
52
60
  @metadata[:datasets] << name
53
- @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
61
+ @dataset_names_hash[name] = true if @dataset_names_hash
62
+ @dataset_names_set << name if @dataset_names_set
54
63
  save
55
64
  if d.ref? && d.active?
56
65
  recalculate_tasks("Reference dataset added: #{d.name}")
data/lib/miga/project.rb CHANGED
@@ -67,7 +67,7 @@ class MiGA::Project < MiGA::MiGA
67
67
  ##
68
68
  # Save any changes persistently, regardless of +do_not_save+
69
69
  def save!
70
- metadata.save
70
+ metadata.save!
71
71
  pull_hook :on_save
72
72
  self.load
73
73
  end
@@ -77,6 +77,7 @@ class MiGA::Project < MiGA::MiGA
77
77
  def load
78
78
  @datasets = {}
79
79
  @dataset_names_hash = nil
80
+ @dataset_names_set = nil
80
81
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
81
82
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
82
83
 
@@ -16,30 +16,44 @@ class MiGA::RemoteDataset < MiGA::MiGA
16
16
  # Path to a directory with a recent NCBI Taxonomy dump to use instead of
17
17
  # making API calls to NCBI servers, which can be obtained at:
18
18
  # https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
19
- def use_ncbi_taxonomy_dump(path)
19
+ #
20
+ # The +cli+ parameter, if passed, should be a MiGA::Cli object that will
21
+ # be used to report advance in the reading. Other objects can be passed,
22
+ # minimally supporting the MiGA::Cli#say and MiGA::Cli#advance method
23
+ # interfaces
24
+ def use_ncbi_taxonomy_dump(path, cli = nil)
20
25
  raise "Directory doesn't exist: #{path}" unless File.directory?(path)
21
26
 
22
27
  # Structure: { TaxID => ["name", "rank", parent TaxID] }
28
+ MiGA::MiGA.DEBUG "Loading NCBI Taxonomy dump: #{path}"
23
29
  @ncbi_taxonomy_names = {}
24
30
 
25
31
  # Read names.dmp
26
- File.open(File.join(path, 'names.dmp')) do |fh|
32
+ File.open(file = File.join(path, 'names.dmp')) do |fh|
33
+ read = 0
34
+ size = File.size(file)
27
35
  fh.each do |ln|
36
+ cli&.advance('- names.dmp:', read += ln.size, size)
28
37
  row = ln.split(/\t\|\t?/)
29
38
  next unless row[3] == 'scientific name'
30
39
  @ncbi_taxonomy_names[row[0].to_i] = [row[1].strip]
31
40
  end
41
+ cli&.say
32
42
  end
33
43
 
34
44
  # Read nodes.dmp
35
- File.open(File.join(path, 'nodes.dmp')) do |fh|
45
+ File.open(file = File.join(path, 'nodes.dmp')) do |fh|
46
+ read = 0
47
+ size = File.size(file)
36
48
  fh.each do |ln|
49
+ cli&.advance('- nodes.dmp:', read += ln.size, size)
37
50
  row = ln.split(/\t\|\t?/)
38
51
  child = row[0].to_i
39
52
  parent = row[1].to_i
40
53
  @ncbi_taxonomy_names[child][1] = row[2]
41
54
  @ncbi_taxonomy_names[child][2] = parent unless parent == child
42
55
  end
56
+ cli&.say
43
57
  end
44
58
  end
45
59
 
data/lib/miga/taxonomy.rb CHANGED
@@ -188,7 +188,7 @@ class MiGA::Taxonomy < MiGA::MiGA
188
188
  when Array, Hash
189
189
  self << str
190
190
  else
191
- "#{str} ".scan(/([A-Za-z]+):([^:]*)( )/) { |r, n, _| self << { r => n } }
191
+ " #{str} ".scan(/(?<= )([A-Za-z]+):([^:]*) /) { |r, n| self << { r => n } }
192
192
  end
193
193
  end
194
194
 
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.3, 10, 0].freeze
15
+ VERSION = [1.3, 10, 2].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2024, 1, 31)
23
+ VERSION_DATE = Date.new(2024, 2, 6)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -13,7 +13,7 @@ class MetadataTest < Test::Unit::TestCase
13
13
  File.unlink(md1.lock_file)
14
14
  end
15
15
  t1 = Time.new
16
- md1.save
16
+ md1.save!
17
17
  t2 = Time.new
18
18
  assert_path_not_exist(md1.lock_file)
19
19
  assert_ge(t2 - t1, 1.0)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.10.0
4
+ version: 1.3.10.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-31 00:00:00.000000000 Z
11
+ date: 2024-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons