miga-base 1.3.10.0 → 1.3.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +40 -8
- data/lib/miga/dataset/status.rb +4 -1
- data/lib/miga/dataset.rb +11 -6
- data/lib/miga/json.rb +14 -5
- data/lib/miga/metadata.rb +22 -11
- data/lib/miga/project/dataset.rb +14 -5
- data/lib/miga/project.rb +2 -1
- data/lib/miga/remote_dataset.rb +17 -3
- data/lib/miga/taxonomy.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/test/metadata_test.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 601e480270ea7f04ff3f5057fa5f031146ac2e73247a7c191ea391a17e971bfc
|
4
|
+
data.tar.gz: 1577f19ec6b7798305a847da42556a16ff9e6a144c2e317e3349a8b49a0069fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0293ffeea41a27ddef7a7b077d8cd93f86f5b9e95537576f8ce87120242cf3ced5f9554966cf648dcb817134977fbe6ee13a45b0815069cf2471014d780ce8c4'
|
7
|
+
data.tar.gz: b7ab71aa3a78d32861e1e9f6ff685424530093fd485185be722c6cc139a81d30692e00ccb6cbad4799bbcbd077ce32c7693db8336c98e8dfe0b18196748af174
|
@@ -29,7 +29,7 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
29
29
|
opt.on(
|
30
30
|
'--ncbi-taxonomy-dump STRING',
|
31
31
|
'Path to an NCBI Taxonomy dump directory to query instead of API calls'
|
32
|
-
) { |v|
|
32
|
+
) { |v| cli[:ncbi_taxonomy_dump] = v }
|
33
33
|
end
|
34
34
|
|
35
35
|
def cli_name_modifiers(opt)
|
@@ -55,11 +55,16 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def remote_list
|
58
|
+
if cli[:ncbi_taxonomy_dump]
|
59
|
+
cli.say "Reading NCBI Taxonomy dump: #{cli[:ncbi_taxonomy_dump]}"
|
60
|
+
MiGA::RemoteDataset.use_ncbi_taxonomy_dump(cli[:ncbi_taxonomy_dump], cli)
|
61
|
+
end
|
62
|
+
|
58
63
|
if cli[:ncbi_list_json] && File.size?(cli[:ncbi_list_json])
|
59
|
-
|
60
|
-
return MiGA::Json.parse(cli[:ncbi_list_json])
|
64
|
+
return read_ncbi_list_json(cli[:ncbi_list_json])
|
61
65
|
end
|
62
66
|
|
67
|
+
cli.say "Obtaining remote list of datasets"
|
63
68
|
list = {}
|
64
69
|
query = remote_list_query
|
65
70
|
loop do
|
@@ -74,18 +79,45 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
74
79
|
list.merge!(parse_reports_as_datasets(page[:reports]))
|
75
80
|
|
76
81
|
# Next page
|
82
|
+
cli.advance('Datasets:', list.size, page[:total_count])
|
77
83
|
break unless page[:next_page_token]
|
78
84
|
query[:page_token] = page[:next_page_token]
|
79
85
|
end
|
86
|
+
cli.say
|
87
|
+
|
88
|
+
write_ncbi_list_json(cli[:ncbi_list_json], list) if cli[:ncbi_list_json]
|
89
|
+
list
|
90
|
+
end
|
80
91
|
|
81
|
-
|
82
|
-
|
83
|
-
|
92
|
+
def read_ncbi_list_json(file)
|
93
|
+
cli.say "Reusing remote list: #{file}"
|
94
|
+
list = {}
|
95
|
+
n_tot = nil
|
96
|
+
File.open(file, 'r') do |fh|
|
97
|
+
n_tot = fh.gets.chomp.sub(/^# /, '').to_i
|
98
|
+
fh.each_with_index do |ln, k|
|
99
|
+
row = ln.chomp.split("\t", 2)
|
100
|
+
list[row[0]] = MiGA::Json.parse(row[1], contents: true)
|
101
|
+
cli.advance('Lines:', k, n_tot)
|
102
|
+
end
|
103
|
+
cli.say
|
84
104
|
end
|
105
|
+
return list
|
106
|
+
end
|
85
107
|
|
86
|
-
|
108
|
+
def write_ncbi_list_json(file, list)
|
109
|
+
cli.say "Saving remote list: #{file}"
|
110
|
+
File.open(file, 'w') do |fh|
|
111
|
+
fh.puts('# %i' % list.size)
|
112
|
+
kk = 0
|
113
|
+
list.each do |k, v|
|
114
|
+
fh.puts([k, MiGA::Json.generate_fast(v)].join("\t"))
|
115
|
+
cli.advance('Datasets:', kk += 1, list.size)
|
116
|
+
end
|
117
|
+
cli.say
|
118
|
+
end
|
87
119
|
end
|
88
|
-
|
120
|
+
|
89
121
|
def parse_reports_as_datasets(reports)
|
90
122
|
ds = {}
|
91
123
|
reports.each do |r|
|
data/lib/miga/dataset/status.rb
CHANGED
@@ -20,7 +20,10 @@ module MiGA::Dataset::Status
|
|
20
20
|
old_status = metadata[:status]
|
21
21
|
metadata[:status] =
|
22
22
|
!active? ? 'inactive' : done_preprocessing? ? 'complete' : 'incomplete'
|
23
|
-
|
23
|
+
if save && (old_status.nil? || old_status != metadata[:status])
|
24
|
+
self.save
|
25
|
+
MiGA::MiGA.DEBUG "Status changed: #{old_status} -> #{metadata[:status]}"
|
26
|
+
end
|
24
27
|
metadata[:status].to_sym
|
25
28
|
end
|
26
29
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
+
require'set'
|
6
7
|
require 'miga/metadata'
|
7
8
|
require 'miga/dataset/result'
|
8
9
|
require 'miga/dataset/status'
|
@@ -27,7 +28,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
27
28
|
##
|
28
29
|
# Does the +project+ already have a dataset with that +name+?
|
29
30
|
def exist?(project, name)
|
30
|
-
|
31
|
+
project.dataset_names_set.include? name
|
31
32
|
end
|
32
33
|
|
33
34
|
##
|
@@ -61,6 +62,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
61
62
|
@project, @name, @metadata = project, name, nil
|
62
63
|
metadata[:ref] = is_ref
|
63
64
|
metadata[:type] ||= :empty
|
65
|
+
metadata[:status] ||= 'incomplete'
|
64
66
|
@metadata_future = [
|
65
67
|
File.join(project.path, 'metadata', "#{name}.json"),
|
66
68
|
metadata
|
@@ -84,15 +86,18 @@ class MiGA::Dataset < MiGA::MiGA
|
|
84
86
|
##
|
85
87
|
# Save any changes you've made in the dataset
|
86
88
|
def save
|
87
|
-
MiGA.DEBUG "Dataset.
|
89
|
+
MiGA.DEBUG "Dataset.save: #{name}"
|
88
90
|
metadata.save
|
89
91
|
pull_hook :on_save
|
90
92
|
end
|
91
93
|
|
92
94
|
##
|
93
|
-
#
|
94
|
-
|
95
|
-
|
95
|
+
# Forces a save even if nothing has changed in the metadata
|
96
|
+
def save!
|
97
|
+
MiGA.DEBUG "Dataset.save!: #{name}"
|
98
|
+
metadata.save!
|
99
|
+
pull_hook :on_save
|
100
|
+
end
|
96
101
|
|
97
102
|
##
|
98
103
|
# Delete the dataset with all its contents (including results) and returns
|
@@ -148,7 +153,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
148
153
|
##
|
149
154
|
# Is this dataset active?
|
150
155
|
def active?
|
151
|
-
metadata[:inactive].nil?
|
156
|
+
metadata[:inactive].nil? || !metadata[:inactive]
|
152
157
|
end
|
153
158
|
|
154
159
|
##
|
data/lib/miga/json.rb
CHANGED
@@ -15,6 +15,8 @@ class MiGA::Json < MiGA::MiGA
|
|
15
15
|
# - +:symbolize+: If names should be symbolized. By default it's true if
|
16
16
|
# additions is false, or false otherwise. They can both be false, but an
|
17
17
|
# exception will be raised if both are true
|
18
|
+
# - +:large_file+: If passed, the file is treated as a file with very long
|
19
|
+
# lines (possibly a single long line)
|
18
20
|
def default_opts(opts = {})
|
19
21
|
opts[:contents] ||= false
|
20
22
|
opts[:additions] ||= false
|
@@ -36,11 +38,18 @@ class MiGA::Json < MiGA::MiGA
|
|
36
38
|
|
37
39
|
# Read JSON
|
38
40
|
cont = path
|
39
|
-
|
40
|
-
cont =
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
if opts[:large_file]
|
42
|
+
cont = ''
|
43
|
+
File.open(path, 'r') do |fh|
|
44
|
+
cont += fh.read(2 ** 16) until fh.eof?
|
45
|
+
end
|
46
|
+
elsif !opts[:contents]
|
47
|
+
12.times do
|
48
|
+
cont = File.read(path)
|
49
|
+
break unless cont.empty?
|
50
|
+
sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
|
51
|
+
end
|
52
|
+
end
|
44
53
|
raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
|
45
54
|
|
46
55
|
# Parse JSON
|
data/lib/miga/metadata.rb
CHANGED
@@ -26,12 +26,17 @@ class MiGA::Metadata < MiGA::MiGA
|
|
26
26
|
# Path to the JSON file describing the metadata
|
27
27
|
attr_reader :path
|
28
28
|
|
29
|
+
##
|
30
|
+
# Hash (Integer) of the last saved data Hash (object)
|
31
|
+
attr_reader :saved_hash
|
32
|
+
|
29
33
|
##
|
30
34
|
# Initiate a MiGA::Metadata object with description in +path+.
|
31
35
|
# It will create it if it doesn't exist.
|
32
36
|
def initialize(path, defaults = {})
|
33
37
|
@data = nil
|
34
38
|
@path = File.absolute_path(path)
|
39
|
+
@saved_hash = nil
|
35
40
|
unless File.exist? path
|
36
41
|
@data = {}
|
37
42
|
defaults.each { |k, v| self[k] = v }
|
@@ -57,35 +62,41 @@ class MiGA::Metadata < MiGA::MiGA
|
|
57
62
|
# Save the metadata into #path
|
58
63
|
def save
|
59
64
|
return if self[:never_save]
|
65
|
+
return if !saved_hash.nil? && saved_hash == data.hash
|
60
66
|
|
61
67
|
MiGA::MiGA.DEBUG "Metadata.save #{path}"
|
68
|
+
path_tmp = "#{path}.tmp"
|
62
69
|
self[:updated] = Time.now.to_s
|
70
|
+
@saved_hash = data.hash
|
63
71
|
json = to_json
|
64
72
|
wait_for_lock
|
65
73
|
FileUtils.touch(lock_file)
|
66
|
-
|
67
|
-
ofh.puts json
|
68
|
-
ofh.close
|
74
|
+
File.open(path_tmp, 'w') { |ofh| ofh.puts json }
|
69
75
|
|
70
|
-
unless File.exist?(
|
76
|
+
unless File.exist?(path_tmp) && File.exist?(lock_file)
|
71
77
|
raise "Lock-racing detected for #{path}"
|
72
78
|
end
|
73
79
|
|
74
|
-
File.rename(
|
80
|
+
File.rename(path_tmp, path)
|
75
81
|
File.unlink(lock_file)
|
76
82
|
end
|
77
83
|
|
84
|
+
##
|
85
|
+
# Force +save+ even if nothing has changed since the last save
|
86
|
+
# or load. However, it doesn't save if +:never_save+ is true.
|
87
|
+
def save!
|
88
|
+
@saved_hash = nil
|
89
|
+
save
|
90
|
+
end
|
91
|
+
|
78
92
|
##
|
79
93
|
# (Re-)load metadata stored in #path
|
80
94
|
def load
|
81
|
-
|
82
|
-
while File.exist? lock_file
|
83
|
-
sleeper += 0.1 if sleeper <= 10.0
|
84
|
-
sleep(sleeper.to_i)
|
85
|
-
end
|
95
|
+
wait_for_lock
|
86
96
|
tmp = MiGA::Json.parse(path, additions: true)
|
87
97
|
@data = {}
|
88
98
|
tmp.each { |k, v| self[k] = v }
|
99
|
+
@saved_hash = data.hash
|
89
100
|
end
|
90
101
|
|
91
102
|
##
|
@@ -105,7 +116,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
105
116
|
##
|
106
117
|
# Return the value of +k+ in #data
|
107
118
|
def [](k)
|
108
|
-
if k.to_s =~
|
119
|
+
if k.to_s =~ /^([^:]+):(.+)$/
|
109
120
|
data[$1.to_sym]&.fetch($2)
|
110
121
|
else
|
111
122
|
data[k.to_sym]
|
data/lib/miga/project/dataset.rb
CHANGED
@@ -5,24 +5,32 @@
|
|
5
5
|
# Helper module including specific functions to handle datasets.
|
6
6
|
module MiGA::Project::Dataset
|
7
7
|
##
|
8
|
-
# Returns Array of MiGA::Dataset
|
8
|
+
# Returns Array of MiGA::Dataset
|
9
9
|
def datasets
|
10
10
|
metadata[:datasets].map { |name| dataset(name) }
|
11
11
|
end
|
12
12
|
|
13
13
|
##
|
14
|
-
# Returns Array of String (without evaluating dataset objects)
|
14
|
+
# Returns Array of String (without evaluating dataset objects)
|
15
15
|
def dataset_names
|
16
16
|
metadata[:datasets]
|
17
17
|
end
|
18
18
|
|
19
19
|
##
|
20
|
-
# Returns Hash of
|
21
|
-
# Hash for efficiency
|
20
|
+
# Returns Hash of +{ String => true }+. Similar to +dataset_names+ but as
|
21
|
+
# Hash for efficiency
|
22
22
|
def dataset_names_hash
|
23
|
+
warn 'The Project#dataset_names_hash method will be deprecated soon'
|
23
24
|
@dataset_names_hash ||= Hash[dataset_names.map { |i| [i, true] }]
|
24
25
|
end
|
25
26
|
|
27
|
+
##
|
28
|
+
# Returns Set of Strings. Similar to +dataset_names+ but as Set for
|
29
|
+
# efficiency
|
30
|
+
def dataset_names_set
|
31
|
+
@dataset_names_set ||= Set.new(dataset_names)
|
32
|
+
end
|
33
|
+
|
26
34
|
##
|
27
35
|
# Returns MiGA::Dataset
|
28
36
|
def dataset(name)
|
@@ -50,7 +58,8 @@ module MiGA::Project::Dataset
|
|
50
58
|
unless metadata[:datasets].include? name
|
51
59
|
d = MiGA::Dataset.new(self, name)
|
52
60
|
@metadata[:datasets] << name
|
53
|
-
@dataset_names_hash =
|
61
|
+
@dataset_names_hash[name] = true if @dataset_names_hash
|
62
|
+
@dataset_names_set << name if @dataset_names_set
|
54
63
|
save
|
55
64
|
if d.ref? && d.active?
|
56
65
|
recalculate_tasks("Reference dataset added: #{d.name}")
|
data/lib/miga/project.rb
CHANGED
@@ -67,7 +67,7 @@ class MiGA::Project < MiGA::MiGA
|
|
67
67
|
##
|
68
68
|
# Save any changes persistently, regardless of +do_not_save+
|
69
69
|
def save!
|
70
|
-
metadata.save
|
70
|
+
metadata.save!
|
71
71
|
pull_hook :on_save
|
72
72
|
self.load
|
73
73
|
end
|
@@ -77,6 +77,7 @@ class MiGA::Project < MiGA::MiGA
|
|
77
77
|
def load
|
78
78
|
@datasets = {}
|
79
79
|
@dataset_names_hash = nil
|
80
|
+
@dataset_names_set = nil
|
80
81
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
81
82
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
82
83
|
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -16,30 +16,44 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
16
16
|
# Path to a directory with a recent NCBI Taxonomy dump to use instead of
|
17
17
|
# making API calls to NCBI servers, which can be obtained at:
|
18
18
|
# https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
|
19
|
-
|
19
|
+
#
|
20
|
+
# The +cli+ parameter, if passed, should be a MiGA::Cli object that will
|
21
|
+
# be used to report advance in the reading. Other objects can be passed,
|
22
|
+
# minimally supporting the MiGA::Cli#say and MiGA::Cli#advance method
|
23
|
+
# interfaces
|
24
|
+
def use_ncbi_taxonomy_dump(path, cli = nil)
|
20
25
|
raise "Directory doesn't exist: #{path}" unless File.directory?(path)
|
21
26
|
|
22
27
|
# Structure: { TaxID => ["name", "rank", parent TaxID] }
|
28
|
+
MiGA::MiGA.DEBUG "Loading NCBI Taxonomy dump: #{path}"
|
23
29
|
@ncbi_taxonomy_names = {}
|
24
30
|
|
25
31
|
# Read names.dmp
|
26
|
-
File.open(File.join(path, 'names.dmp')) do |fh|
|
32
|
+
File.open(file = File.join(path, 'names.dmp')) do |fh|
|
33
|
+
read = 0
|
34
|
+
size = File.size(file)
|
27
35
|
fh.each do |ln|
|
36
|
+
cli&.advance('- names.dmp:', read += ln.size, size)
|
28
37
|
row = ln.split(/\t\|\t?/)
|
29
38
|
next unless row[3] == 'scientific name'
|
30
39
|
@ncbi_taxonomy_names[row[0].to_i] = [row[1].strip]
|
31
40
|
end
|
41
|
+
cli&.say
|
32
42
|
end
|
33
43
|
|
34
44
|
# Read nodes.dmp
|
35
|
-
File.open(File.join(path, 'nodes.dmp')) do |fh|
|
45
|
+
File.open(file = File.join(path, 'nodes.dmp')) do |fh|
|
46
|
+
read = 0
|
47
|
+
size = File.size(file)
|
36
48
|
fh.each do |ln|
|
49
|
+
cli&.advance('- nodes.dmp:', read += ln.size, size)
|
37
50
|
row = ln.split(/\t\|\t?/)
|
38
51
|
child = row[0].to_i
|
39
52
|
parent = row[1].to_i
|
40
53
|
@ncbi_taxonomy_names[child][1] = row[2]
|
41
54
|
@ncbi_taxonomy_names[child][2] = parent unless parent == child
|
42
55
|
end
|
56
|
+
cli&.say
|
43
57
|
end
|
44
58
|
end
|
45
59
|
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -188,7 +188,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
188
188
|
when Array, Hash
|
189
189
|
self << str
|
190
190
|
else
|
191
|
-
"#{str} ".scan(/([A-Za-z]+):([^:]*)
|
191
|
+
" #{str} ".scan(/(?<= )([A-Za-z]+):([^:]*) /) { |r, n| self << { r => n } }
|
192
192
|
end
|
193
193
|
end
|
194
194
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 10,
|
15
|
+
VERSION = [1.3, 10, 2].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024,
|
23
|
+
VERSION_DATE = Date.new(2024, 2, 6)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/test/metadata_test.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.10.0
|
4
|
+
version: 1.3.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|