miga-base 1.3.10.0 → 1.3.10.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/download/ncbi.rb +40 -8
- data/lib/miga/dataset/status.rb +4 -1
- data/lib/miga/dataset.rb +11 -6
- data/lib/miga/json.rb +14 -5
- data/lib/miga/metadata.rb +22 -11
- data/lib/miga/project/dataset.rb +14 -5
- data/lib/miga/project.rb +2 -1
- data/lib/miga/remote_dataset.rb +17 -3
- data/lib/miga/taxonomy.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/test/metadata_test.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 601e480270ea7f04ff3f5057fa5f031146ac2e73247a7c191ea391a17e971bfc
|
4
|
+
data.tar.gz: 1577f19ec6b7798305a847da42556a16ff9e6a144c2e317e3349a8b49a0069fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0293ffeea41a27ddef7a7b077d8cd93f86f5b9e95537576f8ce87120242cf3ced5f9554966cf648dcb817134977fbe6ee13a45b0815069cf2471014d780ce8c4'
|
7
|
+
data.tar.gz: b7ab71aa3a78d32861e1e9f6ff685424530093fd485185be722c6cc139a81d30692e00ccb6cbad4799bbcbd077ce32c7693db8336c98e8dfe0b18196748af174
|
@@ -29,7 +29,7 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
29
29
|
opt.on(
|
30
30
|
'--ncbi-taxonomy-dump STRING',
|
31
31
|
'Path to an NCBI Taxonomy dump directory to query instead of API calls'
|
32
|
-
) { |v|
|
32
|
+
) { |v| cli[:ncbi_taxonomy_dump] = v }
|
33
33
|
end
|
34
34
|
|
35
35
|
def cli_name_modifiers(opt)
|
@@ -55,11 +55,16 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def remote_list
|
58
|
+
if cli[:ncbi_taxonomy_dump]
|
59
|
+
cli.say "Reading NCBI Taxonomy dump: #{cli[:ncbi_taxonomy_dump]}"
|
60
|
+
MiGA::RemoteDataset.use_ncbi_taxonomy_dump(cli[:ncbi_taxonomy_dump], cli)
|
61
|
+
end
|
62
|
+
|
58
63
|
if cli[:ncbi_list_json] && File.size?(cli[:ncbi_list_json])
|
59
|
-
|
60
|
-
return MiGA::Json.parse(cli[:ncbi_list_json])
|
64
|
+
return read_ncbi_list_json(cli[:ncbi_list_json])
|
61
65
|
end
|
62
66
|
|
67
|
+
cli.say "Obtaining remote list of datasets"
|
63
68
|
list = {}
|
64
69
|
query = remote_list_query
|
65
70
|
loop do
|
@@ -74,18 +79,45 @@ module MiGA::Cli::Action::Download::Ncbi
|
|
74
79
|
list.merge!(parse_reports_as_datasets(page[:reports]))
|
75
80
|
|
76
81
|
# Next page
|
82
|
+
cli.advance('Datasets:', list.size, page[:total_count])
|
77
83
|
break unless page[:next_page_token]
|
78
84
|
query[:page_token] = page[:next_page_token]
|
79
85
|
end
|
86
|
+
cli.say
|
87
|
+
|
88
|
+
write_ncbi_list_json(cli[:ncbi_list_json], list) if cli[:ncbi_list_json]
|
89
|
+
list
|
90
|
+
end
|
80
91
|
|
81
|
-
|
82
|
-
|
83
|
-
|
92
|
+
def read_ncbi_list_json(file)
|
93
|
+
cli.say "Reusing remote list: #{file}"
|
94
|
+
list = {}
|
95
|
+
n_tot = nil
|
96
|
+
File.open(file, 'r') do |fh|
|
97
|
+
n_tot = fh.gets.chomp.sub(/^# /, '').to_i
|
98
|
+
fh.each_with_index do |ln, k|
|
99
|
+
row = ln.chomp.split("\t", 2)
|
100
|
+
list[row[0]] = MiGA::Json.parse(row[1], contents: true)
|
101
|
+
cli.advance('Lines:', k, n_tot)
|
102
|
+
end
|
103
|
+
cli.say
|
84
104
|
end
|
105
|
+
return list
|
106
|
+
end
|
85
107
|
|
86
|
-
|
108
|
+
def write_ncbi_list_json(file, list)
|
109
|
+
cli.say "Saving remote list: #{file}"
|
110
|
+
File.open(file, 'w') do |fh|
|
111
|
+
fh.puts('# %i' % list.size)
|
112
|
+
kk = 0
|
113
|
+
list.each do |k, v|
|
114
|
+
fh.puts([k, MiGA::Json.generate_fast(v)].join("\t"))
|
115
|
+
cli.advance('Datasets:', kk += 1, list.size)
|
116
|
+
end
|
117
|
+
cli.say
|
118
|
+
end
|
87
119
|
end
|
88
|
-
|
120
|
+
|
89
121
|
def parse_reports_as_datasets(reports)
|
90
122
|
ds = {}
|
91
123
|
reports.each do |r|
|
data/lib/miga/dataset/status.rb
CHANGED
@@ -20,7 +20,10 @@ module MiGA::Dataset::Status
|
|
20
20
|
old_status = metadata[:status]
|
21
21
|
metadata[:status] =
|
22
22
|
!active? ? 'inactive' : done_preprocessing? ? 'complete' : 'incomplete'
|
23
|
-
|
23
|
+
if save && (old_status.nil? || old_status != metadata[:status])
|
24
|
+
self.save
|
25
|
+
MiGA::MiGA.DEBUG "Status changed: #{old_status} -> #{metadata[:status]}"
|
26
|
+
end
|
24
27
|
metadata[:status].to_sym
|
25
28
|
end
|
26
29
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
+
require'set'
|
6
7
|
require 'miga/metadata'
|
7
8
|
require 'miga/dataset/result'
|
8
9
|
require 'miga/dataset/status'
|
@@ -27,7 +28,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
27
28
|
##
|
28
29
|
# Does the +project+ already have a dataset with that +name+?
|
29
30
|
def exist?(project, name)
|
30
|
-
|
31
|
+
project.dataset_names_set.include? name
|
31
32
|
end
|
32
33
|
|
33
34
|
##
|
@@ -61,6 +62,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
61
62
|
@project, @name, @metadata = project, name, nil
|
62
63
|
metadata[:ref] = is_ref
|
63
64
|
metadata[:type] ||= :empty
|
65
|
+
metadata[:status] ||= 'incomplete'
|
64
66
|
@metadata_future = [
|
65
67
|
File.join(project.path, 'metadata', "#{name}.json"),
|
66
68
|
metadata
|
@@ -84,15 +86,18 @@ class MiGA::Dataset < MiGA::MiGA
|
|
84
86
|
##
|
85
87
|
# Save any changes you've made in the dataset
|
86
88
|
def save
|
87
|
-
MiGA.DEBUG "Dataset.
|
89
|
+
MiGA.DEBUG "Dataset.save: #{name}"
|
88
90
|
metadata.save
|
89
91
|
pull_hook :on_save
|
90
92
|
end
|
91
93
|
|
92
94
|
##
|
93
|
-
#
|
94
|
-
|
95
|
-
|
95
|
+
# Forces a save even if nothing has changed in the metadata
|
96
|
+
def save!
|
97
|
+
MiGA.DEBUG "Dataset.save!: #{name}"
|
98
|
+
metadata.save!
|
99
|
+
pull_hook :on_save
|
100
|
+
end
|
96
101
|
|
97
102
|
##
|
98
103
|
# Delete the dataset with all its contents (including results) and returns
|
@@ -148,7 +153,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
148
153
|
##
|
149
154
|
# Is this dataset active?
|
150
155
|
def active?
|
151
|
-
metadata[:inactive].nil?
|
156
|
+
metadata[:inactive].nil? || !metadata[:inactive]
|
152
157
|
end
|
153
158
|
|
154
159
|
##
|
data/lib/miga/json.rb
CHANGED
@@ -15,6 +15,8 @@ class MiGA::Json < MiGA::MiGA
|
|
15
15
|
# - +:symbolize+: If names should be symbolized. By default it's true if
|
16
16
|
# additions is false, or false otherwise. They can both be false, but an
|
17
17
|
# exception will be raised if both are true
|
18
|
+
# - +:large_file+: If passed, the file is treated as a file with very long
|
19
|
+
# lines (possibly a single long line)
|
18
20
|
def default_opts(opts = {})
|
19
21
|
opts[:contents] ||= false
|
20
22
|
opts[:additions] ||= false
|
@@ -36,11 +38,18 @@ class MiGA::Json < MiGA::MiGA
|
|
36
38
|
|
37
39
|
# Read JSON
|
38
40
|
cont = path
|
39
|
-
|
40
|
-
cont =
|
41
|
-
|
42
|
-
|
43
|
-
|
41
|
+
if opts[:large_file]
|
42
|
+
cont = ''
|
43
|
+
File.open(path, 'r') do |fh|
|
44
|
+
cont += fh.read(2 ** 16) until fh.eof?
|
45
|
+
end
|
46
|
+
elsif !opts[:contents]
|
47
|
+
12.times do
|
48
|
+
cont = File.read(path)
|
49
|
+
break unless cont.empty?
|
50
|
+
sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
|
51
|
+
end
|
52
|
+
end
|
44
53
|
raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
|
45
54
|
|
46
55
|
# Parse JSON
|
data/lib/miga/metadata.rb
CHANGED
@@ -26,12 +26,17 @@ class MiGA::Metadata < MiGA::MiGA
|
|
26
26
|
# Path to the JSON file describing the metadata
|
27
27
|
attr_reader :path
|
28
28
|
|
29
|
+
##
|
30
|
+
# Hash (Integer) of the last saved data Hash (object)
|
31
|
+
attr_reader :saved_hash
|
32
|
+
|
29
33
|
##
|
30
34
|
# Initiate a MiGA::Metadata object with description in +path+.
|
31
35
|
# It will create it if it doesn't exist.
|
32
36
|
def initialize(path, defaults = {})
|
33
37
|
@data = nil
|
34
38
|
@path = File.absolute_path(path)
|
39
|
+
@saved_hash = nil
|
35
40
|
unless File.exist? path
|
36
41
|
@data = {}
|
37
42
|
defaults.each { |k, v| self[k] = v }
|
@@ -57,35 +62,41 @@ class MiGA::Metadata < MiGA::MiGA
|
|
57
62
|
# Save the metadata into #path
|
58
63
|
def save
|
59
64
|
return if self[:never_save]
|
65
|
+
return if !saved_hash.nil? && saved_hash == data.hash
|
60
66
|
|
61
67
|
MiGA::MiGA.DEBUG "Metadata.save #{path}"
|
68
|
+
path_tmp = "#{path}.tmp"
|
62
69
|
self[:updated] = Time.now.to_s
|
70
|
+
@saved_hash = data.hash
|
63
71
|
json = to_json
|
64
72
|
wait_for_lock
|
65
73
|
FileUtils.touch(lock_file)
|
66
|
-
|
67
|
-
ofh.puts json
|
68
|
-
ofh.close
|
74
|
+
File.open(path_tmp, 'w') { |ofh| ofh.puts json }
|
69
75
|
|
70
|
-
unless File.exist?(
|
76
|
+
unless File.exist?(path_tmp) && File.exist?(lock_file)
|
71
77
|
raise "Lock-racing detected for #{path}"
|
72
78
|
end
|
73
79
|
|
74
|
-
File.rename(
|
80
|
+
File.rename(path_tmp, path)
|
75
81
|
File.unlink(lock_file)
|
76
82
|
end
|
77
83
|
|
84
|
+
##
|
85
|
+
# Force +save+ even if nothing has changed since the last save
|
86
|
+
# or load. However, it doesn't save if +:never_save+ is true.
|
87
|
+
def save!
|
88
|
+
@saved_hash = nil
|
89
|
+
save
|
90
|
+
end
|
91
|
+
|
78
92
|
##
|
79
93
|
# (Re-)load metadata stored in #path
|
80
94
|
def load
|
81
|
-
|
82
|
-
while File.exist? lock_file
|
83
|
-
sleeper += 0.1 if sleeper <= 10.0
|
84
|
-
sleep(sleeper.to_i)
|
85
|
-
end
|
95
|
+
wait_for_lock
|
86
96
|
tmp = MiGA::Json.parse(path, additions: true)
|
87
97
|
@data = {}
|
88
98
|
tmp.each { |k, v| self[k] = v }
|
99
|
+
@saved_hash = data.hash
|
89
100
|
end
|
90
101
|
|
91
102
|
##
|
@@ -105,7 +116,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
105
116
|
##
|
106
117
|
# Return the value of +k+ in #data
|
107
118
|
def [](k)
|
108
|
-
if k.to_s =~
|
119
|
+
if k.to_s =~ /^([^:]+):(.+)$/
|
109
120
|
data[$1.to_sym]&.fetch($2)
|
110
121
|
else
|
111
122
|
data[k.to_sym]
|
data/lib/miga/project/dataset.rb
CHANGED
@@ -5,24 +5,32 @@
|
|
5
5
|
# Helper module including specific functions to handle datasets.
|
6
6
|
module MiGA::Project::Dataset
|
7
7
|
##
|
8
|
-
# Returns Array of MiGA::Dataset
|
8
|
+
# Returns Array of MiGA::Dataset
|
9
9
|
def datasets
|
10
10
|
metadata[:datasets].map { |name| dataset(name) }
|
11
11
|
end
|
12
12
|
|
13
13
|
##
|
14
|
-
# Returns Array of String (without evaluating dataset objects)
|
14
|
+
# Returns Array of String (without evaluating dataset objects)
|
15
15
|
def dataset_names
|
16
16
|
metadata[:datasets]
|
17
17
|
end
|
18
18
|
|
19
19
|
##
|
20
|
-
# Returns Hash of
|
21
|
-
# Hash for efficiency
|
20
|
+
# Returns Hash of +{ String => true }+. Similar to +dataset_names+ but as
|
21
|
+
# Hash for efficiency
|
22
22
|
def dataset_names_hash
|
23
|
+
warn 'The Project#dataset_names_hash method will be deprecated soon'
|
23
24
|
@dataset_names_hash ||= Hash[dataset_names.map { |i| [i, true] }]
|
24
25
|
end
|
25
26
|
|
27
|
+
##
|
28
|
+
# Returns Set of Strings. Similar to +dataset_names+ but as Set for
|
29
|
+
# efficiency
|
30
|
+
def dataset_names_set
|
31
|
+
@dataset_names_set ||= Set.new(dataset_names)
|
32
|
+
end
|
33
|
+
|
26
34
|
##
|
27
35
|
# Returns MiGA::Dataset
|
28
36
|
def dataset(name)
|
@@ -50,7 +58,8 @@ module MiGA::Project::Dataset
|
|
50
58
|
unless metadata[:datasets].include? name
|
51
59
|
d = MiGA::Dataset.new(self, name)
|
52
60
|
@metadata[:datasets] << name
|
53
|
-
@dataset_names_hash =
|
61
|
+
@dataset_names_hash[name] = true if @dataset_names_hash
|
62
|
+
@dataset_names_set << name if @dataset_names_set
|
54
63
|
save
|
55
64
|
if d.ref? && d.active?
|
56
65
|
recalculate_tasks("Reference dataset added: #{d.name}")
|
data/lib/miga/project.rb
CHANGED
@@ -67,7 +67,7 @@ class MiGA::Project < MiGA::MiGA
|
|
67
67
|
##
|
68
68
|
# Save any changes persistently, regardless of +do_not_save+
|
69
69
|
def save!
|
70
|
-
metadata.save
|
70
|
+
metadata.save!
|
71
71
|
pull_hook :on_save
|
72
72
|
self.load
|
73
73
|
end
|
@@ -77,6 +77,7 @@ class MiGA::Project < MiGA::MiGA
|
|
77
77
|
def load
|
78
78
|
@datasets = {}
|
79
79
|
@dataset_names_hash = nil
|
80
|
+
@dataset_names_set = nil
|
80
81
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
81
82
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
82
83
|
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -16,30 +16,44 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
16
16
|
# Path to a directory with a recent NCBI Taxonomy dump to use instead of
|
17
17
|
# making API calls to NCBI servers, which can be obtained at:
|
18
18
|
# https://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
|
19
|
-
|
19
|
+
#
|
20
|
+
# The +cli+ parameter, if passed, should be a MiGA::Cli object that will
|
21
|
+
# be used to report advance in the reading. Other objects can be passed,
|
22
|
+
# minimally supporting the MiGA::Cli#say and MiGA::Cli#advance method
|
23
|
+
# interfaces
|
24
|
+
def use_ncbi_taxonomy_dump(path, cli = nil)
|
20
25
|
raise "Directory doesn't exist: #{path}" unless File.directory?(path)
|
21
26
|
|
22
27
|
# Structure: { TaxID => ["name", "rank", parent TaxID] }
|
28
|
+
MiGA::MiGA.DEBUG "Loading NCBI Taxonomy dump: #{path}"
|
23
29
|
@ncbi_taxonomy_names = {}
|
24
30
|
|
25
31
|
# Read names.dmp
|
26
|
-
File.open(File.join(path, 'names.dmp')) do |fh|
|
32
|
+
File.open(file = File.join(path, 'names.dmp')) do |fh|
|
33
|
+
read = 0
|
34
|
+
size = File.size(file)
|
27
35
|
fh.each do |ln|
|
36
|
+
cli&.advance('- names.dmp:', read += ln.size, size)
|
28
37
|
row = ln.split(/\t\|\t?/)
|
29
38
|
next unless row[3] == 'scientific name'
|
30
39
|
@ncbi_taxonomy_names[row[0].to_i] = [row[1].strip]
|
31
40
|
end
|
41
|
+
cli&.say
|
32
42
|
end
|
33
43
|
|
34
44
|
# Read nodes.dmp
|
35
|
-
File.open(File.join(path, 'nodes.dmp')) do |fh|
|
45
|
+
File.open(file = File.join(path, 'nodes.dmp')) do |fh|
|
46
|
+
read = 0
|
47
|
+
size = File.size(file)
|
36
48
|
fh.each do |ln|
|
49
|
+
cli&.advance('- nodes.dmp:', read += ln.size, size)
|
37
50
|
row = ln.split(/\t\|\t?/)
|
38
51
|
child = row[0].to_i
|
39
52
|
parent = row[1].to_i
|
40
53
|
@ncbi_taxonomy_names[child][1] = row[2]
|
41
54
|
@ncbi_taxonomy_names[child][2] = parent unless parent == child
|
42
55
|
end
|
56
|
+
cli&.say
|
43
57
|
end
|
44
58
|
end
|
45
59
|
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -188,7 +188,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
188
188
|
when Array, Hash
|
189
189
|
self << str
|
190
190
|
else
|
191
|
-
"#{str} ".scan(/([A-Za-z]+):([^:]*)
|
191
|
+
" #{str} ".scan(/(?<= )([A-Za-z]+):([^:]*) /) { |r, n| self << { r => n } }
|
192
192
|
end
|
193
193
|
end
|
194
194
|
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3, 10,
|
15
|
+
VERSION = [1.3, 10, 2].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2024,
|
23
|
+
VERSION_DATE = Date.new(2024, 2, 6)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/test/metadata_test.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.10.
|
4
|
+
version: 1.3.10.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|