miga-base 0.3.5.0 → 0.3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/actions/add.rb +3 -1
- data/actions/get.rb +18 -7
- data/bin/miga +7 -3
- data/lib/miga/dataset/result.rb +2 -1
- data/lib/miga/dataset.rb +1 -1
- data/lib/miga/project/dataset.rb +7 -1
- data/lib/miga/project.rb +1 -0
- data/lib/miga/remote_dataset/download.rb +3 -3
- data/lib/miga/remote_dataset.rb +22 -10
- data/lib/miga/version.rb +1 -1
- data/scripts/aai_distances.bash +1 -1
- data/scripts/ani_distances.bash +1 -1
- data/scripts/haai_distances.bash +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 25f6a339288dbdbeda1f84f5da6b5de697d3790c0b38d138a4416ff762cae936
|
4
|
+
data.tar.gz: 60187223750e983fafd6088935a912d016bbf4acafaf79b548e206c50076cc04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60b59ccc8fc3bf9f5a584f3221c268839ba4cd41eb3a6c16911f1808944c20100758f74908797f13ada9d2475e4fda382c1e204b257cf6c6adcfd17db694efcf
|
7
|
+
data.tar.gz: e62de30119c51dc4e92e64aec954c263f84a4820f1541f014c49d93dadd8de1efb5d331c35ecbda6ccfa79c402393894b7b6ffd52edf2362b8e117fe505ff230
|
data/actions/add.rb
CHANGED
@@ -60,7 +60,9 @@ def cp_result(o, d, p, sym, res_sym, ext)
|
|
60
60
|
r_dir = MiGA::Dataset.RESULT_DIRS[res_sym]
|
61
61
|
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
62
62
|
ext.each_index do |i|
|
63
|
-
|
63
|
+
next if o[sym][i].nil?
|
64
|
+
gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
|
65
|
+
FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
|
64
66
|
end
|
65
67
|
File.open("#{r_path}.done", "w") { |f| f.print Time.now.to_s }
|
66
68
|
end
|
data/actions/get.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
require 'miga/remote_dataset'
|
7
7
|
|
8
|
-
o = {q: true, query: false, universe: :ebi, db: :embl}
|
8
|
+
o = {q: true, query: false, universe: :ebi, db: :embl, get_md: false}
|
9
9
|
OptionParser.new do |opt|
|
10
10
|
opt_banner(opt)
|
11
11
|
opt_object(opt, o, [:project, :dataset, :dataset_type])
|
@@ -36,6 +36,9 @@ OptionParser.new do |opt|
|
|
36
36
|
'Metadata as key-value pairs separated by = and delimited by comma.',
|
37
37
|
'Values are saved as strings except for booleans (true / false) or nil.'
|
38
38
|
){ |v| o[:metadata]=v }
|
39
|
+
opt.on('--get-metadata',
|
40
|
+
'Only download and update metadata for existing datasets'
|
41
|
+
){ |v| o[:get_md] = v }
|
39
42
|
opt_common(opt, o)
|
40
43
|
end.parse!
|
41
44
|
|
@@ -74,12 +77,20 @@ glob.each do |o_i|
|
|
74
77
|
$stderr.puts 'Locating remote dataset.' unless o_i[:q]
|
75
78
|
rd = MiGA::RemoteDataset.new(o_i[:ids], o_i[:db], o_i[:universe])
|
76
79
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
80
|
+
if o[:get_md]
|
81
|
+
$stderr.puts 'Updating dataset.' unless o_i[:q]
|
82
|
+
d = p.dataset(o_i[:dataset])
|
83
|
+
next if d.nil?
|
84
|
+
md = add_metadata(o_i, d).metadata.data
|
85
|
+
rd.update_metadata(d, md)
|
86
|
+
else
|
87
|
+
$stderr.puts 'Creating dataset.' unless o_i[:q]
|
88
|
+
dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
|
89
|
+
md = add_metadata(o_i, dummy_d).metadata.data
|
90
|
+
dummy_d.remove!
|
91
|
+
rd.save_to(p, o_i[:dataset], !o_i[:query], md)
|
92
|
+
p.add_dataset(o_i[:dataset])
|
93
|
+
end
|
83
94
|
|
84
95
|
$stderr.puts 'Done.' unless o_i[:q]
|
85
96
|
end
|
data/bin/miga
CHANGED
@@ -19,7 +19,7 @@ $task_desc = {
|
|
19
19
|
# Datasets
|
20
20
|
add: "Creates an empty dataset in a pre-existing MiGA project.",
|
21
21
|
get: "Downloads a dataset from public databases into a MiGA project.",
|
22
|
-
ncbi_get: "Downloads all genomes in a taxon
|
22
|
+
ncbi_get: "Downloads all genomes in a taxon from NCBI into a MiGA project.",
|
23
23
|
rm: "Removes a dataset from an MiGA project.",
|
24
24
|
find: "Finds unregistered datasets based on result files.",
|
25
25
|
ln: "Link datasets (including results) from one project to another.",
|
@@ -137,13 +137,16 @@ def opt_common(opt, o)
|
|
137
137
|
end
|
138
138
|
|
139
139
|
# OptParse flags to filter lists of datasets.
|
140
|
-
def opt_filter_datasets(opt, o, what=[:ref, :multi, :taxonomy])
|
140
|
+
def opt_filter_datasets(opt, o, what=[:ref, :multi, :active, :taxonomy])
|
141
141
|
opt.on("--[no-]ref",
|
142
142
|
"If set, uses only reference (or only non-reference) datasets."
|
143
143
|
){ |v| o[:ref]=v } if what.include? :ref
|
144
144
|
opt.on("--[no-]multi",
|
145
145
|
"If set, uses only multi-species (or only single-species) datasets."
|
146
146
|
){ |v| o[:multi]=v } if what.include? :multi
|
147
|
+
opt.on("--[no-]active",
|
148
|
+
"If set, uses only active (or inactive) datasets."
|
149
|
+
){ |v| o[:active]=v } if what.include? :active
|
147
150
|
opt.on("-t", "--taxonomy RANK:TAXON", "Filter by taxonomy."
|
148
151
|
){ |v| o[:taxonomy]=MiGA::Taxonomy.new v } if what.include? :taxonomy
|
149
152
|
opt.on("-k", "--key INTEGER",
|
@@ -163,7 +166,8 @@ end
|
|
163
166
|
|
164
167
|
# Filters datasets by keys set in +opt_filter_datasets+.
|
165
168
|
def filter_datasets!(ds, o)
|
166
|
-
ds.select!{|d| d.is_ref? == o[:ref] } unless o[:ref].nil?
|
169
|
+
ds.select! { |d| d.is_ref? == o[:ref] } unless o[:ref].nil?
|
170
|
+
ds.select! { |d| d.is_active? == o[:active] } unless o[:active].nil?
|
167
171
|
ds.select! do |d|
|
168
172
|
o[:multi] ? d.is_multi? : d.is_nonmulti?
|
169
173
|
end unless o[:multi].nil?
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -122,6 +122,7 @@ module MiGA::Dataset::Result
|
|
122
122
|
# the project as reference datasets.
|
123
123
|
def cleanup_distances!
|
124
124
|
r = get_result(:distances)
|
125
|
+
ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
|
125
126
|
return if r.nil?
|
126
127
|
[:haai_db, :aai_db, :ani_db].each do |db_type|
|
127
128
|
db = r.file_path(db_type)
|
@@ -130,7 +131,7 @@ module MiGA::Dataset::Result
|
|
130
131
|
table = db_type[-6..-4]
|
131
132
|
val = sqlite_db.execute "select seq2 from #{table}"
|
132
133
|
next if val.empty?
|
133
|
-
(val.map
|
134
|
+
(val.map(&:first) - ref).each do |extra|
|
134
135
|
sqlite_db.execute "delete from #{table} where seq2=?", extra
|
135
136
|
end
|
136
137
|
end
|
data/lib/miga/dataset.rb
CHANGED
data/lib/miga/project/dataset.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Helper module including specific functions handle datasets.
|
6
6
|
module MiGA::Project::Dataset
|
7
7
|
|
8
|
-
|
9
8
|
##
|
10
9
|
# Returns Array of MiGA::Dataset.
|
11
10
|
def datasets
|
@@ -17,6 +16,13 @@ module MiGA::Project::Dataset
|
|
17
16
|
def dataset_names
|
18
17
|
metadata[:datasets]
|
19
18
|
end
|
19
|
+
|
20
|
+
##
|
21
|
+
# Returns Hash of Strings => true. Similar to +dataset_names+ but as
|
22
|
+
# Hash for efficiency.
|
23
|
+
def dataset_names_hash
|
24
|
+
@dataset_names_hash ||= Hash[dataset_names.map{ |i| [i,true] }]
|
25
|
+
end
|
20
26
|
|
21
27
|
##
|
22
28
|
# Returns MiGA::Dataset.
|
data/lib/miga/project.rb
CHANGED
@@ -63,6 +63,7 @@ class MiGA::Project < MiGA::MiGA
|
|
63
63
|
# (Re-)load project data and metadata.
|
64
64
|
def load
|
65
65
|
@datasets = {}
|
66
|
+
@dataset_names_hash = nil
|
66
67
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
67
68
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
68
69
|
end
|
data/lib/miga/remote_dataset/download.rb
CHANGED
@@ -50,10 +50,10 @@ class MiGA::RemoteDataset
|
|
50
50
|
doc = ''
|
51
51
|
@timeout_try = 0
|
52
52
|
begin
|
53
|
-
open(url,
|
54
|
-
rescue
|
53
|
+
open(url, read_timeout: 600) { |f| doc = f.read }
|
54
|
+
rescue => e
|
55
55
|
@timeout_try += 1
|
56
|
-
raise
|
56
|
+
raise e if @timeout_try >= 3
|
57
57
|
retry
|
58
58
|
end
|
59
59
|
doc
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -17,6 +17,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
17
17
|
attr_reader :db
|
18
18
|
# Array of IDs of the entries composing the dataset.
|
19
19
|
attr_reader :ids
|
20
|
+
# Internal metadata hash
|
21
|
+
attr_reader :metadata
|
20
22
|
|
21
23
|
##
|
22
24
|
# Initialize MiGA::RemoteDataset with +ids+ in database +db+ from +universe+.
|
@@ -25,6 +27,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
25
27
|
@ids = (ids.is_a?(Array) ? ids : [ids])
|
26
28
|
@db = db.to_sym
|
27
29
|
@universe = universe.to_sym
|
30
|
+
@metadata = {}
|
31
|
+
@metadata[:"#{universe}_#{db}"] = ids.join(",")
|
28
32
|
@@UNIVERSE.keys.include?(@universe) or
|
29
33
|
raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
|
30
34
|
@@UNIVERSE[@universe][:dbs].include?(@db) or
|
@@ -37,15 +41,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
37
41
|
|
38
42
|
##
|
39
43
|
# Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
|
40
|
-
# indicates if it should be a reference dataset, and contains +
|
41
|
-
def save_to(project, name = nil, is_ref = true,
|
44
|
+
# indicates if it should be a reference dataset, and contains +metadata_def+.
|
45
|
+
def save_to(project, name = nil, is_ref = true, metadata_def = {})
|
42
46
|
name ||= ids.join('_').miga_name
|
43
47
|
project = MiGA::Project.new(project) if project.is_a? String
|
44
48
|
MiGA::Dataset.exist?(project, name) and
|
45
49
|
raise "Dataset #{name} exists in the project, aborting..."
|
46
|
-
metadata = get_metadata(
|
50
|
+
@metadata = get_metadata(metadata_def)
|
47
51
|
udb = @@UNIVERSE[universe][:dbs][db]
|
48
|
-
metadata["#{universe}_#{db}"] = ids.join(',')
|
52
|
+
@metadata["#{universe}_#{db}"] = ids.join(',')
|
49
53
|
respond_to?("save_#{udb[:stage]}_to", true) or
|
50
54
|
raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
|
51
55
|
send "save_#{udb[:stage]}_to", project, name, udb
|
@@ -70,15 +74,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
70
74
|
|
71
75
|
##
|
72
76
|
# Get metadata from the remote location.
|
73
|
-
def get_metadata(
|
77
|
+
def get_metadata(metadata_def = {})
|
78
|
+
metadata_def.each { |k,v| @metadata[k] = v }
|
74
79
|
case universe
|
75
|
-
when :ebi, :ncbi
|
80
|
+
when :ebi, :ncbi, :web
|
76
81
|
# Get taxonomy
|
77
|
-
metadata[:tax] = get_ncbi_taxonomy
|
82
|
+
@metadata[:tax] = get_ncbi_taxonomy
|
78
83
|
end
|
79
|
-
metadata
|
80
|
-
metadata = get_type_status(metadata)
|
81
|
-
metadata
|
84
|
+
@metadata = get_type_status(metadata)
|
82
85
|
end
|
83
86
|
|
84
87
|
##
|
@@ -118,6 +121,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
118
121
|
|
119
122
|
private
|
120
123
|
|
124
|
+
def get_ncbi_taxid_from_web
|
125
|
+
return nil unless metadata[:ncbi_asm]
|
126
|
+
base_url = 'https://www.ncbi.nlm.nih.gov/assembly'
|
127
|
+
doc = self.class.download_url(
|
128
|
+
"#{base_url}/#{metadata[:ncbi_asm]}?report=xml&format=text")
|
129
|
+
taxid = doc.scan(%r{<Taxid>(\S+)</Taxid>}).first
|
130
|
+
taxid.nil? ? taxid : taxid.first
|
131
|
+
end
|
132
|
+
|
121
133
|
def get_ncbi_taxid_from_ncbi
|
122
134
|
doc = self.class.download(universe, db, ids, :gb).split(/\n/)
|
123
135
|
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3, 5, 0]
|
13
|
+
VERSION = [0.3, 5, 1]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/aai_distances.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/02.aai"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
echo -n "" > miga-project.log
|
15
|
-
DS=$(miga
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
16
16
|
|
17
17
|
# Extract values
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
data/scripts/ani_distances.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/03.ani"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
echo -n "" > miga-project.log
|
15
|
-
DS=$(miga
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
16
16
|
|
17
17
|
# Extract values
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
data/scripts/haai_distances.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/01.haai"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
echo -n "" > miga-project.log
|
15
|
-
DS=$(miga
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
16
16
|
|
17
17
|
# Extract values
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Scripts/lib/../../enveomics.R
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5.0
|
4
|
+
version: 0.3.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -495,7 +495,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
495
495
|
version: '0'
|
496
496
|
requirements: []
|
497
497
|
rubyforge_project:
|
498
|
-
rubygems_version: 2.
|
498
|
+
rubygems_version: 2.7.7
|
499
499
|
signing_key:
|
500
500
|
specification_version: 4
|
501
501
|
summary: MiGA
|