miga-base 0.3.5.0 → 0.3.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/actions/add.rb +3 -1
- data/actions/get.rb +18 -7
- data/bin/miga +7 -3
- data/lib/miga/dataset/result.rb +2 -1
- data/lib/miga/dataset.rb +1 -1
- data/lib/miga/project/dataset.rb +7 -1
- data/lib/miga/project.rb +1 -0
- data/lib/miga/remote_dataset/download.rb +3 -3
- data/lib/miga/remote_dataset.rb +22 -10
- data/lib/miga/version.rb +1 -1
- data/scripts/aai_distances.bash +1 -1
- data/scripts/ani_distances.bash +1 -1
- data/scripts/haai_distances.bash +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 25f6a339288dbdbeda1f84f5da6b5de697d3790c0b38d138a4416ff762cae936
|
4
|
+
data.tar.gz: 60187223750e983fafd6088935a912d016bbf4acafaf79b548e206c50076cc04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60b59ccc8fc3bf9f5a584f3221c268839ba4cd41eb3a6c16911f1808944c20100758f74908797f13ada9d2475e4fda382c1e204b257cf6c6adcfd17db694efcf
|
7
|
+
data.tar.gz: e62de30119c51dc4e92e64aec954c263f84a4820f1541f014c49d93dadd8de1efb5d331c35ecbda6ccfa79c402393894b7b6ffd52edf2362b8e117fe505ff230
|
data/actions/add.rb
CHANGED
@@ -60,7 +60,9 @@ def cp_result(o, d, p, sym, res_sym, ext)
|
|
60
60
|
r_dir = MiGA::Dataset.RESULT_DIRS[res_sym]
|
61
61
|
r_path = File.expand_path("data/#{r_dir}/#{d.name}", p.path)
|
62
62
|
ext.each_index do |i|
|
63
|
-
|
63
|
+
next if o[sym][i].nil?
|
64
|
+
gz = o[sym][i] =~ /\.gz/ ? '.gz' : ''
|
65
|
+
FileUtils.cp(o[sym][i], "#{r_path}#{ext[i]}#{gz}")
|
64
66
|
end
|
65
67
|
File.open("#{r_path}.done", "w") { |f| f.print Time.now.to_s }
|
66
68
|
end
|
data/actions/get.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
require 'miga/remote_dataset'
|
7
7
|
|
8
|
-
o = {q: true, query: false, universe: :ebi, db: :embl}
|
8
|
+
o = {q: true, query: false, universe: :ebi, db: :embl, get_md: false}
|
9
9
|
OptionParser.new do |opt|
|
10
10
|
opt_banner(opt)
|
11
11
|
opt_object(opt, o, [:project, :dataset, :dataset_type])
|
@@ -36,6 +36,9 @@ OptionParser.new do |opt|
|
|
36
36
|
'Metadata as key-value pairs separated by = and delimited by comma.',
|
37
37
|
'Values are saved as strings except for booleans (true / false) or nil.'
|
38
38
|
){ |v| o[:metadata]=v }
|
39
|
+
opt.on('--get-metadata',
|
40
|
+
'Only download and update metadata for existing datasets'
|
41
|
+
){ |v| o[:get_md] = v }
|
39
42
|
opt_common(opt, o)
|
40
43
|
end.parse!
|
41
44
|
|
@@ -74,12 +77,20 @@ glob.each do |o_i|
|
|
74
77
|
$stderr.puts 'Locating remote dataset.' unless o_i[:q]
|
75
78
|
rd = MiGA::RemoteDataset.new(o_i[:ids], o_i[:db], o_i[:universe])
|
76
79
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
80
|
+
if o[:get_md]
|
81
|
+
$stderr.puts 'Updating dataset.' unless o_i[:q]
|
82
|
+
d = p.dataset(o_i[:dataset])
|
83
|
+
next if d.nil?
|
84
|
+
md = add_metadata(o_i, d).metadata.data
|
85
|
+
rd.update_metadata(d, md)
|
86
|
+
else
|
87
|
+
$stderr.puts 'Creating dataset.' unless o_i[:q]
|
88
|
+
dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
|
89
|
+
md = add_metadata(o_i, dummy_d).metadata.data
|
90
|
+
dummy_d.remove!
|
91
|
+
rd.save_to(p, o_i[:dataset], !o_i[:query], md)
|
92
|
+
p.add_dataset(o_i[:dataset])
|
93
|
+
end
|
83
94
|
|
84
95
|
$stderr.puts 'Done.' unless o_i[:q]
|
85
96
|
end
|
data/bin/miga
CHANGED
@@ -19,7 +19,7 @@ $task_desc = {
|
|
19
19
|
# Datasets
|
20
20
|
add: "Creates an empty dataset in a pre-existing MiGA project.",
|
21
21
|
get: "Downloads a dataset from public databases into a MiGA project.",
|
22
|
-
ncbi_get: "Downloads all genomes in a taxon
|
22
|
+
ncbi_get: "Downloads all genomes in a taxon from NCBI into a MiGA project.",
|
23
23
|
rm: "Removes a dataset from an MiGA project.",
|
24
24
|
find: "Finds unregistered datasets based on result files.",
|
25
25
|
ln: "Link datasets (including results) from one project to another.",
|
@@ -137,13 +137,16 @@ def opt_common(opt, o)
|
|
137
137
|
end
|
138
138
|
|
139
139
|
# OptParse flags to filter lists of datasets.
|
140
|
-
def opt_filter_datasets(opt, o, what=[:ref, :multi, :taxonomy])
|
140
|
+
def opt_filter_datasets(opt, o, what=[:ref, :multi, :active, :taxonomy])
|
141
141
|
opt.on("--[no-]ref",
|
142
142
|
"If set, uses only reference (or only non-reference) datasets."
|
143
143
|
){ |v| o[:ref]=v } if what.include? :ref
|
144
144
|
opt.on("--[no-]multi",
|
145
145
|
"If set, uses only multi-species (or only single-species) datasets."
|
146
146
|
){ |v| o[:multi]=v } if what.include? :multi
|
147
|
+
opt.on("--[no-]active",
|
148
|
+
"If set, uses only active (or inactive) datasets."
|
149
|
+
){ |v| o[:active]=v } if what.include? :active
|
147
150
|
opt.on("-t", "--taxonomy RANK:TAXON", "Filter by taxonomy."
|
148
151
|
){ |v| o[:taxonomy]=MiGA::Taxonomy.new v } if what.include? :taxonomy
|
149
152
|
opt.on("-k", "--key INTEGER",
|
@@ -163,7 +166,8 @@ end
|
|
163
166
|
|
164
167
|
# Filters datasets by keys set in +opt_filter_datasets+.
|
165
168
|
def filter_datasets!(ds, o)
|
166
|
-
ds.select!{|d| d.is_ref? == o[:ref] } unless o[:ref].nil?
|
169
|
+
ds.select! { |d| d.is_ref? == o[:ref] } unless o[:ref].nil?
|
170
|
+
ds.select! { |d| d.is_active? == o[:active] } unless o[:active].nil?
|
167
171
|
ds.select! do |d|
|
168
172
|
o[:multi] ? d.is_multi? : d.is_nonmulti?
|
169
173
|
end unless o[:multi].nil?
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -122,6 +122,7 @@ module MiGA::Dataset::Result
|
|
122
122
|
# the project as reference datasets.
|
123
123
|
def cleanup_distances!
|
124
124
|
r = get_result(:distances)
|
125
|
+
ref = project.datasets.select(&:is_ref?).select(&:is_active?).map(&:name)
|
125
126
|
return if r.nil?
|
126
127
|
[:haai_db, :aai_db, :ani_db].each do |db_type|
|
127
128
|
db = r.file_path(db_type)
|
@@ -130,7 +131,7 @@ module MiGA::Dataset::Result
|
|
130
131
|
table = db_type[-6..-4]
|
131
132
|
val = sqlite_db.execute "select seq2 from #{table}"
|
132
133
|
next if val.empty?
|
133
|
-
(val.map
|
134
|
+
(val.map(&:first) - ref).each do |extra|
|
134
135
|
sqlite_db.execute "delete from #{table} where seq2=?", extra
|
135
136
|
end
|
136
137
|
end
|
data/lib/miga/dataset.rb
CHANGED
data/lib/miga/project/dataset.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Helper module including specific functions handle datasets.
|
6
6
|
module MiGA::Project::Dataset
|
7
7
|
|
8
|
-
|
9
8
|
##
|
10
9
|
# Returns Array of MiGA::Dataset.
|
11
10
|
def datasets
|
@@ -17,6 +16,13 @@ module MiGA::Project::Dataset
|
|
17
16
|
def dataset_names
|
18
17
|
metadata[:datasets]
|
19
18
|
end
|
19
|
+
|
20
|
+
##
|
21
|
+
# Returns Hash of Strings => true. Similar to +dataset_names+ but as
|
22
|
+
# Hash for efficiency.
|
23
|
+
def dataset_names_hash
|
24
|
+
@dataset_names_hash ||= Hash[dataset_names.map{ |i| [i,true] }]
|
25
|
+
end
|
20
26
|
|
21
27
|
##
|
22
28
|
# Returns MiGA::Dataset.
|
data/lib/miga/project.rb
CHANGED
@@ -63,6 +63,7 @@ class MiGA::Project < MiGA::MiGA
|
|
63
63
|
# (Re-)load project data and metadata.
|
64
64
|
def load
|
65
65
|
@datasets = {}
|
66
|
+
@dataset_names_hash = nil
|
66
67
|
@metadata = MiGA::Metadata.load "#{path}/miga.project.json"
|
67
68
|
raise "Couldn't find project metadata at #{path}" if metadata.nil?
|
68
69
|
end
|
@@ -50,10 +50,10 @@ class MiGA::RemoteDataset
|
|
50
50
|
doc = ''
|
51
51
|
@timeout_try = 0
|
52
52
|
begin
|
53
|
-
open(url,
|
54
|
-
rescue
|
53
|
+
open(url, read_timeout: 600) { |f| doc = f.read }
|
54
|
+
rescue => e
|
55
55
|
@timeout_try += 1
|
56
|
-
raise
|
56
|
+
raise e if @timeout_try >= 3
|
57
57
|
retry
|
58
58
|
end
|
59
59
|
doc
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -17,6 +17,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
17
17
|
attr_reader :db
|
18
18
|
# Array of IDs of the entries composing the dataset.
|
19
19
|
attr_reader :ids
|
20
|
+
# Internal metadata hash
|
21
|
+
attr_reader :metadata
|
20
22
|
|
21
23
|
##
|
22
24
|
# Initialize MiGA::RemoteDataset with +ids+ in database +db+ from +universe+.
|
@@ -25,6 +27,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
25
27
|
@ids = (ids.is_a?(Array) ? ids : [ids])
|
26
28
|
@db = db.to_sym
|
27
29
|
@universe = universe.to_sym
|
30
|
+
@metadata = {}
|
31
|
+
@metadata[:"#{universe}_#{db}"] = ids.join(",")
|
28
32
|
@@UNIVERSE.keys.include?(@universe) or
|
29
33
|
raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
|
30
34
|
@@UNIVERSE[@universe][:dbs].include?(@db) or
|
@@ -37,15 +41,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
37
41
|
|
38
42
|
##
|
39
43
|
# Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
|
40
|
-
# indicates if it should be a reference dataset, and contains +
|
41
|
-
def save_to(project, name = nil, is_ref = true,
|
44
|
+
# indicates if it should be a reference dataset, and contains +metadata_def+.
|
45
|
+
def save_to(project, name = nil, is_ref = true, metadata_def = {})
|
42
46
|
name ||= ids.join('_').miga_name
|
43
47
|
project = MiGA::Project.new(project) if project.is_a? String
|
44
48
|
MiGA::Dataset.exist?(project, name) and
|
45
49
|
raise "Dataset #{name} exists in the project, aborting..."
|
46
|
-
metadata = get_metadata(
|
50
|
+
@metadata = get_metadata(metadata_def)
|
47
51
|
udb = @@UNIVERSE[universe][:dbs][db]
|
48
|
-
metadata["#{universe}_#{db}"] = ids.join(',')
|
52
|
+
@metadata["#{universe}_#{db}"] = ids.join(',')
|
49
53
|
respond_to?("save_#{udb[:stage]}_to", true) or
|
50
54
|
raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
|
51
55
|
send "save_#{udb[:stage]}_to", project, name, udb
|
@@ -70,15 +74,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
70
74
|
|
71
75
|
##
|
72
76
|
# Get metadata from the remote location.
|
73
|
-
def get_metadata(
|
77
|
+
def get_metadata(metadata_def = {})
|
78
|
+
metadata_def.each { |k,v| @metadata[k] = v }
|
74
79
|
case universe
|
75
|
-
when :ebi, :ncbi
|
80
|
+
when :ebi, :ncbi, :web
|
76
81
|
# Get taxonomy
|
77
|
-
metadata[:tax] = get_ncbi_taxonomy
|
82
|
+
@metadata[:tax] = get_ncbi_taxonomy
|
78
83
|
end
|
79
|
-
metadata
|
80
|
-
metadata = get_type_status(metadata)
|
81
|
-
metadata
|
84
|
+
@metadata = get_type_status(metadata)
|
82
85
|
end
|
83
86
|
|
84
87
|
##
|
@@ -118,6 +121,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
118
121
|
|
119
122
|
private
|
120
123
|
|
124
|
+
def get_ncbi_taxid_from_web
|
125
|
+
return nil unless metadata[:ncbi_asm]
|
126
|
+
base_url = 'https://www.ncbi.nlm.nih.gov/assembly'
|
127
|
+
doc = self.class.download_url(
|
128
|
+
"#{base_url}/#{metadata[:ncbi_asm]}?report=xml&format=text")
|
129
|
+
taxid = doc.scan(%r{<Taxid>(\S+)</Taxid>}).first
|
130
|
+
taxid.nil? ? taxid : taxid.first
|
131
|
+
end
|
132
|
+
|
121
133
|
def get_ncbi_taxid_from_ncbi
|
122
134
|
doc = self.class.download(universe, db, ids, :gb).split(/\n/)
|
123
135
|
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3, 5,
|
13
|
+
VERSION = [0.3, 5, 1]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/aai_distances.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/02.aai"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
echo -n "" > miga-project.log
|
15
|
-
DS=$(miga
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
16
16
|
|
17
17
|
# Extract values
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
data/scripts/ani_distances.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/03.ani"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
echo -n "" > miga-project.log
|
15
|
-
DS=$(miga
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
16
16
|
|
17
17
|
# Extract values
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
data/scripts/haai_distances.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$PROJECT/data/09.distances/01.haai"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
echo -n "" > miga-project.log
|
15
|
-
DS=$(miga
|
15
|
+
DS=$(miga ls -P "$PROJECT" --ref --no-multi --active)
|
16
16
|
|
17
17
|
# Extract values
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t" >miga-project.txt
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
utils/enveomics/Scripts/lib/../../enveomics.R
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5.
|
4
|
+
version: 0.3.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-11-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -495,7 +495,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
495
495
|
version: '0'
|
496
496
|
requirements: []
|
497
497
|
rubyforge_project:
|
498
|
-
rubygems_version: 2.
|
498
|
+
rubygems_version: 2.7.7
|
499
499
|
signing_key:
|
500
500
|
specification_version: 4
|
501
501
|
summary: MiGA
|