miga-base 0.3.4.2 → 0.3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/get.rb +32 -29
- data/actions/init.rb +1 -1
- data/actions/ncbi_get.rb +2 -5
- data/lib/miga/daemon.rb +1 -2
- data/lib/miga/project/result.rb +3 -1
- data/lib/miga/remote_dataset.rb +8 -8
- data/lib/miga/remote_dataset/base.rb +1 -2
- data/lib/miga/remote_dataset/download.rb +12 -14
- data/lib/miga/version.rb +1 -1
- data/scripts/init.bash +1 -1
- data/utils/representatives.rb +36 -0
- metadata +3 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c7f9c242373a48634effe2905d33ddd49c2c1bce
|
4
|
+
data.tar.gz: cbdf5d91e364987718b6f0d29b57a0bab372afdc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d790bb6056fc556ae86915093d6d16973222009e3249f1d33351ae9e35ab11614ea9163722a63592e2e1e95a0ea1345a537f98f8ffa675ef421ce6dd1f077642
|
7
|
+
data.tar.gz: d435fcda801d87f51f1dcc8d090812c7fbae12e74dfbbea5470ed573a993ad61edcf6855376f2ef3ea9acb9900407072b827852716c24ab64c4252e87a982d95
|
data/actions/get.rb
CHANGED
@@ -3,35 +3,39 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
-
require
|
6
|
+
require 'miga/remote_dataset'
|
7
7
|
|
8
|
-
o = {q:true, query:false, universe: :ebi, db: :embl}
|
8
|
+
o = {q: true, query: false, universe: :ebi, db: :embl}
|
9
9
|
OptionParser.new do |opt|
|
10
10
|
opt_banner(opt)
|
11
11
|
opt_object(opt, o, [:project, :dataset, :dataset_type])
|
12
|
-
opt.on(
|
13
|
-
|
12
|
+
opt.on('-I', '--ids ID1,ID2,...',
|
13
|
+
'(Mandatory unless -F) IDs in the remote database separated by commas.'
|
14
14
|
){ |v| o[:ids]=v }
|
15
|
-
opt.on(
|
15
|
+
opt.on('-U', '--universe STRING',
|
16
16
|
"Universe where the remote database lives. By default: #{o[:universe]}."
|
17
17
|
){ |v| o[:universe]=v.to_sym }
|
18
|
-
opt.on(
|
18
|
+
opt.on('--db STRING',
|
19
19
|
"Name of the remote database. By default: #{o[:db]}."
|
20
20
|
){ |v| o[:db]=v.to_sym }
|
21
|
-
opt.on(
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
opt.on('-F', '--file PATH',
|
22
|
+
'Tab-delimited file (with header) listing the datasets to download.',
|
23
|
+
'The long form of all the options are supported as header (without the --)',
|
24
|
+
'including dataset, ids, universe, and db. For query use true/false values.'
|
25
25
|
){ |v| o[:file] = v }
|
26
|
-
opt.on(
|
27
|
-
|
26
|
+
opt.on('-q', '--query',
|
27
|
+
'If set, the dataset is registered as a query, not a reference dataset.'
|
28
28
|
){ |v| o[:query]=v }
|
29
|
-
opt.on(
|
30
|
-
|
31
|
-
opt.on(
|
32
|
-
|
33
|
-
opt.on(
|
34
|
-
|
29
|
+
opt.on('--ignore-dup',
|
30
|
+
'If set, ignores datasets that already exist.'){ |v| o[:ignore_dup]=v }
|
31
|
+
opt.on('-d', '--description STRING',
|
32
|
+
'Description of the dataset.'){ |v| o[:description]=v }
|
33
|
+
opt.on('-c', '--comments STRING',
|
34
|
+
'Comments on the dataset.'){ |v| o[:comments]=v }
|
35
|
+
opt.on('-m', '--metadata STRING',
|
36
|
+
'Metadata as key-value pairs separated by = and delimited by comma.',
|
37
|
+
'Values are saved as strings except for booleans (true / false) or nil.'
|
38
|
+
){ |v| o[:metadata]=v }
|
35
39
|
opt_common(opt, o)
|
36
40
|
end.parse!
|
37
41
|
|
@@ -40,7 +44,7 @@ end.parse!
|
|
40
44
|
glob = [o]
|
41
45
|
unless o[:file].nil?
|
42
46
|
glob = []
|
43
|
-
fh = File.open(o[:file],
|
47
|
+
fh = File.open(o[:file], 'r')
|
44
48
|
h = nil
|
45
49
|
fh.each do |ln|
|
46
50
|
r = ln.chomp.split(/\t/)
|
@@ -49,7 +53,7 @@ unless o[:file].nil?
|
|
49
53
|
else
|
50
54
|
glob << o.dup
|
51
55
|
h.each_index do |i|
|
52
|
-
glob[glob.size-1][h[i].to_sym] = h[i]==
|
56
|
+
glob[glob.size-1][h[i].to_sym] = h[i]=='query' ? r[i]=='true' :
|
53
57
|
%w[type universe db].include?(h[i]) ? r[i].to_sym : r[i]
|
54
58
|
end
|
55
59
|
end
|
@@ -58,25 +62,24 @@ unless o[:file].nil?
|
|
58
62
|
end
|
59
63
|
|
60
64
|
glob.each do |o_i|
|
61
|
-
opt_require(o_i, project:
|
65
|
+
opt_require(o_i, project: '-P', dataset: '-D', ids: '-I')
|
62
66
|
|
63
67
|
$stderr.puts "Dataset: #{o_i[:dataset]}" unless o_i[:q]
|
64
|
-
$stderr.puts
|
68
|
+
$stderr.puts 'Loading project.' unless o_i[:q]
|
65
69
|
p = MiGA::Project.load(o_i[:project])
|
66
70
|
raise "Impossible to load project: #{o_i[:project]}" if p.nil?
|
67
71
|
|
68
72
|
next if o_i[:ignore_dup] and not p.dataset(o_i[:dataset]).nil?
|
69
73
|
|
70
|
-
$stderr.puts
|
74
|
+
$stderr.puts 'Locating remote dataset.' unless o_i[:q]
|
71
75
|
rd = MiGA::RemoteDataset.new(o_i[:ids], o_i[:db], o_i[:universe])
|
72
76
|
|
73
|
-
$stderr.puts
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
end
|
77
|
+
$stderr.puts 'Creating dataset.' unless o_i[:q]
|
78
|
+
dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
|
79
|
+
md = add_metadata(o_i, dummy_d).metadata.data
|
80
|
+
dummy_d.remove!
|
78
81
|
rd.save_to(p, o_i[:dataset], !o_i[:query], md)
|
79
82
|
p.add_dataset(o_i[:dataset])
|
80
83
|
|
81
|
-
$stderr.puts
|
84
|
+
$stderr.puts 'Done.' unless o_i[:q]
|
82
85
|
end
|
data/actions/init.rb
CHANGED
@@ -172,7 +172,7 @@ $stderr.puts ""
|
|
172
172
|
|
173
173
|
# Check for Ruby gems
|
174
174
|
$stderr.puts "Looking for Ruby gems:"
|
175
|
-
%w(
|
175
|
+
%w(sqlite3 daemons json).each do |pkg|
|
176
176
|
$stderr.print "Testing #{pkg}... "
|
177
177
|
`#{paths["ruby"].shellescape} -r "#{pkg}" -e "" 2>/dev/null`
|
178
178
|
if $?.success?
|
data/actions/ncbi_get.rb
CHANGED
@@ -77,11 +77,7 @@ def get_list(taxon, status)
|
|
77
77
|
status: status }
|
78
78
|
end
|
79
79
|
url = url_base + URI.encode_www_form(url_param)
|
80
|
-
|
81
|
-
unless response.code == 200
|
82
|
-
raise "Unable to reach NCBI, error code #{response.code}."
|
83
|
-
end
|
84
|
-
response.to_s
|
80
|
+
MiGA::RemoteDataset.download_url url
|
85
81
|
end
|
86
82
|
|
87
83
|
# Download IDs with reference status
|
@@ -135,6 +131,7 @@ if o[:scaffold] or o[:contig]
|
|
135
131
|
map{ |i| "#{i}/#{File.basename(i)}_genomic.fna.gz" }
|
136
132
|
next if ids.empty?
|
137
133
|
n = "#{r[0]}_#{asm}".miga_name
|
134
|
+
asm.gsub!(/\(.*\)/, '')
|
138
135
|
ds[n] = {ids: ids, md: {type: :genome, ncbi_asm: asm},
|
139
136
|
db: :assembly_gz, universe: :web}
|
140
137
|
end
|
data/lib/miga/daemon.rb
CHANGED
@@ -19,9 +19,8 @@ class MiGA::Daemon < MiGA::MiGA
|
|
19
19
|
DateTime.parse(File.read(f))
|
20
20
|
end
|
21
21
|
|
22
|
-
#
|
22
|
+
# Array of all spawned daemons.
|
23
23
|
$_MIGA_DAEMON_LAIR = []
|
24
|
-
END { $_MIGA_DAEMON_LAIR.each(&:terminate) }
|
25
24
|
|
26
25
|
# MiGA::Project in which the daemon is running.
|
27
26
|
attr_reader :project
|
data/lib/miga/project/result.rb
CHANGED
@@ -34,7 +34,9 @@ module MiGA::Project::Result
|
|
34
34
|
def add_result(name, save=true, opts={})
|
35
35
|
return nil if @@RESULT_DIRS[name].nil?
|
36
36
|
base = "#{path}/data/#{@@RESULT_DIRS[name]}/miga-project"
|
37
|
-
|
37
|
+
if opts[:force]
|
38
|
+
FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
|
39
|
+
else
|
38
40
|
r_pre = MiGA::Result.load("#{base}.json")
|
39
41
|
return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
|
40
42
|
end
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -103,15 +103,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
103
103
|
##
|
104
104
|
# Get NCBI taxonomy as MiGA::Taxonomy.
|
105
105
|
def get_ncbi_taxonomy
|
106
|
-
lineage = {}
|
107
106
|
tax_id = get_ncbi_taxid
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
rank =
|
113
|
-
|
114
|
-
|
107
|
+
lineage = {}
|
108
|
+
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
109
|
+
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
110
|
+
name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
|
111
|
+
rank = i.scan(%r{<Rank>(.*)</Rank>}).first.to_a.first
|
112
|
+
rank = nil if rank == 'no rank' or rank.empty?
|
113
|
+
rank = 'dataset' if lineage.empty? and rank.nil?
|
114
|
+
lineage[rank] = name unless rank.nil? or rank.nil?
|
115
115
|
end
|
116
116
|
MiGA::Taxonomy.new(lineage)
|
117
117
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
|
2
|
-
require 'rest-client'
|
3
2
|
require 'open-uri'
|
4
3
|
require 'cgi'
|
5
4
|
|
@@ -56,7 +55,7 @@ module MiGA::RemoteDataset::Base
|
|
56
55
|
biosample: {stage: :metadata, map_to: [:assembly], format: :json}
|
57
56
|
},
|
58
57
|
url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
|
59
|
-
method: :
|
58
|
+
method: :net,
|
60
59
|
map_to_universe: :ncbi
|
61
60
|
}
|
62
61
|
}
|
@@ -33,30 +33,28 @@ class MiGA::RemoteDataset
|
|
33
33
|
# using +extra+. Returns the doc as String.
|
34
34
|
def download_rest(universe, db, ids, format, extra = [])
|
35
35
|
u = @@UNIVERSE[universe]
|
36
|
-
url
|
37
|
-
|
38
|
-
unless response.code == 200
|
39
|
-
raise "Unable to reach #{universe} client, error code #{response.code}."
|
40
|
-
end
|
41
|
-
response.to_s
|
36
|
+
url = sprintf(u[:url], db, ids.join(","), format, *extra)
|
37
|
+
download_url url
|
42
38
|
end
|
43
39
|
|
44
40
|
##
|
45
41
|
# Download data using a GET request from the +universe+ in the database +db+
|
46
42
|
# with IDs +ids+ and in +format+. Additional URL parameters can be passed
|
47
43
|
# using +extra+. Returns the doc as String.
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
44
|
+
alias download_net download_rest
|
45
|
+
|
46
|
+
##
|
47
|
+
# Download the given +url+ and return the result regardless of response
|
48
|
+
# code. Attempts download up to three times before raising Net::ReadTimeout.
|
49
|
+
def download_url(url)
|
50
|
+
doc = ''
|
52
51
|
@timeout_try = 0
|
53
52
|
begin
|
54
|
-
open(url) { |f| doc = f.read }
|
53
|
+
open(url, open_timeout: 600, read_timeout: 600) { |f| doc = f.read }
|
55
54
|
rescue Net::ReadTimeout
|
56
55
|
@timeout_try += 1
|
57
|
-
if @timeout_try
|
58
|
-
|
59
|
-
end
|
56
|
+
raise Net::ReadTimeout if @timeout_try >= 3
|
57
|
+
retry
|
60
58
|
end
|
61
59
|
doc
|
62
60
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3,
|
13
|
+
VERSION = [0.3, 5, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/init.bash
CHANGED
@@ -121,7 +121,7 @@ done
|
|
121
121
|
# Check for ruby gems
|
122
122
|
echo "
|
123
123
|
Looking for Ruby gems:" >&2
|
124
|
-
GEMS="
|
124
|
+
GEMS="sqlite3 daemons json"
|
125
125
|
for gem in $GEMS ; do
|
126
126
|
if ! check_gem "$gem" ; then
|
127
127
|
echo "+ Installing $gem (user-only)" >&2
|
@@ -0,0 +1,36 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$:.push File.expand_path('../../lib', __FILE__)
|
4
|
+
require 'miga'
|
5
|
+
|
6
|
+
proj_path = ARGV.shift or raise "Usage: #{$0} path/to/project"
|
7
|
+
|
8
|
+
# Load MiGA object
|
9
|
+
p = MiGA::Project.load(proj_path) or raise "Cannot load project: #{proj_path}"
|
10
|
+
pr = p.result(:clade_finding) or raise "Unavailable result: clade_finding"
|
11
|
+
pf = pr.file_path(:clades_ani95) or raise "Unavailable result file: proposal"
|
12
|
+
|
13
|
+
# Read ANIspp
|
14
|
+
ani_spp = []
|
15
|
+
File.open(pf, 'r') do |fh|
|
16
|
+
fh.each_line do |ln|
|
17
|
+
next if $.==1
|
18
|
+
ani_spp << ln.chomp.split(',')
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Find the best candidate
|
23
|
+
ani_spp.each_with_index do |datasets, i|
|
24
|
+
best = nil
|
25
|
+
datasets.each do |ds_name|
|
26
|
+
d = p.dataset(ds_name) or next
|
27
|
+
dr = d.result(:essential_genes) or next
|
28
|
+
q = dr[:stats][:quality] or next
|
29
|
+
if best.nil? or q > best[:q]
|
30
|
+
best = {d: d, q: q}
|
31
|
+
end
|
32
|
+
end
|
33
|
+
raise "Unavailable statistics for any of:\n#{datasets}\n" if best.nil?
|
34
|
+
puts "ANIsp_#{i+1}\t#{best[:d].name}"
|
35
|
+
end
|
36
|
+
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: rest-client
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.7'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - "~>"
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '1.7'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: daemons
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -473,6 +459,7 @@ files:
|
|
473
459
|
- utils/mytaxa_scan.rb
|
474
460
|
- utils/plot-taxdist.R
|
475
461
|
- utils/ref-tree.R
|
462
|
+
- utils/representatives.rb
|
476
463
|
- utils/requirements.txt
|
477
464
|
- utils/subclade/base.rb
|
478
465
|
- utils/subclade/pipeline.rb
|