miga-base 0.3.4.2 → 0.3.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0bb2e8453ba42c927af174b0212b8ab8b5af9281
4
- data.tar.gz: 00750cc643ff8eb2a62f5fc83525984b6fe7b4ab
3
+ metadata.gz: c7f9c242373a48634effe2905d33ddd49c2c1bce
4
+ data.tar.gz: cbdf5d91e364987718b6f0d29b57a0bab372afdc
5
5
  SHA512:
6
- metadata.gz: 2fd6479b87d09a021884598d6ad5bb43c849ba0809821de0b2a14eb2a6670bbf5331eb634fbee54c9def0c3f1192c4deef8b4576b0dc6721f193b90e32bf366c
7
- data.tar.gz: 65cb9f2ac275ed29c34a18db17543e29a3f252374cef4ddc7aedbc9da8210700669b3412b70adb190b3693717149140e39d7d2a89444d5871d7ed9af02f20e74
6
+ metadata.gz: d790bb6056fc556ae86915093d6d16973222009e3249f1d33351ae9e35ab11614ea9163722a63592e2e1e95a0ea1345a537f98f8ffa675ef421ce6dd1f077642
7
+ data.tar.gz: d435fcda801d87f51f1dcc8d090812c7fbae12e74dfbbea5470ed573a993ad61edcf6855376f2ef3ea9acb9900407072b827852716c24ab64c4252e87a982d95
data/actions/get.rb CHANGED
@@ -3,35 +3,39 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- require "miga/remote_dataset"
6
+ require 'miga/remote_dataset'
7
7
 
8
- o = {q:true, query:false, universe: :ebi, db: :embl}
8
+ o = {q: true, query: false, universe: :ebi, db: :embl}
9
9
  OptionParser.new do |opt|
10
10
  opt_banner(opt)
11
11
  opt_object(opt, o, [:project, :dataset, :dataset_type])
12
- opt.on("-I", "--ids ID1,ID2,...",
13
- "(Mandatory unless -F) IDs in the remote database separated by commas."
12
+ opt.on('-I', '--ids ID1,ID2,...',
13
+ '(Mandatory unless -F) IDs in the remote database separated by commas.'
14
14
  ){ |v| o[:ids]=v }
15
- opt.on("-U", "--universe STRING",
15
+ opt.on('-U', '--universe STRING',
16
16
  "Universe where the remote database lives. By default: #{o[:universe]}."
17
17
  ){ |v| o[:universe]=v.to_sym }
18
- opt.on("--db STRING",
18
+ opt.on('--db STRING',
19
19
  "Name of the remote database. By default: #{o[:db]}."
20
20
  ){ |v| o[:db]=v.to_sym }
21
- opt.on("-F", "--file PATH",
22
- "Tab-delimited file (with header) listing the datasets to download.",
23
- "The long form of all the options are supported as header (without the --)",
24
- "including dataset, ids, universe, and db. For query use true/false values."
21
+ opt.on('-F', '--file PATH',
22
+ 'Tab-delimited file (with header) listing the datasets to download.',
23
+ 'The long form of all the options are supported as header (without the --)',
24
+ 'including dataset, ids, universe, and db. For query use true/false values.'
25
25
  ){ |v| o[:file] = v }
26
- opt.on("-q", "--query",
27
- "If set, the dataset is registered as a query, not a reference dataset."
26
+ opt.on('-q', '--query',
27
+ 'If set, the dataset is registered as a query, not a reference dataset.'
28
28
  ){ |v| o[:query]=v }
29
- opt.on("--ignore-dup",
30
- "If set, ignores datasets that already exist."){ |v| o[:ignore_dup]=v }
31
- opt.on("-d", "--description STRING",
32
- "Description of the dataset."){ |v| o[:description]=v }
33
- opt.on("-c", "--comments STRING",
34
- "Comments on the dataset."){ |v| o[:comments]=v }
29
+ opt.on('--ignore-dup',
30
+ 'If set, ignores datasets that already exist.'){ |v| o[:ignore_dup]=v }
31
+ opt.on('-d', '--description STRING',
32
+ 'Description of the dataset.'){ |v| o[:description]=v }
33
+ opt.on('-c', '--comments STRING',
34
+ 'Comments on the dataset.'){ |v| o[:comments]=v }
35
+ opt.on('-m', '--metadata STRING',
36
+ 'Metadata as key-value pairs separated by = and delimited by comma.',
37
+ 'Values are saved as strings except for booleans (true / false) or nil.'
38
+ ){ |v| o[:metadata]=v }
35
39
  opt_common(opt, o)
36
40
  end.parse!
37
41
 
@@ -40,7 +44,7 @@ end.parse!
40
44
  glob = [o]
41
45
  unless o[:file].nil?
42
46
  glob = []
43
- fh = File.open(o[:file], "r")
47
+ fh = File.open(o[:file], 'r')
44
48
  h = nil
45
49
  fh.each do |ln|
46
50
  r = ln.chomp.split(/\t/)
@@ -49,7 +53,7 @@ unless o[:file].nil?
49
53
  else
50
54
  glob << o.dup
51
55
  h.each_index do |i|
52
- glob[glob.size-1][h[i].to_sym] = h[i]=="query" ? r[i]=="true" :
56
+ glob[glob.size-1][h[i].to_sym] = h[i]=='query' ? r[i]=='true' :
53
57
  %w[type universe db].include?(h[i]) ? r[i].to_sym : r[i]
54
58
  end
55
59
  end
@@ -58,25 +62,24 @@ unless o[:file].nil?
58
62
  end
59
63
 
60
64
  glob.each do |o_i|
61
- opt_require(o_i, project:"-P", dataset:"-D", ids:"-I")
65
+ opt_require(o_i, project: '-P', dataset: '-D', ids: '-I')
62
66
 
63
67
  $stderr.puts "Dataset: #{o_i[:dataset]}" unless o_i[:q]
64
- $stderr.puts "Loading project." unless o_i[:q]
68
+ $stderr.puts 'Loading project.' unless o_i[:q]
65
69
  p = MiGA::Project.load(o_i[:project])
66
70
  raise "Impossible to load project: #{o_i[:project]}" if p.nil?
67
71
 
68
72
  next if o_i[:ignore_dup] and not p.dataset(o_i[:dataset]).nil?
69
73
 
70
- $stderr.puts "Locating remote dataset." unless o_i[:q]
74
+ $stderr.puts 'Locating remote dataset.' unless o_i[:q]
71
75
  rd = MiGA::RemoteDataset.new(o_i[:ids], o_i[:db], o_i[:universe])
72
76
 
73
- $stderr.puts "Creating dataset." unless o_i[:q]
74
- md = {}
75
- [:type, :description, :user, :comments].each do |k|
76
- md[k]=o_i[k] unless o_i[k].nil?
77
- end
77
+ $stderr.puts 'Creating dataset.' unless o_i[:q]
78
+ dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
79
+ md = add_metadata(o_i, dummy_d).metadata.data
80
+ dummy_d.remove!
78
81
  rd.save_to(p, o_i[:dataset], !o_i[:query], md)
79
82
  p.add_dataset(o_i[:dataset])
80
83
 
81
- $stderr.puts "Done." unless o_i[:q]
84
+ $stderr.puts 'Done.' unless o_i[:q]
82
85
  end
data/actions/init.rb CHANGED
@@ -172,7 +172,7 @@ $stderr.puts ""
172
172
 
173
173
  # Check for Ruby gems
174
174
  $stderr.puts "Looking for Ruby gems:"
175
- %w(rest-client sqlite3 daemons json).each do |pkg|
175
+ %w(sqlite3 daemons json).each do |pkg|
176
176
  $stderr.print "Testing #{pkg}... "
177
177
  `#{paths["ruby"].shellescape} -r "#{pkg}" -e "" 2>/dev/null`
178
178
  if $?.success?
data/actions/ncbi_get.rb CHANGED
@@ -77,11 +77,7 @@ def get_list(taxon, status)
77
77
  status: status }
78
78
  end
79
79
  url = url_base + URI.encode_www_form(url_param)
80
- response = RestClient::Request.execute(method: :get, url:url, timeout:600)
81
- unless response.code == 200
82
- raise "Unable to reach NCBI, error code #{response.code}."
83
- end
84
- response.to_s
80
+ MiGA::RemoteDataset.download_url url
85
81
  end
86
82
 
87
83
  # Download IDs with reference status
@@ -135,6 +131,7 @@ if o[:scaffold] or o[:contig]
135
131
  map{ |i| "#{i}/#{File.basename(i)}_genomic.fna.gz" }
136
132
  next if ids.empty?
137
133
  n = "#{r[0]}_#{asm}".miga_name
134
+ asm.gsub!(/\(.*\)/, '')
138
135
  ds[n] = {ids: ids, md: {type: :genome, ncbi_asm: asm},
139
136
  db: :assembly_gz, universe: :web}
140
137
  end
data/lib/miga/daemon.rb CHANGED
@@ -19,9 +19,8 @@ class MiGA::Daemon < MiGA::MiGA
19
19
  DateTime.parse(File.read(f))
20
20
  end
21
21
 
22
- # Shutdown all spawned daemons before exit.
22
+ # Array of all spawned daemons.
23
23
  $_MIGA_DAEMON_LAIR = []
24
- END { $_MIGA_DAEMON_LAIR.each(&:terminate) }
25
24
 
26
25
  # MiGA::Project in which the daemon is running.
27
26
  attr_reader :project
@@ -34,7 +34,9 @@ module MiGA::Project::Result
34
34
  def add_result(name, save=true, opts={})
35
35
  return nil if @@RESULT_DIRS[name].nil?
36
36
  base = "#{path}/data/#{@@RESULT_DIRS[name]}/miga-project"
37
- unless opts[:force]
37
+ if opts[:force]
38
+ FileUtils.rm("#{base}.json") if File.exist?("#{base}.json")
39
+ else
38
40
  r_pre = MiGA::Result.load("#{base}.json")
39
41
  return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
40
42
  end
@@ -103,15 +103,15 @@ class MiGA::RemoteDataset < MiGA::MiGA
103
103
  ##
104
104
  # Get NCBI taxonomy as MiGA::Taxonomy.
105
105
  def get_ncbi_taxonomy
106
- lineage = {}
107
106
  tax_id = get_ncbi_taxid
108
- until [nil, '0', '1'].include? tax_id
109
- doc = MiGA::RemoteDataset.download(:ebi, :taxonomy, tax_id, '')
110
- name = doc.scan(/SCIENTIFIC NAME\s+:\s+(.+)/).first.to_a.first
111
- rank = doc.scan(/RANK\s+:\s+(.+)/).first.to_a.first
112
- rank = 'dataset' if lineage.empty? and rank == 'no rank'
113
- lineage[rank] = name unless rank.nil?
114
- tax_id = doc.scan(/PARENT ID\s+:\s+(.+)/).first.to_a.first
107
+ lineage = {}
108
+ doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
109
+ doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
110
+ name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
111
+ rank = i.scan(%r{<Rank>(.*)</Rank>}).first.to_a.first
112
+ rank = nil if rank == 'no rank' or rank.empty?
113
+ rank = 'dataset' if lineage.empty? and rank.nil?
114
+ lineage[rank] = name unless rank.nil? or rank.nil?
115
115
  end
116
116
  MiGA::Taxonomy.new(lineage)
117
117
  end
@@ -1,5 +1,4 @@
1
1
 
2
- require 'rest-client'
3
2
  require 'open-uri'
4
3
  require 'cgi'
5
4
 
@@ -56,7 +55,7 @@ module MiGA::RemoteDataset::Base
56
55
  biosample: {stage: :metadata, map_to: [:assembly], format: :json}
57
56
  },
58
57
  url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
59
- method: :rest,
58
+ method: :net,
60
59
  map_to_universe: :ncbi
61
60
  }
62
61
  }
@@ -33,30 +33,28 @@ class MiGA::RemoteDataset
33
33
  # using +extra+. Returns the doc as String.
34
34
  def download_rest(universe, db, ids, format, extra = [])
35
35
  u = @@UNIVERSE[universe]
36
- url ||= sprintf(u[:url], db, ids.join(","), format, *extra)
37
- response = RestClient::Request.execute(method: :get, url:url, timeout:600)
38
- unless response.code == 200
39
- raise "Unable to reach #{universe} client, error code #{response.code}."
40
- end
41
- response.to_s
36
+ url = sprintf(u[:url], db, ids.join(","), format, *extra)
37
+ download_url url
42
38
  end
43
39
 
44
40
  ##
45
41
  # Download data using a GET request from the +universe+ in the database +db+
46
42
  # with IDs +ids+ and in +format+. Additional URL parameters can be passed
47
43
  # using +extra+. Returns the doc as String.
48
- def download_net(universe, db, ids, format, extra = [])
49
- u = @@UNIVERSE[universe]
50
- url = sprintf(u[:url], db, ids.join(","), format, *extra)
51
- doc = ""
44
+ alias download_net download_rest
45
+
46
+ ##
47
+ # Download the given +url+ and return the result regardless of response
48
+ # code. Attempts download up to three times before raising Net::ReadTimeout.
49
+ def download_url(url)
50
+ doc = ''
52
51
  @timeout_try = 0
53
52
  begin
54
- open(url) { |f| doc = f.read }
53
+ open(url, open_timeout: 600, read_timeout: 600) { |f| doc = f.read }
55
54
  rescue Net::ReadTimeout
56
55
  @timeout_try += 1
57
- if @timeout_try > 3 ; raise Net::ReadTimeout
58
- else ; retry
59
- end
56
+ raise Net::ReadTimeout if @timeout_try >= 3
57
+ retry
60
58
  end
61
59
  doc
62
60
  end
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 4, 2]
13
+ VERSION = [0.3, 5, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
data/scripts/init.bash CHANGED
@@ -121,7 +121,7 @@ done
121
121
  # Check for ruby gems
122
122
  echo "
123
123
  Looking for Ruby gems:" >&2
124
- GEMS="rest-client sqlite3 daemons json"
124
+ GEMS="sqlite3 daemons json"
125
125
  for gem in $GEMS ; do
126
126
  if ! check_gem "$gem" ; then
127
127
  echo "+ Installing $gem (user-only)" >&2
@@ -0,0 +1,36 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ $:.push File.expand_path('../../lib', __FILE__)
4
+ require 'miga'
5
+
6
+ proj_path = ARGV.shift or raise "Usage: #{$0} path/to/project"
7
+
8
+ # Load MiGA object
9
+ p = MiGA::Project.load(proj_path) or raise "Cannot load project: #{proj_path}"
10
+ pr = p.result(:clade_finding) or raise "Unavailable result: clade_finding"
11
+ pf = pr.file_path(:clades_ani95) or raise "Unavailable result file: proposal"
12
+
13
+ # Read ANIspp
14
+ ani_spp = []
15
+ File.open(pf, 'r') do |fh|
16
+ fh.each_line do |ln|
17
+ next if $.==1
18
+ ani_spp << ln.chomp.split(',')
19
+ end
20
+ end
21
+
22
+ # Find the best candidate
23
+ ani_spp.each_with_index do |datasets, i|
24
+ best = nil
25
+ datasets.each do |ds_name|
26
+ d = p.dataset(ds_name) or next
27
+ dr = d.result(:essential_genes) or next
28
+ q = dr[:stats][:quality] or next
29
+ if best.nil? or q > best[:q]
30
+ best = {d: d, q: q}
31
+ end
32
+ end
33
+ raise "Unavailable statistics for any of:\n#{datasets}\n" if best.nil?
34
+ puts "ANIsp_#{i+1}\t#{best[:d].name}"
35
+ end
36
+
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4.2
4
+ version: 0.3.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-06 00:00:00.000000000 Z
11
+ date: 2018-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: rest-client
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.7'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '1.7'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: daemons
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -473,6 +459,7 @@ files:
473
459
  - utils/mytaxa_scan.rb
474
460
  - utils/plot-taxdist.R
475
461
  - utils/ref-tree.R
462
+ - utils/representatives.rb
476
463
  - utils/requirements.txt
477
464
  - utils/subclade/base.rb
478
465
  - utils/subclade/pipeline.rb