miga-base 0.3.9.1 → 0.3.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 89d27413bd8dc321a75fc3dc434a191c1eba336bfab3beaaa1e815852048e30a
4
- data.tar.gz: 59eeab21ff1ef81bf76147c1b98c3a49e31535831fe19e8ec2757b59a666dd4e
3
+ metadata.gz: 27d4c5ad304eb34c537ac2e625501b21682421567c555eb7152453d00b65a49e
4
+ data.tar.gz: bacf38077a05bec6c25dcbdd630b7a4f18f140ed4588877ac78254f3d9fc381d
5
5
  SHA512:
6
- metadata.gz: 2e4d975743a230c951578605e93a1b4f3061e8e3d1c91cf147db5c297bfa2427de344a72f118c50d0a3b3e9b2e67137baed9cc14157e86aa3d98738bb09d334f
7
- data.tar.gz: d9761542220413d20b162bceeada84cf414d9d17b10764d049b3a76c2f9a3d757389b24c0ac290e02801246c114496e4729c45da5e5b863c22fde009c2eeca66
6
+ metadata.gz: 9b4d8fe08eaa514a76d3c6b62d0788aa44eb2f95e4cb56eb256671033a6f2ee29482c8ffc5186c614d4f587a4af1a6d85dd1b87a980fd2edc11ebb94330b4e07
7
+ data.tar.gz: a02db245387ff5185541ad98fb7556d018320eb69763cafbb37a96e1fb2fe8918b9701f13de6513a97ecd5902104414b47d380a62abf7528bb0dab5d35dc61cd
data/actions/daemon.rb CHANGED
@@ -3,53 +3,53 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- require "miga/daemon"
6
+ require 'miga/daemon'
7
7
 
8
- task = ARGV.shift unless ["-h","--help"].include? ARGV.first
9
- ARGV << "-h" if ARGV.empty?
10
- o = {q:true, daemon_opts:[]}
8
+ task = ARGV.shift unless %w[-h --help].include? ARGV.first
9
+ ARGV << '-h' if ARGV.empty?
10
+ o = {q: true, daemon_opts: []}
11
11
  OptionParser.new do |opt|
12
12
  opt_banner(opt)
13
- opt.separator "task:"
14
- { start: "Start an instance of the application.",
15
- stop: "Start an instance of the application.",
16
- restart: "Stop all instances and restart them afterwards.",
17
- reload: "Send a SIGHUP to all instances of the application.",
18
- run: "Start the application and stay on top.",
19
- zap: "Set the application to a stopped state.",
20
- status: "Show status (PID) of application instances."
21
- }.each{ |k,v| opt.separator sprintf " %*s%s", -33, k, v }
22
- opt.separator ""
23
- opt.separator "MiGA options:"
13
+ opt.separator 'task:'
14
+ { start: 'Start an instance of the application.',
15
+ stop: 'Start an instance of the application.',
16
+ restart: 'Stop all instances and restart them afterwards.',
17
+ reload: 'Send a SIGHUP to all instances of the application.',
18
+ run: 'Start the application and stay on top.',
19
+ zap: 'Set the application to a stopped state.',
20
+ status: 'Show status (PID) of application instances.'
21
+ }.each{ |k,v| opt.separator sprintf ' %*s%s', -33, k, v }
22
+ opt.separator ''
23
+ opt.separator 'MiGA options:'
24
24
  opt_object(opt, o, [:project])
25
- opt.on("--shutdown-when-done",
26
- "If passed, the daemon will exit when all processing is done.",
27
- "Otherwise (default), it will stay idle awaiting for new data."
25
+ opt.on('--shutdown-when-done',
26
+ 'If passed, the daemon will exit when all processing is done.',
27
+ 'Otherwise (default), it will stay idle awaiting for new data.'
28
28
  ){ |v| o[:shutdown_when_done] = v }
29
- opt.on("--latency INT",
30
- "Number of seconds the daemon will be sleeping."
29
+ opt.on('--latency INT',
30
+ 'Number of seconds the daemon will be sleeping.'
31
31
  ){ |v| o[:latency]=v.to_i }
32
- opt.on("--max-jobs INT",
33
- "Maximum number of jobs to use simultaneously."){ |v| o[:maxjobs]=v.to_i }
34
- opt.on("--ppn INT",
35
- "Maximum number of cores to use in a single job."){ |v| o[:ppn]=v.to_i }
32
+ opt.on('--max-jobs INT',
33
+ 'Maximum number of jobs to use simultaneously.'){ |v| o[:maxjobs]=v.to_i }
34
+ opt.on('--ppn INT',
35
+ 'Maximum number of cores to use in a single job.'){ |v| o[:ppn]=v.to_i }
36
36
  opt_common(opt, o)
37
- opt.separator "Daemon options:"
38
- opt.on("-t", "--ontop",
39
- "Stay on top (does not daemonize)."){ o[:daemon_opts] << '-t' }
40
- opt.on("-f", "--force", "Force operation."){ o[:daemon_opts] << '-f' }
41
- opt.on("-n", "--no_wait",
42
- "Do not wait for processes to stop."){ o[:daemon_opts] << '-n' }
43
- opt.on("--shush", "Silence the daemon."){ o[:daemon_opts] << '--shush' }
37
+ opt.separator 'Daemon options:'
38
+ opt.on('-t', '--ontop',
39
+ 'Stay on top (does not daemonize).'){ o[:daemon_opts] << '-t' }
40
+ opt.on('-f', '--force', 'Force operation.'){ o[:daemon_opts] << '-f' }
41
+ opt.on('-n', '--no_wait',
42
+ 'Do not wait for processes to stop.'){ o[:daemon_opts] << '-n' }
43
+ opt.on('--shush', 'Silence the daemon.'){ o[:daemon_opts] << '--shush' }
44
44
  end.parse!
45
45
 
46
46
  ##=> Main <=
47
- opt_require(o, project:"-P")
47
+ opt_require(o, project: '-P')
48
48
 
49
- raise "Project doesn't exist, aborting." unless MiGA::Project.exist? o[:project]
50
- p = MiGA::Project.new(o[:project])
49
+ p = MiGA::Project.load(o[:project]) or raise 'Project doesn\'t exist, aborting.'
51
50
  d = MiGA::Daemon.new(p)
52
51
  [:latency, :maxjobs, :ppn, :shutdown_when_done].each do |k|
53
52
  d.runopts(k, o[k]) unless o[k].nil?
54
53
  end
55
54
  d.daemon(task, o[:daemon_opts])
55
+
data/actions/get.rb CHANGED
@@ -5,7 +5,8 @@
5
5
 
6
6
  require 'miga/remote_dataset'
7
7
 
8
- o = {q: true, query: false, universe: :ebi, db: :embl, get_md: false}
8
+ o = {q: true, query: false, universe: :ebi, db: :embl,
9
+ get_md: false, only_md: false}
9
10
  OptionParser.new do |opt|
10
11
  opt_banner(opt)
11
12
  opt_object(opt, o, [:project, :dataset, :dataset_type])
@@ -39,6 +40,9 @@ OptionParser.new do |opt|
39
40
  opt.on('--get-metadata',
40
41
  'Only download and update metadata for existing datasets'
41
42
  ){ |v| o[:get_md] = v }
43
+ opt.on('--only-metadata',
44
+ 'Create datasets without input data but retrieve all metadata.'
45
+ ){ |v| o[:only_md] = v }
42
46
  opt.on('--api-key STRING',
43
47
  'API key for the given universe.'){ |v| o[:api_key] = v }
44
48
  opt_common(opt, o)
@@ -91,6 +95,7 @@ glob.each do |o_i|
91
95
  $stderr.puts 'Creating dataset.' unless o_i[:q]
92
96
  dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
93
97
  md = add_metadata(o_i, dummy_d).metadata.data
98
+ md[:metadata_only] = true if o[:only_md]
94
99
  dummy_d.remove!
95
100
  rd.save_to(p, o_i[:dataset], !o_i[:query], md)
96
101
  p.add_dataset(o_i[:dataset])
data/actions/ncbi_get.rb CHANGED
@@ -6,11 +6,11 @@
6
6
  require 'miga/remote_dataset'
7
7
  require 'csv'
8
8
 
9
- o = {q:true, query:false, unlink:false,
9
+ o = {q: true, query: false, unlink: false,
10
10
  reference: false, legacy_name: false,
11
11
  complete: false, chromosome: false,
12
12
  scaffold: false, contig: false, add_version: true, dry: false,
13
- get_md: false}
13
+ get_md: false, only_md: false, save_every: 1}
14
14
  OptionParser.new do |opt|
15
15
  opt_banner(opt)
16
16
  opt_object(opt, o, [:project])
@@ -43,6 +43,13 @@ OptionParser.new do |opt|
43
43
  opt.on('--get-metadata',
44
44
  'Only download and update metadata for existing datasets'
45
45
  ){ |v| o[:get_md] = v }
46
+ opt.on('--only-metadata',
47
+ 'Create datasets without input data but retrieve all metadata.'
48
+ ){ |v| o[:only_md] = v }
49
+ opt.on('--save-every INT',
50
+ 'Save project every this many downloaded datasets.',
51
+ 'If zero, it saves the project only once upon completion.',
52
+ 'By default: 1.'){ |v| o[:save_every] = v.to_i }
46
53
  opt.on('-q', '--query',
47
54
  'Register the datasets as queries, not reference datasets.'
48
55
  ){ |v| o[:query]=v }
@@ -61,6 +68,7 @@ opt_require(o, taxon: '-T') unless o[:reference]
61
68
  unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
62
69
  raise 'No action requested. Pick at least one type of genome.'
63
70
  end
71
+ o[:save_every] = 1 if o[:dry]
64
72
 
65
73
  ##=> Main <=
66
74
  $stderr.puts "Loading project." unless o[:q]
@@ -100,6 +108,7 @@ doc = MiGA::RemoteDataset.download_url(url)
100
108
  CSV.parse(doc, headers: true).each do |r|
101
109
  asm = r['assembly']
102
110
  next if asm.nil? or asm.empty? or asm == '-'
111
+ next unless r['ftp_path_genbank']
103
112
 
104
113
  # Get replicons
105
114
  rep = r['replicons'].nil? ? nil : r['replicons'].
@@ -141,14 +150,16 @@ end
141
150
 
142
151
  # Download entries
143
152
  $stderr.puts "Downloading #{ds.size} " +
144
- (ds.size == 1 ? "entry" : "entries") unless o[:q]
145
- ds.each do |name,body|
153
+ (ds.size == 1 ? 'entry' : 'entries') unless o[:q]
154
+ p.do_not_save = true if o[:save_every] <= 1
155
+ ds.each do |name, body|
146
156
  d << name
147
157
  puts name
148
158
  next if p.dataset(name).nil? == o[:get_md]
149
159
  downloaded += 1
150
160
  next if o[:dry]
151
161
  $stderr.puts ' Locating remote dataset.' unless o[:q]
162
+ body[:md][:metadata_only] = true if o[:only_md]
152
163
  rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
153
164
  if o[:get_md]
154
165
  $stderr.puts ' Updating dataset.' unless o[:q]
@@ -158,8 +169,12 @@ ds.each do |name,body|
158
169
  rd.save_to(p, name, !o[:query], body[:md])
159
170
  p.add_dataset(name)
160
171
  end
172
+ p.save! if o[:save_every] > 1 and (downloaded % o[:save_every]) == 0
161
173
  end
162
174
 
175
+ p.do_not_save = false
176
+ p.save! if o[:save_every] != 1
177
+
163
178
  # Finalize
164
179
  $stderr.puts "Datasets listed: #{d.size}" unless o[:q]
165
180
  $stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
data/actions/rm.rb CHANGED
@@ -3,26 +3,27 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- o = {q:true, remove:false}
6
+ o = {q: true, remove: false}
7
7
  OptionParser.new do |opt|
8
8
  opt_banner(opt)
9
9
  opt_object(opt, o)
10
- opt.on("-r", "--remove", "Also remove all associated files.",
11
- "By default, only unlinks from metadata."){ o[:remove]=true }
10
+ opt.on('-r', '--remove', 'Also remove all associated files.',
11
+ 'By default, only unlinks from metadata.'){ o[:remove] = true }
12
12
  opt_common(opt, o)
13
13
  end.parse!
14
14
 
15
15
  ##=> Main <=
16
16
  opt_require(o)
17
17
 
18
- $stderr.puts "Loading project." unless o[:q]
18
+ $stderr.puts 'Loading project.' unless o[:q]
19
19
  p = MiGA::Project.load(o[:project])
20
20
  raise "Impossible to load project: #{o[:project]}" if p.nil?
21
21
 
22
- $stderr.puts "Unlinking dataset." unless o[:q]
23
- raise "Dataset doesn't exist, aborting." unless
22
+ $stderr.puts 'Unlinking dataset.' unless o[:q]
23
+ raise 'Dataset doesn\'t exist, aborting.' unless
24
24
  MiGA::Dataset.exist?(p, o[:dataset])
25
25
  d = p.unlink_dataset(o[:dataset])
26
26
  d.remove! if o[:remove]
27
27
 
28
- $stderr.puts "Done." unless o[:q]
28
+ $stderr.puts 'Done.' unless o[:q]
29
+
data/lib/miga/common.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'json'
5
4
  require 'miga/version'
5
+ require 'miga/json'
6
6
  require 'miga/common/base'
7
7
  require 'miga/common/path'
8
8
  require 'miga/common/format'
@@ -11,7 +11,7 @@ module MiGA::Daemon::Base
11
11
  # Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
12
12
  # (or nil to use as getter). Skips consistency tests if +force+. Returns new
13
13
  # value.
14
- def runopts(k, v=nil, force=false)
14
+ def runopts(k, v = nil, force = false)
15
15
  k = k.to_sym
16
16
  unless v.nil?
17
17
  if [:latency, :maxjobs, :ppn].include?(k)
@@ -19,16 +19,9 @@ module MiGA::Daemon::Base
19
19
  elsif [:shutdown_when_done].include?(k)
20
20
  v = !!v
21
21
  end
22
- raise "Daemon's #{k} cannot be set to zero." if !force and v==0
22
+ raise "Daemon's #{k} cannot be set to zero." if !force and v == 0
23
23
  @runopts[k] = v
24
24
  end
25
- if k==:kill and v.nil?
26
- case @runopts[:type].to_s
27
- when 'bash' then return "kill -9 '%s'"
28
- when 'qsub' then return "qdel '%s'"
29
- else return "canceljob '%s'"
30
- end
31
- end
32
25
  @runopts[k]
33
26
  end
34
27
 
@@ -51,19 +44,19 @@ module MiGA::Daemon::Base
51
44
 
52
45
  ##
53
46
  # Initializes the daemon with +opts+.
54
- def start(opts=[]) daemon('start', opts); end
47
+ def start(opts = []) daemon('start', opts); end
55
48
 
56
49
  ##
57
50
  # Stops the daemon with +opts+.
58
- def stop(opts=[]) daemon('stop', opts); end
51
+ def stop(opts = []) daemon('stop', opts); end
59
52
 
60
53
  ##
61
54
  # Restarts the daemon with +opts+.
62
- def restart(opts=[]) daemon('restart', opts); end
55
+ def restart(opts = []) daemon('restart', opts); end
63
56
 
64
57
  ##
65
58
  # Returns the status of the daemon with +opts+.
66
- def status(opts=[]) daemon('status', opts); end
59
+ def status(opts = []) daemon('status', opts); end
67
60
 
68
61
  end
69
62
 
data/lib/miga/daemon.rb CHANGED
@@ -39,9 +39,9 @@ class MiGA::Daemon < MiGA::MiGA
39
39
  def initialize(project)
40
40
  $_MIGA_DAEMON_LAIR << self
41
41
  @project = project
42
- @runopts = JSON.parse(
43
- File.read(File.expand_path('daemon/daemon.json', project.path)),
44
- symbolize_names: true)
42
+ @runopts = MiGA::Json.parse(
43
+ File.expand_path('daemon/daemon.json', project.path),
44
+ default: File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
45
45
  @jobs_to_run = []
46
46
  @jobs_running = []
47
47
  @loop_i = -1
@@ -84,10 +84,9 @@ class MiGA::Daemon < MiGA::MiGA
84
84
  ##
85
85
  # Report status in a JSON file.
86
86
  def report_status
87
- f = File.open(File.expand_path('daemon/status.json', project.path), 'w')
88
- f.print JSON.pretty_generate(
89
- jobs_running: @jobs_running, jobs_to_run: @jobs_to_run)
90
- f.close
87
+ MiGA::Json.generate(
88
+ {jobs_running: @jobs_running, jobs_to_run: @jobs_to_run},
89
+ File.expand_path('daemon/status.json', project.path))
91
90
  end
92
91
 
93
92
  ##
@@ -96,7 +95,7 @@ class MiGA::Daemon < MiGA::MiGA
96
95
  f_path = File.expand_path('daemon/status.json', project.path)
97
96
  return unless File.size? f_path
98
97
  say 'Loading previous status in daemon/status.json:'
99
- status = JSON.parse(File.read(f_path), symbolize_names: true)
98
+ status = MiGA::Json.parse(f_path)
100
99
  status.keys.each do |i|
101
100
  status[i].map! do |j|
102
101
  j.tap do |k|
@@ -255,11 +254,6 @@ class MiGA::Daemon < MiGA::MiGA
255
254
  def terminate
256
255
  say 'Terminating daemon...'
257
256
  report_status
258
- k = runopts(:kill)
259
- @jobs_running.each do |i|
260
- `#{k % i[:pid]}`
261
- puts "Terminating pid:#{i[:pid]} for #{i[:task_name]}"
262
- end
263
257
  f = File.expand_path('daemon/alive', project.path)
264
258
  File.unlink(f) if File.exist? f
265
259
  end
data/lib/miga/json.rb ADDED
@@ -0,0 +1,62 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'json'
5
+
6
+ ##
7
+ # Taxonomic classifications in MiGA.
8
+ class MiGA::Json < MiGA::MiGA
9
+
10
+ class << self
11
+
12
+ ##
13
+ # Default parsing options. Supported +opts+ keys:
14
+ # - +:contents+: If true, the input is assumed to be the contents to parse,
15
+ # not the path to a JSON file.
16
+ # - +:default+: A base to attach the parsed hash. A Hash or a String (path).
17
+ # - +:additions+: If addition classes should be parsed. By default is false.
18
+ # - +:symbolize+: If names should be symbolized. By default it's true if
19
+ # additions is false, or false otherwise. They can both be false, but an
20
+ # exception will be raised if both are true
21
+ def default_opts(opts = {})
22
+ opts[:contents] ||= false
23
+ opts[:additions] ||= false
24
+ opts[:symbolize] = !opts[:additions] if opts[:symbolize].nil?
25
+ if opts[:additions] and opts[:symbolize]
26
+ raise 'JSON additions are not supported with symbolized names'
27
+ end
28
+ opts
29
+ end
30
+
31
+ ##
32
+ # Parse a JSON file in +path+ and return a hash. Optionally,
33
+ # use +default+ as the base to attach the parsed hash. +default+
34
+ # can be a Hash or a String (path). See +default_opts+ for supported
35
+ # +opts+.
36
+ def parse(path, opts = {})
37
+ opts = default_opts(opts)
38
+ cont = opts[:contents] ? path : File.read(path)
39
+ raise "Empty descriptor: #{opts[:contents] ? "''" : path}." if cont.empty?
40
+ y = JSON.parse(cont,
41
+ symbolize_names: opts[:symbolize],
42
+ create_additions: opts[:additions])
43
+ unless opts[:default].nil?
44
+ opts[:default] = parse(opts[:default]) if opts[:default].is_a? String
45
+ y.each{ |k, v| opts[:default][k] = v }
46
+ y = opts[:default]
47
+ end
48
+ y
49
+ end
50
+
51
+ ##
52
+ # Generates and returns prettyfied JSON to represent +obj+.
53
+ # If +path+ is passed, it saves the JSON in that file.
54
+ def generate(obj, path = nil)
55
+ y = JSON.pretty_generate(obj)
56
+ File.open(path, 'w') { |fh| fh.print y } unless path.nil?
57
+ y
58
+ end
59
+
60
+ end
61
+
62
+ end
data/lib/miga/metadata.rb CHANGED
@@ -57,14 +57,17 @@ class MiGA::Metadata < MiGA::MiGA
57
57
  def save
58
58
  MiGA.DEBUG "Metadata.save #{path}"
59
59
  self[:updated] = Time.now.to_s
60
- json = JSON.pretty_generate(data)
60
+ json = MiGA::Json.generate(data)
61
61
  sleeper = 0.0
62
+ slept = 0
62
63
  while File.exist?(lock_file)
63
64
  sleeper += 0.1 if sleeper <= 10.0
64
65
  sleep(sleeper.to_i)
66
+ slept += sleeper.to_i
67
+ raise "Lock detected for over 10 minutes: #{lock_file}" if slept > 600
65
68
  end
66
69
  FileUtils.touch lock_file
67
- ofh = File.open("#{path}.tmp", "w")
70
+ ofh = File.open("#{path}.tmp", 'w')
68
71
  ofh.puts json
69
72
  ofh.close
70
73
  raise "Lock-racing detected for #{path}." unless
@@ -81,9 +84,7 @@ class MiGA::Metadata < MiGA::MiGA
81
84
  sleeper += 0.1 if sleeper <= 10.0
82
85
  sleep(sleeper.to_i)
83
86
  end
84
- # :symbolize_names does not play nicely with :create_additions
85
- tmp = JSON.parse(File.read(path),
86
- {:symbolize_names=>false, :create_additions=>true})
87
+ tmp = MiGA::Json.parse(path, additions: true)
87
88
  @data = {}
88
89
  tmp.each_pair{ |k,v| self[k] = v }
89
90
  end
data/lib/miga/project.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/dataset"
5
- require "miga/project/result"
6
- require "miga/project/dataset"
7
- require "miga/project/plugins"
4
+ require 'miga/dataset'
5
+ require 'miga/project/result'
6
+ require 'miga/project/dataset'
7
+ require 'miga/project/plugins'
8
8
 
9
9
  ##
10
10
  # MiGA representation of a project.
@@ -17,16 +17,21 @@ class MiGA::Project < MiGA::MiGA
17
17
  ##
18
18
  # Absolute path to the project folder.
19
19
  attr_reader :path
20
-
20
+
21
21
  ##
22
22
  # Information about the project as MiGA::Metadata.
23
23
  attr_reader :metadata
24
24
 
25
+ ##
26
+ # If true, it doesn't save changes
27
+ attr_accessor :do_not_save
28
+
25
29
  ##
26
30
  # Create a new MiGA::Project at +path+, if it doesn't exist and +update+ is
27
31
  # false, or load an existing one.
28
32
  def initialize(path, update=false)
29
33
  @datasets = {}
34
+ @do_not_save = false
30
35
  @path = File.absolute_path(path)
31
36
  self.create if not update and not Project.exist? self.path
32
37
  self.load if self.metadata.nil?
@@ -39,7 +44,7 @@ class MiGA::Project < MiGA::MiGA
39
44
  # Create an empty project.
40
45
  def create
41
46
  unless MiGA::MiGA.initialized?
42
- raise "Impossible to create project in uninitialized MiGA."
47
+ raise 'Impossible to create project in uninitialized MiGA.'
43
48
  end
44
49
  dirs = [path] + @@FOLDERS.map{|d| "#{path}/#{d}" } +
45
50
  @@DATA_FOLDERS.map{ |d| "#{path}/data/#{d}"}
@@ -51,14 +56,20 @@ class MiGA::Project < MiGA::MiGA
51
56
  File.exist? "#{path}/daemon/daemon.json"
52
57
  self.load
53
58
  end
54
-
59
+
55
60
  ##
56
- # Save any changes persistently.
61
+ # Save any changes persistently. Do nothing if +do_not_save+ is true.
57
62
  def save
63
+ save! unless do_not_save
64
+ end
65
+
66
+ ##
67
+ # Save any changes persistently, regardless of +do_not_save+.
68
+ def save!
58
69
  metadata.save
59
70
  self.load
60
71
  end
61
-
72
+
62
73
  ##
63
74
  # (Re-)load project data and metadata.
64
75
  def load
@@ -67,7 +78,7 @@ class MiGA::Project < MiGA::MiGA
67
78
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
68
79
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
69
80
  end
70
-
81
+
71
82
  ##
72
83
  # Name of the project.
73
84
  def name ; metadata[:name] ; end
@@ -83,5 +94,5 @@ class MiGA::Project < MiGA::MiGA
83
94
  ##
84
95
  # Is this a project for multi-organism datasets?
85
96
  def is_multi? ; @@KNOWN_TYPES[type][:multi] ; end
86
-
97
+
87
98
  end
@@ -91,7 +91,7 @@ class MiGA::RemoteDataset
91
91
  def ncbi_map(id, dbfrom, db)
92
92
  doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
93
93
  return if doc.empty?
94
- tree = JSON.parse(doc, symbolize_names: true)
94
+ tree = MiGA::Json.parse(doc, contents: true)
95
95
  [:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
96
96
  tree = tree[i]
97
97
  break if tree.nil?
@@ -13,7 +13,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
13
13
  class << self
14
14
  def ncbi_asm_acc2id(acc)
15
15
  return acc if acc =~ /^\d+$/
16
- search_doc = JSON.parse download(:ncbi_search, :assembly, acc, :json)
16
+ search_doc = MiGA::Json.parse(
17
+ download(:ncbi_search, :assembly, acc, :json),
18
+ symbolize: false, contents: true)
17
19
  search_doc['esearchresult']['idlist'].first
18
20
  end
19
21
  end
@@ -55,6 +57,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
55
57
  ##
56
58
  # Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
57
59
  # indicates if it should be a reference dataset, and contains +metadata_def+.
60
+ # If +metadata_def+ includes +metadata_only: true+, no input data is
61
+ # downloaded.
58
62
  def save_to(project, name = nil, is_ref = true, metadata_def = {})
59
63
  name ||= ids.join('_').miga_name
60
64
  project = MiGA::Project.new(project) if project.is_a? String
@@ -63,16 +67,20 @@ class MiGA::RemoteDataset < MiGA::MiGA
63
67
  @metadata = get_metadata(metadata_def)
64
68
  udb = @@UNIVERSE[universe][:dbs][db]
65
69
  @metadata["#{universe}_#{db}"] = ids.join(',')
66
- respond_to?("save_#{udb[:stage]}_to", true) or
67
- raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
68
- send "save_#{udb[:stage]}_to", project, name, udb
70
+ unless @metadata[:metadata_only]
71
+ respond_to?("save_#{udb[:stage]}_to", true) or
72
+ raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
73
+ send "save_#{udb[:stage]}_to", project, name, udb
74
+ end
69
75
  dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
70
76
  project.add_dataset(dataset.name)
71
- result = dataset.add_result(udb[:stage], true, is_clean: true)
72
- result.nil? and
73
- raise 'Empty dataset: seed result not added due to incomplete files.'
74
- result.clean!
75
- result.save
77
+ unless @metadata[:metadata_only]
78
+ result = dataset.add_result(udb[:stage], true, is_clean: true)
79
+ result.nil? and
80
+ raise 'Empty dataset: seed result not added due to incomplete files.'
81
+ result.clean!
82
+ result.save
83
+ end
76
84
  dataset
77
85
  end
78
86
 
@@ -141,8 +149,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
141
149
  metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
142
150
  return nil unless metadata[:ncbi_asm]
143
151
  ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
144
- doc = JSON.parse(
145
- self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json))
152
+ doc = MiGA::Json.parse(
153
+ self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
154
+ symbolize: false, contents: true)
146
155
  @_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
147
156
  end
148
157
 
data/lib/miga/result.rb CHANGED
@@ -122,19 +122,14 @@ class MiGA::Result < MiGA::MiGA
122
122
  @data[:started] = File.read(s).chomp
123
123
  File.unlink s
124
124
  end
125
- json = JSON.pretty_generate data
126
- ofh = File.open(path, "w")
127
- ofh.puts json
128
- ofh.close
125
+ MiGA::Json.generate(data, path)
129
126
  self.load
130
127
  end
131
128
 
132
129
  ##
133
130
  # Load (or reload) result data in the JSON file #path.
134
131
  def load
135
- json = File.read(path)
136
- raise "Impossible to load result, empty descriptor: #{path}." if json.empty?
137
- @data = JSON.parse(json, {:symbolize_names=>true})
132
+ @data = MiGA::Json.parse(path)
138
133
  @data[:files] ||= {}
139
134
  @results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
140
135
  end
@@ -57,8 +57,8 @@ class MiGA::TaxIndex < MiGA::MiGA
57
57
  ##
58
58
  # Generate JSON String for the index.
59
59
  def to_json
60
- JSON.generate({ root:root.to_hash,
61
- datasets:datasets.map{ |d| d.name } })
60
+ MiGA::Json.generate(
61
+ { root: root.to_hash, datasets: datasets.map{ |d| d.name } })
62
62
  end
63
63
 
64
64
  ##
data/lib/miga/taxonomy.rb CHANGED
@@ -15,37 +15,37 @@ class MiGA::Taxonomy < MiGA::MiGA
15
15
  ##
16
16
  # Long names of the cannonical ranks.
17
17
  def self.LONG_RANKS() @@LONG_RANKS ; end
18
- @@LONG_RANKS = {root: "root", ns: "namespace", d: "domain", k: "kingdom",
19
- p: "phylum", c: "class", o: "order", f: "family", g: "genus", s: "species",
20
- ssp: "subspecies", str: "strain", ds: "dataset"}
18
+ @@LONG_RANKS = {root: 'root', ns: 'namespace', d: 'domain', k: 'kingdom',
19
+ p: 'phylum', c: 'class', o: 'order', f: 'family', g: 'genus', s: 'species',
20
+ ssp: 'subspecies', str: 'strain', ds: 'dataset'}
21
21
 
22
22
  ##
23
23
  # Synonms for cannonical ranks.
24
24
  @@RANK_SYNONYMS = {
25
- "namespace"=>"ns",
26
- "domain"=>"d","superkingdom"=>"d",
27
- "kingdom"=>"k",
28
- "phylum"=>"p",
29
- "class"=>"c",
30
- "order"=>"o",
31
- "family"=>"f",
32
- "genus"=>"g",
33
- "species"=>"s","sp"=>"s",
34
- "subspecies"=>"ssp",
35
- "strain"=>"str","isolate"=>"str","culture"=>"str",
36
- "dataset"=>"ds","organism"=>"ds","genome"=>"ds","specimen"=>"ds"
25
+ 'namespace' => 'ns',
26
+ 'domain' => 'd', 'superkingdom' => 'd',
27
+ 'kingdom' => 'k',
28
+ 'phylum' => 'p',
29
+ 'class' => 'c',
30
+ 'order' => 'o',
31
+ 'family' => 'f',
32
+ 'genus' => 'g',
33
+ 'species' => 's', 'sp' => 's',
34
+ 'subspecies' => 'ssp',
35
+ 'strain' => 'str', 'isolate' => 'str', 'culture' => 'str',
36
+ 'dataset' => 'ds', 'organism' => 'ds', 'genome' => 'ds', 'specimen' => 'ds'
37
37
  }
38
38
 
39
39
  ##
40
40
  # Initialize from JSON-derived Hash +o+.
41
- def self.json_create(o) new(o["str"]) ; end
41
+ def self.json_create(o) new(o['str']) ; end
42
42
 
43
43
  ##
44
44
  # Returns cannonical rank (Symbol) for the +rank+ String.
45
45
  def self.normalize_rank(rank)
46
46
  return rank.to_sym if @@_KNOWN_RANKS_H[rank.to_sym]
47
47
  rank = rank.to_s.downcase
48
- return nil if rank=="no rank"
48
+ return nil if rank == 'no rank'
49
49
  rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
50
50
  rank = rank.to_sym
51
51
  return nil unless @@_KNOWN_RANKS_H[rank]
@@ -64,7 +64,7 @@ class MiGA::Taxonomy < MiGA::MiGA
64
64
  # either a rank:value pair (if +ranks+ is nil), or just values in the same
65
65
  # order as ther ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
66
66
  # value pairs is also supported.
67
- def initialize(str, ranks=nil)
67
+ def initialize(str, ranks = nil)
68
68
  @ranks = {}
69
69
  if ranks.nil?
70
70
  case str when Array, Hash
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 9, 1]
13
+ VERSION = [0.3, 10, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2019, 03, 8)
21
+ VERSION_DATE = Date.new(2019, 04, 9)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/test/daemon_test.rb CHANGED
@@ -114,11 +114,6 @@ class DaemonTest < Test::Unit::TestCase
114
114
  $d1.runopts(:latency, "!")
115
115
  end
116
116
  assert_equal("bash", $d1.runopts(:type))
117
- assert_equal("kill -9 '%s'", $d1.runopts(:kill))
118
- $d1.runopts(:type, "qsub")
119
- assert_equal("qdel '%s'", $d1.runopts(:kill))
120
- $d1.runopts(:type, "msub")
121
- assert_equal("canceljob '%s'", $d1.runopts(:kill))
122
117
  end
123
118
 
124
119
  def test_say
data/test/json_test.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'test_helper'
2
+
3
+ class JsonTest < Test::Unit::TestCase
4
+
5
+ def test_parse
6
+ assert_equal({a: 1, b: 2},
7
+ MiGA::Json.parse('{"a": 1, "b": 2}', contents: true))
8
+ assert_equal({'a' => 1, 'b' => 2},
9
+ MiGA::Json.parse('{"a": 1, "b": 2}', contents: true, symbolize: false))
10
+ assert_equal(1.0, MiGA::Json.parse('1.0', contents: true))
11
+ end
12
+
13
+ def test_defaults
14
+ tmp1 = Tempfile.new('test-parse-1.json')
15
+ tmp1.puts '{"a": 123, "k": false, "t": null}'
16
+ tmp1.close
17
+ assert_equal({a: 123, k: false, t: nil}, MiGA::Json.parse(tmp1.path))
18
+
19
+ tmp2 = Tempfile.new('test-parse-2.json')
20
+ tmp2.puts '{"a": 456, "kb": false, "t": 10.0}'
21
+ tmp2.close
22
+ assert_equal({a: 456, kb: false, t: 10.0}, MiGA::Json.parse(tmp2.path))
23
+
24
+ assert_equal(
25
+ {a: 123, k: false, kb: false, t: nil},
26
+ MiGA::Json.parse(tmp1.path, default: tmp2.path))
27
+ assert_equal(
28
+ {a: 456, k: false, kb: false, t: 10.0},
29
+ MiGA::Json.parse(tmp2.path, default: tmp1.path))
30
+ end
31
+
32
+ def test_generate
33
+ assert_equal("{\n \"a\": 1,\n \"b\": 2\n}",
34
+ MiGA::Json.generate({a: 1, b: 2}))
35
+ end
36
+
37
+ end
@@ -5,17 +5,22 @@ require 'miga'
5
5
 
6
6
  ARGV[1] or abort "Usage: #{$0} path/to/project threads"
7
7
 
8
- $stderr.puts "Cleaning databases..."
8
+ $stderr.puts 'Cleaning databases...'
9
9
  p = MiGA::Project.load(ARGV[0])
10
10
  ds_names = p.dataset_names
11
11
  thr = ARGV[1].to_i
12
12
 
13
+ pc = [0] + (1 .. 100).map{ |i| ds_names.size * i / 100 }
14
+ $stderr.puts (('.'*9 + '|')*10) + ' 100%'
15
+
13
16
  (0 .. thr-1).each do |t|
14
17
  fork do
15
- k = -1
16
- ds_names.each do |i|
17
- k = (k+1) % thr
18
- next unless k == t
18
+ ds_names.each_with_index do |i, idx|
19
+ while t == 0 and idx+1 > pc.first
20
+ $stderr.print '#'
21
+ pc.shift
22
+ end
23
+ next unless (idx % thr) == t
19
24
  d = p.dataset(i)
20
25
  next unless d.is_ref? and d.is_active?
21
26
  d.cleanup_distances!
@@ -23,4 +28,5 @@ thr = ARGV[1].to_i
23
28
  end
24
29
  end
25
30
  Process.waitall
31
+ $stderr.puts ' Done'
26
32
 
@@ -108,7 +108,7 @@ class MiGA::DistanceRunner
108
108
 
109
109
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
110
110
  cl_path = res.file_path :clades_ani95
111
- if File.size? cl_path and tsk[0] == :clade_finding
111
+ if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
112
112
  File.foreach(cl_path).
113
113
  map { |i| i.chomp.split(',') }.
114
114
  find( lambda{[]} ){ |i| i.include? closest[:ds] }.
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
data/utils/ref-tree.R CHANGED
@@ -7,15 +7,23 @@
7
7
  #= Load stuff
8
8
  argv <- commandArgs(trailingOnly=T)
9
9
  suppressPackageStartupMessages(library(ape))
10
- suppressPackageStartupMessages(library(phytools))
11
- suppressPackageStartupMessages(library(phangorn))
12
10
  suppressPackageStartupMessages(library(enveomics.R))
11
+ inst <- c("phangorn", "phytools") %in% rownames(installed.packages())
12
+ if(inst[1]){
13
+ suppressPackageStartupMessages(library(phangorn))
14
+ reroot.fun <- midpoint
15
+ }else if(inst[2]){
16
+ suppressPackageStartupMessages(library(phytools))
17
+ reroot.fun <- midpoint.root
18
+ }else{
19
+ reroot.fun <- function(x) return(x)
20
+ }
13
21
 
14
22
  #= Main function
15
23
  ref_tree <- function(ani_file, out_base, q_dataset) {
16
24
  a <- read.table(ani_file, sep="\t", header=TRUE, as.is=TRUE)
17
25
  ani.d <- enve.df2dist(a[,1:3], default.d=0.9, max.sim=100)
18
- ani.ph <- midpoint(bionj(ani.d))
26
+ ani.ph <- reroot.fun(bionj(ani.d))
19
27
  write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
20
28
  pdf(paste(out_base, ".nwk.pdf", sep=""), 7, 7)
21
29
  plot(ani.ph, cex=1/3, type='fan',
@@ -24,42 +32,6 @@ ref_tree <- function(ani_file, out_base, q_dataset) {
24
32
  dev.off()
25
33
  }
26
34
 
27
- # Ancilliary functions
28
- midpoint <- function(tree){
29
- dm = cophenetic(tree)
30
- tree = unroot(tree)
31
- rn = max(tree$edge)+1
32
- maxdm = max(dm)
33
- ind = which(dm==maxdm,arr=TRUE)[1,]
34
- tmproot = Ancestors(tree, ind[1], "parent")
35
- tree = phangorn:::reroot(tree, tmproot)
36
- edge = tree$edge
37
- el = tree$edge.length
38
- children = tree$edge[,2]
39
- left = match(ind[1], children)
40
- tmp = Ancestors(tree, ind[2], "all")
41
- tmp= c(ind[2], tmp[-length(tmp)])
42
- right = match(tmp, children)
43
- if(el[left]>= (maxdm/2)){
44
- edge = rbind(edge, c(rn, ind[1]))
45
- edge[left,2] = rn
46
- el[left] = el[left] - (maxdm/2)
47
- el = c(el, maxdm/2)
48
- }else{
49
- sel = cumsum(el[right])
50
- i = which(sel>(maxdm/2))[1]
51
- edge = rbind(edge, c(rn, tmp[i]))
52
- edge[right[i],2] = rn
53
- eltmp = sel[i] - (maxdm/2)
54
- el = c(el, el[right[i]] - eltmp)
55
- el[right[i]] = eltmp
56
- }
57
- tree$edge.length = el
58
- tree$edge=edge
59
- tree$Nnode = tree$Nnode+1
60
- phangorn:::reorderPruning(phangorn:::reroot(tree, rn))
61
- }
62
-
63
35
  #= Main
64
36
  ref_tree(ani_file=argv[1], out_base=argv[2], q_dataset=argv[3])
65
37
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9.1
4
+ version: 0.3.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-08 00:00:00.000000000 Z
11
+ date: 2019-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -142,6 +142,7 @@ files:
142
142
  - lib/miga/dataset.rb
143
143
  - lib/miga/dataset/base.rb
144
144
  - lib/miga/dataset/result.rb
145
+ - lib/miga/json.rb
145
146
  - lib/miga/metadata.rb
146
147
  - lib/miga/project.rb
147
148
  - lib/miga/project/base.rb
@@ -184,6 +185,7 @@ files:
184
185
  - test/common_test.rb
185
186
  - test/daemon_test.rb
186
187
  - test/dataset_test.rb
188
+ - test/json_test.rb
187
189
  - test/metadata_test.rb
188
190
  - test/project_test.rb
189
191
  - test/remote_dataset_test.rb
@@ -506,8 +508,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
506
508
  - !ruby/object:Gem::Version
507
509
  version: '0'
508
510
  requirements: []
509
- rubyforge_project:
510
- rubygems_version: 2.7.7
511
+ rubygems_version: 3.0.2
511
512
  signing_key:
512
513
  specification_version: 4
513
514
  summary: MiGA