miga-base 0.3.9.1 → 0.3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 89d27413bd8dc321a75fc3dc434a191c1eba336bfab3beaaa1e815852048e30a
4
- data.tar.gz: 59eeab21ff1ef81bf76147c1b98c3a49e31535831fe19e8ec2757b59a666dd4e
3
+ metadata.gz: 27d4c5ad304eb34c537ac2e625501b21682421567c555eb7152453d00b65a49e
4
+ data.tar.gz: bacf38077a05bec6c25dcbdd630b7a4f18f140ed4588877ac78254f3d9fc381d
5
5
  SHA512:
6
- metadata.gz: 2e4d975743a230c951578605e93a1b4f3061e8e3d1c91cf147db5c297bfa2427de344a72f118c50d0a3b3e9b2e67137baed9cc14157e86aa3d98738bb09d334f
7
- data.tar.gz: d9761542220413d20b162bceeada84cf414d9d17b10764d049b3a76c2f9a3d757389b24c0ac290e02801246c114496e4729c45da5e5b863c22fde009c2eeca66
6
+ metadata.gz: 9b4d8fe08eaa514a76d3c6b62d0788aa44eb2f95e4cb56eb256671033a6f2ee29482c8ffc5186c614d4f587a4af1a6d85dd1b87a980fd2edc11ebb94330b4e07
7
+ data.tar.gz: a02db245387ff5185541ad98fb7556d018320eb69763cafbb37a96e1fb2fe8918b9701f13de6513a97ecd5902104414b47d380a62abf7528bb0dab5d35dc61cd
data/actions/daemon.rb CHANGED
@@ -3,53 +3,53 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- require "miga/daemon"
6
+ require 'miga/daemon'
7
7
 
8
- task = ARGV.shift unless ["-h","--help"].include? ARGV.first
9
- ARGV << "-h" if ARGV.empty?
10
- o = {q:true, daemon_opts:[]}
8
+ task = ARGV.shift unless %w[-h --help].include? ARGV.first
9
+ ARGV << '-h' if ARGV.empty?
10
+ o = {q: true, daemon_opts: []}
11
11
  OptionParser.new do |opt|
12
12
  opt_banner(opt)
13
- opt.separator "task:"
14
- { start: "Start an instance of the application.",
15
- stop: "Start an instance of the application.",
16
- restart: "Stop all instances and restart them afterwards.",
17
- reload: "Send a SIGHUP to all instances of the application.",
18
- run: "Start the application and stay on top.",
19
- zap: "Set the application to a stopped state.",
20
- status: "Show status (PID) of application instances."
21
- }.each{ |k,v| opt.separator sprintf " %*s%s", -33, k, v }
22
- opt.separator ""
23
- opt.separator "MiGA options:"
13
+ opt.separator 'task:'
14
+ { start: 'Start an instance of the application.',
15
+ stop: 'Start an instance of the application.',
16
+ restart: 'Stop all instances and restart them afterwards.',
17
+ reload: 'Send a SIGHUP to all instances of the application.',
18
+ run: 'Start the application and stay on top.',
19
+ zap: 'Set the application to a stopped state.',
20
+ status: 'Show status (PID) of application instances.'
21
+ }.each{ |k,v| opt.separator sprintf ' %*s%s', -33, k, v }
22
+ opt.separator ''
23
+ opt.separator 'MiGA options:'
24
24
  opt_object(opt, o, [:project])
25
- opt.on("--shutdown-when-done",
26
- "If passed, the daemon will exit when all processing is done.",
27
- "Otherwise (default), it will stay idle awaiting for new data."
25
+ opt.on('--shutdown-when-done',
26
+ 'If passed, the daemon will exit when all processing is done.',
27
+ 'Otherwise (default), it will stay idle awaiting for new data.'
28
28
  ){ |v| o[:shutdown_when_done] = v }
29
- opt.on("--latency INT",
30
- "Number of seconds the daemon will be sleeping."
29
+ opt.on('--latency INT',
30
+ 'Number of seconds the daemon will be sleeping.'
31
31
  ){ |v| o[:latency]=v.to_i }
32
- opt.on("--max-jobs INT",
33
- "Maximum number of jobs to use simultaneously."){ |v| o[:maxjobs]=v.to_i }
34
- opt.on("--ppn INT",
35
- "Maximum number of cores to use in a single job."){ |v| o[:ppn]=v.to_i }
32
+ opt.on('--max-jobs INT',
33
+ 'Maximum number of jobs to use simultaneously.'){ |v| o[:maxjobs]=v.to_i }
34
+ opt.on('--ppn INT',
35
+ 'Maximum number of cores to use in a single job.'){ |v| o[:ppn]=v.to_i }
36
36
  opt_common(opt, o)
37
- opt.separator "Daemon options:"
38
- opt.on("-t", "--ontop",
39
- "Stay on top (does not daemonize)."){ o[:daemon_opts] << '-t' }
40
- opt.on("-f", "--force", "Force operation."){ o[:daemon_opts] << '-f' }
41
- opt.on("-n", "--no_wait",
42
- "Do not wait for processes to stop."){ o[:daemon_opts] << '-n' }
43
- opt.on("--shush", "Silence the daemon."){ o[:daemon_opts] << '--shush' }
37
+ opt.separator 'Daemon options:'
38
+ opt.on('-t', '--ontop',
39
+ 'Stay on top (does not daemonize).'){ o[:daemon_opts] << '-t' }
40
+ opt.on('-f', '--force', 'Force operation.'){ o[:daemon_opts] << '-f' }
41
+ opt.on('-n', '--no_wait',
42
+ 'Do not wait for processes to stop.'){ o[:daemon_opts] << '-n' }
43
+ opt.on('--shush', 'Silence the daemon.'){ o[:daemon_opts] << '--shush' }
44
44
  end.parse!
45
45
 
46
46
  ##=> Main <=
47
- opt_require(o, project:"-P")
47
+ opt_require(o, project: '-P')
48
48
 
49
- raise "Project doesn't exist, aborting." unless MiGA::Project.exist? o[:project]
50
- p = MiGA::Project.new(o[:project])
49
+ p = MiGA::Project.load(o[:project]) or raise 'Project doesn\'t exist, aborting.'
51
50
  d = MiGA::Daemon.new(p)
52
51
  [:latency, :maxjobs, :ppn, :shutdown_when_done].each do |k|
53
52
  d.runopts(k, o[k]) unless o[k].nil?
54
53
  end
55
54
  d.daemon(task, o[:daemon_opts])
55
+
data/actions/get.rb CHANGED
@@ -5,7 +5,8 @@
5
5
 
6
6
  require 'miga/remote_dataset'
7
7
 
8
- o = {q: true, query: false, universe: :ebi, db: :embl, get_md: false}
8
+ o = {q: true, query: false, universe: :ebi, db: :embl,
9
+ get_md: false, only_md: false}
9
10
  OptionParser.new do |opt|
10
11
  opt_banner(opt)
11
12
  opt_object(opt, o, [:project, :dataset, :dataset_type])
@@ -39,6 +40,9 @@ OptionParser.new do |opt|
39
40
  opt.on('--get-metadata',
40
41
  'Only download and update metadata for existing datasets'
41
42
  ){ |v| o[:get_md] = v }
43
+ opt.on('--only-metadata',
44
+ 'Create datasets without input data but retrieve all metadata.'
45
+ ){ |v| o[:only_md] = v }
42
46
  opt.on('--api-key STRING',
43
47
  'API key for the given universe.'){ |v| o[:api_key] = v }
44
48
  opt_common(opt, o)
@@ -91,6 +95,7 @@ glob.each do |o_i|
91
95
  $stderr.puts 'Creating dataset.' unless o_i[:q]
92
96
  dummy_d = MiGA::Dataset.new(p, o_i[:dataset])
93
97
  md = add_metadata(o_i, dummy_d).metadata.data
98
+ md[:metadata_only] = true if o[:only_md]
94
99
  dummy_d.remove!
95
100
  rd.save_to(p, o_i[:dataset], !o_i[:query], md)
96
101
  p.add_dataset(o_i[:dataset])
data/actions/ncbi_get.rb CHANGED
@@ -6,11 +6,11 @@
6
6
  require 'miga/remote_dataset'
7
7
  require 'csv'
8
8
 
9
- o = {q:true, query:false, unlink:false,
9
+ o = {q: true, query: false, unlink: false,
10
10
  reference: false, legacy_name: false,
11
11
  complete: false, chromosome: false,
12
12
  scaffold: false, contig: false, add_version: true, dry: false,
13
- get_md: false}
13
+ get_md: false, only_md: false, save_every: 1}
14
14
  OptionParser.new do |opt|
15
15
  opt_banner(opt)
16
16
  opt_object(opt, o, [:project])
@@ -43,6 +43,13 @@ OptionParser.new do |opt|
43
43
  opt.on('--get-metadata',
44
44
  'Only download and update metadata for existing datasets'
45
45
  ){ |v| o[:get_md] = v }
46
+ opt.on('--only-metadata',
47
+ 'Create datasets without input data but retrieve all metadata.'
48
+ ){ |v| o[:only_md] = v }
49
+ opt.on('--save-every INT',
50
+ 'Save project every this many downloaded datasets.',
51
+ 'If zero, it saves the project only once upon completion.',
52
+ 'By default: 1.'){ |v| o[:save_every] = v.to_i }
46
53
  opt.on('-q', '--query',
47
54
  'Register the datasets as queries, not reference datasets.'
48
55
  ){ |v| o[:query]=v }
@@ -61,6 +68,7 @@ opt_require(o, taxon: '-T') unless o[:reference]
61
68
  unless %w[reference complete chromosome scaffold contig].any?{ |i| o[i.to_sym] }
62
69
  raise 'No action requested. Pick at least one type of genome.'
63
70
  end
71
+ o[:save_every] = 1 if o[:dry]
64
72
 
65
73
  ##=> Main <=
66
74
  $stderr.puts "Loading project." unless o[:q]
@@ -100,6 +108,7 @@ doc = MiGA::RemoteDataset.download_url(url)
100
108
  CSV.parse(doc, headers: true).each do |r|
101
109
  asm = r['assembly']
102
110
  next if asm.nil? or asm.empty? or asm == '-'
111
+ next unless r['ftp_path_genbank']
103
112
 
104
113
  # Get replicons
105
114
  rep = r['replicons'].nil? ? nil : r['replicons'].
@@ -141,14 +150,16 @@ end
141
150
 
142
151
  # Download entries
143
152
  $stderr.puts "Downloading #{ds.size} " +
144
- (ds.size == 1 ? "entry" : "entries") unless o[:q]
145
- ds.each do |name,body|
153
+ (ds.size == 1 ? 'entry' : 'entries') unless o[:q]
154
+ p.do_not_save = true if o[:save_every] <= 1
155
+ ds.each do |name, body|
146
156
  d << name
147
157
  puts name
148
158
  next if p.dataset(name).nil? == o[:get_md]
149
159
  downloaded += 1
150
160
  next if o[:dry]
151
161
  $stderr.puts ' Locating remote dataset.' unless o[:q]
162
+ body[:md][:metadata_only] = true if o[:only_md]
152
163
  rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
153
164
  if o[:get_md]
154
165
  $stderr.puts ' Updating dataset.' unless o[:q]
@@ -158,8 +169,12 @@ ds.each do |name,body|
158
169
  rd.save_to(p, name, !o[:query], body[:md])
159
170
  p.add_dataset(name)
160
171
  end
172
+ p.save! if o[:save_every] > 1 and (downloaded % o[:save_every]) == 0
161
173
  end
162
174
 
175
+ p.do_not_save = false
176
+ p.save! if o[:save_every] != 1
177
+
163
178
  # Finalize
164
179
  $stderr.puts "Datasets listed: #{d.size}" unless o[:q]
165
180
  $stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
data/actions/rm.rb CHANGED
@@ -3,26 +3,27 @@
3
3
  # @package MiGA
4
4
  # @license Artistic-2.0
5
5
 
6
- o = {q:true, remove:false}
6
+ o = {q: true, remove: false}
7
7
  OptionParser.new do |opt|
8
8
  opt_banner(opt)
9
9
  opt_object(opt, o)
10
- opt.on("-r", "--remove", "Also remove all associated files.",
11
- "By default, only unlinks from metadata."){ o[:remove]=true }
10
+ opt.on('-r', '--remove', 'Also remove all associated files.',
11
+ 'By default, only unlinks from metadata.'){ o[:remove] = true }
12
12
  opt_common(opt, o)
13
13
  end.parse!
14
14
 
15
15
  ##=> Main <=
16
16
  opt_require(o)
17
17
 
18
- $stderr.puts "Loading project." unless o[:q]
18
+ $stderr.puts 'Loading project.' unless o[:q]
19
19
  p = MiGA::Project.load(o[:project])
20
20
  raise "Impossible to load project: #{o[:project]}" if p.nil?
21
21
 
22
- $stderr.puts "Unlinking dataset." unless o[:q]
23
- raise "Dataset doesn't exist, aborting." unless
22
+ $stderr.puts 'Unlinking dataset.' unless o[:q]
23
+ raise 'Dataset doesn\'t exist, aborting.' unless
24
24
  MiGA::Dataset.exist?(p, o[:dataset])
25
25
  d = p.unlink_dataset(o[:dataset])
26
26
  d.remove! if o[:remove]
27
27
 
28
- $stderr.puts "Done." unless o[:q]
28
+ $stderr.puts 'Done.' unless o[:q]
29
+
data/lib/miga/common.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'json'
5
4
  require 'miga/version'
5
+ require 'miga/json'
6
6
  require 'miga/common/base'
7
7
  require 'miga/common/path'
8
8
  require 'miga/common/format'
@@ -11,7 +11,7 @@ module MiGA::Daemon::Base
11
11
  # Set/get #options, where +k+ is the Symbol of the option and +v+ is the value
12
12
  # (or nil to use as getter). Skips consistency tests if +force+. Returns new
13
13
  # value.
14
- def runopts(k, v=nil, force=false)
14
+ def runopts(k, v = nil, force = false)
15
15
  k = k.to_sym
16
16
  unless v.nil?
17
17
  if [:latency, :maxjobs, :ppn].include?(k)
@@ -19,16 +19,9 @@ module MiGA::Daemon::Base
19
19
  elsif [:shutdown_when_done].include?(k)
20
20
  v = !!v
21
21
  end
22
- raise "Daemon's #{k} cannot be set to zero." if !force and v==0
22
+ raise "Daemon's #{k} cannot be set to zero." if !force and v == 0
23
23
  @runopts[k] = v
24
24
  end
25
- if k==:kill and v.nil?
26
- case @runopts[:type].to_s
27
- when 'bash' then return "kill -9 '%s'"
28
- when 'qsub' then return "qdel '%s'"
29
- else return "canceljob '%s'"
30
- end
31
- end
32
25
  @runopts[k]
33
26
  end
34
27
 
@@ -51,19 +44,19 @@ module MiGA::Daemon::Base
51
44
 
52
45
  ##
53
46
  # Initializes the daemon with +opts+.
54
- def start(opts=[]) daemon('start', opts); end
47
+ def start(opts = []) daemon('start', opts); end
55
48
 
56
49
  ##
57
50
  # Stops the daemon with +opts+.
58
- def stop(opts=[]) daemon('stop', opts); end
51
+ def stop(opts = []) daemon('stop', opts); end
59
52
 
60
53
  ##
61
54
  # Restarts the daemon with +opts+.
62
- def restart(opts=[]) daemon('restart', opts); end
55
+ def restart(opts = []) daemon('restart', opts); end
63
56
 
64
57
  ##
65
58
  # Returns the status of the daemon with +opts+.
66
- def status(opts=[]) daemon('status', opts); end
59
+ def status(opts = []) daemon('status', opts); end
67
60
 
68
61
  end
69
62
 
data/lib/miga/daemon.rb CHANGED
@@ -39,9 +39,9 @@ class MiGA::Daemon < MiGA::MiGA
39
39
  def initialize(project)
40
40
  $_MIGA_DAEMON_LAIR << self
41
41
  @project = project
42
- @runopts = JSON.parse(
43
- File.read(File.expand_path('daemon/daemon.json', project.path)),
44
- symbolize_names: true)
42
+ @runopts = MiGA::Json.parse(
43
+ File.expand_path('daemon/daemon.json', project.path),
44
+ default: File.expand_path('.miga_daemon.json', ENV['MIGA_HOME']))
45
45
  @jobs_to_run = []
46
46
  @jobs_running = []
47
47
  @loop_i = -1
@@ -84,10 +84,9 @@ class MiGA::Daemon < MiGA::MiGA
84
84
  ##
85
85
  # Report status in a JSON file.
86
86
  def report_status
87
- f = File.open(File.expand_path('daemon/status.json', project.path), 'w')
88
- f.print JSON.pretty_generate(
89
- jobs_running: @jobs_running, jobs_to_run: @jobs_to_run)
90
- f.close
87
+ MiGA::Json.generate(
88
+ {jobs_running: @jobs_running, jobs_to_run: @jobs_to_run},
89
+ File.expand_path('daemon/status.json', project.path))
91
90
  end
92
91
 
93
92
  ##
@@ -96,7 +95,7 @@ class MiGA::Daemon < MiGA::MiGA
96
95
  f_path = File.expand_path('daemon/status.json', project.path)
97
96
  return unless File.size? f_path
98
97
  say 'Loading previous status in daemon/status.json:'
99
- status = JSON.parse(File.read(f_path), symbolize_names: true)
98
+ status = MiGA::Json.parse(f_path)
100
99
  status.keys.each do |i|
101
100
  status[i].map! do |j|
102
101
  j.tap do |k|
@@ -255,11 +254,6 @@ class MiGA::Daemon < MiGA::MiGA
255
254
  def terminate
256
255
  say 'Terminating daemon...'
257
256
  report_status
258
- k = runopts(:kill)
259
- @jobs_running.each do |i|
260
- `#{k % i[:pid]}`
261
- puts "Terminating pid:#{i[:pid]} for #{i[:task_name]}"
262
- end
263
257
  f = File.expand_path('daemon/alive', project.path)
264
258
  File.unlink(f) if File.exist? f
265
259
  end
data/lib/miga/json.rb ADDED
@@ -0,0 +1,62 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'json'
5
+
6
+ ##
7
+ # JSON parsing and generation helpers in MiGA.
8
+ class MiGA::Json < MiGA::MiGA
9
+
10
+ class << self
11
+
12
+ ##
13
+ # Default parsing options. Supported +opts+ keys:
14
+ # - +:contents+: If true, the input is assumed to be the contents to parse,
15
+ # not the path to a JSON file.
16
+ # - +:default+: A base to attach the parsed hash. A Hash or a String (path).
17
+ # - +:additions+: If addition classes should be parsed. False by default.
18
+ # - +:symbolize+: If names should be symbolized. By default it's true if
19
+ # additions is false, or false otherwise. They can both be false, but an
20
+ # exception will be raised if both are true
21
+ def default_opts(opts = {})
22
+ opts[:contents] ||= false
23
+ opts[:additions] ||= false
24
+ opts[:symbolize] = !opts[:additions] if opts[:symbolize].nil?
25
+ if opts[:additions] and opts[:symbolize]
26
+ raise 'JSON additions are not supported with symbolized names'
27
+ end
28
+ opts
29
+ end
30
+
31
+ ##
32
+ # Parse a JSON file in +path+ and return a hash. Optionally,
33
+ # use +default+ as the base to attach the parsed hash. +default+
34
+ # can be a Hash or a String (path). See +default_opts+ for supported
35
+ # +opts+.
36
+ def parse(path, opts = {})
37
+ opts = default_opts(opts)
38
+ cont = opts[:contents] ? path : File.read(path)
39
+ raise "Empty descriptor: #{opts[:contents] ? "''" : path}." if cont.empty?
40
+ y = JSON.parse(cont,
41
+ symbolize_names: opts[:symbolize],
42
+ create_additions: opts[:additions])
43
+ unless opts[:default].nil?
44
+ opts[:default] = parse(opts[:default]) if opts[:default].is_a? String
45
+ y.each{ |k, v| opts[:default][k] = v }
46
+ y = opts[:default]
47
+ end
48
+ y
49
+ end
50
+
51
+ ##
52
+ # Generates and returns prettified JSON to represent +obj+.
53
+ # If +path+ is passed, it saves the JSON in that file.
54
+ def generate(obj, path = nil)
55
+ y = JSON.pretty_generate(obj)
56
+ File.open(path, 'w') { |fh| fh.print y } unless path.nil?
57
+ y
58
+ end
59
+
60
+ end
61
+
62
+ end
data/lib/miga/metadata.rb CHANGED
@@ -57,14 +57,17 @@ class MiGA::Metadata < MiGA::MiGA
57
57
  def save
58
58
  MiGA.DEBUG "Metadata.save #{path}"
59
59
  self[:updated] = Time.now.to_s
60
- json = JSON.pretty_generate(data)
60
+ json = MiGA::Json.generate(data)
61
61
  sleeper = 0.0
62
+ slept = 0
62
63
  while File.exist?(lock_file)
63
64
  sleeper += 0.1 if sleeper <= 10.0
64
65
  sleep(sleeper.to_i)
66
+ slept += sleeper.to_i
67
+ raise "Lock detected for over 10 minutes: #{lock_file}" if slept > 600
65
68
  end
66
69
  FileUtils.touch lock_file
67
- ofh = File.open("#{path}.tmp", "w")
70
+ ofh = File.open("#{path}.tmp", 'w')
68
71
  ofh.puts json
69
72
  ofh.close
70
73
  raise "Lock-racing detected for #{path}." unless
@@ -81,9 +84,7 @@ class MiGA::Metadata < MiGA::MiGA
81
84
  sleeper += 0.1 if sleeper <= 10.0
82
85
  sleep(sleeper.to_i)
83
86
  end
84
- # :symbolize_names does not play nicely with :create_additions
85
- tmp = JSON.parse(File.read(path),
86
- {:symbolize_names=>false, :create_additions=>true})
87
+ tmp = MiGA::Json.parse(path, additions: true)
87
88
  @data = {}
88
89
  tmp.each_pair{ |k,v| self[k] = v }
89
90
  end
data/lib/miga/project.rb CHANGED
@@ -1,10 +1,10 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "miga/dataset"
5
- require "miga/project/result"
6
- require "miga/project/dataset"
7
- require "miga/project/plugins"
4
+ require 'miga/dataset'
5
+ require 'miga/project/result'
6
+ require 'miga/project/dataset'
7
+ require 'miga/project/plugins'
8
8
 
9
9
  ##
10
10
  # MiGA representation of a project.
@@ -17,16 +17,21 @@ class MiGA::Project < MiGA::MiGA
17
17
  ##
18
18
  # Absolute path to the project folder.
19
19
  attr_reader :path
20
-
20
+
21
21
  ##
22
22
  # Information about the project as MiGA::Metadata.
23
23
  attr_reader :metadata
24
24
 
25
+ ##
26
+ # If true, it doesn't save changes
27
+ attr_accessor :do_not_save
28
+
25
29
  ##
26
30
  # Create a new MiGA::Project at +path+, if it doesn't exist and +update+ is
27
31
  # false, or load an existing one.
28
32
  def initialize(path, update=false)
29
33
  @datasets = {}
34
+ @do_not_save = false
30
35
  @path = File.absolute_path(path)
31
36
  self.create if not update and not Project.exist? self.path
32
37
  self.load if self.metadata.nil?
@@ -39,7 +44,7 @@ class MiGA::Project < MiGA::MiGA
39
44
  # Create an empty project.
40
45
  def create
41
46
  unless MiGA::MiGA.initialized?
42
- raise "Impossible to create project in uninitialized MiGA."
47
+ raise 'Impossible to create project in uninitialized MiGA.'
43
48
  end
44
49
  dirs = [path] + @@FOLDERS.map{|d| "#{path}/#{d}" } +
45
50
  @@DATA_FOLDERS.map{ |d| "#{path}/data/#{d}"}
@@ -51,14 +56,20 @@ class MiGA::Project < MiGA::MiGA
51
56
  File.exist? "#{path}/daemon/daemon.json"
52
57
  self.load
53
58
  end
54
-
59
+
55
60
  ##
56
- # Save any changes persistently.
61
+ # Save any changes persistently. Do nothing if +do_not_save+ is true.
57
62
  def save
63
+ save! unless do_not_save
64
+ end
65
+
66
+ ##
67
+ # Save any changes persistently, regardless of +do_not_save+.
68
+ def save!
58
69
  metadata.save
59
70
  self.load
60
71
  end
61
-
72
+
62
73
  ##
63
74
  # (Re-)load project data and metadata.
64
75
  def load
@@ -67,7 +78,7 @@ class MiGA::Project < MiGA::MiGA
67
78
  @metadata = MiGA::Metadata.load "#{path}/miga.project.json"
68
79
  raise "Couldn't find project metadata at #{path}" if metadata.nil?
69
80
  end
70
-
81
+
71
82
  ##
72
83
  # Name of the project.
73
84
  def name ; metadata[:name] ; end
@@ -83,5 +94,5 @@ class MiGA::Project < MiGA::MiGA
83
94
  ##
84
95
  # Is this a project for multi-organism datasets?
85
96
  def is_multi? ; @@KNOWN_TYPES[type][:multi] ; end
86
-
97
+
87
98
  end
@@ -91,7 +91,7 @@ class MiGA::RemoteDataset
91
91
  def ncbi_map(id, dbfrom, db)
92
92
  doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
93
93
  return if doc.empty?
94
- tree = JSON.parse(doc, symbolize_names: true)
94
+ tree = MiGA::Json.parse(doc, contents: true)
95
95
  [:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
96
96
  tree = tree[i]
97
97
  break if tree.nil?
@@ -13,7 +13,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
13
13
  class << self
14
14
  def ncbi_asm_acc2id(acc)
15
15
  return acc if acc =~ /^\d+$/
16
- search_doc = JSON.parse download(:ncbi_search, :assembly, acc, :json)
16
+ search_doc = MiGA::Json.parse(
17
+ download(:ncbi_search, :assembly, acc, :json),
18
+ symbolize: false, contents: true)
17
19
  search_doc['esearchresult']['idlist'].first
18
20
  end
19
21
  end
@@ -55,6 +57,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
55
57
  ##
56
58
  # Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
57
59
  # indicates if it should be a reference dataset, and contains +metadata_def+.
60
+ # If +metadata_def+ includes +metadata_only: true+, no input data is
61
+ # downloaded.
58
62
  def save_to(project, name = nil, is_ref = true, metadata_def = {})
59
63
  name ||= ids.join('_').miga_name
60
64
  project = MiGA::Project.new(project) if project.is_a? String
@@ -63,16 +67,20 @@ class MiGA::RemoteDataset < MiGA::MiGA
63
67
  @metadata = get_metadata(metadata_def)
64
68
  udb = @@UNIVERSE[universe][:dbs][db]
65
69
  @metadata["#{universe}_#{db}"] = ids.join(',')
66
- respond_to?("save_#{udb[:stage]}_to", true) or
67
- raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
68
- send "save_#{udb[:stage]}_to", project, name, udb
70
+ unless @metadata[:metadata_only]
71
+ respond_to?("save_#{udb[:stage]}_to", true) or
72
+ raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
73
+ send "save_#{udb[:stage]}_to", project, name, udb
74
+ end
69
75
  dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
70
76
  project.add_dataset(dataset.name)
71
- result = dataset.add_result(udb[:stage], true, is_clean: true)
72
- result.nil? and
73
- raise 'Empty dataset: seed result not added due to incomplete files.'
74
- result.clean!
75
- result.save
77
+ unless @metadata[:metadata_only]
78
+ result = dataset.add_result(udb[:stage], true, is_clean: true)
79
+ result.nil? and
80
+ raise 'Empty dataset: seed result not added due to incomplete files.'
81
+ result.clean!
82
+ result.save
83
+ end
76
84
  dataset
77
85
  end
78
86
 
@@ -141,8 +149,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
141
149
  metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
142
150
  return nil unless metadata[:ncbi_asm]
143
151
  ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
144
- doc = JSON.parse(
145
- self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json))
152
+ doc = MiGA::Json.parse(
153
+ self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
154
+ symbolize: false, contents: true)
146
155
  @_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
147
156
  end
148
157
 
data/lib/miga/result.rb CHANGED
@@ -122,19 +122,14 @@ class MiGA::Result < MiGA::MiGA
122
122
  @data[:started] = File.read(s).chomp
123
123
  File.unlink s
124
124
  end
125
- json = JSON.pretty_generate data
126
- ofh = File.open(path, "w")
127
- ofh.puts json
128
- ofh.close
125
+ MiGA::Json.generate(data, path)
129
126
  self.load
130
127
  end
131
128
 
132
129
  ##
133
130
  # Load (or reload) result data in the JSON file #path.
134
131
  def load
135
- json = File.read(path)
136
- raise "Impossible to load result, empty descriptor: #{path}." if json.empty?
137
- @data = JSON.parse(json, {:symbolize_names=>true})
132
+ @data = MiGA::Json.parse(path)
138
133
  @data[:files] ||= {}
139
134
  @results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
140
135
  end
@@ -57,8 +57,8 @@ class MiGA::TaxIndex < MiGA::MiGA
57
57
  ##
58
58
  # Generate JSON String for the index.
59
59
  def to_json
60
- JSON.generate({ root:root.to_hash,
61
- datasets:datasets.map{ |d| d.name } })
60
+ MiGA::Json.generate(
61
+ { root: root.to_hash, datasets: datasets.map{ |d| d.name } })
62
62
  end
63
63
 
64
64
  ##
data/lib/miga/taxonomy.rb CHANGED
@@ -15,37 +15,37 @@ class MiGA::Taxonomy < MiGA::MiGA
15
15
  ##
16
16
  # Long names of the canonical ranks.
17
17
  def self.LONG_RANKS() @@LONG_RANKS ; end
18
- @@LONG_RANKS = {root: "root", ns: "namespace", d: "domain", k: "kingdom",
19
- p: "phylum", c: "class", o: "order", f: "family", g: "genus", s: "species",
20
- ssp: "subspecies", str: "strain", ds: "dataset"}
18
+ @@LONG_RANKS = {root: 'root', ns: 'namespace', d: 'domain', k: 'kingdom',
19
+ p: 'phylum', c: 'class', o: 'order', f: 'family', g: 'genus', s: 'species',
20
+ ssp: 'subspecies', str: 'strain', ds: 'dataset'}
21
21
 
22
22
  ##
23
23
  # Synonyms for canonical ranks.
24
24
  @@RANK_SYNONYMS = {
25
- "namespace"=>"ns",
26
- "domain"=>"d","superkingdom"=>"d",
27
- "kingdom"=>"k",
28
- "phylum"=>"p",
29
- "class"=>"c",
30
- "order"=>"o",
31
- "family"=>"f",
32
- "genus"=>"g",
33
- "species"=>"s","sp"=>"s",
34
- "subspecies"=>"ssp",
35
- "strain"=>"str","isolate"=>"str","culture"=>"str",
36
- "dataset"=>"ds","organism"=>"ds","genome"=>"ds","specimen"=>"ds"
25
+ 'namespace' => 'ns',
26
+ 'domain' => 'd', 'superkingdom' => 'd',
27
+ 'kingdom' => 'k',
28
+ 'phylum' => 'p',
29
+ 'class' => 'c',
30
+ 'order' => 'o',
31
+ 'family' => 'f',
32
+ 'genus' => 'g',
33
+ 'species' => 's', 'sp' => 's',
34
+ 'subspecies' => 'ssp',
35
+ 'strain' => 'str', 'isolate' => 'str', 'culture' => 'str',
36
+ 'dataset' => 'ds', 'organism' => 'ds', 'genome' => 'ds', 'specimen' => 'ds'
37
37
  }
38
38
 
39
39
  ##
40
40
  # Initialize from JSON-derived Hash +o+.
41
- def self.json_create(o) new(o["str"]) ; end
41
+ def self.json_create(o) new(o['str']) ; end
42
42
 
43
43
  ##
44
44
  # Returns canonical rank (Symbol) for the +rank+ String.
45
45
  def self.normalize_rank(rank)
46
46
  return rank.to_sym if @@_KNOWN_RANKS_H[rank.to_sym]
47
47
  rank = rank.to_s.downcase
48
- return nil if rank=="no rank"
48
+ return nil if rank == 'no rank'
49
49
  rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
50
50
  rank = rank.to_sym
51
51
  return nil unless @@_KNOWN_RANKS_H[rank]
@@ -64,7 +64,7 @@ class MiGA::Taxonomy < MiGA::MiGA
64
64
  # either a rank:value pair (if +ranks+ is nil), or just values in the same
65
65
  # order as the ranks in +ranks+. Alternatively, +str+ as a Hash with rank =>
66
66
  # value pairs is also supported.
67
- def initialize(str, ranks=nil)
67
+ def initialize(str, ranks = nil)
68
68
  @ranks = {}
69
69
  if ranks.nil?
70
70
  case str when Array, Hash
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 9, 1]
13
+ VERSION = [0.3, 10, 0]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2019, 03, 8)
21
+ VERSION_DATE = Date.new(2019, 04, 9)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
data/test/daemon_test.rb CHANGED
@@ -114,11 +114,6 @@ class DaemonTest < Test::Unit::TestCase
114
114
  $d1.runopts(:latency, "!")
115
115
  end
116
116
  assert_equal("bash", $d1.runopts(:type))
117
- assert_equal("kill -9 '%s'", $d1.runopts(:kill))
118
- $d1.runopts(:type, "qsub")
119
- assert_equal("qdel '%s'", $d1.runopts(:kill))
120
- $d1.runopts(:type, "msub")
121
- assert_equal("canceljob '%s'", $d1.runopts(:kill))
122
117
  end
123
118
 
124
119
  def test_say
data/test/json_test.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'test_helper'
2
+
3
+ class JsonTest < Test::Unit::TestCase
4
+
5
+ def test_parse
6
+ assert_equal({a: 1, b: 2},
7
+ MiGA::Json.parse('{"a": 1, "b": 2}', contents: true))
8
+ assert_equal({'a' => 1, 'b' => 2},
9
+ MiGA::Json.parse('{"a": 1, "b": 2}', contents: true, symbolize: false))
10
+ assert_equal(1.0, MiGA::Json.parse('1.0', contents: true))
11
+ end
12
+
13
+ def test_defaults
14
+ tmp1 = Tempfile.new('test-parse-1.json')
15
+ tmp1.puts '{"a": 123, "k": false, "t": null}'
16
+ tmp1.close
17
+ assert_equal({a: 123, k: false, t: nil}, MiGA::Json.parse(tmp1.path))
18
+
19
+ tmp2 = Tempfile.new('test-parse-2.json')
20
+ tmp2.puts '{"a": 456, "kb": false, "t": 10.0}'
21
+ tmp2.close
22
+ assert_equal({a: 456, kb: false, t: 10.0}, MiGA::Json.parse(tmp2.path))
23
+
24
+ assert_equal(
25
+ {a: 123, k: false, kb: false, t: nil},
26
+ MiGA::Json.parse(tmp1.path, default: tmp2.path))
27
+ assert_equal(
28
+ {a: 456, k: false, kb: false, t: 10.0},
29
+ MiGA::Json.parse(tmp2.path, default: tmp1.path))
30
+ end
31
+
32
+ def test_generate
33
+ assert_equal("{\n \"a\": 1,\n \"b\": 2\n}",
34
+ MiGA::Json.generate({a: 1, b: 2}))
35
+ end
36
+
37
+ end
@@ -5,17 +5,22 @@ require 'miga'
5
5
 
6
6
  ARGV[1] or abort "Usage: #{$0} path/to/project threads"
7
7
 
8
- $stderr.puts "Cleaning databases..."
8
+ $stderr.puts 'Cleaning databases...'
9
9
  p = MiGA::Project.load(ARGV[0])
10
10
  ds_names = p.dataset_names
11
11
  thr = ARGV[1].to_i
12
12
 
13
+ pc = [0] + (1 .. 100).map{ |i| ds_names.size * i / 100 }
14
+ $stderr.puts (('.'*9 + '|')*10) + ' 100%'
15
+
13
16
  (0 .. thr-1).each do |t|
14
17
  fork do
15
- k = -1
16
- ds_names.each do |i|
17
- k = (k+1) % thr
18
- next unless k == t
18
+ ds_names.each_with_index do |i, idx|
19
+ while t == 0 and idx+1 > pc.first
20
+ $stderr.print '#'
21
+ pc.shift
22
+ end
23
+ next unless (idx % thr) == t
19
24
  d = p.dataset(i)
20
25
  next unless d.is_ref? and d.is_active?
21
26
  d.cleanup_distances!
@@ -23,4 +28,5 @@ thr = ARGV[1].to_i
23
28
  end
24
29
  end
25
30
  Process.waitall
31
+ $stderr.puts ' Done'
26
32
 
@@ -108,7 +108,7 @@ class MiGA::DistanceRunner
108
108
 
109
109
  # Calculate all the AAIs/ANIs against the closest ANI95-clade (if AAI > 80%)
110
110
  cl_path = res.file_path :clades_ani95
111
- if File.size? cl_path and tsk[0] == :clade_finding
111
+ if !cl_path.nil? and File.size? cl_path and tsk[0] == :clade_finding
112
112
  File.foreach(cl_path).
113
113
  map { |i| i.chomp.split(',') }.
114
114
  find( lambda{[]} ){ |i| i.include? closest[:ds] }.
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
data/utils/ref-tree.R CHANGED
@@ -7,15 +7,23 @@
7
7
  #= Load stuff
8
8
  argv <- commandArgs(trailingOnly=T)
9
9
  suppressPackageStartupMessages(library(ape))
10
- suppressPackageStartupMessages(library(phytools))
11
- suppressPackageStartupMessages(library(phangorn))
12
10
  suppressPackageStartupMessages(library(enveomics.R))
11
+ inst <- c("phangorn", "phytools") %in% rownames(installed.packages())
12
+ if(inst[1]){
13
+ suppressPackageStartupMessages(library(phangorn))
14
+ reroot.fun <- midpoint
15
+ }else if(inst[2]){
16
+ suppressPackageStartupMessages(library(phytools))
17
+ reroot.fun <- midpoint.root
18
+ }else{
19
+ reroot.fun <- function(x) return(x)
20
+ }
13
21
 
14
22
  #= Main function
15
23
  ref_tree <- function(ani_file, out_base, q_dataset) {
16
24
  a <- read.table(ani_file, sep="\t", header=TRUE, as.is=TRUE)
17
25
  ani.d <- enve.df2dist(a[,1:3], default.d=0.9, max.sim=100)
18
- ani.ph <- midpoint(bionj(ani.d))
26
+ ani.ph <- reroot.fun(bionj(ani.d))
19
27
  write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
20
28
  pdf(paste(out_base, ".nwk.pdf", sep=""), 7, 7)
21
29
  plot(ani.ph, cex=1/3, type='fan',
@@ -24,42 +32,6 @@ ref_tree <- function(ani_file, out_base, q_dataset) {
24
32
  dev.off()
25
33
  }
26
34
 
27
- # Ancilliary functions
28
- midpoint <- function(tree){
29
- dm = cophenetic(tree)
30
- tree = unroot(tree)
31
- rn = max(tree$edge)+1
32
- maxdm = max(dm)
33
- ind = which(dm==maxdm,arr=TRUE)[1,]
34
- tmproot = Ancestors(tree, ind[1], "parent")
35
- tree = phangorn:::reroot(tree, tmproot)
36
- edge = tree$edge
37
- el = tree$edge.length
38
- children = tree$edge[,2]
39
- left = match(ind[1], children)
40
- tmp = Ancestors(tree, ind[2], "all")
41
- tmp= c(ind[2], tmp[-length(tmp)])
42
- right = match(tmp, children)
43
- if(el[left]>= (maxdm/2)){
44
- edge = rbind(edge, c(rn, ind[1]))
45
- edge[left,2] = rn
46
- el[left] = el[left] - (maxdm/2)
47
- el = c(el, maxdm/2)
48
- }else{
49
- sel = cumsum(el[right])
50
- i = which(sel>(maxdm/2))[1]
51
- edge = rbind(edge, c(rn, tmp[i]))
52
- edge[right[i],2] = rn
53
- eltmp = sel[i] - (maxdm/2)
54
- el = c(el, el[right[i]] - eltmp)
55
- el[right[i]] = eltmp
56
- }
57
- tree$edge.length = el
58
- tree$edge=edge
59
- tree$Nnode = tree$Nnode+1
60
- phangorn:::reorderPruning(phangorn:::reroot(tree, rn))
61
- }
62
-
63
35
  #= Main
64
36
  ref_tree(ani_file=argv[1], out_base=argv[2], q_dataset=argv[3])
65
37
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9.1
4
+ version: 0.3.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-08 00:00:00.000000000 Z
11
+ date: 2019-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -142,6 +142,7 @@ files:
142
142
  - lib/miga/dataset.rb
143
143
  - lib/miga/dataset/base.rb
144
144
  - lib/miga/dataset/result.rb
145
+ - lib/miga/json.rb
145
146
  - lib/miga/metadata.rb
146
147
  - lib/miga/project.rb
147
148
  - lib/miga/project/base.rb
@@ -184,6 +185,7 @@ files:
184
185
  - test/common_test.rb
185
186
  - test/daemon_test.rb
186
187
  - test/dataset_test.rb
188
+ - test/json_test.rb
187
189
  - test/metadata_test.rb
188
190
  - test/project_test.rb
189
191
  - test/remote_dataset_test.rb
@@ -506,8 +508,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
506
508
  - !ruby/object:Gem::Version
507
509
  version: '0'
508
510
  requirements: []
509
- rubyforge_project:
510
- rubygems_version: 2.7.7
511
+ rubygems_version: 3.0.2
511
512
  signing_key:
512
513
  specification_version: 4
513
514
  summary: MiGA