miga-base 0.2.0.9 → 0.2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ae747bf6d7acc800354df0bb0ca0f39d7495cd95
4
- data.tar.gz: 066b442431e8976c848cc6be009a21d5566ab6d9
3
+ metadata.gz: f914d4f05c7c1cb97b6036fa1af3bb5c6e95c341
4
+ data.tar.gz: 3812c256d78cbe8125ee5aa14a1785c3de57918f
5
5
  SHA512:
6
- metadata.gz: 56cdbe0f216acab5ddf14cd672e92fd282d161ff03b7a037a37f178cdd14082b7958f80fdbd0c4ad8418b3c21a6fd705457cedd0adbd4929225a297dab2a1003
7
- data.tar.gz: 85e068491a8c34194ff2bb9b9c721a97b830c1bc607c588b9043a026400cd39f757dd499471e03555c1ebfe4acea7fc9d6269ae6b50e54ebf40eda67b56eebec
6
+ metadata.gz: cdc6692724bb6b7c39a313c35eaabe51405d104d8af465618983a83601435af66ca1f7eca0a3e9fbc19b24ce13cbde8f9962beaf643ad1f9e3970bb10067f671
7
+ data.tar.gz: 45e19af2b3e14e225c9f6a89193b0c400fe8ac685296314b4320885a563ab2b3c4472563a45f98c5a7d15edd74573cf1f9176a59905a2764a809d6bc3850b90d
data/Rakefile CHANGED
@@ -5,6 +5,9 @@ SOURCES = FileList["lib/**/*.rb"]
5
5
  desc "Default Task"
6
6
  task :default => "test:all"
7
7
 
8
+ desc "Default tests"
9
+ task :test => "test:all"
10
+
8
11
  namespace :test do
9
12
  desc "All tests"
10
13
  Rake::TestTask.new(:all) do |t|
data/actions/add_result.rb ADDED
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true}
7
+ opts = OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt.on("-r", "--result STRING",
11
+ "(Mandatory) Name of the result to add.",
12
+ "Recognized names for dataset-specific results include:",
13
+ *MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
14
+ "Recognized names for project-wide results include:",
15
+ *MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}){ |v| o[:name]=v }
16
+ opt_common(opt, o)
17
+ end.parse!
18
+
19
+ ##=> Main <=
20
+ opts.parse!
21
+ opt_require(o, project:"-P", name:"-r")
22
+
23
+ $stderr.puts "Loading project." unless o[:q]
24
+ p = MiGA::Project.load(o[:project])
25
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
26
+
27
+ $stderr.puts "Registering result." unless o[:q]
28
+ if o[:dataset].nil?
29
+ r = p.add_result o[:name].to_sym
30
+ else
31
+ d = p.dataset(o[:dataset])
32
+ r = d.add_result o[:name].to_sym
33
+ end
34
+
35
+ raise "Cannot add result, incomplete expected files." if r.nil?
36
+
37
+ $stderr.puts "Done." unless o[:q]
data/actions/add_taxonomy.rb ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt.on("-s", "--tax-string STRING",
11
+ "String corresponding to the taxonomy of the dataset.",
12
+ "The MiGA format of string taxonomy is a space-delimited",
13
+ "set of 'rank:name' pairs."){ |v| o[:taxstring]=v }
14
+ opt.on("-t", "--tax-file PATH",
15
+ "(Mandatory unless -D and -s are provided) Tab-delimited file containing",
16
+ "datasets taxonomy. Each row corresponds to a datasets and each column",
17
+ "corresponds to a rank. The first row must be a header with the rank ",
18
+ "names, and the first column must contain dataset names."
19
+ ){ |v| o[:taxfile]=v }
20
+ opt_common(opt, o)
21
+ end.parse!
22
+
23
+ ##=> Main <=
24
+ opt_require(o, project:"-P")
25
+ raise "-D is mandatory unless -t is provided." if
26
+ o[:dataset].nil? and o[:taxfile].nil?
27
+ raise "-s is mandatory unless -t is provided." if
28
+ o[:taxstring].nil? and o[:taxfile].nil?
29
+
30
+ $stderr.puts "Loading project." unless o[:q]
31
+ p = MiGA::Project.load(o[:project])
32
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
33
+
34
+ if not o[:taxfile].nil?
35
+ $stderr.puts "Reading tax-file and registering taxonomy." unless o[:q]
36
+ tfh = File.open(o[:taxfile], "r")
37
+ header = nil
38
+ tfh.each_line do |ln|
39
+ next if ln =~ /^\s*?$/
40
+ r = ln.chomp.split(/\t/, -1)
41
+ dn = r.shift
42
+ if header.nil?
43
+ header = r
44
+ next
45
+ end
46
+ d = p.dataset dn
47
+ if d.nil?
48
+ warn "Impossible to find dataset at line #{$.}: #{dn}. Ignoring..."
49
+ next
50
+ end
51
+ d.metadata[:tax] = MiGA::Taxonomy.new(r, header)
52
+ d.save
53
+ $stderr.puts " #{d.name} registered." unless o[:q]
54
+ end
55
+ tfh.close
56
+ else
57
+ $stderr.puts "Registering taxonomy." unless o[:q]
58
+ d = p.dataset o[:dataset]
59
+ d.metadata[:tax] = MiGA::Taxonomy.new(o[:taxstring])
60
+ d.save
61
+ end
62
+
63
+ $stderr.puts "Done." unless o[:q]
data/actions/create_dataset.rb ADDED
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, ref:true, update:false}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset, :dataset_type])
10
+ opt.on("-q", "--query",
11
+ "If set, the dataset is registered as a query, not a reference dataset."
12
+ ){ |v| o[:ref]=!v }
13
+ opt.on("-d", "--description STRING",
14
+ "Description of the dataset."){ |v| o[:description]=v }
15
+ opt.on("-u", "--user STRING",
16
+ "Owner of the dataset."){ |v| o[:user]=v }
17
+ opt.on("-c", "--comments STRING",
18
+ "Comments on the dataset."){ |v| o[:comments]=v }
19
+ opt.on("--update",
20
+ "Updates the dataset if it already exists."){ o[:update]=true }
21
+ opt_common(opt, o)
22
+ end.parse!
23
+
24
+ ##=> Main <=
25
+ opt_require(o)
26
+
27
+ $stderr.puts "Loading project." unless o[:q]
28
+ p = MiGA::Project.load(o[:project])
29
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
30
+
31
+ raise "Dataset already exists, aborting." unless
32
+ o[:update] or not MiGA::Dataset.exist?(p, o[:dataset])
33
+ $stderr.puts "Loading dataset." unless o[:q]
34
+ d = o[:update] ? p.dataset(o[:dataset]) :
35
+ MiGA::Dataset.new(p, o[:dataset], o[:ref], {})
36
+ raise "Dataset does not exist." if d.nil?
37
+ [:type, :description, :user, :comments].each do |k|
38
+ d.metadata[k]=o[k] unless o[k].nil?
39
+ end
40
+
41
+ if o[:update]
42
+ d.save
43
+ else
44
+ p.add_dataset(o[:dataset])
45
+ end
46
+ res = d.first_preprocessing(true)
47
+ $stderr.puts "- #{res}" unless o[:q]
48
+
49
+ $stderr.puts "Done." unless o[:q]
data/actions/create_project.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, update:false}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :project_type])
10
+ opt.on("-n", "--name STRING",
11
+ "Name of the project."){ |v| o[:name]=v }
12
+ opt.on("-d", "--description STRING",
13
+ "Description of the project."){ |v| o[:description]=v }
14
+ opt.on("-u", "--user STRING", "Owner of the project."){ |v| o[:user]=v }
15
+ opt.on("-c", "--comments STRING",
16
+ "Comments on the project."){ |v| o[:comments]=v }
17
+ opt.on("--update",
18
+ "Updates the project if it already exists."){ o[:update]=true }
19
+ opt_common(opt, o)
20
+ end.parse!
21
+
22
+ ##=> Main <=
23
+ opt_require(o, project:"-P")
24
+
25
+ unless File.exist? "#{ENV["HOME"]}/.miga_rc" and
26
+ File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
27
+ puts "You must initialize MiGA before creating the first project.\n" +
28
+ "Do you want to initialize MiGA now? (yes / no)"
29
+ `'#{File.dirname(__FILE__)}/../scripts/init.bash'` if
30
+ $stdin.gets.chomp == 'yes'
31
+ end
32
+
33
+ $stderr.puts "Creating project." unless o[:q]
34
+ raise "Project already exists, aborting." unless
35
+ o[:update] or not MiGA::Project.exist? o[:project]
36
+ p = MiGA::Project.new(o[:project], o[:update])
37
+ # The following check is redundant with MiGA::Project#create,
38
+ # but allows upgrading projects from (very) early code versions
39
+ o[:name] = File.basename(p.path) if
40
+ o[:update] and o[:name].nil?
41
+ [:name, :description, :user, :comments, :type].each do |k|
42
+ p.metadata[k] = o[k] unless o[k].nil?
43
+ end
44
+ p.save
45
+
46
+ $stderr.puts "Done." unless o[:q]
data/actions/daemon.rb ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ require "miga/daemon"
7
+
8
+ task = ARGV.shift unless ["-h","--help"].include? ARGV.first
9
+ ARGV << "-h" if ARGV.empty?
10
+ o = {q:true, daemon_opts:[]}
11
+ OptionParser.new do |opt|
12
+ opt_banner(opt)
13
+ opt.separator "task:"
14
+ { start: "Start an instance of the application.",
15
+ stop: "Start an instance of the application.",
16
+ restart: "Stop all instances and restart them afterwards.",
17
+ reload: "Send a SIGHUP to all instances of the application.",
18
+ run: "Start the application and stay on top.",
19
+ zap: "Set the application to a stopped state.",
20
+ status: "Show status (PID) of application instances."
21
+ }.each{ |k,v| opt.separator sprintf " %*s%s", -33, k, v }
22
+ opt.separator ""
23
+ opt.separator "MiGA options:"
24
+ opt_object(opt, o, [:project])
25
+ opt.on("--latency INT",
26
+ "Number of seconds the daemon will be sleeping."
27
+ ){ |v| o[:latency]=v.to_i }
28
+ opt.on("--max-jobs INT",
29
+ "Maximum number of jobs to use simultaneously."){ |v| o[:maxjobs]=v.to_i }
30
+ opt.on("--ppn INT",
31
+ "Maximum number of cores to use in a single job."){ |v| o[:ppn]=v.to_i }
32
+ opt_common(opt, o)
33
+ opt.separator "Daemon options:"
34
+ opt.on("-t", "--ontop",
35
+ "Stay on top (does not daemonize)"){ o[:daemon_opts] << '-t' }
36
+ opt.on("-f", "--force", "Force operation"){ o[:daemon_opts] << '-f' }
37
+ opt.on("-n", "--no_wait",
38
+ "Do not wait for processes to stop"){ o[:daemon_opts] << '-n' }
39
+ end.parse!
40
+
41
+ ##=> Main <=
42
+ opt_require(o, project:"-P")
43
+
44
+ raise "Project doesn't exist, aborting." unless MiGA::Project.exist? o[:project]
45
+ p = MiGA::Project.new(o[:project])
46
+ d = MiGA::Daemon.new(p)
47
+ [:latency, :maxjobs, :ppn].each do |k|
48
+ d.runopts(k, o[k]) unless o[k].nil?
49
+ end
50
+ d.daemon(task, o[:daemon_opts])
data/actions/date.rb ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {}
7
+ opts = OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_common(opt, o)
10
+ end.parse!
11
+
12
+ ##=> Main <=
13
+ opts.parse!
14
+ puts Time.now.to_s
@@ -7,16 +7,8 @@ require "miga/remote_dataset"
7
7
 
8
8
  o = {q:true, query:false, universe: :ebi, db: :embl}
9
9
  OptionParser.new do |opt|
10
- opt.banner = <<BAN
11
- Creates an empty dataset in a pre-existing MiGA project.
12
-
13
- Usage: #{$0} #{File.basename(__FILE__)} [options]
14
- BAN
15
- opt.separator ""
16
- opt.on("-P", "--project PATH",
17
- "(Mandatory) Path to the project to use."){ |v| o[:project]=v }
18
- opt.on("-D", "--dataset STRING",
19
- "(Mandatory unless -F) ID of the dataset to create."){ |v| o[:dataset]=v }
10
+ opt_banner(opt)
11
+ opt_object(opt, o, [:project, :dataset, :dataset_type])
20
12
  opt.on("-I", "--ids ID1,ID2,...",
21
13
  "(Mandatory unless -F) IDs in the remote database separated by commas."
22
14
  ){ |v| o[:ids]=v }
@@ -31,10 +23,6 @@ BAN
31
23
  "The long form of all the options are supported as header (without the --)",
32
24
  "including dataset, ids, universe, and db. For query use true/false values."
33
25
  ){ |v| o[:file] = v }
34
- opt.on("-t", "--type STRING",
35
- "Type of dataset. Recognized types include:",
36
- *MiGA::Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}" }
37
- ){ |v| o[:type]=v.to_sym }
38
26
  opt.on("-q", "--query",
39
27
  "If set, the dataset is registered as a query, not a reference dataset."
40
28
  ){ |v| o[:query]=v }
@@ -46,20 +34,11 @@ BAN
46
34
  "Owner of the dataset."){ |v| o[:user]=v }
47
35
  opt.on("-c", "--comments STRING",
48
36
  "Comments on the dataset."){ |v| o[:comments]=v }
49
- opt.on("-v", "--verbose",
50
- "Print additional information to STDERR."){ o[:q]=false }
51
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
52
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
53
- end
54
- opt.on("-h", "--help", "Display this screen.") do
55
- puts opt
56
- exit
57
- end
58
- opt.separator ""
37
+ opt_common(opt, o)
59
38
  end.parse!
60
39
 
61
40
 
62
- ### MAIN
41
+ ##=> Main <=
63
42
  glob = [o]
64
43
  unless o[:file].nil?
65
44
  glob = []
@@ -81,9 +60,7 @@ unless o[:file].nil?
81
60
  end
82
61
 
83
62
  glob.each do |o_i|
84
- raise "-P is mandatory." if o_i[:project].nil?
85
- raise "-D is mandatory." if o_i[:dataset].nil?
86
- raise "-I is mandatory." if o_i[:ids].nil?
63
+ opt_require(o_i, project:"-P", dataset:"-D", ids:"-I")
87
64
 
88
65
  $stderr.puts "Dataset: #{o_i[:dataset]}" unless o_i[:q]
89
66
  $stderr.puts "Loading project." unless o_i[:q]
data/actions/find_datasets.rb ADDED
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, add:false, ref:true}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_type])
10
+ opt.on("-a", "--add",
11
+ "Register the datasets found. By default, only lists them (dry run)."
12
+ ){ |v| o[:add]=v }
13
+ opt.on("-q", "--query",
14
+ "If set, all datasets are registered as query datasets."
15
+ ){ |v| o[:ref]=!v }
16
+ opt.on("-u", "--user STRING", "Owner of the dataset."){ |v| o[:user]=v }
17
+ opt_common(opt, o)
18
+ end.parse!
19
+
20
+ ##=> Main <=
21
+ opt_require(o, project:"-P")
22
+
23
+ $stderr.puts "Loading project." unless o[:q]
24
+ p = MiGA::Project.load(o[:project])
25
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
26
+
27
+ $stderr.puts "Finding datasets." unless o[:q]
28
+ ud = p.unregistered_datasets
29
+ ud.each do |dn|
30
+ puts dn
31
+ if o[:add]
32
+ md = {}
33
+ [:type, :user].each{ |k| md[k]=o[k] unless o[k].nil? }
34
+ d = MiGA::Dataset.new(p, dn, o[:ref], md)
35
+ p.add_dataset(dn)
36
+ res = d.first_preprocessing(true)
37
+ puts "- #{res}" unless o[:q]
38
+ end
39
+ end
40
+
41
+ $stderr.puts "Done." unless o[:q]
data/actions/import_datasets.rb ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, info:false, force:false, method: :hardlink }
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt.on("-Q", "--project-target PATH",
11
+ "(Mandatory) Path to the project where to link the dataset."
12
+ ){ |v| o[:project2]=v }
13
+ opt.on("-f", "--force",
14
+ "Forces linking, even if dataset's preprocessing is incomplete."
15
+ ){ |v| o[:force]=v }
16
+ opt.on("-s", "--symlink",
17
+ "Creates symlinks instead of the default hard links."
18
+ ){ o[:method] = :symlink }
19
+ opt.on("-c", "--copy",
20
+ "Creates copies instead of the default hard links."){ o[:method] = :copy }
21
+ opt_filter_datasets(opt, o)
22
+ opt_common(opt, o)
23
+ end.parse!
24
+
25
+ ##=> Main <=
26
+ opt_require(o, project:"-P", project2:"-Q")
27
+
28
+ $stderr.puts "Loading project." unless o[:q]
29
+ p = MiGA::Project.load(o[:project])
30
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
31
+ q = MiGA::Project.load(o[:project2])
32
+ raise "Impossible to load project: #{o[:project2]}" if q.nil?
33
+
34
+ $stderr.puts "Listing dataset." unless o[:q]
35
+ if o[:dataset].nil?
36
+ ds = p.datasets
37
+ else
38
+ ds = [p.dataset(o[:dataset])]
39
+ end
40
+ ds = filter_datasets!(ds, o)
41
+ ds.each do |d|
42
+ next unless o[:force] or d.done_preprocessing?
43
+ puts d.name
44
+ q.import_dataset(d, o[:method])
45
+ end
46
+
47
+ $stderr.puts "Done." unless o[:q]