miga-base 0.2.0.9 → 0.2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ae747bf6d7acc800354df0bb0ca0f39d7495cd95
4
- data.tar.gz: 066b442431e8976c848cc6be009a21d5566ab6d9
3
+ metadata.gz: f914d4f05c7c1cb97b6036fa1af3bb5c6e95c341
4
+ data.tar.gz: 3812c256d78cbe8125ee5aa14a1785c3de57918f
5
5
  SHA512:
6
- metadata.gz: 56cdbe0f216acab5ddf14cd672e92fd282d161ff03b7a037a37f178cdd14082b7958f80fdbd0c4ad8418b3c21a6fd705457cedd0adbd4929225a297dab2a1003
7
- data.tar.gz: 85e068491a8c34194ff2bb9b9c721a97b830c1bc607c588b9043a026400cd39f757dd499471e03555c1ebfe4acea7fc9d6269ae6b50e54ebf40eda67b56eebec
6
+ metadata.gz: cdc6692724bb6b7c39a313c35eaabe51405d104d8af465618983a83601435af66ca1f7eca0a3e9fbc19b24ce13cbde8f9962beaf643ad1f9e3970bb10067f671
7
+ data.tar.gz: 45e19af2b3e14e225c9f6a89193b0c400fe8ac685296314b4320885a563ab2b3c4472563a45f98c5a7d15edd74573cf1f9176a59905a2764a809d6bc3850b90d
data/Rakefile CHANGED
@@ -5,6 +5,9 @@ SOURCES = FileList["lib/**/*.rb"]
5
5
  desc "Default Task"
6
6
  task :default => "test:all"
7
7
 
8
+ desc "Default tests"
9
+ task :test => "test:all"
10
+
8
11
  namespace :test do
9
12
  desc "All tests"
10
13
  Rake::TestTask.new(:all) do |t|
data/actions/add_result.rb ADDED
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true}
7
+ opts = OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt.on("-r", "--result STRING",
11
+ "(Mandatory) Name of the result to add.",
12
+ "Recognized names for dataset-specific results include:",
13
+ *MiGA::Dataset.RESULT_DIRS.keys.map{|n| " ~ #{n}"},
14
+ "Recognized names for project-wide results include:",
15
+ *MiGA::Project.RESULT_DIRS.keys.map{|n| " ~ #{n}"}){ |v| o[:name]=v }
16
+ opt_common(opt, o)
17
+ end.parse!
18
+
19
+ ##=> Main <=
20
+ opts.parse!
21
+ opt_require(o, project:"-P", name:"-r")
22
+
23
+ $stderr.puts "Loading project." unless o[:q]
24
+ p = MiGA::Project.load(o[:project])
25
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
26
+
27
+ $stderr.puts "Registering result." unless o[:q]
28
+ if o[:dataset].nil?
29
+ r = p.add_result o[:name].to_sym
30
+ else
31
+ d = p.dataset(o[:dataset])
32
+ r = d.add_result o[:name].to_sym
33
+ end
34
+
35
+ raise "Cannot add result, incomplete expected files." if r.nil?
36
+
37
+ $stderr.puts "Done." unless o[:q]
data/actions/add_taxonomy.rb ADDED
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt.on("-s", "--tax-string STRING",
11
+ "String corresponding to the taxonomy of the dataset.",
12
+ "The MiGA format of string taxonomy is a space-delimited",
13
+ "set of 'rank:name' pairs."){ |v| o[:taxstring]=v }
14
+ opt.on("-t", "--tax-file PATH",
15
+ "(Mandatory unless -D and -s are provided) Tab-delimited file containing",
16
+ "datasets taxonomy. Each row corresponds to a datasets and each column",
17
+ "corresponds to a rank. The first row must be a header with the rank ",
18
+ "names, and the first column must contain dataset names."
19
+ ){ |v| o[:taxfile]=v }
20
+ opt_common(opt, o)
21
+ end.parse!
22
+
23
+ ##=> Main <=
24
+ opt_require(o, project:"-P")
25
+ raise "-D is mandatory unless -t is provided." if
26
+ o[:dataset].nil? and o[:taxfile].nil?
27
+ raise "-s is mandatory unless -t is provided." if
28
+ o[:taxstring].nil? and o[:taxfile].nil?
29
+
30
+ $stderr.puts "Loading project." unless o[:q]
31
+ p = MiGA::Project.load(o[:project])
32
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
33
+
34
+ if not o[:taxfile].nil?
35
+ $stderr.puts "Reading tax-file and registering taxonomy." unless o[:q]
36
+ tfh = File.open(o[:taxfile], "r")
37
+ header = nil
38
+ tfh.each_line do |ln|
39
+ next if ln =~ /^\s*?$/
40
+ r = ln.chomp.split(/\t/, -1)
41
+ dn = r.shift
42
+ if header.nil?
43
+ header = r
44
+ next
45
+ end
46
+ d = p.dataset dn
47
+ if d.nil?
48
+ warn "Impossible to find dataset at line #{$.}: #{dn}. Ignoring..."
49
+ next
50
+ end
51
+ d.metadata[:tax] = MiGA::Taxonomy.new(r, header)
52
+ d.save
53
+ $stderr.puts " #{d.name} registered." unless o[:q]
54
+ end
55
+ tfh.close
56
+ else
57
+ $stderr.puts "Registering taxonomy." unless o[:q]
58
+ d = p.dataset o[:dataset]
59
+ d.metadata[:tax] = MiGA::Taxonomy.new(o[:taxstring])
60
+ d.save
61
+ end
62
+
63
+ $stderr.puts "Done." unless o[:q]
data/actions/create_dataset.rb ADDED
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, ref:true, update:false}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset, :dataset_type])
10
+ opt.on("-q", "--query",
11
+ "If set, the dataset is registered as a query, not a reference dataset."
12
+ ){ |v| o[:ref]=!v }
13
+ opt.on("-d", "--description STRING",
14
+ "Description of the dataset."){ |v| o[:description]=v }
15
+ opt.on("-u", "--user STRING",
16
+ "Owner of the dataset."){ |v| o[:user]=v }
17
+ opt.on("-c", "--comments STRING",
18
+ "Comments on the dataset."){ |v| o[:comments]=v }
19
+ opt.on("--update",
20
+ "Updates the dataset if it already exists."){ o[:update]=true }
21
+ opt_common(opt, o)
22
+ end.parse!
23
+
24
+ ##=> Main <=
25
+ opt_require(o)
26
+
27
+ $stderr.puts "Loading project." unless o[:q]
28
+ p = MiGA::Project.load(o[:project])
29
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
30
+
31
+ raise "Dataset already exists, aborting." unless
32
+ o[:update] or not MiGA::Dataset.exist?(p, o[:dataset])
33
+ $stderr.puts "Loading dataset." unless o[:q]
34
+ d = o[:update] ? p.dataset(o[:dataset]) :
35
+ MiGA::Dataset.new(p, o[:dataset], o[:ref], {})
36
+ raise "Dataset does not exist." if d.nil?
37
+ [:type, :description, :user, :comments].each do |k|
38
+ d.metadata[k]=o[k] unless o[k].nil?
39
+ end
40
+
41
+ if o[:update]
42
+ d.save
43
+ else
44
+ p.add_dataset(o[:dataset])
45
+ end
46
+ res = d.first_preprocessing(true)
47
+ $stderr.puts "- #{res}" unless o[:q]
48
+
49
+ $stderr.puts "Done." unless o[:q]
data/actions/create_project.rb ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, update:false}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :project_type])
10
+ opt.on("-n", "--name STRING",
11
+ "Name of the project."){ |v| o[:name]=v }
12
+ opt.on("-d", "--description STRING",
13
+ "Description of the project."){ |v| o[:description]=v }
14
+ opt.on("-u", "--user STRING", "Owner of the project."){ |v| o[:user]=v }
15
+ opt.on("-c", "--comments STRING",
16
+ "Comments on the project."){ |v| o[:comments]=v }
17
+ opt.on("--update",
18
+ "Updates the project if it already exists."){ o[:update]=true }
19
+ opt_common(opt, o)
20
+ end.parse!
21
+
22
+ ##=> Main <=
23
+ opt_require(o, project:"-P")
24
+
25
+ unless File.exist? "#{ENV["HOME"]}/.miga_rc" and
26
+ File.exist? "#{ENV["HOME"]}/.miga_daemon.json"
27
+ puts "You must initialize MiGA before creating the first project.\n" +
28
+ "Do you want to initialize MiGA now? (yes / no)"
29
+ `'#{File.dirname(__FILE__)}/../scripts/init.bash'` if
30
+ $stdin.gets.chomp == 'yes'
31
+ end
32
+
33
+ $stderr.puts "Creating project." unless o[:q]
34
+ raise "Project already exists, aborting." unless
35
+ o[:update] or not MiGA::Project.exist? o[:project]
36
+ p = MiGA::Project.new(o[:project], o[:update])
37
+ # The following check is redundant with MiGA::Project#create,
38
+ # but allows upgrading projects from (very) early code versions
39
+ o[:name] = File.basename(p.path) if
40
+ o[:update] and o[:name].nil?
41
+ [:name, :description, :user, :comments, :type].each do |k|
42
+ p.metadata[k] = o[k] unless o[k].nil?
43
+ end
44
+ p.save
45
+
46
+ $stderr.puts "Done." unless o[:q]
data/actions/daemon.rb ADDED
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ require "miga/daemon"
7
+
8
+ task = ARGV.shift unless ["-h","--help"].include? ARGV.first
9
+ ARGV << "-h" if ARGV.empty?
10
+ o = {q:true, daemon_opts:[]}
11
+ OptionParser.new do |opt|
12
+ opt_banner(opt)
13
+ opt.separator "task:"
14
+ { start: "Start an instance of the application.",
15
+ stop: "Stop an instance of the application.",
16
+ restart: "Stop all instances and restart them afterwards.",
17
+ reload: "Send a SIGHUP to all instances of the application.",
18
+ run: "Start the application and stay on top.",
19
+ zap: "Set the application to a stopped state.",
20
+ status: "Show status (PID) of application instances."
21
+ }.each{ |k,v| opt.separator sprintf " %*s%s", -33, k, v }
22
+ opt.separator ""
23
+ opt.separator "MiGA options:"
24
+ opt_object(opt, o, [:project])
25
+ opt.on("--latency INT",
26
+ "Number of seconds the daemon will be sleeping."
27
+ ){ |v| o[:latency]=v.to_i }
28
+ opt.on("--max-jobs INT",
29
+ "Maximum number of jobs to use simultaneously."){ |v| o[:maxjobs]=v.to_i }
30
+ opt.on("--ppn INT",
31
+ "Maximum number of cores to use in a single job."){ |v| o[:ppn]=v.to_i }
32
+ opt_common(opt, o)
33
+ opt.separator "Daemon options:"
34
+ opt.on("-t", "--ontop",
35
+ "Stay on top (does not daemonize)"){ o[:daemon_opts] << '-t' }
36
+ opt.on("-f", "--force", "Force operation"){ o[:daemon_opts] << '-f' }
37
+ opt.on("-n", "--no_wait",
38
+ "Do not wait for processes to stop"){ o[:daemon_opts] << '-n' }
39
+ end.parse!
40
+
41
+ ##=> Main <=
42
+ opt_require(o, project:"-P")
43
+
44
+ raise "Project doesn't exist, aborting." unless MiGA::Project.exist? o[:project]
45
+ p = MiGA::Project.new(o[:project])
46
+ d = MiGA::Daemon.new(p)
47
+ [:latency, :maxjobs, :ppn].each do |k|
48
+ d.runopts(k, o[k]) unless o[k].nil?
49
+ end
50
+ d.daemon(task, o[:daemon_opts])
data/actions/date.rb ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {}
7
+ opts = OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_common(opt, o)
10
+ end.parse!
11
+
12
+ ##=> Main <=
13
+ opts.parse!
14
+ puts Time.now.to_s
data/actions/{download_dataset → download_dataset.rb} RENAMED
@@ -7,16 +7,8 @@ require "miga/remote_dataset"
7
7
 
8
8
  o = {q:true, query:false, universe: :ebi, db: :embl}
9
9
  OptionParser.new do |opt|
10
- opt.banner = <<BAN
11
- Creates an empty dataset in a pre-existing MiGA project.
12
-
13
- Usage: #{$0} #{File.basename(__FILE__)} [options]
14
- BAN
15
- opt.separator ""
16
- opt.on("-P", "--project PATH",
17
- "(Mandatory) Path to the project to use."){ |v| o[:project]=v }
18
- opt.on("-D", "--dataset STRING",
19
- "(Mandatory unless -F) ID of the dataset to create."){ |v| o[:dataset]=v }
10
+ opt_banner(opt)
11
+ opt_object(opt, o, [:project, :dataset, :dataset_type])
20
12
  opt.on("-I", "--ids ID1,ID2,...",
21
13
  "(Mandatory unless -F) IDs in the remote database separated by commas."
22
14
  ){ |v| o[:ids]=v }
@@ -31,10 +23,6 @@ BAN
31
23
  "The long form of all the options are supported as header (without the --)",
32
24
  "including dataset, ids, universe, and db. For query use true/false values."
33
25
  ){ |v| o[:file] = v }
34
- opt.on("-t", "--type STRING",
35
- "Type of dataset. Recognized types include:",
36
- *MiGA::Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}" }
37
- ){ |v| o[:type]=v.to_sym }
38
26
  opt.on("-q", "--query",
39
27
  "If set, the dataset is registered as a query, not a reference dataset."
40
28
  ){ |v| o[:query]=v }
@@ -46,20 +34,11 @@ BAN
46
34
  "Owner of the dataset."){ |v| o[:user]=v }
47
35
  opt.on("-c", "--comments STRING",
48
36
  "Comments on the dataset."){ |v| o[:comments]=v }
49
- opt.on("-v", "--verbose",
50
- "Print additional information to STDERR."){ o[:q]=false }
51
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
52
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
53
- end
54
- opt.on("-h", "--help", "Display this screen.") do
55
- puts opt
56
- exit
57
- end
58
- opt.separator ""
37
+ opt_common(opt, o)
59
38
  end.parse!
60
39
 
61
40
 
62
- ### MAIN
41
+ ##=> Main <=
63
42
  glob = [o]
64
43
  unless o[:file].nil?
65
44
  glob = []
@@ -81,9 +60,7 @@ unless o[:file].nil?
81
60
  end
82
61
 
83
62
  glob.each do |o_i|
84
- raise "-P is mandatory." if o_i[:project].nil?
85
- raise "-D is mandatory." if o_i[:dataset].nil?
86
- raise "-I is mandatory." if o_i[:ids].nil?
63
+ opt_require(o_i, project:"-P", dataset:"-D", ids:"-I")
87
64
 
88
65
  $stderr.puts "Dataset: #{o_i[:dataset]}" unless o_i[:q]
89
66
  $stderr.puts "Loading project." unless o_i[:q]
data/actions/find_datasets.rb ADDED
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, add:false, ref:true}
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_type])
10
+ opt.on("-a", "--add",
11
+ "Register the datasets found. By default, only lists them (dry run)."
12
+ ){ |v| o[:add]=v }
13
+ opt.on("-q", "--query",
14
+ "If set, all datasets are registered as query datasets."
15
+ ){ |v| o[:ref]=!v }
16
+ opt.on("-u", "--user STRING", "Owner of the dataset."){ |v| o[:user]=v }
17
+ opt_common(opt, o)
18
+ end.parse!
19
+
20
+ ##=> Main <=
21
+ opt_require(o, project:"-P")
22
+
23
+ $stderr.puts "Loading project." unless o[:q]
24
+ p = MiGA::Project.load(o[:project])
25
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
26
+
27
+ $stderr.puts "Finding datasets." unless o[:q]
28
+ ud = p.unregistered_datasets
29
+ ud.each do |dn|
30
+ puts dn
31
+ if o[:add]
32
+ md = {}
33
+ [:type, :user].each{ |k| md[k]=o[k] unless o[k].nil? }
34
+ d = MiGA::Dataset.new(p, dn, o[:ref], md)
35
+ p.add_dataset(dn)
36
+ res = d.first_preprocessing(true)
37
+ puts "- #{res}" unless o[:q]
38
+ end
39
+ end
40
+
41
+ $stderr.puts "Done." unless o[:q]
data/actions/import_datasets.rb ADDED
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # @package MiGA
4
+ # @license Artistic-2.0
5
+
6
+ o = {q:true, info:false, force:false, method: :hardlink }
7
+ OptionParser.new do |opt|
8
+ opt_banner(opt)
9
+ opt_object(opt, o, [:project, :dataset_opt])
10
+ opt.on("-Q", "--project-target PATH",
11
+ "(Mandatory) Path to the project where to link the dataset."
12
+ ){ |v| o[:project2]=v }
13
+ opt.on("-f", "--force",
14
+ "Forces linking, even if dataset's preprocessing is incomplete."
15
+ ){ |v| o[:force]=v }
16
+ opt.on("-s", "--symlink",
17
+ "Creates symlinks instead of the default hard links."
18
+ ){ o[:method] = :symlink }
19
+ opt.on("-c", "--copy",
20
+ "Creates copies instead of the default hard links."){ o[:method] = :copy }
21
+ opt_filter_datasets(opt, o)
22
+ opt_common(opt, o)
23
+ end.parse!
24
+
25
+ ##=> Main <=
26
+ opt_require(o, project:"-P", project2:"-Q")
27
+
28
+ $stderr.puts "Loading project." unless o[:q]
29
+ p = MiGA::Project.load(o[:project])
30
+ raise "Impossible to load project: #{o[:project]}" if p.nil?
31
+ q = MiGA::Project.load(o[:project2])
32
+ raise "Impossible to load project: #{o[:project2]}" if q.nil?
33
+
34
+ $stderr.puts "Listing dataset." unless o[:q]
35
+ if o[:dataset].nil?
36
+ ds = p.datasets
37
+ else
38
+ ds = [p.dataset(o[:dataset])]
39
+ end
40
+ ds = filter_datasets!(ds, o)
41
+ ds.each do |d|
42
+ next unless o[:force] or d.done_preprocessing?
43
+ puts d.name
44
+ q.import_dataset(d, o[:method])
45
+ end
46
+
47
+ $stderr.puts "Done." unless o[:q]