miga-base 0.2.0.9 → 0.2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/Rakefile +3 -0
  3. data/actions/add_result.rb +37 -0
  4. data/actions/add_taxonomy.rb +63 -0
  5. data/actions/create_dataset.rb +49 -0
  6. data/actions/create_project.rb +46 -0
  7. data/actions/daemon.rb +50 -0
  8. data/actions/date.rb +14 -0
  9. data/actions/{download_dataset → download_dataset.rb} +5 -28
  10. data/actions/find_datasets.rb +41 -0
  11. data/actions/import_datasets.rb +47 -0
  12. data/actions/index_taxonomy.rb +46 -0
  13. data/actions/list_datasets.rb +50 -0
  14. data/actions/list_files.rb +43 -0
  15. data/actions/project_info.rb +40 -0
  16. data/actions/unlink_dataset.rb +28 -0
  17. data/bin/miga +129 -33
  18. data/lib/miga/daemon.rb +48 -34
  19. data/lib/miga/dataset.rb +7 -123
  20. data/lib/miga/dataset_result.rb +177 -0
  21. data/lib/miga/project.rb +32 -12
  22. data/lib/miga/version.rb +2 -2
  23. data/scripts/_distances_functions.bash +82 -0
  24. data/scripts/_distances_noref_nomulti.bash +96 -67
  25. data/scripts/_distances_ref_nomulti.bash +54 -85
  26. data/scripts/assembly.bash +16 -3
  27. data/scripts/clade_finding.bash +20 -18
  28. data/scripts/distances.bash +2 -1
  29. data/scripts/init.bash +2 -6
  30. data/scripts/subclades.bash +4 -5
  31. data/test/common_test.rb +2 -2
  32. data/test/daemon_test.rb +73 -1
  33. data/test/project_test.rb +26 -2
  34. data/test/taxonomy_test.rb +10 -0
  35. data/test/test_helper.rb +1 -1
  36. data/utils/subclades-compile.rb +4 -2
  37. data/utils/subclades.R +140 -158
  38. metadata +48 -44
  39. data/actions/add_result +0 -58
  40. data/actions/add_taxonomy +0 -83
  41. data/actions/create_dataset +0 -61
  42. data/actions/create_project +0 -67
  43. data/actions/daemon +0 -66
  44. data/actions/find_datasets +0 -61
  45. data/actions/import_datasets +0 -83
  46. data/actions/index_taxonomy +0 -68
  47. data/actions/list_datasets +0 -81
  48. data/actions/list_files +0 -63
  49. data/actions/unlink_dataset +0 -49
data/actions/daemon DELETED
@@ -1,66 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require "miga/daemon"
7
-
8
- task = ARGV.shift unless ["-h","--help"].include? ARGV.first
9
- ARGV << "-h" if ARGV.empty?
10
- o = {q:true, update:false, daemon_opts:[]}
11
- OptionParser.new do |opt|
12
- opt.banner = <<BAN
13
- Controls the daemon of a MiGA project.
14
-
15
- Usage: #{$0} #{File.basename(__FILE__)} {task} [options]
16
- BAN
17
- opt.separator "task:"
18
- { start: "Start an instance of the application.",
19
- stop: "Start an instance of the application.",
20
- restart: "Stop all instances and restart them afterwards.",
21
- reload: "Send a SIGHUP to all instances of the application.",
22
- run: "Start the application and stay on top.",
23
- zap: "Set the application to a stopped state.",
24
- status: "Show status (PID) of application instances."
25
- }.each{ |k,v| opt.separator sprintf " %*s%s", -33, k, v }
26
- opt.separator ""
27
- opt.separator "MiGA options:"
28
- opt.on("-P", "--project PATH",
29
- "(Mandatory) Path to the project to use."){ |v| o[:project]=v }
30
- opt.on("--latency INT",
31
- "Number of seconds the daemon will be sleeping."
32
- ){ |v| o[:latency]=v.to_i }
33
- opt.on("--max-jobs INT",
34
- "Maximum number of jobs to use simultaneously."){ |v| o[:maxjobs]=v.to_i }
35
- opt.on("--ppn INT",
36
- "Maximum number of cores to use in a single job."){ |v| o[:ppn]=v.to_i }
37
- opt.on("-v", "--verbose",
38
- "Print additional information to STDERR."){ o[:q]=false }
39
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
40
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
41
- end
42
- opt.on("-h", "--help", "Display this screen.") do
43
- puts opt
44
- exit
45
- end
46
- opt.separator ""
47
- opt.separator "Daemon options:"
48
- opt.on("-t", "--ontop",
49
- "Stay on top (does not daemonize)"){ o[:daemon_opts] << '-t' }
50
- opt.on("-f", "--force", "Force operation"){ o[:daemon_opts] << '-f' }
51
- opt.on("-n", "--no_wait",
52
- "Do not wait for processes to stop"){ o[:daemon_opts] << '-n' }
53
- end.parse!
54
-
55
- ### MAIN
56
- raise "Project is mandatory." if o[:project].nil?
57
-
58
- raise "Project doesn't exist, aborting." unless
59
- MiGA::Project.exist? o[:project]
60
- p = MiGA::Project.new(o[:project])
61
- d = MiGA::Daemon.new(p)
62
- [:latency, :maxjobs, :ppn].each do |k|
63
- d.runopts(k, o[k]) unless o[k].nil?
64
- end
65
- d.daemon(task, o[:daemon_opts])
66
-
data/actions/find_datasets DELETED
@@ -1,61 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, add:false, ref:false}
7
- OptionParser.new do |opt|
8
- opt.banner = <<BAN
9
- Finds unregistered datasets based on result files.
10
-
11
- Usage: #{$0} #{File.basename(__FILE__)} [options]
12
- BAN
13
- opt.separator ""
14
- opt.on("-P", "--project PATH",
15
- "(Mandatory) Path to the project to use."){ |v| o[:project]=v }
16
- opt.on("-a", "--add",
17
- "Register the datasets found. By default, only lists them (dry run)."
18
- ){ |v| o[:add]=v }
19
- opt.on("-t", "--type STRING",
20
- "Type of datasets. Recognized types include:",
21
- *MiGA::Dataset.KNOWN_TYPES.map{ |k,v| "~ #{k}: #{v[:description]}"}
22
- ){ |v| o[:type]=v.to_sym }
23
- opt.on("-r", "--ref",
24
- "If set, all datasets are registered as reference datasets."
25
- ){ |v| o[:ref]=v }
26
- opt.on("-u", "--user STRING", "Owner of the dataset."){ |v| o[:user]=v }
27
- opt.on("-v", "--verbose",
28
- "Print additional information to STDERR."){ o[:q]=false }
29
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
30
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
31
- end
32
- opt.on("-h", "--help", "Display this screen.") do
33
- puts opt
34
- exit
35
- end
36
- opt.separator ""
37
- end.parse!
38
-
39
-
40
- ### MAIN
41
- raise "-P is mandatory." if o[:project].nil?
42
-
43
- $stderr.puts "Loading project." unless o[:q]
44
- p = MiGA::Project.load(o[:project])
45
- raise "Impossible to load project: #{o[:project]}" if p.nil?
46
-
47
- $stderr.puts "Finding datasets." unless o[:q]
48
- ud = p.unregistered_datasets
49
- ud.each do |dn|
50
- puts dn
51
- if o[:add]
52
- md = {}
53
- [:type, :user].each{ |k| md[k]=o[k] unless o[k].nil? }
54
- d = MiGA::Dataset.new(p, dn, o[:ref], md)
55
- p.add_dataset(dn)
56
- res = d.first_preprocessing
57
- puts "- #{res}" unless o[:q]
58
- end
59
- end
60
-
61
- $stderr.puts "Done." unless o[:q]
data/actions/import_datasets DELETED
@@ -1,83 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, info:false, force:false, method: :hardlink }
7
- OptionParser.new do |opt|
8
- opt.banner = <<BAN
9
- Link datasets (including results) from one project to another.
10
-
11
- Usage: #{$0} #{File.basename(__FILE__)} [options]
12
- BAN
13
- opt.separator ""
14
- opt.on("-P", "--project-source PATH",
15
- "(Mandatory) Path to the project that contains the dataset."
16
- ){ |v| o[:project1]=v }
17
- opt.on("-Q", "--project-target PATH",
18
- "(Mandatory) Path to the project where to link the dataset."
19
- ){ |v| o[:project2]=v }
20
- opt.on("-D", "--dataset STRING",
21
- "ID of the dataset to link."){ |v| o[:dataset]=v.miga_name }
22
- opt.on("-f", "--force",
23
- "Forces linking, even if dataset's preprocessing is incomplete."
24
- ){ |v| o[:force]=v }
25
- opt.on("-s", "--symlink",
26
- "Creates symlinks instead of the default hard links."
27
- ){ o[:method] = :symlink }
28
- opt.on("-c", "--copy",
29
- "Creates copies instead of the default hard links."){ o[:method] = :copy }
30
- opt.on("--[no-]ref",
31
- "If set, links only reference (or only non-reference) datasets."
32
- ){ |v| o[:ref]=v }
33
- opt.on("--[no-]multi",
34
- "If set, links only multi-species (or only single-species) datasets."
35
- ){ |v| o[:multi]=v }
36
- opt.on("-t", "--taxonomy RANK:TAXON",
37
- "Filter by taxonomy."){ |v| o[:taxonomy]=MiGA::Taxonomy.new v }
38
- opt.on("-v", "--verbose",
39
- "Print additional information to STDERR."){ o[:q]=false }
40
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
41
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
42
- end
43
- opt.on("-h", "--help", "Display this screen.") do
44
- puts opt
45
- exit
46
- end
47
- opt.separator ""
48
- end.parse!
49
-
50
-
51
- ### MAIN
52
- raise "-P is mandatory." if o[:project1].nil?
53
- raise "-Q is mandatory." if o[:project2].nil?
54
-
55
- $stderr.puts "Loading project." unless o[:q]
56
- p = MiGA::Project.load(o[:project1])
57
- raise "Impossible to load project: #{o[:project1]}" if p.nil?
58
- q = MiGA::Project.load(o[:project2])
59
- raise "Impossible to load project: #{o[:project2]}" if q.nil?
60
-
61
- $stderr.puts "Listing dataset." unless o[:q]
62
- if o[:dataset].nil?
63
- ds = p.datasets
64
- else
65
- ds = [p.dataset(o[:dataset])]
66
- end
67
- ds.select!{|d| d.name == o[:dataset]} unless o[:dataset].nil?
68
- ds.select!{|d| d.is_ref? == o[:ref] } unless o[:ref].nil?
69
- ds.select! do |d|
70
- (not d.metadata[:type].nil?) and
71
- (MiGA::Dataset.KNOWN_TYPES[d.metadata[:type]][:multi] == o[:multi])
72
- end unless o[:multi].nil?
73
- ds.select! do |d|
74
- (not d.metadata[:tax].nil?) and d.metadata[:tax].is_in?(o[:taxonomy])
75
- end unless o[:taxonomy].nil?
76
- ds.each do |d|
77
- next unless o[:force] or d.done_preprocessing?
78
- puts d.name
79
- q.import_dataset(d, o[:method])
80
- end
81
-
82
- $stderr.puts "Done." unless o[:q]
83
-
data/actions/index_taxonomy DELETED
@@ -1,68 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- require "miga/tax_index"
7
-
8
- o = {q:true, format: :json}
9
- OptionParser.new do |opt|
10
- opt.banner = <<BAN
11
- Creates a taxonomy-indexed list of the datasets.
12
-
13
- Usage: #{$0} #{File.basename(__FILE__)} [options]
14
- BAN
15
- opt.separator ""
16
- opt.on("-P", "--project PATH",
17
- "(Mandatory) Path to the project to read."){ |v| o[:project]=v }
18
- opt.on("-i", "--index PATH",
19
- "(Mandatory) File to create with the index."){ |v| o[:index]=v }
20
- opt.on("-f", "--format STRING",
21
- "Format of the index file. By default: #{o[:format]}. Supported: " +
22
- "json, tab."){ |v| o[:format]=v.to_sym }
23
- opt.on("--[no-]multi",
24
- "If set, lists only multi-species (or only single-species) datasets."
25
- ){ |v| o[:multi]=v }
26
- opt.on("-v", "--verbose",
27
- "Print additional information to STDERR."){ o[:q]=false }
28
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
29
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
30
- end
31
- opt.on("-h", "--help", "Display this screen.") do
32
- puts opt
33
- exit
34
- end
35
- opt.separator ""
36
- end.parse!
37
-
38
- ### MAIN
39
- raise "-P is mandatory." if o[:project].nil?
40
- raise "-i is mandatory." if o[:index].nil?
41
-
42
- $stderr.puts "Loading project." unless o[:q]
43
- p = MiGA::Project.load(o[:project])
44
- raise "Impossible to load project: #{o[:project]}" if p.nil?
45
-
46
- $stderr.puts "Loading datasets." unless o[:q]
47
- ds = p.datasets
48
- ds.select!{|d| not d.metadata[:tax].nil? }
49
- ds.select! do |d|
50
- (not d.metadata[:type].nil?) and
51
- (MiGA::Dataset.KNOWN_TYPES[d.metadata[:type]][:multi] == o[:multi])
52
- end unless o[:multi].nil?
53
-
54
- $stderr.puts "Indexing taxonomy." unless o[:q]
55
- tax_index = MiGA::TaxIndex.new
56
- ds.each { |d| tax_index << d }
57
-
58
- $stderr.puts "Saving index." unless o[:q]
59
- fh = File.open(o[:index], "w")
60
- if o[:format]==:json
61
- fh.print tax_index.to_json
62
- elsif o[:format]==:tab
63
- fh.print tax_index.to_tab
64
- end
65
- fh.close
66
-
67
- $stderr.puts "Done." unless o[:q]
68
-
data/actions/list_datasets DELETED
@@ -1,81 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, info:false, processing:false}
7
- OptionParser.new do |opt|
8
- opt.banner = <<BAN
9
- Lists all registered datasets in an MiGA project.
10
-
11
- Usage: #{$0} #{File.basename(__FILE__)} [options]
12
- BAN
13
- opt.separator ""
14
- opt.on("-P", "--project PATH",
15
- "(Mandatory) Path to the project to read."){ |v| o[:project]=v }
16
- opt.on("-D", "--dataset STRING",
17
- "ID of the dataset to read."){ |v| o[:dataset]=v.miga_name }
18
- opt.on("--[no-]ref",
19
- "If set, lists only reference (or only non-reference) datasets."
20
- ){ |v| o[:ref]=v }
21
- opt.on("--[no-]multi",
22
- "If set, lists only multi-species (or only single-species) datasets."
23
- ){ |v| o[:multi]=v }
24
- opt.on("-i", "--info",
25
- "Print additional information on each dataset."){ |v| o[:info]=v }
26
- opt.on("-p", "--processing",
27
- "Print information on processing advance."){ |v| o[:processing]=v }
28
- opt.on("-t", "--taxonomy RANK:TAXON",
29
- "Filter by taxonomy."){ |v| o[:taxonomy]=MiGA::Taxonomy.new v }
30
- opt.on("-m", "--metadata STRING",
31
- "Print name and metadata field only. If set, ignores -i."
32
- ){ |v| o[:datum]=v }
33
- opt.on("-v", "--verbose",
34
- "Print additional information to STDERR."){ o[:q]=false }
35
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
36
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
37
- end
38
- opt.on("-h", "--help", "Display this screen.") do
39
- puts opt
40
- exit
41
- end
42
- opt.separator ""
43
- end.parse!
44
-
45
-
46
- ### MAIN
47
- raise "-P is mandatory." if o[:project].nil?
48
-
49
- $stderr.puts "Loading project." unless o[:q]
50
- p = MiGA::Project.load(o[:project])
51
- raise "Impossible to load project: #{o[:project]}" if p.nil?
52
-
53
- $stderr.puts "Listing datasets." unless o[:q]
54
- if o[:dataset].nil?
55
- ds = p.datasets
56
- elsif MiGA::Dataset.exist? p, o[:dataset]
57
- ds = [p.dataset(o[:dataset])]
58
- else
59
- ds = []
60
- end
61
- ds.select!{|d| d.is_ref? == o[:ref] } unless o[:ref].nil?
62
- ds.select! do |d|
63
- (not d.metadata[:type].nil?) and
64
- (MiGA::Dataset.KNOWN_TYPES[d.metadata[:type]][:multi] == o[:multi])
65
- end unless o[:multi].nil?
66
- ds.select! do |d|
67
- (not d.metadata[:tax].nil?) and d.metadata[:tax].is_in?(o[:taxonomy])
68
- end unless o[:taxonomy].nil?
69
- if not o[:datum].nil?
70
- ds.each{|d| puts "#{d.name}\t#{d.metadata[ o[:datum] ] || "?"}"}
71
- elsif o[:info]
72
- puts MiGA::MiGA.tabulate(MiGA::Dataset.INFO_FIELDS, ds.map{ |d| d.info })
73
- elsif o[:processing]
74
- comp = ["undef","done","queued"]
75
- puts MiGA::MiGA.tabulate([:name] + MiGA::Dataset.PREPROCESSING_TASKS,
76
- ds.map{ |d| [d.name] + d.profile_advance.map{ |i| comp[i] } })
77
- else
78
- ds.each{|d| puts d.name}
79
- end
80
-
81
- $stderr.puts "Done." unless o[:q]
data/actions/list_files DELETED
@@ -1,63 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, details:false, json:true}
7
- OptionParser.new do |opt|
8
- opt.banner = <<BAN
9
- Lists all registered files from the results of a dataset or a project.
10
-
11
- Usage: #{$0} #{File.basename(__FILE__)} [options]
12
- BAN
13
- opt.separator ""
14
- opt.on("-P", "--project PATH",
15
- "(Mandatory) Path to the project to read."){ |v| o[:project]=v }
16
- opt.on("-D", "--dataset STRING",
17
- "ID of the dataset to read. If not set, project-wide results are shown."
18
- ){ |v| o[:dataset]=v.miga_name }
19
- opt.on("-i", "--info",
20
- "If set, it prints additional details for each file."
21
- ){ |v| o[:details]=v }
22
- opt.on("--[no-]json",
23
- "If set to no, excludes json files containing results metadata."
24
- ){ |v| o[:json]=v }
25
- opt.on("-v", "--verbose",
26
- "Print additional information to STDERR."){ o[:q]=false }
27
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
28
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
29
- end
30
- opt.on("-h", "--help", "Display this screen.") do
31
- puts opt
32
- exit
33
- end
34
- opt.separator ""
35
- end.parse!
36
-
37
-
38
- ### MAIN
39
- raise "-P is mandatory." if o[:project].nil?
40
-
41
- $stderr.puts "Loading project." unless o[:q]
42
- p = MiGA::Project.load(o[:project])
43
- raise "Impossible to load project: #{o[:project]}" if p.nil?
44
-
45
- if o[:dataset].nil?
46
- results = p.results
47
- else
48
- $stderr.puts "Loading dataset." unless o[:q]
49
- ds = p.dataset(o[:dataset])
50
- raise "Impossible to load dataset: #{o[:dataset]}" if ds.nil?
51
- results = ds.results
52
- end
53
-
54
- $stderr.puts "Listing files." unless o[:q]
55
- results.each do |result|
56
- puts "#{ "#{result.path}\t\t" if o[:details] }#{result.path}" if o[:json]
57
- result.each_file do |k,f|
58
- puts "#{ "#{result.path}\t#{k}\t" if o[:details] }#{result.dir}/#{f}"
59
- end
60
- end
61
-
62
- $stderr.puts "Done." unless o[:q]
63
-
data/actions/unlink_dataset DELETED
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # @package MiGA
4
- # @license Artistic-2.0
5
-
6
- o = {q:true, remove:false}
7
- OptionParser.new do |opt|
8
- opt.banner = <<BAN
9
- Removes a dataset from an MiGA project.
10
-
11
- Usage: #{$0} #{File.basename(__FILE__)} [options]
12
- BAN
13
- opt.separator ""
14
- opt.on("-P", "--project PATH",
15
- "(Mandatory) Path to the project to use."){ |v| o[:project]=v }
16
- opt.on("-D", "--dataset PATH",
17
- "(Mandatory) ID of the dataset to create."){ |v| o[:dataset]=v }
18
- opt.on("-r", "--remove",
19
- "Also remove all associated files.",
20
- "By default, only unlinks from metadata."){ o[:remove]=true }
21
- opt.on("-v", "--verbose",
22
- "Print additional information to STDERR."){ o[:q]=false }
23
- opt.on("-d", "--debug INT", "Print debugging information to STDERR.") do |v|
24
- v.to_i>1 ? MiGA::MiGA.DEBUG_TRACE_ON : MiGA::MiGA.DEBUG_ON
25
- end
26
- opt.on("-h", "--help", "Display this screen.") do
27
- puts opt
28
- exit
29
- end
30
- opt.separator ""
31
- end.parse!
32
-
33
-
34
- ### MAIN
35
- raise "-P is mandatory." if o[:project].nil?
36
- raise "-D is mandatory." if o[:dataset].nil?
37
-
38
- $stderr.puts "Loading project." unless o[:q]
39
- p = MiGA::Project.load(o[:project])
40
- raise "Impossible to load project: #{o[:project]}" if p.nil?
41
-
42
- $stderr.puts "Unlinking dataset." unless o[:q]
43
- raise "Dataset doesn't exist, aborting." unless
44
- MiGA::Dataset.exist?(p, o[:dataset])
45
- d = p.unlink_dataset(o[:dataset])
46
- d.remove! if o[:remove]
47
-
48
- $stderr.puts "Done." unless o[:q]
49
-