miga-base 0.4.3.0 → 0.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -67,66 +67,82 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
67
67
  end
68
68
 
69
69
  def perform
70
- glob = [cli]
71
- unless cli[:file].nil?
72
- glob = []
73
- File.open(cli[:file], 'r') do |fh|
74
- h = nil
75
- fh.each do |ln|
76
- r = ln.chomp.split(/\t/)
77
- if h.nil?
78
- h = r
79
- else
80
- argv_i = [self.name]
81
- h.each_with_index do |field, k|
82
- case field.downcase
83
- when *%w[query ignore-dup get-metadata only-metadata]
84
- argv_i << "--#{field.downcase}" if r[k].downcase == 'true'
85
- when *%w[project file verbose help debug]
86
- raise "Unsupported header: #{field}"
87
- else
88
- argv_i += ["--#{field.downcase}", r[k]]
89
- end
70
+ glob = get_sub_cli
71
+ p = cli.load_project
72
+ glob.each do |sub_cli|
73
+ rd = create_remote_dataset(sub_cli)
74
+ next if rd.nil?
75
+ if sub_cli[:get_md]
76
+ update_metadata(sub_cli, p, rd)
77
+ else
78
+ create_dataset(sub_cli, p, rd)
79
+ end
80
+ end
81
+ end
82
+
83
+ private
84
+
85
+ def get_sub_cli
86
+ return [cli] if cli[:file].nil?
87
+ glob = []
88
+ File.open(cli[:file], 'r') do |fh|
89
+ h = nil
90
+ fh.each do |ln|
91
+ r = ln.chomp.split(/\t/)
92
+ if h.nil?
93
+ h = r
94
+ else
95
+ argv_i = [self.name]
96
+ h.each_with_index do |field, k|
97
+ case field.downcase
98
+ when *%w[query ignore-dup get-metadata only-metadata]
99
+ argv_i << "--#{field.downcase}" if r[k].downcase == 'true'
100
+ when *%w[project file verbose help debug]
101
+ raise "Unsupported header: #{field}"
102
+ else
103
+ argv_i += ["--#{field.downcase}", r[k]]
90
104
  end
91
- sub_cli = MiGA::Cli.new(argv_i)
92
- sub_cli.defaults = cli.data
93
- sub_cli.action.parse_cli
94
- glob << sub_cli
95
105
  end
106
+ sub_cli = MiGA::Cli.new(argv_i)
107
+ sub_cli.defaults = cli.data
108
+ sub_cli.action.parse_cli
109
+ glob << sub_cli
96
110
  end
97
111
  end
98
112
  end
113
+ glob
114
+ end
99
115
 
100
- p = cli.load_project
101
- glob.each do |sub_cli|
102
- sub_cli.ensure_par(dataset: '-D', ids: '-I')
103
- unless sub_cli[:api_key].nil?
104
- ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
105
- end
116
+ def create_remote_dataset(sub_cli)
117
+ sub_cli.ensure_par(dataset: '-D', ids: '-I')
118
+ unless sub_cli[:api_key].nil?
119
+ ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
120
+ end
106
121
 
107
- sub_cli.say "Dataset: #{sub_cli[:dataset]}"
108
- if sub_cli[:ignore_dup] && !sub_cli[:get_md]
109
- next if Dataset.exist?(p, sub_cli[:dataset])
110
- end
122
+ sub_cli.say "Dataset: #{sub_cli[:dataset]}"
123
+ if sub_cli[:ignore_dup] && !sub_cli[:get_md]
124
+ return if Dataset.exist?(p, sub_cli[:dataset])
125
+ end
111
126
 
112
- sub_cli.say 'Locating remote dataset'
113
- rd = RemoteDataset.new(sub_cli[:ids], sub_cli[:db], sub_cli[:universe])
127
+ sub_cli.say 'Locating remote dataset'
128
+ RemoteDataset.new(sub_cli[:ids], sub_cli[:db], sub_cli[:universe])
129
+ end
114
130
 
115
- if sub_cli[:get_md]
116
- sub_cli.say 'Updating dataset'
117
- d = p.dataset(sub_cli[:dataset])
118
- next if d.nil?
119
- md = sub_cli.add_metadata(d).metadata.data
120
- rd.update_metadata(d, md)
121
- else
122
- sub_cli.say 'Creating dataset'
123
- dummy_d = Dataset.new(p, sub_cli[:dataset])
124
- md = sub_cli.add_metadata(dummy_d).metadata.data
125
- md[:metadata_only] = true if cli[:only_md]
126
- dummy_d.remove!
127
- rd.save_to(p, sub_cli[:dataset], !sub_cli[:query], md)
128
- p.add_dataset(sub_cli[:dataset])
129
- end
130
- end
131
##
# Update the metadata of the dataset named +sub_cli[:dataset]+ in project
# +p+ using the remote dataset +rd+. Does nothing (and returns nil) if the
# project has no dataset with that name.
def update_metadata(sub_cli, p, rd)
  sub_cli.say 'Updating dataset'
  dataset = p.dataset(sub_cli[:dataset])
  unless dataset.nil?
    metadata = sub_cli.add_metadata(dataset).metadata.data
    rd.update_metadata(dataset, metadata)
  end
end
138
+
139
##
# Save the remote dataset +rd+ into project +p+ under the name
# +sub_cli[:dataset]+ and add it to the project.
#
# The metadata is gathered through a temporary Dataset object that is
# removed before saving. The dataset is flagged as +metadata_only+ when the
# option is set in the main CLI or in +sub_cli+ itself, so that the
# "only-metadata" column of an input file is honored for each entry.
def create_dataset(sub_cli, p, rd)
  sub_cli.say 'Creating dataset'
  dummy_d = Dataset.new(p, sub_cli[:dataset])
  md = sub_cli.add_metadata(dummy_d).metadata.data
  md[:metadata_only] = true if cli[:only_md] || sub_cli[:only_md]
  dummy_d.remove!
  rd.save_to(p, sub_cli[:dataset], !sub_cli[:query], md)
  p.add_dataset(sub_cli[:dataset])
end
132
148
  end
@@ -0,0 +1,206 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'net/ftp'
6
+ require 'digest/md5'
7
+
8
##
# CLI action that downloads a database archive from a remote FTP host,
# checks its MD5 digest, unpacks it in a local directory, and registers it
# in a local manifest (+_local_manif.json+).
class MiGA::Cli::Action::GetDb < MiGA::Cli::Action

  ##
  # Set the default options and parse the command-line arguments.
  def parse_cli
    cli.defaults = {
      database: :recommended,
      version: :latest,
      local: File.expand_path('.miga_db', ENV['MIGA_HOME']),
      host: 'ftp://microbial-genomes.org/db',
      pb: true,
      overwrite: true
    }
    cli.parse do |opt|
      opt.on(
        '-n', '--database STRING',
        "Name of the database to download. By default: #{cli[:database]}"
      ) { |v| cli[:database] = v.to_sym }
      opt.on(
        '--db-version STRING',
        "Database version to download. By default: #{cli[:version]}"
      ) { |v| cli[:version] = v.to_sym }
      opt.on(
        '-l', '--local-dir PATH',
        "Local directory to store the database. By default: #{cli[:local]}"
      ) { |v| cli[:local] = v }
      # The value must land in :host (a String), which is the key that
      # #remote_connection reads
      # NOTE(review): -h is often reserved for help; confirm that it does not
      # clash with the options common to all actions
      opt.on(
        '-h', '--host STRING',
        "Remote host of the database. By default: #{cli[:host]}"
      ) { |v| cli[:host] = v }
      opt.on(
        '--list',
        'List available databases and exit'
      ) { |v| cli[:list_databases] = v }
      opt.on(
        '--list-versions',
        'List available versions of the database and exit'
      ) { |v| cli[:list_versions] = v }
      opt.on(
        '--no-overwrite',
        'Exit without downloading if the target database already exists'
      ) { |v| cli[:overwrite] = v }
      opt.on('--no-progress', 'Suppress progress bars') { |v| cli[:pb] = v }
    end
  end

  ##
  # Connect to the host, read its manifest and, unless only a listing was
  # requested or the target already exists (with --no-overwrite), download,
  # verify, unpack and register the requested database version.
  def perform
    @ftp = remote_connection
    manif = remote_manifest(@ftp)
    cli.puts "# Host: #{manif[:host]}"
    cli.puts "# Manifest last update: #{manif[:last_update]}"
    return if list_databases(manif)

    db = db_requested(manif)
    return if list_versions(db)

    ver = version_requested(db)
    return if check_target

    file = download_file(@ftp, ver[:path])
    check_digest(ver, file)
    unarchive(file)
    register_database(manif, db, ver)
  end

  ##
  # Message displayed through the CLI for this action's empty case.
  def empty_action
    cli.puts 'Downloading latest version of the default database'
  end

  ##
  # Close the FTP connection (if one was opened) before completing.
  def complete
    @ftp&.close
    super
  end

  private

  ##
  # Open an anonymous FTP session at +cli[:host]+ and change to the
  # directory given by the path of that URI. Returns the Net::FTP object.
  #
  # Raises RuntimeError if the URI scheme is not +ftp+.
  def remote_connection
    cli.say "Connecting to '#{cli[:host]}'"
    uri = URI.parse(cli[:host])
    raise 'Only FTP hosts are supported' unless uri.scheme == 'ftp'

    ftp = Net::FTP.open(uri.host, port: uri.port)
    ftp.login
    ftp.chdir(uri.path)
    ftp
  end

  ##
  # Download the remote file +path+ through +ftp+ into +cli[:local]+
  # (created if missing), advancing a progress bar if +cli[:pb]+ is set.
  # Returns the path to the local file.
  def download_file(ftp, path)
    cli.say "Downloading '#{path}'"
    Dir.mkdir(cli[:local]) unless Dir.exist? cli[:local]
    file = File.expand_path(path, cli[:local])
    filesize = ftp.size(path)
    transferred = 0
    ftp.getbinaryfile(path, file, 1024) do |data|
      next unless cli[:pb]

      transferred += data.size
      cli.advance("#{path}:", transferred, filesize)
    end
    cli.print "\n" if cli[:pb]
    file
  end

  ##
  # Download and parse the manifest of the host (+_manif.json+).
  def remote_manifest(ftp)
    file = download_file(ftp, '_manif.json')
    MiGA::Json.parse(file)
  end

  ##
  # Return the manifest entry of the requested database. The special names
  # +:recommended+ and +:test+ are first replaced in +cli[:database]+ by the
  # database names that the manifest gives for them.
  #
  # Raises RuntimeError if the host defines no such special database or if
  # the database is not in the manifest.
  def db_requested(manif)
    %i[recommended test].each do |n|
      next unless cli[:database] == n
      raise "This host has no #{n} database" if manif[n].nil?

      cli[:database] = manif[n].to_sym
    end
    db = manif[:databases][cli[:database]]
    raise 'Cannot find database in this host' if db.nil?

    db
  end

  ##
  # Return the manifest entry of the requested version of +db+, replacing
  # +:latest+ in +cli[:version]+ by the version that +db+ marks as latest.
  #
  # Raises RuntimeError if the version is not listed.
  def version_requested(db)
    cli[:version] = db[:latest].to_sym if cli[:version] == :latest
    ver = db[:versions][cli[:version]]
    raise 'Cannot find database version' if ver.nil?

    cli.puts "# Database size: #{version_size(ver)}"
    ver
  end

  ##
  # Print the table of databases in the manifest if --list was passed.
  # Returns true if the list was printed, false otherwise.
  def list_databases(manif)
    return false unless cli[:list_databases]

    cli.puts "# Recommended database: #{manif[:recommended]}"
    cli.puts ''
    rows = manif[:databases].map do |name, i|
      [name, i[:description], i[:latest], i[:versions].size.to_s]
    end
    cli.table(%w[name description latest versions], rows)
    true
  end

  ##
  # Print the table of versions of +db+ if --list-versions was passed.
  # Returns true if the list was printed, false otherwise.
  def list_versions(db)
    return false unless cli[:list_versions]

    cli.puts "# Database: #{cli[:database]}"
    cli.puts ''
    rows = db[:versions].map do |name, i|
      [name, i[:last_update], version_size(i), i[:datasets]]
    end
    cli.table(%w[version updated size datasets], rows)
    true
  end

  ##
  # With --no-overwrite, check if the local directory of the database
  # already exists. Returns true (after a warning) if it does, and false if
  # it doesn't or if overwriting is allowed.
  def check_target
    return false if cli[:overwrite]

    # cli[:database] is a Symbol, which File.expand_path does not accept
    file = File.expand_path(cli[:database].to_s, cli[:local])
    return false unless Dir.exist? file

    warn "The target directory already exists: #{file}"
    true
  end

  ##
  # Compare the MD5 digest of +file+ against the one expected for +ver+.
  #
  # Raises RuntimeError if they differ.
  def check_digest(ver, file)
    cli.say 'Checking MD5 digest'
    cli.say "Expected: #{ver[:MD5]}"
    dig = Digest::MD5.file(file).hexdigest
    cli.say "Observed: #{dig}"
    raise 'Corrupt file, MD5 does not match' unless dig == ver[:MD5]
  end

  ##
  # Human-readable size of +ver+, in the form "archived (unarchived)".
  def version_size(ver)
    "#{cli.num_suffix(ver[:size], true)} " \
      "(#{cli.num_suffix(ver[:size_unarchived], true)})"
  end

  ##
  # Extract the gzipped tar archive +file+ inside +cli[:local]+.
  #
  # The command runs without a shell, so no path needs quoting.
  #
  # Raises RuntimeError if +tar+ cannot be executed or fails, so that a
  # database that was not unpacked is never registered.
  def unarchive(file)
    cli.say "Unarchiving #{file}"
    return if system('tar', '-zxf', file, chdir: cli[:local])

    raise "Unable to unarchive #{file}"
  end

  ##
  # Record the downloaded database in the local manifest
  # (+_local_manif.json+ in +cli[:local]+): the host and last update of the
  # remote manifest, all the entries of +db+, and +ver+ as the local version.
  def register_database(manif, db, ver)
    cli.say "Registering database locally"
    local_manif = File.expand_path('_local_manif.json', cli[:local])
    reg = File.exist?(local_manif) ? MiGA::Json.parse(local_manif) : {}
    reg[:last_update] = Time.now.to_s
    reg[:databases] ||= {}
    entry = (reg[:databases][cli[:database]] ||= {})
    entry[:manif_last_update] = manif[:last_update]
    entry[:manif_host] = manif[:host]
    db.each { |k, v| entry[k] = v }
    entry[:local_version] = ver
    MiGA::Json.generate(reg, local_manif)
  end
end
@@ -0,0 +1,31 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
##
# Workflow action taking genome assemblies as input: it creates a project of
# assemblies (optionally with the MyTaxa scan enabled), runs the daemon, and
# summarizes. The steps themselves come from MiGA::Cli::Action::Wf.
class MiGA::Cli::Action::IndexWf < MiGA::Cli::Action
  require 'miga/cli/action/wf'
  include MiGA::Cli::Action::Wf

  ##
  # Set the default options and parse the command-line arguments, including
  # the workflow options defined in MiGA::Cli::Action::Wf.
  def parse_cli
    default_opts_for_wf
    cli.defaults = { mytaxa: false }
    cli.parse do |opt|
      opt.on(
        '-m', '--mytaxa-scan',
        'Perform MyTaxa scan analysis'
      ) { |v| cli[:mytaxa] = v }
      opts_for_wf_distances(opt)
      opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)',
                  cleanup: false, project_type: true)
    end
  end

  ##
  # Create the project from the input assemblies, run the daemon, and
  # summarize.
  def perform
    # Input data (the value returned by create_project is not needed here)
    create_project(:assembly, {}, run_mytaxa_scan: cli[:mytaxa])
    # Run
    run_daemon
    summarize
  end
end
@@ -5,38 +5,39 @@ require 'miga/cli/action'
5
5
  require 'shellwords'
6
6
 
7
7
  class MiGA::Cli::Action::Init < MiGA::Cli::Action
8
-
8
+ require 'miga/cli/action/init/daemon_helper'
9
+ include MiGA::Cli::Action::Init::DaemonHelper
10
+
9
11
  def parse_cli
10
12
  cli.interactive = true
11
- cli.defaults = {mytaxa: nil,
13
+ cli.defaults = { mytaxa: nil,
12
14
  config: File.expand_path('.miga_modules', ENV['HOME']),
13
- ask: false, auto: false, dtype: :bash}
15
+ ask: false, auto: false, dtype: :bash }
14
16
  cli.parse do |opt|
15
17
  opt.on(
16
18
  '-c', '--config PATH',
17
19
  'Path to the Bash configuration file',
18
20
  "By default: #{cli[:config]}"
19
- ){ |v| cli[:config] = v }
21
+ ){ |v| cli[:config] = v }
20
22
  opt.on(
21
23
  '--[no-]mytaxa',
22
24
  'Should I try setting up MyTaxa its dependencies?',
23
25
  'By default: interactive (true if --auto)'
24
- ){ |v| cli[:mytaxa] = v }
26
+ ){ |v| cli[:mytaxa] = v }
25
27
  opt.on(
26
28
  '--daemon-type STRING',
27
29
  'Type of daemon launcher, one of: bash, qsub, msub, slurm',
28
30
  "By default: interactive (#{cli[:dtype]} if --auto)"
29
- ){ |v| cli[:dtype] = v.to_sym }
31
+ ){ |v| cli[:dtype] = v.to_sym }
30
32
  opt.on(
31
33
  '--ask-all',
32
34
  'Ask for the location of all software',
33
35
  'By default, only the locations missing in PATH are requested'
34
- ){ |v| cli[:ask] = v }
36
+ ){ |v| cli[:ask] = v }
35
37
  end
36
38
  end
37
39
 
38
40
  def perform
39
- miga = MiGA.root_path
40
41
  cli.puts <<BANNER
41
42
  ===[ Welcome to MiGA, the Microbial Genome Atlas ]===
42
43
 
@@ -44,17 +45,70 @@ I'm the initialization script, and I'll sniff around your computer to
44
45
  make sure you have all the requirements for MiGA data processing.
45
46
 
46
47
  BANNER
48
+ list_requirements
49
+ rc_fh = open_rc_file
50
+ check_configuration_script rc_fh
51
+ paths = check_software_requirements rc_fh
52
+ check_additional_files paths
53
+ check_r_packages paths
54
+ check_ruby_gems paths
55
+ configure_daemon
56
+ close_rc_file rc_fh
57
+ cli.puts 'Configuration complete. MiGA is ready to work!'
58
+ cli.puts ''
59
+ end
60
+
61
##
# Deliberately does nothing and returns nil.
# NOTE(review): presumably overrides a default of MiGA::Cli::Action for the
# case in which the action receives no arguments -- confirm in that class.
def empty_action; end
63
+
64
##
# Run the shell command +cmd+ after sourcing the script +cli[:config]+, and
# return the standard output of the command as a String. The exit status is
# left in +$?+ for the caller.
#
# The path of the script is shell-escaped, so that quotes, dollar signs or
# backticks in it are taken literally. +cmd+ is passed to the shell as is:
# callers are responsible for escaping its components.
def run_cmd(cli, cmd)
  `. #{cli[:config].to_s.shellescape} && #{cmd}`
end
67
+
68
##
# Pipe the R code +cmd+ into the R executable at +paths['R']+ (run with
# --vanilla -q) through #run_cmd, and return its output, with the standard
# error merged into it.
def run_r_cmd(cli, paths, cmd)
  r_code = cmd.shellescape
  r_bin = paths['R'].shellescape
  run_cmd(cli, "echo #{r_code} | #{r_bin} --vanilla -q 2>&1")
end
72
+
73
##
# Try to load the R package +pkg+ with the R executable in +paths+.
# Returns true if the command exits successfully, false otherwise.
def test_r_package(cli, paths, pkg)
  load_stmt = "library('#{pkg}')"
  run_r_cmd(cli, paths, load_stmt)
  $?.success?
end
77
+
78
##
# Install the R package +pkg+ with the R executable in +paths+, and return
# the output of the R session.
#
# The repository is contacted over HTTPS, so that the packages cannot be
# tampered with in transit.
def install_r_package(cli, paths, pkg)
  r_cmd = "install.packages('#{pkg}', repos='https://cran.rstudio.com/')"
  run_r_cmd(cli, paths, r_cmd)
end
47
82
 
83
##
# Try to require the library +pkg+ with the Ruby executable at
# +paths['ruby']+. Returns true if that Ruby exits successfully, false
# otherwise.
def test_ruby_gem(cli, paths, pkg)
  ruby_bin = paths['ruby'].shellescape
  run_cmd(cli, "#{ruby_bin} -r #{pkg.shellescape} -e '' 2>/dev/null")
  $?.success?
end
88
+
89
##
# Install the gem +pkg+ (with --user) by running Gem::GemRunner in the Ruby
# executable at +paths['ruby']+. Returns the output of the command, with the
# standard error merged into it.
def install_ruby_gem(cli, paths, pkg)
  ruby_bin = paths['ruby'].shellescape
  gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
  run_cmd(
    cli,
    "#{ruby_bin} -r rubygems -r rubygems/gem_runner " \
    "-e #{gem_cmd.shellescape} 2>&1"
  )
end
95
+
96
##
# Ask the user whether the requirements should be displayed and, if the
# answer is 'yes', print utils/requirements.txt (from the MiGA root path)
# between two empty lines.
def list_requirements
  answer = cli.ask_user(
    'Would you like to see all the requirements before starting?',
    'no', %w(yes no)
  )
  return unless answer == 'yes'

  cli.puts ''
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
  File.foreach(req_path) { |ln| cli.puts ln }
  cli.puts ''
end
108
+
109
+ private
57
110
 
111
+ def open_rc_file
58
112
  rc_path = File.expand_path('.miga_rc', ENV['HOME'])
59
113
  if File.exist? rc_path
60
114
  if cli.ask_user(
@@ -70,8 +124,10 @@ BANNER
70
124
  # `miga init` made this on #{Time.now}
71
125
 
72
126
  BASH
127
+ rc_fh
128
+ end
73
129
 
74
- # Check bash configuration file
130
+ def check_configuration_script(rc_fh)
75
131
  unless File.exist? cli[:config]
76
132
  cli[:config] = cli.ask_user(
77
133
  'Is there a script I need to load at startup?',
@@ -86,47 +142,62 @@ BASH
86
142
  cli[:config] = '/dev/null'
87
143
  end
88
144
  cli.puts ''
145
+ end
89
146
 
90
- # Check for software requirements
147
##
# Locate every executable listed in utils/requirements.txt and write to the
# rc file +rc_fh+ the lines that build the PATH used by MiGA.
#
# The first two lines of the file are skipped, as well as the entries with a
# name ending in "(opt)" unless MyTaxa is enabled.
#
# Returns a Hash from executable name to its shell-escaped full path.
def check_software_requirements(rc_fh)
  cli.puts 'Looking for requirements:'
  ask_for_mytaxa
  rc_fh.puts 'export MIGA_MYTAXA="no"' unless cli[:mytaxa]
  paths = {}
  rc_fh.puts 'MIGA_PATH=""'
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
  File.open(req_path, 'r') do |fh|
    fh.each_line do |ln|
      # Line number taken from the handle itself rather than from $.,
      # which any other line-based read would overwrite
      next if fh.lineno < 3

      r = ln.chomp.split(/\t+/)
      next if r[0] =~ /\(opt\)$/ && !cli[:mytaxa]

      cli.print "Testing #{r[0]}#{" (#{r[3]})" if r[3]}... "
      path = find_software(r[1], rc_fh)
      paths[r[1]] = File.expand_path(r[1], path).shellescape
    end
  end
  rc_fh.puts 'export PATH="$MIGA_PATH$PATH"'
  cli.puts ''
  paths
end

##
# Ask the user whether the MyTaxa modules should be included, unless the
# option was already set, and save the answer in +cli[:mytaxa]+.
def ask_for_mytaxa
  return unless cli[:mytaxa].nil?

  answer = cli.ask_user(
    'Should I include MyTaxa modules?',
    'yes', %w(yes no)
  )
  cli[:mytaxa] = answer == 'yes'
end

##
# Find the directory containing the executable +exec+, asking the user
# until an executable file is found there. The directory reported by
# +which+ is used directly unless it is empty or +cli[:ask]+ is set.
#
# If the directory differs from the one reported by +which+, it is
# registered in the rc file +rc_fh+ (when given) to be added to MIGA_PATH.
#
# Returns the directory.
def find_software(exec, rc_fh = nil)
  path = nil
  loop do
    # File.dirname returns '.' when +which+ prints nothing
    d_path = File.dirname(run_cmd(cli, "which #{exec.shellescape}"))
    if cli[:ask] || d_path == '.'
      path = cli.ask_user('Where can I find it?', d_path, nil, true)
    else
      path = d_path
      cli.puts path
    end
    if File.executable?(File.expand_path(exec, path))
      if d_path != path
        rc_fh&.puts "MIGA_PATH=\"#{path}:$MIGA_PATH\" # #{exec}"
      end
      break
    end
    cli.print "I cannot find #{exec} "
  end
  path
end
128
199
 
129
- # Check for other files
200
+ def check_additional_files(paths)
130
201
  if cli[:mytaxa]
131
202
  cli.puts 'Looking for MyTaxa databases:'
132
203
  mt = File.dirname paths["MyTaxa"]
@@ -145,8 +216,9 @@ BASH
145
216
  end
146
217
  cli.puts ''
147
218
  end
219
+ end
148
220
 
149
- # Check for R packages
221
+ def check_r_packages(paths)
150
222
  cli.puts 'Looking for R packages:'
151
223
  %w(enveomics.R ape cluster vegan).each do |pkg|
152
224
  cli.print "Testing #{pkg}... "
@@ -161,8 +233,9 @@ BASH
161
233
  end
162
234
  end
163
235
  cli.puts ''
236
+ end
164
237
 
165
- # Check for Ruby gems
238
+ def check_ruby_gems(paths)
166
239
  cli.puts 'Looking for Ruby gems:'
167
240
  %w(sqlite3 daemons json).each do |pkg|
168
241
  cli.print "Testing #{pkg}... "
@@ -171,8 +244,8 @@ BASH
171
244
  else
172
245
  cli.puts 'no, installing'
173
246
  # This hackey mess is meant to ensure the test and installation are done
174
- # on the configuration Ruby, not on the Ruby currently executing the init
175
- # action
247
+ # on the configuration Ruby, not on the Ruby currently executing the
248
+ # init action
176
249
  cli.print install_ruby_gem(cli, paths, pkg)
177
250
  unless test_ruby_gem(cli, paths, pkg)
178
251
  raise "Unable to auto-install Ruby gem: #{pkg}"
@@ -180,119 +253,9 @@ BASH
180
253
  end
181
254
  end
182
255
  cli.puts ''
256
+ end
183
257
 
184
- # Configure daemon
185
- cli.puts 'Default daemon configuration:'
186
- daemon_f = File.expand_path('.miga_daemon.json', ENV['HOME'])
187
- unless File.exist?(daemon_f) and cli.ask_user(
188
- 'A template daemon already exists, do you want to preserve it?',
189
- 'yes', %w(yes no)) == 'yes'
190
- v = {created: Time.now.to_s, updated: Time.now.to_s}
191
- v[:type] = cli.ask_user(
192
- 'Please select the type of daemon you want to setup',
193
- cli[:dtype], %w(bash qsub msub slurm))
194
- case v[:type]
195
- when 'bash'
196
- v[:latency] = cli.ask_user(
197
- 'How long should I sleep? (in seconds)', '30').to_i
198
- v[:maxjobs] = cli.ask_user(
199
- 'How many jobs can I launch at once?', '6').to_i
200
- v[:ppn] = cli.ask_user(
201
- 'How many CPUs can I use per job?', '2').to_i
202
- cli.puts 'Setting up internal daemon defaults.'
203
- cli.puts 'If you don\'t understand this just leave default values:'
204
- v[:cmd] = cli.ask_user(
205
- "How should I launch tasks?\n %1$s: script path, " \
206
- "%2$s: variables, %3$d: CPUs, %4$s: log file, %5$s: task name.\n",
207
- "%2$s '%1$s' > '%4$s' 2>&1")
208
- v[:var] = cli.ask_user(
209
- "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
210
- "%1$s=%2$s")
211
- v[:varsep] = cli.ask_user(
212
- 'What should I use to separate variables?', ' ')
213
- v[:alive] = cli.ask_user(
214
- "How can I know that a process is still alive?\n %1$s: PID, " \
215
- "output should be 1 for running and 0 for non-running.\n",
216
- "ps -p '%1$s'|tail -n+2|wc -l")
217
- v[:kill] = cli.ask_user(
218
- "How should I terminate tasks?\n %s: process ID.", "kill -9 '%s'")
219
- when 'slurm'
220
- queue = cli.ask_user(
221
- 'What queue should I use?', nil, nil, true)
222
- v[:latency] = cli.ask_user(
223
- 'How long should I sleep? (in seconds)', '150').to_i
224
- v[:maxjobs] = cli.ask_user(
225
- 'How many jobs can I launch at once?', '300').to_i
226
- v[:ppn] = cli.ask_user(
227
- 'How many CPUs can I use per job?', '2').to_i
228
- cli.puts 'Setting up internal daemon defaults'
229
- cli.puts 'If you don\'t understand this just leave default values:'
230
- v[:cmd] = cli.ask_user(
231
- "How should I launch tasks?\n %1$s: script path, " \
232
- "%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
233
- "%2$s sbatch --partition='#{queue}' --export=ALL " \
234
- "--nodes=1 --ntasks-per-node=%3$d --output='%4$s' " \
235
- "--job-name='%5$s' --mem=9G --time=12:00:00 %1$s " \
236
- "| perl -pe 's/.* //'")
237
- v[:var] = cli.ask_user(
238
- "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
239
- "%1$s=%2$s")
240
- v[:varsep] = cli.ask_user(
241
- 'What should I use to separate variables?', ' ')
242
- v[:alive] = cli.ask_user(
243
- "How can I know that a process is still alive?\n %1$s: job id, " \
244
- "output should be 1 for running and 0 for non-running.\n",
245
- "squeue -h -o %%t -j '%1$s' | grep '^PD\\|R\\|CF\\|CG$' " \
246
- "| tail -n 1 | wc -l")
247
- v[:kill] = cli.ask_user(
248
- "How should I terminate tasks?\n %s: process ID.", "scancel '%s'")
249
- else # [qm]sub
250
- queue = cli.ask_user('What queue should I use?', nil, nil, true)
251
- v[:latency] = cli.ask_user(
252
- 'How long should I sleep? (in seconds)', '150').to_i
253
- v[:maxjobs] = cli.ask_user(
254
- 'How many jobs can I launch at once?', '300').to_i
255
- v[:ppn] = cli.ask_user(
256
- 'How many CPUs can I use per job?', '2').to_i
257
- cli.puts 'Setting up internal daemon defaults.'
258
- cli.puts 'If you don\'t understand this just leave default values:'
259
- v[:cmd] = cli.ask_user(
260
- "How should I launch tasks?\n %1$s: script path, " \
261
- "%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
262
- "#{v[:type]} -q '#{queue}' -v '%2$s' -l nodes=1:ppn=%3$d %1$s " \
263
- "-j oe -o '%4$s' -N '%5$s' -l mem=9g -l walltime=12:00:00 " \
264
- "| grep .")
265
- v[:var] = cli.ask_user(
266
- "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
267
- "%1$s=%2$s")
268
- v[:varsep] = cli.ask_user(
269
- 'What should I use to separate variables?', ',')
270
- if v[:type] == 'qsub'
271
- v[:alive] = cli.ask_user(
272
- "How can I know that a process is still alive?\n " \
273
- "%1$s: job id, output should be 1 for running and " \
274
- "0 for non-running.\n",
275
- "qstat -f '%1$s'|grep ' job_state ='|perl -pe 's/.*= //'" \
276
- "|grep '[^C]'|tail -n1|wc -l|awk '{print $1}'")
277
- v[:kill] = cli.ask_user(
278
- "How should I terminate tasks?\n %s: process ID.", "qdel '%s'")
279
- else # msub
280
- v[:alive] = cli.ask_user(
281
- "How can I know that a process is still alive?\n " \
282
- "%1$s: job id, output should be 1 for running and " \
283
- "0 for non-running.\n",
284
- "checkjob '%1$s'|grep '^State:'|perl -pe 's/.*: //'" \
285
- "|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'" \
286
- "|tail -n1|wc -l|awk '{print $1}'")
287
- v[:kill] = cli.ask_user(
288
- "How should I terminate tasks?\n %s: process ID.",
289
- "canceljob '%s'")
290
- end
291
- end
292
- File.open(daemon_f, 'w') { |fh| fh.puts JSON.pretty_generate(v) }
293
- end
294
- cli.puts ''
295
-
258
+ def close_rc_file(rc_fh)
296
259
  rc_fh.puts <<FOOT
297
260
 
298
261
  MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
@@ -300,44 +263,6 @@ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
300
263
  MIGA_CONFIG_DATE='#{Time.now}'
301
264
 
302
265
  FOOT
303
-
304
- cli.puts 'Configuration complete. MiGA is ready to work!'
305
- cli.puts ''
306
-
307
- end
308
-
309
- def empty_action
310
- end
311
-
312
- def run_cmd(cli, cmd)
313
- `. "#{cli[:config]}" && #{cmd}`
314
- end
315
-
316
- def run_r_cmd(cli, paths, cmd)
317
- run_cmd(cli,
318
- "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
319
- end
320
-
321
- def test_r_package(cli, paths, pkg)
322
- run_r_cmd(cli, paths, "library('#{pkg}')")
323
- $?.success?
324
- end
325
-
326
- def install_r_package(cli, paths, pkg)
327
- r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
328
- run_r_cmd(cli, paths, r_cmd)
329
- end
330
-
331
- def test_ruby_gem(cli, paths, pkg)
332
- run_cmd(cli,
333
- "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
334
- $?.success?
335
- end
336
-
337
- def install_ruby_gem(cli, paths, pkg)
338
- gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
339
- run_cmd(cli, "#{paths['ruby'].shellescape} \
340
- -r rubygems -r rubygems/gem_runner \
341
- -e #{gem_cmd.shellescape} 2>&1")
266
+ rc_fh.close
342
267
  end
343
268
  end