miga-base 0.4.3.0 → 0.5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/lib/miga/cli.rb +43 -223
  4. data/lib/miga/cli/action/add.rb +91 -62
  5. data/lib/miga/cli/action/classify_wf.rb +97 -0
  6. data/lib/miga/cli/action/daemon.rb +14 -10
  7. data/lib/miga/cli/action/derep_wf.rb +95 -0
  8. data/lib/miga/cli/action/doctor.rb +83 -55
  9. data/lib/miga/cli/action/get.rb +68 -52
  10. data/lib/miga/cli/action/get_db.rb +206 -0
  11. data/lib/miga/cli/action/index_wf.rb +31 -0
  12. data/lib/miga/cli/action/init.rb +115 -190
  13. data/lib/miga/cli/action/init/daemon_helper.rb +124 -0
  14. data/lib/miga/cli/action/ls.rb +20 -11
  15. data/lib/miga/cli/action/ncbi_get.rb +199 -157
  16. data/lib/miga/cli/action/preproc_wf.rb +46 -0
  17. data/lib/miga/cli/action/quality_wf.rb +45 -0
  18. data/lib/miga/cli/action/stats.rb +147 -99
  19. data/lib/miga/cli/action/summary.rb +10 -4
  20. data/lib/miga/cli/action/tax_dist.rb +61 -46
  21. data/lib/miga/cli/action/tax_test.rb +46 -39
  22. data/lib/miga/cli/action/wf.rb +178 -0
  23. data/lib/miga/cli/base.rb +11 -0
  24. data/lib/miga/cli/objects_helper.rb +88 -0
  25. data/lib/miga/cli/opt_helper.rb +160 -0
  26. data/lib/miga/daemon.rb +7 -4
  27. data/lib/miga/dataset/base.rb +5 -5
  28. data/lib/miga/project/base.rb +4 -4
  29. data/lib/miga/project/result.rb +2 -1
  30. data/lib/miga/remote_dataset/base.rb +5 -5
  31. data/lib/miga/remote_dataset/download.rb +1 -1
  32. data/lib/miga/version.rb +3 -3
  33. data/scripts/cds.bash +3 -1
  34. data/scripts/essential_genes.bash +1 -0
  35. data/scripts/stats.bash +1 -1
  36. data/scripts/trimmed_fasta.bash +5 -3
  37. data/utils/distance/runner.rb +3 -0
  38. data/utils/distance/temporal.rb +10 -1
  39. data/utils/enveomics/Manifest/Tasks/fasta.json +5 -0
  40. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +7 -0
  41. data/utils/enveomics/Scripts/BlastTab.addlen.rb +33 -31
  42. data/utils/enveomics/Scripts/FastA.tag.rb +42 -41
  43. data/utils/enveomics/Scripts/HMM.essential.rb +85 -55
  44. data/utils/enveomics/Scripts/HMM.haai.rb +29 -20
  45. data/utils/enveomics/Scripts/SRA.download.bash +1 -1
  46. data/utils/enveomics/Scripts/aai.rb +163 -128
  47. data/utils/enveomics/build_enveomics_r.bash +11 -10
  48. data/utils/enveomics/enveomics.R/DESCRIPTION +3 -2
  49. data/utils/enveomics/enveomics.R/R/autoprune.R +141 -107
  50. data/utils/enveomics/enveomics.R/R/barplot.R +105 -86
  51. data/utils/enveomics/enveomics.R/R/cliopts.R +131 -115
  52. data/utils/enveomics/enveomics.R/R/df2dist.R +144 -106
  53. data/utils/enveomics/enveomics.R/R/growthcurve.R +201 -133
  54. data/utils/enveomics/enveomics.R/R/recplot.R +350 -315
  55. data/utils/enveomics/enveomics.R/R/recplot2.R +1334 -914
  56. data/utils/enveomics/enveomics.R/R/tribs.R +521 -361
  57. data/utils/enveomics/enveomics.R/R/utils.R +31 -15
  58. data/utils/enveomics/enveomics.R/README.md +7 -0
  59. data/utils/enveomics/enveomics.R/man/cash-enve.GrowthCurve-method.Rd +17 -0
  60. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2-method.Rd +17 -0
  61. data/utils/enveomics/enveomics.R/man/cash-enve.RecPlot2.Peak-method.Rd +17 -0
  62. data/utils/enveomics/enveomics.R/man/enve.GrowthCurve-class.Rd +16 -21
  63. data/utils/enveomics/enveomics.R/man/enve.TRIBS-class.Rd +31 -28
  64. data/utils/enveomics/enveomics.R/man/enve.TRIBS.merge.Rd +23 -19
  65. data/utils/enveomics/enveomics.R/man/enve.TRIBStest-class.Rd +36 -26
  66. data/utils/enveomics/enveomics.R/man/enve.__prune.iter.Rd +23 -24
  67. data/utils/enveomics/enveomics.R/man/enve.__prune.reduce.Rd +23 -24
  68. data/utils/enveomics/enveomics.R/man/enve.__tribs.Rd +32 -33
  69. data/utils/enveomics/enveomics.R/man/enve.barplot.Rd +91 -64
  70. data/utils/enveomics/enveomics.R/man/enve.cliopts.Rd +57 -37
  71. data/utils/enveomics/enveomics.R/man/enve.col.alpha.Rd +24 -19
  72. data/utils/enveomics/enveomics.R/man/enve.col2alpha.Rd +19 -18
  73. data/utils/enveomics/enveomics.R/man/enve.df2dist.Rd +39 -26
  74. data/utils/enveomics/enveomics.R/man/enve.df2dist.group.Rd +38 -25
  75. data/utils/enveomics/enveomics.R/man/enve.df2dist.list.Rd +40 -26
  76. data/utils/enveomics/enveomics.R/man/enve.growthcurve.Rd +67 -49
  77. data/utils/enveomics/enveomics.R/man/enve.prune.dist.Rd +37 -28
  78. data/utils/enveomics/enveomics.R/man/enve.recplot.Rd +122 -97
  79. data/utils/enveomics/enveomics.R/man/enve.recplot2-class.Rd +35 -31
  80. data/utils/enveomics/enveomics.R/man/enve.recplot2.ANIr.Rd +24 -23
  81. data/utils/enveomics/enveomics.R/man/enve.recplot2.Rd +68 -51
  82. data/utils/enveomics/enveomics.R/man/enve.recplot2.__counts.Rd +25 -24
  83. data/utils/enveomics/enveomics.R/man/enve.recplot2.__peakHist.Rd +21 -22
  84. data/utils/enveomics/enveomics.R/man/enve.recplot2.__whichClosestPeak.Rd +19 -20
  85. data/utils/enveomics/enveomics.R/man/enve.recplot2.changeCutoff.Rd +19 -18
  86. data/utils/enveomics/enveomics.R/man/enve.recplot2.compareIdentities.Rd +41 -32
  87. data/utils/enveomics/enveomics.R/man/enve.recplot2.coordinates.Rd +29 -24
  88. data/utils/enveomics/enveomics.R/man/enve.recplot2.corePeak.Rd +18 -18
  89. data/utils/enveomics/enveomics.R/man/enve.recplot2.extractWindows.Rd +40 -34
  90. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.Rd +36 -24
  91. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_e.Rd +19 -20
  92. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__em_m.Rd +19 -20
  93. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__emauto_one.Rd +27 -29
  94. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mow_one.Rd +41 -42
  95. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.__mower.Rd +17 -18
  96. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.em.Rd +43 -33
  97. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.emauto.Rd +36 -28
  98. data/utils/enveomics/enveomics.R/man/enve.recplot2.findPeaks.mower.Rd +74 -56
  99. data/utils/enveomics/enveomics.R/man/enve.recplot2.peak-class.Rd +44 -31
  100. data/utils/enveomics/enveomics.R/man/enve.recplot2.seqdepth.Rd +27 -22
  101. data/utils/enveomics/enveomics.R/man/enve.recplot2.windowDepthThreshold.Rd +32 -26
  102. data/utils/enveomics/enveomics.R/man/enve.tribs.Rd +59 -44
  103. data/utils/enveomics/enveomics.R/man/enve.tribs.test.Rd +28 -21
  104. data/utils/enveomics/enveomics.R/man/enve.truncate.Rd +27 -22
  105. data/utils/enveomics/enveomics.R/man/plot.enve.GrowthCurve.Rd +63 -43
  106. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBS.Rd +38 -29
  107. data/utils/enveomics/enveomics.R/man/plot.enve.TRIBStest.Rd +38 -30
  108. data/utils/enveomics/enveomics.R/man/plot.enve.recplot2.Rd +111 -83
  109. data/utils/enveomics/enveomics.R/man/summary.enve.GrowthCurve.Rd +19 -18
  110. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBS.Rd +19 -18
  111. data/utils/enveomics/enveomics.R/man/summary.enve.TRIBStest.Rd +19 -18
  112. data/utils/find-medoid.R +3 -2
  113. data/utils/representatives.rb +5 -3
  114. data/utils/subclade/pipeline.rb +22 -11
  115. data/utils/subclade/runner.rb +5 -1
  116. data/utils/subclades-compile.rb +1 -1
  117. data/utils/subclades.R +9 -3
  118. metadata +15 -4
  119. data/utils/enveomics/enveomics.R/man/enveomics.R-package.Rd +0 -15
  120. data/utils/enveomics/enveomics.R/man/z$-methods.Rd +0 -26
@@ -67,66 +67,82 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
67
67
  end
68
68
 
69
69
  def perform
70
- glob = [cli]
71
- unless cli[:file].nil?
72
- glob = []
73
- File.open(cli[:file], 'r') do |fh|
74
- h = nil
75
- fh.each do |ln|
76
- r = ln.chomp.split(/\t/)
77
- if h.nil?
78
- h = r
79
- else
80
- argv_i = [self.name]
81
- h.each_with_index do |field, k|
82
- case field.downcase
83
- when *%w[query ignore-dup get-metadata only-metadata]
84
- argv_i << "--#{field.downcase}" if r[k].downcase == 'true'
85
- when *%w[project file verbose help debug]
86
- raise "Unsupported header: #{field}"
87
- else
88
- argv_i += ["--#{field.downcase}", r[k]]
89
- end
70
+ glob = get_sub_cli
71
+ p = cli.load_project
72
+ glob.each do |sub_cli|
73
+ rd = create_remote_dataset(sub_cli)
74
+ next if rd.nil?
75
+ if sub_cli[:get_md]
76
+ update_metadata(sub_cli, p, rd)
77
+ else
78
+ create_dataset(sub_cli, p, rd)
79
+ end
80
+ end
81
+ end
82
+
83
+ private
84
+
85
+ def get_sub_cli
86
+ return [cli] if cli[:file].nil?
87
+ glob = []
88
+ File.open(cli[:file], 'r') do |fh|
89
+ h = nil
90
+ fh.each do |ln|
91
+ r = ln.chomp.split(/\t/)
92
+ if h.nil?
93
+ h = r
94
+ else
95
+ argv_i = [self.name]
96
+ h.each_with_index do |field, k|
97
+ case field.downcase
98
+ when *%w[query ignore-dup get-metadata only-metadata]
99
+ argv_i << "--#{field.downcase}" if r[k].downcase == 'true'
100
+ when *%w[project file verbose help debug]
101
+ raise "Unsupported header: #{field}"
102
+ else
103
+ argv_i += ["--#{field.downcase}", r[k]]
90
104
  end
91
- sub_cli = MiGA::Cli.new(argv_i)
92
- sub_cli.defaults = cli.data
93
- sub_cli.action.parse_cli
94
- glob << sub_cli
95
105
  end
106
+ sub_cli = MiGA::Cli.new(argv_i)
107
+ sub_cli.defaults = cli.data
108
+ sub_cli.action.parse_cli
109
+ glob << sub_cli
96
110
  end
97
111
  end
98
112
  end
113
+ glob
114
+ end
99
115
 
100
- p = cli.load_project
101
- glob.each do |sub_cli|
102
- sub_cli.ensure_par(dataset: '-D', ids: '-I')
103
- unless sub_cli[:api_key].nil?
104
- ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
105
- end
116
+ def create_remote_dataset(sub_cli)
117
+ sub_cli.ensure_par(dataset: '-D', ids: '-I')
118
+ unless sub_cli[:api_key].nil?
119
+ ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
120
+ end
106
121
 
107
- sub_cli.say "Dataset: #{sub_cli[:dataset]}"
108
- if sub_cli[:ignore_dup] && !sub_cli[:get_md]
109
- next if Dataset.exist?(p, sub_cli[:dataset])
110
- end
122
+ sub_cli.say "Dataset: #{sub_cli[:dataset]}"
123
+ if sub_cli[:ignore_dup] && !sub_cli[:get_md]
124
+ return if Dataset.exist?(p, sub_cli[:dataset])
125
+ end
111
126
 
112
- sub_cli.say 'Locating remote dataset'
113
- rd = RemoteDataset.new(sub_cli[:ids], sub_cli[:db], sub_cli[:universe])
127
+ sub_cli.say 'Locating remote dataset'
128
+ RemoteDataset.new(sub_cli[:ids], sub_cli[:db], sub_cli[:universe])
129
+ end
114
130
 
115
- if sub_cli[:get_md]
116
- sub_cli.say 'Updating dataset'
117
- d = p.dataset(sub_cli[:dataset])
118
- next if d.nil?
119
- md = sub_cli.add_metadata(d).metadata.data
120
- rd.update_metadata(d, md)
121
- else
122
- sub_cli.say 'Creating dataset'
123
- dummy_d = Dataset.new(p, sub_cli[:dataset])
124
- md = sub_cli.add_metadata(dummy_d).metadata.data
125
- md[:metadata_only] = true if cli[:only_md]
126
- dummy_d.remove!
127
- rd.save_to(p, sub_cli[:dataset], !sub_cli[:query], md)
128
- p.add_dataset(sub_cli[:dataset])
129
- end
130
- end
131
+ def update_metadata(sub_cli, p, rd)
132
+ sub_cli.say 'Updating dataset'
133
+ d = p.dataset(sub_cli[:dataset])
134
+ return if d.nil?
135
+ md = sub_cli.add_metadata(d).metadata.data
136
+ rd.update_metadata(d, md)
137
+ end
138
+
139
+ def create_dataset(sub_cli, p, rd)
140
+ sub_cli.say 'Creating dataset'
141
+ dummy_d = Dataset.new(p, sub_cli[:dataset])
142
+ md = sub_cli.add_metadata(dummy_d).metadata.data
143
+ md[:metadata_only] = true if cli[:only_md]
144
+ dummy_d.remove!
145
+ rd.save_to(p, sub_cli[:dataset], !sub_cli[:query], md)
146
+ p.add_dataset(sub_cli[:dataset])
131
147
  end
132
148
  end
@@ -0,0 +1,206 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+ require 'net/ftp'
6
+ require 'digest/md5'
7
+
8
# CLI action that downloads a database archive from a remote FTP host,
# checks its MD5 digest, unarchives it into a local directory, and records
# it in a local manifest file (+_local_manif.json+).
class MiGA::Cli::Action::GetDb < MiGA::Cli::Action

  # Set the default options and parse the command-line arguments.
  def parse_cli
    cli.defaults = {
      database: :recommended,
      version: :latest,
      local: File.expand_path('.miga_db', ENV['MIGA_HOME']),
      host: 'ftp://microbial-genomes.org/db',
      pb: true,
      overwrite: true
    }
    cli.parse do |opt|
      opt.on(
        '-n', '--database STRING',
        "Name of the database to download. By default: #{cli[:database]}"
      ) { |v| cli[:database] = v.to_sym }
      opt.on(
        '--db-version STRING',
        "Database version to download. By default: #{cli[:version]}"
      ) { |v| cli[:version] = v.to_sym }
      opt.on(
        '-l', '--local-dir PATH',
        "Local directory to store the database. By default: #{cli[:local]}"
      ) { |v| cli[:local] = v }
      # NOTE(review): '-h' is conventionally the short flag for help; confirm
      # it does not clash with options registered by +cli.parse+ itself.
      # The value is stored as a String under :host because that is the key
      # (and type) +remote_connection+ passes to URI.parse.
      opt.on(
        '-h', '--host STRING',
        "Remote host of the database. By default: #{cli[:host]}"
      ) { |v| cli[:host] = v }
      opt.on(
        '--list',
        'List available databases and exit'
      ) { |v| cli[:list_databases] = v }
      opt.on(
        '--list-versions',
        'List available versions of the database and exit'
      ) { |v| cli[:list_versions] = v }
      # For the '--no-*' switches below the block is expected to receive the
      # negated value (false), which turns the corresponding default off.
      opt.on(
        '--no-overwrite',
        'Exit without downloading if the target database already exists'
      ) { |v| cli[:overwrite] = v }
      opt.on('--no-progress', 'Suppress progress bars') { |v| cli[:pb] = v }
    end
  end

  # Run the action: connect, fetch the remote manifest, optionally list
  # databases or versions and stop, otherwise download, verify, unarchive,
  # and register the requested database version.
  def perform
    @ftp = remote_connection
    manif = remote_manifest(@ftp)
    cli.puts "# Host: #{manif[:host]}"
    cli.puts "# Manifest last update: #{manif[:last_update]}"
    return if list_databases(manif)

    db = db_requested(manif)
    return if list_versions(db)

    ver = version_requested(db)
    return if check_target

    file = download_file(@ftp, ver[:path])
    check_digest(ver, file)
    unarchive(file)
    register_database(manif, db, ver)
  end

  # Message printed when the action is called without arguments.
  def empty_action
    cli.puts 'Downloading latest version of the default database'
  end

  # Close the FTP connection (if one was opened) before completing.
  def complete
    @ftp&.close
    super
  end

  private

  # Open an FTP connection to the host in +cli[:host]+ and change to the
  # directory given by the URI path. Raises if the URI scheme is not FTP.
  # Returns the connected Net::FTP object.
  def remote_connection
    cli.say "Connecting to '#{cli[:host]}'"
    uri = URI.parse(cli[:host])
    raise 'Only FTP hosts are supported' unless uri.scheme == 'ftp'

    ftp = Net::FTP.open(uri.host, port: uri.port)
    ftp.login
    ftp.chdir(uri.path)
    ftp
  end

  # Download the remote +path+ through +ftp+ into the local directory
  # +cli[:local]+ (created if missing), advancing a progress bar when
  # +cli[:pb]+ is set. Returns the local path of the downloaded file.
  def download_file(ftp, path)
    cli.say "Downloading '#{path}'"
    Dir.mkdir(cli[:local]) unless Dir.exist? cli[:local]
    file = File.expand_path(path, cli[:local])
    filesize = ftp.size(path)
    transferred = 0
    ftp.getbinaryfile(path, file, 1024) do |data|
      if cli[:pb]
        transferred += data.size
        cli.advance("#{path}:", transferred, filesize)
      end
    end
    cli.print "\n" if cli[:pb]
    file
  end

  # Download and parse the remote manifest file (+_manif.json+).
  def remote_manifest(ftp)
    file = download_file(ftp, '_manif.json')
    MiGA::Json.parse(file)
  end

  # Resolve the requested database in the manifest +manif+. The aliases
  # +:recommended+ and +:test+ are replaced in +cli[:database]+ by the name
  # the manifest assigns to them. Raises if the alias or the database is not
  # defined by the host. Returns the manifest entry of the database.
  def db_requested(manif)
    %i[recommended test].each do |n|
      next unless cli[:database] == n
      raise "This host has no #{n} database" if manif[n].nil?

      cli[:database] = manif[n].to_sym
    end
    db = manif[:databases][cli[:database]]
    raise 'Cannot find database in this host' if db.nil?

    db
  end

  # Resolve the requested version of the database entry +db+, replacing
  # +:latest+ in +cli[:version]+ by the version that +db+ marks as latest.
  # Raises if the version is not listed. Returns the version entry.
  def version_requested(db)
    cli[:version] = db[:latest].to_sym if cli[:version] == :latest
    ver = db[:versions][cli[:version]]
    raise 'Cannot find database version' if ver.nil?

    cli.puts "# Database size: #{version_size(ver)}"
    ver
  end

  # Print a table of the databases in +manif+ if +--list+ was requested.
  # Returns true if the list was printed, false otherwise.
  def list_databases(manif)
    return false unless cli[:list_databases]

    cli.puts "# Recommended database: #{manif[:recommended]}"
    cli.puts ''
    cli.table(
      %w[name description latest versions],
      manif[:databases].map do |name, i|
        [name, i[:description], i[:latest], i[:versions].size.to_s]
      end
    )
    true
  end

  # Print a table of the versions of +db+ if +--list-versions+ was
  # requested. Returns true if the list was printed, false otherwise.
  def list_versions(db)
    return false unless cli[:list_versions]

    cli.puts "# Database: #{cli[:database]}"
    cli.puts ''
    cli.table(
      %w[version updated size datasets],
      db[:versions].map do |name, i|
        [name, i[:last_update], version_size(i), i[:datasets]]
      end
    )
    true
  end

  # When overwriting is disabled, check whether the target directory of the
  # database already exists under +cli[:local]+. Returns true (after a
  # warning) if it exists and must not be overwritten, false otherwise.
  def check_target
    return false if cli[:overwrite]

    file = File.expand_path(cli[:database], cli[:local])
    return false unless Dir.exist? file

    warn "The target directory already exists: #{file}"
    true
  end

  # Compare the MD5 digest of the local +file+ against the one declared in
  # the version entry +ver+ (key +:MD5+). Raises if they differ.
  def check_digest(ver, file)
    cli.say 'Checking MD5 digest'
    cli.say "Expected: #{ver[:MD5]}"
    dig = Digest::MD5.file(file).hexdigest
    cli.say "Observed: #{dig}"
    raise 'Corrupt file, MD5 does not match' unless dig == ver[:MD5]
  end

  # Human-readable size of the version entry +ver+, in the form
  # "archived (unarchived)".
  def version_size(ver)
    cli.num_suffix(ver[:size], true) + ' (' +
      cli.num_suffix(ver[:size_unarchived], true) + ')'
  end

  # Extract the gzipped tar archive +file+ into +cli[:local]+. The command
  # is run without a shell so that paths containing quotes or other shell
  # metacharacters are passed verbatim. Raises if tar does not succeed, so
  # that a failed extraction is never registered as an installed database.
  def unarchive(file)
    cli.say "Unarchiving #{file}"
    return if system('tar', '-zxf', file, '-C', cli[:local])

    raise "Cannot unarchive #{file}"
  end

  # Record the downloaded database in the local manifest
  # (+_local_manif.json+ under +cli[:local]+), creating the file if needed.
  # The entry keeps the host and last-update stamp of the remote manifest
  # +manif+, every field of the remote database entry +db+, and the
  # downloaded version entry +ver+ as +:local_version+.
  def register_database(manif, db, ver)
    cli.say 'Registering database locally'
    local_manif = File.expand_path('_local_manif.json', cli[:local])
    reg = File.exist?(local_manif) ? MiGA::Json.parse(local_manif) : {}
    reg[:last_update] = Time.now.to_s
    reg[:databases] ||= {}
    entry = (reg[:databases][cli[:database]] ||= {})
    entry[:manif_last_update] = manif[:last_update]
    entry[:manif_host] = manif[:host]
    db.each { |k, v| entry[k] = v }
    entry[:local_version] = ver
    MiGA::Json.generate(reg, local_manif)
  end
end
@@ -0,0 +1,31 @@
1
+ # @package MiGA
2
+ # @license Artistic-2.0
3
+
4
+ require 'miga/cli/action'
5
+
6
# CLI workflow action that takes genome assemblies as input, creates a
# project from them, runs the daemon, and summarizes the results. The
# workflow helpers come from MiGA::Cli::Action::Wf.
class MiGA::Cli::Action::IndexWf < MiGA::Cli::Action
  require 'miga/cli/action/wf'
  include MiGA::Cli::Action::Wf

  # Set the workflow defaults and parse the command-line arguments. MyTaxa
  # scan is off unless +--mytaxa-scan+ is passed.
  def parse_cli
    default_opts_for_wf
    cli.defaults = { mytaxa: false }
    cli.parse do |opt|
      opt.on(
        '-m', '--mytaxa-scan',
        'Perform MyTaxa scan analysis'
      ) { |v| cli[:mytaxa] = v }
      opts_for_wf_distances(opt)
      opts_for_wf(
        opt, 'Input genome assemblies (nucleotides, FastA)',
        cleanup: false, project_type: true
      )
    end
  end

  # Run the workflow: create the project from the input assemblies, run the
  # daemon, and summarize.
  def perform
    # Input data. The return value of create_project is not needed here, so
    # it is not stored (a local named +p+ would also shadow Kernel#p).
    create_project(:assembly, {}, run_mytaxa_scan: cli[:mytaxa])
    # Run
    run_daemon
    summarize
  end
end
@@ -5,38 +5,39 @@ require 'miga/cli/action'
5
5
  require 'shellwords'
6
6
 
7
7
  class MiGA::Cli::Action::Init < MiGA::Cli::Action
8
-
8
+ require 'miga/cli/action/init/daemon_helper'
9
+ include MiGA::Cli::Action::Init::DaemonHelper
10
+
9
11
  def parse_cli
10
12
  cli.interactive = true
11
- cli.defaults = {mytaxa: nil,
13
+ cli.defaults = { mytaxa: nil,
12
14
  config: File.expand_path('.miga_modules', ENV['HOME']),
13
- ask: false, auto: false, dtype: :bash}
15
+ ask: false, auto: false, dtype: :bash }
14
16
  cli.parse do |opt|
15
17
  opt.on(
16
18
  '-c', '--config PATH',
17
19
  'Path to the Bash configuration file',
18
20
  "By default: #{cli[:config]}"
19
- ){ |v| cli[:config] = v }
21
+ ){ |v| cli[:config] = v }
20
22
  opt.on(
21
23
  '--[no-]mytaxa',
22
24
  'Should I try setting up MyTaxa its dependencies?',
23
25
  'By default: interactive (true if --auto)'
24
- ){ |v| cli[:mytaxa] = v }
26
+ ){ |v| cli[:mytaxa] = v }
25
27
  opt.on(
26
28
  '--daemon-type STRING',
27
29
  'Type of daemon launcher, one of: bash, qsub, msub, slurm',
28
30
  "By default: interactive (#{cli[:dtype]} if --auto)"
29
- ){ |v| cli[:dtype] = v.to_sym }
31
+ ){ |v| cli[:dtype] = v.to_sym }
30
32
  opt.on(
31
33
  '--ask-all',
32
34
  'Ask for the location of all software',
33
35
  'By default, only the locations missing in PATH are requested'
34
- ){ |v| cli[:ask] = v }
36
+ ){ |v| cli[:ask] = v }
35
37
  end
36
38
  end
37
39
 
38
40
  def perform
39
- miga = MiGA.root_path
40
41
  cli.puts <<BANNER
41
42
  ===[ Welcome to MiGA, the Microbial Genome Atlas ]===
42
43
 
@@ -44,17 +45,70 @@ I'm the initialization script, and I'll sniff around your computer to
44
45
  make sure you have all the requirements for MiGA data processing.
45
46
 
46
47
  BANNER
48
+ list_requirements
49
+ rc_fh = open_rc_file
50
+ check_configuration_script rc_fh
51
+ paths = check_software_requirements rc_fh
52
+ check_additional_files paths
53
+ check_r_packages paths
54
+ check_ruby_gems paths
55
+ configure_daemon
56
+ close_rc_file rc_fh
57
+ cli.puts 'Configuration complete. MiGA is ready to work!'
58
+ cli.puts ''
59
+ end
60
+
61
+ def empty_action
62
+ end
63
+
64
+ def run_cmd(cli, cmd)
65
+ `. "#{cli[:config]}" && #{cmd}`
66
+ end
67
+
68
+ def run_r_cmd(cli, paths, cmd)
69
+ run_cmd(cli,
70
+ "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
71
+ end
72
+
73
+ def test_r_package(cli, paths, pkg)
74
+ run_r_cmd(cli, paths, "library('#{pkg}')")
75
+ $?.success?
76
+ end
77
+
78
+ def install_r_package(cli, paths, pkg)
79
+ r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
80
+ run_r_cmd(cli, paths, r_cmd)
81
+ end
47
82
 
83
+ def test_ruby_gem(cli, paths, pkg)
84
+ run_cmd(cli,
85
+ "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
86
+ $?.success?
87
+ end
88
+
89
+ def install_ruby_gem(cli, paths, pkg)
90
+ gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
91
+ run_cmd(cli, "#{paths['ruby'].shellescape} \
92
+ -r rubygems -r rubygems/gem_runner \
93
+ -e #{gem_cmd.shellescape} 2>&1")
94
+ end
95
+
96
+ def list_requirements
48
97
  if cli.ask_user(
49
98
  'Would you like to see all the requirements before starting?',
50
99
  'no', %w(yes no)) == 'yes'
51
100
  cli.puts ''
52
- File.open(File.expand_path('utils/requirements.txt', miga), 'r') do |fh|
101
+ req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
102
+ File.open(req_path, 'r') do |fh|
53
103
  fh.each_line { |ln| cli.puts ln }
54
104
  end
55
105
  cli.puts ''
56
106
  end
107
+ end
108
+
109
+ private
57
110
 
111
+ def open_rc_file
58
112
  rc_path = File.expand_path('.miga_rc', ENV['HOME'])
59
113
  if File.exist? rc_path
60
114
  if cli.ask_user(
@@ -70,8 +124,10 @@ BANNER
70
124
  # `miga init` made this on #{Time.now}
71
125
 
72
126
  BASH
127
+ rc_fh
128
+ end
73
129
 
74
- # Check bash configuration file
130
+ def check_configuration_script(rc_fh)
75
131
  unless File.exist? cli[:config]
76
132
  cli[:config] = cli.ask_user(
77
133
  'Is there a script I need to load at startup?',
@@ -86,47 +142,62 @@ BASH
86
142
  cli[:config] = '/dev/null'
87
143
  end
88
144
  cli.puts ''
145
+ end
89
146
 
90
- # Check for software requirements
147
+ def check_software_requirements(rc_fh)
91
148
  cli.puts 'Looking for requirements:'
92
- if cli[:mytaxa].nil?
93
- cli[:mytaxa] = cli.ask_user(
94
- 'Should I include MyTaxa modules?',
95
- 'yes', %w(yes no)) == 'yes'
96
- end
149
+ ask_for_mytaxa
97
150
  rc_fh.puts 'export MIGA_MYTAXA="no"' unless cli[:mytaxa]
98
151
  paths = {}
99
152
  rc_fh.puts 'MIGA_PATH=""'
100
- File.open(File.expand_path('utils/requirements.txt', miga), 'r') do |fh|
153
+ req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
154
+ File.open(req_path, 'r') do |fh|
101
155
  fh.each_line do |ln|
102
156
  next if $. < 3
103
157
  r = ln.chomp.split(/\t+/)
104
158
  next if r[0] =~ /\(opt\)$/ && !cli[:mytaxa]
105
159
  cli.print "Testing #{r[0]}#{" (#{r[3]})" if r[3]}... "
106
- path = nil
107
- loop do
108
- d_path = File.dirname(run_cmd(cli, "which #{r[1].shellescape}"))
109
- if cli[:ask] || d_path == '.'
110
- path = cli.ask_user('Where can I find it?', d_path, nil, true)
111
- else
112
- path = d_path
113
- cli.puts path
114
- end
115
- if File.executable?(File.expand_path(r[1], path))
116
- if d_path != path
117
- rc_fh.puts "MIGA_PATH=\"#{path}:$MIGA_PATH\" # #{r[1]}"
118
- end
119
- break
120
- end
121
- cli.print "I cannot find #{r[1]} "
122
- end
160
+ path = find_software(r[1])
123
161
  paths[r[1]] = File.expand_path(r[1], path).shellescape
124
162
  end
125
163
  end
126
164
  rc_fh.puts 'export PATH="$MIGA_PATH$PATH"'
127
165
  cli.puts ''
166
+ paths
167
+ end
168
+
169
+ def ask_for_mytaxa
170
+ if cli[:mytaxa].nil?
171
+ cli[:mytaxa] =
172
+ cli.ask_user(
173
+ 'Should I include MyTaxa modules?',
174
+ 'yes', %w(yes no)
175
+ ) == 'yes'
176
+ end
177
+ end
178
+
179
+ def find_software(exec)
180
+ path = nil
181
+ loop do
182
+ d_path = File.dirname(run_cmd(cli, "which #{exec.shellescape}"))
183
+ if cli[:ask] || d_path == '.'
184
+ path = cli.ask_user('Where can I find it?', d_path, nil, true)
185
+ else
186
+ path = d_path
187
+ cli.puts path
188
+ end
189
+ if File.executable?(File.expand_path(exec, path))
190
+ if d_path != path
191
+ rc_fh.puts "MIGA_PATH=\"#{path}:$MIGA_PATH\" # #{exec}"
192
+ end
193
+ break
194
+ end
195
+ cli.print "I cannot find #{exec} "
196
+ end
197
+ path
198
+ end
128
199
 
129
- # Check for other files
200
+ def check_additional_files(paths)
130
201
  if cli[:mytaxa]
131
202
  cli.puts 'Looking for MyTaxa databases:'
132
203
  mt = File.dirname paths["MyTaxa"]
@@ -145,8 +216,9 @@ BASH
145
216
  end
146
217
  cli.puts ''
147
218
  end
219
+ end
148
220
 
149
- # Check for R packages
221
+ def check_r_packages(paths)
150
222
  cli.puts 'Looking for R packages:'
151
223
  %w(enveomics.R ape cluster vegan).each do |pkg|
152
224
  cli.print "Testing #{pkg}... "
@@ -161,8 +233,9 @@ BASH
161
233
  end
162
234
  end
163
235
  cli.puts ''
236
+ end
164
237
 
165
- # Check for Ruby gems
238
+ def check_ruby_gems(paths)
166
239
  cli.puts 'Looking for Ruby gems:'
167
240
  %w(sqlite3 daemons json).each do |pkg|
168
241
  cli.print "Testing #{pkg}... "
@@ -171,8 +244,8 @@ BASH
171
244
  else
172
245
  cli.puts 'no, installing'
173
246
  # This hackey mess is meant to ensure the test and installation are done
174
- # on the configuration Ruby, not on the Ruby currently executing the init
175
- # action
247
+ # on the configuration Ruby, not on the Ruby currently executing the
248
+ # init action
176
249
  cli.print install_ruby_gem(cli, paths, pkg)
177
250
  unless test_ruby_gem(cli, paths, pkg)
178
251
  raise "Unable to auto-install Ruby gem: #{pkg}"
@@ -180,119 +253,9 @@ BASH
180
253
  end
181
254
  end
182
255
  cli.puts ''
256
+ end
183
257
 
184
- # Configure daemon
185
- cli.puts 'Default daemon configuration:'
186
- daemon_f = File.expand_path('.miga_daemon.json', ENV['HOME'])
187
- unless File.exist?(daemon_f) and cli.ask_user(
188
- 'A template daemon already exists, do you want to preserve it?',
189
- 'yes', %w(yes no)) == 'yes'
190
- v = {created: Time.now.to_s, updated: Time.now.to_s}
191
- v[:type] = cli.ask_user(
192
- 'Please select the type of daemon you want to setup',
193
- cli[:dtype], %w(bash qsub msub slurm))
194
- case v[:type]
195
- when 'bash'
196
- v[:latency] = cli.ask_user(
197
- 'How long should I sleep? (in seconds)', '30').to_i
198
- v[:maxjobs] = cli.ask_user(
199
- 'How many jobs can I launch at once?', '6').to_i
200
- v[:ppn] = cli.ask_user(
201
- 'How many CPUs can I use per job?', '2').to_i
202
- cli.puts 'Setting up internal daemon defaults.'
203
- cli.puts 'If you don\'t understand this just leave default values:'
204
- v[:cmd] = cli.ask_user(
205
- "How should I launch tasks?\n %1$s: script path, " \
206
- "%2$s: variables, %3$d: CPUs, %4$s: log file, %5$s: task name.\n",
207
- "%2$s '%1$s' > '%4$s' 2>&1")
208
- v[:var] = cli.ask_user(
209
- "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
210
- "%1$s=%2$s")
211
- v[:varsep] = cli.ask_user(
212
- 'What should I use to separate variables?', ' ')
213
- v[:alive] = cli.ask_user(
214
- "How can I know that a process is still alive?\n %1$s: PID, " \
215
- "output should be 1 for running and 0 for non-running.\n",
216
- "ps -p '%1$s'|tail -n+2|wc -l")
217
- v[:kill] = cli.ask_user(
218
- "How should I terminate tasks?\n %s: process ID.", "kill -9 '%s'")
219
- when 'slurm'
220
- queue = cli.ask_user(
221
- 'What queue should I use?', nil, nil, true)
222
- v[:latency] = cli.ask_user(
223
- 'How long should I sleep? (in seconds)', '150').to_i
224
- v[:maxjobs] = cli.ask_user(
225
- 'How many jobs can I launch at once?', '300').to_i
226
- v[:ppn] = cli.ask_user(
227
- 'How many CPUs can I use per job?', '2').to_i
228
- cli.puts 'Setting up internal daemon defaults'
229
- cli.puts 'If you don\'t understand this just leave default values:'
230
- v[:cmd] = cli.ask_user(
231
- "How should I launch tasks?\n %1$s: script path, " \
232
- "%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
233
- "%2$s sbatch --partition='#{queue}' --export=ALL " \
234
- "--nodes=1 --ntasks-per-node=%3$d --output='%4$s' " \
235
- "--job-name='%5$s' --mem=9G --time=12:00:00 %1$s " \
236
- "| perl -pe 's/.* //'")
237
- v[:var] = cli.ask_user(
238
- "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
239
- "%1$s=%2$s")
240
- v[:varsep] = cli.ask_user(
241
- 'What should I use to separate variables?', ' ')
242
- v[:alive] = cli.ask_user(
243
- "How can I know that a process is still alive?\n %1$s: job id, " \
244
- "output should be 1 for running and 0 for non-running.\n",
245
- "squeue -h -o %%t -j '%1$s' | grep '^PD\\|R\\|CF\\|CG$' " \
246
- "| tail -n 1 | wc -l")
247
- v[:kill] = cli.ask_user(
248
- "How should I terminate tasks?\n %s: process ID.", "scancel '%s'")
249
- else # [qm]sub
250
- queue = cli.ask_user('What queue should I use?', nil, nil, true)
251
- v[:latency] = cli.ask_user(
252
- 'How long should I sleep? (in seconds)', '150').to_i
253
- v[:maxjobs] = cli.ask_user(
254
- 'How many jobs can I launch at once?', '300').to_i
255
- v[:ppn] = cli.ask_user(
256
- 'How many CPUs can I use per job?', '2').to_i
257
- cli.puts 'Setting up internal daemon defaults.'
258
- cli.puts 'If you don\'t understand this just leave default values:'
259
- v[:cmd] = cli.ask_user(
260
- "How should I launch tasks?\n %1$s: script path, " \
261
- "%2$s: variables, %3$d: CPUs, %4$d: log file, %5$s: task name.\n",
262
- "#{v[:type]} -q '#{queue}' -v '%2$s' -l nodes=1:ppn=%3$d %1$s " \
263
- "-j oe -o '%4$s' -N '%5$s' -l mem=9g -l walltime=12:00:00 " \
264
- "| grep .")
265
- v[:var] = cli.ask_user(
266
- "How should I pass variables?\n %1$s: keys, %2$s: values.\n",
267
- "%1$s=%2$s")
268
- v[:varsep] = cli.ask_user(
269
- 'What should I use to separate variables?', ',')
270
- if v[:type] == 'qsub'
271
- v[:alive] = cli.ask_user(
272
- "How can I know that a process is still alive?\n " \
273
- "%1$s: job id, output should be 1 for running and " \
274
- "0 for non-running.\n",
275
- "qstat -f '%1$s'|grep ' job_state ='|perl -pe 's/.*= //'" \
276
- "|grep '[^C]'|tail -n1|wc -l|awk '{print $1}'")
277
- v[:kill] = cli.ask_user(
278
- "How should I terminate tasks?\n %s: process ID.", "qdel '%s'")
279
- else # msub
280
- v[:alive] = cli.ask_user(
281
- "How can I know that a process is still alive?\n " \
282
- "%1$s: job id, output should be 1 for running and " \
283
- "0 for non-running.\n",
284
- "checkjob '%1$s'|grep '^State:'|perl -pe 's/.*: //'" \
285
- "|grep 'Deferred\\|Hold\\|Idle\\|Starting\\|Running\\|Blocked'" \
286
- "|tail -n1|wc -l|awk '{print $1}'")
287
- v[:kill] = cli.ask_user(
288
- "How should I terminate tasks?\n %s: process ID.",
289
- "canceljob '%s'")
290
- end
291
- end
292
- File.open(daemon_f, 'w') { |fh| fh.puts JSON.pretty_generate(v) }
293
- end
294
- cli.puts ''
295
-
258
+ def close_rc_file(rc_fh)
296
259
  rc_fh.puts <<FOOT
297
260
 
298
261
  MIGA_CONFIG_VERSION='#{MiGA::MiGA.VERSION}'
@@ -300,44 +263,6 @@ MIGA_CONFIG_LONGVERSION='#{MiGA::MiGA.LONG_VERSION}'
300
263
  MIGA_CONFIG_DATE='#{Time.now}'
301
264
 
302
265
  FOOT
303
-
304
- cli.puts 'Configuration complete. MiGA is ready to work!'
305
- cli.puts ''
306
-
307
- end
308
-
309
- def empty_action
310
- end
311
-
312
- def run_cmd(cli, cmd)
313
- `. "#{cli[:config]}" && #{cmd}`
314
- end
315
-
316
- def run_r_cmd(cli, paths, cmd)
317
- run_cmd(cli,
318
- "echo #{cmd.shellescape} | #{paths['R'].shellescape} --vanilla -q 2>&1")
319
- end
320
-
321
- def test_r_package(cli, paths, pkg)
322
- run_r_cmd(cli, paths, "library('#{pkg}')")
323
- $?.success?
324
- end
325
-
326
- def install_r_package(cli, paths, pkg)
327
- r_cmd = "install.packages('#{pkg}', repos='http://cran.rstudio.com/')"
328
- run_r_cmd(cli, paths, r_cmd)
329
- end
330
-
331
- def test_ruby_gem(cli, paths, pkg)
332
- run_cmd(cli,
333
- "#{paths['ruby'].shellescape} -r #{pkg.shellescape} -e '' 2>/dev/null")
334
- $?.success?
335
- end
336
-
337
- def install_ruby_gem(cli, paths, pkg)
338
- gem_cmd = "Gem::GemRunner.new.run %w(install --user #{pkg})"
339
- run_cmd(cli, "#{paths['ruby'].shellescape} \
340
- -r rubygems -r rubygems/gem_runner \
341
- -e #{gem_cmd.shellescape} 2>&1")
266
+ rc_fh.close
342
267
  end
343
268
  end