sushi_fabric 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.bzrignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/sushi_fabric.rb +3 -0
- data/lib/sushi_fabric/sushiApp.rb +677 -0
- data/lib/sushi_fabric/sushi_configure.yml +4 -0
- data/lib/sushi_fabric/version.rb +3 -0
- data/sample/WordCountApp.rb +41 -0
- data/sample/sample_dataset.tsv +2 -0
- data/sample/sample_parameterset.tsv +2 -0
- data/sushi_fabric.gemspec +24 -0
- metadata +92 -0
data/.bzrignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2013 Functional Genomics Center Zurich
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,29 @@
+# SushiFabric
+
+TODO: Write a gem description
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+    gem 'sushi_fabric'
+
+And then execute:
+
+    $ bundle
+
+Or install it yourself as:
+
+    $ gem install sushi_fabric
+
+## Usage
+
+TODO: Write usage instructions here
+
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request
data/Rakefile
ADDED
@@ -0,0 +1 @@
+require "bundler/gem_tasks"
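A brief usage aside (not part of the packaged files): requiring `bundler/gem_tasks` in the Rakefile is what gives a source checkout of this gem the standard Bundler Rake tasks, i.e. `rake build`, `rake install`, and `rake release`.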
data/lib/sushi_fabric.rb
ADDED
data/lib/sushi_fabric/sushiApp.rb
ADDED
@@ -0,0 +1,677 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+Version = '20131107-104530'
+
+require 'csv'
+require 'fileutils'
+require 'active_record'
+require 'yaml'
+require 'drb/drb'
+
+module SushiFabric
+  CONFIG = 'sushi_configure.yml'
+  current_dir = File.dirname(File.expand_path(__FILE__))
+  config_yml = File.join(current_dir, CONFIG)
+  config = if File.exist?(config_yml)
+    YAML.load(File.read(config_yml))
+  else
+    {}
+  end
+  WORKFLOW_MANAGER = config[:workflow_manager]||'druby://localhost:3000'
+  GSTORE_DIR = config[:gstore_dir]||'gstore'
+  #sushi_app_dir = File.expand_path('../..', __FILE__)
+  sushi_app_dir = Dir.pwd
+  SUSHI_APP_DIR = config[:sushi_app_dir]||sushi_app_dir
+  SCRATCH_DIR = config[:scratch_dir]||'/tmp/scratch'
+  no_ror = nil
+  begin
+    ::Project
+    no_ror = false
+  rescue
+    if File.exist?(File.join(SUSHI_APP_DIR, "app/models"))
+      ActiveRecord::Base.establish_connection(
+        :adapter => 'sqlite3',
+        :database => "#{SUSHI_APP_DIR}/db/development.sqlite3"
+      )
+      require "#{SUSHI_APP_DIR}/app/models/project"
+      require "#{SUSHI_APP_DIR}/app/models/data_set"
+      require "#{SUSHI_APP_DIR}/app/models/sample"
+      no_ror = false
+    else
+      no_ror = true
+    end
+  end
+  NO_ROR = no_ror
+
+=begin
+  def save_data_set(data_set_arr, headers, rows)
+    data_set_hash = Hash[*data_set_arr]
+    if project = Project.find_by_number(data_set_hash['ProjectNumber'].to_i)
+      data_set = DataSet.new
+      data_set.name = data_set_hash['DataSetName']
+      data_set.project = project
+      if parent_id = data_set_hash['ParentID'] and parent_data_set = DataSet.find_by_id(parent_id.to_i)
+        data_set.data_set = parent_data_set
+      end
+      if comment = data_set_hash['Comment'] and !comment.to_s.empty?
+        data_set.comment = comment
+      end
+
+      sample_hash = {}
+      rows.each do |row|
+        headers.each_with_index do |header, i|
+          sample_hash[header]=row[i]
+        end
+        sample = Sample.new
+        sample.key_value = sample_hash.to_s
+        sample.save unless sample.saved?
+        data_set.samples << sample
+      end
+
+      data_set.md5 = data_set.md5hexdigest
+      unless data_set.saved?
+        project.data_sets << data_set
+        parent_data_set.data_sets << data_set if parent_data_set
+        data_set.save
+      end
+      data_set.id
+    end
+  end
+=end
+
+  class ::Hash
+    attr_reader :defaults
+    alias :set :[]=
+    alias :get :[]
+    def []=(k1,k2,v=nil)
+      if v
+        @desc ||= {}
+        @desc.set([k1,k2].join('_'),v)
+      else
+        @defaults ||= {}
+        if !@defaults[k1] and k2
+          if k2.instance_of?(Array)
+            @defaults.set(k1,k2.first)
+          elsif k2.instance_of?(Hash) and k2.first
+            @defaults.set(k1,k2.first.last)
+          else
+            @defaults.set(k1,k2)
+          end
+        end
+        set(k1,k2)
+      end
+    end
+    def default_value(k,v=nil)
+      if v
+        @defaults[k] = v
+      else
+        @defaults[k]
+      end
+    end
+    def data_type(k)
+      @defaults[k].class
+    end
+    def data_types
+      Hash[@defaults.map{|k,v| [k, v.class]}]
+    end
+    def [](k1, k2=nil)
+      if k2
+        if @desc
+          @desc.get([k1,k2].join('_'))
+        else
+          nil
+        end
+      else
+        get(k1)
+      end
+    end
+  end
+  class ::String
+    def tag?(tag)
+      scan(/\[(.*)\]/).flatten.join =~ /#{tag}/
+    end
+  end
+  class SushiApp
+    attr_reader :params
+    attr_reader :job_ids
+    attr_reader :next_dataset_id
+    attr_reader :required_columns
+    attr_reader :required_params
+    attr_reader :dataset_hash
+    attr_reader :analysis_category
+    attr_reader :description
+    attr_reader :name
+    attr_accessor :dataset_tsv_file
+    attr_accessor :parameterset_tsv_file
+    attr_accessor :dataset_sushi_id
+    attr_accessor :project
+    attr_accessor :user
+    attr_accessor :next_dataset_name
+    attr_accessor :next_dataset_comment
+    def initialize
+      @gstore_dir = GSTORE_DIR
+      @project = nil
+      @name = nil
+      @params = {}
+      @params['cores'] = nil
+      @params['ram'] = nil
+      @params['scratch'] = nil
+      @params['node'] = ''
+      @params['process_mode'] = 'SAMPLE'
+      @job_ids = []
+      @required_columns = []
+      @workflow_manager = DRbObject.new_with_uri(WORKFLOW_MANAGER)
+    end
+    def set_input_dataset
+      if @dataset_tsv_file
+        dataset_tsv = CSV.readlines(@dataset_tsv_file, {:headers=>true, :col_sep=>"\t"})
+        @dataset_hash = []
+        dataset_tsv.each do |row|
+          @dataset_hash << row.to_hash
+        end
+      elsif @dataset_sushi_id
+        @dataset_hash = []
+        if dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+          dataset.samples.each do |sample|
+            @dataset_hash << sample.to_hash
+          end
+        end
+      end
+      @dataset_hash
+    end
+    def get_columns_with_tag(tag)
+      #@factor_cols = @dataset_hash.first.keys.select{|header| header =~ /\[#{tag}\]/}.map{|header| header.gsub(/\[.+\]/,'').strip}
+      @dataset_hash.map{|row|
+        Hash[*row.select{|k,v| k=~/\[#{tag}\]/}.map{|k,v| [k.gsub(/\[.+\]/,'').strip,v]}.flatten]
+      }
+    end
+    def set_output_files
+      @dataset = {}
+      next_dataset.keys.select{|header| header.tag?('File')}.each do |header|
+        @output_files ||= []
+        @output_files << header
+      end
+      @output_files = @output_files.uniq
+    end
+    def check_required_columns
+      if @dataset_hash and @required_columns and (@required_columns-@dataset_hash.map{|row| row.keys}.flatten.uniq.map{|colname| colname.gsub(/\[.+\]/,'').strip}).empty?
+        true
+      else
+        false
+      end
+    end
+    def check_application_parameters
+      if @required_params and (@required_params - @params.keys).empty?
+        @output_params = @params.clone
+      end
+    end
+    def set_user_parameters
+      # this should be done in an instance of applicaiton subclass
+      if @parameterset_tsv_file
+        parameterset_tsv = CSV.readlines(@parameterset_tsv_file, :col_sep=>"\t")
+        headers = []
+        parameterset_tsv.each do |row|
+          header, value = row
+          headers << header
+          @params[header] = if @params.data_type(header) == String
+            value
+          else
+            eval(value)
+          end
+        end
+        (@params.keys - headers).each do |key|
+          @params[key] = @params.default_value(key)
+        end
+      end
+      @params
+    end
+    def set_dir_paths
+      ## sushi figures out where to put the resulting dataset
+      unless @name and @project
+        raise "should set #name and #project"
+      end
+      @name.gsub!(/\s/,'_')
+      result_dir_base = if @next_dataset_name
+        [@next_dataset_name, Time.now.strftime("%Y-%m-%d--%H-%M-%S")].join("_")
+      else
+        [@analysis_category, @name, @dataset_sushi_id.to_s, Time.now.strftime("%Y-%m-%d--%H-%M-%S")].join("_")
+      end
+      @result_dir = File.join(@project, result_dir_base)
+      @scratch_result_dir = File.join(SCRATCH_DIR, result_dir_base)
+      @job_script_dir = File.join(@scratch_result_dir, 'scripts')
+      @gstore_result_dir = File.join(@gstore_dir, @result_dir)
+      @gstore_script_dir = File.join(@gstore_result_dir, 'scripts')
+      @gstore_project_dir = File.join(@gstore_dir, @project)
+      set_file_paths
+    end
+    def prepare_result_dir
+      FileUtils.mkdir_p(@scratch_result_dir)
+      FileUtils.mkdir_p(@job_script_dir)
+    end
+    def job_header
+      @scratch_dir = if @params['process_mode'] == 'SAMPLE'
+        @scratch_result_dir + "_" + @dataset['Name']
+      else
+        @scratch_result_dir
+      end
+      @out.print <<-EOF
+#!/bin/bash
+set -e
+set -o pipefail
+
+#### SET THE STAGE
+SCRATCH_DIR=#{@scratch_dir}
+GSTORE_DIR=#{@gstore_dir}
+mkdir $SCRATCH_DIR || exit 1
+cd $SCRATCH_DIR || exit 1
+echo "Job runs on `hostname`"
+echo "at $SCRATCH_DIR"
+
+EOF
+    end
+    def job_footer
+      @out.print "#### JOB IS DONE WE PUT THINGS IN PLACE AND CLEAN AUP\n"
+      if @output_files
+        @output_files.map{|header| next_dataset[header]}.each do |file|
+          # in actual case, to save under /srv/gstore/
+          src_file = File.basename(file)
+          dest_dir = File.dirname(File.join(@gstore_dir, file))
+          @out.print copy_commands(src_file, dest_dir).join("\n"), "\n"
+        end
+      end
+      @out.print <<-EOF
+cd ~
+rm -rf #{@scratch_dir} || exit 1
+EOF
+
+    end
+    def job_main
+      @out.print "#### NOW THE ACTUAL JOBS STARTS\n"
+      @out.print commands, "\n\n"
+    end
+    def next_dataset
+      # this should be overwritten in a subclass
+    end
+    def commands
+      # this should be overwritten in a subclass
+    end
+    def submit_command(job_script)
+      gsub_options = []
+      gsub_options << "-c #{@params['cores']}" unless @params['cores'].to_s.empty?
+      gsub_options << "-n #{@params['node']}" unless @params['node'].to_s.empty?
+      gsub_options << "-r #{@params['ram']}" unless @params['ram'].to_s.empty?
+      gsub_options << "-s #{@params['scratch']}" unless @params['scratch'].to_s.empty?
+      gsub_options << "-u #{@user}" if @user
+      command = "wfm_monitoring --server #{WORKFLOW_MANAGER} --project #{@project.gsub(/p/,'')} --logdir #{@gstore_script_dir} #{job_script} #{gsub_options.join(' ')}"
+    end
+    def submit(job_script)
+      command = submit_command(job_script)
+      puts "submit: #{command}"
+      job_id = `#{command}`
+      job_id = job_id.to_i
+      unless job_id.to_i > 1
+        raise 'failed in job submitting'
+      end
+      job_id
+    end
+    def preprocess
+      # this should be overwritten in a subclass
+    end
+    def set_file_paths
+      @parameter_file = 'parameters.tsv'
+      @input_dataset_file = 'input_dataset.tsv'
+      @next_dataset_file = 'dataset.tsv'
+      @input_dataset_tsv_path = File.join(@gstore_result_dir, @input_dataset_file)
+      @parameters_tsv_path = File.join(@gstore_result_dir, @input_dataset_file)
+      @next_dataset_tsv_path = File.join(@gstore_result_dir, @next_dataset_file)
+    end
+    def save_parameters_as_tsv
+      file_path = File.join(@scratch_result_dir, @parameter_file)
+      CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
+        @output_params.each do |key, value|
+          out << [key, value]
+        end
+      end
+      file_path
+    end
+    def save_input_dataset_as_tsv
+      file_path = File.join(@scratch_result_dir, @input_dataset_file)
+      CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
+        headers = @dataset_hash.map{|row| row.keys}.flatten.uniq
+        out << headers
+        @dataset_hash.each do |row|
+          out << headers.map{|header| row[header]}
+        end
+      end
+      file_path
+    end
+    def save_next_dataset_as_tsv
+      headers = @result_dataset.map{|row| row.keys}.flatten.uniq
+      file_path = File.join(@scratch_result_dir, @next_dataset_file)
+      CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
+        out << headers
+        @result_dataset.each do |row_hash|
+          out << headers.map{|header| row_hash[header]}
+        end
+      end
+      file_path
+    end
+    def copy_commands(org_dir, dest_parent_dir)
+      @workflow_manager.copy_commands(org_dir, dest_parent_dir)
+    end
+    def copy_dataset_parameter_jobscripts
+      org = @scratch_result_dir
+      dest = @gstore_project_dir
+      copy_commands(org, dest).each do |command|
+        puts command
+        unless system command
+          raise "fails in copying next_dataset files from /scratch to /gstore"
+        end
+      end
+      sleep 1
+      command = "rm -rf #{@scratch_result_dir}"
+      `#{command}`
+    end
+    def make_job_script
+      @out = open(@job_script, 'w')
+      job_header
+      job_main
+      job_footer
+      @out.close
+    end
+    def sample_mode
+      @dataset_hash.each do |row|
+        @dataset = Hash[*row.map{|key,value| [key.gsub(/\[.+\]/,'').strip, value]}.flatten]
+        ## WRITE THE JOB SCRIPT
+        sample_name = @dataset['Name']||@dataset.first
+        @job_script = if @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+          File.join(@job_script_dir, @analysis_category + '_' + sample_name) + '_' + dataset.name.gsub(/\s+/,'_') + '.sh'
+        else
+          File.join(@job_script_dir, @analysis_category + '_' + sample_name) + '.sh'
+        end
+        make_job_script
+        @job_scripts << @job_script
+        @result_dataset << next_dataset
+      end
+    end
+    def dataset_mode
+      @job_script = if @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+        File.join(@job_script_dir, @analysis_category + '_' + dataset.name.gsub(/\s+/,'_') + '.sh')
+      else
+        File.join(@job_script_dir, @analysis_category + '_' + 'job_script.sh')
+      end
+      make_job_script
+      @job_scripts << @job_script
+      @result_dataset << next_dataset
+    end
+    def save_data_set(data_set_arr, headers, rows)
+      data_set_hash = Hash[*data_set_arr]
+      if project = Project.find_by_number(data_set_hash['ProjectNumber'].to_i)
+        data_set = DataSet.new
+        data_set.name = data_set_hash['DataSetName']
+        data_set.project = project
+        if parent_id = data_set_hash['ParentID'] and parent_data_set = DataSet.find_by_id(parent_id.to_i)
+          data_set.data_set = parent_data_set
+        end
+        if comment = data_set_hash['Comment'] and !comment.to_s.empty?
+          data_set.comment = comment
+        end
+
+        sample_hash = {}
+        rows.each do |row|
+          headers.each_with_index do |header, i|
+            sample_hash[header]=row[i]
+          end
+          sample = Sample.new
+          sample.key_value = sample_hash.to_s
+          sample.save unless sample.saved?
+          data_set.samples << sample
+        end
+
+        data_set.md5 = data_set.md5hexdigest
+        unless data_set.saved?
+          project.data_sets << data_set
+          parent_data_set.data_sets << data_set if parent_data_set
+          data_set.save
+        end
+        data_set.id
+      end
+    end
+
+    def run
+      test_run
+
+      ## the user presses RUN
+      prepare_result_dir
+
+      ## copy application data to gstore
+      save_parameters_as_tsv
+      save_input_dataset_as_tsv
+
+
+      ## sushi writes creates the job scripts and builds the result data set that is to be generated
+      @result_dataset = []
+      @job_scripts = []
+      if @params['process_mode'] == 'SAMPLE'
+        sample_mode
+      elsif @params['process_mode'] == 'DATASET'
+        dataset_mode
+      else
+        #stop
+        warn "the process mode (#{@params['process_mode']}) is not defined"
+        raise "stop job submitting"
+      end
+
+      # job submittion
+      @job_scripts.each_with_index do |job_script, i|
+        job_id = submit(job_script)
+        @job_ids << job_id
+        print "Submit job #{File.basename(job_script)} job_id=#{job_id}"
+      end
+
+      puts
+      print 'job scripts: '
+      p @job_scripts
+      print 'result dataset: '
+      p @result_dataset
+
+      # copy application data to gstore
+      next_dataset_tsv_path = save_next_dataset_as_tsv
+
+      if !@job_ids.empty? and @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+        data_set_arr = []
+        headers = []
+        rows = []
+        next_dataset_name = if name = @next_dataset_name
+          name.to_s
+        else
+          "#{@analysis_category}_#{@name.gsub(/\s/,'').gsub(/_/,'')}_#{dataset.id}"
+        end
+        data_set_arr = {'DataSetName'=>next_dataset_name, 'ProjectNumber'=>@project.gsub(/p/,''), 'ParentID'=>@dataset_sushi_id, 'Comment'=>@next_dataset_comment.to_s}
+        csv = CSV.readlines(next_dataset_tsv_path, :col_sep=>"\t")
+        csv.each do |row|
+          if headers.empty?
+            headers = row
+          else
+            rows << row
+          end
+        end
+        unless NO_ROR
+          @next_dataset_id = save_data_set(data_set_arr.to_a.flatten, headers, rows)
+        end
+      end
+      Thread.new do
+        copy_dataset_parameter_jobscripts
+      end
+    end
+    def test_run
+      set_dir_paths
+      set_input_dataset
+      preprocess
+      set_output_files
+      set_user_parameters
+
+      failures = 0
+      print 'check project name: '
+      unless @project
+        puts "\e[31mFAILURE\e[0m: project number is required but not found. you should set it in usecase."
+        puts "\tex.)"
+        puts "\tapp = #{self.class}.new"
+        puts "\tapp.project = 'p1001'"
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:\n\t@project=#{@project}"
+      end
+
+      print 'check user name: '
+      unless @user
+        puts "\e[31mWARNING\e[0m: user number is ought to be added but not found. you should set it in usecase. Default will be 'sushi lover'"
+        puts "\tex.)"
+        puts "\tapp = #{self.class}.new"
+        puts "\tapp.user = 'masa'"
+      else
+        puts "\e[32mPASSED\e[0m:\n\t@user=#{@user}"
+      end
+
+      print 'check application name: '
+      if @name.to_s.empty?
+        puts "\e[31mFAILURE\e[0m: application name is required but not found. you should set it in application class."
+        puts "\tex.)"
+        puts "\tclass #{self.class}"
+        puts "\t def initialize"
+        puts "\t @name = '#{self.class}'"
+        puts "\t end"
+        puts "\tend"
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:\n\t@name=#{@name}"
+      end
+
+      print 'check analysis_category: '
+      if @analysis_category.to_s.empty?
+        puts "\e[31mFAILURE\e[0m: analysis_category is required but not found. you should set it in application class."
+        puts "\tex.)"
+        puts "\tclass #{self.class}"
+        puts "\t def initialize"
+        puts "\t @analysis_category = 'Mapping'"
+        puts "\t end"
+        puts "\tend"
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:\n\t@analysis_category=#{@analysis_category}"
+      end
+
+      print 'check dataset: '
+      if !@dataset_hash or @dataset_hash.empty?
+        puts "\e[31mFAILURE\e[0m: dataset is not found. you should set it by using #{self.class}#dataset_sushi_id or #{self.class}#dataset_tsv_file properties"
+        puts "\tex.)"
+        puts "\tusecase = #{self.class}.new"
+        puts "\tusecase.dataset_tsv_file = \"dataset.tsv\""
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:\n\t@dataset_hash.length = #{@dataset_hash.length}"
+      end
+
+      print 'check required columns: '
+      unless check_required_columns
+        puts "\e[31mFAILURE\e[0m: required_column(s) is not found in dataset. you should set it in application class."
+        puts "\tex.)"
+        puts "\tdef initialize"
+        puts "\t @required_columns = ['Name', 'Read1']"
+        puts
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:"
+      end
+      puts "\trequired columns: #{@required_columns}"
+      puts "\tdataset columns: #{@dataset_hash.map{|row| row.keys}.flatten.uniq}" if @dataset_hash
+
+      print 'check required parameters: '
+      unless check_application_parameters
+        puts "\e[31mFAILURE\e[0m: required_param(s) is not set yet. you should set it in usecase"
+        puts "\tmissing params: #{@required_params-@params.keys}" if @required_params
+        puts "\tex.)"
+        puts "\tusecase = #{self.class}.new"
+        if @required_params
+          puts "\tusecase.params['#{(@required_params-@params.keys)[0]}'] = parameter"
+        else
+          puts "\tusecase.params['parameter name'] = default_parameter"
+        end
+        puts
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:"
+      end
+      puts "\tparameters: #{@params.keys}"
+      puts "\trequired : #{@required_params}"
+
+      print 'check next dataset: '
+      @dataset={}
+      unless self.next_dataset
+        puts "\e[31mFAILURE\e[0m: next dataset is not set yet. you should overwrite SushiApp#next_dataset method in #{self.class}"
+        puts "\tnote: the return value should be Hash (key: column title, value: value in a tsv table)"
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:"
+      end
+
+      print 'check output files: '
+      if !@output_files or @output_files.empty?
+        puts "\e[31mWARNING\e[0m: no output files. you will not get any output files after the job running. you can set @output_files (array) in #{self.class}"
+        puts "\tnote: usually it should be define in initialize method"
+        puts "\t the elements of @output_files should be chosen from #{self.class}#next_dataset.keys"
+        puts "\t #{self.class}#next_dataset.keys: #{self.next_dataset.keys}" if self.next_dataset
+      else
+        puts "\e[32mPASSED\e[0m:"
+      end
+
+      print 'check commands: '
+      if @params['process_mode'] == 'SAMPLE'
+        @dataset_hash.each do |row|
+          @dataset = Hash[*row.map{|key,value| [key.gsub(/\[.+\]/,'').strip, value]}.flatten]
+          unless com = commands
+            puts "\e[31mFAILURE\e[0m: any commands is not defined yet. you should overwrite SushiApp#commands method in #{self.class}"
+            puts "\tnote: the return value should be String (this will be in the main body of submitted job script)"
+            failures += 1
+          else
+            puts "\e[32mPASSED\e[0m:"
+            puts "generated command will be:"
+            puts "\t"+com.split(/\n/).join("\n\t")+"\n"
+          end
+        end
+      elsif @params['process_mode'] == 'DATASET'
+        unless com = commands
+          puts "\e[31mFAILURE\e[0m: any commands is not defined yet. you should overwrite SushiApp#commands method in #{self.class}"
+          puts "\tnote: the return value should be String (this will be in the main body of submitted job script)"
+          failures += 1
+        else
+          puts "\e[32mPASSED\e[0m:"
+          puts "generated command will be:"
+          puts "\t"+com.split(/\n/).join("\n\t")+"\n"
+        end
+      end
+
+      print 'check workflow manager: '
+      begin
+        hello = `wfm_hello #{WORKFLOW_MANAGER}`
+      rescue
+      end
+      unless hello =~ /hello/
+        puts "\e[31mFAILURE\e[0m: workflow_manager does not reply. check if workflow_manager is working"
+        failures += 1
+      else
+        puts "\e[32mPASSED\e[0m:"
+      end
+
+      if failures > 0
+        puts
+        puts "\e[31mFailures (#{failures})\e[0m: All failures should be solved"
+        raise "test run fails"
+      else
+        puts "All checks \e[32mPASSED\e[0m"
+      end
+    end
+  end
+
+
+end
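The `config[:workflow_manager]`, `config[:gstore_dir]`, `config[:sushi_app_dir]`, and `config[:scratch_dir]` lookups above are fed by `lib/sushi_fabric/sushi_configure.yml`, whose four lines are not reproduced in this diff. A hypothetical configuration illustrating those keys (symbol keys, since the module indexes the loaded YAML with Ruby symbols) could look like the following; the values are placeholders, not the packaged file's contents:

    # hypothetical sushi_configure.yml; keys taken from the config[:...] reads above
    :workflow_manager: druby://localhost:3000
    :gstore_dir: /srv/gstore/projects
    :sushi_app_dir: /usr/local/sushi
    :scratch_dir: /tmp/scratch

If the file is absent or a key is missing, the code falls back to the defaults visible above: `druby://localhost:3000`, `gstore`, the current working directory, and `/tmp/scratch`.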
data/sample/WordCountApp.rb
ADDED
@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+
+require 'sushi_fabric'
+
+class WordCountApp < SushiFabric::SushiApp
+  def initialize
+    super
+    @name = 'Word_Count'
+    @analysis_category = 'Stats'
+    @required_columns = ['Name', 'Read1']
+    @required_params = []
+  end
+  def next_dataset
+    {'Name'=>@dataset['Name'],'Stats [File]'=>File.join(@result_dir, @dataset['Name'].to_s + '.stats')}
+  end
+  def preprocess
+    @factors = get_columns_with_tag 'Factor'
+    @factor_cols = @factors.first.keys
+  end
+  def commands
+    commands = ''
+    commands << "gunzip -c $GSTORE_DIR/#{@dataset['Read1']} |wc > #{@dataset['Name']}.stats\n"
+    commands << "echo 'Factor columns: [#{@factor_cols.join(',')}]'\n"
+    commands << "echo 'Factors: [#{@factors.join(',')}]'\n"
+    commands
+  end
+end
+if __FILE__ == $0
+  usecase = WordCountApp.new
+
+  usecase.project = "p1001"
+  usecase.user = 'sushi_lover'
+  usecase.parameterset_tsv_file = 'sample_parameterset.tsv'
+  usecase.dataset_tsv_file = 'sample_dataset.tsv'
+  #usecase.dataset_sushi_id = 26
+
+  # run (submit to workflow_manager)
+  usecase.run
+  #usecase.test_run
+end
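The sample ends with `usecase.run`, which validates the setup and then actually submits job scripts through the workflow manager. For experimenting without submitting anything, the commented-out `test_run` path can be called instead; a minimal sketch, assuming the two sample TSV files sit in the current directory:

    # hypothetical dry run: no result directories are created and no jobs are submitted
    usecase = WordCountApp.new
    usecase.project = 'p1001'
    usecase.user = 'sushi_lover'
    usecase.parameterset_tsv_file = 'sample_parameterset.tsv'
    usecase.dataset_tsv_file = 'sample_dataset.tsv'
    usecase.test_run

Note that `test_run` still pings the workflow manager via `wfm_hello` and counts a missing reply as a failure, so it raises unless a workflow_manager is reachable at the configured DRb URI.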
data/sushi_fabric.gemspec
ADDED
@@ -0,0 +1,24 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'sushi_fabric/version'
+
+Gem::Specification.new do |spec|
+  spec.name = "sushi_fabric"
+  spec.version = SushiFabric::VERSION
+  spec.authors = ["Functional Genomics Center Zurich"]
+  spec.email = ["masaomi.hatakeyama@fgcz.uzh.ch"]
+  spec.description = %q{This library provides us with the methods to submit a job cooperating with workflow manager.}
+  spec.summary = %q{workflow manager client.}
+  spec.homepage = ""
+  spec.license = "MIT"
+
+  #spec.files = `git ls-files`.split($/)
+  spec.files = `bzr ls --versioned --recursive`.split($/).select{|file| !File.directory?(file)}
+  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+
+  spec.add_development_dependency "bundler", "~> 1.3"
+  spec.add_development_dependency "rake"
+end
metadata
ADDED
@@ -0,0 +1,92 @@
+--- !ruby/object:Gem::Specification
+name: sushi_fabric
+version: !ruby/object:Gem::Version
+  version: 0.0.5
+  prerelease:
+platform: ruby
+authors:
+- Functional Genomics Center Zurich
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-11-07 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: This library provides us with the methods to submit a job cooperating
+  with workflow manager.
+email:
+- masaomi.hatakeyama@fgcz.uzh.ch
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .bzrignore
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- lib/sushi_fabric/sushiApp.rb
+- lib/sushi_fabric/sushi_configure.yml
+- lib/sushi_fabric/version.rb
+- lib/sushi_fabric.rb
+- sample/WordCountApp.rb
+- sample/sample_dataset.tsv
+- sample/sample_parameterset.tsv
+- sushi_fabric.gemspec
+homepage: ''
+licenses:
+- MIT
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.24
+signing_key:
+specification_version: 3
+summary: workflow manager client.
+test_files: []