sushi_fabric 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.bzrignore ADDED
@@ -0,0 +1,17 @@
+ *.gem
+ *.rbc
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+
+ # Specify your gem's dependencies in sushi_fabric.gemspec
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2013 Functional Genomics Center Zurich
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
+ # SushiFabric
+
+ TODO: Write a gem description
+
+ ## Installation
+
+ Add this line to your application's Gemfile:
+
+     gem 'sushi_fabric'
+
+ And then execute:
+
+     $ bundle
+
+ Or install it yourself as:
+
+     $ gem install sushi_fabric
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
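+ A minimal usage sketch, condensed from the bundled `sample/WordCountApp.rb`
+ (it assumes a running workflow_manager and a dataset TSV next to the script):
+
+     require 'sushi_fabric'
+
+     class WordCountApp < SushiFabric::SushiApp
+       def initialize
+         super
+         @name = 'Word_Count'
+         @analysis_category = 'Stats'
+         @required_columns = ['Name', 'Read1']
+         @required_params = []
+       end
+       def next_dataset
+         {'Name'=>@dataset['Name'],
+          'Stats [File]'=>File.join(@result_dir, @dataset['Name'].to_s + '.stats')}
+       end
+       def commands
+         "gunzip -c $GSTORE_DIR/#{@dataset['Read1']} |wc > #{@dataset['Name']}.stats\n"
+       end
+     end
+
+     usecase = WordCountApp.new
+     usecase.project = 'p1001'
+     usecase.user = 'sushi_lover'
+     usecase.dataset_tsv_file = 'sample_dataset.tsv'
+     usecase.run  # runs the test_run checks, then submits one job per sample
+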
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
+ require "bundler/gem_tasks"
data/lib/sushi_fabric.rb ADDED
@@ -0,0 +1,3 @@
+ require "sushi_fabric/version"
+
+ require 'sushi_fabric/sushiApp'
data/lib/sushi_fabric/sushiApp.rb ADDED
@@ -0,0 +1,677 @@
+ #!/usr/bin/env ruby
+ # encoding: utf-8
+ Version = '20131107-104530'
+
+ require 'csv'
+ require 'fileutils'
+ require 'active_record'
+ require 'yaml'
+ require 'drb/drb'
+
+ module SushiFabric
+   CONFIG = 'sushi_configure.yml'
+   current_dir = File.dirname(File.expand_path(__FILE__))
+   config_yml = File.join(current_dir, CONFIG)
+   config = if File.exist?(config_yml)
+              YAML.load(File.read(config_yml))
+            else
+              {}
+            end
+   WORKFLOW_MANAGER = config[:workflow_manager]||'druby://localhost:3000'
+   GSTORE_DIR = config[:gstore_dir]||'gstore'
+   #sushi_app_dir = File.expand_path('../..', __FILE__)
+   sushi_app_dir = Dir.pwd
+   SUSHI_APP_DIR = config[:sushi_app_dir]||sushi_app_dir
+   SCRATCH_DIR = config[:scratch_dir]||'/tmp/scratch'
+   no_ror = nil
+   begin
+     ::Project
+     no_ror = false
+   rescue
+     if File.exist?(File.join(SUSHI_APP_DIR, "app/models"))
+       ActiveRecord::Base.establish_connection(
+         :adapter => 'sqlite3',
+         :database => "#{SUSHI_APP_DIR}/db/development.sqlite3"
+       )
+       require "#{SUSHI_APP_DIR}/app/models/project"
+       require "#{SUSHI_APP_DIR}/app/models/data_set"
+       require "#{SUSHI_APP_DIR}/app/models/sample"
+       no_ror = false
+     else
+       no_ror = true
+     end
+   end
+   NO_ROR = no_ror
+
+ =begin
+   def save_data_set(data_set_arr, headers, rows)
+     data_set_hash = Hash[*data_set_arr]
+     if project = Project.find_by_number(data_set_hash['ProjectNumber'].to_i)
+       data_set = DataSet.new
+       data_set.name = data_set_hash['DataSetName']
+       data_set.project = project
+       if parent_id = data_set_hash['ParentID'] and parent_data_set = DataSet.find_by_id(parent_id.to_i)
+         data_set.data_set = parent_data_set
+       end
+       if comment = data_set_hash['Comment'] and !comment.to_s.empty?
+         data_set.comment = comment
+       end
+
+       sample_hash = {}
+       rows.each do |row|
+         headers.each_with_index do |header, i|
+           sample_hash[header]=row[i]
+         end
+         sample = Sample.new
+         sample.key_value = sample_hash.to_s
+         sample.save unless sample.saved?
+         data_set.samples << sample
+       end
+
+       data_set.md5 = data_set.md5hexdigest
+       unless data_set.saved?
+         project.data_sets << data_set
+         parent_data_set.data_sets << data_set if parent_data_set
+         data_set.save
+       end
+       data_set.id
+     end
+   end
+ =end
+
+   class ::Hash
+     attr_reader :defaults
+     alias :set :[]=
+     alias :get :[]
+     def []=(k1,k2,v=nil)
+       if v
+         @desc ||= {}
+         @desc.set([k1,k2].join('_'),v)
+       else
+         @defaults ||= {}
+         if !@defaults[k1] and k2
+           if k2.instance_of?(Array)
+             @defaults.set(k1,k2.first)
+           elsif k2.instance_of?(Hash) and k2.first
+             @defaults.set(k1,k2.first.last)
+           else
+             @defaults.set(k1,k2)
+           end
+         end
+         set(k1,k2)
+       end
+     end
+     def default_value(k,v=nil)
+       if v
+         @defaults[k] = v
+       else
+         @defaults[k]
+       end
+     end
+     def data_type(k)
+       @defaults[k].class
+     end
+     def data_types
+       Hash[@defaults.map{|k,v| [k, v.class]}]
+     end
+     def [](k1, k2=nil)
+       if k2
+         if @desc
+           @desc.get([k1,k2].join('_'))
+         else
+           nil
+         end
+       else
+         get(k1)
+       end
+     end
+   end
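+   # Example (illustrative) of the extension above: assigning an Array makes
+   # its first element the recorded default, and the three-argument []= form
+   # stores a per-key description.
+   #   params = {}
+   #   params['cores'] = ['4', '8']                  # default becomes '4'
+   #   params['cores', 'description'] = 'CPU cores'  # stored in @desc
+   #   params.default_value('cores')                 #=> '4'
+   #   params['cores', 'description']                #=> 'CPU cores'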
+   class ::String
+     def tag?(tag)
+       scan(/\[(.*)\]/).flatten.join =~ /#{tag}/
+     end
+   end
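+   # Example (illustrative): column headers carry tags in square brackets,
+   # e.g. 'Stats [File]'.tag?('File') is truthy while 'Name'.tag?('File') is nil.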
+   class SushiApp
+     attr_reader :params
+     attr_reader :job_ids
+     attr_reader :next_dataset_id
+     attr_reader :required_columns
+     attr_reader :required_params
+     attr_reader :dataset_hash
+     attr_reader :analysis_category
+     attr_reader :description
+     attr_reader :name
+     attr_accessor :dataset_tsv_file
+     attr_accessor :parameterset_tsv_file
+     attr_accessor :dataset_sushi_id
+     attr_accessor :project
+     attr_accessor :user
+     attr_accessor :next_dataset_name
+     attr_accessor :next_dataset_comment
+     def initialize
+       @gstore_dir = GSTORE_DIR
+       @project = nil
+       @name = nil
+       @params = {}
+       @params['cores'] = nil
+       @params['ram'] = nil
+       @params['scratch'] = nil
+       @params['node'] = ''
+       @params['process_mode'] = 'SAMPLE'
+       @job_ids = []
+       @required_columns = []
+       @workflow_manager = DRbObject.new_with_uri(WORKFLOW_MANAGER)
+     end
+     def set_input_dataset
+       if @dataset_tsv_file
+         dataset_tsv = CSV.readlines(@dataset_tsv_file, {:headers=>true, :col_sep=>"\t"})
+         @dataset_hash = []
+         dataset_tsv.each do |row|
+           @dataset_hash << row.to_hash
+         end
+       elsif @dataset_sushi_id
+         @dataset_hash = []
+         if dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+           dataset.samples.each do |sample|
+             @dataset_hash << sample.to_hash
+           end
+         end
+       end
+       @dataset_hash
+     end
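+     # Example (illustrative): with the bundled sample/sample_dataset.tsv,
+     # @dataset_hash becomes
+     #   [{'Name'=>'sample1',
+     #     'Read1'=>'p1001/data/short-ama_E1_R1.fastq.gz',
+     #     'Species'=>'Arabidopsis'}]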
+     def get_columns_with_tag(tag)
+       #@factor_cols = @dataset_hash.first.keys.select{|header| header =~ /\[#{tag}\]/}.map{|header| header.gsub(/\[.+\]/,'').strip}
+       @dataset_hash.map{|row|
+         Hash[*row.select{|k,v| k=~/\[#{tag}\]/}.map{|k,v| [k.gsub(/\[.+\]/,'').strip,v]}.flatten]
+       }
+     end
+     def set_output_files
+       @dataset = {}
+       next_dataset.keys.select{|header| header.tag?('File')}.each do |header|
+         @output_files ||= []
+         @output_files << header
+       end
+       # keep @output_files nil when there are no [File] columns so that
+       # test_run can warn about it instead of crashing here
+       @output_files = @output_files.uniq if @output_files
+     end
+     def check_required_columns
+       if @dataset_hash and @required_columns and (@required_columns-@dataset_hash.map{|row| row.keys}.flatten.uniq.map{|colname| colname.gsub(/\[.+\]/,'').strip}).empty?
+         true
+       else
+         false
+       end
+     end
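+     # Example (illustrative): a dataset column 'Read1 [File]' satisfies a
+     # required column 'Read1'; the bracketed tag is stripped before comparison.
+     #   'Read1 [File]'.gsub(/\[.+\]/,'').strip  #=> 'Read1'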
+     def check_application_parameters
+       if @required_params and (@required_params - @params.keys).empty?
+         @output_params = @params.clone
+       end
+     end
+     def set_user_parameters
+       # this should be done in an instance of an application subclass
+       if @parameterset_tsv_file
+         parameterset_tsv = CSV.readlines(@parameterset_tsv_file, :col_sep=>"\t")
+         headers = []
+         parameterset_tsv.each do |row|
+           header, value = row
+           headers << header
+           @params[header] = if @params.data_type(header) == String
+                               value
+                             else
+                               eval(value)
+                             end
+         end
+         (@params.keys - headers).each do |key|
+           @params[key] = @params.default_value(key)
+         end
+       end
+       @params
+     end
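+     # Example (illustrative): given a parameterset TSV row ['cores', '8']
+     # where the default for 'cores' is an Integer, eval('8') restores the
+     # Integer 8; parameters whose default type is String keep the raw value.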
+     def set_dir_paths
+       ## sushi figures out where to put the resulting dataset
+       unless @name and @project
+         raise "both #name and #project must be set"
+       end
+       @name.gsub!(/\s/,'_')
+       result_dir_base = if @next_dataset_name
+                           [@next_dataset_name, Time.now.strftime("%Y-%m-%d--%H-%M-%S")].join("_")
+                         else
+                           [@analysis_category, @name, @dataset_sushi_id.to_s, Time.now.strftime("%Y-%m-%d--%H-%M-%S")].join("_")
+                         end
+       @result_dir = File.join(@project, result_dir_base)
+       @scratch_result_dir = File.join(SCRATCH_DIR, result_dir_base)
+       @job_script_dir = File.join(@scratch_result_dir, 'scripts')
+       @gstore_result_dir = File.join(@gstore_dir, @result_dir)
+       @gstore_script_dir = File.join(@gstore_result_dir, 'scripts')
+       @gstore_project_dir = File.join(@gstore_dir, @project)
+       set_file_paths
+     end
+     def prepare_result_dir
+       FileUtils.mkdir_p(@scratch_result_dir)
+       FileUtils.mkdir_p(@job_script_dir)
+     end
+     def job_header
+       @scratch_dir = if @params['process_mode'] == 'SAMPLE'
+                        @scratch_result_dir + "_" + @dataset['Name']
+                      else
+                        @scratch_result_dir
+                      end
+       @out.print <<-EOF
+ #!/bin/bash
+ set -e
+ set -o pipefail
+
+ #### SET THE STAGE
+ SCRATCH_DIR=#{@scratch_dir}
+ GSTORE_DIR=#{@gstore_dir}
+ mkdir $SCRATCH_DIR || exit 1
+ cd $SCRATCH_DIR || exit 1
+ echo "Job runs on `hostname`"
+ echo "at $SCRATCH_DIR"
+
+       EOF
+     end
+     def job_footer
+       @out.print "#### JOB IS DONE; WE PUT THINGS IN PLACE AND CLEAN UP\n"
+       if @output_files
+         @output_files.map{|header| next_dataset[header]}.each do |file|
+           # in the actual case, files are saved under /srv/gstore/
+           src_file = File.basename(file)
+           dest_dir = File.dirname(File.join(@gstore_dir, file))
+           @out.print copy_commands(src_file, dest_dir).join("\n"), "\n"
+         end
+       end
+       @out.print <<-EOF
+ cd ~
+ rm -rf #{@scratch_dir} || exit 1
+       EOF
+
+     end
+     def job_main
+       @out.print "#### NOW THE ACTUAL JOB STARTS\n"
+       @out.print commands, "\n\n"
+     end
+     def next_dataset
+       # this should be overridden in a subclass
+     end
+     def commands
+       # this should be overridden in a subclass
+     end
+     def submit_command(job_script)
+       gsub_options = []
+       gsub_options << "-c #{@params['cores']}" unless @params['cores'].to_s.empty?
+       gsub_options << "-n #{@params['node']}" unless @params['node'].to_s.empty?
+       gsub_options << "-r #{@params['ram']}" unless @params['ram'].to_s.empty?
+       gsub_options << "-s #{@params['scratch']}" unless @params['scratch'].to_s.empty?
+       gsub_options << "-u #{@user}" if @user
+       command = "wfm_monitoring --server #{WORKFLOW_MANAGER} --project #{@project.gsub(/p/,'')} --logdir #{@gstore_script_dir} #{job_script} #{gsub_options.join(' ')}"
+     end
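+     # Example (illustrative): with project 'p1001', cores=8 and ram=16 set,
+     # the built command looks roughly like
+     #   wfm_monitoring --server druby://localhost:3000 --project 1001 \
+     #     --logdir <gstore_script_dir> <job_script> -c 8 -r 16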
+     def submit(job_script)
+       command = submit_command(job_script)
+       puts "submit: #{command}"
+       job_id = `#{command}`
+       job_id = job_id.to_i
+       unless job_id.to_i > 1
+         raise 'failed to submit the job'
+       end
+       job_id
+     end
+     def preprocess
+       # this should be overridden in a subclass
+     end
+     def set_file_paths
+       @parameter_file = 'parameters.tsv'
+       @input_dataset_file = 'input_dataset.tsv'
+       @next_dataset_file = 'dataset.tsv'
+       @input_dataset_tsv_path = File.join(@gstore_result_dir, @input_dataset_file)
+       @parameters_tsv_path = File.join(@gstore_result_dir, @parameter_file)
+       @next_dataset_tsv_path = File.join(@gstore_result_dir, @next_dataset_file)
+     end
+     def save_parameters_as_tsv
+       file_path = File.join(@scratch_result_dir, @parameter_file)
+       CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
+         @output_params.each do |key, value|
+           out << [key, value]
+         end
+       end
+       file_path
+     end
+     def save_input_dataset_as_tsv
+       file_path = File.join(@scratch_result_dir, @input_dataset_file)
+       CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
+         headers = @dataset_hash.map{|row| row.keys}.flatten.uniq
+         out << headers
+         @dataset_hash.each do |row|
+           out << headers.map{|header| row[header]}
+         end
+       end
+       file_path
+     end
+     def save_next_dataset_as_tsv
+       headers = @result_dataset.map{|row| row.keys}.flatten.uniq
+       file_path = File.join(@scratch_result_dir, @next_dataset_file)
+       CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
+         out << headers
+         @result_dataset.each do |row_hash|
+           out << headers.map{|header| row_hash[header]}
+         end
+       end
+       file_path
+     end
+     def copy_commands(org_dir, dest_parent_dir)
+       @workflow_manager.copy_commands(org_dir, dest_parent_dir)
+     end
+     def copy_dataset_parameter_jobscripts
+       org = @scratch_result_dir
+       dest = @gstore_project_dir
+       copy_commands(org, dest).each do |command|
+         puts command
+         unless system command
+           raise "failed to copy next_dataset files from /scratch to /gstore"
+         end
+       end
+       sleep 1
+       command = "rm -rf #{@scratch_result_dir}"
+       `#{command}`
+     end
+     def make_job_script
+       @out = open(@job_script, 'w')
+       job_header
+       job_main
+       job_footer
+       @out.close
+     end
+     def sample_mode
+       @dataset_hash.each do |row|
+         @dataset = Hash[*row.map{|key,value| [key.gsub(/\[.+\]/,'').strip, value]}.flatten]
+         ## WRITE THE JOB SCRIPT
+         sample_name = @dataset['Name']||@dataset.first
+         @job_script = if @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+                         File.join(@job_script_dir, @analysis_category + '_' + sample_name) + '_' + dataset.name.gsub(/\s+/,'_') + '.sh'
+                       else
+                         File.join(@job_script_dir, @analysis_category + '_' + sample_name) + '.sh'
+                       end
+         make_job_script
+         @job_scripts << @job_script
+         @result_dataset << next_dataset
+       end
+     end
+     def dataset_mode
+       @job_script = if @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+                       File.join(@job_script_dir, @analysis_category + '_' + dataset.name.gsub(/\s+/,'_') + '.sh')
+                     else
+                       File.join(@job_script_dir, @analysis_category + '_' + 'job_script.sh')
+                     end
+       make_job_script
+       @job_scripts << @job_script
+       @result_dataset << next_dataset
+     end
+     def save_data_set(data_set_arr, headers, rows)
+       data_set_hash = Hash[*data_set_arr]
+       if project = Project.find_by_number(data_set_hash['ProjectNumber'].to_i)
+         data_set = DataSet.new
+         data_set.name = data_set_hash['DataSetName']
+         data_set.project = project
+         if parent_id = data_set_hash['ParentID'] and parent_data_set = DataSet.find_by_id(parent_id.to_i)
+           data_set.data_set = parent_data_set
+         end
+         if comment = data_set_hash['Comment'] and !comment.to_s.empty?
+           data_set.comment = comment
+         end
+
+         sample_hash = {}
+         rows.each do |row|
+           headers.each_with_index do |header, i|
+             sample_hash[header]=row[i]
+           end
+           sample = Sample.new
+           sample.key_value = sample_hash.to_s
+           sample.save unless sample.saved?
+           data_set.samples << sample
+         end
+
+         data_set.md5 = data_set.md5hexdigest
+         unless data_set.saved?
+           project.data_sets << data_set
+           parent_data_set.data_sets << data_set if parent_data_set
+           data_set.save
+         end
+         data_set.id
+       end
+     end
+
+     def run
+       test_run
+
+       ## the user presses RUN
+       prepare_result_dir
+
+       ## copy application data to gstore
+       save_parameters_as_tsv
+       save_input_dataset_as_tsv
+
+
+       ## sushi creates the job scripts and builds the result dataset that is to be generated
+       @result_dataset = []
+       @job_scripts = []
+       if @params['process_mode'] == 'SAMPLE'
+         sample_mode
+       elsif @params['process_mode'] == 'DATASET'
+         dataset_mode
+       else
+         #stop
+         warn "the process mode (#{@params['process_mode']}) is not defined"
+         raise "job submission stopped"
+       end
+
+       # job submission
+       @job_scripts.each_with_index do |job_script, i|
+         job_id = submit(job_script)
+         @job_ids << job_id
+         print "Submit job #{File.basename(job_script)} job_id=#{job_id}"
+       end
+
+       puts
+       print 'job scripts: '
+       p @job_scripts
+       print 'result dataset: '
+       p @result_dataset
+
+       # copy application data to gstore
+       next_dataset_tsv_path = save_next_dataset_as_tsv
+
+       if !@job_ids.empty? and @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
+         data_set_arr = []
+         headers = []
+         rows = []
+         next_dataset_name = if name = @next_dataset_name
+                               name.to_s
+                             else
+                               "#{@analysis_category}_#{@name.gsub(/\s/,'').gsub(/_/,'')}_#{dataset.id}"
+                             end
+         data_set_arr = {'DataSetName'=>next_dataset_name, 'ProjectNumber'=>@project.gsub(/p/,''), 'ParentID'=>@dataset_sushi_id, 'Comment'=>@next_dataset_comment.to_s}
+         csv = CSV.readlines(next_dataset_tsv_path, :col_sep=>"\t")
+         csv.each do |row|
+           if headers.empty?
+             headers = row
+           else
+             rows << row
+           end
+         end
+         unless NO_ROR
+           @next_dataset_id = save_data_set(data_set_arr.to_a.flatten, headers, rows)
+         end
+       end
+       Thread.new do
+         copy_dataset_parameter_jobscripts
+       end
+     end
+     def test_run
+       set_dir_paths
+       set_input_dataset
+       preprocess
+       set_output_files
+       set_user_parameters
+
+       failures = 0
+       print 'check project name: '
+       unless @project
+         puts "\e[31mFAILURE\e[0m: project number is required but not found. you should set it in the usecase."
+         puts "\tex.)"
+         puts "\tapp = #{self.class}.new"
+         puts "\tapp.project = 'p1001'"
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:\n\t@project=#{@project}"
+       end
+
+       print 'check user name: '
+       unless @user
+         puts "\e[31mWARNING\e[0m: user name ought to be set but was not found. you should set it in the usecase. Default will be 'sushi lover'"
+         puts "\tex.)"
+         puts "\tapp = #{self.class}.new"
+         puts "\tapp.user = 'masa'"
+       else
+         puts "\e[32mPASSED\e[0m:\n\t@user=#{@user}"
+       end
+
+       print 'check application name: '
+       if @name.to_s.empty?
+         puts "\e[31mFAILURE\e[0m: application name is required but not found. you should set it in the application class."
+         puts "\tex.)"
+         puts "\tclass #{self.class}"
+         puts "\t def initialize"
+         puts "\t @name = '#{self.class}'"
+         puts "\t end"
+         puts "\tend"
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:\n\t@name=#{@name}"
+       end
+
+       print 'check analysis_category: '
+       if @analysis_category.to_s.empty?
+         puts "\e[31mFAILURE\e[0m: analysis_category is required but not found. you should set it in the application class."
+         puts "\tex.)"
+         puts "\tclass #{self.class}"
+         puts "\t def initialize"
+         puts "\t @analysis_category = 'Mapping'"
+         puts "\t end"
+         puts "\tend"
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:\n\t@analysis_category=#{@analysis_category}"
+       end
+
+       print 'check dataset: '
+       if !@dataset_hash or @dataset_hash.empty?
+         puts "\e[31mFAILURE\e[0m: dataset is not found. you should set it by using the #{self.class}#dataset_sushi_id or #{self.class}#dataset_tsv_file properties"
+         puts "\tex.)"
+         puts "\tusecase = #{self.class}.new"
+         puts "\tusecase.dataset_tsv_file = \"dataset.tsv\""
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:\n\t@dataset_hash.length = #{@dataset_hash.length}"
+       end
+
+       print 'check required columns: '
+       unless check_required_columns
+         puts "\e[31mFAILURE\e[0m: required_column(s) not found in the dataset. you should set them in the application class."
+         puts "\tex.)"
+         puts "\tdef initialize"
+         puts "\t @required_columns = ['Name', 'Read1']"
+         puts
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:"
+       end
+       puts "\trequired columns: #{@required_columns}"
+       puts "\tdataset columns: #{@dataset_hash.map{|row| row.keys}.flatten.uniq}" if @dataset_hash
+
+       print 'check required parameters: '
+       unless check_application_parameters
+         puts "\e[31mFAILURE\e[0m: required_param(s) not set yet. you should set them in the usecase"
+         puts "\tmissing params: #{@required_params-@params.keys}" if @required_params
+         puts "\tex.)"
+         puts "\tusecase = #{self.class}.new"
+         if @required_params
+           puts "\tusecase.params['#{(@required_params-@params.keys)[0]}'] = parameter"
+         else
+           puts "\tusecase.params['parameter name'] = default_parameter"
+         end
+         puts
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:"
+       end
+       puts "\tparameters: #{@params.keys}"
+       puts "\trequired : #{@required_params}"
+
+       print 'check next dataset: '
+       @dataset={}
+       unless self.next_dataset
+         puts "\e[31mFAILURE\e[0m: next dataset is not set yet. you should override the SushiApp#next_dataset method in #{self.class}"
+         puts "\tnote: the return value should be a Hash (key: column title, value: value in a tsv table)"
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:"
+       end
+
+       print 'check output files: '
+       if !@output_files or @output_files.empty?
+         puts "\e[31mWARNING\e[0m: no output files. you will not get any output files after the job runs. you can set @output_files (array) in #{self.class}"
+         puts "\tnote: usually it should be defined in the initialize method"
+         puts "\t the elements of @output_files should be chosen from #{self.class}#next_dataset.keys"
+         puts "\t #{self.class}#next_dataset.keys: #{self.next_dataset.keys}" if self.next_dataset
+       else
+         puts "\e[32mPASSED\e[0m:"
+       end
+
+       print 'check commands: '
+       if @params['process_mode'] == 'SAMPLE'
+         @dataset_hash.each do |row|
+           @dataset = Hash[*row.map{|key,value| [key.gsub(/\[.+\]/,'').strip, value]}.flatten]
+           unless com = commands
+             puts "\e[31mFAILURE\e[0m: no commands are defined yet. you should override the SushiApp#commands method in #{self.class}"
+             puts "\tnote: the return value should be a String (this will be the main body of the submitted job script)"
+             failures += 1
+           else
+             puts "\e[32mPASSED\e[0m:"
+             puts "generated command will be:"
+             puts "\t"+com.split(/\n/).join("\n\t")+"\n"
+           end
+         end
+       elsif @params['process_mode'] == 'DATASET'
+         unless com = commands
+           puts "\e[31mFAILURE\e[0m: no commands are defined yet. you should override the SushiApp#commands method in #{self.class}"
+           puts "\tnote: the return value should be a String (this will be the main body of the submitted job script)"
+           failures += 1
+         else
+           puts "\e[32mPASSED\e[0m:"
+           puts "generated command will be:"
+           puts "\t"+com.split(/\n/).join("\n\t")+"\n"
+         end
+       end
+
+       print 'check workflow manager: '
+       begin
+         hello = `wfm_hello #{WORKFLOW_MANAGER}`
+       rescue
+       end
+       unless hello =~ /hello/
+         puts "\e[31mFAILURE\e[0m: workflow_manager does not reply. check if workflow_manager is working"
+         failures += 1
+       else
+         puts "\e[32mPASSED\e[0m:"
+       end
+
+       if failures > 0
+         puts
+         puts "\e[31mFailures (#{failures})\e[0m: all failures should be resolved"
+         raise "test run failed"
+       else
+         puts "All checks \e[32mPASSED\e[0m"
+       end
+     end
+   end
+
+
+ end
data/lib/sushi_fabric/sushi_configure.yml ADDED
@@ -0,0 +1,4 @@
+ ---
+ :workflow_manager: druby://localhost:12345
+ :gstore_dir: ~/Desktop/gstore/projects
+ :scratch_dir: /tmp/scratch
data/lib/sushi_fabric/version.rb ADDED
@@ -0,0 +1,3 @@
+ module SushiFabric
+   VERSION = "0.0.5"
+ end
data/sample/WordCountApp.rb ADDED
@@ -0,0 +1,41 @@
+ #!/usr/bin/env ruby
+ # encoding: utf-8
+
+ require 'sushi_fabric'
+
+ class WordCountApp < SushiFabric::SushiApp
+   def initialize
+     super
+     @name = 'Word_Count'
+     @analysis_category = 'Stats'
+     @required_columns = ['Name', 'Read1']
+     @required_params = []
+   end
+   def next_dataset
+     {'Name'=>@dataset['Name'],'Stats [File]'=>File.join(@result_dir, @dataset['Name'].to_s + '.stats')}
+   end
+   def preprocess
+     @factors = get_columns_with_tag 'Factor'
+     @factor_cols = @factors.first.keys
+   end
+   def commands
+     commands = ''
+     commands << "gunzip -c $GSTORE_DIR/#{@dataset['Read1']} |wc > #{@dataset['Name']}.stats\n"
+     commands << "echo 'Factor columns: [#{@factor_cols.join(',')}]'\n"
+     commands << "echo 'Factors: [#{@factors.join(',')}]'\n"
+     commands
+   end
+ end
+ if __FILE__ == $0
+   usecase = WordCountApp.new
+
+   usecase.project = "p1001"
+   usecase.user = 'sushi_lover'
+   usecase.parameterset_tsv_file = 'sample_parameterset.tsv'
+   usecase.dataset_tsv_file = 'sample_dataset.tsv'
+   #usecase.dataset_sushi_id = 26
+
+   # run (submit to workflow_manager)
+   usecase.run
+   #usecase.test_run
+ end
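+ # Example (illustrative): running `ruby WordCountApp.rb` first performs the
+ # test_run checks, then (in SAMPLE mode) writes one job script per dataset
+ # row and submits each via the workflow manager configured in
+ # sushi_configure.yml.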
data/sample/sample_dataset.tsv ADDED
@@ -0,0 +1,2 @@
+ Name	Read1	Species
+ sample1	p1001/data/short-ama_E1_R1.fastq.gz	Arabidopsis
data/sample/sample_parameterset.tsv ADDED
@@ -0,0 +1,2 @@
+ process_mode	SAMPLE
+ lines_only	true
data/sushi_fabric.gemspec ADDED
@@ -0,0 +1,24 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'sushi_fabric/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "sushi_fabric"
+   spec.version       = SushiFabric::VERSION
+   spec.authors       = ["Functional Genomics Center Zurich"]
+   spec.email         = ["masaomi.hatakeyama@fgcz.uzh.ch"]
+   spec.description   = %q{This library provides the methods to submit a job in cooperation with the workflow manager.}
+   spec.summary       = %q{workflow manager client.}
+   spec.homepage      = ""
+   spec.license       = "MIT"
+
+   #spec.files         = `git ls-files`.split($/)
+   spec.files         = `bzr ls --versioned --recursive`.split($/).select{|file| !File.directory?(file)}
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.3"
+   spec.add_development_dependency "rake"
+ end
metadata ADDED
@@ -0,0 +1,92 @@
+ --- !ruby/object:Gem::Specification
+ name: sushi_fabric
+ version: !ruby/object:Gem::Version
+   version: 0.0.5
+ prerelease:
+ platform: ruby
+ authors:
+ - Functional Genomics Center Zurich
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2013-11-07 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: bundler
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.3'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ~>
+       - !ruby/object:Gem::Version
+         version: '1.3'
+ - !ruby/object:Gem::Dependency
+   name: rake
+   requirement: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     none: false
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ description: This library provides the methods to submit a job in cooperation with
+   the workflow manager.
+ email:
+ - masaomi.hatakeyama@fgcz.uzh.ch
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .bzrignore
+ - Gemfile
+ - LICENSE.txt
+ - README.md
+ - Rakefile
+ - lib/sushi_fabric/sushiApp.rb
+ - lib/sushi_fabric/sushi_configure.yml
+ - lib/sushi_fabric/version.rb
+ - lib/sushi_fabric.rb
+ - sample/WordCountApp.rb
+ - sample/sample_dataset.tsv
+ - sample/sample_parameterset.tsv
+ - sushi_fabric.gemspec
+ homepage: ''
+ licenses:
+ - MIT
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   none: false
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 1.8.24
+ signing_key:
+ specification_version: 3
+ summary: workflow manager client.
+ test_files: []