rbbt-util 3.2.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -80,7 +80,7 @@ module Open
80
80
  # Cache
81
81
 
82
82
  def self.in_cache(url, options = {})
83
- digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
83
+ digest = Digest::MD5.hexdigest([url, options["--post-data"], (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
84
84
 
85
85
  filename = File.join(REMOTE_CACHEDIR, digest)
86
86
  if File.exists? filename
@@ -91,7 +91,7 @@ module Open
91
91
  end
92
92
 
93
93
  def self.add_cache(url, data, options = {})
94
- digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
94
+ digest = Digest::MD5.hexdigest([url, options["--post-data"], (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
95
95
  Misc.sensiblewrite(File.join(REMOTE_CACHEDIR, digest), data)
96
96
  end
97
97
 
@@ -181,6 +181,13 @@ module Open
181
181
  else
182
182
  io
183
183
  end
184
+
185
+ class << io;
186
+ attr_accessor :filename
187
+ end
188
+
189
+ io.filename = url.to_s
190
+ io
184
191
  end
185
192
 
186
193
  def self.can_open?(file)
@@ -21,7 +21,7 @@ module Persistence
21
21
  end
22
22
 
23
23
  def self.get_persistence_file(file, prefix, options = {})
24
- persistence_dir = Misc.process_options options, :persistence_dir
24
+ persistence_dir = Misc.process_options options, :dir
25
25
  persistence_dir ||= CACHEDIR
26
26
 
27
27
  if options.include? :filters
@@ -32,7 +32,8 @@ class Task
32
32
 
33
33
  def previous_jobs_rec
34
34
  return [] if previous_jobs.nil?
35
- previous_jobs + previous_jobs.collect{|job| job.previous_jobs_rec}.flatten
35
+ prev = previous_jobs + previous_jobs.collect{|job| job.previous_jobs_rec}.flatten
36
+ NamedArray.name prev, prev.collect{|job| job.task.name}
36
37
  end
37
38
 
38
39
  def previous_jobs=(previous_jobs)
@@ -221,6 +222,7 @@ class Task
221
222
  def run
222
223
  return self if recursive_done?
223
224
  begin
225
+ FileUtils.rm info_file if File.exists? info_file
224
226
  step(:started)
225
227
  start
226
228
  step(:done)
@@ -0,0 +1,193 @@
1
+ require 'rbbt/workflow/task'
2
+ require 'rbbt/workflow/step'
3
+ require 'rbbt/workflow/annotate'
4
+ require 'rbbt/workflow/accessor'
5
+
6
# Mixin that turns a module into a workflow: a registry of named tasks,
# their declared dependencies and export modes, plus helpers to build and
# reload the jobs (Step objects) stored under the workflow's workdir.
module Workflow
  class << self
    # Every object that has been extended with Workflow, in extension order.
    attr_accessor :workflows
  end
  self.workflows = []

  # Shape of an input value that points at the result of an existing job:
  # "local:<task name>:<job name>".
  LOCAL_REFERENCE = /^local:(.*?):(.*)/

  # Locates and requires a workflow definition.
  #
  # If +wf_name+ (or +wf_name+ + '.rb') exists as a file it is required
  # directly, after prepending the 'lib' directory next to it to $LOAD_PATH.
  # Otherwise the workflow directory is +wf_dir+ when given, or else the
  # first of: a +wf_name+ directory beside the caller's lib dir, the
  # directory named in Rbbt.etc.workflow_dir, Rbbt.workflows[wf_name], or
  # ~/.workflows/<wf_name> when Rbbt is not defined. The 'workflow.rb' file
  # found there is then required.
  def self.require_workflow(wf_name, wf_dir = nil)
    # Deliberately lazy: Path is only needed when a workflow is required.
    require 'rbbt/resource/path'

    if File.exist?(wf_name) or File.exist?(wf_name + '.rb')
      $LOAD_PATH.unshift(File.join(File.expand_path(File.dirname(wf_name)), 'lib'))
      require wf_name
      return
    end

    wf_dir ||= case
               when File.exist?(File.join(File.dirname(Path.caller_lib_dir), wf_name))
                 dir = File.join(File.dirname(Path.caller_lib_dir), wf_name)
                 Log.debug "Loading workflow from lib dir: #{dir}"
                 dir
               when defined? Rbbt
                 if Rbbt.etc.workflow_dir.exists?
                   dir = File.join(Rbbt.etc.workflow_dir.read.strip, wf_name)
                   Log.debug "Loading workflow from etc dir: #{dir}"
                   dir
                 else
                   dir = Rbbt.workflows[wf_name]
                   Log.debug "Loading workflow from main dir: #{dir}"
                   dir
                 end
               else
                 # Every other branch resolves to a directory specific to
                 # wf_name; the home fallback must include it as well.
                 dir = File.join(ENV["HOME"], '.workflows', wf_name)
                 Log.debug "Loading workflow from home dir: #{dir}"
                 dir
               end

    wf_dir = Path.setup(wf_dir)

    $LOAD_PATH.unshift(File.join(File.dirname(wf_dir["workflow.rb"].find), 'lib'))
    require wf_dir["workflow.rb"].find
  end

  # Hook run when an object is extended with Workflow. Unless +base+ already
  # responds to :workdir, it receives the workflow accessors and empty
  # registries. In every case +base+ is appended to Workflow.workflows.
  def self.extended(base)
    unless base.respond_to? :workdir
      base.extend AnnotatedModule
      class << base
        attr_accessor :libdir, :workdir, :tasks, :task_dependencies, :task_description, :dependencies, :asynchronous_exports, :synchronous_exports, :exec_exports, :last_task

        # Consuming reader: returns the dependencies accumulated so far and
        # resets the list, so they apply only to the next task defined.
        def dependencies
          pending = @dependencies
          @dependencies = []
          pending
        end

        def task_dependencies
          IndiferentHash.setup(@task_dependencies || {})
        end

        def tasks
          IndiferentHash.setup(@tasks || {})
        end
      end

      base.workdir = defined?(Rbbt) ? Rbbt.var.jobs.find : Path.setup('var/jobs')
      base.tasks = {}
      base.dependencies = []
      base.task_dependencies = {}
      base.task_description = {}
      base.asynchronous_exports = []
      base.synchronous_exports = []
      base.exec_exports = []
      base.libdir = Path.caller_lib_dir
    end
    self.workflows << base
  end

  # {{{ Task definition helpers

  # Defines a task. +name+ is either the task name or a one-entry hash
  # { name => result_type }; the result type defaults to :marshal. Without a
  # block, the method called +name+ on this object is used as the task body.
  # The task is registered under its symbol name together with the
  # dependencies declared since the previous task definition.
  def task(name, &block)
    if Hash === name
      name, result_type = name.first
    else
      result_type = :marshal
    end

    name = name.to_sym

    block = self.method(name) unless block_given?

    # Result types may be given as strings; store them as symbols.
    result_type = result_type.to_sym if String === result_type

    task = Task.setup({
      :name => name,
      :inputs => inputs,
      :description => description,
      :input_types => input_types,
      :result_type => result_type,
      :input_defaults => input_defaults,
      :input_descriptions => input_descriptions
    }, &block)

    @last_task = task
    @tasks[name] = task
    @task_dependencies[name] = dependencies
  end

  # Marks the given task names as exported in 'exec' mode.
  def export_exec(*names)
    @exec_exports.concat names
  end

  # Marks the given task names as exported in 'asynchronous' mode.
  def export_asynchronous(*names)
    @asynchronous_exports.concat names
  end

  # Marks the given task names as exported in 'synchronous' mode.
  def export_synchronous(*names)
    @synchronous_exports.concat names
  end

  # {{{ Job management

  # Replaces, in place, every input that is a "local:<task>:<job>" reference
  # with the loaded result of that job. The reference may be a plain string,
  # the only element of an array, or the only key of a TSV. Returns +inputs+.
  def resolve_locals(inputs)
    inputs.each do |name, value|
      reference = local_reference(value)
      next if reference.nil?
      inputs[name] = load_id(File.join(reference[1], reference[2])).load
    end
  end

  # Returns the MatchData for a local job reference carried by +value+, or
  # nil. Only strings are matched, so other input types (numbers, booleans,
  # nil) are never sent =~.
  def local_reference(value)
    candidate = case
                when String === value
                  value
                when (Array === value and value.length == 1)
                  value.first
                when (TSV === value and value.size == 1)
                  value.keys.first
                end

    String === candidate ? candidate.match(LOCAL_REFERENCE) : nil
  end
  private :local_reference

  # Builds the Step for running +taskname+ under +jobname+ (default
  # "Default") with the given inputs. Local references in the inputs are
  # resolved and the task's declared dependencies are instantiated first.
  # Raises if the task is not registered.
  def job(taskname, jobname = nil, inputs = {})
    jobname ||= "Default"
    task = tasks[taskname]
    raise "Task not found: #{ taskname }" if task.nil?

    IndiferentHash.setup(inputs)
    resolve_locals(inputs)

    dependencies = real_dependencies(task, jobname, inputs, task_dependencies[taskname] || [])
    input_values = task.take_input_values(inputs)
    path = step_path(taskname, jobname, input_values, dependencies)

    Step.new path, task, input_values, dependencies
  end

  # Rebuilds the Step stored at +path+, inferring its task from the path.
  def load(path)
    task = task_for path
    Step.new path, tasks[task]
  end

  # Rebuilds the Step identified by +id+ ("<task>/<job>", relative to the
  # workdir). If its info records :dependencies, those steps are rebuilt
  # too and attached to it.
  def load_id(id)
    path = File.join(workdir, id)
    step = Step.new path, tasks[task_for(path)]

    recorded = step.info
    if recorded.include? :dependencies
      step.dependencies = recorded[:dependencies].collect do |dep_task, dep_job|
        Step.new File.join(workdir, dep_task.to_s, dep_job), tasks[dep_task]
      end
    end

    step
  end

  # Lists the names of the jobs of +task+ that have an info file. With
  # +query+, only jobs whose name starts with it; otherwise all of them,
  # including those in subdirectories.
  def jobs(task, query = nil)
    task_dir = File.join(workdir.find, task.to_s)
    pattern = query.nil? ? "**/*.info" : query + "*.info"

    Dir.glob(File.join(task_dir, pattern)).collect do |info_file|
      # Strip only the trailing extension; a job name may itself contain ".info".
      Misc.path_relative_to(task_dir, info_file).sub(/\.info\z/, '')
    end
  end
end
@@ -0,0 +1,249 @@
1
+ require 'rbbt/util/open'
2
+ require 'yaml'
3
+
4
# Accessors for the state a Step keeps on disk next to its result path:
# an '<path>.info' YAML file with status and messages, and an
# '<path>.files' directory with auxiliary files.
class Step

  # Job name: whatever follows the '<task name>/' directory in the path.
  def name
    @path.sub(/.*\/#{Regexp.quote task.name.to_s}\/(.*)/, '\1')
  end

  # Job name without its last '_<suffix>' part (left unchanged when the
  # name contains no underscore).
  def clean_name
    name.sub(/(.*)_.*/, '\1')
  end

  # {{{ INFO

  # Path of the YAML file holding this step's info.
  def info_file
    @path + '.info'
  end

  # Current info hash, read from disk on every call. Returns an empty hash
  # when the info file is missing or empty.
  def info
    return {} unless File.exist? info_file
    YAML.load(Open.open(info_file)) || {}
  end

  # Stores +value+ under +key+ in the info file, holding a lock on the file
  # for the read-modify-write cycle. Returns +value+.
  def set_info(key, value)
    Misc.lock(info_file) do
      current = info
      current[key] = value
      Open.write(info_file, current.to_yaml)
      value
    end
  end

  def status
    info[:status]
  end

  def status=(status)
    set_info(:status, status)
  end

  # Messages recorded so far; an empty list is written to the info file the
  # first time they are requested.
  def messages
    info[:messages] || set_info(:messages, [])
  end

  # Appends +message+ to the recorded messages.
  def message(message)
    set_info(:messages, messages << message)
  end

  # Logs +status+ (and +message+, when given), records the status and
  # appends the message to the info file.
  def log(status, message = nil)
    if message
      Log.low "#{ status }: #{ message }"
    else
      Log.low "#{ status }"
    end
    self.status = status
    message(message) unless message.nil?
  end

  # True once the step has finished, whether successfully or with an error.
  def done?
    # Read the info file once so both comparisons see the same status.
    current = status
    current == :done or current == :error
  end

  def error?
    info[:status] == :error
  end

  # {{{ FILES

  # Directory holding the auxiliary files of this step.
  def files_dir
    @path + '.files'
  end

  # Base names of the entries directly inside the files directory.
  def files
    Dir.glob(File.join(files_dir, '*')).collect do |path| File.basename(path) end
  end

  # Path of the auxiliary file called +name+.
  def file(name)
    Path.setup(File.join(files_dir, name.to_s))
  end

  # Writes +content+ to the auxiliary file +name+. Arrays are written one
  # element per line, hashes one tab-separated entry per line, and anything
  # else through to_s.
  def save_file(name, content)
    text = case
           when String === content
             content
           when Array === content
             content * "\n"
           when TSV === content
             # Checked before Hash so a TSV is written through its own to_s.
             content.to_s
           when Hash === content
             content.collect{|*p| p * "\t"} * "\n"
           else
             content.to_s
           end
    Open.write(file(name), text)
  end

  # Loads the auxiliary file +name+. +type+ selects the loader (:tc, :tsv,
  # :array, :yaml, :marshal; anything else returns the raw content). When
  # +type+ is nil it is derived from the file extension. +options+ is
  # passed to TSV.open for :tsv files.
  def load_file(name, type = nil, options = {})
    if type.nil?
      type = case name.to_s[/\.(\w+)$/, 1]
             when "tc"
               :tc
             when "tsv"
               :tsv
             when "list", "ary", "array"
               :array
             when "yaml"
               :yaml
             when "marshal"
               :marshal
             else
               :other
             end
    end

    case type.to_sym
    when :tc
      Persist.open_tokyocabinet(file(name), false)
    when :tsv
      TSV.open Open.open(file(name)), options
    when :array
      Open.read(file(name)).split(/\n|,\s*/)
    when :yaml
      YAML.load(Open.open(file(name)))
    when :marshal
      Marshal.load(Open.open(file(name)))
    else
      Open.read(file(name))
    end
  end

end
137
+
138
# Introspection and path helpers for workflows: task descriptions, recursive
# dependency/input lookups, and the mapping between jobs and paths under
# the workdir.
module Workflow
  # Describes the task +name+ as a hash: id, description, export mode,
  # result type, its directly named dependencies, and the inputs (with
  # types, descriptions and defaults) of the task and all its recursive
  # dependencies.
  def task_info(name)
    task = tasks[name]

    export = case
             when exported_in?(synchronous_exports, name)
               :synchronous
             when exported_in?(asynchronous_exports, name)
               :asynchronous
             when exported_in?(exec_exports, name)
               :exec
             else
               :none
             end

    # Go through the accessor, as rec_dependencies does, and tolerate tasks
    # that have no dependency entry at all.
    dependencies = (task_dependencies[name] || []).select{|dep| String === dep or Symbol === dep}

    { :id => File.join(self.to_s, name.to_s),
      :description => task.description,
      :export => export,
      :inputs => rec_inputs(name),
      :input_types => rec_input_types(name),
      :input_descriptions => rec_input_descriptions(name),
      :input_defaults => rec_input_defaults(name),
      :result_type => task.result_type,
      :dependencies => dependencies
    }
  end

  # Names of all tasks +taskname+ depends on, directly or transitively,
  # without duplicates. Only dependencies given by name (String or Symbol)
  # are followed.
  def rec_dependencies(taskname)
    return [] unless task_dependencies.include? taskname

    direct = task_dependencies[taskname].select{|dep| String === dep or Symbol === dep}
    (direct + direct.collect{|dep| rec_dependencies(dep)}.flatten).uniq
  end

  # Inputs of +taskname+ followed by those of its recursive dependencies.
  def rec_inputs(taskname)
    task_chain(taskname).inject([]){|acc, tn| acc.concat tasks[tn].inputs}
  end

  # Input defaults of +taskname+ merged with those of its recursive
  # dependencies; on a clash the entry merged last (a dependency) wins.
  def rec_input_defaults(taskname)
    task_chain(taskname).inject({}){|acc, tn| acc.merge tasks[tn].input_defaults}
  end

  # Input types of +taskname+ merged with those of its recursive dependencies.
  def rec_input_types(taskname)
    task_chain(taskname).inject({}){|acc, tn| acc.merge tasks[tn].input_types}
  end

  # Input descriptions of +taskname+ merged with those of its recursive
  # dependencies.
  def rec_input_descriptions(taskname)
    task_chain(taskname).inject({}){|acc, tn| acc.merge tasks[tn].input_descriptions}
  end

  # Turns declared dependencies into concrete ones: Steps are kept, Symbols
  # become jobs of that task with the same jobname and inputs, and Procs
  # are called with (jobname, inputs). Any other kind of dependency yields
  # nil and is dropped, as are nils returned by procs.
  def real_dependencies(task, jobname, inputs, dependencies)
    resolved = dependencies.collect do |dependency|
      case
      when Step === dependency
        dependency
      when Symbol === dependency
        job dependency, jobname, inputs
      when Proc === dependency
        dependency.call jobname, inputs
      end
    end
    resolved.flatten.compact
  end

  # How job names are made unique per set of inputs; :hash appends a digest.
  TAG = :hash

  # Path under the workdir for the job +jobname+ of +taskname+. When there
  # are inputs or dependencies the job name is tagged with a digest of the
  # inputs and the dependency names. Raises if +jobname+ contains '..'.
  def step_path(taskname, jobname, inputs, dependencies)
    raise "Jobname makes an invalid path: #{ jobname }" if jobname =~ /\.\./

    tagged_jobname = jobname
    if (inputs.any? or dependencies.any?) and TAG == :hash
      tagged_jobname = jobname + '_' + Misc.digest((inputs + dependencies.collect{|dep| dep.name}).inspect)
    end

    workdir[taskname][tagged_jobname].find
  end

  # Job id for +path+: the path relative to the workdir.
  def id_for(path)
    Misc.path_relative_to resolved_workdir, path
  end

  # Task name for +path+: the first directory below the workdir.
  def task_for(path)
    Misc.path_relative_to(resolved_workdir, File.dirname(path)).sub(/([^\/]+)\/.*/,'\1')
  end

  # The workdir as a concrete location: workdir.find when available,
  # otherwise the workdir itself.
  def resolved_workdir
    workdir.respond_to?(:find) ? workdir.find : workdir
  end
  private :resolved_workdir

  # A task followed by all of its recursive dependencies.
  def task_chain(taskname)
    [taskname].concat(rec_dependencies(taskname))
  end
  private :task_chain

  # Whether +name+ appears in +exports+, as either a Symbol or a String.
  def exported_in?(exports, name)
    exports.include?(name.to_sym) or exports.include?(name.to_s)
  end
  private :exported_in?
end