rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -80,7 +80,7 @@ module Open
80
80
  # Cache
81
81
 
82
82
  def self.in_cache(url, options = {})
83
- digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
83
+ digest = Digest::MD5.hexdigest([url, options["--post-data"], (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
84
84
 
85
85
  filename = File.join(REMOTE_CACHEDIR, digest)
86
86
  if File.exists? filename
@@ -91,7 +91,7 @@ module Open
91
91
  end
92
92
 
93
93
  def self.add_cache(url, data, options = {})
94
- digest = Digest::MD5.hexdigest([url, options["--post-data"]].inspect)
94
+ digest = Digest::MD5.hexdigest([url, options["--post-data"], (options.include?("--post-file")? Open.read(options["--post-file"]) : "")].inspect)
95
95
  Misc.sensiblewrite(File.join(REMOTE_CACHEDIR, digest), data)
96
96
  end
97
97
 
@@ -181,6 +181,13 @@ module Open
181
181
  else
182
182
  io
183
183
  end
184
+
185
+ class << io;
186
+ attr_accessor :filename
187
+ end
188
+
189
+ io.filename = url.to_s
190
+ io
184
191
  end
185
192
 
186
193
  def self.can_open?(file)
@@ -21,7 +21,7 @@ module Persistence
21
21
  end
22
22
 
23
23
  def self.get_persistence_file(file, prefix, options = {})
24
- persistence_dir = Misc.process_options options, :persistence_dir
24
+ persistence_dir = Misc.process_options options, :dir
25
25
  persistence_dir ||= CACHEDIR
26
26
 
27
27
  if options.include? :filters
@@ -32,7 +32,8 @@ class Task
32
32
 
33
33
  def previous_jobs_rec
34
34
  return [] if previous_jobs.nil?
35
- previous_jobs + previous_jobs.collect{|job| job.previous_jobs_rec}.flatten
35
+ prev = previous_jobs + previous_jobs.collect{|job| job.previous_jobs_rec}.flatten
36
+ NamedArray.name prev, prev.collect{|job| job.task.name}
36
37
  end
37
38
 
38
39
  def previous_jobs=(previous_jobs)
@@ -221,6 +222,7 @@ class Task
221
222
  def run
222
223
  return self if recursive_done?
223
224
  begin
225
+ FileUtils.rm info_file if File.exists? info_file
224
226
  step(:started)
225
227
  start
226
228
  step(:done)
@@ -0,0 +1,193 @@
1
+ require 'rbbt/workflow/task'
2
+ require 'rbbt/workflow/step'
3
+ require 'rbbt/workflow/annotate'
4
+ require 'rbbt/workflow/accessor'
5
+
6
# Mixin that turns a module into a workflow: a registry of named tasks with
# their dependencies, plus helpers to create and look up the jobs (Step
# objects) that run them. Extending a module with Workflow sets up its
# bookkeeping (see Workflow.extended) and records it in Workflow.workflows.
module Workflow
  class << self
    # Every object that has been extended with Workflow, in extension order.
    attr_accessor :workflows
  end
  self.workflows = []

  # Finds and requires the definition of the workflow +wf_name+.
  #
  # If +wf_name+ (or +wf_name+.rb) is an existing file it is required
  # directly, after putting its sibling 'lib' directory on the load path.
  # Otherwise the workflow directory is +wf_dir+ when given, or else the first
  # of: a directory named +wf_name+ next to the caller's lib dir, the
  # directory configured through Rbbt (when Rbbt is defined), or
  # ~/.workflows/+wf_name+. The 'workflow.rb' found there is required with
  # its 'lib' directory on the load path.
  def self.require_workflow(wf_name, wf_dir = nil)
    require 'rbbt/resource/path'

    if File.exist?(wf_name) or File.exist?(wf_name + '.rb')
      $LOAD_PATH.unshift(File.join(File.expand_path(File.dirname(wf_name)), 'lib'))
      require wf_name
      return
    end

    # Path.caller_lib_dir is deliberately called from this method's own frame
    # (no helper, no block); presumably it inspects the call stack -- TODO confirm.
    wf_dir ||= begin
                 lib_candidate = File.join(File.dirname(Path.caller_lib_dir), wf_name)
                 if File.exist?(lib_candidate)
                   Log.debug "Loading workflow from lib dir: #{lib_candidate}"
                   lib_candidate
                 elsif defined?(Rbbt)
                   if Rbbt.etc.workflow_dir.exists?
                     dir = File.join(Rbbt.etc.workflow_dir.read.strip, wf_name)
                     Log.debug "Loading workflow from etc dir: #{dir}"
                     dir
                   else
                     dir = Rbbt.workflows[wf_name]
                     Log.debug "Loading workflow from main dir: #{dir}"
                     dir
                   end
                 else
                   # Include the workflow name, as every other branch does;
                   # otherwise any name would resolve to the very same
                   # ~/.workflows/workflow.rb.
                   dir = File.join(ENV["HOME"], '.workflows', wf_name)
                   Log.debug "Loading workflow from home dir: #{dir}"
                   dir
                 end
               end

    wf_dir = Path.setup(wf_dir)

    workflow_file = wf_dir["workflow.rb"].find
    $LOAD_PATH.unshift(File.join(File.dirname(workflow_file), 'lib'))
    require workflow_file
  end

  # Hook run when an object is extended with Workflow. Unless +base+ already
  # responds to :workdir, it gains AnnotatedModule, the bookkeeping accessors
  # and their empty initial values. +base+ is always appended to
  # Workflow.workflows.
  def self.extended(base)
    unless base.respond_to? :workdir
      base.extend AnnotatedModule
      class << base
        attr_accessor :libdir, :workdir, :tasks, :task_dependencies, :task_description, :dependencies, :asynchronous_exports, :synchronous_exports, :exec_exports, :last_task

        # Reading the pending dependencies consumes them: the current list
        # is returned and replaced by an empty one.
        def dependencies
          pending = @dependencies
          @dependencies = []
          pending
        end

        def task_dependencies
          IndiferentHash.setup(@task_dependencies || {})
        end

        def tasks
          IndiferentHash.setup(@tasks || {})
        end
      end

      base.workdir = defined?(Rbbt) ? Rbbt.var.jobs.find : Path.setup('var/jobs')
      base.tasks = {}
      base.dependencies = []
      base.task_dependencies = {}
      base.task_description = {}
      base.asynchronous_exports = []
      base.synchronous_exports = []
      base.exec_exports = []
      base.libdir = Path.caller_lib_dir
    end
    workflows << base
  end

  # {{{ Task definition helpers

  # Defines a task. +name+ is either the task name or a one-entry hash
  # { name => result_type }; the result type defaults to :marshal. When no
  # block is given, the method called +name+ on this object is used as the
  # task body. The task is built from the currently pending annotations
  # (inputs, description, ...) and registered together with the pending
  # dependencies, which are consumed in the process.
  def task(name, &block)
    if Hash === name
      result_type = name.first.last
      name = name.first.first
    else
      result_type = :marshal
    end

    name = name.to_sym

    block = self.method(name) unless block_given?

    task = Task.setup({
      :name => name,
      :inputs => inputs,
      :description => description,
      :input_types => input_types,
      # NOTE(review): this only triggers when result_type is the Array class
      # itself, and then calls to_sym on it; the intent is unclear, so the
      # expression is kept exactly as it was -- confirm with the author.
      :result_type => Array == result_type ? result_type.to_sym : result_type,
      :input_defaults => input_defaults,
      :input_descriptions => input_descriptions
    }, &block)

    @last_task = task
    @tasks[name] = task
    @task_dependencies[name] = dependencies
  end

  # Appends +names+ to the exec exports list.
  def export_exec(*names)
    @exec_exports.concat names
  end

  # Appends +names+ to the asynchronous exports list.
  def export_asynchronous(*names)
    @asynchronous_exports.concat names
  end

  # Appends +names+ to the synchronous exports list.
  def export_synchronous(*names)
    @synchronous_exports.concat names
  end

  # {{{ Job management

  # Replaces, in place, every input that is a reference of the form
  # "local:<task>:<job>" with the loaded result of that job. The reference
  # may be the value itself, the only element of an Array, or the only key
  # of a TSV. Returns +inputs+.
  def resolve_locals(inputs)
    # Only Strings and Symbols are matched: calling =~ on arbitrary objects
    # raises NoMethodError since Object#=~ was removed in Ruby 3.2.
    reference = lambda do |candidate|
      if String === candidate or Symbol === candidate
        /^local:(.*?):(.*)/.match(candidate.to_s)
      end
    end

    inputs.each do |name, value|
      found = reference.call(value)
      found ||= reference.call(value.first) if Array === value and value.length == 1
      found ||= reference.call(value.keys.first) if found.nil? and TSV === value and value.size == 1

      value = load_id(File.join(found[1], found[2])).load if found
      inputs[name] = value
    end
  end

  # Builds the Step for running +taskname+ under +jobname+ (default
  # "Default") with +inputs+. Local references in +inputs+ are resolved and
  # the task's dependencies are instantiated first. Raises a RuntimeError if
  # the task is not defined.
  def job(taskname, jobname = nil, inputs = {})
    jobname ||= "Default"
    task = tasks[taskname]
    raise "Task not found: #{ taskname }" if task.nil?

    IndiferentHash.setup(inputs)
    resolve_locals(inputs)

    job_dependencies = real_dependencies(task, jobname, inputs, task_dependencies[taskname] || [])
    input_values = task.take_input_values(inputs)
    path = step_path(taskname, jobname, input_values, job_dependencies)

    Step.new path, task, input_values, job_dependencies
  end

  # Returns the Step stored at +path+, bound to the task derived from it.
  def load(path)
    Step.new path, tasks[task_for(path)]
  end

  # Returns the Step identified by +id+ (a path relative to the workdir). If
  # its info lists :dependencies, those are attached as Steps as well.
  def load_id(id)
    path = File.join(workdir, id)
    step = Step.new path, tasks[task_for(path)]

    info = step.info
    if info.include? :dependencies
      step.dependencies = info[:dependencies].collect do |dep_task, dep_job|
        Step.new File.join(workdir, dep_task.to_s, dep_job), tasks[dep_task]
      end
    end
    step
  end

  # Lists the names of the jobs of +task+ that have an info file. With
  # +query+ only the jobs whose name starts with it are listed.
  def jobs(task, query = nil)
    task_dir = File.join(workdir.find, task.to_s)
    pattern = query.nil? ? "**/*.info" : query + "*.info"

    Dir.glob(File.join(task_dir, pattern)).collect do |info_file|
      # Strip only the '.info' extension, not an earlier '.info' in the name
      Misc.path_relative_to(task_dir, info_file).sub(/\.info\z/, '')
    end
  end
end
@@ -0,0 +1,249 @@
1
+ require 'rbbt/util/open'
2
+ require 'yaml'
3
+
4
# Accessors for the on-disk state of a Step: its '.info' file (status,
# messages and any other key/value stored with set_info) and the auxiliary
# files kept in its '.files' directory. Relies on @path and on a +task+
# reader, both of which are set up outside of this section.
class Step

  # Name of the job: what follows the task directory in the step path.
  def name
    @path.sub(/.*\/#{Regexp.quote task.name.to_s}\/(.*)/, '\1')
  end

  # The job name up to, but not including, its last underscore and whatever
  # follows it; names without underscore are returned whole.
  def clean_name
    name.sub(/(.*)_.*/, '\1')
  end

  # {{{ INFO

  # Path of the YAML file holding the step info.
  def info_file
    @path + '.info'
  end

  # Returns the info hash read from info_file; an empty hash if the file is
  # missing or has no content.
  def info
    return {} unless File.exist? info_file

    # Read the whole content so that no stream is left open.
    content = Open.read(info_file)

    # set_info stores Symbol keys and values (e.g. :status => :done), which
    # the safe-by-default YAML.load of Psych 4 refuses. The info file is
    # written by this class itself, so the unrestricted loader is used when
    # the installed Psych provides one.
    loaded = YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(content) : YAML.load(content)
    loaded || {}
  end

  # Stores +value+ under +key+ in the info file, holding a lock on it while
  # it is read and rewritten. Returns +value+.
  def set_info(key, value)
    Misc.lock(info_file) do
      current = info
      current[key] = value
      Open.write(info_file, current.to_yaml)
      value
    end
  end

  # Current value of info[:status].
  def status
    info[:status]
  end

  def status=(status)
    set_info(:status, status)
  end

  # Messages recorded so far; an empty list is stored the first time.
  def messages
    info[:messages] || set_info(:messages, [])
  end

  # Appends +message+ to the recorded messages.
  def message(message)
    set_info(:messages, messages << message)
  end

  # Logs +status+ (and +message+, if any), sets it as the step status and
  # records the message when there is one.
  def log(status, message = nil)
    if message
      Log.low "#{ status }: #{ message }"
    else
      Log.low "#{ status }"
    end
    self.status = status
    message(message) unless message.nil?
  end

  # True once the status is :done or :error.
  def done?
    current = status # read the info file only once
    current == :done or current == :error
  end

  def error?
    status == :error
  end

  # {{{ FILES

  # Directory holding the auxiliary files of the step.
  def files_dir
    @path + '.files'
  end

  # Base names of the entries directly under files_dir.
  def files
    Dir.glob(File.join(files_dir, '*')).collect { |path| File.basename(path) }
  end

  # Path of the auxiliary file +name+.
  def file(name)
    Path.setup(File.join(files_dir, name.to_s))
  end

  # Writes +content+ to the auxiliary file +name+. Arrays are written one
  # element per line, hashes one tab-separated entry per line, and anything
  # else through to_s.
  def save_file(name, content)
    content = case
              when String === content
                content
              when Array === content
                content * "\n"
              when TSV === content
                content.to_s
              when Hash === content
                content.collect { |*fields| fields * "\t" } * "\n"
              else
                content.to_s
              end
    Open.write(file(name), content)
  end

  # Loads the auxiliary file +name+ as +type+ (:tc, :tsv, :array, :yaml,
  # :marshal; anything else returns the raw content). When +type+ is nil it
  # is guessed from the file extension. +options+ is only used for :tsv.
  def load_file(name, type = nil, options = {})
    if type.nil? and name =~ /.*\.(\w+)$/
      type = case Regexp.last_match(1)
             when "tc"
               :tc
             when "tsv"
               :tsv
             when "list", "ary", "array"
               :array
             when "yaml"
               :yaml
             when "marshal"
               :marshal
             else
               :other
             end
    else
      type ||= :other
    end

    case type.to_sym
    when :tc
      Persist.open_tokyocabinet(file(name), false)
    when :tsv
      TSV.open Open.open(file(name)), options
    when :array
      Open.read(file(name)).split(/\n|,\s*/)
    when :yaml
      # NOTE(review): the stream from Open.open is not closed here, and
      # YAML.load is kept as is because these files may not be produced by
      # this class -- confirm before relaxing it.
      YAML.load(Open.open(file(name)))
    when :marshal
      # NOTE(review): Marshal.load runs on whatever the file contains; only
      # use with files written by trusted tasks.
      Marshal.load(Open.open(file(name)))
    else
      Open.read(file(name))
    end
  end

end
137
+
138
# Introspection helpers for workflows: task descriptions that include the
# inputs inherited from dependencies, instantiation of dependencies, and the
# mapping between jobs and paths under the workdir. Relies on the +tasks+,
# +task_dependencies+, +workdir+ and exports readers, and on +job+, all of
# which are defined outside of this section.
module Workflow

  # Returns a hash describing task +name+: its id, description, export mode
  # (:synchronous, :asynchronous, :exec or :none), result type, the inputs
  # (with types, descriptions and defaults) of the task and of its recursive
  # dependencies, and the dependencies declared by name.
  def task_info(name)
    task = tasks[name]

    export = case
             when (synchronous_exports.include?(name.to_sym) or synchronous_exports.include?(name.to_s))
               :synchronous
             when (asynchronous_exports.include?(name.to_sym) or asynchronous_exports.include?(name.to_s))
               :asynchronous
             when (exec_exports.include?(name.to_sym) or exec_exports.include?(name.to_s))
               :exec
             else
               :none
             end

    # Go through the reader, as job and rec_dependencies do, and tolerate
    # tasks that have no dependencies registered.
    named_dependencies = (task_dependencies[name] || []).select { |dep| String === dep or Symbol === dep }

    { :id => File.join(self.to_s, name.to_s),
      :description => task.description,
      :export => export,
      :inputs => rec_inputs(name),
      :input_types => rec_input_types(name),
      :input_descriptions => rec_input_descriptions(name),
      :input_defaults => rec_input_defaults(name),
      :result_type => task.result_type,
      :dependencies => named_dependencies
    }
  end

  # Names of all the tasks +taskname+ depends on, directly or through other
  # dependencies, without duplicates. Only dependencies declared by name
  # (String or Symbol) are followed.
  def rec_dependencies(taskname)
    return [] unless task_dependencies.include? taskname

    direct = task_dependencies[taskname].select { |dep| String === dep or Symbol === dep }
    indirect = direct.collect { |dep| rec_dependencies(dep) }.flatten
    (direct + indirect).uniq
  end

  # Inputs of +taskname+ followed by those of its recursive dependencies.
  def rec_inputs(taskname)
    [taskname].concat(rec_dependencies(taskname)).inject([]) { |acc, tn| acc.concat tasks[tn].inputs }
  end

  # Input defaults of +taskname+ merged with those of its recursive
  # dependencies; on a clash the value merged last wins.
  def rec_input_defaults(taskname)
    [taskname].concat(rec_dependencies(taskname)).inject({}) { |acc, tn| acc.merge tasks[tn].input_defaults }
  end

  # Input types of +taskname+ merged with those of its recursive dependencies.
  def rec_input_types(taskname)
    [taskname].concat(rec_dependencies(taskname)).inject({}) { |acc, tn| acc.merge tasks[tn].input_types }
  end

  # Input descriptions of +taskname+ merged with those of its recursive
  # dependencies.
  def rec_input_descriptions(taskname)
    [taskname].concat(rec_dependencies(taskname)).inject({}) { |acc, tn| acc.merge tasks[tn].input_descriptions }
  end

  # Turns the declared +dependencies+ into actual jobs: Steps are used as
  # they are, Symbols become jobs of that task with the same +jobname+ and
  # +inputs+, and Procs are called with +jobname+ and +inputs+. Results are
  # flattened and nils dropped.
  #
  # NOTE(review): String dependencies fall through and are dropped here,
  # although rec_dependencies and task_info accept them -- confirm whether
  # that is intended.
  def real_dependencies(task, jobname, inputs, dependencies)
    resolved = dependencies.collect do |dependency|
      case
      when Step === dependency
        dependency
      when Symbol === dependency
        job dependency, jobname, inputs
      when Proc === dependency
        dependency.call jobname, inputs
      end
    end
    resolved.flatten.compact
  end

  # How job names are tagged in step_path; with :hash a digest is appended.
  TAG = :hash

  # Path of the job +jobname+ of +taskname+ under the workdir. When there
  # are inputs or dependencies and TAG is :hash, the job name is suffixed
  # with '_' and a digest of the inputs and dependency names. Raises a
  # RuntimeError if +jobname+ contains '..'.
  def step_path(taskname, jobname, inputs, dependencies)
    raise "Jobname makes an invalid path: #{ jobname }" if jobname =~ /\.\./

    tagged_jobname = jobname
    if (inputs.any? or dependencies.any?) and TAG == :hash
      tagged_jobname = jobname + '_' + Misc.digest((inputs + dependencies.collect { |dep| dep.name }).inspect)
    end

    workdir[taskname][tagged_jobname].find
  end

  # Job id of +path+: the path relative to the workdir.
  def id_for(path)
    root = workdir.respond_to?(:find) ? workdir.find : workdir
    Misc.path_relative_to root, path
  end

  # Name of the task +path+ belongs to: the first component, relative to the
  # workdir, of the directory containing +path+.
  def task_for(path)
    root = workdir.respond_to?(:find) ? workdir.find : workdir
    Misc.path_relative_to(root, File.dirname(path)).sub(/([^\/]+)\/.*/, '\1')
  end
end