rbbt-util 3.2.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,345 @@
1
+ require 'rbbt/util/chain_methods'
2
+
3
+ module TSV
4
+ extend ChainMethods
5
+ self.chain_prefix = :tsv
6
+
7
+ attr_accessor :unnamed
8
+
9
# Runs the given block with @unnamed set to true and returns the block's
# result. The previous value of @unnamed is restored afterwards, even when
# the block raises, so a failure cannot leave the flag stuck on true.
def with_unnamed
  saved_unnamed = @unnamed
  @unnamed = true
  yield
ensure
  @unnamed = saved_unnamed
end
16
+
17
# Runs the given block with @monitor set to +value+ (true by default) and
# returns the block's result. The previous value of @monitor is restored
# afterwards, even when the block raises.
def with_monitor(value = true)
  saved_monitor = @monitor
  @monitor = value
  yield
ensure
  @monitor = saved_monitor
end
24
+
25
# Hook invoked when an object is extended with this module. Sets up the
# method chains on +data+ and, when +data+ does not already respond to
# :write, gives it a +writable+ flag plus minimal close/read/write/write?
# methods that only track that flag.
def self.extended(data)
  setup_chains(data)

  return if data.respond_to?(:write)

  class << data
    attr_accessor :writable

    # Nothing to release.
    def close
    end

    # Marks the object as not writable; returns the object itself.
    def read(force = false)
      @writable = false
      self
    end

    # Marks the object as writable; returns the object itself.
    def write(force = false)
      @writable = true
      self
    end

    def write?
      @writable
    end
  end
end
51
+
52
# Prefix of the reserved keys under which entry values are stored next to
# the data. Frozen because it is only ever read.
KEY_PREFIX = "__tsv_hash_".freeze

# Names of the entries registered through TSV.entry (appended to at load
# time, so it must stay mutable).
ENTRIES = []
# Reserved storage keys (KEY_PREFIX + entry name), one per registered entry.
ENTRY_KEYS = []
56
+
57
# Reads the raw value stored under +key+ and decodes it with the serializer
# registered in SERIALIZER_ALIAS under the current serializer name.
# Returns nil when nothing is stored under +key+.
# Raises RuntimeError when the serializer is still :type.
def serialized_get(key)
  raise "Uninitialized serializer" if serializer == :type
  raw = tsv_clean_get_brackets(key)
  return nil if raw.nil?
  SERIALIZER_ALIAS[serializer.to_sym].load(raw)
end
62
+
63
# Stores +value+ under +key+ after encoding it with the serializer
# registered in SERIALIZER_ALIAS under the current serializer name.
# A nil value is stored as nil without going through the serializer.
# Raises RuntimeError when the serializer is still :type.
def serialized_set(key, value)
  raise "Uninitialized serializer" if serializer == :type
  encoded = value.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym].dump(value)
  tsv_clean_set_brackets(key, encoded)
end
71
+
72
+ #{{{ Chained Methods
73
# Chained [] reader: fetches the value for +key+, going through
# serialized_get when a serializer is set and through the raw reader
# otherwise. Array values are set up as NamedArray with the current fields
# unless @unnamed is set.
def tsv_get_brackets(key)
  value = serializer.nil? ? tsv_clean_get_brackets(key) : serialized_get(key)
  NamedArray.setup(value, fields) if !@unnamed && Array === value
  value
end
83
+
84
# Chained []= writer: stores +value+ under +key+ directly when no serializer
# is set, and through serialized_set otherwise.
def tsv_set_brackets(key,value)
  return tsv_clean_set_brackets(key, value) if serializer.nil?
  serialized_set(key, value)
end
91
+
92
# Chained keys: the underlying keys minus the reserved entry keys.
def tsv_keys
  raw_keys = tsv_clean_keys
  raw_keys - ENTRY_KEYS
end
95
+
96
# Chained values: the values for every key, in key order. When the first
# value is an Array and @unnamed is not set, every value is set up as a
# NamedArray with the current fields.
def tsv_values
  all_values = values_at(*keys)
  if !@unnamed && Array === all_values.first
    all_values.each { |row| NamedArray.setup(row, fields) }
  end
  all_values
end
101
+
102
# Chained each: iterates over the underlying key/value pairs, skipping the
# reserved entry keys. When a serializer is set each value is decoded with
# it; Array values are set up as NamedArray unless @unnamed is set. Yields
# key and value to the block when one is given.
def tsv_each
  tsv_clean_each do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up by symbol, as serialized_get and serialized_set
    # do, so a serializer name held as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil?
    NamedArray.setup value, fields if Array === value and not @unnamed
    yield key, value if block_given?
    [key, value]
  end
end
112
+
113
# Chained collect: maps over the underlying key/value pairs. Reserved entry
# keys contribute nil to the result. String values are decoded with the
# serializer when one is set; Array values are set up as NamedArray unless
# @unnamed is set. Each pair maps to the block's result when a block is
# given and to [key, value] otherwise.
def tsv_collect
  tsv_clean_collect do |key, value|
    next if ENTRY_KEYS.include? key
    # Look the serializer up by symbol, as serialized_get and serialized_set
    # do, so a serializer name held as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil? or not String === value
    NamedArray.setup value, fields if Array === value and not @unnamed
    if block_given?
      yield key, value
    else
      [key, value]
    end
  end
end
125
+
126
# Chained size: the number of keys.
def tsv_size
  keys.size
end
129
+
130
# Chained length: the number of keys.
def tsv_length
  keys.size
end
133
+
134
# Chained values_at: looks each of the given keys up through [] and returns
# the results in the same order.
def tsv_values_at(*keys)
  keys.map { |key| self[key] }
end
139
+
140
+ #{{{ Sorting
141
+
142
# Chained sort_by.
#
# field     - field whose first value is the sort criterion; nil or :all
#             means the whole entry.
# just_keys - when true only the keys are returned.
# block     - optional custom criterion, applied to the [key, value] pairs.
#
# Without a block, :all sorts the [key, value] pairs by key and any other
# field sorts by that field's first value. The original tested
# `fields == :all` (the field list, never equal to :all), which made the
# sort-by-key branch unreachable; the test now uses `field`.
#
# Returns sorted keys when just_keys is set. Otherwise returns the sorted
# [key, value] pairs for the block-less :all case, and [key, self[key]]
# pairs in every other case.
def tsv_sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  elems = if field == :all
            collect
          else
            pairs = []
            through :key, field do |key, values|
              pairs << [key, values.first]
            end
            pairs
          end

  sorted = if block_given?
             elems.sort_by(&block)
           elsif field == :all
             elems.sort_by { |key, _value| key }
           else
             elems.sort_by { |_key, value| value }
           end

  if just_keys
    sorted.collect { |key, _value| key }
  elsif field == :all and not block_given?
    sorted
  else
    sorted.collect { |key, _value| [key, self[key]] }
  end
end
175
+
176
# Chained sort: sorts the collected entries, using the comparison block when
# one is given.
def tsv_sort(&block)
  collect.sort(&block)
end
179
+
180
+ # Starts in page 1
181
# Returns one page of the entries, sorted through sort_by.
#
# pnum      - page number, starting at 1; a value whose string form contains
#             "-" (e.g. "-2") selects that page from the reversed order.
# psize     - number of entries per page.
# field     - sort field; nil and "key" both mean :key.
# just_keys - when true the page's keys are returned, otherwise the result
#             of select :key => keys.
# block     - optional sort criterion handed to sort_by.
def page(pnum, psize, field = nil, just_keys = false, &block)
  reverse = false
  if pnum.to_s =~ /-(.*)/
    reverse = true
    pnum = $1.to_i
  end

  with_unnamed do
    first_index = psize * (pnum - 1)
    last_index = psize * pnum - 1
    field = :key if field == "key"
    sorted_keys = sort_by(field || :key, true, &block)
    sorted_keys.reverse! if reverse

    page_keys = sorted_keys[first_index..last_index]
    just_keys ? page_keys : select(:key => page_keys)
  end
end
203
+
204
+
205
# Registers metadata entries. For each name it records the name in ENTRIES
# and its reserved key (KEY_PREFIX + name) in ENTRY_KEYS, and defines:
#
#   name    - reader; on first use loads the YAML stored under the reserved
#             key (nil when nothing is stored) and caches it in @name.
#   name=   - writer; caches the value in @name and stores its YAML dump
#             under the reserved key.
#
# The attr_accessor the original emitted first was dropped: both generated
# methods were redefined immediately afterwards, so it only produced
# "method redefined" warnings. File and line are passed to module_eval so
# errors in the generated methods point back here.
def self.entry(*entries)
  entries = entries.collect{|entry| entry.to_s}
  ENTRIES.concat entries
  entries.each do |entry|
    key = KEY_PREFIX + entry
    ENTRY_KEYS << key
    # NOTE(review): YAML.load deserializes whatever is stored under the key;
    # if that data can come from an untrusted source, confirm whether
    # YAML.safe_load should be used instead.
    self.module_eval <<-EOC, __FILE__, __LINE__ + 1
      def #{ entry }
        if not defined? @#{entry}
          @#{entry} = YAML.load(self.tsv_clean_get_brackets('#{key}') || nil.to_yaml)
        end
        @#{entry}
      end

      def #{ entry }=(value)
        @#{entry} = value
        self.tsv_clean_set_brackets '#{key}', value.to_yaml
      end
    EOC
  end
end
228
+
229
# Register the metadata entries through TSV.entry.
entry(
  :key_field, :fields, :type, :cast,
  :identifiers, :namespace, :filename, :serializer
)
237
+
238
# Field names. Loaded from the YAML stored under "__tsv_hash_fields" while
# no truthy value is cached in @fields. Returned as-is when nil or when
# @unnamed is set; otherwise set up as a NamedArray over itself.
def fields
  unless @fields
    stored = self.tsv_clean_get_brackets("__tsv_hash_fields")
    @fields = YAML.load(stored || nil.to_yaml)
  end
  return @fields if @fields.nil? or @unnamed
  NamedArray.setup @fields, @fields
end
246
+
247
# Zips a list of value lists so that the i-th result holds the i-th element
# of each input list. Returns [] for a nil or empty list. When +fields+ is
# given, or the list responds to :fields, each result is set up as a
# NamedArray with those fields.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?
  fields ||= list.fields if list.respond_to? :fields
  rows = list[0].zip(*list[1..-1])
  return rows unless fields
  rows.collect { |row| NamedArray.setup(row, fields) }
end
254
+
255
# Resolves where identifier translations can be found, always as an Array:
#
# * identifiers is a TSV       -> [identifiers]
# * identifiers is an Array    -> itself when empty or holding TSVs,
#                                 otherwise each element turned into a Path
#                                 (elements that already are Paths are kept)
# * any other truthy value     -> a one-element Array with it as a Path
# * no identifiers, a filename -> the identifier_files of the filename Path
# * otherwise                  -> []
#
# The Array case used a bare `when` in place of `else`; Ruby then took the
# collect expression as the branch condition with an empty body, so arrays
# of file names produced nil instead of the list of Paths.
def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    if TSV === identifiers.first or identifiers.empty?
      identifiers
    else
      identifiers.collect{|f| Path === f ? f : Path.setup(f, nil, namespace)}
    end
  when identifiers
    [ Path === identifiers ? identifiers : Path.setup(identifiers, nil, namespace) ]
  when Path === filename
    filename.identifier_files
  when filename
    Path.setup(filename).identifier_files
  else
    []
  end
end
276
+
277
# Collects the current value of every registered entry into a Hash keyed by
# entry name and hands it to IndiferentHash.setup.
def options
  collected = ENTRIES.inject({}) do |acc, entry|
    acc[entry] = self.send(entry)
    acc
  end
  IndiferentHash.setup collected
end
284
+
285
+
286
# The key field followed by the rest of the fields, as a new Array.
def all_fields
  [key_field].concat(fields)
end
289
+
290
# Formats the values part of a line: a leading tab, the values and a
# newline.
#
# * nil values without fields -> just the newline
# * nil values with fields    -> one empty cell per field
# * an Array                  -> cells joined by tabs; cells that are Arrays
#                                are joined by "|"
# * anything else             -> its to_s as a single cell
def values_to_s(values)
  cells = if values.nil?
            return "\n" if fields.nil?
            Array.new(fields.length, "") * "\t"
          elsif Array === values
            values.collect { |v| Array === v ? v * "|" : v } * "\t"
          else
            values.to_s
          end
  "\t#{cells}\n"
end
302
+
303
+ def to_s(keys = nil, no_options = false)
304
+ if FalseClass === keys or TrueClass === keys
305
+ no_options = keys
306
+ keys = nil
307
+ end
308
+
309
+ if keys == :sort
310
+ keys = self.keys.sort
311
+ end
312
+
313
+ str = ""
314
+
315
+ str << "#: " << Misc.hash2string(ENTRIES.collect{|key| [key.to_sym, self.send(key)]}) << "\n" unless no_options
316
+ if fields
317
+ str << "#" << key_field << "\t" << fields * "\t" << "\n"
318
+ end
319
+
320
+ saved_unnamed = @unnamed
321
+ @unnamed = false
322
+ if keys.nil?
323
+ each do |key, values|
324
+ key = key.to_s if Symbol === key
325
+ str << key.dup
326
+ str << values_to_s(values)
327
+ end
328
+ else
329
+ keys.zip(values_at(*keys)).each do |key, values|
330
+ key = key.to_s if Symbol === key
331
+ str << key.dup << values_to_s(values)
332
+ end
333
+ end
334
+
335
+ @unnamed = saved_unnamed
336
+ str
337
+ end
338
+
339
# Returns a Hash pairing the keys at positions 0..10 with the values at
# positions 0..10.
def value_peek
  sample_keys = keys[0..10]
  sample_values = values[0..10]
  sample_keys.zip(sample_values).inject({}) do |peek, (key, value)|
    peek[key] = value
    peek
  end
end
344
+ end
345
+
@@ -0,0 +1,183 @@
1
+ require 'rbbt/tsv'
2
+ require 'rbbt/tsv/attach/util'
3
+ module TSV
4
+
5
+ # Merge columns from different rows of a file
6
# Merges consecutive rows that share the same key into a single row, joining
# the values of each column with "|", and writes the result to +output+
# through Open.write.
#
# input  - an existing file path or a String/StringIO with the content (both
#          are piped through `sort`/`grep` first), or any other object
#          responding to eof? and gets, which is read as-is and is therefore
#          expected to have equal keys on adjacent lines.
# output - destination handed to Open.write.
# sep    - column separator.
#
# Fixes over the original: the last accumulated group is now written (it was
# dropped when the input ended), and File.exist? replaces File.exists?,
# which no longer exists in Ruby 3.2+.
def self.merge_row_fields(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n",'').split(sep, -1)
      # Lines without any content carry no key; skip them.
      next if key.nil?
      current_key ||= key

      if current_key == key
        parts.each_with_index do |part, i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      else
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end
    end

    # Flush the group that was still being accumulated when the input ended.
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
  end
end
47
+
48
+ # Merge two files with the same keys and different fields
49
# Merges two sources that share keys but hold different fields. Every output
# line holds a key, the columns from the first source and then the columns
# from the second; columns missing for a key are left empty and repeated
# values for the same key are joined with "|".
#
# file1, file2 - an existing file path, a String/StringIO with the content,
#                or a TSV (all piped through `sort`/`grep` first); any other
#                object is used as-is and must respond to gets and eof? and
#                yield lines ordered by key.
# output       - a path (opened for writing) or an object responding to puts
#                and close.
# sep          - column separator.
#
# Lines containing "#" are skipped while looking for the first data line of
# each source. The output is closed when done, now also when merging raises.
# File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
def self.merge_different_fields(file1, file2, output, sep = "\t")
  # Both inputs go through the same normalization.
  sorted_stream = lambda do |file|
    case
    when (String === file and not file.index("\n") and file.length < 250 and File.exist?(file))
      CMD.cmd("sort -k1,1 -t'#{sep}' #{ file } | grep -v '^#{sep}' ", :pipe => true)
    when (String === file or StringIO === file)
      CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file, :pipe => true)
    when TSV === file
      CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file.to_s(:sort, true), :pipe => true)
    else
      file
    end
  end

  file1 = sorted_stream.call(file1)
  file2 = sorted_stream.call(file2)

  output = File.open(output, 'w') if String === output

  begin
    cols1 = nil
    cols2 = nil

    done1 = false
    done2 = false

    # Read the first data line of each source; its width fixes the number of
    # columns that source contributes.
    key1 = key2 = nil
    while key1.nil?
      while (line1 = file1.gets) =~ /#/; end
      key1, *parts1 = line1.sub("\n",'').split(sep, -1)
      cols1 = parts1.length
    end

    while key2.nil?
      while (line2 = file2.gets) =~ /#/; end
      key2, *parts2 = line2.sub("\n",'').split(sep, -1)
      cols2 = parts2.length
    end

    key = key1 < key2 ? key1 : key2
    parts = [""] * (cols1 + cols2)
    while not (done1 and done2)
      # Consume every line of the first source carrying the current key.
      while (not done1 and key1 == key)
        parts1.each_with_index do |part, i|
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key1 = nil
        while key1.nil? and not done1
          if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
        end
      end

      # Same for the second source; its columns go after the first's.
      while (not done2 and key2 == key)
        parts2.each_with_index do |part, i|
          i += cols1
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key2 = nil
        while key2.nil? and not done2
          if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
        end
      end

      output.puts [key, parts].flatten * sep
      parts = [""] * (cols1 + cols2)

      # Continue with the smallest pending key.
      case
      when done1
        key = key2
      when done2
        key = key1
      else
        key = key1 < key2 ? key1 : key2
      end
    end
  ensure
    output.close
  end
end
127
+
128
+ # Merge columns from different files
129
# Builds and runs, through CMD.cmd with :pipe => true, a shell pipeline that
# pastes the given files side by side using +delim+ as delimiter and then
# pipes the result through sed.
def self.merge_paste(files, delim = "$")
  quoted_files = files.collect { |f| "'#{f}'" } * " "
  command = "paste #{ quoted_files } -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'"
  CMD.cmd(command, :pipe => true)
end
132
+
133
# Attaches fields from +other+ to this TSV and returns self.
#
# Options read here: :fields (fields to attach; nil or :all selects the
# fields of +other+ that are neither this TSV's key field nor one of its
# fields), :one2one, :in_namespace (default false) and :persist_input
# (default true, used when +other+ must first be turned into a TSV).
#
# Strategy, in order: same key field -> attach_same_key; the key field of
# +other+ is one of this TSV's fields (or fields in namespace) ->
# attach_source_key; otherwise an index from TSV.find_traversal is used with
# attach_index. Raises "Cannot traverse identifiers" when no index is found.
def attach(other, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  fields, one2one = Misc.process_options options, :fields, :one2one
  in_namespace = options[:in_namespace]

  if fields.nil? or fields == :all
    fields = other.fields - [key_field].concat(self.fields)
  end

  # NOTE(review): after the assignment above +fields+ is presumably never
  # nil, so this fallback (including its namespace variant) looks
  # unreachable - confirm the intended precedence.
  if fields.nil?
    candidates = in_namespace ? other.fields_in_namespace : other.fields
    fields = candidates - [key_field].concat(self.fields)
  end

  Log.medium("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other

  if key_field == other.key_field
    attach_same_key other, fields
  elsif not in_namespace and self.fields.include?(other.key_field)
    Log.debug "Found other's key field: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  elsif in_namespace and self.fields_in_namespace.include?(other.key_field)
    Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  else
    index = TSV.find_traversal(self, other, options)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.debug("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")

  self
end
166
+
167
# Collects the positions of this TSV's fields whose fullname matches the
# fullname of one of the fields of +file+, and returns the result of
# reorder :key with those positions.
def detach(file)
  other_names = file.fields.collect { |field| field.fullname }
  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if other_names.include?(field.fullname)
  end
  reorder :key, positions
end
173
+
174
# Merges this TSV with +other+ through TSV.merge_different_fields, using a
# temporary file from TmpFile.with_file, and opens the result with TSV.open
# and the given options. options[:sep] is the column separator (tab when
# absent). The result takes this TSV's key field when it is set, and the
# fields of both sources when both have them.
def merge_different_fields(other, options = {})
  separator = options[:sep] || "\t"
  TmpFile.with_file do |tmp_path|
    TSV.merge_different_fields(self, other, tmp_path, separator)
    merged = TSV.open tmp_path, options
    merged.key_field = self.key_field unless self.key_field.nil?
    unless self.fields.nil? or other.fields.nil?
      merged.fields = self.fields + other.fields
    end
    merged
  end
end
183
+ end