rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,345 @@
1
+ require 'rbbt/util/chain_methods'
2
+
3
+ module TSV
4
# Pulls in the method-chaining helpers from rbbt/util/chain_methods (required above).
+ extend ChainMethods
5
# Prefix used for the chained methods: the methods below are named tsv_* and
# call tsv_clean_* counterparts (presumably the un-chained originals -- confirm
# against ChainMethods).
+ self.chain_prefix = :tsv
6
+
7
# When set, the accessors below skip NamedArray.setup on the values they return.
+ attr_accessor :unnamed
8
+
9
# Runs the given block with @unnamed forced to true and returns the block's
# result.
#
# The previous value of @unnamed is put back afterwards, including when the
# block raises, so a failing block cannot leave the object stuck in unnamed
# mode.
def with_unnamed
  saved_unnamed = @unnamed
  @unnamed = true
  yield
ensure
  @unnamed = saved_unnamed
end
16
+
17
# Runs the given block with @monitor set to +value+ (true by default) and
# returns the block's result.
#
# The previous value of @monitor is put back afterwards, including when the
# block raises.
def with_monitor(value = true)
  saved_monitor = @monitor
  @monitor = value
  yield
ensure
  @monitor = saved_monitor
end
24
+
25
# Hook invoked when an object is extended with this module.
#
# Sets up the method chains on +data+ and, when +data+ does not already
# respond to #write, gives it a minimal in-memory read/write protocol
# (#close, #read, #write, #write? and a +writable+ accessor).
def self.extended(data)
  setup_chains(data)

  # Objects that already implement #write keep their own implementation.
  return if data.respond_to?(:write)

  class << data
    attr_accessor :writable

    # Nothing to release for a plain in-memory object.
    def close; end

    # Marks the object as not writable; +force+ is accepted but unused.
    def read(force = false)
      @writable = false
      self
    end

    # Marks the object as writable; +force+ is accepted but unused.
    def write(force = false)
      @writable = true
      self
    end

    # Current value of the writable flag.
    def write?
      @writable
    end
  end
end
51
+
52
# Prefix that TSV.entry prepends to an entry name to build the key under which
# that entry's value is stored.
+ KEY_PREFIX = "__tsv_hash_"
53
+
54
# Names of the entries registered through TSV.entry (appended to at load time).
+ ENTRIES = []
55
# Prefixed storage keys of those entries; tsv_keys and tsv_each leave them out.
+ ENTRY_KEYS = []
56
+
57
# Reads the raw value stored under +key+ and decodes it with the serializer
# registered in SERIALIZER_ALIAS under the current +serializer+ name.
#
# Returns nil when nothing is stored under +key+.
# Raises RuntimeError ("Uninitialized serializer") while +serializer+ is :type.
def serialized_get(key)
  raise "Uninitialized serializer" if serializer == :type

  raw = tsv_clean_get_brackets(key)
  return nil if raw.nil?

  SERIALIZER_ALIAS[serializer.to_sym].load(raw)
end
62
+
63
# Stores +value+ under +key+ after encoding it with the serializer registered
# in SERIALIZER_ALIAS under the current +serializer+ name. A nil value is
# stored as nil without being encoded.
#
# Raises RuntimeError ("Uninitialized serializer") while +serializer+ is :type.
def serialized_set(key, value)
  raise "Uninitialized serializer" if serializer == :type

  encoded = value.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym].dump(value)
  tsv_clean_set_brackets(key, encoded)
end
71
+
72
+ #{{{ Chained Methods
73
# Chained reader: fetches the value for +key+, going through serialized_get
# when a serializer is configured and through the raw getter otherwise.
#
# Array values are passed to NamedArray.setup together with +fields+ unless
# @unnamed is set.
def tsv_get_brackets(key)
  value = serializer.nil? ? tsv_clean_get_brackets(key) : serialized_get(key)
  NamedArray.setup(value, fields) if Array === value && !@unnamed
  value
end
83
+
84
# Chained writer: stores +value+ under +key+, going through serialized_set
# when a serializer is configured and through the raw setter otherwise.
def tsv_set_brackets(key, value)
  return tsv_clean_set_brackets(key, value) if serializer.nil?

  serialized_set(key, value)
end
91
+
92
# Chained #keys: the stored keys without the internal entry keys listed in
# ENTRY_KEYS.
def tsv_keys
  stored_keys = tsv_clean_keys
  stored_keys - ENTRY_KEYS
end
95
+
96
# Chained #values: the values for every key reported by #keys, in that order.
#
# When the first value is an Array and @unnamed is not set, every value is
# passed to NamedArray.setup together with +fields+.
def tsv_values
  rows = values_at(*keys)
  if Array === rows.first && !@unnamed
    rows.each { |row| NamedArray.setup(row, fields) }
  end
  rows
end
101
+
102
# Chained #each: walks the stored pairs, skipping the internal entry keys
# listed in ENTRY_KEYS.
#
# When a serializer is configured each raw value is decoded first. Array
# values are passed to NamedArray.setup together with +fields+ unless
# @unnamed is set. Each remaining pair is yielded to the block, if given.
def tsv_each
  tsv_clean_each do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up by symbol, as serialized_get/serialized_set do,
    # so a serializer name held as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil?
    NamedArray.setup value, fields if Array === value and not @unnamed
    yield key, value if block_given?
    [key, value]
  end
end
112
+
113
# Chained #collect: maps over the stored pairs.
#
# Pairs whose key is listed in ENTRY_KEYS are skipped with +next+. String
# values are decoded when a serializer is configured; non-String values are
# left as they are. Array values are passed to NamedArray.setup together with
# +fields+ unless @unnamed is set. Each pair is mapped through the block when
# one is given, or to [key, value] otherwise.
def tsv_collect
  tsv_clean_collect do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up by symbol, as serialized_get/serialized_set do,
    # so a serializer name held as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil? or not String === value
    NamedArray.setup value, fields if Array === value and not @unnamed
    if block_given?
      yield key, value
    else
      [key, value]
    end
  end
end
125
+
126
# Chained #size: the number of keys reported by #keys.
def tsv_size
  keys.size
end
129
+
130
# Chained #length: the number of keys reported by #keys.
def tsv_length
  keys.size
end
133
+
134
# Chained #values_at: looks every requested key up through #[] and returns
# the results in the order the keys were given.
def tsv_values_at(*keys)
  keys.map { |key| self[key] }
end
139
+
140
+ #{{{ Sorting
141
+
142
# Chained #sort_by.
#
# field     - field to sort on; nil or :all sorts the entries by key.
# just_keys - when true only the sorted keys are returned, otherwise
#             [key, value] pairs.
# block     - optional custom criterion handed to Array#sort_by; it receives
#             [key, value] pairs, where value is the whole entry for :all and
#             the first value of +field+ otherwise.
#
# The :all test below used to read `fields == :all`, comparing the table's
# field list rather than the +field+ argument, which made the sort-by-key
# branch unreachable.
def tsv_sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    through :key, field do |key, values|
      elems << [key, values.first]
    end
  end

  if block_given?
    sorted = elems.sort_by(&block)
    just_keys ? sorted.collect { |key, _| key } : sorted.collect { |key, _| [key, self[key]] }
  elsif field == :all
    sorted = elems.sort_by { |key, _| key }
    just_keys ? sorted.collect { |key, _| key } : sorted
  else
    sorted = elems.sort_by { |_, value| value }
    just_keys ? sorted.collect { |key, _| key } : sorted.collect { |key, _| [key, self[key]] }
  end
end
175
+
176
# Chained #sort: sorts the result of #collect, using the block as the
# comparison when one is given.
def tsv_sort(&block)
  pairs = collect
  pairs.sort(&block)
end
179
+
180
+ # Starts in page 1
181
# Returns one page of entries; pages are numbered from 1.
#
# pnum      - page number; a value whose string form contains "-" (e.g. "-2")
#             selects that page of the reversed ordering.
# psize     - number of entries per page.
# field     - field to sort on; nil and "key" both mean :key.
# just_keys - when true the page's keys are returned, otherwise the result of
#             select(:key => keys_of_the_page).
# block     - optional sort criterion forwarded to sort_by.
def page(pnum, psize, field = nil, just_keys = false, &block)
  reverse = false
  if pnum.to_s =~ /-(.*)/
    reverse = true
    pnum = $1.to_i
  end

  with_unnamed do
    first = psize * (pnum - 1)
    last = psize * pnum - 1
    field = :key if field == "key"

    sorted_keys = sort_by(field || :key, true, &block)
    sorted_keys.reverse! if reverse
    page_keys = sorted_keys[first..last]

    just_keys ? page_keys : select(:key => page_keys)
  end
end
203
+
204
+
205
# Registers metadata entries on the module.
#
# Each name is recorded in ENTRIES and its storage key (KEY_PREFIX + name) in
# ENTRY_KEYS. A reader and a writer are generated per entry:
#
# * the reader loads the YAML stored under the entry's key the first time it
#   is called, caches it in an instance variable and returns the cached value
#   afterwards (nil when nothing is stored);
# * the writer caches the value and stores its YAML dump under the key.
#
# The generated code is evaluated with file/line information so errors raised
# inside it point back here. No attr_accessor is declared first: the generated
# methods would override it immediately and only trigger redefinition warnings.
#
# NOTE(review): the reader uses YAML.load on whatever is stored under the key;
# confirm that this storage never holds untrusted data.
def self.entry(*entries)
  entries = entries.collect { |entry| entry.to_s }
  ENTRIES.concat entries
  entries.each do |entry|
    key = KEY_PREFIX + entry
    ENTRY_KEYS << key
    module_eval <<-RUBY, __FILE__, __LINE__ + 1
      def #{entry}
        unless defined? @#{entry}
          @#{entry} = YAML.load(self.tsv_clean_get_brackets('#{key}') || nil.to_yaml)
        end
        @#{entry}
      end

      def #{entry}=(value)
        @#{entry} = value
        self.tsv_clean_set_brackets '#{key}', value.to_yaml
      end
    RUBY
  end
end
228
+
229
# Registers the metadata entries of a TSV through TSV.entry (defined above),
# which generates a reader and a writer for each of the names listed here.
+ entry :key_field,
230
+ :fields,
231
+ :type,
232
+ :cast,
233
+ :identifiers,
234
+ :namespace,
235
+ :filename,
236
+ :serializer
237
+
238
# Field names of the table. Replaces the reader generated for the :fields
# entry.
#
# The value is loaded once from the YAML stored under "__tsv_hash_fields" and
# cached in @fields. It is returned as is when it is nil or when @unnamed is
# set; otherwise the result of NamedArray.setup(@fields, @fields) is returned.
def fields
  @fields ||= YAML.load(self.tsv_clean_get_brackets("__tsv_hash_fields") || nil.to_yaml)
  return @fields if @fields.nil? or @unnamed

  NamedArray.setup @fields, @fields
end
246
+
247
# Transposes a list of parallel value lists: the i-th result holds the i-th
# element of every list.
#
# list   - array of arrays; nil or empty yields [].
# fields - optional field names; defaults to list.fields when +list+ responds
#          to it. When available, every result row is passed to
#          NamedArray.setup with them.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields ||= list.fields if list.respond_to? :fields

  first_column = list[0]
  other_columns = list[1..-1]
  rows = first_column.zip(*other_columns)
  rows = rows.collect { |row| NamedArray.setup(row, fields) } if fields
  rows
end
254
+
255
# Resolves the identifier files associated with this table.
#
# * +identifiers+ is a TSV              -> [identifiers]
# * +identifiers+ is an Array           -> returned as is when it is empty or
#                                          starts with a TSV; otherwise every
#                                          element that is not already a Path
#                                          goes through
#                                          Path.setup(f, nil, namespace)
# * +identifiers+ is anything else set  -> one-element list, set up the same way
# * no identifiers but a +filename+     -> delegated to the filename's
#                                          #identifier_files (after Path.setup
#                                          when it is not a Path yet)
# * otherwise                           -> []
#
# The Array case used to end in a bare `when` followed by a newline. Ruby
# reads the next line as that `when`'s condition and leaves its body empty, so
# a list of plain file names produced nil instead of the list of paths.
def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    if TSV === identifiers.first or identifiers.empty?
      identifiers
    else
      identifiers.collect { |f| Path === f ? f : Path.setup(f, nil, namespace) }
    end
  when identifiers
    [Path === identifiers ? identifiers : Path.setup(identifiers, nil, namespace)]
  when Path === filename
    filename.identifier_files
  when filename
    Path.setup(filename).identifier_files
  else
    []
  end
end
276
+
277
# Collects the current value of every registered entry (see ENTRIES) into a
# hash keyed by entry name and returns it after IndiferentHash.setup.
def options
  collected = {}
  ENTRIES.each { |entry| collected[entry] = send(entry) }
  IndiferentHash.setup collected
end
284
+
285
+
286
# The key field followed by the table's fields, as one array.
def all_fields
  key_column = [key_field]
  key_column + fields
end
289
+
290
# Renders the value part of one row: a leading tab, the cells and a newline.
#
# * nil values, no fields   -> "\n"
# * nil values, with fields -> one empty cell per field
# * non-Array value         -> its #to_s as the single cell
# * Array value             -> cells joined by tabs; cells that are Arrays
#                              themselves are joined with "|"
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?

    return "\t" << ([""] * fields.length) * "\t" << "\n"
  end

  return "\t" << values.to_s << "\n" unless Array === values

  cells = values.collect { |v| Array === v ? v * "|" : v }
  "\t" << cells * "\t" << "\n"
end
302
+
303
# Renders the table as text.
#
# keys       - nil for every entry in #each order, :sort for every entry in
#              sorted key order, or an explicit list of keys. A boolean here
#              is taken as the +no_options+ argument instead.
# no_options - when true the leading "#: ..." options line is left out.
#
# Output: the optional options line, a "#key<TAB>fields" header when the table
# has fields, then one line per entry made of the key and values_to_s(values).
#
# @unnamed is switched off while the rows are rendered and put back afterwards,
# including when rendering raises. Keys are appended through #to_s, so
# non-String keys such as Integers are written as text rather than being
# appended to the string as a character code.
def to_s(keys = nil, no_options = false)
  if FalseClass === keys or TrueClass === keys
    no_options = keys
    keys = nil
  end

  keys = self.keys.sort if keys == :sort

  str = ""

  str << "#: " << Misc.hash2string(ENTRIES.collect{|key| [key.to_sym, self.send(key)]}) << "\n" unless no_options
  if fields
    str << "#" << key_field << "\t" << fields * "\t" << "\n"
  end

  saved_unnamed = @unnamed
  @unnamed = false
  begin
    if keys.nil?
      each do |key, values|
        str << key.to_s << values_to_s(values)
      end
    else
      keys.zip(values_at(*keys)).each do |key, values|
        str << key.to_s << values_to_s(values)
      end
    end
  ensure
    @unnamed = saved_unnamed
  end

  str
end
338
+
339
# Returns a small sample of the table: a hash pairing the first eleven keys
# (indices 0..10) with the values found at the same positions of #values.
def value_peek
  first_keys = keys[0..10]
  first_values = values[0..10]

  peek = {}
  first_keys.each_with_index { |key, i| peek[key] = first_values[i] }
  peek
end
344
+ end
345
+
@@ -0,0 +1,183 @@
1
+ require 'rbbt/tsv'
2
+ require 'rbbt/tsv/attach/util'
3
+ module TSV
4
+
5
+ # Merge columns from different rows of a file
6
# Merges consecutive rows that share a key into a single row.
#
# input  - a file name or raw content (String / StringIO), which is piped
#          through `sort` and stripped of lines starting with the separator;
#          any other object is read as is through #gets / #eof? and is
#          expected to have equal keys on adjacent lines.
# output - target handed to Open.write.
# sep    - field separator (tab by default).
#
# For every run of rows with the same key, the values found at each column
# position are joined with "|". Lines without a key (blank lines) are skipped.
# The row still being accumulated when the input ends is written as well;
# previously it was dropped, so the last key never reached the output.
def self.merge_row_fields(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n", '').split(sep, -1)
      next if key.nil?

      current_key ||= key
      if current_key == key
        parts.each_with_index do |part, i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] << "|" << part
          end
        end
      else
        os.puts([current_key, current_parts].flatten * sep)
        current_key = key
        current_parts = parts
      end
    end

    # Flush the row that was still being accumulated when the input ended.
    os.puts([current_key, current_parts].flatten * sep) unless current_key.nil?
  end
end
47
+
48
+ # Merge two files with the same keys and different fields
49
# Merges two inputs that share keys but carry different fields into one
# output whose rows are: key, the fields of +file1+, the fields of +file2+.
#
# file1, file2 - a file name, raw content (String / StringIO) or a TSV, each
#                of which is piped through `sort` and stripped of lines
#                starting with the separator; any other object is read as is
#                through #gets / #eof? and is expected to be sorted by key.
# output       - an IO-like object, or a file name that is opened for writing.
#                It is closed before returning, including when the merge
#                raises.
# sep          - field separator (tab by default).
#
# Leading lines that start with "#" are treated as headers and skipped. Keys
# present in only one input get empty cells for the other input's columns;
# repeated keys have their values joined with "|".
#
# The header test is anchored to the start of the line; it used to match "#"
# anywhere, which dropped leading data rows that merely contained a "#".
#
# NOTE(review): an input without any data row still fails on a nil line while
# looking for its first key, as before.
def self.merge_different_fields(file1, file2, output, sep = "\t")
  # Normalises one input into a stream sorted by key.
  sorted_stream = lambda do |file|
    case
    when (String === file and not file.index("\n") and file.length < 250 and File.exist?(file))
      CMD.cmd("sort -k1,1 -t'#{sep}' #{ file } | grep -v '^#{sep}' ", :pipe => true)
    when (String === file or StringIO === file)
      CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file, :pipe => true)
    when TSV === file
      CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file.to_s(:sort, true), :pipe => true)
    else
      file
    end
  end

  file1 = sorted_stream.call(file1)
  file2 = sorted_stream.call(file2)

  output = File.open(output, 'w') if String === output

  begin
    cols1 = nil
    cols2 = nil

    done1 = false
    done2 = false

    # Find the first data row of each input; its width fixes the column count.
    key1 = key2 = nil
    parts1 = parts2 = nil
    while key1.nil?
      while (line1 = file1.gets) =~ /^#/; end
      key1, *parts1 = line1.sub("\n",'').split(sep, -1)
      cols1 = parts1.length
    end

    while key2.nil?
      while (line2 = file2.gets) =~ /^#/; end
      key2, *parts2 = line2.sub("\n",'').split(sep, -1)
      cols2 = parts2.length
    end

    key = key1 < key2 ? key1 : key2
    parts = [""] * (cols1 + cols2)
    while not (done1 and done2)
      # Consume every row of file1 carrying the current key.
      while (not done1 and key1 == key)
        parts1.each_with_index do |part, i|
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key1 = nil
        while key1.nil? and not done1
          if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
        end
      end

      # Same for file2; its cells are placed after file1's columns.
      while (not done2 and key2 == key)
        parts2.each_with_index do |part, i|
          i += cols1
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key2 = nil
        while key2.nil? and not done2
          if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
        end
      end

      output.puts([key, parts].flatten * sep)
      parts = [""] * (cols1 + cols2)

      # Next key: the smaller of the two pending keys, or whichever is left.
      case
      when done1
        key = key2
      when done2
        key = key1
      else
        key = key1 < key2 ? key1 : key2
      end
    end
  ensure
    output.close
  end
end
127
+
128
+ # Merge columns from different files
129
# Builds and runs a `paste | sed` pipeline over +files+ through CMD.cmd with
# :pipe => true and returns whatever CMD.cmd returns.
#
# Every file name is wrapped in single quotes; +delim+ is used both as the
# paste delimiter and inside the sed expression.
def self.merge_paste(files, delim = "$")
  quoted_files = files.collect { |f| "'#{f}'" } * " "
  CMD.cmd("paste #{ quoted_files } -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
end
132
+
133
# Attaches fields from +other+ to this table and returns self.
#
# Options (defaults filled in through Misc.add_defaults):
#   :fields        - fields to attach; nil or :all means every field of
#                    +other+ that is neither this table's key field nor one
#                    of its fields
#   :one2one       - forwarded to attach_source_key
#   :in_namespace  - false by default; selects the namespace-aware checks
#   :persist_input - true by default; forwarded as :persist when +other+ has
#                    to be turned into a TSV
#
# Strategy, in order: same key field -> attach_same_key; other's key field
# found among this table's fields (or fields_in_namespace) ->
# attach_source_key; otherwise an index from TSV.find_traversal ->
# attach_index. Raises "Cannot traverse identifiers" when no index is found.
def attach(other, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  fields, one2one = Misc.process_options options, :fields, :one2one
  in_namespace = options[:in_namespace]

  if fields.nil? or fields == :all
    fields = other.fields - [key_field].concat(self.fields)
  end

  # NOTE(review): +fields+ can no longer be nil at this point, so this second
  # defaulting step (and with it fields_in_namespace) never runs -- confirm
  # whether the namespace-aware default was meant to take precedence.
  if fields.nil?
    available = in_namespace ? other.fields_in_namespace : other.fields
    fields = available - [key_field].concat(self.fields)
  end

  Log.medium("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other

  if key_field == other.key_field
    attach_same_key other, fields
  elsif !in_namespace && self.fields.include?(other.key_field)
    Log.debug "Found other's key field: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  elsif in_namespace && self.fields_in_namespace.include?(other.key_field)
    Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  else
    index = TSV.find_traversal(self, other, options)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.debug("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")

  self
end
166
+
167
# Collects the positions of this table's fields whose #fullname also appears
# among the fields of +file+, and returns reorder(:key, positions).
def detach(file)
  shared_names = file.fields.map { |field| field.fullname }

  positions = []
  self.fields.each_with_index do |field, index|
    positions << index if shared_names.include?(field.fullname)
  end

  reorder :key, positions
end
173
+
174
# Merges this table with +other+ through TSV.merge_different_fields, writing
# to a temporary file, and returns the table opened from that file.
#
# options - passed to TSV.open; options[:sep] (tab when absent) is used as the
#           separator for the merge.
#
# The result's key field is copied from this table when it has one, and its
# fields are set to this table's fields followed by +other+'s when both have
# fields.
def merge_different_fields(other, options = {})
  TmpFile.with_file do |output|
    separator = options[:sep] || "\t"
    TSV.merge_different_fields(self, other, output, separator)

    merged = TSV.open output, options
    merged.key_field = self.key_field unless self.key_field.nil?
    merged.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
    merged
  end
end
183
+ end