rbbt-util 3.0.3 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rbbt_Rutil.rb +4 -0
- data/bin/rbbt_exec.rb +33 -0
- data/lib/rbbt/util/R.rb +3 -2
- data/lib/rbbt/util/cmd.rb +13 -2
- data/lib/rbbt/util/fix_width_table.rb +2 -0
- data/lib/rbbt/util/misc.rb +8 -1
- data/lib/rbbt/util/open.rb +2 -2
- data/lib/rbbt/util/persistence.rb +27 -18
- data/lib/rbbt/util/resource.rb +3 -2
- data/lib/rbbt/util/task.rb +77 -5
- data/lib/rbbt/util/task/job.rb +20 -4
- data/lib/rbbt/util/tc_hash.rb +2 -1
- data/lib/rbbt/util/tsv.rb +59 -33
- data/lib/rbbt/util/tsv/accessor.rb +27 -2
- data/lib/rbbt/util/tsv/attach.rb +48 -121
- data/lib/rbbt/util/tsv/index.rb +4 -0
- data/lib/rbbt/util/tsv/manipulate.rb +25 -3
- data/lib/rbbt/util/tsv/misc.rb +31 -0
- data/lib/rbbt/util/tsv/parse.rb +27 -4
- data/lib/rbbt/util/tsv/resource.rb +6 -0
- data/lib/rbbt/util/workflow.rb +1 -1
- data/lib/rbbt/util/workflow/soap.rb +117 -0
- data/share/lib/R/util.R +52 -2
- data/test/rbbt/util/test_misc.rb +11 -11
- data/test/rbbt/util/test_persistence.rb +13 -0
- data/test/rbbt/util/test_tc_hash.rb +4 -2
- data/test/rbbt/util/test_tsv.rb +31 -4
- data/test/rbbt/util/test_workflow.rb +11 -3
- data/test/rbbt/util/tsv/test_attach.rb +35 -1
- data/test/rbbt/util/tsv/test_index.rb +1 -3
- metadata +12 -6
data/lib/rbbt/util/tsv.rb
CHANGED
@@ -41,9 +41,6 @@ class TSV
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
-
def self.encapsulate_persistence(file, options)
|
45
|
-
end
|
46
|
-
|
47
44
|
def initialize(file = {}, type = nil, options = {})
|
48
45
|
# Process Options
|
49
46
|
|
@@ -58,7 +55,7 @@ class TSV
|
|
58
55
|
file = $1
|
59
56
|
end
|
60
57
|
|
61
|
-
options = Misc.add_defaults options, :persistence => false, :type => type
|
58
|
+
options = Misc.add_defaults options, :persistence => false, :type => type, :in_situ_persistence => true
|
62
59
|
|
63
60
|
# Extract Filename
|
64
61
|
|
@@ -114,50 +111,79 @@ class TSV
|
|
114
111
|
end
|
115
112
|
end
|
116
113
|
else
|
117
|
-
|
114
|
+
in_situ_persistence = Misc.process_options(options, :in_situ_persistence)
|
115
|
+
@data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename, persistence_file|
|
118
116
|
data, extra = nil
|
119
117
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
data
|
141
|
-
else
|
142
|
-
raise "Unknown input in TSV.new #{file.inspect}"
|
118
|
+
if in_situ_persistence and persistence_file
|
119
|
+
|
120
|
+
cast = options[:cast]
|
121
|
+
type = options[:type]
|
122
|
+
serializer = case
|
123
|
+
when ((cast == "to_i" or cast == :to_i) and type == :single)
|
124
|
+
:integer
|
125
|
+
when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
|
126
|
+
:integer_array
|
127
|
+
when type == :double
|
128
|
+
:double
|
129
|
+
when type == :list
|
130
|
+
:list
|
131
|
+
when type == :single
|
132
|
+
:single
|
133
|
+
else
|
134
|
+
:marshal
|
135
|
+
end
|
136
|
+
|
137
|
+
options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, serializer)
|
143
138
|
end
|
144
139
|
|
145
|
-
|
140
|
+
begin
|
141
|
+
case
|
142
|
+
## Parse source
|
143
|
+
when Resource::Path === file #(String === file and file.respond_to? :open)
|
144
|
+
data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
|
145
|
+
extra[:namespace] ||= file.namespace
|
146
|
+
extra[:datadir] ||= file.datadir
|
147
|
+
when StringIO === file
|
148
|
+
data, extra = TSV.parse(file, options)
|
149
|
+
when Open.can_open?(file)
|
150
|
+
Open.open(file, :grep => options[:grep]) do |f|
|
151
|
+
data, extra = TSV.parse(f, options)
|
152
|
+
end
|
153
|
+
when File === file
|
154
|
+
path = file.path
|
155
|
+
file = Open.grep(file, options[:grep]) if options[:grep]
|
156
|
+
data, extra = TSV.parse(file, options)
|
157
|
+
when IO === file
|
158
|
+
file = Open.grep(file, options[:grep]) if options[:grep]
|
159
|
+
data, extra = TSV.parse(file, options)
|
160
|
+
when block_given?
|
161
|
+
data
|
162
|
+
else
|
163
|
+
raise "Unknown input in TSV.new #{file.inspect}"
|
164
|
+
end
|
165
|
+
|
166
|
+
extra[:filename] = filename
|
167
|
+
rescue Exception
|
168
|
+
FileUtils.rm persistence_file if persistence_file and File.exists?(persistence_file)
|
169
|
+
raise $!
|
170
|
+
end
|
146
171
|
|
147
172
|
[data, extra]
|
148
173
|
end
|
149
174
|
end
|
150
175
|
end
|
151
176
|
|
152
|
-
if not extra.nil?
|
177
|
+
if not extra.nil?
|
153
178
|
%w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
|
154
179
|
if extra.include? key.to_sym
|
155
180
|
self.send("#{key}=".to_sym, extra[key.to_sym])
|
156
|
-
if @data.respond_to? "#{key}=".to_sym
|
157
|
-
|
158
|
-
end
|
181
|
+
#if @data.respond_to? "#{key}=".to_sym
|
182
|
+
# @data.send("#{key}=".to_sym, extra[key.to_sym])
|
183
|
+
#end
|
159
184
|
end
|
160
185
|
end
|
186
|
+
@data.read if Persistence::TSV === @data
|
161
187
|
end
|
162
188
|
end
|
163
189
|
|
data/lib/rbbt/util/tsv/accessor.rb
CHANGED
@@ -85,6 +85,13 @@ class TSV
|
|
85
85
|
fields.select{|f| f.namespace.nil? or f.namespace == namespace}
|
86
86
|
end
|
87
87
|
|
88
|
+
def key_field
|
89
|
+
return nil if @key_field.nil?
|
90
|
+
k = @key_field.dup
|
91
|
+
k.extend Field
|
92
|
+
k
|
93
|
+
end
|
94
|
+
|
88
95
|
def fields
|
89
96
|
return nil if @fields.nil?
|
90
97
|
fds = @fields
|
@@ -113,7 +120,11 @@ class TSV
|
|
113
120
|
|
114
121
|
def self.identify_field(key, fields, field)
|
115
122
|
return field if Integer === field
|
116
|
-
|
123
|
+
if String === field
|
124
|
+
field = field.dup
|
125
|
+
field.extend Field
|
126
|
+
end
|
127
|
+
return :key if field.nil? or field == 0 or field.to_sym == :key or field == key
|
117
128
|
return nil if fields.nil?
|
118
129
|
return fields.collect{|f| f.to_s}.index field if fields.collect{|f| f.to_s}.index field
|
119
130
|
return fields.index field
|
@@ -136,7 +147,7 @@ class TSV
|
|
136
147
|
end
|
137
148
|
end if Array === new_fields
|
138
149
|
@fields = new_fields
|
139
|
-
@data.fields = new_fields if @data.respond_to? :fields=
|
150
|
+
@data.fields = new_fields if @data.respond_to? :fields= and @data.write?
|
140
151
|
end
|
141
152
|
|
142
153
|
def old_fields=(new_fields)
|
@@ -196,6 +207,10 @@ class TSV
|
|
196
207
|
follow @data[key]
|
197
208
|
end
|
198
209
|
|
210
|
+
def delete(key)
|
211
|
+
@data.delete(key)
|
212
|
+
end
|
213
|
+
|
199
214
|
def values_at(*keys)
|
200
215
|
keys.collect{|k|
|
201
216
|
self[k]
|
@@ -258,6 +273,10 @@ class TSV
|
|
258
273
|
keys = nil
|
259
274
|
end
|
260
275
|
|
276
|
+
if keys == :sort
|
277
|
+
keys = self.keys.sort
|
278
|
+
end
|
279
|
+
|
261
280
|
str = ""
|
262
281
|
|
263
282
|
str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n" unless no_options
|
@@ -279,4 +298,10 @@ class TSV
|
|
279
298
|
|
280
299
|
str
|
281
300
|
end
|
301
|
+
|
302
|
+
def value_peek
|
303
|
+
peek = {}
|
304
|
+
keys[0..10].zip(values[0..10]).each do |k,v| peek[k] = v end
|
305
|
+
peek
|
306
|
+
end
|
282
307
|
end
|
data/lib/rbbt/util/tsv/attach.rb
CHANGED
@@ -47,6 +47,8 @@ class TSV
|
|
47
47
|
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
|
48
48
|
when (String === file1 or StringIO === file1)
|
49
49
|
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
|
50
|
+
when TSV === file1
|
51
|
+
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
|
50
52
|
end
|
51
53
|
|
52
54
|
case
|
@@ -54,6 +56,8 @@ class TSV
|
|
54
56
|
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
|
55
57
|
when (String === file2 or StringIO === file2)
|
56
58
|
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
|
59
|
+
when TSV === file2
|
60
|
+
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
|
57
61
|
end
|
58
62
|
|
59
63
|
output = File.open(output, 'w') if String === output
|
@@ -115,110 +119,6 @@ class TSV
|
|
115
119
|
|
116
120
|
output.close
|
117
121
|
end
|
118
|
-
|
119
|
-
def self.paste(file1, file2, output, sep = "\t")
|
120
|
-
case
|
121
|
-
when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
|
122
|
-
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } ", :pipe => true)
|
123
|
-
when String === file1
|
124
|
-
file1 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file1, :pipe => true)
|
125
|
-
end
|
126
|
-
|
127
|
-
case
|
128
|
-
when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
|
129
|
-
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } ", :pipe => true)
|
130
|
-
when String === file2
|
131
|
-
file2 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file2, :pipe => true)
|
132
|
-
end
|
133
|
-
|
134
|
-
output = File.open(output, 'w') if String === output
|
135
|
-
|
136
|
-
cols1 = nil
|
137
|
-
cols2 = nil
|
138
|
-
|
139
|
-
done1 = false
|
140
|
-
done2 = false
|
141
|
-
|
142
|
-
while (line1 = file1.gets) =~ /#/; end
|
143
|
-
line1.strip!
|
144
|
-
parts1 = line1.split(sep)
|
145
|
-
key1 = parts1.shift
|
146
|
-
cols1 = parts1.length
|
147
|
-
|
148
|
-
while (line2 = file2.gets) =~ /#/; end
|
149
|
-
line2.strip!
|
150
|
-
parts2 = line2.split(sep)
|
151
|
-
key2 = parts2.shift
|
152
|
-
cols2 = parts2.length
|
153
|
-
while not (done1 or done2)
|
154
|
-
case
|
155
|
-
when key1 < key2
|
156
|
-
output.puts [key1, parts1, [""] * cols2] * sep
|
157
|
-
if file1.eof?
|
158
|
-
done1 = true
|
159
|
-
else
|
160
|
-
line1 = file1.gets
|
161
|
-
line1.strip!
|
162
|
-
parts1 = line1.split(sep)
|
163
|
-
key1 = parts1.shift
|
164
|
-
end
|
165
|
-
when key2 < key1
|
166
|
-
output.puts [key2, [""] * cols1, parts2] * sep
|
167
|
-
if file2.eof?
|
168
|
-
done2 = true
|
169
|
-
else
|
170
|
-
line2 = file2.gets
|
171
|
-
line2.strip!
|
172
|
-
parts2 = line2.split(sep)
|
173
|
-
key2 = parts2.shift
|
174
|
-
end
|
175
|
-
when key1 == key2
|
176
|
-
output.puts [key1, parts1, parts2] * sep
|
177
|
-
if file1.eof?
|
178
|
-
done1 = true
|
179
|
-
else
|
180
|
-
line1 = file1.gets
|
181
|
-
line1.strip!
|
182
|
-
parts1 = line1.split(sep)
|
183
|
-
key1 = parts1.shift
|
184
|
-
end
|
185
|
-
if file2.eof?
|
186
|
-
done2 = true
|
187
|
-
else
|
188
|
-
line2 = file2.gets
|
189
|
-
line2.strip!
|
190
|
-
parts2 = line2.split(sep)
|
191
|
-
key2 = parts2.shift
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
while not done1
|
197
|
-
output.puts [key1, parts1, [""] * cols2] * sep
|
198
|
-
if file1.eof?
|
199
|
-
done1 = true
|
200
|
-
else
|
201
|
-
line1 = file1.gets
|
202
|
-
line1.strip!
|
203
|
-
parts1 = line1.split(sep)
|
204
|
-
key1 = parts1.shift
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
while not done2
|
209
|
-
output.puts [key2, [""] * cols1, parts2] * sep
|
210
|
-
if file2.eof?
|
211
|
-
done2 = true
|
212
|
-
else
|
213
|
-
line2 = file2.gets
|
214
|
-
line2.strip!
|
215
|
-
parts2 = line2.split(sep)
|
216
|
-
key2 = parts2.shift
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
output.close
|
221
|
-
end
|
222
122
|
#{{{ Attach Methods
|
223
123
|
|
224
124
|
def attach_same_key(other, fields = nil)
|
@@ -296,6 +196,8 @@ class TSV
|
|
296
196
|
field_positions = fields.collect{|field| other.identify_field field}
|
297
197
|
field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
|
298
198
|
|
199
|
+
|
200
|
+
length = self.fields.length
|
299
201
|
through do |key, values|
|
300
202
|
source_keys = index[key]
|
301
203
|
if source_keys.nil? or source_keys.empty?
|
@@ -315,7 +217,7 @@ class TSV
|
|
315
217
|
other[source_key][pos]
|
316
218
|
end
|
317
219
|
end
|
318
|
-
new_values.collect!{|v| [v]}
|
220
|
+
new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
|
319
221
|
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
320
222
|
all_new_values << new_values
|
321
223
|
end
|
@@ -323,17 +225,28 @@ class TSV
|
|
323
225
|
|
324
226
|
if all_new_values.empty?
|
325
227
|
if type == :double
|
326
|
-
|
228
|
+
all_new_values = [[[]] * field_positions.length]
|
327
229
|
else
|
328
|
-
|
230
|
+
all_new_values = [[""] * field_positions.length]
|
329
231
|
end
|
232
|
+
end
|
233
|
+
|
234
|
+
current = self[key]
|
235
|
+
|
236
|
+
if current.length > length
|
237
|
+
all_new_values << current.slice!(length..current.length - 1)
|
238
|
+
end
|
239
|
+
|
240
|
+
if type == :double
|
241
|
+
all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
|
330
242
|
else
|
331
|
-
|
332
|
-
self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
|
333
|
-
else
|
334
|
-
self[key] = self[key].concat all_new_values.first
|
335
|
-
end
|
243
|
+
all_new_values = all_new_values.first
|
336
244
|
end
|
245
|
+
|
246
|
+
current += all_new_values
|
247
|
+
|
248
|
+
self[key] = current
|
249
|
+
|
337
250
|
end
|
338
251
|
|
339
252
|
self.fields = self.fields.concat field_names
|
@@ -385,13 +298,13 @@ class TSV
|
|
385
298
|
Log.medium "Found Traversal: #{traversal_ids * " => "}"
|
386
299
|
|
387
300
|
data_key, data_file = path.shift
|
388
|
-
if data_key == data_file.key_field
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
301
|
+
data_index = if data_key == data_file.key_field
|
302
|
+
Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
|
303
|
+
nil
|
304
|
+
else
|
305
|
+
Log.debug "Data index required"
|
306
|
+
data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
|
307
|
+
end
|
395
308
|
|
396
309
|
current_index = data_index
|
397
310
|
current_key = data_key
|
@@ -402,7 +315,7 @@ class TSV
|
|
402
315
|
current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
403
316
|
else
|
404
317
|
next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
405
|
-
current_index.process current_index.fields.first do |
|
318
|
+
current_index.process current_index.fields.first do |values|
|
406
319
|
if values.nil?
|
407
320
|
nil
|
408
321
|
else
|
@@ -411,6 +324,7 @@ class TSV
|
|
411
324
|
end
|
412
325
|
current_index.fields = [next_key]
|
413
326
|
end
|
327
|
+
current_key = next_key
|
414
328
|
end
|
415
329
|
|
416
330
|
current_index
|
@@ -470,6 +384,8 @@ class TSV
|
|
470
384
|
attach_index other, index, fields
|
471
385
|
end
|
472
386
|
Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
|
387
|
+
|
388
|
+
self
|
473
389
|
end
|
474
390
|
|
475
391
|
def detach(file)
|
@@ -489,10 +405,21 @@ class TSV
|
|
489
405
|
if self.fields and other.fields
|
490
406
|
new.fields = self.fields + other.fields
|
491
407
|
end
|
492
|
-
|
408
|
+
|
493
409
|
FileUtils.rm tmpfile if File.exists? tmpfile
|
494
410
|
|
495
411
|
new
|
496
412
|
end
|
497
413
|
|
414
|
+
|
415
|
+
def paste(other, options = {})
|
416
|
+
TmpFile.with_file do |output|
|
417
|
+
TSV.paste_merge(self, other, output, options[:sep] || "\t")
|
418
|
+
TSV.new output, options
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
def self.fast_paste(files, delim = "$")
|
423
|
+
CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
|
424
|
+
end
|
498
425
|
end
|
data/lib/rbbt/util/tsv/index.rb
CHANGED
@@ -34,6 +34,7 @@ class TSV
|
|
34
34
|
list = [list] unless Array === list
|
35
35
|
i += 1 if fields.nil?
|
36
36
|
list.each do |elem|
|
37
|
+
next if elem.empty?
|
37
38
|
elem.downcase if case_insensitive
|
38
39
|
new[elem] ||= []
|
39
40
|
new[elem][i] ||= []
|
@@ -73,6 +74,7 @@ class TSV
|
|
73
74
|
end
|
74
75
|
list.collect!{|e| e.downcase} if case_insensitive
|
75
76
|
list.each do |elem|
|
77
|
+
next if elem.empty?
|
76
78
|
new[elem] ||= []
|
77
79
|
if double_keys
|
78
80
|
new[elem].concat key
|
@@ -233,6 +235,8 @@ class TSV
|
|
233
235
|
|
234
236
|
def self.field_matches(tsv, values)
|
235
237
|
values = [values] if not Array === values
|
238
|
+
Log.debug "Matcing #{values.length} values to #{tsv.filename}"
|
239
|
+
|
236
240
|
if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
|
237
241
|
return {}
|
238
242
|
end
|