rbbt-util 3.0.3 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/rbbt_Rutil.rb +4 -0
- data/bin/rbbt_exec.rb +33 -0
- data/lib/rbbt/util/R.rb +3 -2
- data/lib/rbbt/util/cmd.rb +13 -2
- data/lib/rbbt/util/fix_width_table.rb +2 -0
- data/lib/rbbt/util/misc.rb +8 -1
- data/lib/rbbt/util/open.rb +2 -2
- data/lib/rbbt/util/persistence.rb +27 -18
- data/lib/rbbt/util/resource.rb +3 -2
- data/lib/rbbt/util/task.rb +77 -5
- data/lib/rbbt/util/task/job.rb +20 -4
- data/lib/rbbt/util/tc_hash.rb +2 -1
- data/lib/rbbt/util/tsv.rb +59 -33
- data/lib/rbbt/util/tsv/accessor.rb +27 -2
- data/lib/rbbt/util/tsv/attach.rb +48 -121
- data/lib/rbbt/util/tsv/index.rb +4 -0
- data/lib/rbbt/util/tsv/manipulate.rb +25 -3
- data/lib/rbbt/util/tsv/misc.rb +31 -0
- data/lib/rbbt/util/tsv/parse.rb +27 -4
- data/lib/rbbt/util/tsv/resource.rb +6 -0
- data/lib/rbbt/util/workflow.rb +1 -1
- data/lib/rbbt/util/workflow/soap.rb +117 -0
- data/share/lib/R/util.R +52 -2
- data/test/rbbt/util/test_misc.rb +11 -11
- data/test/rbbt/util/test_persistence.rb +13 -0
- data/test/rbbt/util/test_tc_hash.rb +4 -2
- data/test/rbbt/util/test_tsv.rb +31 -4
- data/test/rbbt/util/test_workflow.rb +11 -3
- data/test/rbbt/util/tsv/test_attach.rb +35 -1
- data/test/rbbt/util/tsv/test_index.rb +1 -3
- metadata +12 -6
data/lib/rbbt/util/tsv.rb
CHANGED
@@ -41,9 +41,6 @@ class TSV
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
-
def self.encapsulate_persistence(file, options)
|
45
|
-
end
|
46
|
-
|
47
44
|
def initialize(file = {}, type = nil, options = {})
|
48
45
|
# Process Options
|
49
46
|
|
@@ -58,7 +55,7 @@ class TSV
|
|
58
55
|
file = $1
|
59
56
|
end
|
60
57
|
|
61
|
-
options = Misc.add_defaults options, :persistence => false, :type => type
|
58
|
+
options = Misc.add_defaults options, :persistence => false, :type => type, :in_situ_persistence => true
|
62
59
|
|
63
60
|
# Extract Filename
|
64
61
|
|
@@ -114,50 +111,79 @@ class TSV
|
|
114
111
|
end
|
115
112
|
end
|
116
113
|
else
|
117
|
-
|
114
|
+
in_situ_persistence = Misc.process_options(options, :in_situ_persistence)
|
115
|
+
@data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename, persistence_file|
|
118
116
|
data, extra = nil
|
119
117
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
data
|
141
|
-
else
|
142
|
-
raise "Unknown input in TSV.new #{file.inspect}"
|
118
|
+
if in_situ_persistence and persistence_file
|
119
|
+
|
120
|
+
cast = options[:cast]
|
121
|
+
type = options[:type]
|
122
|
+
serializer = case
|
123
|
+
when ((cast == "to_i" or cast == :to_i) and type == :single)
|
124
|
+
:integer
|
125
|
+
when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
|
126
|
+
:integer_array
|
127
|
+
when type == :double
|
128
|
+
:double
|
129
|
+
when type == :list
|
130
|
+
:list
|
131
|
+
when type == :single
|
132
|
+
:single
|
133
|
+
else
|
134
|
+
:marshal
|
135
|
+
end
|
136
|
+
|
137
|
+
options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, serializer)
|
143
138
|
end
|
144
139
|
|
145
|
-
|
140
|
+
begin
|
141
|
+
case
|
142
|
+
## Parse source
|
143
|
+
when Resource::Path === file #(String === file and file.respond_to? :open)
|
144
|
+
data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
|
145
|
+
extra[:namespace] ||= file.namespace
|
146
|
+
extra[:datadir] ||= file.datadir
|
147
|
+
when StringIO === file
|
148
|
+
data, extra = TSV.parse(file, options)
|
149
|
+
when Open.can_open?(file)
|
150
|
+
Open.open(file, :grep => options[:grep]) do |f|
|
151
|
+
data, extra = TSV.parse(f, options)
|
152
|
+
end
|
153
|
+
when File === file
|
154
|
+
path = file.path
|
155
|
+
file = Open.grep(file, options[:grep]) if options[:grep]
|
156
|
+
data, extra = TSV.parse(file, options)
|
157
|
+
when IO === file
|
158
|
+
file = Open.grep(file, options[:grep]) if options[:grep]
|
159
|
+
data, extra = TSV.parse(file, options)
|
160
|
+
when block_given?
|
161
|
+
data
|
162
|
+
else
|
163
|
+
raise "Unknown input in TSV.new #{file.inspect}"
|
164
|
+
end
|
165
|
+
|
166
|
+
extra[:filename] = filename
|
167
|
+
rescue Exception
|
168
|
+
FileUtils.rm persistence_file if persistence_file and File.exists?(persistence_file)
|
169
|
+
raise $!
|
170
|
+
end
|
146
171
|
|
147
172
|
[data, extra]
|
148
173
|
end
|
149
174
|
end
|
150
175
|
end
|
151
176
|
|
152
|
-
if not extra.nil?
|
177
|
+
if not extra.nil?
|
153
178
|
%w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
|
154
179
|
if extra.include? key.to_sym
|
155
180
|
self.send("#{key}=".to_sym, extra[key.to_sym])
|
156
|
-
if @data.respond_to? "#{key}=".to_sym
|
157
|
-
|
158
|
-
end
|
181
|
+
#if @data.respond_to? "#{key}=".to_sym
|
182
|
+
# @data.send("#{key}=".to_sym, extra[key.to_sym])
|
183
|
+
#end
|
159
184
|
end
|
160
185
|
end
|
186
|
+
@data.read if Persistence::TSV === @data
|
161
187
|
end
|
162
188
|
end
|
163
189
|
|
@@ -85,6 +85,13 @@ class TSV
|
|
85
85
|
fields.select{|f| f.namespace.nil? or f.namespace == namespace}
|
86
86
|
end
|
87
87
|
|
88
|
+
def key_field
|
89
|
+
return nil if @key_field.nil?
|
90
|
+
k = @key_field.dup
|
91
|
+
k.extend Field
|
92
|
+
k
|
93
|
+
end
|
94
|
+
|
88
95
|
def fields
|
89
96
|
return nil if @fields.nil?
|
90
97
|
fds = @fields
|
@@ -113,7 +120,11 @@ class TSV
|
|
113
120
|
|
114
121
|
def self.identify_field(key, fields, field)
|
115
122
|
return field if Integer === field
|
116
|
-
|
123
|
+
if String === field
|
124
|
+
field = field.dup
|
125
|
+
field.extend Field
|
126
|
+
end
|
127
|
+
return :key if field.nil? or field == 0 or field.to_sym == :key or field == key
|
117
128
|
return nil if fields.nil?
|
118
129
|
return fields.collect{|f| f.to_s}.index field if fields.collect{|f| f.to_s}.index field
|
119
130
|
return fields.index field
|
@@ -136,7 +147,7 @@ class TSV
|
|
136
147
|
end
|
137
148
|
end if Array === new_fields
|
138
149
|
@fields = new_fields
|
139
|
-
@data.fields = new_fields if @data.respond_to? :fields=
|
150
|
+
@data.fields = new_fields if @data.respond_to? :fields= and @data.write?
|
140
151
|
end
|
141
152
|
|
142
153
|
def old_fields=(new_fields)
|
@@ -196,6 +207,10 @@ class TSV
|
|
196
207
|
follow @data[key]
|
197
208
|
end
|
198
209
|
|
210
|
+
def delete(key)
|
211
|
+
@data.delete(key)
|
212
|
+
end
|
213
|
+
|
199
214
|
def values_at(*keys)
|
200
215
|
keys.collect{|k|
|
201
216
|
self[k]
|
@@ -258,6 +273,10 @@ class TSV
|
|
258
273
|
keys = nil
|
259
274
|
end
|
260
275
|
|
276
|
+
if keys == :sort
|
277
|
+
keys = self.keys.sort
|
278
|
+
end
|
279
|
+
|
261
280
|
str = ""
|
262
281
|
|
263
282
|
str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n" unless no_options
|
@@ -279,4 +298,10 @@ class TSV
|
|
279
298
|
|
280
299
|
str
|
281
300
|
end
|
301
|
+
|
302
|
+
def value_peek
|
303
|
+
peek = {}
|
304
|
+
keys[0..10].zip(values[0..10]).each do |k,v| peek[k] = v end
|
305
|
+
peek
|
306
|
+
end
|
282
307
|
end
|
data/lib/rbbt/util/tsv/attach.rb
CHANGED
@@ -47,6 +47,8 @@ class TSV
|
|
47
47
|
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
|
48
48
|
when (String === file1 or StringIO === file1)
|
49
49
|
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
|
50
|
+
when TSV === file1
|
51
|
+
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
|
50
52
|
end
|
51
53
|
|
52
54
|
case
|
@@ -54,6 +56,8 @@ class TSV
|
|
54
56
|
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
|
55
57
|
when (String === file2 or StringIO === file2)
|
56
58
|
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
|
59
|
+
when TSV === file2
|
60
|
+
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
|
57
61
|
end
|
58
62
|
|
59
63
|
output = File.open(output, 'w') if String === output
|
@@ -115,110 +119,6 @@ class TSV
|
|
115
119
|
|
116
120
|
output.close
|
117
121
|
end
|
118
|
-
|
119
|
-
def self.paste(file1, file2, output, sep = "\t")
|
120
|
-
case
|
121
|
-
when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
|
122
|
-
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } ", :pipe => true)
|
123
|
-
when String === file1
|
124
|
-
file1 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file1, :pipe => true)
|
125
|
-
end
|
126
|
-
|
127
|
-
case
|
128
|
-
when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
|
129
|
-
file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } ", :pipe => true)
|
130
|
-
when String === file2
|
131
|
-
file2 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file2, :pipe => true)
|
132
|
-
end
|
133
|
-
|
134
|
-
output = File.open(output, 'w') if String === output
|
135
|
-
|
136
|
-
cols1 = nil
|
137
|
-
cols2 = nil
|
138
|
-
|
139
|
-
done1 = false
|
140
|
-
done2 = false
|
141
|
-
|
142
|
-
while (line1 = file1.gets) =~ /#/; end
|
143
|
-
line1.strip!
|
144
|
-
parts1 = line1.split(sep)
|
145
|
-
key1 = parts1.shift
|
146
|
-
cols1 = parts1.length
|
147
|
-
|
148
|
-
while (line2 = file2.gets) =~ /#/; end
|
149
|
-
line2.strip!
|
150
|
-
parts2 = line2.split(sep)
|
151
|
-
key2 = parts2.shift
|
152
|
-
cols2 = parts2.length
|
153
|
-
while not (done1 or done2)
|
154
|
-
case
|
155
|
-
when key1 < key2
|
156
|
-
output.puts [key1, parts1, [""] * cols2] * sep
|
157
|
-
if file1.eof?
|
158
|
-
done1 = true
|
159
|
-
else
|
160
|
-
line1 = file1.gets
|
161
|
-
line1.strip!
|
162
|
-
parts1 = line1.split(sep)
|
163
|
-
key1 = parts1.shift
|
164
|
-
end
|
165
|
-
when key2 < key1
|
166
|
-
output.puts [key2, [""] * cols1, parts2] * sep
|
167
|
-
if file2.eof?
|
168
|
-
done2 = true
|
169
|
-
else
|
170
|
-
line2 = file2.gets
|
171
|
-
line2.strip!
|
172
|
-
parts2 = line2.split(sep)
|
173
|
-
key2 = parts2.shift
|
174
|
-
end
|
175
|
-
when key1 == key2
|
176
|
-
output.puts [key1, parts1, parts2] * sep
|
177
|
-
if file1.eof?
|
178
|
-
done1 = true
|
179
|
-
else
|
180
|
-
line1 = file1.gets
|
181
|
-
line1.strip!
|
182
|
-
parts1 = line1.split(sep)
|
183
|
-
key1 = parts1.shift
|
184
|
-
end
|
185
|
-
if file2.eof?
|
186
|
-
done2 = true
|
187
|
-
else
|
188
|
-
line2 = file2.gets
|
189
|
-
line2.strip!
|
190
|
-
parts2 = line2.split(sep)
|
191
|
-
key2 = parts2.shift
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
while not done1
|
197
|
-
output.puts [key1, parts1, [""] * cols2] * sep
|
198
|
-
if file1.eof?
|
199
|
-
done1 = true
|
200
|
-
else
|
201
|
-
line1 = file1.gets
|
202
|
-
line1.strip!
|
203
|
-
parts1 = line1.split(sep)
|
204
|
-
key1 = parts1.shift
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
while not done2
|
209
|
-
output.puts [key2, [""] * cols1, parts2] * sep
|
210
|
-
if file2.eof?
|
211
|
-
done2 = true
|
212
|
-
else
|
213
|
-
line2 = file2.gets
|
214
|
-
line2.strip!
|
215
|
-
parts2 = line2.split(sep)
|
216
|
-
key2 = parts2.shift
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
output.close
|
221
|
-
end
|
222
122
|
#{{{ Attach Methods
|
223
123
|
|
224
124
|
def attach_same_key(other, fields = nil)
|
@@ -296,6 +196,8 @@ class TSV
|
|
296
196
|
field_positions = fields.collect{|field| other.identify_field field}
|
297
197
|
field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
|
298
198
|
|
199
|
+
|
200
|
+
length = self.fields.length
|
299
201
|
through do |key, values|
|
300
202
|
source_keys = index[key]
|
301
203
|
if source_keys.nil? or source_keys.empty?
|
@@ -315,7 +217,7 @@ class TSV
|
|
315
217
|
other[source_key][pos]
|
316
218
|
end
|
317
219
|
end
|
318
|
-
new_values.collect!{|v| [v]}
|
220
|
+
new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
|
319
221
|
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
320
222
|
all_new_values << new_values
|
321
223
|
end
|
@@ -323,17 +225,28 @@ class TSV
|
|
323
225
|
|
324
226
|
if all_new_values.empty?
|
325
227
|
if type == :double
|
326
|
-
|
228
|
+
all_new_values = [[[]] * field_positions.length]
|
327
229
|
else
|
328
|
-
|
230
|
+
all_new_values = [[""] * field_positions.length]
|
329
231
|
end
|
232
|
+
end
|
233
|
+
|
234
|
+
current = self[key]
|
235
|
+
|
236
|
+
if current.length > length
|
237
|
+
all_new_values << current.slice!(length..current.length - 1)
|
238
|
+
end
|
239
|
+
|
240
|
+
if type == :double
|
241
|
+
all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
|
330
242
|
else
|
331
|
-
|
332
|
-
self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
|
333
|
-
else
|
334
|
-
self[key] = self[key].concat all_new_values.first
|
335
|
-
end
|
243
|
+
all_new_values = all_new_values.first
|
336
244
|
end
|
245
|
+
|
246
|
+
current += all_new_values
|
247
|
+
|
248
|
+
self[key] = current
|
249
|
+
|
337
250
|
end
|
338
251
|
|
339
252
|
self.fields = self.fields.concat field_names
|
@@ -385,13 +298,13 @@ class TSV
|
|
385
298
|
Log.medium "Found Traversal: #{traversal_ids * " => "}"
|
386
299
|
|
387
300
|
data_key, data_file = path.shift
|
388
|
-
if data_key == data_file.key_field
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
301
|
+
data_index = if data_key == data_file.key_field
|
302
|
+
Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
|
303
|
+
nil
|
304
|
+
else
|
305
|
+
Log.debug "Data index required"
|
306
|
+
data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
|
307
|
+
end
|
395
308
|
|
396
309
|
current_index = data_index
|
397
310
|
current_key = data_key
|
@@ -402,7 +315,7 @@ class TSV
|
|
402
315
|
current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
403
316
|
else
|
404
317
|
next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
405
|
-
current_index.process current_index.fields.first do |
|
318
|
+
current_index.process current_index.fields.first do |values|
|
406
319
|
if values.nil?
|
407
320
|
nil
|
408
321
|
else
|
@@ -411,6 +324,7 @@ class TSV
|
|
411
324
|
end
|
412
325
|
current_index.fields = [next_key]
|
413
326
|
end
|
327
|
+
current_key = next_key
|
414
328
|
end
|
415
329
|
|
416
330
|
current_index
|
@@ -470,6 +384,8 @@ class TSV
|
|
470
384
|
attach_index other, index, fields
|
471
385
|
end
|
472
386
|
Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
|
387
|
+
|
388
|
+
self
|
473
389
|
end
|
474
390
|
|
475
391
|
def detach(file)
|
@@ -489,10 +405,21 @@ class TSV
|
|
489
405
|
if self.fields and other.fields
|
490
406
|
new.fields = self.fields + other.fields
|
491
407
|
end
|
492
|
-
|
408
|
+
|
493
409
|
FileUtils.rm tmpfile if File.exists? tmpfile
|
494
410
|
|
495
411
|
new
|
496
412
|
end
|
497
413
|
|
414
|
+
|
415
|
+
def paste(other, options = {})
|
416
|
+
TmpFile.with_file do |output|
|
417
|
+
TSV.paste_merge(self, other, output, options[:sep] || "\t")
|
418
|
+
TSV.new output, options
|
419
|
+
end
|
420
|
+
end
|
421
|
+
|
422
|
+
def self.fast_paste(files, delim = "$")
|
423
|
+
CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
|
424
|
+
end
|
498
425
|
end
|
data/lib/rbbt/util/tsv/index.rb
CHANGED
@@ -34,6 +34,7 @@ class TSV
|
|
34
34
|
list = [list] unless Array === list
|
35
35
|
i += 1 if fields.nil?
|
36
36
|
list.each do |elem|
|
37
|
+
next if elem.empty?
|
37
38
|
elem.downcase if case_insensitive
|
38
39
|
new[elem] ||= []
|
39
40
|
new[elem][i] ||= []
|
@@ -73,6 +74,7 @@ class TSV
|
|
73
74
|
end
|
74
75
|
list.collect!{|e| e.downcase} if case_insensitive
|
75
76
|
list.each do |elem|
|
77
|
+
next if elem.empty?
|
76
78
|
new[elem] ||= []
|
77
79
|
if double_keys
|
78
80
|
new[elem].concat key
|
@@ -233,6 +235,8 @@ class TSV
|
|
233
235
|
|
234
236
|
def self.field_matches(tsv, values)
|
235
237
|
values = [values] if not Array === values
|
238
|
+
Log.debug "Matcing #{values.length} values to #{tsv.filename}"
|
239
|
+
|
236
240
|
if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
|
237
241
|
return {}
|
238
242
|
end
|