rbbt-util 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,345 @@
|
|
1
|
+
require 'rbbt/util/chain_methods'
|
2
|
+
|
3
|
+
module TSV
|
4
|
+
extend ChainMethods
|
5
|
+
self.chain_prefix = :tsv
|
6
|
+
|
7
|
+
attr_accessor :unnamed
|
8
|
+
|
9
|
+
# Runs the given block with @unnamed forced to true and returns the block's
# result. The previous value of @unnamed is put back afterwards, even when
# the block raises, so a failure cannot leave the object stuck in unnamed mode.
def with_unnamed
  saved_unnamed = @unnamed
  @unnamed = true
  yield
ensure
  @unnamed = saved_unnamed
end
|
16
|
+
|
17
|
+
# Runs the given block with @monitor set to +value+ (true by default) and
# returns the block's result. The previous value of @monitor is put back
# afterwards, even when the block raises.
def with_monitor(value = true)
  saved_monitor = @monitor
  @monitor = value
  yield
ensure
  @monitor = saved_monitor
end
|
24
|
+
|
25
|
+
# Hook invoked when an object is extended with this module. Sets up the
# method chains on the object and, for objects that do not already respond
# to :write, adds a minimal read/write/close interface directly on the
# object.
def self.extended(data)
  setup_chains(data)

  return if data.respond_to? :write

  class << data
    attr_accessor :writable

    # Nothing to release for such objects.
    def close; end

    # Switch to read mode; returns the object itself.
    def read(force = false)
      @writable = false
      self
    end

    # Switch to write mode; returns the object itself.
    def write(force = false)
      @writable = true
      self
    end

    def write?
      @writable
    end
  end
end
|
51
|
+
|
52
|
+
# Prefix prepended to an entry name to build the key under which that entry
# is stored (see TSV.entry).
KEY_PREFIX = "__tsv_hash_"
|
53
|
+
|
54
|
+
# Names of the entries registered so far through TSV.entry.
ENTRIES = []
|
55
|
+
# Prefixed storage keys of the registered entries; tsv_keys, tsv_each and
# tsv_collect skip these keys.
ENTRY_KEYS = []
|
56
|
+
|
57
|
+
# Reads the raw value stored under +key+ and decodes it with the serializer
# registered in SERIALIZER_ALIAS. Returns nil when nothing is stored.
# Raises if the serializer is still the :type placeholder.
def serialized_get(key)
  if serializer == :type
    raise "Uninitialized serializer"
  end

  raw = tsv_clean_get_brackets(key)
  return nil if raw.nil?

  SERIALIZER_ALIAS[serializer.to_sym].load(raw)
end
|
62
|
+
|
63
|
+
# Stores +value+ under +key+, encoded with the serializer registered in
# SERIALIZER_ALIAS. A nil value is stored as nil without encoding.
# Raises if the serializer is still the :type placeholder.
def serialized_set(key, value)
  if serializer == :type
    raise "Uninitialized serializer"
  end

  payload = value.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym].dump(value)
  tsv_clean_set_brackets(key, payload)
end
|
71
|
+
|
72
|
+
#{{{ Chained Methods
|
73
|
+
# Chained version of []: fetches the value for +key+, going through the
# serializer when one is set, and sets Array values up as NamedArray with
# the TSV fields unless the object is in unnamed mode.
def tsv_get_brackets(key)
  value = serializer.nil? ? tsv_clean_get_brackets(key) : serialized_get(key)

  unless @unnamed or not Array === value
    NamedArray.setup value, fields
  end

  value
end
|
83
|
+
|
84
|
+
# Chained version of []=: stores +value+ under +key+ directly when no
# serializer is set, and through the serializer otherwise.
def tsv_set_brackets(key,value)
  return tsv_clean_set_brackets(key, value) if serializer.nil?

  serialized_set(key, value)
end
|
91
|
+
|
92
|
+
# Chained version of keys: the stored keys without the internal entry keys.
def tsv_keys
  tsv_clean_keys.reject { |key| ENTRY_KEYS.include? key }
end
|
95
|
+
|
96
|
+
# Chained version of values: the values for every key, in key order. When
# the first value is an Array and the object is not in unnamed mode, every
# value is set up as a NamedArray with the TSV fields.
def tsv_values
  all_values = values_at(*keys)

  if Array === all_values.first and not @unnamed
    all_values.each do |row|
      NamedArray.setup row, fields
    end
  end

  all_values
end
|
101
|
+
|
102
|
+
# Chained version of each: iterates over the stored pairs, skipping the
# internal entry keys. Values are decoded with the serializer when one is
# set, and Array values are set up as NamedArray with the TSV fields unless
# the object is in unnamed mode. Yields key and value when a block is given.
# Returns whatever the underlying each returns.
def tsv_each
  tsv_clean_each do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up as a Symbol, as serialized_get/serialized_set do,
    # so a serializer name kept as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil?
    NamedArray.setup value, fields if Array === value and not @unnamed
    yield key, value if block_given?
    [key, value]
  end
end
|
112
|
+
|
113
|
+
# Chained version of collect: maps over the stored pairs. Internal entry
# keys are skipped with +next+, so they contribute a nil element to the
# result. String values are decoded with the serializer when one is set,
# and Array values are set up as NamedArray with the TSV fields unless the
# object is in unnamed mode. With a block the element is the block's
# result; without one it is the [key, value] pair.
def tsv_collect
  tsv_clean_collect do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up as a Symbol, as serialized_get/serialized_set do,
    # so a serializer name kept as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil? or not String === value
    NamedArray.setup value, fields if Array === value and not @unnamed

    if block_given?
      yield key, value
    else
      [key, value]
    end
  end
end
|
125
|
+
|
126
|
+
# Chained version of size: the number of keys reported by +keys+.
def tsv_size
  keys.size
end
|
129
|
+
|
130
|
+
# Chained version of length: the number of keys reported by +keys+.
def tsv_length
  keys.size
end
|
133
|
+
|
134
|
+
# Chained version of values_at: looks every requested key up through [],
# returning the values in the order the keys were given.
def tsv_values_at(*keys)
  keys.map { |key| self[key] }
end
|
139
|
+
|
140
|
+
#{{{ Sorting
|
141
|
+
|
142
|
+
# Chained version of sort_by.
#
# field::     field whose first value is used to order the entries; nil or
#             :all orders the entries by key.
# just_keys:: when true only the sorted keys are returned, otherwise
#             [key, value] pairs.
# block::     optional block used to order the collected [key, value]
#             elements instead of the default ordering.
#
# The original code tested +fields+ (the TSV field list) instead of +field+,
# so the order-by-key branch could never be taken.
def tsv_sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    elems = collect
  else
    elems = []
    through :key, field do |key, values|
      elems << [key, values.first]
    end
  end

  if block_given?
    sorted = elems.sort_by(&block)
    if just_keys
      sorted.collect{|key, value| key}
    else
      sorted.collect{|key, value| [key, self[key]]}
    end
  elsif field == :all
    sorted = elems.sort_by{|key, value| key }
    just_keys ? sorted.collect{|key, value| key} : sorted
  else
    sorted = elems.sort_by{|key, value| value }
    if just_keys
      sorted.collect{|key, value| key}
    else
      sorted.collect{|key, value| [key, self[key]]}
    end
  end
end
|
175
|
+
|
176
|
+
# Chained version of sort: sorts the collected elements, using the block as
# comparator when one is given.
def tsv_sort(&block)
  collect.sort(&block)
end
|
179
|
+
|
180
|
+
# Starts in page 1
|
181
|
+
# Returns one page of the entries, ordered through sort_by.
#
# pnum::      page number, starting at 1. A value written with a leading
#             minus sign ("-2" or -2) pages over the reversed order. String
#             page numbers are converted to integers in both directions
#             (previously only the reversed form was converted).
# psize::     number of entries per page.
# field::     field to sort by; nil or "key" sorts by key.
# just_keys:: when true returns the keys of the page, otherwise the result
#             of selecting those keys.
# block::     optional ordering block forwarded to sort_by.
def page(pnum, psize, field = nil, just_keys = false, &block)
  if pnum.to_s =~ /-(.*)/
    reverse = true
    pnum = $1.to_i
  else
    reverse = false
    pnum = pnum.to_i if String === pnum
  end

  with_unnamed do
    pstart = psize * (pnum - 1)
    pend = psize * pnum - 1
    field = :key if field == "key"
    keys = sort_by(field || :key, true, &block)
    keys.reverse! if reverse

    if just_keys
      keys[pstart..pend]
    else
      select :key => keys[pstart..pend]
    end
  end
end
|
203
|
+
|
204
|
+
|
205
|
+
# Declares one or more entries. For each name it records the name in ENTRIES
# and the prefixed storage key in ENTRY_KEYS, then evaluates code in the
# module that defines a reader and a writer for the entry:
# * the reader loads the value with YAML from the prefixed key the first time
#   (when the instance variable is not yet defined) and keeps it in an
#   instance variable;
# * the writer sets the instance variable and stores the YAML dump of the
#   value under the prefixed key.
# The text between the quotes below is evaluated as code, so it must not be
# altered casually.
def self.entry(*entries)
|
206
|
+
# Entry names are handled as strings from here on.
entries = entries.collect{|entry| entry.to_s}
|
207
|
+
ENTRIES.concat entries
|
208
|
+
entries.each do |entry|
|
209
|
+
# Storage key for this entry: KEY_PREFIX followed by the entry name.
key = KEY_PREFIX + entry
|
210
|
+
ENTRY_KEYS << key
|
211
|
+
self.module_eval "
|
212
|
+
attr_accessor :#{entry}
|
213
|
+
|
214
|
+
def #{ entry }
|
215
|
+
if not defined? @#{entry}
|
216
|
+
@#{entry} = YAML.load(self.tsv_clean_get_brackets('#{key}') || nil.to_yaml)
|
217
|
+
end
|
218
|
+
@#{entry}
|
219
|
+
end
|
220
|
+
|
221
|
+
def #{ entry }=(value)
|
222
|
+
@#{entry} = value
|
223
|
+
self.tsv_clean_set_brackets '#{key}', value.to_yaml
|
224
|
+
end
|
225
|
+
"
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
229
|
+
entry :key_field,
|
230
|
+
:fields,
|
231
|
+
:type,
|
232
|
+
:cast,
|
233
|
+
:identifiers,
|
234
|
+
:namespace,
|
235
|
+
:filename,
|
236
|
+
:serializer
|
237
|
+
|
238
|
+
# Field names of the TSV. They are loaded with YAML from the
# "__tsv_hash_fields" key on first use and kept in @fields. Unless the
# object is in unnamed mode (or there are no fields), the list is set up as
# a NamedArray named after itself before being returned.
def fields
  unless @fields
    stored = self.tsv_clean_get_brackets("__tsv_hash_fields")
    @fields = YAML.load(stored || nil.to_yaml)
  end

  return @fields if @fields.nil? or @unnamed

  NamedArray.setup @fields, @fields
end
|
246
|
+
|
247
|
+
# Transposes a list of parallel value lists: the i-th element of the result
# holds the i-th element of every list. Returns [] for a nil or empty list.
# When +fields+ is given (or the list itself responds to :fields), each
# resulting tuple is set up as a NamedArray with those fields.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  if list.respond_to? :fields
    fields = fields || list.fields
  end

  first_column = list[0]
  other_columns = list[1..-1]
  tuples = first_column.zip(*other_columns)

  return tuples unless fields

  tuples.collect { |tuple| NamedArray.setup(tuple, fields) }
end
|
254
|
+
|
255
|
+
# Returns the list of identifier files for this TSV.
#
# * identifiers is a TSV: that TSV alone, in a list.
# * identifiers is an Array: the array itself when it is empty or already
#   holds TSVs; otherwise every element converted to a Path (within this
#   TSV's namespace) unless it is one already.
# * identifiers is anything else truthy: a single-element list with it as a Path.
# * otherwise the identifier files of +filename+ (as a Path), or [] when
#   there is no filename.
#
# The Array branch used to end in a bare +when+ whose condition was the
# conversion itself and whose body was empty, so it returned nil instead of
# the converted list; it is now a proper +else+.
def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    if TSV === identifiers.first or identifiers.empty?
      identifiers
    else
      identifiers.collect{|f| Path === f ? f : Path.setup(f, nil, namespace)}
    end
  when identifiers
    [ Path === identifiers ? identifiers : Path.setup(identifiers, nil, namespace) ]
  when Path === filename
    filename.identifier_files
  when filename
    Path.setup(filename).identifier_files
  else
    []
  end
end
|
276
|
+
|
277
|
+
# Collects the current value of every registered entry into a hash keyed by
# entry name and hands it to IndiferentHash.setup.
def options
  pairs = ENTRIES.collect { |entry| [entry, self.send(entry)] }
  IndiferentHash.setup Hash[pairs]
end
|
284
|
+
|
285
|
+
|
286
|
+
# The key field followed by the regular fields, as a single list.
def all_fields
  [key_field].concat(fields)
end
|
289
|
+
|
290
|
+
# Formats the values of one entry as the tail of a TSV line: a leading tab,
# the cells separated by tabs, and a newline. Nested arrays are joined with
# "|". Nil values produce one empty cell per field, or just a newline when
# the TSV has no fields. Non-array values are written as a single cell.
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?

    blanks = Array.new(fields.length, "").join("\t")
    return "\t" + blanks + "\n"
  end

  return "\t" + values.to_s + "\n" unless Array === values

  cells = values.collect { |v| Array === v ? v.join("|") : v }
  "\t" + cells.join("\t") + "\n"
end
|
302
|
+
|
303
|
+
# Serializes the TSV as text.
#
# keys::       nil to dump every entry in iteration order, :sort to dump
#              them ordered by key, or an explicit list of keys. Passing
#              true/false here is taken as the +no_options+ flag instead.
# no_options:: when true the leading "#: ..." line with the entry values is
#              omitted.
#
# A "#key_field<TAB>fields..." header line is written when the TSV has
# fields. While the rows are written @unnamed is forced to false; the
# previous value is restored afterwards, even if writing a row raises
# (it used to be left at false in that case).
def to_s(keys = nil, no_options = false)
  if FalseClass === keys or TrueClass === keys
    no_options = keys
    keys = nil
  end

  keys = self.keys.sort if keys == :sort

  str = ""

  str << "#: " << Misc.hash2string(ENTRIES.collect{|key| [key.to_sym, self.send(key)]}) << "\n" unless no_options
  if fields
    str << "#" << key_field << "\t" << fields * "\t" << "\n"
  end

  saved_unnamed = @unnamed
  @unnamed = false
  begin
    if keys.nil?
      each do |key, values|
        key = key.to_s if Symbol === key
        str << key.dup
        str << values_to_s(values)
      end
    else
      keys.zip(values_at(*keys)).each do |key, values|
        key = key.to_s if Symbol === key
        str << key.dup << values_to_s(values)
      end
    end
  ensure
    @unnamed = saved_unnamed
  end

  str
end
|
338
|
+
|
339
|
+
# Returns a hash with a sample of the content: the keys at positions 0..10
# paired with the values at positions 0..10.
def value_peek
  sample_keys = keys[0..10]
  sample_values = values[0..10]

  sample_keys.zip(sample_values).inject({}) do |peek, (k, v)|
    peek[k] = v
    peek
  end
end
|
344
|
+
end
|
345
|
+
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'rbbt/tsv'
|
2
|
+
require 'rbbt/tsv/attach/util'
|
3
|
+
module TSV
|
4
|
+
|
5
|
+
# Merge columns from different rows of a file
|
6
|
+
# Merges consecutive rows that share the same key into one row, joining the
# values of each column with "|", and writes the result to +output+ through
# Open.write.
#
# input::  a path to an existing file or a String/StringIO with the content
#          (both are piped through sort and grep first), or any object
#          answering gets and eof? that is read as is.
# output:: target handed to Open.write.
# sep::    column separator.
#
# Changes from the original: the last accumulated group is now written when
# the input ends (it used to be dropped), and File.exist? replaces
# File.exists?, which was removed in Ruby 3.2.
#
# NOTE(review): +sep+ and +input+ are interpolated into a shell command
# unescaped; only call this with trusted values.
def self.merge_row_fields(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n",'').split(sep, -1)

      # Lines without a key (empty lines) are ignored
      next if key.nil?

      current_key ||= key

      if current_key == key
        parts.each_with_index do |part,i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      else
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end
    end

    # Flush the group that was still being accumulated when the input ended
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
  end
end
|
47
|
+
|
48
|
+
# Merge two files with the same keys and different fields
|
49
|
+
# Merges two key-sorted inputs that have different fields into one stream:
# each output row holds the key, then the columns of the first input, then
# the columns of the second. Rows with the same key are combined, repeated
# values for a column are joined with "|", and columns missing for a key are
# left empty.
#
# file1, file2:: a path to an existing file, a String/StringIO with the
#                content, or a TSV (all piped through sort and grep first);
#                any other object is read as is through gets and eof?.
# output::       a file name (opened for writing) or an object answering
#                puts and close. It is closed before returning.
# sep::          column separator.
#
# Changes from the original: File.exist? replaces File.exists? (removed in
# Ruby 3.2), the duplicated input handling is shared, and +output+ is closed
# even when the merge raises.
#
# NOTE(review): +sep+ and the file names are interpolated into a shell
# command unescaped; only call this with trusted values.
def self.merge_different_fields(file1, file2, output, sep = "\t")
  # Turns a path, raw content or TSV into a stream sorted by key; anything
  # else is returned untouched
  sorted_stream = lambda do |file|
    case
    when (String === file and not file.index("\n") and file.length < 250 and File.exist?(file))
      CMD.cmd("sort -k1,1 -t'#{sep}' #{ file } | grep -v '^#{sep}' ", :pipe => true)
    when (String === file or StringIO === file)
      CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file, :pipe => true)
    when TSV === file
      CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file.to_s(:sort, true), :pipe => true)
    else
      file
    end
  end

  file1 = sorted_stream.call(file1)
  file2 = sorted_stream.call(file2)

  output = File.open(output, 'w') if String === output

  begin
    cols1 = nil
    cols2 = nil

    done1 = false
    done2 = false

    key1 = key2 = nil

    # First data row of each input (lines containing '#' are skipped); its
    # width fixes the number of columns contributed by that input
    while key1.nil?
      while (line1 = file1.gets) =~ /#/; end
      key1, *parts1 = line1.sub("\n",'').split(sep, -1)
      cols1 = parts1.length
    end

    while key2.nil?
      while (line2 = file2.gets) =~ /#/; end
      key2, *parts2 = line2.sub("\n",'').split(sep, -1)
      cols2 = parts2.length
    end

    key = key1 < key2 ? key1 : key2
    parts = [""] * (cols1 + cols2)
    while not (done1 and done2)
      # Consume every row of the first input that carries the current key
      while (not done1 and key1 == key)
        parts1.each_with_index do |part, i|
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key1 = nil
        while key1.nil? and not done1
          if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
        end
      end

      # Same for the second input; its columns go after those of the first
      while (not done2 and key2 == key)
        parts2.each_with_index do |part, i|
          i += cols1
          parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
        end
        key2 = nil
        while key2.nil? and not done2
          if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
        end
      end

      output.puts [key, parts].flatten * sep
      parts = [""] * (cols1 + cols2)

      # Next key: the smaller of the two pending keys, or the only one left
      case
      when done1
        key = key2
      when done2
        key = key1
      else
        key = key1 < key2 ? key1 : key2
      end
    end
  ensure
    output.close
  end
end
|
127
|
+
|
128
|
+
# Merge columns from different files
|
129
|
+
# Pastes the given files side by side with the paste command, using +delim+
# as the delimiter, filters the result through sed, and returns the
# resulting CMD pipe.
def self.merge_paste(files, delim = "$")
  quoted_files = files.collect { |f| "'#{f}'" }.join(" ")
  command = "paste #{quoted_files} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'"
  CMD.cmd(command, :pipe => true)
end
|
132
|
+
|
133
|
+
# Attaches fields from +other+ to this TSV and returns self.
#
# Options read here:
# :fields::        fields to attach; nil or :all means every field of
#                  +other+ that is not already the key field or a field of
#                  this TSV.
# :one2one::       forwarded to attach_source_key.
# :in_namespace::  (default false) selects whether +other+'s key field is
#                  looked up among fields or fields_in_namespace.
# :persist_input:: (default true) used when +other+ has to be turned into a
#                  TSV first.
#
# The attachment strategy depends on how the two TSVs relate: same key
# field, +other+'s key field present among this TSV's fields, or otherwise a
# traversal index found with TSV.find_traversal (raising when none exists).
def attach(other, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  fields, one2one = Misc.process_options options, :fields, :one2one
  in_namespace = options[:in_namespace]

  # NOTE(review): the original followed this line with an in_namespace
  # branch that defaulted +fields+ from other.fields_in_namespace, but it
  # was guarded by fields.nil?, which can no longer hold here, so it never
  # ran. It has been removed without changing behaviour; confirm whether
  # namespace-restricted defaults were intended.
  fields = other.fields - [key_field].concat(self.fields) if fields.nil? or fields == :all

  Log.medium("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other
  case
  when key_field == other.key_field
    attach_same_key other, fields
  when (not in_namespace and self.fields.include?(other.key_field))
    Log.debug "Found other's key field: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  when (in_namespace and self.fields_in_namespace.include?(other.key_field))
    Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  else
    index = TSV.find_traversal(self, other, options)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end
  Log.debug("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")

  self
end
|
166
|
+
|
167
|
+
# Finds the positions of this TSV's fields whose fullname also appears among
# the fields of +file+, and returns the result of reordering by key with
# those positions.
def detach(file)
  shared_names = file.fields.collect { |field| field.fullname }

  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if shared_names.include? field.fullname
  end

  reorder :key, positions
end
|
173
|
+
|
174
|
+
# Merges this TSV with +other+ (same keys, different fields) through
# TSV.merge_different_fields, using a temporary file for the merged text,
# and opens the result as a new TSV with +options+. The separator is
# options[:sep], or a tab. The new TSV gets this TSV's key field when there
# is one, and the concatenation of both field lists when both are present.
def merge_different_fields(other, options = {})
  TmpFile.with_file do |output|
    separator = options[:sep] || "\t"
    TSV.merge_different_fields(self, other, output, separator)

    merged = TSV.open output, options

    if not self.key_field.nil?
      merged.key_field = self.key_field
    end

    if not (self.fields.nil? or other.fields.nil?)
      merged.fields = self.fields + other.fields
    end

    merged
  end
end
|
183
|
+
end
|