rbbt-util 3.2.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,345 @@
|
|
1
|
+
require 'rbbt/util/chain_methods'
|
2
|
+
|
3
|
+
module TSV
|
4
|
+
extend ChainMethods
|
5
|
+
self.chain_prefix = :tsv
|
6
|
+
|
7
|
+
attr_accessor :unnamed
|
8
|
+
|
9
|
+
# Runs the given block with @unnamed set to true and returns the block's
# result. The previous value of @unnamed is put back afterwards, also when
# the block raises, so a failure inside the block cannot leave the flag set.
def with_unnamed
  saved_unnamed = @unnamed
  @unnamed = true
  begin
    yield
  ensure
    @unnamed = saved_unnamed
  end
end
|
16
|
+
|
17
|
+
# Runs the given block with @monitor set to +value+ (true by default) and
# returns the block's result. The previous value of @monitor is put back
# afterwards, also when the block raises.
def with_monitor(value = true)
  saved_monitor = @monitor
  @monitor = value
  begin
    yield
  ensure
    @monitor = saved_monitor
  end
end
|
24
|
+
|
25
|
+
# Hook invoked when an object is extended with this module. It calls
# setup_chains on the object and then, only if the object does not already
# respond to :write, gives it a minimal read/write-mode interface whose sole
# state is the @writable flag.
def self.extended(data)
  setup_chains(data)

  return if data.respond_to? :write

  class << data
    attr_accessor :writable

    # Nothing to release for this fallback interface.
    def close
    end

    # Marks the object as not writable; returns the object itself.
    def read(force = false)
      @writable = false
      self
    end

    # Marks the object as writable; returns the object itself.
    def write(force = false)
      @writable = true
      self
    end

    # Current value of the @writable flag.
    def write?
      @writable
    end
  end
end
|
51
|
+
|
52
|
+
# Prefix of the storage keys used for entry values; self.entry builds each
# storage key as KEY_PREFIX + entry name.
KEY_PREFIX = "__tsv_hash_"

# Entry names registered through self.entry, and their prefixed storage keys.
ENTRIES, ENTRY_KEYS = [], []
|
56
|
+
|
57
|
+
# Fetches the stored value for +key+ with tsv_clean_get_brackets and decodes
# it with the SERIALIZER_ALIAS entry selected by serializer. Returns nil when
# nothing is stored. Raises "Uninitialized serializer" when serializer
# equals :type.
def serialized_get(key)
  raise "Uninitialized serializer" if serializer == :type

  raw = tsv_clean_get_brackets(key)
  return nil if raw.nil?

  SERIALIZER_ALIAS[serializer.to_sym].load(raw)
end
|
62
|
+
|
63
|
+
# Stores +value+ under +key+ through tsv_clean_set_brackets. A nil value is
# stored as nil; anything else is first encoded with the SERIALIZER_ALIAS
# entry selected by serializer. Raises "Uninitialized serializer" when
# serializer equals :type.
def serialized_set(key, value)
  raise "Uninitialized serializer" if serializer == :type

  encoded = value.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym].dump(value)
  tsv_clean_set_brackets(key, encoded)
end
|
71
|
+
|
72
|
+
#{{{ Chained Methods
|
73
|
+
# Chained reader. Without a serializer the value comes straight from
# tsv_clean_get_brackets; otherwise it goes through serialized_get. Array
# values are passed to NamedArray.setup together with fields unless @unnamed
# is set. Returns the value.
def tsv_get_brackets(key)
  value = serializer.nil? ? tsv_clean_get_brackets(key) : serialized_get(key)

  if Array === value && !@unnamed
    NamedArray.setup value, fields
  end

  value
end
|
83
|
+
|
84
|
+
# Chained writer. Without a serializer the value is stored as given through
# tsv_clean_set_brackets; otherwise it is stored through serialized_set.
def tsv_set_brackets(key,value)
  return tsv_clean_set_brackets(key, value) if serializer.nil?

  serialized_set(key, value)
end
|
91
|
+
|
92
|
+
# Chained keys: everything tsv_clean_keys reports except the storage keys
# listed in ENTRY_KEYS.
def tsv_keys
  stored_keys = tsv_clean_keys
  stored_keys - ENTRY_KEYS
end
|
95
|
+
|
96
|
+
# Chained values: the values for all keys, obtained with values_at. When the
# first value is an Array and @unnamed is not set, every value is passed to
# NamedArray.setup together with fields.
def tsv_values
  rows = values_at(*keys)

  if Array === rows.first && !@unnamed
    rows.each { |row| NamedArray.setup row, fields }
  end

  rows
end
|
101
|
+
|
102
|
+
# Chained each. Walks the stored pairs through tsv_clean_each, skipping the
# storage keys listed in ENTRY_KEYS. When a serializer is set each value is
# decoded with it; Array values are passed to NamedArray.setup together with
# fields unless @unnamed is set. Each pair is yielded to the caller's block
# when one is given.
def tsv_each
  tsv_clean_each do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up by Symbol, as serialized_get and serialized_set
    # do, so a serializer name held as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil?
    NamedArray.setup value, fields if Array === value and not @unnamed
    yield key, value if block_given?
    [key, value]
  end
end
|
112
|
+
|
113
|
+
# Chained collect. Maps the stored pairs through tsv_clean_collect. String
# values are decoded with the serializer when one is set; Array values are
# passed to NamedArray.setup together with fields unless @unnamed is set.
# With a block the result holds the block's value for each pair, otherwise
# the [key, value] pair itself.
#
# NOTE(review): pairs whose key is in ENTRY_KEYS are skipped with `next`, so
# they contribute a nil to the result if tsv_clean_collect behaves like
# Enumerable#collect -- confirm callers expect that.
def tsv_collect
  tsv_clean_collect do |key, value|
    next if ENTRY_KEYS.include? key

    # Look the serializer up by Symbol, as serialized_get and serialized_set
    # do, so a serializer name held as a String resolves as well.
    value = SERIALIZER_ALIAS[serializer.to_sym].load(value) unless serializer.nil? or not String === value
    NamedArray.setup value, fields if Array === value and not @unnamed

    if block_given?
      yield key, value
    else
      [key, value]
    end
  end
end
|
125
|
+
|
126
|
+
# Chained size: the number of keys.
def tsv_size
  keys.size
end
|
129
|
+
|
130
|
+
# Chained length: the number of keys.
def tsv_length
  keys.size
end
|
133
|
+
|
134
|
+
# Chained values_at: looks every requested key up with self[] and returns
# the values in the order the keys were given.
def tsv_values_at(*keys)
  keys.map { |key| self[key] }
end
|
139
|
+
|
140
|
+
#{{{ Sorting
|
141
|
+
|
142
|
+
# Chained sort_by.
#
# field     - field to sort on; nil or :all sorts whole entries.
# just_keys - when true only the sorted keys are returned.
# block     - optional sort_by block applied to the [key, value] pairs.
#
# Without a block, :all sorts the entries by key and a specific field sorts
# them by the first value that `through :key, field` yields for each key.
# Returns an Array of keys (just_keys) or of [key, value] pairs.
def tsv_sort_by(field = nil, just_keys = false, &block)
  field = :all if field.nil?

  if field == :all
    # collect leaves a nil for every metadata key it skips; those are not
    # entries and cannot be compared with real pairs, so drop them.
    elems = collect.compact
  else
    elems = []
    through :key, field do |key, values|
      elems << [key, values.first]
    end
  end

  # This must test the +field+ argument: the TSV's own `fields` list is
  # never :all, which made the sort-by-key branch unreachable.
  sorted = if block_given?
             elems.sort_by(&block)
           elsif field == :all
             elems.sort_by{|key, value| key }
           else
             elems.sort_by{|key, value| value }
           end

  return sorted.collect{|key, value| key } if just_keys
  return sorted if field == :all and not block_given?

  sorted.collect{|key, value| [key, self[key]] }
end
|
175
|
+
|
176
|
+
# Chained sort: sorts the result of collect, using the comparison block
# when one is given.
def tsv_sort(&block)
  pairs = collect
  pairs.sort(&block)
end
|
179
|
+
|
180
|
+
# Starts in page 1
|
181
|
+
# Returns one page of the entries. Pages start at 1.
#
# pnum      - page number, Integer or String; a leading "-" (-2, "-2")
#             counts pages over the reversed ordering.
# psize     - number of entries per page.
# field     - field to order by; nil and "key" both mean :key.
# just_keys - when true return only the keys of the page, otherwise the
#             result of `select :key => page_keys`.
# block     - optional sort_by block, forwarded to sort_by.
def page(pnum, psize, field = nil, just_keys = false, &block)
  if pnum.to_s =~ /-(.*)/
    reverse = true
    pnum = $1.to_i
  else
    reverse = false
    # Accept "2" as well as 2, matching what is done for "-2".
    pnum = pnum.to_i
  end

  with_unnamed do
    pstart = psize * (pnum - 1)
    pend = psize * pnum - 1
    field = :key if field == "key"
    keys = sort_by(field || :key, true, &block)
    keys.reverse! if reverse

    if just_keys
      keys[pstart..pend]
    else
      select :key => keys[pstart..pend]
    end
  end
end
|
203
|
+
|
204
|
+
|
205
|
+
# Registers metadata entries. For every name it records the name in ENTRIES
# and its storage key (KEY_PREFIX + name) in ENTRY_KEYS, and defines a
# reader and a writer on this module:
#
# * the reader loads the YAML stored under the storage key the first time
#   it is called (nil when nothing is stored) and caches the result in the
#   instance variable named after the entry;
# * the writer sets that instance variable and stores the value as YAML
#   under the storage key.
#
# The accessors are built with define_method rather than evaluating a
# source string, which also avoids declaring an attr_accessor that was
# immediately redefined.
#
# NOTE(review): values are read back with YAML.load; confirm the stored
# data is always trusted before relying on this.
def self.entry(*entries)
  entries = entries.collect{|entry| entry.to_s}
  ENTRIES.concat entries

  entries.each do |entry|
    key = KEY_PREFIX + entry
    ivar = "@" + entry
    ENTRY_KEYS << key

    define_method(entry) do
      unless instance_variable_defined?(ivar)
        instance_variable_set(ivar, YAML.load(tsv_clean_get_brackets(key) || nil.to_yaml))
      end
      instance_variable_get(ivar)
    end

    define_method(entry + "=") do |value|
      instance_variable_set(ivar, value)
      tsv_clean_set_brackets key, value.to_yaml
    end
  end
end
|
228
|
+
|
229
|
+
# Register the metadata entries kept alongside the data.
entry :key_field, :fields, :type, :cast,
      :identifiers, :namespace, :filename, :serializer
|
237
|
+
|
238
|
+
# Field names. Loaded on first use from the YAML stored under
# "__tsv_hash_fields" and cached in @fields. Returned as-is when nil or when
# @unnamed is set; otherwise the result of NamedArray.setup applied to the
# list with itself as its field names.
def fields
  unless @fields
    @fields = YAML.load(self.tsv_clean_get_brackets("__tsv_hash_fields") || nil.to_yaml)
  end

  return @fields if @fields.nil? || @unnamed

  NamedArray.setup @fields, @fields
end
|
246
|
+
|
247
|
+
# Transposes a list of parallel arrays into an array of tuples. Returns []
# for a nil or empty list. When +fields+ is given, or the list itself
# responds to :fields, every tuple is passed to NamedArray.setup with them.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields = list.fields if list.respond_to?(:fields) && !fields

  head, *tail = list
  tuples = head.zip(*tail)
  return tuples unless fields

  tuples.collect { |tuple| NamedArray.setup(tuple, fields) }
end
|
254
|
+
|
255
|
+
# Resolves where identifier translations come from. Returns an Array:
#
# * identifiers is a TSV            -> [identifiers]
# * identifiers is an Array         -> the array itself when it is empty or
#                                      starts with a TSV, otherwise each
#                                      element as a Path (set up with the
#                                      namespace when it is not one already)
# * any other non-nil identifiers   -> that single value as a Path
# * no identifiers, but a filename  -> the filename's own identifier_files
# * nothing at all                  -> []
def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    # This fallback has to be a plain else: written as a bare `when` the
    # collect expression was parsed as a condition with an empty body, and
    # the method returned nil for arrays of file names.
    if TSV === identifiers.first or identifiers.empty?
      identifiers
    else
      identifiers.collect{|f| Path === f ? f : Path.setup(f, nil, namespace)}
    end
  when identifiers
    [ Path === identifiers ? identifiers : Path.setup(identifiers, nil, namespace) ]
  when Path === filename
    filename.identifier_files
  when filename
    Path.setup(filename).identifier_files
  else
    []
  end
end
|
276
|
+
|
277
|
+
# Collects the current value of every entry listed in ENTRIES into a Hash
# keyed by entry name, and returns the result of IndiferentHash.setup on it.
def options
  collected = ENTRIES.inject({}) do |acc, entry|
    acc[entry] = self.send(entry)
    acc
  end

  IndiferentHash.setup collected
end
|
284
|
+
|
285
|
+
|
286
|
+
# The key field followed by the regular fields, as a new Array.
def all_fields
  [key_field].concat(fields)
end
|
289
|
+
|
290
|
+
# Renders the value part of one output line, starting at the tab that
# follows the key and ending with a newline:
#
# * nil values, no fields -> just the newline
# * nil values, fields    -> one empty cell per field
# * non-Array value       -> its to_s
# * Array                 -> cells joined by tabs; nested arrays by "|"
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?

    blanks = [""] * fields.length
    return "\t#{blanks.join("\t")}\n"
  end

  return "\t#{values}\n" unless Array === values

  cells = values.collect { |v| Array === v ? v.join("|") : v }
  "\t#{cells.join("\t")}\n"
end
|
302
|
+
|
303
|
+
# Renders the TSV as text.
#
# keys       - nil for all entries in each order, :sort for all keys sorted,
#              or an explicit list of keys. true/false in this position is
#              taken as the no_options argument.
# no_options - when true the leading "#: ..." options line is left out.
#
# A "#key_field<TAB>fields" header line follows when fields are defined, and
# then one line per entry built with values_to_s. @unnamed is set to false
# while the entries are rendered and restored afterwards, also when
# rendering raises.
def to_s(keys = nil, no_options = false)
  if FalseClass === keys or TrueClass === keys
    no_options = keys
    keys = nil
  end

  keys = self.keys.sort if keys == :sort

  str = ""

  str << "#: " << Misc.hash2string(ENTRIES.collect{|key| [key.to_sym, self.send(key)]}) << "\n" unless no_options
  if fields
    str << "#" << key_field << "\t" << fields * "\t" << "\n"
  end

  saved_unnamed = @unnamed
  @unnamed = false
  begin
    if keys.nil?
      each do |key, values|
        key = key.to_s if Symbol === key
        str << key.dup
        str << values_to_s(values)
      end
    else
      keys.zip(values_at(*keys)).each do |key, values|
        key = key.to_s if Symbol === key
        str << key.dup << values_to_s(values)
      end
    end
  ensure
    # Put the flag back even if an entry fails to render.
    @unnamed = saved_unnamed
  end

  str
end
|
338
|
+
|
339
|
+
# Returns a Hash built from the first eleven keys (indexes 0..10) paired
# with the first eleven values.
def value_peek
  keys[0..10].zip(values[0..10]).inject({}) do |peek, (k, v)|
    peek[k] = v
    peek
  end
end
|
344
|
+
end
|
345
|
+
|
@@ -0,0 +1,183 @@
|
|
1
|
+
require 'rbbt/tsv'
|
2
|
+
require 'rbbt/tsv/attach/util'
|
3
|
+
module TSV
|
4
|
+
|
5
|
+
# Merge columns from different rows of a file
|
6
|
+
# Merges consecutive rows that share the same key (first column) into one
# row, joining the values of each column with "|", and writes the result to
# +output+ through Open.write.
#
# input  - a file name, text (String or StringIO) or an already open stream.
#          File names and text are piped through `sort` (on the first
#          column) and `grep -v` (dropping lines that start with the
#          separator); any other object is read as given and is expected to
#          be sorted by key already.
# output - destination handed to Open.write.
# sep    - column separator, tab by default.
#
# NOTE(review): the file name and separator are interpolated into a shell
# command without quoting; confirm they never come from untrusted input.
def self.merge_row_fields(input, output, sep = "\t")
  is = case
       # File.exist? instead of File.exists?, which Ruby 3.2 removed.
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n",'').split(sep, -1)
      current_key ||= key

      case
      when key.nil?
        # Empty line: nothing to merge.
      when current_key == key
        parts.each_with_index do |part, i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      else
        # A new key starts: emit the finished group and begin the next one.
        os.puts([current_key, current_parts].flatten * sep)
        current_key = key
        current_parts = parts
      end
    end

    # The loop only emits a group when the following key shows up, so the
    # last group has to be written here or it would be lost.
    os.puts([current_key, current_parts].flatten * sep) unless current_key.nil?
  end
end
|
47
|
+
|
48
|
+
# Merge two files with the same keys and different fields
|
49
|
+
# Merges two inputs that share keys (first column) but carry different
# fields. Every output row holds the key, then the columns of file1, then
# the columns of file2; columns missing for a key are left empty and
# repeated values for a key are joined with "|".
#
# file1, file2 - a file name, text (String or StringIO), a TSV, or an
#                already open stream. The first three are piped through
#                `sort` (on the first column) and `grep -v` (dropping lines
#                that start with the separator); any other object is read
#                as given and is expected to be sorted by key already.
# output       - a file name (opened for writing) or a writable stream; it
#                is closed when the merge finishes.
# sep          - column separator, tab by default.
#
# NOTE(review): an input without any data line makes the header-skipping
# loops below fail on a nil line; confirm whether empty inputs can occur.
# NOTE(review): file names and the separator are interpolated into a shell
# command without quoting; confirm they never come from untrusted input.
def self.merge_different_fields(file1, file2, output, sep = "\t")
  case
  # File.exist? instead of File.exists?, which Ruby 3.2 removed.
  when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exist?(file1))
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
  when (String === file1 or StringIO === file1)
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
  when TSV === file1
    file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
  end

  case
  when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exist?(file2))
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
  when (String === file2 or StringIO === file2)
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
  when TSV === file2
    file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
  end

  output = File.open(output, 'w') if String === output

  cols1 = nil
  cols2 = nil

  done1 = false
  done2 = false

  # Skip leading header lines (those starting with "#") and read the first
  # data row of each input. The match is anchored so that a data row that
  # merely contains a "#" is not thrown away.
  key1 = key2 = nil
  while key1.nil?
    while (line1 = file1.gets) =~ /^#/; end
    key1, *parts1 = line1.sub("\n",'').split(sep, -1)
    cols1 = parts1.length
  end

  while key2.nil?
    while (line2 = file2.gets) =~ /^#/; end
    key2, *parts2 = line2.sub("\n",'').split(sep, -1)
    cols2 = parts2.length
  end

  # Walk both inputs in key order, always handling the smaller pending key.
  key = key1 < key2 ? key1 : key2
  parts = [""] * (cols1 + cols2)
  while not (done1 and done2)
    # Fold every file1 row for the current key into the left-hand columns.
    while (not done1 and key1 == key)
      parts1.each_with_index do |part, i|
        parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
      end
      key1 = nil
      while key1.nil? and not done1
        if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
      end
    end

    # Fold every file2 row for the current key into the right-hand columns.
    while (not done2 and key2 == key)
      parts2.each_with_index do |part, i|
        i += cols1
        parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
      end
      key2 = nil
      while key2.nil? and not done2
        if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
      end
    end

    output.puts([key, parts].flatten * sep)
    parts = [""] * (cols1 + cols2)

    case
    when done1
      key = key2
    when done2
      key = key1
    else
      key = key1 < key2 ? key1 : key2
    end
  end

  output.close
end
|
127
|
+
|
128
|
+
# Merge columns from different files
|
129
|
+
# Pastes the given files side by side with the `paste` command, using
# +delim+ between files, and pipes the result through a `sed` expression
# built from the delimiter. Returns whatever CMD.cmd returns for the
# pipeline, requested with :pipe => true.
def self.merge_paste(files, delim = "$")
  quoted_files = files.collect { |f| "'#{f}'" } * " "
  command = "paste #{quoted_files} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'"
  CMD.cmd(command, :pipe => true)
end
|
132
|
+
|
133
|
+
# Attaches fields from +other+ to this TSV and returns self.
#
# options:
#   :fields        - fields to attach; nil or :all means every field of
#                    other that is neither this TSV's key field nor one of
#                    its fields.
#   :one2one       - forwarded to attach_source_key.
#   :in_namespace  - defaults to false; selects fields_in_namespace when
#                    looking for other's key field among our own fields.
#   :persist_input - defaults to true; forwarded as :persist when other has
#                    to be turned into a TSV first.
#
# Strategy: same key field -> attach_same_key; other's key field is one of
# our fields -> attach_source_key; otherwise an index from
# TSV.find_traversal is used with attach_index. Raises "Cannot traverse
# identifiers" when no index is found.
def attach(other, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
  fields, one2one = Misc.process_options options, :fields, :one2one
  in_namespace = options[:in_namespace]

  if fields.nil? or fields == :all
    fields = other.fields - [key_field].concat(self.fields)
  end

  # NOTE(review): fields can no longer be nil at this point, so this default
  # (including the fields_in_namespace variant) never runs -- confirm
  # whether the namespace default was meant to take precedence.
  if fields.nil?
    candidates = in_namespace ? other.fields_in_namespace : other.fields
    fields = candidates - [key_field].concat(self.fields)
  end

  Log.medium("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  other = other.tsv(:persist => options[:persist_input] == true) unless TSV === other

  if key_field == other.key_field
    attach_same_key other, fields
  elsif !in_namespace && self.fields.include?(other.key_field)
    Log.debug "Found other's key field: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  elsif in_namespace && self.fields_in_namespace.include?(other.key_field)
    Log.debug "Found other's key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
  else
    index = TSV.find_traversal(self, other, options)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.debug("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")

  self
end
|
166
|
+
|
167
|
+
# Calls reorder with :key and the positions of this TSV's fields whose
# fullname also appears among the fields of +file+, and returns its result.
def detach(file)
  file_names = file.fields.collect { |field| field.fullname }
  own_fields = self.fields

  positions = (0...own_fields.length).select do |i|
    file_names.include? own_fields[i].fullname
  end

  reorder :key, positions
end
|
173
|
+
|
174
|
+
# Merges this TSV with +other+ through TSV.merge_different_fields, writing
# into a file provided by TmpFile.with_file, and opens the result with
# TSV.open using +options+ (options[:sep], tab by default, is the column
# separator). The key field is copied from this TSV when it has one, and
# the fields are set to this TSV's fields followed by other's when both are
# defined. Returns what the TmpFile.with_file block evaluates to.
def merge_different_fields(other, options = {})
  separator = options[:sep] || "\t"

  TmpFile.with_file do |output|
    TSV.merge_different_fields(self, other, output, separator)

    merged = TSV.open output, options
    merged.key_field = self.key_field unless self.key_field.nil?
    unless self.fields.nil? or other.fields.nil?
      merged.fields = self.fields + other.fields
    end
    merged
  end
end
|
183
|
+
end
|