rbbt-util 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -1,312 +0,0 @@
|
|
1
|
-
require 'rbbt/util/resource'
|
2
|
-
require 'rbbt/util/misc'
|
3
|
-
|
4
|
-
class TSV
|
5
|
-
# Attribute names of a TSV. The accessors are generated here, ahead of the
# hand-written key_field / fields readers and writers that appear later in
# this class, so those later definitions replace the generated ones.
MAIN_ACCESSORS  = [:data, :key_field, :fields, :cast]
# Attributes that TSV#to_s serialises into the "#: " header line.
EXTRA_ACCESSORS = [:filename, :identifiers, :namespace, :type, :case_insensitive]
attr_accessor(*(MAIN_ACCESSORS + EXTRA_ACCESSORS))
|
9
|
-
|
10
|
-
# Transpose a list of per-field value lists into a list of per-entry rows.
#
# list   - Array of Arrays (one inner Array per field); nil or empty gives [].
# fields - optional field names; defaults to list.fields when list exposes it.
#
# Returns the transposed rows; when field names are known every row is passed
# through NamedArray.name(row, fields).
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields = list.fields if !fields && list.respond_to?(:fields)

  rows = list[0].zip(*list[1..-1])
  return rows unless fields

  rows.collect { |row| NamedArray.name(row, fields) }
end
|
17
|
-
|
18
|
-
# Mixin for String instances that name a TSV column. A field may carry a
# namespace either inline ("Namespace:Name") or through the namespace
# attribute; comparisons are case-insensitive and namespace-tolerant.
module Field
  # Only the writer is generated; the reader is defined explicitly below.
  attr_writer :namespace

  # Extend +field+ with Field, set its namespace and return it.
  def self.field(field, namespace = nil)
    field.extend Field
    field.namespace = namespace
    field
  end

  # Inline namespace of +string+: everything before its last ":".
  # Returns nil when the string holds no "<something>:" prefix.
  def self.namespace(string)
    found = string.match(/(.+):/)
    return nil unless found

    prefix = found[1]
    (prefix.nil? || prefix.empty?) ? nil : prefix
  end

  # The name qualified with its namespace. Returns self when the name already
  # contains ":" or when no namespace is known.
  def fullname
    return self if self =~ /:/ || namespace.nil?

    "#{namespace}:#{self}"
  end

  # Case-insensitive equality against another String. Matches when the raw
  # names, the fully qualified names, or this name stripped of its inline
  # namespace agree. Non-String arguments never match.
  def ==(string)
    return false unless String === string
    return true if casecmp(string) == 0

    counterpart = Field === string ? string.fullname : string
    return true if fullname.casecmp(counterpart) == 0

    sub(/.*:/, '').casecmp(string) == 0
  end

  # Inline namespace when present, otherwise the assigned one.
  def namespace
    Field.namespace(self) || @namespace
  end

  # True when this field has no namespace or shares the namespace of +other+.
  def matching_namespaces(other)
    namespace.nil? || namespace == other.namespace
  end
end
|
60
|
-
|
61
|
-
#{{{{ Field END
|
62
|
-
|
63
|
-
# Identifier sources associated with this TSV, always as an Array.
#
# * identifiers is a TSV               -> [identifiers]
# * identifiers is an Array of TSVs,
#   or an empty Array                  -> identifiers itself (not copied)
# * identifiers is an Array of others  -> each entry turned into a
#                                         Resource::Path in this namespace
# * identifiers is any other object    -> a single Resource::Path
# * no identifiers but a filename      -> delegated to the filename's path
# * otherwise                          -> []
#
# The Array-of-paths branch was previously written as a bare `when` with no
# condition; Ruby parsed the following `collect` line as the condition with an
# empty body, so that branch evaluated to nil. It is now a proper `else`.
def identifier_files
  case
  when (identifiers and TSV === identifiers)
    [identifiers]
  when (identifiers and Array === identifiers)
    if TSV === identifiers.first or identifiers.empty?
      identifiers
    else
      identifiers.collect{|f| Resource::Path.path(f, nil, namespace)}
    end
  when (identifiers and not Array === identifiers)
    [Resource::Path.path(identifiers, nil, namespace)]
  when filename
    Resource::Path.path(filename, nil, namespace).identifier_files
  else
    []
  end
end
|
82
|
-
|
83
|
-
# Fields that have no namespace or whose namespace equals +namespace+.
# Passing nil or true selects this TSV's own namespace.
def fields_in_namespace(namespace = nil)
  namespace = self.namespace if namespace.nil? || true.equal?(namespace)

  fields.select do |field|
    field.namespace.nil? || field.namespace == namespace
  end
end
|
87
|
-
|
88
|
-
# A copy of the key field name extended with Field, or nil when no key field
# is set. The copy receives this TSV's namespace when one is defined; the
# stored @key_field itself is left untouched.
def key_field
  return nil if @key_field.nil?

  named_key = @key_field.dup
  named_key.extend Field
  named_key.namespace = namespace unless namespace.nil?
  named_key
end
|
95
|
-
|
96
|
-
# The field names, or nil when none are set. When @fields is an Array each
# stored name is extended with Field in place, and each name receives this
# TSV's namespace when one is defined. The result is whatever
# NamedArray.name(@fields, @fields) returns.
def fields
  return nil if @fields.nil?

  @fields.each { |name| name.extend Field } if Array === @fields
  @fields.each { |name| name.namespace = namespace } unless namespace.nil?

  NamedArray.name(@fields, @fields)
end
|
103
|
-
|
104
|
-
# The key field followed by every field name, or nil when no fields are set.
# Works on a copy of @fields; each entry is extended with Field (when @fields
# is an Array) and given this TSV's namespace when one is defined.
def all_fields
  return nil if @fields.nil?

  combined = @fields.dup
  combined.unshift key_field
  combined.each { |name| name.extend Field } if Array === @fields
  combined.each { |name| name.namespace = namespace } unless namespace.nil?

  # Return value unused; presumably NamedArray.name decorates its first
  # argument in place -- confirm against NamedArray.
  NamedArray.name(combined, [key_field] + @fields)
  combined
end
|
113
|
-
|
114
|
-
# Like all_fields, restricted to entries with no namespace or with the given
# one. nil or true selects this TSV's own namespace. Returns nil when there
# are no fields, and the unfiltered list when no namespace applies.
def all_namespace_fields(namespace = nil)
  namespace = self.namespace if namespace.nil? || true.equal?(namespace)

  candidates = all_fields
  return nil if candidates.nil?
  return candidates if namespace.nil?

  candidates.select do |field|
    field.namespace.nil? || field.namespace == namespace
  end
end
|
121
|
-
|
122
|
-
# Resolve a field reference to a position.
#
# key    - name of the key field.
# fields - list of field names, or nil.
# field  - Integer (returned unchanged), nil, :key, or a field name.
#
# Returns :key when +field+ designates the key (nil, a value equal to 0,
# :key / "key", or a name equal to +key+); otherwise the index of +field+ in
# +fields+, or nil when it is not found or +fields+ is nil.
def self.identify_field(key, fields, field)
  return field if Integer === field

  # Work on an extended copy so the caller's string is not modified.
  field = field.dup.extend(Field) if String === field

  return :key if field.nil? or field == 0 or field.to_sym == :key or field == key
  return nil if fields.nil?

  # Prefer an exact match on the plain names; the scan used to run twice.
  exact = fields.collect{|f| f.to_s}.index(field)
  return exact unless exact.nil?

  # Fall back to the fields' own equality (e.g. Field#==).
  fields.index field
end
|
133
|
-
|
134
|
-
# Resolve +field+ against this TSV's own key field and field names by
# delegating to TSV.identify_field.
def identify_field(field)
  own_key, own_fields = key_field, fields
  TSV.identify_field(own_key, own_fields, field)
end
|
137
|
-
|
138
|
-
# Set the key field name. The value is also forwarded to @data when @data has
# a key_field= writer and reports itself writable through write?.
def key_field=(new_key_field)
  @key_field = new_key_field
  return unless @data.respond_to?(:key_field=) && @data.write?

  @data.key_field = new_key_field
end
|
142
|
-
|
143
|
-
# Set the field names. When +new_fields+ is an Array it is rewritten in place:
# a Field without an inline namespace whose namespace differs from this TSV's
# is replaced by the plain string "namespace:name". The list is also forwarded
# to @data when @data has a fields= writer and reports itself writable.
def fields=(new_fields)
  if Array === new_fields
    new_fields.collect! do |field|
      next field unless Field === field

      foreign = field !~ /:/ &&
                !field.namespace.nil? &&
                field.namespace.to_s != namespace.to_s
      foreign ? field.namespace.to_s + ":" + field.to_s : field
    end
  end

  @fields = new_fields
  return unless @data.respond_to?(:fields=) && @data.write?

  @data.fields = new_fields
end
|
158
|
-
|
159
|
-
# Keys of the underlying @data store.
def keys
  @data.keys
end
|
162
|
-
|
163
|
-
# Raw values of the underlying @data store (not passed through follow).
def values
  @data.values
end
|
166
|
-
|
167
|
-
# Number of entries in the underlying @data store.
def size
  @data.size
end
|
170
|
-
|
171
|
-
# Write
|
172
|
-
|
173
|
-
# Store +value+ under +key+; the key is downcased first when the TSV is
# case-insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
|
177
|
-
|
178
|
-
|
179
|
-
# Copy every key/value pair yielded by new_data.each into this TSV through
# []=, so key normalisation applies. Returns whatever new_data.each returns.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
|
184
|
-
|
185
|
-
# Read
|
186
|
-
|
187
|
-
attr_accessor :unnamed
|
188
|
-
# Post-process a raw stored value before handing it to the caller.
#
# * nil stays nil.
# * A String containing "__Ref:<key>" is resolved by looking <key> up in this
#   TSV.
# * An Array is passed through NamedArray.name with the current fields unless
#   the unnamed flag is set.
# * Anything else is returned unchanged.
def follow(value)
  return nil if value.nil?

  if String === value
    referenced = value[/__Ref:(.*)/, 1]
    return self[referenced] if referenced
  end

  return value unless Array === value
  return value if unnamed

  NamedArray.name value, fields
end
|
200
|
-
|
201
|
-
# Look up +key+.
#
# An Array key is first tried as a literal key of @data (that hit is returned
# raw, without follow); otherwise its elements are tried in order and the
# first non-nil result wins. Any other key is downcased when the TSV is
# case-insensitive (unless it is a "__Ref:" reference) and the stored value is
# passed through follow.
def [](key)
  if Array === key
    return @data[key] unless @data[key].nil?

    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  key = key.downcase if @case_insensitive && key !~ /^__Ref:/
  follow(@data[key])
end
|
211
|
-
|
212
|
-
# Remove +key+ from the underlying @data store and return what @data.delete
# returns. The key is used as given (no case normalisation).
def delete(key)
  @data.delete(key)
end
|
215
|
-
|
216
|
-
# Look up each of +keys+ through [] and return the results in the same order.
def values_at(*keys)
  keys.collect { |key| self[key] }
end
|
221
|
-
|
222
|
-
# Call +block+ with every key and its value after the value has been passed
# through follow. Returns what @data.each returns.
def each(&block)
  @data.each { |key, raw| block.call(key, follow(raw)) }
end
|
227
|
-
|
228
|
-
# Map over the entries. Each value is passed through follow first. With a
# block the block's results are collected; without one the result is a list
# of [key, value] pairs.
def collect
  @data.collect do |key, raw|
    resolved = follow(raw)
    block_given? ? yield(key, resolved) : [key, resolved]
  end
end
|
240
|
-
|
241
|
-
# Sorted list of [key, value] pairs. The pairs come from collect and are
# ordered with Array#sort (using +block+ when given); afterwards each value is
# passed through NamedArray.name when field names are available.
def sort(&block)
  collect.sort(&block).collect do |key, value|
    value = NamedArray.name(value, fields) if fields
    [key, value]
  end
end
|
248
|
-
|
249
|
-
# Sort the [key, value] pairs produced by collect using +block+ as the
# sort_by criterion.
def sort_by(&block)
  collect.sort_by(&block)
end
|
252
|
-
|
253
|
-
# Render the value part of one output row: a leading tab, the cells joined by
# tabs, and a trailing newline.
#
# * nil values with no fields give just "\n".
# * nil values with fields give one empty cell per field.
# * A non-Array value is written through to_s.
# * When the first element is an Array, every element is treated as a list
#   and joined with "|" (nil lists become empty cells).
# * Otherwise the elements are joined with tabs.
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?

    return "\t" + ([""] * fields.length).join("\t") + "\n"
  end

  return "\t" + values.to_s + "\n" unless Array === values

  cells = if Array === values.first
            values.collect { |list| (list || []) * "|" }
          else
            values
          end
  "\t" + cells * "\t" + "\n"
end
|
267
|
-
|
268
|
-
# Whether the underlying @data store contains +key+. The key is used as given
# (no case normalisation).
def include?(key)
  @data.include?(key)
end
|
271
|
-
|
272
|
-
def to_s(keys = nil, no_options = false)
|
273
|
-
if FalseClass === keys or TrueClass === keys
|
274
|
-
no_options = keys
|
275
|
-
keys = nil
|
276
|
-
end
|
277
|
-
|
278
|
-
if keys == :sort
|
279
|
-
keys = self.keys.sort
|
280
|
-
end
|
281
|
-
|
282
|
-
str = ""
|
283
|
-
|
284
|
-
str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n" unless no_options
|
285
|
-
if fields
|
286
|
-
str << "#" << key_field << "\t" << fields * "\t" << "\n"
|
287
|
-
end
|
288
|
-
|
289
|
-
saved_unnamed = unnamed
|
290
|
-
unnamed = false
|
291
|
-
if keys.nil?
|
292
|
-
each do |key, values|
|
293
|
-
key = key.to_s if Symbol === key
|
294
|
-
str << key.dup << values_to_s(values)
|
295
|
-
end
|
296
|
-
else
|
297
|
-
keys.zip(values_at(*keys)).each do |key, values|
|
298
|
-
key = key.to_s if Symbol === key
|
299
|
-
str << key.dup << values_to_s(values)
|
300
|
-
end
|
301
|
-
end
|
302
|
-
|
303
|
-
unnamed = saved_unnamed
|
304
|
-
str
|
305
|
-
end
|
306
|
-
|
307
|
-
# A small sample of the table: a Hash built from the first eleven keys
# (indices 0..10) paired with the first eleven raw values.
def value_peek
  Hash[keys[0..10].zip(values[0..10])]
end
|
312
|
-
end
|
data/lib/rbbt/util/tsv/attach.rb
DELETED
@@ -1,416 +0,0 @@
|
|
1
|
-
class TSV
|
2
|
-
# Collapse consecutive rows that share the same first column into one row,
# joining the values of each remaining column with "|", and write the result
# to +output+ through Open.write.
#
# input  - a path to an existing file, a String/StringIO with the content
#          (both are piped through `sort | grep` by CMD.cmd first), or any
#          object answering gets/eof? that is already sorted by key.
# output - destination handed to Open.write.
# sep    - column separator.
#
# Fixes over the previous version: the final group of rows is now written
# (it used to be dropped because nothing flushed it after the loop), and
# File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
#
# NOTE(review): +input+ and +sep+ are interpolated into a shell command
# unescaped; only pass trusted values.
def self.merge_rows(input, output, sep = "\t")
  is = case
       when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
         CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
       when (String === input or StringIO === input)
         CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
       else
         input
       end

  current_key = nil
  current_parts = []

  Open.write(output) do |os|
    until is.eof?
      key, *parts = is.gets.sub("\n",'').split(sep, -1)
      next if key.nil? # blank line

      current_key ||= key
      if current_key == key
        # Same key: append each column to what has been gathered so far.
        parts.each_with_index do |part, i|
          if current_parts[i].nil?
            current_parts[i] = part
          else
            current_parts[i] = current_parts[i] << "|" << part
          end
        end
      else
        # Key changed: emit the finished group and start a new one.
        os.puts [current_key, current_parts].flatten * sep
        current_key = key
        current_parts = parts
      end
    end

    # Emit the group that was still being gathered when the input ended.
    os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
  end
end
|
43
|
-
|
44
|
-
# Helper for paste_merge: turn one input into a line stream sorted by key.
# Paths to existing files, Strings/StringIOs with content, and TSV objects are
# piped through `sort | grep` by CMD.cmd; anything else is assumed to be a
# sorted stream already and is returned as is.
# NOTE(review): the path and +sep+ reach the shell unescaped.
def self.paste_merge_stream(file, sep)
  case
  when (String === file and not file.index("\n") and file.length < 250 and File.exist?(file))
    CMD.cmd("sort -k1,1 -t'#{sep}' #{ file } | grep -v '^#{sep}' ", :pipe => true)
  when (String === file or StringIO === file)
    CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file, :pipe => true)
  when TSV === file
    CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file.to_s(:sort, true), :pipe => true)
  else
    file
  end
end

# Helper for paste_merge: read up to the first data row of +stream+, skipping
# lines that contain "#" and blank lines. Returns [key, parts, done], where
# done is true (with a nil key and no parts) when the stream ran out first.
def self.paste_merge_first_row(stream, sep)
  loop do
    line = stream.gets
    return [nil, [], true] if line.nil?
    next if line =~ /#/

    key, *parts = line.sub("\n",'').split(sep, -1)
    return [key, parts, false] unless key.nil?
  end
end

# Merge two key-sorted tables side by side, joining on the first column, and
# write the result to +output+ (a path or an object answering puts/close,
# which is closed at the end).
#
# Every output row holds the key, then the columns of file1, then the columns
# of file2; a table that lacks the key contributes empty cells, and repeated
# keys within a table have their values joined with "|". The column count of
# each table is taken from its first data row.
#
# Changes over the previous version: File.exist? replaces File.exists?
# (gone in Ruby 3.2+), and an input without any data row is treated as an
# empty table instead of failing on nil.
def self.paste_merge(file1, file2, output, sep = "\t")
  file1 = paste_merge_stream(file1, sep)
  file2 = paste_merge_stream(file2, sep)

  output = File.open(output, 'w') if String === output

  key1, parts1, done1 = paste_merge_first_row(file1, sep)
  key2, parts2, done2 = paste_merge_first_row(file2, sep)
  cols1 = parts1.length
  cols2 = parts2.length

  while not (done1 and done2)
    # Next key to emit: the smaller of the two pending keys, or the only one
    # left once a table is exhausted.
    key = if done1
            key2
          elsif done2
            key1
          else
            key1 < key2 ? key1 : key2
          end
    parts = [""] * (cols1 + cols2)

    # Consume every file1 row carrying this key.
    while (not done1 and key1 == key)
      parts1.each_with_index do |part, i|
        parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
      end
      key1 = nil
      while key1.nil? and not done1
        if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
      end
    end

    # Consume every file2 row carrying this key; its cells go after file1's.
    while (not done2 and key2 == key)
      parts2.each_with_index do |part, i|
        i += cols1
        parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
      end
      key2 = nil
      while key2.nil? and not done2
        if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
      end
    end

    output.puts [key, parts].flatten * sep
  end

  output.close
end
|
122
|
-
#{{{ Attach Methods
|
123
|
-
|
124
|
-
# Append +fields+ of +other+ to every entry of this TSV, matching entries by
# identical key. Used when both tables share the same key field.
#
# other  - table answering include?, [], type and fields.
# fields - names to attach; defaults to the fields of +other+ that are neither
#          this table's key field nor one of its fields.
#
# Values are wrapped or unwrapped to match this table's type (:double keeps
# lists of values). Entries missing from +other+ are padded with one empty
# cell per attached field. The padding cells are now built individually;
# `[[]] * n` / `[""] * n` used to put the very same object in every cell.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  through do |key, values|
    if other.include? key
      new_values = other[key].values_at(*fields)
      new_values.collect!{|v| [v]} if type == :double and not other.type == :double
      new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
    else
      double = (type == :double)
      new_values = Array.new(fields.length) { double ? [] : "" }
    end
    self[key] = self[key].concat new_values
  end

  self.fields = self.fields.concat other.fields.values_at(*fields)
end
|
144
|
-
|
145
|
-
# Append +fields+ of +other+ to every entry of this TSV, using the values
# found in this table's +source+ field as keys into +other+.
#
# other  - a TSV, or an object whose tsv method produces one.
# source - field of this table holding keys of +other+.
# fields - names to attach; defaults to the fields of +other+ that are neither
#          this table's key field nor one of its fields.
#
# For :double tables the values found for all source keys are combined per
# field; otherwise only the values of the first matching source key are used.
# Entries without any match are padded with empty cells, now built
# individually instead of repeating one shared object with `* n`. The former
# nil/empty check on the source keys was unreachable once the value had been
# wrapped in an Array and has been dropped.
def attach_source_key(other, source, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv(:persistence => :no_create) unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = values[source]
    source_keys = [source_keys] unless Array === source_keys

    all_new_values = []
    source_keys.each do |source_key|
      next unless other.include? source_key
      new_values = field_positions.collect do |pos|
        if pos == :key
          source_key
        else
          other[source_key][pos]
        end
      end

      new_values.collect!{|v| [v]} if type == :double and not other.type == :double
      new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
      all_new_values << new_values
    end

    if all_new_values.empty?
      double = (type == :double)
      self[key] = self[key].concat Array.new(field_positions.length) { double ? [] : "" }
    elsif type == :double
      self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
    else
      self[key] = self[key].concat all_new_values.first
    end
  end

  self.fields = self.fields.concat field_names
end
|
192
|
-
|
193
|
-
# Append +fields+ of +other+ to every entry of this TSV, translating this
# table's keys into keys of +other+ through +index+.
#
# other  - a TSV, or an object whose tsv method produces one.
# index  - answers [] with the list of keys of +other+ for a key of this table.
# fields - name or list of names to attach; defaults to the fields of +other+
#          that are neither this table's key field nor one of its fields.
#
# Values already present beyond the original field count of an entry are
# merged with the newly found ones. Entries without any match receive empty
# cells, now built individually instead of repeating one shared object.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
  fields = [fields] unless Array === fields

  other = other.tsv unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  # Number of fields before attaching; anything past it was attached earlier.
  length = self.fields.length
  through do |key, values|
    source_keys = index[key]

    all_new_values = []
    if !source_keys.nil? && !source_keys.empty?
      source_keys.each do |source_key|
        next unless other.include? source_key
        new_values = field_positions.collect do |pos|
          if pos == :key
            if other.type == :double
              [source_key]
            else
              source_key
            end
          else
            other[source_key][pos]
          end
        end
        new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
        new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
        all_new_values << new_values
      end
    end

    if all_new_values.empty?
      double = (type == :double)
      all_new_values = [Array.new(field_positions.length) { double ? [] : "" }]
    end

    current = self[key]

    if current.length > length
      all_new_values << current.slice!(length..current.length - 1)
    end

    if type == :double
      all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
    else
      all_new_values = all_new_values.first
    end

    current += all_new_values

    self[key] = current
  end

  self.fields = self.fields.concat field_names
end
|
255
|
-
|
256
|
-
#{{{ Attach Helper
|
257
|
-
|
258
|
-
# May make an extra index!
|
259
|
-
# Find a chain of shared fields linking consecutive +files+.
# May make an extra index!
#
# files   - ordered list of objects answering all_fields (and, for the first
#           one, all_namespace_fields when :in_namespace is used).
# options - :in_namespace restricts the first file either to that single
#           field (when it is one of its fields) or to the fields of that
#           namespace.
#
# For every adjacent pair the first field of the earlier file that also
# appears in the later one is chosen. Returns nil when some pair shares no
# field; otherwise the chosen fields zipped with +files+. The first field of
# the last file is appended as a final step unless the chain already ends
# with it.
def self.find_path(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  if in_namespace
    first_ids = if files.first.all_fields.include? in_namespace
                  [in_namespace]
                else
                  files.first.all_namespace_fields(in_namespace)
                end
    ids = [first_ids] + files[1..-1].collect { |file| file.all_fields }
  else
    ids = files.collect { |file| file.all_fields }
  end

  id_list = []
  ids.each_cons(2) do |list, following|
    match = list.select { |field| following.select { |f| field == f }.any? }
    return nil if match.empty?
    id_list << match.first
  end

  if id_list.last != files.last.all_fields.first
    id_list << files.last.all_fields.first
  end
  # zip stops at the length of id_list, so a trailing file may be left out.
  id_list.zip(files)
end
|
291
|
-
|
292
|
-
# Build an index that translates along the chain of files found by find_path.
#
# files   - ordered list of tables to traverse.
# options - :in_namespace (forwarded to find_path) and :persist_input
#           (forwarded as :persistence to the intermediate index calls).
#
# Returns nil when find_path finds no chain. Otherwise it starts from an index
# of the first file (skipped when the linking field already is that file's
# key field) and rewrites it step by step with the index of each following
# file. The result is nil when the chain has a single step that needs no
# index.
def self.build_traverse_index(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
  persist_input = options[:persist_input]

  path = find_path(files, options)
  return nil if path.nil?

  traversal_ids = path.collect { |step| step.first }
  Log.medium "Found Traversal: #{traversal_ids * " => "}"

  data_key, data_file = path.shift
  if data_key == data_file.key_field
    Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
    current_index = nil
  else
    Log.debug "Data index required"
    current_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
  end

  current_key = data_key
  until path.empty?
    next_key, next_file = path.shift

    if current_index.nil?
      # Nothing to chain yet: the next file's index becomes the current one.
      current_index = next_file.index :target => next_key, :fields => current_key, :persistence => (persist_input and path.empty?)
    else
      # Replace the values of the current index by their translation.
      next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
      current_index.process current_index.fields.first do |values|
        values.nil? ? nil : next_index.values_at(*values).flatten.collect.to_a
      end
      current_index.fields = [next_key]
    end

    current_key = next_key
  end

  current_index
end
|
337
|
-
|
338
|
-
|
339
|
-
# Search for an index connecting +tsv1+ to +tsv2+, going through their
# identifier files when the tables share no field directly.
#
# Candidate chains are tried in order: a growing prefix of
# [tsv1, *tsv1.identifier_files], each combined with every prefix of
# [tsv2, *tsv2.identifier_files] in reverse. The first chain for which
# build_traverse_index returns an index wins; nil is returned when none does.
#
# The candidate lists are now fresh arrays. Previously the array returned by
# identifier_files was modified with unshift/shift, and that array can be the
# table's own identifiers list.
def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false

  identifiers1 = [tsv1] + (tsv1.identifier_files || [])
  identifiers2 = [tsv2] + (tsv2.identifier_files || [])

  files1 = []
  while identifiers1.any?
    files1.push identifiers1.shift
    identifiers2.each_with_index do |e, i|
      files2 = identifiers2[(0..i)]
      index = build_traverse_index(files1 + files2.reverse, options)
      return index if not index.nil?
    end
  end

  return nil
end
|
362
|
-
|
363
|
-
# Attach fields of +other+ to this TSV and return self.
#
# other   - a TSV, or an object whose tsv method produces one.
# fields  - names to attach; nil or :all selects every field of +other+ that
#           is neither this table's key field nor one of its fields (with
#           :in_namespace and nil, only other.fields_in_namespace).
# options - :in_namespace and :persist_input.
#
# Strategy, first match wins:
# 1. same key field           -> attach_same_key
# 2. other's key field is one of this table's fields (restricted to
#    fields_in_namespace when :in_namespace is set) -> attach_source_key
# 3. otherwise an index found by TSV.find_traversal -> attach_index
#
# Raises RuntimeError "Cannot traverse identifiers" when no index is found.
def attach(other, fields = nil, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  if fields == :all
    fields = other.fields - [key_field].concat(self.fields)
  end
  if fields.nil?
    candidates = in_namespace ? other.fields_in_namespace : other.fields
    fields = candidates - [key_field].concat(self.fields)
  end

  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  other = other.tsv(:persistence => options[:persist_input] == true) unless TSV === other

  if key_field == other.key_field
    attach_same_key other, fields
  elsif !in_namespace && self.fields.include?(other.key_field)
    Log.medium "Found other's key field: #{other.key_field}"
    attach_source_key other, other.key_field, fields
  elsif in_namespace && self.fields_in_namespace.include?(other.key_field)
    Log.medium "Found other's key field in #{in_namespace}: #{other.key_field}"
    attach_source_key other, other.key_field, fields
  else
    index = TSV.find_traversal(self, other, options)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")

  self
end
|
395
|
-
|
396
|
-
# Hand to reorder the positions of this table's fields whose full name also
# appears among the fields of +file+: calls reorder(:key, positions) and
# returns its result.
def detach(file)
  file_fields = file.fields.collect { |field| field.fullname }

  detached_fields = self.fields.each_with_index.select do |field, _position|
    file_fields.include? field.fullname
  end.collect { |_field, position| position }

  reorder :key, detached_fields
end
|
402
|
-
|
403
|
-
# Merge this table with +other+ side by side through TSV.paste_merge, using a
# temporary file supplied by TmpFile.with_file, and load the result as a new
# TSV built with +options+.
#
# options - passed to TSV.new; :sep is also used as the column separator of
#           the merge (tab by default).
#
# The new table takes this table's key field when it has one, and the
# concatenation of both field lists when both tables have fields.
def paste(other, options = {})
  separator = options[:sep] || "\t"

  TmpFile.with_file do |output|
    TSV.paste_merge(self, other, output, separator)

    pasted = TSV.new(output, options)
    pasted.key_field = self.key_field unless self.key_field.nil?
    pasted.fields = self.fields + other.fields unless self.fields.nil? || other.fields.nil?
    pasted
  end
end
|
412
|
-
|
413
|
-
# Run `paste` over +files+ with +delim+ as the delimiter, then `sed` to delete
# every run that starts at a delimiter and continues over characters matched
# by [^\t]*. The command is run through CMD.cmd with :pipe => true and its
# result is returned.
# NOTE(review): file names and +delim+ reach the shell unescaped.
def self.fast_paste(files, delim = "$")
  quoted_files = files.collect { |f| "'#{f}'" }.join(" ")
  command = "paste #{quoted_files} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'"
  CMD.cmd(command, :pipe => true)
end
|
416
|
-
end
|