rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -1,312 +0,0 @@
1
- require 'rbbt/util/resource'
2
- require 'rbbt/util/misc'
3
-
4
- class TSV
5
- ## Make sure we overwrite the methods declared by attr_accessor
6
- MAIN_ACCESSORS = :data, :key_field, :fields, :cast
7
- EXTRA_ACCESSORS = :filename, :identifiers, :namespace, :type, :case_insensitive
8
- attr_accessor *(MAIN_ACCESSORS + EXTRA_ACCESSORS)
9
-
10
- def self.zip_fields(list, fields = nil)
11
- return [] if list.nil? || list.empty?
12
- fields ||= list.fields if list.respond_to? :fields
13
- zipped = list[0].zip(*list[1..-1])
14
- zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
15
- zipped
16
- end
17
-
18
- module Field
19
- attr_accessor :namespace
20
-
21
- def self.field(field, namespace = nil)
22
- field.extend Field
23
- field.namespace = namespace
24
- field
25
- end
26
-
27
- def self.namespace(string)
28
- return nil unless string.match(/(.+):/)
29
- namespace_str = $1
30
- return nil if namespace_str.nil? or namespace_str.empty?
31
- namespace_str
32
- end
33
-
34
- def fullname
35
- return self if self =~ /:/ or namespace.nil?
36
- namespace.to_s + ":" << self
37
- end
38
-
39
- def ==(string)
40
- return false unless String === string
41
- return true if self.casecmp(string) == 0
42
- if Field === string
43
- return true if self.fullname.casecmp(string.fullname) == 0
44
- else
45
- return true if self.fullname.casecmp(string) == 0
46
- end
47
- return true if self.sub(/.*:/,'').casecmp(string) == 0
48
- return false
49
- end
50
-
51
- def namespace
52
- Field.namespace(self) || @namespace
53
- end
54
-
55
- def matching_namespaces(other)
56
- return true if namespace.nil?
57
- return namespace == other.namespace
58
- end
59
- end
60
-
61
- #{{{{ Field END
62
-
63
- def identifier_files
64
- case
65
- when (identifiers and TSV === identifiers)
66
- [identifiers]
67
- when (identifiers and Array === identifiers)
68
- case
69
- when (TSV === identifiers.first or identifiers.empty?)
70
- identifiers
71
- when
72
- identifiers.collect{|f| Resource::Path.path(f, nil, namespace)}
73
- end
74
- when (identifiers and not Array === identifiers)
75
- [Resource::Path.path(identifiers, nil, namespace)]
76
- when filename
77
- Resource::Path.path(filename, nil, namespace).identifier_files
78
- else
79
- []
80
- end
81
- end
82
-
83
- def fields_in_namespace(namespace = nil)
84
- namespace = self.namespace if namespace == nil or TrueClass === namespace
85
- fields.select{|f| f.namespace.nil? or f.namespace == namespace}
86
- end
87
-
88
- def key_field
89
- return nil if @key_field.nil?
90
- k = @key_field.dup
91
- k.extend Field
92
- k.namespace = namespace unless namespace.nil?
93
- k
94
- end
95
-
96
- def fields
97
- return nil if @fields.nil?
98
- fds = @fields
99
- fds.each do |f| f.extend Field end if Array === @fields
100
- fds.each do |f| f.namespace = namespace end unless namespace.nil?
101
- NamedArray.name(fds, @fields)
102
- end
103
-
104
- def all_fields
105
- return nil if @fields.nil?
106
- all_fields = @fields.dup
107
- all_fields.unshift key_field
108
- all_fields.each do |f| f.extend Field end if Array === @fields
109
- all_fields.each do |f| f.namespace = namespace end unless namespace.nil?
110
- NamedArray.name(all_fields, [key_field] + @fields)
111
- all_fields
112
- end
113
-
114
- def all_namespace_fields(namespace = nil)
115
- namespace = self.namespace if namespace == nil or TrueClass === namespace
116
- all_fields = self.all_fields
117
- return nil if all_fields.nil?
118
- return all_fields if namespace.nil?
119
- all_fields.select{|f| f.namespace.nil? or f.namespace == namespace}
120
- end
121
-
122
- def self.identify_field(key, fields, field)
123
- return field if Integer === field
124
- if String === field
125
- field = field.dup
126
- field.extend Field
127
- end
128
- return :key if field.nil? or field == 0 or field.to_sym == :key or field == key
129
- return nil if fields.nil?
130
- return fields.collect{|f| f.to_s}.index field if fields.collect{|f| f.to_s}.index field
131
- return fields.index field
132
- end
133
-
134
- def identify_field(field)
135
- TSV.identify_field(key_field, fields, field)
136
- end
137
-
138
- def key_field=(new_key_field)
139
- @key_field = new_key_field
140
- @data.key_field = new_key_field if @data.respond_to? :key_field= and @data.write?
141
- end
142
-
143
- def fields=(new_fields)
144
- new_fields.collect! do |field|
145
- if Field === field
146
- if field !~ /:/ and field.namespace != nil and field.namespace.to_s != namespace.to_s
147
- field.namespace.to_s + ":" + field.to_s
148
- else
149
- field
150
- end
151
- else
152
- field
153
- end
154
- end if Array === new_fields
155
- @fields = new_fields
156
- @data.fields = new_fields if @data.respond_to? :fields= and @data.write?
157
- end
158
-
159
- def keys
160
- @data.keys
161
- end
162
-
163
- def values
164
- @data.values
165
- end
166
-
167
- def size
168
- @data.size
169
- end
170
-
171
- # Write
172
-
173
- def []=(key, value)
174
- key = key.downcase if @case_insensitive
175
- @data[key] = value
176
- end
177
-
178
-
179
- def merge!(new_data)
180
- new_data.each do |key, value|
181
- self[key] = value
182
- end
183
- end
184
-
185
- # Read
186
-
187
- attr_accessor :unnamed
188
- def follow(value)
189
- return nil if value.nil?
190
- if String === value && value =~ /__Ref:(.*)/
191
- return self[$1]
192
- else
193
-
194
- if Array === value and not unnamed
195
- value = NamedArray.name value, fields
196
- end
197
- value
198
- end
199
- end
200
-
201
- def [](key)
202
- if Array === key
203
- return @data[key] if @data[key] != nil
204
- key.each{|k| v = self[k]; return v unless v.nil?}
205
- return nil
206
- end
207
-
208
- key = key.downcase if @case_insensitive and key !~ /^__Ref:/
209
- follow @data[key]
210
- end
211
-
212
- def delete(key)
213
- @data.delete(key)
214
- end
215
-
216
- def values_at(*keys)
217
- keys.collect{|k|
218
- self[k]
219
- }
220
- end
221
-
222
- def each(&block)
223
- @data.each do |key, value|
224
- block.call(key, follow(value))
225
- end
226
- end
227
-
228
- def collect
229
- if block_given?
230
- @data.collect do |key, value|
231
- value = follow(value)
232
- yield key, value
233
- end
234
- else
235
- @data.collect do |key, value|
236
- [key, follow(value)]
237
- end
238
- end
239
- end
240
-
241
- def sort(&block)
242
- collect.sort(&block).collect{|p|
243
- key, value = p
244
- value = NamedArray.name value, fields if fields
245
- [key, value]
246
- }
247
- end
248
-
249
- def sort_by(&block)
250
- collect.sort_by &block
251
- end
252
-
253
- def values_to_s(values)
254
- case
255
- when (values.nil? and fields.nil?)
256
- "\n"
257
- when (values.nil? and not fields.nil?)
258
- "\t" << ([""] * fields.length) * "\t" << "\n"
259
- when (not Array === values)
260
- "\t" << values.to_s << "\n"
261
- when Array === values.first
262
- "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
263
- else
264
- "\t" << values * "\t" << "\n"
265
- end
266
- end
267
-
268
- def include?(key)
269
- @data.include? key
270
- end
271
-
272
- def to_s(keys = nil, no_options = false)
273
- if FalseClass === keys or TrueClass === keys
274
- no_options = keys
275
- keys = nil
276
- end
277
-
278
- if keys == :sort
279
- keys = self.keys.sort
280
- end
281
-
282
- str = ""
283
-
284
- str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n" unless no_options
285
- if fields
286
- str << "#" << key_field << "\t" << fields * "\t" << "\n"
287
- end
288
-
289
- saved_unnamed = unnamed
290
- unnamed = false
291
- if keys.nil?
292
- each do |key, values|
293
- key = key.to_s if Symbol === key
294
- str << key.dup << values_to_s(values)
295
- end
296
- else
297
- keys.zip(values_at(*keys)).each do |key, values|
298
- key = key.to_s if Symbol === key
299
- str << key.dup << values_to_s(values)
300
- end
301
- end
302
-
303
- unnamed = saved_unnamed
304
- str
305
- end
306
-
307
- def value_peek
308
- peek = {}
309
- keys[0..10].zip(values[0..10]).each do |k,v| peek[k] = v end
310
- peek
311
- end
312
- end
@@ -1,416 +0,0 @@
1
- class TSV
2
- def self.merge_rows(input, output, sep = "\t")
3
- is = case
4
- when (String === input and not input.index("\n") and input.length < 250 and File.exists?(input))
5
- CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
6
- when (String === input or StringIO === input)
7
- CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
8
- else
9
- input
10
- end
11
-
12
- current_key = nil
13
- current_parts = []
14
-
15
- done = false
16
- Open.write(output) do |os|
17
-
18
- done = is.eof?
19
- while not done
20
- key, *parts = is.gets.sub("\n",'').split(sep, -1)
21
- current_key ||= key
22
- case
23
- when key.nil?
24
- when current_key == key
25
- parts.each_with_index do |part,i|
26
- if current_parts[i].nil?
27
- current_parts[i] = part
28
- else
29
- current_parts[i] = current_parts[i] << "|" << part
30
- end
31
- end
32
- when current_key != key
33
- os.puts [current_key, current_parts].flatten * sep
34
- current_key = key
35
- current_parts = parts
36
- end
37
-
38
- done = is.eof?
39
- end
40
-
41
- end
42
- end
43
-
44
- def self.paste_merge(file1, file2, output, sep = "\t")
45
- case
46
- when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
47
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
48
- when (String === file1 or StringIO === file1)
49
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
50
- when TSV === file1
51
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
52
- end
53
-
54
- case
55
- when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
56
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
57
- when (String === file2 or StringIO === file2)
58
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
59
- when TSV === file2
60
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
61
- end
62
-
63
- output = File.open(output, 'w') if String === output
64
-
65
- cols1 = nil
66
- cols2 = nil
67
-
68
- done1 = false
69
- done2 = false
70
-
71
- key1 = key2 = nil
72
- while key1.nil?
73
- while (line1 = file1.gets) =~ /#/; end
74
- key1, *parts1 = line1.sub("\n",'').split(sep, -1)
75
- cols1 = parts1.length
76
- end
77
-
78
- while key2.nil?
79
- while (line2 = file2.gets) =~ /#/; end
80
- key2, *parts2 = line2.sub("\n",'').split(sep, -1)
81
- cols2 = parts2.length
82
- end
83
-
84
- key = key1 < key2 ? key1 : key2
85
- parts = [""] * (cols1 + cols2)
86
- while not (done1 and done2)
87
- while (not done1 and key1 == key)
88
- parts1.each_with_index do |part, i|
89
- parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
90
- end
91
- key1 = nil
92
- while key1.nil? and not done1
93
- if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
94
- end
95
- end
96
- while (not done2 and key2 == key)
97
- parts2.each_with_index do |part, i|
98
- i += cols1
99
- parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
100
- end
101
- key2 = nil
102
- while key2.nil? and not done2
103
- if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
104
- end
105
- end
106
-
107
- output.puts [key, parts].flatten * sep
108
- parts = [""] * (cols1 + cols2)
109
-
110
- case
111
- when done1
112
- key = key2
113
- when done2
114
- key = key1
115
- else
116
- key = key1 < key2 ? key1 : key2
117
- end
118
- end
119
-
120
- output.close
121
- end
122
- #{{{ Attach Methods
123
-
124
- def attach_same_key(other, fields = nil)
125
- fields = other.fields - [key_field].concat(self.fields) if fields.nil?
126
-
127
- through do |key, values|
128
- if other.include? key
129
- new_values = other[key].values_at *fields
130
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
131
- new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
132
- self[key] = self[key].concat new_values
133
- else
134
- if type == :double
135
- self[key] = self[key].concat [[]] * fields.length
136
- else
137
- self[key] = self[key].concat [""] * fields.length
138
- end
139
- end
140
- end
141
-
142
- self.fields = self.fields.concat other.fields.values_at *fields
143
- end
144
-
145
- def attach_source_key(other, source, fields = nil)
146
- fields = other.fields - [key_field].concat(self.fields) if fields.nil?
147
-
148
- other = other.tsv(:persistence => :no_create) unless TSV === other
149
- field_positions = fields.collect{|field| other.identify_field field}
150
- field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
151
-
152
- through do |key, values|
153
- source_keys = values[source]
154
- source_keys = [source_keys] unless Array === source_keys
155
- if source_keys.nil? or source_keys.empty?
156
- all_new_values = []
157
- else
158
- all_new_values = []
159
- source_keys.each do |source_key|
160
- next unless other.include? source_key
161
- new_values = field_positions.collect do |pos|
162
- if pos == :key
163
- source_key
164
- else
165
- other[source_key][pos]
166
- end
167
- end
168
-
169
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
170
- new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
171
- all_new_values << new_values
172
- end
173
- end
174
-
175
- if all_new_values.empty?
176
- if type == :double
177
- self[key] = self[key].concat [[]] * field_positions.length
178
- else
179
- self[key] = self[key].concat [""] * field_positions.length
180
- end
181
- else
182
- if type == :double
183
- self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
184
- else
185
- self[key] = self[key].concat all_new_values.first
186
- end
187
- end
188
- end
189
-
190
- self.fields = self.fields.concat field_names
191
- end
192
-
193
- def attach_index(other, index, fields = nil)
194
- fields = other.fields - [key_field].concat(self.fields) if fields.nil?
195
- fields = [fields] unless Array === fields
196
-
197
- other = other.tsv unless TSV === other
198
- field_positions = fields.collect{|field| other.identify_field field}
199
- field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
200
-
201
- length = self.fields.length
202
- through do |key, values|
203
- source_keys = index[key]
204
- if source_keys.nil? or source_keys.empty?
205
- all_new_values = []
206
- else
207
- all_new_values = []
208
- source_keys.each do |source_key|
209
- next unless other.include? source_key
210
- new_values = field_positions.collect do |pos|
211
- if pos == :key
212
- if other.type == :double
213
- [source_key]
214
- else
215
- source_key
216
- end
217
- else
218
- other[source_key][pos]
219
- end
220
- end
221
- new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
222
- new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
223
- all_new_values << new_values
224
- end
225
- end
226
-
227
- if all_new_values.empty?
228
- if type == :double
229
- all_new_values = [[[]] * field_positions.length]
230
- else
231
- all_new_values = [[""] * field_positions.length]
232
- end
233
- end
234
-
235
- current = self[key]
236
-
237
- if current.length > length
238
- all_new_values << current.slice!(length..current.length - 1)
239
- end
240
-
241
- if type == :double
242
- all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
243
- else
244
- all_new_values = all_new_values.first
245
- end
246
-
247
- current += all_new_values
248
-
249
- self[key] = current
250
-
251
- end
252
-
253
- self.fields = self.fields.concat field_names
254
- end
255
-
256
- #{{{ Attach Helper
257
-
258
- # May make an extra index!
259
- def self.find_path(files, options = {})
260
- options = Misc.add_defaults options, :in_namespace => false
261
- in_namespace = options[:in_namespace]
262
-
263
- if in_namespace
264
- if files.first.all_fields.include? in_namespace
265
- ids = [[in_namespace]]
266
- else
267
- ids = [files.first.all_namespace_fields(in_namespace)]
268
- end
269
- ids += files[1..-1].collect{|f| f.all_fields}
270
- else
271
- ids = files.collect{|f| f.all_fields}
272
- end
273
- id_list = []
274
-
275
- ids.each_with_index do |list, i|
276
- break if i == ids.length - 1
277
- match = list.select{|field|
278
- ids[i + 1].select{|f| field == f}.any?
279
- }
280
- return nil if match.empty?
281
- id_list << match.first
282
- end
283
-
284
- if id_list.last != files.last.all_fields.first
285
- id_list << files.last.all_fields.first
286
- id_list.zip(files)
287
- else
288
- id_list.zip(files[0..-1])
289
- end
290
- end
291
-
292
- def self.build_traverse_index(files, options = {})
293
- options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
294
- in_namespace = options[:in_namespace]
295
- persist_input = options[:persist_input]
296
-
297
- path = find_path(files, options)
298
-
299
- return nil if path.nil?
300
-
301
- traversal_ids = path.collect{|p| p.first}
302
-
303
- Log.medium "Found Traversal: #{traversal_ids * " => "}"
304
-
305
- data_key, data_file = path.shift
306
- data_index = if data_key == data_file.key_field
307
- Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
308
- nil
309
- else
310
- Log.debug "Data index required"
311
- data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
312
- end
313
-
314
- current_index = data_index
315
- current_key = data_key
316
- while not path.empty?
317
- next_key, next_file = path.shift
318
-
319
- if current_index.nil?
320
- current_index = next_file.index :target => next_key, :fields => current_key, :persistence => (persist_input and path.empty?)
321
- else
322
- next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
323
- current_index.process current_index.fields.first do |values|
324
- if values.nil?
325
- nil
326
- else
327
- next_index.values_at(*values).flatten.collect.to_a
328
- end
329
- end
330
- current_index.fields = [next_key]
331
- end
332
- current_key = next_key
333
- end
334
-
335
- current_index
336
- end
337
-
338
-
339
- def self.find_traversal(tsv1, tsv2, options = {})
340
- options = Misc.add_defaults options, :in_namespace => false
341
- in_namespace = options[:in_namespace]
342
-
343
- identifiers1 = tsv1.identifier_files || []
344
- identifiers2 = tsv2.identifier_files || []
345
-
346
- identifiers1.unshift tsv1
347
- identifiers2.unshift tsv2
348
-
349
- files1 = []
350
- files2 = []
351
- while identifiers1.any?
352
- files1.push identifiers1.shift
353
- identifiers2.each_with_index do |e,i|
354
- files2 = identifiers2[(0..i)]
355
- index = build_traverse_index(files1 + files2.reverse, options)
356
- return index if not index.nil?
357
- end
358
- end
359
-
360
- return nil
361
- end
362
-
363
- def attach(other, fields = nil, options = {})
364
- options = Misc.add_defaults options, :in_namespace => false
365
- in_namespace = options[:in_namespace]
366
-
367
- fields = other.fields - [key_field].concat(self.fields) if fields == :all
368
- if in_namespace
369
- fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
370
- else
371
- fields = other.fields - [key_field].concat(self.fields) if fields.nil?
372
- end
373
-
374
- Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")
375
-
376
- other = other.tsv(:persistence => options[:persist_input] == true) unless TSV === other
377
- case
378
- when key_field == other.key_field
379
- attach_same_key other, fields
380
- when (not in_namespace and self.fields.include?(other.key_field))
381
- Log.medium "Found other's key field: #{other.key_field}"
382
- attach_source_key other, other.key_field, fields
383
- when (in_namespace and self.fields_in_namespace.include?(other.key_field))
384
- Log.medium "Found other's key field in #{in_namespace}: #{other.key_field}"
385
- attach_source_key other, other.key_field, fields
386
- else
387
- index = TSV.find_traversal(self, other, options)
388
- raise "Cannot traverse identifiers" if index.nil?
389
- attach_index other, index, fields
390
- end
391
- Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
392
-
393
- self
394
- end
395
-
396
- def detach(file)
397
- file_fields = file.fields.collect{|field| field.fullname}
398
- detached_fields = []
399
- self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
400
- reorder :key, detached_fields
401
- end
402
-
403
- def paste(other, options = {})
404
- TmpFile.with_file do |output|
405
- TSV.paste_merge(self, other, output, options[:sep] || "\t")
406
- tsv = TSV.new output, options
407
- tsv.key_field = self.key_field unless self.key_field.nil?
408
- tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
409
- tsv
410
- end
411
- end
412
-
413
- def self.fast_paste(files, delim = "$")
414
- CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
415
- end
416
- end