rbbt-util 3.2.1 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
data/lib/rbbt/util/tsv/index.rb
DELETED
@@ -1,419 +0,0 @@
|
|
1
|
-
require 'rbbt/util/tsv/manipulate'
|
2
|
-
require 'rbbt/util/tsv/filters'
|
3
|
-
require 'rbbt/util/fix_width_table'
|
4
|
-
|
5
|
-
class TSV
|
6
|
-
|
7
|
-
# Builds an index that maps values found in this TSV back to entry keys and
# returns it as a new :flat TSV.
#
# Recognised options (defaults are filled in through Misc.add_defaults):
#   :order            - when true, matches are grouped per source field
#                       position before being flattened; when false they are
#                       simply accumulated and de-duplicated.
#   :target           - field whose values become the values of the index
#                       (default :key).
#   :fields           - fields whose values become the keys of the index;
#                       nil means every field, including the key itself.
#   :case_insensitive - downcase the index keys.
#   :persistence      - forwarded, with the rest of the options, to
#                       Persistence.persist.
def index(options = {})
  options = Misc.add_defaults options, :order => false, :persistence => true, :target => :key, :fields => nil, :case_insensitive => case_insensitive, :tsv_serializer => :list

  prefix = options[:target] ? "Index[#{options[:target]}]" : "Index[:key]"

  Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
    order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive

    new = {}

    if order
      ## Ordered: collect keys per field position, then flatten

      new_key_field, new_fields = through target, fields do |key, values|
        keys = Array === key ? key : [key]

        values.each_with_index do |list, i|
          list = [list] unless Array === list
          # Slot 0 is reserved for the key itself when all fields are indexed
          i += 1 if fields.nil?
          list.each do |elem|
            next if elem.nil? or elem.empty?
            # The downcased value must be kept; it used to be discarded
            elem = elem.downcase if case_insensitive
            new[elem] ||= []
            new[elem][i] ||= []
            new[elem][i].concat keys
          end
        end

        if fields.nil?
          keys.each do |k|
            k = k.downcase if case_insensitive
            new[k] ||= []
            new[k][0] ||= []
            new[k][0].concat keys
          end
        end
      end

      new.each do |key, values|
        new[key] = values.flatten.compact
      end
    else
      ## Not ordered: accumulate every match

      # Keys arrive as arrays only when a :double TSV is traversed by a
      # field other than its key
      double_keys = (type == :double && identify_field(target) != :key)

      new_key_field, new_fields = through target, fields do |key, values|
        # Work on a fresh array so the row handed out by through is not
        # modified while building the index
        entries = if fields.nil?
                    [type == :double ? [key] : key] + values
                  else
                    values
                  end
        entries = entries.flatten unless type == :flat

        entries.each do |elem|
          next if elem.nil? or elem.empty?
          # Downcase after the nil check so nil entries do not raise
          elem = elem.downcase if case_insensitive
          new[elem] ||= []
          if double_keys
            new[elem].concat key
          else
            new[elem] << key
          end
        end
      end
    end

    new.each do |key, values|
      values.uniq!
    end

    key_field = new_key_field ? new_key_field + "|" + new_fields * "|" : nil
    fields = new_key_field.nil? ? nil : [new_key_field]

    TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])
  end
end
|
112
|
-
|
113
|
-
# Class-level shortcut: loads +file+ as a :double TSV and returns its index,
# wrapping the whole operation in Persistence.persist.
#
# The :data_persistence* options are removed from +options+ and handed to
# TSV.new as the plain :persistence* options; everything else is forwarded
# to the instance-level #index with persistence switched off.
def self.index(file, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  [:persistence, :persistence_file, :persistence_update, :persistence_source].each do |name|
    options_data[name] = Misc.process_options(options, :"data_#{name}")
  end

  options_data[:type] = :flat if options[:order] == false

  target = options[:target]
  prefix = target ? "Index_static[#{target}]" : "Index_static[:key]"

  Persistence.persist(file, prefix, :tsv, options) do |source, persist_options, _filename|
    index_options = persist_options.merge(:persistence => false, :persistence_file => nil)
    TSV.new(source, :double, options_data).index index_options
  end
end
|
138
|
-
|
139
|
-
|
140
|
-
# Returns a new TSV holding the rows of this TSV extended with values taken
# from +other+.
#
# other      - TSV to take the additional values from.
# match      - how rows of both TSVs are paired:
#              nil    - by key, going through other.index unless both TSVs
#                       share the same key field;
#              TSV    - a translation TSV; the first field it shares with
#                       self is used as source;
#              String - name of the field in self to match on;
#              Array  - [field in self, field(s) of other to index].
# fields2add - fields of +other+ to append; by default the fields of
#              +other+ that are not already present in self.
def smart_merge(other, match = nil, fields2add = nil)

  # Determine the fields to add
  new_fields = if !fields2add.nil?
                 fields2add
               elsif other.all_fields.nil? or self.all_fields.nil?
                 # Was the undefined name `other_fields`, which raised NameError
                 other.fields
               else
                 other.all_fields - all_fields
               end

  # Load matching scheme. Index and source field
  index = nil
  match_source, match_index = case match
                              when nil
                                if not key_field.nil? and other.key_field == key_field
                                  [:key, nil]
                                else
                                  [:key, other.index]
                                end
                              when TSV
                                raise "No field info in match TSV" if match.fields.nil?
                                source = (all_fields & match.all_fields).first
                                index = match.index :target => other.key_field, :fields => source
                                [source, index]
                              when String
                                [match == key_field ? :key : match, other.index]
                              when Array
                                [match.first, other.index(:fields => match.last)]
                              end

  match_source_position = identify_field match_source

  new = {}
  each do |key, values|
    source_keys = match_source == :key ? key : values[match_source_position]
    source_keys = [source_keys] unless Array === source_keys

    # NOTE(review): when match is a TSV, `index` and `match_index` are the
    # same object, so keys are translated twice below - confirm intended.
    other_keys = if index.nil?
                   source_keys
                 else
                   index.values_at(*source_keys).flatten.compact
                 end

    unless match_index.nil?
      other_keys = other_keys.collect { |other_key| match_index[other_key] }.flatten
    end

    other_values = other_keys.collect do |other_key|
      next unless other.include? other_key
      new_fields.collect do |field|
        if field == other.key_field
          type == :double ? [other_key] : other_key
        else
          other[other_key][field]
        end
      end
    end.compact

    # Merge the values gathered from every matching row of other, field by field
    other_values = if type == :double
                     TSV.zip_fields(other_values).collect { |v| v.flatten.uniq }
                   else
                     TSV.zip_fields(other_values).collect { |v| v.flatten.first }
                   end

    new[key] = values + other_values
  end

  new = TSV.new new
  new.fields = fields + new_fields if fields
  new.key_field = key_field if key_field
  new.type = type

  new
end
|
237
|
-
|
238
|
-
# For every field of +tsv+ (and its key field) lists which of the given
# +values+ appear in it.
#
# tsv    - object responding to key_field, fields, type, filename, keys and
#          through.
# values - a value or an Array of values to look for.
#
# Returns a Hash of field name => Array of matching values. Returns an empty
# Hash when the ten smallest values, converted with to_i, are exactly 1..10.
def self.field_matches(tsv, values)
  values = [values] unless Array === values
  Log.debug "Matching #{values.length} values to #{tsv.filename}"

  # Drop nils before sorting; sorting a list that mixes nil with other
  # values raises
  smallest = values.flatten.compact.sort[0..9]
  return {} if smallest.collect { |n| n.to_i } == (1..10).to_a

  key_field = tsv.key_field
  fields = tsv.fields

  field_values = {}
  fields.each { |field| field_values[field] = [] }

  double = tsv.type == :double
  tsv.through do |key, entry_values|
    fields.zip(entry_values).each do |field, entry_field_values|
      if double
        field_values[field].concat entry_field_values unless entry_field_values.nil?
      else
        field_values[field] << entry_field_values
      end
    end
  end

  # Keep, for each field, only the requested values that were seen in it
  field_values.each do |field, seen|
    seen.replace(values & seen.flatten.uniq)
  end

  field_values[key_field] = values & tsv.keys

  field_values
end
|
276
|
-
|
277
|
-
# Instance-level convenience wrapper around TSV.field_matches for this TSV.
def field_matches(values)
  TSV.field_matches self, values
end
|
280
|
-
|
281
|
-
# Returns the [field, matches] pair that sorts last when the result of
# field_matches(values) is ordered by the number of distinct matches.
def guess_field(values)
  ranked = field_matches(values).sort_by do |_field, matches|
    matches.uniq.length
  end
  ranked.last
end
|
284
|
-
|
285
|
-
# Builds a FixWidthTable of [key, position] points from +pos_field+
# ("Position" when not given), wrapped in Persistence.persist.
#
# The current filters, if this TSV responds to #filters, are added to the
# persistence options as [match, value] pairs.
def pos_index(pos_field = nil, options = {})
  pos_field ||= "Position"

  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false

  prefix = "Pos[#{pos_field}]"

  Persistence.persist(filename, prefix, :fwt, options.merge({
    :pos_field => pos_field,
    :filters => (self.respond_to?(:filters) ? filters.collect { |f| [f.match, f.value] } : [])
  })) do |_file, persist_options, _filename|
    pos_field = persist_options[:pos_field]
    widest_key = 0
    points = []

    through :key, pos_field do |key, values|
      # The table is fixed-width: track the longest key seen
      widest_key = key.length if key.length > widest_key

      pos = values.first
      if Array === pos
        pos.each { |p| points << [key, p.to_i] }
      else
        points << [key, pos.to_i]
      end
    end

    table = FixWidthTable.get(:memory, widest_key, false)
    table.add_point points
    table.read
    table
  end
end
|
320
|
-
|
321
|
-
# Class-level shortcut: loads +file+ as a :list TSV, re-applies any filters
# found in the options and returns its position index, wrapping everything
# in Persistence.persist.
#
# The :data_persistence* options are removed from +options+ and handed to
# TSV.new as the plain :persistence* options.
def self.pos_index(file, pos_field = nil, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  [:persistence, :persistence_file, :persistence_update, :persistence_source].each do |name|
    options_data[name] = Misc.process_options(options, :"data_#{name}")
  end

  prefix = "Pos[#{pos_field}]"

  Persistence.persist(file, prefix, :fwt, options.merge({:pos_field => pos_field})) do |source, persist_options, _filename|
    tsv = TSV.new(source, :list, options_data)

    filter_specs = persist_options[:filters]
    if persist_options.include?(:filters) and Array === filter_specs and not filter_specs.empty?
      tsv.filter
      filter_specs.each do |match, value, persistence|
        tsv.add_filter(match, value, persistence)
      end
    end

    tsv.pos_index persist_options[:pos_field], persist_options.merge(:persistence => false, :persistence_file => nil)
  end
end
|
349
|
-
|
350
|
-
# Builds a FixWidthTable of [key, [start, end]] ranges from +start_field+
# and +end_field+ ("Start" and "End" when not given), wrapped in
# Persistence.persist.
#
# The current filters, if this TSV responds to #filters, are added to the
# persistence options as [match, value] pairs.
def range_index(start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field ||= "End"
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false

  prefix = "Range[#{start_field}-#{end_field}]"

  Persistence.persist(filename, prefix, :fwt, options.merge({
    :start_field => start_field, :end_field => end_field,
    :filters => (self.respond_to?(:filters) ? filters.collect { |f| [f.match, f.value] } : [])
  })) do |_file, persist_options, _filename|
    start_field, end_field = persist_options.values_at :start_field, :end_field

    widest_key = 0
    ranges = []

    through :key, [start_field, end_field] do |key, values|
      # The table is fixed-width: track the longest key seen
      widest_key = key.length if key.length > widest_key

      start_pos, end_pos = values

      if Array === start_pos
        start_pos.zip(end_pos).each { |s, e| ranges << [key, [s.to_i, e.to_i]] }
      else
        ranges << [key, [start_pos.to_i, end_pos.to_i]]
      end
    end

    table = FixWidthTable.get(:memory, widest_key, true)
    table.add_range ranges
    table.read
    table
  end
end
|
387
|
-
|
388
|
-
# Class-level shortcut: loads +file+ as a :list TSV, re-applies any filters
# found in the options and returns its range index, wrapping everything in
# Persistence.persist.
#
# The :data_persistence* options are removed from +options+ and handed to
# TSV.new as the plain :persistence* options.
def self.range_index(file, start_field = nil, end_field = nil, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  [:persistence, :persistence_file, :persistence_update, :persistence_source].each do |name|
    options_data[name] = Misc.process_options(options, :"data_#{name}")
  end

  prefix = "Range[#{start_field}-#{end_field}]"

  options_data[:type] = :flat if options[:order] == false

  Persistence.persist(file, prefix, :fwt, options.merge({:start_field => start_field, :end_field => end_field})) do |source, persist_options, _filename|
    tsv = TSV.new(source, :list, options_data)

    filter_specs = persist_options[:filters]
    if persist_options.include?(:filters) and Array === filter_specs and not filter_specs.empty?
      tsv.filter
      filter_specs.each do |match, value, persistence|
        tsv.add_filter(match, value, persistence)
      end
    end

    tsv.range_index persist_options[:start_field], persist_options[:end_field], persist_options.merge(:persistence => false, :persistence_file => nil)
  end
end
|
417
|
-
|
418
|
-
end
|
419
|
-
|
@@ -1,300 +0,0 @@
|
|
1
|
-
class TSV
|
2
|
-
|
3
|
-
attr_accessor :monitor
|
4
|
-
|
5
|
-
# Iterates over the entries, optionally re-keyed by another field and
# restricted to a selection of fields, yielding [key, values] pairs.
#
# new_key_field - field to use as key for the traversal (default :key).
# new_fields    - Integer, String, Array of fields, :key, :fields or nil.
#
# Returns [key field name, field names] as seen by the block; both are nil
# when this TSV has no field names.
def through(new_key_field = :key, new_fields = nil, &block)

  # Resolve positions
  new_key_position = identify_field new_key_field

  new_field_positions = case new_fields
                        when Integer
                          [new_fields]
                        when String
                          [identify_field(new_fields)]
                        when Array
                          new_fields.collect { |new_field| identify_field new_field }
                        when :key
                          [:key]
                        when :fields, nil
                          nil
                        else
                          raise "Unknown new fields specified: #{new_fields.inspect}"
                        end

  # Resolve names (only possible when field names are known)
  new_key_field_name = nil
  new_field_names = nil
  if fields
    new_key_field_name = new_key_position == :key ? key_field : fields[new_key_position]

    names = fields.dup
    new_field_names = if new_field_positions.nil? and new_key_position == :key
                        names
                      elsif new_field_positions.nil?
                        # The old key takes the place of the field promoted to key
                        names.delete_at(new_key_position)
                        names.unshift key_field
                      else
                        # The key is appended so that :key can be addressed as -1
                        names.push key_field
                        names.values_at(*new_field_positions.collect { |pos| pos == :key ? -1 : pos })
                      end
  end

  # Optional progress reporting
  progress_monitor = nil
  if monitor
    desc = "Iterating TSV"
    step = 100
    if Hash === monitor
      desc = monitor[:desc] if monitor.include? :desc
      step = monitor[:step] if monitor.include? :step
    end
    progress_monitor = Progress::Bar.new(size, 0, step, desc)
  end

  if new_key_position == :key and (new_fields.nil? or new_fields == fields)
    # Nothing to rearrange: hand out the entries as they are
    each do |key, row|
      progress_monitor.tick if progress_monitor
      yield key, row
    end
  else
    each do |key, row|
      progress_monitor.tick if progress_monitor

      new_key_value = if new_key_position.nil? or new_key_position == :key
                        key
                      else
                        row[new_key_position]
                      end

      new_field_values = if (new_field_positions.nil? and new_fields == :fields) or
                            (new_fields.nil? and new_key_position == :key)
                           row
                         elsif new_field_positions.nil?
                           shifted = row.dup
                           shifted.delete_at(new_key_position)
                           shifted.unshift(type == :double ? [key] : key)
                           shifted
                         else
                           extended = row.dup
                           if type == :single
                             extended = [extended, key]
                           elsif type == :double
                             extended.push [key]
                           else
                             extended.push key
                           end
                           extended.values_at(*new_field_positions.collect { |pos| pos == :key ? -1 : pos })
                         end

      if type == :single
        new_field_values = new_field_values.first
      else
        new_field_values = NamedArray.name new_field_values, new_field_names unless unnamed
      end

      next if new_key_value.nil? or (String === new_key_value and new_key_value.empty?)
      yield new_key_value, new_field_values
    end
  end

  [new_key_field_name, new_field_names]
end
|
113
|
-
|
114
|
-
# Returns a new TSV re-keyed by +new_key_field+ and holding +new_fields+,
# built by traversing this one with #through inside Persistence.persist
# (persistence is off unless requested in +options+).
#
# When the new key is multi-valued every key gets its own entry; for
# :double TSVs repeated keys have their value lists concatenated.
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options, :new_key_field => new_key_field, :new_fields => new_fields, :persistence => false

  reordered, _extra = Persistence.persist(self, :Reorder, :tsv, options) do |_tsv, persist_options, filename|
    new_key_field = persist_options[:new_key_field]
    new_fields = persist_options[:new_fields]

    data = {}
    new_key_field, new_fields = through new_key_field, new_fields do |keys, values|
      if Array === keys
        keys.each do |key|
          if data[key].nil? or type != :double
            data[key] = values.collect { |l| l.dup }
          else
            data[key] = data[key].zip(values).collect { |old_list, new_list| old_list.concat new_list }
          end
        end
      else
        data[keys] = values
      end
      nil
    end

    result = TSV.new data

    result.fields = new_fields
    result.key_field = new_key_field
    result.filename = filename
    result.type = type
    result.case_insensitive = case_insensitive
    result.identifiers = identifiers

    result
  end

  reordered
end
|
151
|
-
|
152
|
-
# Returns a copy of this TSV, still keyed by :key, restricted to +fields+.
def slice(fields)
  reorder(:key, fields)
end
|
155
|
-
|
156
|
-
# Returns a copy of this TSV restricted to the fields whose #namespace
# equals +namespace+; the fields are selected by position.
def slice_namespace(namespace)
  positions = []
  self.fields.each_with_index do |field, i|
    positions << i if field.namespace == namespace
  end
  reorder(:key, positions)
end
|
162
|
-
|
163
|
-
# Returns the keys ordered by a sort value derived from +fields+.
#
# With no fields the traversal uses :key; with one or more, the given
# fields. When a block is given the sort value is whatever the block
# returns for the entry values; otherwise it is the first value (the first
# element of the first value for :double TSVs).
def sort(*fields)
  pos = case fields.length
        when 0
          :key
        when 1
          identify_field fields.first
        else
          fields.collect { |field| identify_field field }
        end

  pairs = []
  through :key, pos do |key, values|
    sort_value = if block_given?
                   yield(values)
                 elsif type == :double
                   values.first.first
                 else
                   values.first
                 end
    pairs << [key, sort_value]
  end

  pairs.sort_by { |_key, value| value }.collect { |key, _value| key }
end
|
189
|
-
|
190
|
-
# Returns a new TSV with the entries that satisfy +method+:
#
#   nil + block    - entries for which the block (key, values) is true.
#   Array          - entries whose key or values include any of the items.
#   Regexp         - entries whose key or any value matches.
#   String + block - entries for which the block is true for the key (when
#                    the string names the key field) or for values[string].
#   String         - entries whose key or any value equals the string.
#   Hash           - {field => Array | Regexp | String}: same tests, applied
#                    only to the given field.
def select(method = nil)
  selected = TSV.new({})
  selected.key_field = key_field
  selected.fields = fields.dup
  selected.type = type
  selected.filename = filename
  selected.case_insensitive = case_insensitive

  case method
  when nil
    if block_given?
      through do |key, values|
        selected[key] = values if yield key, values
      end
    end
  when Array
    through do |key, values|
      selected[key] = values if ([key, values].flatten & method).any?
    end
  when Regexp
    through do |key, values|
      selected[key] = values if [key, values].flatten.any? { |v| v =~ method }
    end
  when String
    if block_given?
      through do |key, values|
        subject = (method == key_field or method == :key) ? key : values[method]
        selected[key] = values if yield(subject)
      end
    else
      through do |key, values|
        selected[key] = values if [key, values].flatten.any? { |v| v == method }
      end
    end
  when Hash
    field = method.keys.first
    criterion = method.values.first

    if Array === criterion and (field == :key or key_field == field)
      # Direct key lookup, no traversal needed
      criterion.each { |item| selected[item] = self[item] if self.include? item }
    elsif Array === criterion
      through :key, field do |key, values|
        values = [values] if type == :single
        selected[key] = self[key] if (values.flatten & criterion).any?
      end
    elsif Regexp === criterion
      through :key, field do |key, values|
        values = [values] if type == :single
        selected[key] = self[key] if values.flatten.any? { |v| v =~ criterion }
      end
    elsif String === criterion
      through :key, field do |key, values|
        values = [values] if type == :single
        selected[key] = self[key] if values.flatten.any? { |v| v == criterion }
      end
    end
  end

  selected
end
|
248
|
-
|
249
|
-
# Replaces, in place, the values of +field+ in every entry with the result
# of the block.
#
# The block receives (field_values), (field_values, key) or
# (field_values, key, values) depending on its arity; any other arity
# raises "Unknown arity in block". For :flat TSVs the whole row is the
# field value and the entry is reassigned; otherwise values[field] is
# replaced. Entries whose field value is nil are skipped.
def process(field, &block)
  through do |key, values|
    field_values = type == :flat ? values : values[field]

    # Test the value actually handed to the block: for :flat rows
    # values[field] is not the field value
    next if field_values.nil?

    new_values = case block.arity
                 when 1
                   yield(field_values)
                 when 2
                   yield(field_values, key)
                 when 3
                   yield(field_values, key, values)
                 else
                   raise "Unknown arity in block"
                 end

    if type == :flat
      self[key] = new_values
    else
      values[field].replace new_values
    end
  end
end
|
276
|
-
|
277
|
-
# Appends one field to every entry; its value is whatever the block returns
# for (key, values). For :double TSVs a non-Array result is wrapped in an
# Array. +name+, when given and field names exist, is added to the fields.
#
# Returns self.
def add_field(name = nil)
  through do |key, values|
    extra = yield(key, values)
    extra = [extra] if type == :double and not Array === extra

    self[key] = values + [extra]
  end

  unless fields.nil? or name.nil?
    self.fields = self.fields + [name]
  end

  self
end
|
289
|
-
|
290
|
-
# Appends several fields to every entry; the block is called once per entry
# with (key, values) and must return the list of new values. For :double
# TSVs a non-Array result is wrapped in an Array. +names+, when given and
# field names exist, are added to the fields.
def add_fields(names = nil)
  through do |key, values|
    # Call the block only once per entry and use its (possibly wrapped) result
    new_values = yield(key, values)
    new_values = [new_values] if type == :double and not Array === new_values

    self[key] = values.concat new_values
  end

  self.fields = self.fields + names if fields != nil and names != nil
end
|
300
|
-
end
|