rbbt-util 3.2.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +65 -0
- data/bin/run_workflow.rb +142 -69
- data/lib/rbbt-util.rb +3 -3
- data/lib/rbbt.rb +12 -3
- data/lib/rbbt/annotations.rb +215 -0
- data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
- data/lib/rbbt/persist.rb +164 -0
- data/lib/rbbt/persist/tsv.rb +135 -0
- data/lib/rbbt/resource.rb +100 -0
- data/lib/rbbt/resource/path.rb +180 -0
- data/lib/rbbt/resource/rake.rb +48 -0
- data/lib/rbbt/resource/util.rb +111 -0
- data/lib/rbbt/resource/with_key.rb +28 -0
- data/lib/rbbt/tsv.rb +134 -0
- data/lib/rbbt/tsv/accessor.rb +345 -0
- data/lib/rbbt/tsv/attach.rb +183 -0
- data/lib/rbbt/tsv/attach/util.rb +277 -0
- data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
- data/lib/rbbt/tsv/index.rb +453 -0
- data/lib/rbbt/tsv/manipulate.rb +361 -0
- data/lib/rbbt/tsv/parser.rb +231 -0
- data/lib/rbbt/tsv/serializers.rb +79 -0
- data/lib/rbbt/tsv/util.rb +67 -0
- data/lib/rbbt/util/R.rb +3 -3
- data/lib/rbbt/util/chain_methods.rb +64 -0
- data/lib/rbbt/util/cmd.rb +17 -13
- data/lib/rbbt/util/excel2tsv.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -0
- data/lib/rbbt/util/misc.rb +296 -285
- data/lib/rbbt/util/open.rb +9 -2
- data/lib/rbbt/util/persistence.rb +1 -1
- data/lib/rbbt/util/task/job.rb +3 -1
- data/lib/rbbt/workflow.rb +193 -0
- data/lib/rbbt/workflow/accessor.rb +249 -0
- data/lib/rbbt/workflow/annotate.rb +60 -0
- data/lib/rbbt/workflow/soap.rb +100 -0
- data/lib/rbbt/workflow/step.rb +102 -0
- data/lib/rbbt/workflow/task.rb +76 -0
- data/test/rbbt/resource/test_path.rb +12 -0
- data/test/rbbt/test_annotations.rb +106 -0
- data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
- data/test/rbbt/test_resource.rb +66 -0
- data/test/rbbt/test_tsv.rb +332 -0
- data/test/rbbt/test_workflow.rb +102 -0
- data/test/rbbt/tsv/test_accessor.rb +163 -0
- data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
- data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
- data/test/rbbt/tsv/test_index.rb +284 -0
- data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
- data/test/rbbt/util/test_R.rb +1 -1
- data/test/rbbt/util/test_chain_methods.rb +22 -0
- data/test/rbbt/util/test_filecache.rb +0 -1
- data/test/rbbt/util/test_misc.rb +97 -79
- data/test/rbbt/util/test_open.rb +1 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/workflow/test_soap.rb +103 -0
- data/test/rbbt/workflow/test_step.rb +142 -0
- data/test/rbbt/workflow/test_task.rb +84 -0
- data/test/test_helper.rb +7 -7
- metadata +80 -54
- data/lib/rbbt/util/rake.rb +0 -176
- data/lib/rbbt/util/resource.rb +0 -355
- data/lib/rbbt/util/task.rb +0 -183
- data/lib/rbbt/util/tc_hash.rb +0 -324
- data/lib/rbbt/util/tsv.rb +0 -236
- data/lib/rbbt/util/tsv/accessor.rb +0 -312
- data/lib/rbbt/util/tsv/attach.rb +0 -416
- data/lib/rbbt/util/tsv/index.rb +0 -419
- data/lib/rbbt/util/tsv/manipulate.rb +0 -300
- data/lib/rbbt/util/tsv/misc.rb +0 -41
- data/lib/rbbt/util/tsv/parse.rb +0 -324
- data/lib/rbbt/util/tsv/resource.rb +0 -88
- data/lib/rbbt/util/workflow.rb +0 -135
- data/lib/rbbt/util/workflow/soap.rb +0 -116
- data/test/rbbt/util/test_persistence.rb +0 -201
- data/test/rbbt/util/test_rake.rb +0 -54
- data/test/rbbt/util/test_resource.rb +0 -77
- data/test/rbbt/util/test_task.rb +0 -133
- data/test/rbbt/util/test_tc_hash.rb +0 -144
- data/test/rbbt/util/test_tsv.rb +0 -221
- data/test/rbbt/util/test_workflow.rb +0 -135
- data/test/rbbt/util/tsv/test_accessor.rb +0 -150
- data/test/rbbt/util/tsv/test_index.rb +0 -241
- data/test/rbbt/util/tsv/test_parse.rb +0 -87
- data/test/rbbt/util/tsv/test_resource.rb +0 -9
data/lib/rbbt/util/tsv/index.rb
DELETED
@@ -1,419 +0,0 @@
|
|
1
|
-
require 'rbbt/util/tsv/manipulate'
|
2
|
-
require 'rbbt/util/tsv/filters'
|
3
|
-
require 'rbbt/util/fix_width_table'
|
4
|
-
|
5
|
-
class TSV
|
6
|
-
|
7
|
-
# Builds an index that maps the values found in +fields+ back to the values
# of the +target+ field of the rows that contain them.
#
# options (defaults are filled in through Misc.add_defaults):
#   :order            - when true, the keys collected for a value are grouped
#                       by the position of the field they were found in
#                       before being flattened
#   :target           - field handed to #through as the key field (default :key)
#   :fields           - fields whose values get indexed; nil lets #through
#                       decide, and the row key itself is indexed as well
#   :case_insensitive - downcase the indexed values (defaults to the table's
#                       own case_insensitive setting)
#   :persistence, :tsv_serializer - passed on to Persistence.persist
#
# Returns whatever Persistence.persist returns for the block, which builds a
# :flat TSV out of the collected hash.
def index(options = {})
  options = Misc.add_defaults options, :order => false, :persistence => true, :target => :key, :fields => nil, :case_insensitive => case_insensitive, :tsv_serializer => :list

  prefix = options[:target] ? "Index[#{options[:target]}]" : "Index[:key]"

  Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
    order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive

    new = {}

    if order
      ## Ordered: new[value][field_position] collects the keys seen for that field
      new_key_field, new_fields = through target, fields do |key, values|
        keys = Array === key ? key : [key]

        values.each_with_index do |list, i|
          list = [list] unless Array === list
          # Slot 0 is used for the keys themselves when no fields were given
          i += 1 if fields.nil?
          list.each do |elem|
            next if elem.nil? or elem.empty?
            # String#downcase returns a new string; the result has to be kept,
            # otherwise the index stays case sensitive
            elem = elem.downcase if case_insensitive
            new[elem] ||= []
            new[elem][i] ||= []
            new[elem][i].concat keys
          end
        end

        if fields.nil?
          keys.each do |own_key|
            own_key = own_key.downcase if case_insensitive
            new[own_key] ||= []
            new[own_key][0] ||= []
            new[own_key][0].concat keys
          end
        end
      end

      # Collapse the per-field slots into a single list, dropping empty slots
      new.each do |key, values|
        new[key] = values.flatten.compact
      end
    else
      ## Not ordered
      # For :double tables with a non-key target, key is a list and gets concatenated
      double_keys = (type == :double and identify_field(target) != :key)

      new_key_field, new_fields = through target, fields do |key, values|
        # NOTE(review): unshift (and collect! below, for :flat tables) modify
        # the array yielded by #through in place - confirm that is intended.
        values.unshift(type == :double ? [key] : key) if fields.nil?
        list = type == :flat ? values : values.flatten
        # nil entries are skipped below, so they must survive the downcase step
        list.collect! { |e| e.nil? ? e : e.downcase } if case_insensitive
        list.each do |elem|
          next if elem.nil? or elem.empty?
          new[elem] ||= []
          if double_keys
            new[elem].concat key
          else
            new[elem] << key
          end
        end
      end
    end

    new.each do |key, values|
      values.uniq!
    end

    key_field = new_key_field ? (new_key_field + "|" + new_fields * "|") : nil
    fields = new_key_field.nil? ? nil : [new_key_field]

    new = TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])

    new
  end
end
|
112
|
-
|
113
|
-
# Class-level entry point: hands +file+ to TSV.new (as :double) and indexes
# the resulting table.
#
# Options named :data_persistence, :data_persistence_file,
# :data_persistence_update and :data_persistence_source are pulled out with
# Misc.process_options and given to TSV.new without the data_ prefix; the
# remaining options go to Persistence.persist and to the instance-level
# #index (with persistence switched off there).
def self.index(file, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  [:persistence, :persistence_file, :persistence_update, :persistence_source].each do |name|
    options_data[name] = Misc.process_options(options, :"data_#{name}")
  end

  options_data[:type] = :flat if options[:order] == false

  target = options[:target]
  prefix = target ? "Index_static[#{target}]" : "Index_static[:key]"

  Persistence.persist(file, prefix, :tsv, options) do |source, opts, _filename|
    TSV.new(source, :double, options_data).index opts.merge(:persistence => false, :persistence_file => nil)
  end
end
|
138
|
-
|
139
|
-
|
140
|
-
# Returns a new TSV holding the rows of this table extended with values
# taken from +other+.
#
# other      - table to take the extra values from
# match      - how rows are paired up:
#                nil    - by key; other.index is used unless both tables
#                         share the same key_field
#                TSV    - a translation table; its first field in common with
#                         this table is the source, indexed against
#                         other.key_field
#                String - name of the source field in this table (the key
#                         when it equals key_field)
#                Array  - [source field, field of other to index]
# fields2add - fields of +other+ to append; when nil, the fields of +other+
#              that this table does not already have
def smart_merge(other, match = nil, fields2add = nil)
  # Determine common and new fields
  common_fields, new_fields = case
                              when fields2add != nil
                                [fields & other.fields, fields2add]
                              when (other.all_fields.nil? or self.all_fields.nil?)
                                # Was the undefined name `other_fields`, which
                                # raised as soon as this branch was reached
                                [nil, other.fields]
                              else
                                [(all_fields & other.all_fields), (other.all_fields - all_fields)]
                              end

  # Load matching scheme: source field and index
  source_index = nil
  match_source, match_index = case
                              when (match.nil? and not key_field.nil? and other.key_field == key_field)
                                [:key, nil]
                              when match.nil?
                                [:key, other.index]
                              when TSV === match
                                raise "No field info in match TSV" if match.fields.nil?
                                match_source = (all_fields & match.all_fields).first
                                source_index = match.index :target => other.key_field, :fields => match_source
                                [match_source, source_index]
                              when (String === match and match == key_field)
                                [:key, other.index]
                              when String === match
                                [match, other.index]
                              when Array === match
                                [match.first, other.index(:fields => match.last)]
                              end

  match_source_position = identify_field match_source

  new = {}
  each do |key, values|
    source_keys = match_source == :key ? key : values[match_source_position]
    source_keys = [source_keys] unless Array === source_keys

    # NOTE(review): for a TSV +match+ the same index is consulted here and
    # again through match_index just below - confirm the double lookup is
    # intended.
    other_keys = if source_index.nil?
                   source_keys
                 else
                   source_index.values_at(*source_keys).flatten.compact
                 end

    other_keys = other_keys.collect { |other_key| match_index[other_key] }.flatten unless match_index.nil?

    other_values = other_keys.collect do |other_key|
      next unless other.include? other_key
      new_fields.collect do |field|
        if field == other.key_field
          type == :double ? [other_key] : other_key
        else
          other[other_key][field]
        end
      end
    end.compact

    other_values = if type == :double
                     TSV.zip_fields(other_values).collect { |v| v.flatten.uniq }
                   else
                     TSV.zip_fields(other_values).collect { |v| v.flatten.first }
                   end

    new[key] = values + other_values
  end

  new = TSV.new new
  new.fields = fields + new_fields if fields
  new.key_field = key_field if key_field
  new.type = type

  new
end
|
237
|
-
|
238
|
-
# For every field of +tsv+ (and its key field) lists which of +values+ occur
# in it.
#
# tsv    - object answering filename, key_field, fields, type, through and keys
# values - a value or an Array of values to look for
#
# Returns a Hash of field name => matching values, or an empty Hash when the
# first ten sorted values convert (via to_i) to exactly 1..10.
def self.field_matches(tsv, values)
  values = [values] unless Array === values
  Log.debug "Matcing #{values.length} values to #{tsv.filename}"

  leading = values.flatten.sort[0..9].compact.collect { |n| n.to_i }
  return {} if leading == (1..10).to_a

  key_field = tsv.key_field
  fields = tsv.fields

  field_values = {}
  fields.each { |field| field_values[field] = [] }

  # :double rows hold a list per field (nil lists are skipped); any other
  # type contributes the field entry as a single element
  double = tsv.type == :double
  tsv.through do |_key, entry_values|
    fields.zip(entry_values).each do |field, entry_field_values|
      if double
        field_values[field].concat entry_field_values unless entry_field_values.nil?
      else
        field_values[field] << entry_field_values
      end
    end
  end

  field_values.each do |_field, seen|
    seen.replace(values & seen.flatten.uniq)
  end

  field_values[key_field] = values & tsv.keys

  field_values
end
|
276
|
-
|
277
|
-
# Instance shortcut for TSV.field_matches with this table as the first argument.
def field_matches(values)
  TSV.field_matches self, values
end
|
280
|
-
|
281
|
-
# Picks the field in which most distinct +values+ were found.
#
# Returns the last [field, matches] pair of #field_matches after sorting by
# the number of unique matches.
def guess_field(values)
  ranked = field_matches(values).sort_by { |_field, matches| matches.uniq.length }
  ranked.last
end
|
284
|
-
|
285
|
-
# Builds a FixWidthTable of [key, position] points from +pos_field+
# (default "Position").
#
# The work is wrapped in Persistence.persist under the prefix
# "Pos[<pos_field>]"; the position field and the table's current filters
# (as [match, value] pairs, when the table responds to #filters) are added
# to the options handed to it.
def pos_index(pos_field = nil, options = {})
  pos_field ||= "Position"

  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false

  prefix = "Pos[#{pos_field}]"
  source = filename
  active_filters = respond_to?(:filters) ? filters.collect { |f| [f.match, f.value] } : []
  persist_options = options.merge(:pos_field => pos_field, :filters => active_filters)

  Persistence.persist(source, prefix, :fwt, persist_options) do |_file, opts, _filename|
    pos_field = opts[:pos_field]
    value_size = 0 # length of the longest key, passed to FixWidthTable.get
    index_data = []

    through :key, pos_field do |key, values|
      value_size = [value_size, key.length].max

      first = values.first
      positions = Array === first ? first : [first]
      positions.each { |position| index_data << [key, position.to_i] }
    end

    table = FixWidthTable.get(:memory, value_size, false)
    table.add_point index_data
    table.read
    table
  end
end
|
320
|
-
|
321
|
-
# Class-level entry point: hands +file+ to TSV.new (as :list), re-applies any
# filters listed in the options and delegates to the instance-level
# #pos_index with persistence switched off.
#
# The data_-prefixed persistence options are extracted with
# Misc.process_options and given to TSV.new without the prefix.
def self.pos_index(file, pos_field = nil, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  [:persistence, :persistence_file, :persistence_update, :persistence_source].each do |name|
    options_data[name] = Misc.process_options(options, :"data_#{name}")
  end

  prefix = "Pos[#{pos_field}]"
  persist_options = options.merge(:pos_field => pos_field)

  Persistence.persist(file, prefix, :fwt, persist_options) do |source, opts, _filename|
    tsv = TSV.new(source, :list, options_data)

    has_filters = opts.include?(:filters) && Array === opts[:filters] && !opts[:filters].empty?
    if has_filters
      tsv.filter
      opts[:filters].each { |match, value, persistence| tsv.add_filter(match, value, persistence) }
    end

    tsv.pos_index opts[:pos_field], opts.merge(:persistence => false, :persistence_file => nil)
  end
end
|
349
|
-
|
350
|
-
# Builds a FixWidthTable of [key, [start, end]] ranges from +start_field+
# and +end_field+ (defaults "Start" and "End").
#
# The work is wrapped in Persistence.persist under the prefix
# "Range[<start_field>-<end_field>]"; both field names and the table's
# current filters (as [match, value] pairs, when the table responds to
# #filters) are added to the options handed to it.
def range_index(start_field = nil, end_field = nil, options = {})
  start_field ||= "Start"
  end_field ||= "End"

  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false

  prefix = "Range[#{start_field}-#{end_field}]"
  source = filename
  active_filters = respond_to?(:filters) ? filters.collect { |f| [f.match, f.value] } : []
  persist_options = options.merge(:start_field => start_field, :end_field => end_field, :filters => active_filters)

  Persistence.persist(source, prefix, :fwt, persist_options) do |_file, opts, _filename|
    start_field, end_field = opts.values_at :start_field, :end_field

    value_size = 0 # length of the longest key, passed to FixWidthTable.get
    index_data = []

    through :key, [start_field, end_field] do |key, values|
      value_size = [value_size, key.length].max

      start_pos, end_pos = values
      # List-valued fields are paired up element by element
      pairs = Array === start_pos ? start_pos.zip(end_pos) : [[start_pos, end_pos]]
      pairs.each { |s, e| index_data << [key, [s.to_i, e.to_i]] }
    end

    table = FixWidthTable.get(:memory, value_size, true)
    table.add_range index_data
    table.read
    table
  end
end
|
387
|
-
|
388
|
-
# Class-level entry point: hands +file+ to TSV.new (as :list), re-applies any
# filters listed in the options and delegates to the instance-level
# #range_index with persistence switched off.
#
# The data_-prefixed persistence options are extracted with
# Misc.process_options and given to TSV.new without the prefix.
def self.range_index(file, start_field = nil, end_field = nil, options = {})
  options = Misc.add_defaults options,
    :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
    :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file

  options_data = {}
  [:persistence, :persistence_file, :persistence_update, :persistence_source].each do |name|
    options_data[name] = Misc.process_options(options, :"data_#{name}")
  end

  prefix = "Range[#{start_field}-#{end_field}]"

  options_data[:type] = :flat if options[:order] == false

  persist_options = options.merge(:start_field => start_field, :end_field => end_field)

  Persistence.persist(file, prefix, :fwt, persist_options) do |source, opts, _filename|
    tsv = TSV.new(source, :list, options_data)

    has_filters = opts.include?(:filters) && Array === opts[:filters] && !opts[:filters].empty?
    if has_filters
      tsv.filter
      opts[:filters].each { |match, value, persistence| tsv.add_filter(match, value, persistence) }
    end

    tsv.range_index opts[:start_field], opts[:end_field], opts.merge(:persistence => false, :persistence_file => nil)
  end
end
|
417
|
-
|
418
|
-
end
|
419
|
-
|
@@ -1,300 +0,0 @@
|
|
1
|
-
class TSV
|
2
|
-
|
3
|
-
attr_accessor :monitor
|
4
|
-
|
5
|
-
# Iterates over the table as if +new_key_field+ were the key and
# +new_fields+ the fields, yielding (key value, field values) for each row.
#
# new_key_field - :key (default) or anything #identify_field understands
# new_fields    - nil or :fields (all fields), :key, an Integer position,
#                 a String name, or an Array of those; anything else raises
#
# Rows whose new key value is nil or an empty String are skipped. Unless the
# table is :single or flagged unnamed, the yielded values are wrapped with
# NamedArray.name. When +monitor+ is set a Progress::Bar is ticked per row.
#
# Returns [new key field name, new field names]; both are nil when the
# table has no fields.
def through(new_key_field = :key, new_fields = nil, &block)
  # Resolve positions
  new_key_position = identify_field new_key_field

  new_field_positions = case new_fields
                        when Integer then [new_fields]
                        when String  then [identify_field(new_fields)]
                        when Array   then new_fields.collect { |new_field| identify_field new_field }
                        when :key    then [:key]
                        when :fields, nil then nil
                        else raise "Unknown new fields specified: #{new_fields.inspect}"
                        end

  # The key is appended after the fields before values_at is applied, so
  # :key is looked up at index -1
  value_positions = new_field_positions && new_field_positions.collect { |pos| pos == :key ? -1 : pos }

  # Resolve names
  new_key_field_name = nil
  new_field_names = nil
  if fields
    new_key_field_name = new_key_position == :key ? key_field : fields[new_key_position]
    new_field_names = if new_field_positions.nil? and new_key_position == :key
                        fields.dup
                      elsif new_field_positions.nil?
                        # The old key takes the place of the field promoted to key
                        names = fields.dup
                        names.delete_at(new_key_position)
                        names.unshift key_field
                      else
                        names = fields.dup
                        names.push key_field
                        names.values_at(*value_positions)
                      end
  end

  # Optional progress reporting
  progress_monitor = nil
  if monitor
    desc = "Iterating TSV"
    step = 100
    if Hash === monitor
      desc = monitor[:desc] if monitor.include? :desc
      step = monitor[:step] if monitor.include? :step
    end
    progress_monitor = Progress::Bar.new(size, 0, step, desc)
  end

  if new_key_position == :key and (new_fields.nil? or new_fields == fields)
    # Nothing to rearrange: rows are yielded as they are
    each do |key, row|
      progress_monitor.tick if progress_monitor
      yield key, row
    end
  else
    each do |key, row|
      progress_monitor.tick if progress_monitor

      new_key_value = (new_key_position.nil? or new_key_position == :key) ? key : row[new_key_position]

      new_field_values =
        if (new_field_positions.nil? and new_fields == :fields) or (new_fields.nil? and new_key_position == :key)
          row
        elsif new_field_positions.nil?
          rest = row.dup
          rest.delete_at(new_key_position)
          rest.unshift(type == :double ? [key] : key)
        else
          extended = row.dup
          if type == :single
            extended = [extended, key]
          elsif type == :double
            extended.push [key]
          else
            extended.push key
          end
          extended.values_at(*value_positions)
        end

      if type == :single
        new_field_values = new_field_values.first
      else
        new_field_values = NamedArray.name new_field_values, new_field_names unless unnamed
      end

      next if new_key_value.nil? or (String === new_key_value and new_key_value.empty?)
      yield new_key_value, new_field_values
    end
  end

  [new_key_field_name, new_field_names]
end
|
113
|
-
|
114
|
-
# Builds a new TSV keyed by +new_key_field+ and holding +new_fields+.
#
# The rows come from #through. When the new key is a list, each element
# becomes a key of its own; for :double tables the values of keys seen more
# than once are concatenated field by field. The new table copies type,
# case_insensitive and identifiers from this one.
#
# The construction is wrapped in Persistence.persist (prefix :Reorder,
# persistence off by default); the first element of its result is returned.
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options, :new_key_field => new_key_field, :new_fields => new_fields, :persistence => false

  reordered, _extra = Persistence.persist(self, :Reorder, :tsv, options) do |_tsv, opts, filename|
    key_name = opts[:new_key_field]
    field_names = opts[:new_fields]

    data = {}
    key_name, field_names = through key_name, field_names do |keys, values|
      if Array === keys
        keys.each do |key|
          if data[key].nil? or type != :double
            data[key] = values.collect { |l| l.dup }
          else
            data[key] = data[key].zip(values).collect { |old_list, new_list| old_list.concat new_list }
          end
        end
      else
        data[keys] = values
      end
      nil
    end

    table = TSV.new data

    table.fields = field_names
    table.key_field = key_name
    table.filename = filename
    table.type = type
    table.case_insensitive = case_insensitive
    table.identifiers = identifiers

    table
  end

  reordered
end
|
151
|
-
|
152
|
-
# Keeps the current key and returns the table reordered to +fields+ only.
def slice(fields)
  reorder(:key, fields)
end
|
155
|
-
|
156
|
-
# Keeps the current key and returns the table reordered to the fields whose
# #namespace equals +namespace+ (selected by position).
def slice_namespace(namespace)
  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if field.namespace == namespace
  end
  reorder :key, positions
end
|
162
|
-
|
163
|
-
# Returns the table's keys ordered by a sort criterion.
#
# fields - field(s) to read through #through; none means the key itself
#
# With a block, the criterion for each row is the block's result for the
# row's values; otherwise it is the first value (the first element of the
# first value for :double tables).
def sort(*fields)
  pos = if fields.empty?
          :key
        elsif fields.length == 1
          identify_field fields.first
        else
          fields.collect { |field| identify_field field }
        end

  pairs = []
  through :key, pos do |key, values|
    criterion = if block_given?
                  yield(values)
                elsif type == :double
                  values.first.first
                else
                  values.first
                end
    pairs << [key, criterion]
  end

  pairs.sort_by { |_key, criterion| criterion }.collect { |key, _criterion| key }
end
|
189
|
-
|
190
|
-
# Returns a new TSV (same key_field, fields, type, filename and
# case_insensitive) with the rows that satisfy +method+:
#
#   nil + block    - rows for which the block, given key and values, is true
#   Array          - rows whose key or values include any of the elements
#   Regexp         - rows whose key or any value matches
#   String         - with a block: rows for which the block is true for the
#                    value of that field (the key when it names key_field);
#                    without: rows whose key or any value equals the string
#   Hash           - {field => Array | Regexp | String}: as above, but only
#                    looking at +field+; an Array on the key field picks the
#                    rows with those keys directly
#
# Anything else yields an empty table.
def select(method = nil)
  selected = TSV.new({})
  selected.key_field = key_field
  selected.fields = fields.dup
  selected.type = type
  selected.filename = filename
  selected.case_insensitive = case_insensitive

  if method.nil? and block_given?
    through do |key, values|
      selected[key] = values if yield(key, values)
    end
  elsif Array === method
    through do |key, values|
      selected[key] = values if ([key, values].flatten & method).any?
    end
  elsif Regexp === method
    through do |key, values|
      selected[key] = values if [key, values].flatten.select { |v| v =~ method }.any?
    end
  elsif String === method
    if block_given?
      through do |key, values|
        probe = (method == key_field or method == :key) ? key : values[method]
        selected[key] = values if yield(probe)
      end
    else
      through do |key, values|
        selected[key] = values if [key, values].flatten.select { |v| v == method }.any?
      end
    end
  elsif Hash === method
    # Only the first field => criterion pair is considered
    field = method.keys.first
    criterion = method.values.first

    if Array === criterion and (field == :key or key_field == field)
      criterion.each { |item| selected[item] = self[item] if self.include? item }
    elsif Array === criterion
      through :key, field do |key, values|
        values = [values] if type == :single
        selected[key] = self[key] if (values.flatten & criterion).any?
      end
    elsif Regexp === criterion
      through :key, field do |key, values|
        values = [values] if type == :single
        selected[key] = self[key] if values.flatten.select { |v| v =~ criterion }.any?
      end
    elsif String === criterion
      through :key, field do |key, values|
        values = [values] if type == :single
        selected[key] = self[key] if values.flatten.select { |v| v == criterion }.any?
      end
    end
  end

  selected
end
|
248
|
-
|
249
|
-
# Rewrites the values of +field+ in place using the given block.
#
# For :flat tables the whole row is handed to the block and replaced by its
# result; for other types the entry of +field+ is handed over and updated
# with #replace. Rows with nothing to process (nil) are skipped.
#
# The block may take 1 (values), 2 (values, key) or 3 (values, key, whole
# row) arguments; any other arity raises "Unknown arity in block".
def process(field, &block)
  through do |key, values|
    field_values = type == :flat ? values : values[field]

    # The guard used to index values[field] even for :flat rows, where the
    # data being processed is the row itself; check what is actually passed on
    next if field_values.nil?

    new_values = case block.arity
                 when 1 then yield(field_values)
                 when 2 then yield(field_values, key)
                 when 3 then yield(field_values, key, values)
                 else raise "Unknown arity in block"
                 end

    if type == :flat
      self[key] = new_values
    else
      values[field].replace new_values
    end
  end
end
|
276
|
-
|
277
|
-
# Appends one field to every row; the block receives (key, values) and
# returns the new field's value.
#
# For :double tables a non-Array result is wrapped in an Array. When the
# table has fields and +name+ is given, the name is appended to them.
#
# Returns self.
def add_field(name = nil)
  through do |key, values|
    addition = yield(key, values)
    addition = [addition] if type == :double && !(Array === addition)

    self[key] = values + [addition]
  end

  unless fields.nil? or name.nil?
    self.fields = self.fields + [name]
  end

  self
end
|
289
|
-
|
290
|
-
# Appends several fields to every row; the block receives (key, values) and
# returns the list of values for the new fields.
#
# The block is called once per row. For :double tables a non-Array result
# is wrapped in an Array before being appended. When the table has fields
# and +names+ is given, the names are appended to the existing field list.
#
# Returns the updated field list, or nil when no names were recorded.
def add_fields(names = nil)
  through do |key, values|
    new_values = yield(key, values)
    # Array === value tests class membership; the previous Array == value
    # compared the class with the value and was always false
    new_values = [new_values] if type == :double and not Array === new_values

    # Reuse the normalised result instead of calling the block a second time
    self[key] = values.concat new_values
  end

  self.fields = self.fields.concat names if fields != nil and names != nil
end
|
300
|
-
end
|