rbbt-util 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/bin/tchash.rb +15 -0
- data/bin/tsv.rb +14 -0
- data/lib/rbbt/util/cachehelper.rb +100 -0
- data/lib/rbbt/util/cmd.rb +140 -0
- data/lib/rbbt/util/data_module.rb +81 -0
- data/lib/rbbt/util/excel2tsv.rb +32 -0
- data/lib/rbbt/util/filecache.rb +58 -0
- data/lib/rbbt/util/log.rb +50 -0
- data/lib/rbbt/util/misc.rb +158 -0
- data/lib/rbbt/util/open.rb +200 -0
- data/lib/rbbt/util/pkg_config.rb +78 -0
- data/lib/rbbt/util/pkg_data.rb +110 -0
- data/lib/rbbt/util/pkg_software.rb +130 -0
- data/lib/rbbt/util/simpleDSL.rb +92 -0
- data/lib/rbbt/util/simpleopt.rb +56 -0
- data/lib/rbbt/util/tc_hash.rb +124 -0
- data/lib/rbbt/util/tmpfile.rb +42 -0
- data/lib/rbbt/util/tsv.rb +804 -0
- data/lib/rbbt-util.rb +13 -0
- data/lib/rbbt.rb +15 -0
- data/share/install/software/lib/install_helpers +257 -0
- data/test/rbbt/util/test_cmd.rb +30 -0
- data/test/rbbt/util/test_data_module.rb +45 -0
- data/test/rbbt/util/test_excel2tsv.rb +10 -0
- data/test/rbbt/util/test_filecache.rb +36 -0
- data/test/rbbt/util/test_misc.rb +22 -0
- data/test/rbbt/util/test_open.rb +89 -0
- data/test/rbbt/util/test_simpleDSL.rb +55 -0
- data/test/rbbt/util/test_simpleopt.rb +10 -0
- data/test/rbbt/util/test_tc_hash.rb +18 -0
- data/test/rbbt/util/test_tmpfile.rb +20 -0
- data/test/rbbt/util/test_tsv.rb +652 -0
- data/test/test_helper.rb +9 -0
- data/test/test_pkg.rb +38 -0
- data/test/test_rbbt.rb +90 -0
- metadata +185 -0
@@ -0,0 +1,804 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/tc_hash'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'digest'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
# Returns a copy of +options+ where every key of +defaults+ whose current
# value is nil (or that is missing) has been set to the default value.
# The +options+ hash passed in is not modified; values such as +false+ are
# kept because only nil counts as "unset".
def add_defaults(options, defaults = {})
  defaults.each_with_object(options.dup) do |(name, fallback), merged|
    merged[name] = fallback if merged[name].nil?
  end
end
|
15
|
+
|
16
|
+
class TSV
|
17
|
+
class FieldNotFoundError < StandardError;end
|
18
|
+
|
19
|
+
#{{{ Persistence
|
20
|
+
|
21
|
+
PersistenceHash = TCHash
|
22
|
+
|
23
|
+
# Directory holding the persistent (on-disk) hashes. The String is kept
# mutable on purpose: TSV.cachedir= updates it in place with String#replace.
CACHEDIR = "/tmp/tsv_persistent_cache".dup
# mkdir_p is a no-op when the directory already exists, so there is no
# window between an existence check and the creation in which a second
# process could create the directory and make mkdir raise Errno::EEXIST.
FileUtils.mkdir_p CACHEDIR
|
25
|
+
|
26
|
+
# Points the persistence cache at +cachedir+. The CACHEDIR string is
# rewritten in place and the directory (with any missing parents) is
# created when it does not exist yet.
def self.cachedir=(cachedir)
  CACHEDIR.replace(cachedir)
  FileUtils.mkdir_p(CACHEDIR) if !File.exist?(CACHEDIR)
end
|
30
|
+
|
31
|
+
# Returns the CACHEDIR string itself (not a copy).
def self.cachedir
  CACHEDIR
end
|
34
|
+
|
35
|
+
# Builds the path, inside CACHEDIR, of the persistence file for +file+.
# The file name is +prefix+ (whitespace turned into '_' and '/' into '>')
# followed by the MD5 hex digest of the inspected [file, options] pair.
def self.get_persistence_file(file, prefix, options = {})
  safe_prefix = prefix.gsub(/\s/,'_').gsub(/\//,'>')
  signature   = Digest::MD5.hexdigest([file, options].inspect)
  File.join(CACHEDIR, safe_prefix + signature)
end
|
38
|
+
|
39
|
+
# Debug output is enabled at load time when TSV_DEBUG is exactly "true".
@debug = ENV['TSV_DEBUG'] == "true"

# Writes +message+ to STDERR, but only while the debug flag is exactly true.
def self.log(message)
  return unless @debug == true
  STDERR.puts message
end
|
43
|
+
|
44
|
+
# Sets the debug flag read by TSV.log; messages are printed only when the
# flag is exactly +true+.
def self.debug=(value)
  @debug = value
end
|
47
|
+
|
48
|
+
# Reads the first line of +file+ and returns its header fields, or nil when
# the line does not start with the header mark.
#
# +file+ may carry an option suffix ("path#options"); when the part before
# the '#' exists on disk, the suffix is parsed with Misc.string2hash and
# merged into +options+ (the hash passed by the caller is updated).
#
# Options: :sep (field separator, default "\t") and :header_hash (mark that
# introduces the header line, default "#").
def self.headers(file, options = {})
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if file =~ /(.*)#(.*)/ and File.exist? $1
    options.merge! Misc.string2hash $2
    file = $1
  end

  options = Misc.add_defaults options, :sep => "\t", :header_hash => "#"

  io = Open.open(file)
  begin
    line = io.gets
  ensure
    # Close the stream even when reading the first line fails.
    io.close
  end

  if line =~ /^#{options[:header_hash]}/
    line.chomp.sub(/^#{options[:header_hash]}/,'').split(options[:sep])
  else
    nil
  end
end
|
65
|
+
|
66
|
+
#{{{ Accessor Methods
|
67
|
+
|
68
|
+
# Keys of the underlying data hash, exactly as stored.
def keys
  @data.keys
end
|
71
|
+
|
72
|
+
# Raw values of the underlying data hash ("__Ref:" pointers are returned
# as stored, they are not followed here).
def values
  @data.values
end
|
75
|
+
|
76
|
+
# Number of entries in the underlying data hash.
def size
  @data.size
end
|
79
|
+
|
80
|
+
# Write
|
81
|
+
|
82
|
+
# Stores +value+ under +key+; the key is downcased first when the table is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
|
86
|
+
|
87
|
+
|
88
|
+
# Copies every key/value pair of +new_data+ into this table through #[]=,
# so case-insensitive key handling applies to the incoming keys as well.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
|
93
|
+
|
94
|
+
# Read
|
95
|
+
|
96
|
+
# Resolves a stored value. A String containing "__Ref:<key>" is a pointer
# and is replaced by the entry stored under <key>; an Array is wrapped with
# NamedArray.name when field names are known; anything else is returned
# untouched.
def follow(value)
  return self[$1] if String === value && value =~ /__Ref:(.*)/

  value = NamedArray.name(value, fields) if Array === value && fields
  value
end
|
104
|
+
# Looks up +key+. When +key+ is an Array it is first tried as a literal key
# (returning the raw stored value); otherwise each element is tried in turn
# and the first non-nil result wins. Plain keys are downcased when the table
# is case insensitive and the stored value is resolved with #follow.
def [](key)
  if Array === key
    literal = @data[key]
    return literal unless literal.nil?

    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  lookup = @case_insensitive ? key.downcase : key
  follow @data[lookup]
end
|
114
|
+
|
115
|
+
# Returns the entries for each of +keys+, in order, using #[] for every
# lookup (so references are followed and misses yield nil).
def values_at(*keys)
  keys.collect { |k| self[k] }
end
|
120
|
+
|
121
|
+
# Calls +block+ with every key and its resolved value (see #follow).
def each(&block)
  @data.each { |key, raw| block.call(key, follow(raw)) }
end
|
126
|
+
|
127
|
+
# With a block, returns the block's result for every key and resolved
# value; without one, returns the list of [key, resolved value] pairs.
def collect
  unless block_given?
    return @data.collect { |key, raw| [key, follow(raw)] }
  end

  @data.collect { |key, raw| yield key, follow(raw) }
end
|
139
|
+
|
140
|
+
# Returns the [key, value] pairs sorted (optionally with +block+ as the
# comparison); values are wrapped with NamedArray.name when field names are
# known.
def sort(&block)
  collect.sort(&block).collect do |key, value|
    value = NamedArray.name(value, fields) if fields
    [key, value]
  end
end
|
147
|
+
|
148
|
+
# Returns the [key, value] pairs ordered by the result of +block+.
def sort_by(&block)
  collect.sort_by(&block)
end
|
151
|
+
|
152
|
+
#{{{ Parsing
|
153
|
+
|
154
|
+
# Splits the string +io+ on +delimiter+, keeping trailing empty fields
# (split limit -1). A nil input yields an empty list.
def self.parse_fields(io, delimiter = "\t")
  io.nil? ? [] : io.split(delimiter, -1)
end
|
159
|
+
|
160
|
+
# Transposes a list of parallel value lists: the i-th result holds the i-th
# element of every list. When field names are given (or +list+ responds to
# +fields+), every resulting tuple is wrapped with NamedArray.name.
# A nil or empty +list+ yields an empty list.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields ||= list.fields if list.respond_to? :fields
  tuples = list[0].zip(*list[1..-1])
  fields ? tuples.collect { |tuple| NamedArray.name(tuple, fields) } : tuples
end
|
167
|
+
|
168
|
+
# Reads the lines of +file+ (anything responding to +gets+) into +data+ and
# returns [key_field, fields] taken from the header line (both nil when
# there is no header).
#
# Each line is split on :sep; the column at :native is the identifier and is
# itself split on :sep2. The first identifier owns the entry, any further
# identifier is stored as the String "__Ref:<first id>".
#
# Options (defaults in parentheses):
#   :sep ("\t"), :sep2 ("|")   field and sub-field separators
#   :native (0)                identifier column, resolved by Misc.field_position
#   :extra (nil)               value column(s); nil keeps every other column
#   :fix, :exclude, :select    callables applied to each raw line
#   :single, :unique, :flatten shape of the stored values (:single implies :flatten)
#   :overwrite (false)         later lines replace earlier ones instead of merging
#   :keep_empty (true)         keep empty fields and pad rows to the widest row
#   :case_insensitive (false)  downcase identifiers
#   :header_hash ('#')         prefix marking the header line
#
# Raises RuntimeError ("Empty content") when +file+ yields no line at all.
def self.parse(data, file, options = {})

  # Prepare options
  options = add_defaults options,
    :sep              => "\t",
    :sep2             => "|",
    :native           => 0,
    :extra            => nil,
    :fix              => nil,
    :exclude          => nil,
    :select           => nil,
    :grep             => nil,
    :single           => false,
    :unique           => false,
    :flatten          => false,
    :overwrite        => false,
    :keep_empty       => true,
    :case_insensitive => false,
    :header_hash      => '#' ,
    :persistence_file => nil

  options[:extra]   = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
  options[:flatten] = true if options[:single]

  #{{{ Process first line

  line = file.gets
  raise "Empty content" if line.nil?
  line.chomp!

  if line =~ /^#{options[:header_hash]}/
    header_fields    = parse_fields(line, options[:sep])
    header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
    line = file.gets
  else
    header_fields = nil
  end

  id_pos = Misc.field_position(header_fields, options[:native])

  if options[:extra].nil?
    extra_pos = nil
    max_cols  = 0
  else
    # max_cols stays nil here, which disables the padding step below.
    extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
  end

  #{{{ Process rest
  while line do
    line.chomp!

    line = options[:fix].call line if options[:fix]

    # Select and fix lines
    if (options[:exclude] and options[:exclude].call(line)) or
       (options[:select] and not options[:select].call(line))
      line = file.gets
      next
    end

    ### Process line

    # Chunk fields
    parts = parse_fields(line, options[:sep])

    # Get next line
    line = file.gets

    # Get id field
    next if parts[id_pos].nil? || parts[id_pos].empty?
    ids = parse_fields(parts[id_pos], options[:sep2])
    ids.collect!{|id| id.downcase } if options[:case_insensitive]

    # Get extra fields
    if options[:extra].nil? and not (options[:flatten] or options[:single])
      extra = parts
      extra.delete_at(id_pos)
      max_cols = extra.size if extra.size > (max_cols || 0)
    else
      if extra_pos.nil?
        extra = parts
        extra.delete_at id_pos
      else
        extra = parts.values_at(*extra_pos)
      end
    end

    extra.collect!{|value| parse_fields(value, options[:sep2])}
    extra.collect!{|values| values.first} if options[:unique]
    extra.flatten! if options[:flatten]
    extra = extra.first if options[:single]

    # The first id owns the entry; the remaining ids point at it.
    main_entry = ids.shift
    ids.each do |id|
      data[id] = "__Ref:#{main_entry}"
    end

    if options[:overwrite]
      data[main_entry] = extra
    else
      case
      when (options[:single] or options[:unique])
        data[main_entry] ||= extra
      when options[:flatten]
        if PersistenceHash === data
          # Read, extend and write back explicitly for the persistent hash.
          data[main_entry] = (data[main_entry] || []).concat extra
        else
          data[main_entry] ||= []
          data[main_entry].concat extra
        end
      else
        entry = data[main_entry] || []
        # Only Strings can be "__Ref:" pointers. The explicit type check is
        # required because =~ is not defined for Arrays (Object#=~ was
        # removed in Ruby 3.2), which is what an existing entry is.
        while String === entry && entry =~ /__Ref:(.*)/ do
          entry = data[$1]
        end

        extra.each_with_index do |fields, i|
          if fields.empty?
            next unless options[:keep_empty]
            fields = [""]
          end
          entry[i] ||= []
          entry[i] = entry[i].concat fields
        end

        data[main_entry] = entry
      end
    end
  end

  if options[:keep_empty] and not max_cols.nil?
    data.each do |key, values|
      # "__Ref:" pointers are Strings, not rows: indexing into them past
      # their length would raise a TypeError, so they are left alone.
      next unless Array === values
      max_cols.times do |i|
        values[i] ||= [""]
      end
      data[key] = values
    end
  end

  # Save header information
  key_field = nil
  fields    = nil
  if header_fields && header_fields.any?
    key_field = header_fields[id_pos]
    if extra_pos.nil?
      fields = header_fields
      fields.delete_at(id_pos)
    else
      fields = header_fields.values_at(*extra_pos)
    end
  end

  data.read if PersistenceHash === data

  [key_field, fields]
end
|
334
|
+
|
335
|
+
attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
|
336
|
+
# Builds a TSV from +file+, which may be:
# * another TSV: its data, header information and flags are shared;
# * a Hash or PersistenceHash: used directly as the data;
# * a File, the path of an existing file, or any stream responding to
#   +gets+ (e.g. a StringIO): parsed with TSV.parse.
#
# Options are passed on to TSV.parse. In addition :persistence makes the
# parsed data live in a PersistenceHash under TSV.cachedir (reused when the
# persistence file already exists) and :grep filters the input through
# Open.grep first. Note that :persistence is removed from the +options+
# hash that was passed in.
#
# Raises RuntimeError when +file+ is none of the above (for instance the
# path of a file that does not exist).
def initialize(file = {}, options = {})
  @case_insensitive = options[:case_insensitive] == true
  @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)

  case
  when TSV === file
    @filename         = file.filename
    @data             = file.data
    @key_field        = file.key_field
    @fields           = file.fields
    @case_insensitive = file.case_insensitive
    # The accessor is +list+; no +is_list+ method is defined in this class.
    @list             = file.list
    return self
  when (Hash === file or PersistenceHash === file)
    @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
    @data     = file
    return self
  when File === file
    @filename = File.expand_path file.path
  when String === file && File.exist?(file)
    @filename = File.expand_path file
    file = Open.open(file)
  when file.respond_to?(:gets)
    # Generic stream (StringIO and the like): there is no file name.
    # A bare `when StringIO` would be truthy for every input in a
    # subject-less case and would hide the error branch below.
  else
    raise "File #{file} not found"
  end

  if options[:persistence]
    options.delete :persistence
    persistence_file = TSV.get_persistence_file @filename, "file:#{ @filename }:", options

    if File.exist? persistence_file
      TSV.log "Loading Persistence for #{ @filename } in #{persistence_file}"
      # NOTE(review): the second argument of PersistenceHash.get is false
      # when loading and true when creating -- presumably a write flag;
      # confirm against TCHash.
      @data      = PersistenceHash.get(persistence_file, false)
      @key_field = @data.key_field
      @fields    = @data.fields
    else
      @data = PersistenceHash.get(persistence_file, true)
      file  = Open.grep(file, options[:grep]) if options[:grep]

      TSV.log "Persistent Parsing for #{ @filename } in #{persistence_file}"
      @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
      @data.key_field     = @key_field
      @data.fields        = @fields
      @data.read
    end
  else
    TSV.log "Non-persistent parsing for #{ @filename }"
    @data = {}
    file  = Open.grep(file, options[:grep]) if options[:grep]
    @key_field, @fields = TSV.parse(@data, file, options)
  end

  file.close
  @case_insensitive = options[:case_insensitive] == true
end
|
392
|
+
|
393
|
+
|
394
|
+
# Serializes the table as tab separated text: an optional "#key\tfields"
# header line followed by one line per entry. Lists of lists have their
# inner values joined with "|"; entries without values print the key alone.
def to_s
  out = ""
  out << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  each do |key, values|
    out << key.dup
    if values.nil?
      # key only
    elsif !(Array === values)
      out << "\t" << values.to_s
    elsif Array === values.first
      out << "\t" << values.collect{|list| (list || []) * "|"} * "\t"
    else
      out << "\t" << values * "\t"
    end
    out << "\n"
  end

  out
end
|
416
|
+
|
417
|
+
#{{{ New
|
418
|
+
|
419
|
+
# Tells whether +field+ is the key field or one of +fields+.
# +fields+ may be nil (a table without header information), in which case
# only the key field can match.
def self.fields_include(key_field, fields, field)
  return true if field == key_field
  return false if fields.nil?
  fields.include? field
end
|
423
|
+
|
424
|
+
# Translates each of +selected+ into a position: -1 for nil, :main or the
# key field itself; the number itself for Integers; otherwise whatever
# Misc.field_position reports for that name among +fields+.
def self.field_positions(key_field, fields, *selected)
  selected.collect do |sel|
    if sel.nil? || sel == :main || sel == key_field
      -1
    elsif Integer === sel
      sel
    else
      Misc.field_position fields, sel
    end
  end
end
|
436
|
+
|
437
|
+
# Tells whether +field+ is this table's key field or one of its fields.
def fields_include(field)
  TSV.fields_include(key_field, fields, field)
end
|
440
|
+
|
441
|
+
# Positions of the +selected+ fields in this table (see
# TSV.field_positions). Returns nil when the only selection is nil.
def field_positions(*selected)
  if selected.nil? || selected == [nil]
    nil
  else
    TSV.field_positions(key_field, fields, *selected)
  end
end
|
445
|
+
|
446
|
+
# Names of the fields at +positions+. The key field is appended after the
# regular fields, so position -1 names the key. Returns nil when the table
# has no field names or when the only position given is nil.
def fields_at(*positions)
  return nil if fields.nil? || positions.nil? || positions == [nil]

  (fields + [key_field]).values_at(*positions)
end
|
451
|
+
|
452
|
+
# Iterates over the table as if it were keyed by +new_key_field+ and
# restricted to +new_fields+, yielding key and values for every entry.
# Returns [key field name, field names] describing what was yielded.
#
# * With the current key (nil, :main or the key field name) the entries
#   are yielded as they are, or narrowed to +new_fields+ when given.
# * With another field as key, every value of that field becomes a key.
#   When +new_fields+ is nil the yielded values are the old key followed by
#   all remaining fields.
def through(new_key_field = nil, new_fields = nil, &block)
  new_key_position = (field_positions(new_key_field) || [-1]).first

  if new_key_position == -1

    if new_fields.nil? or new_fields == fields
      each(&block)
      return [key_field, fields]
    else
      new_field_positions = field_positions(*new_fields)
      each do |key, values|
        yield key, values.values_at(*new_field_positions)
      end
      return [key_field, fields_at(*new_field_positions)]
    end

  else
    # Splatting nil passes no arguments on Ruby >= 1.9, so
    # field_positions(*nil) would return [] (select nothing) rather than nil
    # (select everything). Make the "no selection" case explicit.
    new_field_positions = new_fields.nil? ? nil : field_positions(*new_fields)

    new_field_names = new_field_positions.nil? ? nil : fields_at(*new_field_positions)
    if new_field_names.nil? and fields
      new_field_names = fields.dup
      new_field_names.delete_at new_key_position
      new_field_names.unshift key_field
    end

    each do |key, values|
      # The old key is appended so that position -1 refers to it.
      if list
        tmp_values = values + [[key]]
      else
        tmp_values = values + [key]
      end

      if new_field_positions.nil?
        new_values = values.dup
        new_values.delete_at new_key_position
        new_values.unshift [key]
      else
        new_values = tmp_values.values_at(*new_field_positions)
      end

      new_keys = tmp_values[new_key_position]
      next if new_keys.nil?
      # Flat (non-list) tables hold a single String here; Strings do not
      # respond to +each+ on Ruby >= 1.9.
      new_keys = [new_keys] unless Array === new_keys

      new_keys.each do |new_key|
        if new_field_names
          yield new_key, NamedArray.name(new_values, new_field_names)
        else
          yield new_key, new_values
        end
      end
    end
    return [(fields_at(new_key_position) || [nil]).first, new_field_names]
  end
end
|
504
|
+
|
505
|
+
# Rewrites the value stored at +field+ of every entry in place: the block
# receives the current value, the key and the whole row, and its result
# replaces the contents of the current value. Entries whose value at
# +field+ is nil are skipped.
def process(field)
  through do |key, values|
    next if values[field].nil?
    values[field].replace yield(values[field], key, values)
  end
end
|
510
|
+
|
511
|
+
|
512
|
+
# Returns a new TSV keyed by +new_key_field+ and holding +new_fields+
# (see #through for how both are interpreted). Rows that end up under the
# same new key are merged position by position.
#
# Options: :persistence_file -- when that file already exists the TSV is
# loaded from it and nothing is computed; otherwise the result is stored in
# a PersistenceHash opened on that path.
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if options[:persistence_file] and File.exist?(options[:persistence_file])
    return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
  end

  new = {}
  new_key_field, new_fields = through(new_key_field, new_fields) do |key, values|
    if new[key].nil?
      new[key] = values
    else
      # Pair up the values seen so far with the new ones; the nesting this
      # creates is flattened below.
      new[key] = new[key].zip(values)
    end
  end

  new.each do |key, values|
    values.each{|list| list.flatten! if Array === list}
  end

  if options[:persistence_file]
    reordered = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
    reordered.merge! new
  else
    reordered = TSV.new(new, :case_insensitive => case_insensitive)
  end

  reordered.key_field = new_key_field
  reordered.fields    = new_fields

  reordered
end
|
541
|
+
|
542
|
+
# Returns a new TSV with the same key but only +new_fields+ as values.
# NOTE: +options+ is accepted but not passed on to #reorder.
def slice(new_fields, options = {})
  reorder :main, new_fields
end
|
545
|
+
|
546
|
+
# Builds a TSV that maps every value found in the table (and every key) to
# the list of keys under which it appears, iterating with
# through(options[:field], options[:others]).
#
# Options:
#   :field, :others    key field and value fields handed to #through
#   :order (false)     when true, keys are grouped by the position of the
#                      field the value was found in (the key itself first)
#   :case_insensitive  downcase the indexed values
#   :persistence       derive :persistence_file from the table's file name
#   :persistence_file  load the index from this file when it exists,
#                      otherwise store the new index through it
def index(options = {})
  options = Misc.add_defaults options, :order => false

  if options[:persistence] and ! options[:persistence_file]
    options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
  end

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if options[:persistence_file] and File.exist?(options[:persistence_file])
    return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
  end

  new = {}
  if options[:order]
    new_key_field, new_fields = through(options[:field], options[:others]) do |key, values|

      values.each_with_index do |list, i|
        next if list.nil? or list.empty?

        list = [list] unless Array === list

        list.each do |value|
          next if value.nil? or value.empty?
          value = value.downcase if options[:case_insensitive]
          # Slot 0 is reserved for the key itself, hence i + 1.
          new[value]        ||= []
          new[value][i + 1] ||= []
          new[value][i + 1] << key
        end
        new[key]   ||= []
        new[key][0]  = key
      end

    end

    # Collapse the per-position slots into one ordered list of keys.
    new.each do |key, values|
      values.flatten!
      values.compact!
    end

  else
    new_key_field, new_fields = through(options[:field], options[:others]) do |key, values|
      new[key] ||= []
      new[key] << key
      values.each do |list|
        next if list.nil?
        if Array === list
          list.each do |value|
            value = value.downcase if options[:case_insensitive]
            new[value] ||= []
            new[value] << key
          end
        else
          next if list.empty?
          value = list
          value = value.downcase if options[:case_insensitive]
          new[value] ||= []
          new[value] << key
        end
      end
    end
  end

  if options[:persistence_file]
    index = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
    index.merge! new
  else
    index = TSV.new(new, :case_insensitive => options[:case_insensitive])
  end

  index.key_field = new_key_field
  index.fields    = new_fields
  index
end
|
618
|
+
|
619
|
+
# Merges the values of +other+ into this table in place and extends
# +fields+ with the new field names (when both tables have field names).
#
# +match+ selects how rows of both tables are paired:
# * a TSV: used as a translation index for the matching codes;
# * an Array [index, field]: translation index plus the field of this
#   table holding the codes;
# * "through:<file>" (optionally "through:<file>#using:<field>"): the
#   translation index is built from <file>;
# * a field name or position of this table holding the codes.
#
# +new_fields+ names the fields of +other+ to add; it defaults to the
# fields of +other+ that this table does not have.
def smart_merge(other, match = nil, new_fields = nil)

  new_fields = [new_fields] if String === new_fields
  if self.fields and other.fields
    common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
    new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)

    common_fields.delete match    if String === match
    common_fields.delete_at match if Integer === match

    this_common_field_positions  = self.field_positions(*common_fields)
    other_common_field_positions = other.field_positions(*common_fields)
    other_new_field_positions    = other.field_positions(*new_fields)
  else
    nofieldinfo = true
  end

  case
  when TSV === match
    match_index = match
    matching_code_position = nil

  when Array === match
    match_index = match.first
    matching_code_position = field_positions(match.last).first

  # Only Strings are matched against the pattern: =~ is not defined for
  # Integers and other plain objects since Object#=~ was removed in Ruby 3.2.
  when (String === match && match =~ /^through:(.*)/)
    through_file = $1
    if through_file =~ /(.*)#using:(.*)/
      through_file = $1
      matching_code_position = field_positions($2).first
    else
      matching_code_position = nil
    end
    index_fields = TSV.headers(through_file)
    target_field = index_fields.select{|field| other.fields_include field}.first
    Log.debug "Target Field: #{ target_field }"
    match_index = TSV.open_file(through_file).index(:field => target_field)

  when field_positions(match).first
    matching_code_position = field_positions(match).first
    match_index = nil
  end

  if matching_code_position.nil? and match_index.fields
    # Use the first field of the index that this table also knows.
    match_index.fields.each do |field|
      if matching_code_position = field_positions(field).first
        break
      end
    end
  end

  if match_index and match_index.key_field == other.key_field
    other_index = nil
  else
    # `String === match` is the type test; the reversed form
    # `match === String` compares the value with the class and is never true.
    # NOTE(review): #index reads :others, not :other, so the :other option
    # below currently has no effect -- confirm the intended key.
    other_index = (String === match and other.fields_include(match)) ?
      other.index(:other => match, :order => true) : other.index(:order => true)
  end

  each do |key,values|
    Log.debug "Key: #{ key }. Values: #{values * ", "}"
    if matching_code_position.nil? or matching_code_position == -1
      matching_codes = [key]
    else
      matching_codes = values[matching_code_position]
      matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
    end
    Log.debug "Matching codes: #{matching_codes}"

    next if matching_codes.nil?

    matching_codes.each do |matching_code|
      if match_index
        if match_index[matching_code]
          matching_code_fix = match_index[matching_code].first
        else
          matching_code_fix = nil
        end
      else
        matching_code_fix = matching_code
      end

      Log.debug "Matching code (fix): #{matching_code_fix}"
      next if matching_code_fix.nil?

      if other_index
        Log.debug "Using other_index"
        other_codes = other_index[matching_code_fix]
      else
        other_codes = matching_code_fix
      end
      Log.debug "Other codes: #{other_codes}"

      next if other_codes.nil? or other_codes.empty?
      other_code = other_codes.first

      if nofieldinfo
        next if other[other_code].nil?
        if list
          other_values = [[other_code]] + other[other_code]
        else
          other_values = [other_code] + other[other_code]
        end
        # Drop the columns of +other+ that just repeat the matching code.
        other_values.delete_if do |cell|
          cell = [cell] unless Array === cell
          cell.collect{|e| case_insensitive ? e.downcase : e }.
            select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
        end

        new_values = values + other_values
      else
        if other[other_code].nil?
          if list
            # One independent empty list per field; `[[]] * n` would put the
            # same Array object in every position.
            other_values = Array.new(other.fields.length) { [] }
          else
            other_values = [] * other.fields.length
          end
        else
          # The key of +other+ goes last so that position -1 refers to it.
          if list
            other_values = other[other_code] + [[other_code]]
          else
            other_values = other[other_code] + [other_code]
          end
        end

        new_values = values.dup

        if list
          this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
            new_values_tops = new_values[tpos]

            if other.list
              new_values_tops += other_values[opos]
            else
              new_values_tops += [other_values[opos]]
            end

            new_values[tpos] = new_values_tops.uniq
          end
        end

        new_values.concat other_values.values_at(*other_new_field_positions)
      end

      self[key] = new_values
    end
  end

  self.fields = self.fields + new_fields unless nofieldinfo
end
|
770
|
+
|
771
|
+
#{{{ Helpers
|
772
|
+
|
773
|
+
# Returns an index (see #index) for the TSV stored in +file+.
#
# With :persistence the index is kept in a persistence file derived from
# +file+ and :field; when that file already exists it is loaded and +file+
# is not parsed at all. :data_persistence enables persistence for the
# parsed data itself. :field is used for the index only and is not passed
# to the parser.
def self.index(file, options = {})
  opt_data  = options.dup
  opt_index = options.dup
  opt_data.delete  :field
  opt_data.delete  :persistence
  opt_index.delete :persistence

  opt_data[:persistence] = true if options[:data_persistence]

  opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if ! opt_index[:persistence_file].nil? && File.exist?(opt_index[:persistence_file])
    TSV.log "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
    TSV.new(PersistenceHash.get(opt_index[:persistence_file], false), opt_index)
  else
    TSV.log "Creating index for #{ file }: #{opt_index[:persistence_file]}"
    data = TSV.new(file, opt_data)
    data.index(opt_index)
  end
end
|
793
|
+
|
794
|
+
# Opens a TSV from a "path#options" specification: everything after the
# first '#' is turned into an options hash by Misc.string2hash; a plain
# path is opened with no options.
def self.open_file(file)
  options = {}
  if file =~ /(.*?)#(.*)/
    path, spec = $1, $2.to_s
    options = Misc.string2hash(spec)
    file = path
  end

  TSV.new(file, options)
end
|
803
|
+
|
804
|
+
end
|