rbbt-util 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/bin/tchash.rb +15 -0
- data/bin/tsv.rb +14 -0
- data/lib/rbbt/util/cachehelper.rb +100 -0
- data/lib/rbbt/util/cmd.rb +140 -0
- data/lib/rbbt/util/data_module.rb +81 -0
- data/lib/rbbt/util/excel2tsv.rb +32 -0
- data/lib/rbbt/util/filecache.rb +58 -0
- data/lib/rbbt/util/log.rb +50 -0
- data/lib/rbbt/util/misc.rb +158 -0
- data/lib/rbbt/util/open.rb +200 -0
- data/lib/rbbt/util/pkg_config.rb +78 -0
- data/lib/rbbt/util/pkg_data.rb +110 -0
- data/lib/rbbt/util/pkg_software.rb +130 -0
- data/lib/rbbt/util/simpleDSL.rb +92 -0
- data/lib/rbbt/util/simpleopt.rb +56 -0
- data/lib/rbbt/util/tc_hash.rb +124 -0
- data/lib/rbbt/util/tmpfile.rb +42 -0
- data/lib/rbbt/util/tsv.rb +804 -0
- data/lib/rbbt-util.rb +13 -0
- data/lib/rbbt.rb +15 -0
- data/share/install/software/lib/install_helpers +257 -0
- data/test/rbbt/util/test_cmd.rb +30 -0
- data/test/rbbt/util/test_data_module.rb +45 -0
- data/test/rbbt/util/test_excel2tsv.rb +10 -0
- data/test/rbbt/util/test_filecache.rb +36 -0
- data/test/rbbt/util/test_misc.rb +22 -0
- data/test/rbbt/util/test_open.rb +89 -0
- data/test/rbbt/util/test_simpleDSL.rb +55 -0
- data/test/rbbt/util/test_simpleopt.rb +10 -0
- data/test/rbbt/util/test_tc_hash.rb +18 -0
- data/test/rbbt/util/test_tmpfile.rb +20 -0
- data/test/rbbt/util/test_tsv.rb +652 -0
- data/test/test_helper.rb +9 -0
- data/test/test_pkg.rb +38 -0
- data/test/test_rbbt.rb +90 -0
- metadata +185 -0
@@ -0,0 +1,804 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
require 'rbbt/util/open'
|
3
|
+
require 'rbbt/util/tc_hash'
|
4
|
+
require 'rbbt/util/tmpfile'
|
5
|
+
require 'digest'
|
6
|
+
require 'fileutils'
|
7
|
+
|
8
|
+
# Returns a copy of +options+ in which every key of +defaults+ whose current
# value is nil (missing or explicitly nil) has been set to the default value.
# The +options+ hash passed in is left untouched; +false+ values are kept.
def add_defaults(options, defaults = {})
  defaults.each_with_object(options.dup) do |(name, fallback), merged|
    merged[name] = fallback if merged[name].nil?
  end
end
|
15
|
+
|
16
|
+
class TSV
|
17
|
+
class FieldNotFoundError < StandardError;end
|
18
|
+
|
19
|
+
#{{{ Persistence
|
20
|
+
|
21
|
+
PersistenceHash = TCHash
|
22
|
+
|
23
|
+
CACHEDIR="/tmp/tsv_persistent_cache"
|
24
|
+
FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
|
25
|
+
|
26
|
+
# Points the persistence cache at +cachedir+. The CACHEDIR string is updated
# in place (so existing references see the new path) and the directory is
# created, including parents, when nothing exists at that path yet.
def self.cachedir=(cachedir)
  CACHEDIR.replace cachedir
  return if File.exist? CACHEDIR
  FileUtils.mkdir_p CACHEDIR
end
|
30
|
+
|
31
|
+
# Directory currently used to store persistence files.
def self.cachedir
  CACHEDIR
end
|
34
|
+
|
35
|
+
# Builds the path of the persistence file for +file+ parsed with +options+.
# The file name is +prefix+ (whitespace turned into '_' and '/' into '>')
# followed by the MD5 digest of the inspected [file, options] pair, placed
# inside CACHEDIR.
def self.get_persistence_file(file, prefix, options = {})
  safe_prefix = prefix.gsub(/\s/, '_').tr('/', '>')
  signature   = Digest::MD5.hexdigest([file, options].inspect)
  File.join(CACHEDIR, safe_prefix + signature)
end
|
38
|
+
|
39
|
+
@debug = ENV['TSV_DEBUG'] == "true"
|
40
|
+
# Writes +message+ to STDERR, but only when debugging has been switched on
# (the @debug flag is exactly +true+).
def self.log(message)
  return unless @debug == true
  STDERR.puts message
end
|
43
|
+
|
44
|
+
# Turns debug logging on (+true+) or off (any other value).
def self.debug=(value)
  @debug = value
end
|
47
|
+
|
48
|
+
# Reads the header line of +file+ and returns its field names, or nil when
# the first line does not start with the header mark.
#
# +file+ may carry inline options after a '#' ("path#options"); they are
# decoded with Misc.string2hash when the part before the '#' is an existing
# file. Recognised options: :sep (field separator, default tab) and
# :header_hash (header mark, default "#").
#
# The caller's +options+ hash is not modified, and the stream is closed even
# if reading fails.
def self.headers(file, options = {})
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if file =~ /(.*)#(.*)/ and File.exist? $1
    path, inline_options = $1, $2
    options = options.merge Misc.string2hash(inline_options)
    file = path
  end

  options = Misc.add_defaults options, :sep => "\t", :header_hash => "#"

  io = Open.open(file)
  begin
    line = io.gets
  ensure
    io.close
  end

  if line =~ /^#{options[:header_hash]}/
    line.chomp.sub(/^#{options[:header_hash]}/,'').split(options[:sep])
  else
    nil
  end
end
|
65
|
+
|
66
|
+
#{{{ Accessor Methods
|
67
|
+
|
68
|
+
# Keys of the underlying data store, exactly as stored.
def keys
  @data.keys
end
|
71
|
+
|
72
|
+
# Raw values of the underlying data store (references are not followed).
def values
  @data.values
end
|
75
|
+
|
76
|
+
# Number of entries in the underlying data store.
def size
  @data.size
end
|
79
|
+
|
80
|
+
# Write
|
81
|
+
|
82
|
+
# Stores +value+ under +key+; the key is downcased first when the table is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
|
86
|
+
|
87
|
+
|
88
|
+
# Copies every key/value pair of +new_data+ into this table through #[]=,
# so case-insensitive key handling applies. Existing keys are overwritten.
def merge!(new_data)
  new_data.each { |entry_key, entry_value| self[entry_key] = entry_value }
end
|
93
|
+
|
94
|
+
# Read
|
95
|
+
|
96
|
+
# Resolves a stored value into what callers should see:
# * a "__Ref:<key>" string is replaced by the entry stored under <key>;
# * an Array is wrapped with NamedArray when field names are known;
# * anything else is returned unchanged.
def follow(value)
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return value unless Array === value and fields
  NamedArray.name value, fields
end
|
104
|
+
# Looks up +key+.
#
# When +key+ is an Array it is first tried as a literal key (the raw stored
# value is returned); otherwise each element is looked up in turn and the
# first non-nil result wins. For any other key the stored value is resolved
# with #follow, downcasing the key first on case-insensitive tables.
def [](key)
  if Array === key
    return @data[key] if @data[key] != nil
    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  lookup_key = @case_insensitive ? key.downcase : key
  follow @data[lookup_key]
end
|
114
|
+
|
115
|
+
# Returns the entries for each of +keys+, in order, looked up through #[].
def values_at(*keys)
  keys.map { |wanted| self[wanted] }
end
|
120
|
+
|
121
|
+
# Calls +block+ with every key and its resolved value (see #follow).
def each(&block)
  @data.each { |key, stored| block.call(key, follow(stored)) }
end
|
126
|
+
|
127
|
+
# With a block: yields each key with its resolved value and gathers the
# block results. Without a block: returns [key, resolved value] pairs.
def collect
  return @data.collect { |key, stored| [key, follow(stored)] } unless block_given?

  @data.collect do |key, stored|
    yield key, follow(stored)
  end
end
|
139
|
+
|
140
|
+
# Sorts the [key, value] pairs of the table (optionally with +block+ as the
# comparator) and re-wraps each value with NamedArray when field names are
# known.
def sort(&block)
  ordered = collect.sort(&block)
  ordered.collect do |key, value|
    value = NamedArray.name value, fields if fields
    [key, value]
  end
end
|
147
|
+
|
148
|
+
# Sorts the [key, value] pairs of the table by the result of +block+.
def sort_by(&block)
  pairs = collect
  pairs.sort_by(&block)
end
|
151
|
+
|
152
|
+
#{{{ Parsing
|
153
|
+
|
154
|
+
# Splits the string +io+ on +delimiter+, keeping trailing empty fields.
# A nil input yields an empty list.
def self.parse_fields(io, delimiter = "\t")
  io.nil? ? [] : io.split(delimiter, -1)
end
|
159
|
+
|
160
|
+
# Transposes a list of parallel value lists into a list of tuples.
# When field names are available (given, or taken from +list.fields+ if the
# list responds to it) each tuple is wrapped with NamedArray.
# A nil or empty list yields [].
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?
  fields ||= list.fields if list.respond_to? :fields

  first_column = list[0]
  other_columns = list[1..-1]
  tuples = first_column.zip(*other_columns)

  return tuples unless fields
  tuples.collect { |tuple| NamedArray.name(tuple, fields) }
end
|
167
|
+
|
168
|
+
# Parses the TSV content read from +file+ (anything responding to +gets+)
# into +data+ (a Hash or PersistenceHash) and returns [key_field, fields]
# taken from the header line (both nil when there is no header).
#
# Options (defaults in parentheses):
#   :sep ("\t"), :sep2 ("|")  field separator and in-field value separator
#   :native (0)               field to use as key
#   :extra (nil)              field(s) to keep as values; nil keeps them all
#   :fix, :exclude, :select   procs applied to every raw line
#   :single, :unique, :flatten, :overwrite (false)
#                             how values for a key are stored / combined
#   :keep_empty (true)        keep empty fields as [""]
#   :case_insensitive (false) downcase keys
#   :header_hash ("#")        mark that starts the header line
#
# Additional ids found in the key field are stored as "__Ref:<main id>"
# strings pointing at the first id.
#
# Raises RuntimeError ("Empty content") when +file+ has no lines.
def self.parse(data, file, options = {})

  # Prepare options
  options = add_defaults options,
    :sep              => "\t",
    :sep2             => "|",
    :native           => 0,
    :extra            => nil,
    :fix              => nil,
    :exclude          => nil,
    :select           => nil,
    :grep             => nil,
    :single           => false,
    :unique           => false,
    :flatten          => false,
    :overwrite        => false,
    :keep_empty       => true,
    :case_insensitive => false,
    :header_hash      => '#' ,
    :persistence_file => nil

  options[:extra]   = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
  options[:flatten] = true if options[:single]

  #{{{ Process first line

  line = file.gets
  raise "Empty content" if line.nil?
  line.chomp!

  if line =~ /^#{options[:header_hash]}/
    header_fields    = parse_fields(line, options[:sep])
    header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
    line = file.gets
  else
    header_fields = nil
  end

  id_pos = Misc.field_position(header_fields, options[:native])

  if options[:extra].nil?
    extra_pos = nil
    max_cols  = 0
  else
    # max_cols stays nil here, which disables the padding step below
    extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
  end

  #{{{ Process rest
  while line do
    line.chomp!

    line = options[:fix].call line if options[:fix]

    # Select and fix lines
    if (options[:exclude] and options[:exclude].call(line)) or
       (options[:select] and not options[:select].call(line))
      line = file.gets
      next
    end

    ### Process line

    # Chunk fields
    parts = parse_fields(line, options[:sep])

    # Get next line
    line = file.gets

    # Get id field
    next if parts[id_pos].nil? || parts[id_pos].empty?
    ids = parse_fields(parts[id_pos], options[:sep2])
    ids.collect!{|id| id.downcase } if options[:case_insensitive]

    # Get extra fields

    if options[:extra].nil? and not (options[:flatten] or options[:single])
      extra = parts
      extra.delete_at(id_pos)
      max_cols = extra.size if extra.size > (max_cols || 0)
    else
      if extra_pos.nil?
        extra = parts
        extra.delete_at id_pos
      else
        extra = parts.values_at(*extra_pos)
      end
    end

    extra.collect!{|value| parse_fields(value, options[:sep2])}
    extra.collect!{|values| values.first} if options[:unique]
    extra.flatten! if options[:flatten]
    extra = extra.first if options[:single]

    # The first id is the main entry, the rest become references to it
    main_entry = ids.shift
    ids.each do |id|
      data[id] = "__Ref:#{main_entry}"
    end

    if options[:overwrite]
      data[main_entry] = extra
    else
      case
      when (options[:single] or options[:unique])
        data[main_entry] ||= extra
      when options[:flatten]
        if PersistenceHash === data
          # Values read from a persistence hash must be written back explicitly
          data[main_entry] = (data[main_entry] || []).concat extra
        else
          data[main_entry] ||= []
          data[main_entry].concat extra
        end
      else
        entry = data[main_entry] || []
        # Only Strings can be references. Calling =~ on an Array raises
        # NoMethodError since Ruby 3.2 (Object#=~ was removed).
        while String === entry and entry =~ /__Ref:(.*)/ do
          entry = data[$1]
        end

        extra.each_with_index do |fields, i|
          if fields.empty?
            next unless options[:keep_empty]
            fields = [""]
          end
          entry[i] ||= []
          entry[i] = entry[i].concat fields
        end

        data[main_entry] = entry
      end
    end
  end

  # Pad every entry to the widest row seen so all of them have max_cols lists
  if options[:keep_empty] and not max_cols.nil?
    data.each do |key,values|
      # "__Ref:" strings are references, not value lists; indexing into them
      # would corrupt or raise.
      next unless Array === values
      new_values = values
      max_cols.times do |i|
        new_values[i] ||= [""]
      end
      data[key] = new_values
    end
  end

  # Save header information
  key_field = nil
  fields    = nil
  if header_fields && header_fields.any?
    key_field = header_fields[id_pos]
    if extra_pos.nil?
      fields = header_fields
      fields.delete_at(id_pos)
    else
      fields = header_fields.values_at(*extra_pos)
    end
  end

  data.read if PersistenceHash === data

  [key_field, fields]
end
|
334
|
+
|
335
|
+
attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
|
336
|
+
# Builds a TSV from +file+, which can be:
# * another TSV: its data, header information and flags are shared;
# * a Hash or PersistenceHash: used directly as the data store;
# * a File, the path of an existing file, or any stream responding to
#   +gets+ (e.g. StringIO): its content is parsed with TSV.parse.
#
# +options+ are handed to TSV.parse. In addition :persistence => true makes
# the parsed data live in a PersistenceHash under the cache directory (it is
# reloaded from there when already present) and :grep pre-filters the lines
# with Open.grep.
#
# Raises RuntimeError when +file+ is none of the above (for instance, the
# path of a file that does not exist).
def initialize(file = {}, options = {})
  @case_insensitive = options[:case_insensitive] == true
  @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)

  case
  when TSV === file
    @filename         = file.filename
    @data             = file.data
    @key_field        = file.key_field
    @fields           = file.fields
    @case_insensitive = file.case_insensitive
    # The accessor is called +list+; no +is_list+ method is defined.
    @list             = file.list
    return self
  when (Hash === file or PersistenceHash === file)
    @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
    @data     = file
    return self
  when File === file
    @filename = File.expand_path file.path
  when String === file && File.exist?(file) # File.exists? was removed in Ruby 3.2
    @filename = File.expand_path file
    file = Open.open(file)
  when file.respond_to?(:gets)
    # Already a readable stream (StringIO and the like); no file name.
    # The previous `when StringIO` in this subject-less case was always
    # truthy, so the error below could never be reached.
  else
    raise "File #{file} not found"
  end

  if options[:persistence]
    options.delete :persistence
    persistence_file = TSV.get_persistence_file @filename, "file:#{ @filename }:", options

    if File.exist? persistence_file
      TSV.log "Loading Persistence for #{ @filename } in #{persistence_file}"
      @data      = PersistenceHash.get(persistence_file, false)
      @key_field = @data.key_field
      @fields    = @data.fields
    else
      @data = PersistenceHash.get(persistence_file, true)
      file  = Open.grep(file, options[:grep]) if options[:grep]

      TSV.log "Persistent Parsing for #{ @filename } in #{persistence_file}"
      @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
      @data.key_field     = @key_field
      @data.fields        = @fields
      @data.read
    end
  else
    TSV.log "Non-persistent parsing for #{ @filename }"
    @data = {}
    file  = Open.grep(file, options[:grep]) if options[:grep]
    @key_field, @fields = TSV.parse(@data, file, options)
  end

  file.close
  @case_insensitive = options[:case_insensitive] == true
end
|
392
|
+
|
393
|
+
|
394
|
+
def to_s
|
395
|
+
str = ""
|
396
|
+
|
397
|
+
if fields
|
398
|
+
str << "#" << key_field << "\t" << fields * "\t" << "\n"
|
399
|
+
end
|
400
|
+
|
401
|
+
each do |key, values|
|
402
|
+
case
|
403
|
+
when values.nil?
|
404
|
+
str << key.dup << "\n"
|
405
|
+
when (not Array === values)
|
406
|
+
str << key.dup << "\t" << values.to_s << "\n"
|
407
|
+
when Array === values.first
|
408
|
+
str << key.dup << "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
|
409
|
+
else
|
410
|
+
str << key.dup << "\t" << values * "\t" << "\n"
|
411
|
+
end
|
412
|
+
end
|
413
|
+
|
414
|
+
str
|
415
|
+
end
|
416
|
+
|
417
|
+
#{{{ New
|
418
|
+
|
419
|
+
# True when +field+ is the key field or one of +fields+.
def self.fields_include(key_field, fields, field)
  if field == key_field or fields.include? field
    true
  else
    false
  end
end
|
423
|
+
|
424
|
+
# Translates each of +selected+ into a position:
# * nil, :main or the key field name map to -1 (the key);
# * Integers are taken as positions already;
# * anything else is resolved with Misc.field_position against +fields+.
def self.field_positions(key_field, fields, *selected)
  selected.collect do |sel|
    if sel.nil? or sel == :main or sel == key_field
      -1
    elsif Integer === sel
      sel
    else
      Misc.field_position fields, sel
    end
  end
end
|
436
|
+
|
437
|
+
# True when +field+ is this table's key field or one of its fields.
def fields_include(field)
  TSV.fields_include(key_field, fields, field)
end
|
440
|
+
|
441
|
+
# Positions of the +selected+ fields in this table (see TSV.field_positions).
# Returns nil when called with a single nil.
def field_positions(*selected)
  if selected.nil? or selected == [nil]
    nil
  else
    TSV.field_positions(key_field, fields, *selected)
  end
end
|
445
|
+
|
446
|
+
# Names of the fields at +positions+; position -1 is the key field.
# Returns nil when the table has no field names or a single nil is given.
def fields_at(*positions)
  return nil if fields.nil? or positions.nil? or positions == [nil]
  names = fields + [key_field]
  names.values_at(*positions)
end
|
451
|
+
|
452
|
+
# Walks the table as if it were keyed by +new_key_field+ and restricted to
# +new_fields+, yielding (key, values) for every resulting entry. Returns
# [key field name, field names] describing what was yielded.
#
# * With the current key (nil, :main or the key field name) entries are
#   yielded as they are, or sliced to +new_fields+ when given.
# * With another field as key, each value of that field becomes a key. When
#   +new_fields+ is nil the values are the old key followed by the remaining
#   fields; otherwise they are the requested fields (-1 being the old key).
#   Entries with no value for the new key are skipped.
def through(new_key_field = nil, new_fields = nil, &block)
  new_key_position = (field_positions(new_key_field) || [-1]).first

  if new_key_position == -1

    if new_fields.nil? or new_fields == fields
      each(&block)
      return [key_field, fields]
    else
      new_field_positions = field_positions(*new_fields)
      each do |key, values|
        yield key, values.values_at(*new_field_positions)
      end
      return [key_field, fields_at(*new_field_positions)]
    end

  else
    # Since Ruby 1.9 `*nil` expands to no arguments (it used to be [nil]),
    # so "no selection" has to be handled explicitly to remain nil.
    new_field_positions = new_fields.nil? ? nil : field_positions(*new_fields)
    new_field_names     = new_field_positions.nil? ? nil : fields_at(*new_field_positions)

    if new_field_names.nil? and fields
      new_field_names = fields.dup
      new_field_names.delete_at new_key_position
      new_field_names.unshift key_field
    end

    each do |key, values|
      # Append the key so that position -1 addresses it
      if list
        tmp_values = values + [[key]]
      else
        tmp_values = values + [key]
      end

      if new_field_positions.nil?
        new_values = values.dup
        new_values.delete_at new_key_position
        new_values.unshift [key]
      else
        new_values = tmp_values.values_at(*new_field_positions)
      end

      new_keys = tmp_values[new_key_position]
      next if new_keys.nil?
      # Non-list tables hold plain strings, which no longer respond to #each
      new_keys = [new_keys] unless Array === new_keys

      new_keys.each do |new_key|
        if new_field_names
          yield new_key, NamedArray.name(new_values, new_field_names)
        else
          yield new_key, new_values
        end
      end
    end
    return [(fields_at(new_key_position) || [nil]).first, new_field_names]
  end
end
|
504
|
+
|
505
|
+
# Replaces, in place, the content of +field+ in every entry with the result
# of the given block, which receives (field values, key, all values).
# Entries that have nothing under +field+ are left alone.
def process(field)
  through do |key, values|
    current = values[field]
    next if current.nil?
    current.replace yield(current, key, values)
  end
end
|
510
|
+
|
511
|
+
|
512
|
+
# Returns a new TSV keyed by +new_key_field+ and holding +new_fields+ (see
# #through). Entries that end up sharing a key have their values merged
# field by field.
#
# With options[:persistence_file] the result is stored in (or, when that
# file already exists, simply reloaded from) a PersistenceHash at that path.
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options
  persistence_file = options[:persistence_file]

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if persistence_file and File.exist?(persistence_file)
    return TSV.new(PersistenceHash.get(persistence_file, false), :case_insensitive => case_insensitive)
  end

  new = {}
  new_key_field, new_fields = through(new_key_field, new_fields) do |key, values|
    if new[key].nil?
      new[key] = values
    else
      # Pair up field by field; the nesting is flattened below
      new[key] = new[key].zip(values)
    end
  end

  new.each do |key,values|
    values.each{|list| list.flatten! if Array === list}
  end

  if persistence_file
    reordered = TSV.new(PersistenceHash.get(persistence_file, false), :case_insensitive => case_insensitive)
    reordered.merge! new
  else
    reordered = TSV.new(new, :case_insensitive => case_insensitive)
  end

  reordered.key_field = new_key_field
  reordered.fields    = new_fields

  reordered
end
|
541
|
+
|
542
|
+
# Returns a new TSV with the same key but only +new_fields+ as values.
# NOTE: +options+ is accepted but not forwarded to #reorder.
def slice(new_fields, options = {})
  reorder :main, new_fields
end
|
545
|
+
|
546
|
+
# Builds an index TSV that maps every value found in the table (and every
# key) to the list of keys of the entries it appears in.
#
# Options:
#   :field, :others     passed to #through to choose the key and fields
#   :order (false)      when true, keys matched through earlier fields come
#                       first in each list
#   :case_insensitive   downcase the indexed values
#   :persistence, :persistence_file
#                       keep the index in a PersistenceHash; an existing
#                       persistence file is reloaded instead of rebuilt
def index(options = {})
  options = Misc.add_defaults options, :order => false

  if options[:persistence] and ! options[:persistence_file]
    options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
  end

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if options[:persistence_file] and File.exist?(options[:persistence_file])
    return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
  end

  new = {}
  if options[:order]
    new_key_field, new_fields = through(options[:field], options[:others]) do |key, values|

      values.each_with_index do |list, i|
        next if list.nil? or list.empty?

        list = [list] unless Array === list

        list.each do |value|
          next if value.nil? or value.empty?
          value = value.downcase if options[:case_insensitive]
          # Slot 0 is reserved for the key itself; field i goes to slot i + 1
          new[value]        ||= []
          new[value][i + 1] ||= []
          new[value][i + 1] << key
        end
        new[key]    ||= []
        new[key][0]   = key
      end

    end

    # Collapse the per-field slots into a single ordered list
    new.each do |key, values|
      values.flatten!
      values.compact!
    end

  else
    new_key_field, new_fields = through(options[:field], options[:others]) do |key, values|
      new[key] ||= []
      new[key] << key
      values.each do |list|
        next if list.nil?
        if Array === list
          list.each do |value|
            value = value.downcase if options[:case_insensitive]
            new[value] ||= []
            new[value] << key
          end
        else
          next if list.empty?
          value = list
          value = value.downcase if options[:case_insensitive]
          new[value] ||= []
          new[value] << key
        end
      end
    end
  end

  if options[:persistence_file]
    result = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
    result.merge! new
  else
    result = TSV.new(new, :case_insensitive => options[:case_insensitive])
  end

  result.key_field = new_key_field
  result.fields    = new_fields
  result
end
|
618
|
+
|
619
|
+
# Merges the entries of +other+ into this table, in place.
#
# +match+ tells how entries of both tables are paired:
# * a TSV: used as a translation index between the codes of both tables;
# * an Array [index TSV, field]: same, taking this table's codes from field;
# * "through:<file>" or "through:<file>#using:<field>": the index is built
#   from <file>;
# * a field name or position: this table's codes are taken from that field;
# * nil: entries are paired by key.
#
# +new_fields+ (a name or list of names) selects the fields of +other+ that
# are appended; by default all those this table does not have. Values of
# fields common to both tables are combined when the table holds lists.
def smart_merge(other, match = nil, new_fields = nil)

  new_fields = [new_fields] if String === new_fields
  if self.fields and other.fields
    common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
    new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)

    common_fields.delete match    if String === match
    common_fields.delete_at match if Integer === match

    this_common_field_positions  = self.field_positions(*common_fields)
    other_common_field_positions = other.field_positions(*common_fields)
    other_new_field_positions    = other.field_positions(*new_fields)
  else
    nofieldinfo = true
  end

  case
  when TSV === match
    match_index = match
    matching_code_position = nil

  when Array === match
    match_index = match.first
    matching_code_position = field_positions(match.last).first

  # Only Strings are matched against the pattern: =~ is not defined for
  # Integers since Ruby 3.2.
  when (String === match and match =~ /^through:(.*)/)
    through = $1
    if through =~ /(.*)#using:(.*)/
      through = $1
      matching_code_position = field_positions($2).first
    else
      matching_code_position = nil
    end
    index_fields = TSV.headers(through)
    target_field = index_fields.select{|field| other.fields_include field}.first
    Log.debug "Target Field: #{ target_field }"
    match_index = TSV.open_file(through).index(:field => target_field)

  # field_positions returns nil for a nil match
  when (field_positions(match) || []).first
    matching_code_position = field_positions(match).first
    match_index = nil
  end

  # With an index but no explicit field, use the first index field that this
  # table also has
  if matching_code_position.nil? and match_index and match_index.fields
    match_index.fields.each do |field|
      if matching_code_position = field_positions(field).first
        break
      end
    end
  end

  if match_index and match_index.key_field == other.key_field
    other_index = nil
  else
    # NOTE(review): #index does not read an :other option, so both branches
    # currently build the same index; kept pending confirmation of the intent.
    other_index = (String === match and other.fields_include(match)) ?
      other.index(:other => match, :order => true) : other.index(:order => true)
  end

  each do |key,values|
    Log.debug "Key: #{ key }. Values: #{values * ", "}"
    if matching_code_position.nil? or matching_code_position == -1
      matching_codes = [key]
    else
      matching_codes = values[matching_code_position]
      matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
    end
    Log.debug "Matching codes: #{matching_codes}"

    next if matching_codes.nil?

    matching_codes.each do |matching_code|
      if match_index
        if match_index[matching_code]
          matching_code_fix = match_index[matching_code].first
        else
          matching_code_fix = nil
        end
      else
        matching_code_fix = matching_code
      end

      Log.debug "Matching code (fix): #{matching_code_fix}"
      next if matching_code_fix.nil?

      if other_index
        Log.debug "Using other_index"
        other_codes = other_index[matching_code_fix]
      else
        # Keep this a list: a bare String has no #first
        other_codes = [matching_code_fix]
      end
      Log.debug "Other codes: #{other_codes}"

      next if other_codes.nil? or other_codes.empty?
      other_code = other_codes.first

      if nofieldinfo
        next if other[other_code].nil?
        if list
          other_values = [[other_code]] + other[other_code]
        else
          other_values = [other_code] + other[other_code]
        end
        # Drop the columns that just repeat the code used for matching
        other_values.delete_if do |candidates|
          candidates = [candidates] unless Array === candidates
          candidates.collect{|e| case_insensitive ? e.downcase : e }.
            select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
        end

        new_values = values + other_values
      else
        if other[other_code].nil?
          if list
            other_values = [[]] * other.fields.length
          else
            other_values = [] * other.fields.length
          end
        else
          if list
            other_values = other[other_code] + [[other_code]]
          else
            other_values = other[other_code] + [other_code]
          end
        end

        new_values = values.dup

        if list
          this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
            new_values_tops = new_values[tpos]

            if other.list
              new_values_tops += other_values[opos]
            else
              new_values_tops += [other_values[opos]]
            end

            new_values[tpos] = new_values_tops.uniq
          end
        end

        new_values.concat other_values.values_at(*other_new_field_positions)
      end

      self[key] = new_values
    end
  end

  self.fields = self.fields + new_fields unless nofieldinfo
end
|
770
|
+
|
771
|
+
#{{{ Helpers
|
772
|
+
|
773
|
+
# Builds (or reloads) an index for the TSV in +file+.
#
# +options+ are split in two: the ones used to load the data (everything but
# :field and :persistence) and the ones used to build the index (everything
# but :persistence). :persistence => true keeps the index in a persistence
# file, which is reloaded when it already exists; :data_persistence => true
# makes the parsed data itself persistent.
def self.index(file, options = {})
  opt_data  = options.dup
  opt_index = options.dup
  opt_data.delete  :field
  opt_data.delete  :persistence
  opt_index.delete :persistence

  opt_data[:persistence] = true if options[:data_persistence]

  if options[:persistence]
    opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index)
  end

  index_file = opt_index[:persistence_file]

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if ! index_file.nil? && File.exist?(index_file)
    TSV.log "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
    TSV.new(PersistenceHash.get(index_file, false), opt_index)
  else
    TSV.log "Creating index for #{ file }: #{opt_index[:persistence_file]}"
    data = TSV.new(file, opt_data)
    data.index(opt_index)
  end
end
|
793
|
+
|
794
|
+
# Opens a TSV from a "path#options" specification: everything after the
# first '#' is decoded with Misc.string2hash and used as the options for
# TSV.new. Without a '#', the file is opened with no options.
def self.open_file(file)
  options = {}

  if file =~ /(.*?)#(.*)/
    path, inline_options = $1, $2
    options = Misc.string2hash(inline_options.to_s)
    file    = path
  end

  TSV.new(file, options)
end
|
803
|
+
|
804
|
+
end
|