rbbt-util 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,804 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/util/open'
3
+ require 'rbbt/util/tc_hash'
4
+ require 'rbbt/util/tmpfile'
5
+ require 'digest'
6
+ require 'fileutils'
7
+
8
# Returns a copy of +options+ in which every key of +defaults+ whose current
# value is nil (absent, or explicitly nil) has been filled with the default.
# Values that are merely false are kept. The +options+ hash passed in is not
# modified.
def add_defaults(options, defaults = {})
  defaults.each_with_object(options.dup) do |(name, fallback), merged|
    merged[name] = fallback if merged[name].nil?
  end
end
15
+
16
+ class TSV
17
+ class FieldNotFoundError < StandardError;end
18
+
19
+ #{{{ Persistence
20
+
21
+ PersistenceHash = TCHash
22
+
23
# Directory where persistent TSV caches are stored. The String is created
# explicitly mutable because TSV.cachedir= rewrites it in place with
# String#replace, which would fail on a frozen literal.
CACHEDIR = String.new("/tmp/tsv_persistent_cache")
# mkdir_p (instead of mkdir) is what TSV.cachedir= uses as well, and it does
# not raise if the directory appears between the check and the call.
FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
25
+
26
# Points the persistence cache at +cachedir+. CACHEDIR is rewritten in place,
# and the directory (with any missing parents) is created when nothing exists
# at that path yet.
def self.cachedir=(cachedir)
  CACHEDIR.replace(cachedir)
  return if File.exist?(CACHEDIR)
  FileUtils.mkdir_p(CACHEDIR)
end

# Directory currently used for the persistence cache.
def self.cachedir
  return CACHEDIR
end
34
+
35
# Builds the path of the cache file for +file+ inside CACHEDIR.
#
# The file name is +prefix+ (whitespace replaced by "_" and "/" by ">")
# followed by the MD5 hex digest of the inspected [file, options] pair, so a
# different file or a different option set maps to a different cache file.
def self.get_persistence_file(file, prefix, options = {})
  safe_prefix = prefix.gsub(/\s/,'_').gsub(/\//,'>')
  fingerprint = Digest::MD5.hexdigest([file, options].inspect)
  File.join(CACHEDIR, safe_prefix + fingerprint)
end
38
+
39
# Debug output is enabled at load time only when the TSV_DEBUG environment
# variable is exactly "true".
@debug = (ENV['TSV_DEBUG'] == "true")

# Prints +message+ on STDERR, but only while the debug flag is exactly true.
def self.log(message)
  return unless @debug == true
  STDERR.puts message
end

# Switches debug output on or off at run time.
def self.debug=(value)
  @debug = value
end
47
+
48
# Reads the header line of +file+ and returns its field names, or nil when
# the first line does not start with the header marker.
#
# +file+ may have the form "path#options": when the part before the "#" is an
# existing file, the part after it is parsed with Misc.string2hash and merged
# into +options+ (the hash passed by the caller is updated in place).
#
# Options read here:
#   :sep          field separator (default "\t")
#   :header_hash  marker that starts a header line (default "#"); it is
#                 interpolated into a regular expression as is
def self.headers(file, options = {})
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if file =~ /(.*)#(.*)/ and File.exist? $1
    path, option_string = $1, $2
    options.merge! Misc.string2hash(option_string)
    file = path
  end

  options = Misc.add_defaults options, :sep => "\t", :header_hash => "#"

  io = Open.open(file)
  begin
    line = io.gets
  ensure
    # Close the stream even if reading the first line fails.
    io.close
  end

  header_hash = options[:header_hash]
  return nil unless line =~ /^#{header_hash}/

  line.chomp.sub(/^#{header_hash}/,'').split(options[:sep])
end
65
+
66
+ #{{{ Accessor Methods
67
+
68
# Keys of the underlying data store.
def keys
  return @data.keys
end

# Raw stored values of the underlying data store (stored values are returned
# as they are, without going through #follow).
def values
  return @data.values
end

# Number of entries in the underlying data store.
def size
  return @data.size
end
79
+
80
+ # Write
81
+
82
# Stores +value+ under +key+; the key is downcased first when the table is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
86
+
87
+
88
# Copies every key/value pair of +new_data+ into this table through #[]=,
# so keys get the same case handling as individually assigned ones.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
93
+
94
+ # Read
95
+
96
# Resolves a stored value into what callers should see.
#
# A String containing "__Ref:<key>" is an alias: the entry stored under <key>
# is looked up and returned instead. An Array is wrapped with
# NamedArray.name when the table has field names. Anything else is returned
# unchanged.
def follow(value)
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return value unless Array === value && fields
  NamedArray.name(value, fields)
end
104
# Looks up +key+ and returns its value after alias resolution (#follow).
#
# When +key+ is an Array it is first tried as a literal key of the data
# store (returned raw); otherwise each element is looked up in turn and the
# first non-nil result is returned, or nil if none matches.
def [](key)
  unless Array === key
    lookup = @case_insensitive ? key.downcase : key
    return follow(@data[lookup])
  end

  exact = @data[key]
  return exact if exact != nil

  key.each do |candidate|
    found = self[candidate]
    return found unless found.nil?
  end

  nil
end
114
+
115
# Returns the values for each of +keys+, in order, using #[] for every
# lookup (so aliases are resolved and missing keys give nil).
def values_at(*keys)
  keys.map { |wanted| self[wanted] }
end
120
+
121
# Calls +block+ with every key and its value after alias resolution
# (#follow), in the order of the underlying data store.
def each(&block)
  @data.each { |entry_key, stored| block.call(entry_key, follow(stored)) }
end
126
+
127
# Without a block, returns an Array of [key, value] pairs with values
# resolved through #follow. With a block, yields each key and resolved value
# and returns the Array of block results.
def collect
  return @data.collect { |key, value| [key, follow(value)] } unless block_given?

  @data.collect { |key, value| yield key, follow(value) }
end
139
+
140
# Returns the [key, value] pairs of the table sorted with +block+ (or the
# default pair ordering). When the table has field names, each value is
# wrapped with NamedArray.name.
def sort(&block)
  ordered_pairs = collect.sort(&block)
  ordered_pairs.collect do |pair|
    key, value = pair
    fields ? [key, NamedArray.name(value, fields)] : [key, value]
  end
end
147
+
148
# Returns the [key, value] pairs of the table ordered by the result of
# +block+ for each pair.
def sort_by(&block)
  pairs = collect
  pairs.sort_by(&block)
end
151
+
152
+ #{{{ Parsing
153
+
154
# Splits +io+ (a String despite its name) on +delimiter+, keeping trailing
# empty fields. A nil input gives an empty Array.
def self.parse_fields(io, delimiter = "\t")
  io.nil? ? [] : io.split(delimiter, -1)
end
159
+
160
# Transposes a list of value lists: the i-th result holds the i-th element of
# every list in +list+ (nil where a later list is shorter than the first).
#
# Field names are taken from +fields+ or, failing that, from list.fields when
# +list+ responds to it; when available each result row is wrapped with
# NamedArray.name. A nil or empty +list+ gives an empty Array.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields ||= list.fields if list.respond_to?(:fields)

  rows = list[0].zip(*list[1..-1])
  return rows unless fields

  rows.collect { |row| NamedArray.name(row, fields) }
end
167
+
168
# Parses the lines read from +file+ (anything with #gets) into +data+ (a Hash
# or PersistenceHash) and returns [key_field, fields] taken from the header
# line, both nil when there is no header.
#
# Raises RuntimeError ("Empty content") when +file+ yields no line at all.
#
# Options read here (defaults in the add_defaults call below):
#   :sep               column separator
#   :sep2              separator of multiple values inside one column
#   :native            column used as key, resolved with Misc.field_position
#   :extra             column(s) to keep as values; nil keeps all other columns
#   :fix               callable applied to every data line before parsing
#   :exclude, :select  callables used to drop / keep lines
#   :single, :unique, :flatten  shape of the stored values (:single implies
#                      :flatten)
#   :overwrite         a repeated key replaces the previous entry instead of
#                      being merged into it
#   :keep_empty        keep empty fields as [""] and pad short rows
#   :case_insensitive  downcase the keys
#   :header_hash       marker that starts the header line
#
# When the key column holds several ids, the first one owns the entry and the
# others are stored as "__Ref:<first id>" alias strings.
def self.parse(data, file, options = {})

  # Prepare options
  options = add_defaults options,
    :sep              => "\t",
    :sep2             => "|",
    :native           => 0,
    :extra            => nil,
    :fix              => nil,
    :exclude          => nil,
    :select           => nil,
    :grep             => nil,
    :single           => false,
    :unique           => false,
    :flatten          => false,
    :overwrite        => false,
    :keep_empty       => true,
    :case_insensitive => false,
    :header_hash      => '#' ,
    :persistence_file => nil

  options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
  options[:flatten] = true if options[:single]

  #{{{ Process first line

  line = file.gets
  raise "Empty content" if line.nil?
  line.chomp!

  if line =~ /^#{options[:header_hash]}/
    header_fields = parse_fields(line, options[:sep])
    header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
    line = file.gets
  else
    header_fields = nil
  end

  id_pos = Misc.field_position(header_fields, options[:native])

  # max_cols tracks the widest row; it stays nil when explicit :extra columns
  # were requested, which disables the padding step at the end.
  if options[:extra].nil?
    extra_pos = nil
    max_cols = 0
  else
    extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
    max_cols = nil
  end

  #{{{ Process rest
  while line do
    line.chomp!

    line = options[:fix].call line if options[:fix]

    # Select and fix lines
    if (options[:exclude] and options[:exclude].call(line)) or
       (options[:select] and not options[:select].call(line))
      line = file.gets
      next
    end

    ### Process line

    # Chunk fields
    parts = parse_fields(line, options[:sep])

    # Get next line (done here so that every `next` below moves forward)
    line = file.gets

    # Get id field
    next if parts[id_pos].nil? || parts[id_pos].empty?
    ids = parse_fields(parts[id_pos], options[:sep2])
    ids.collect!{|id| id.downcase } if options[:case_insensitive]

    # Get extra fields

    if options[:extra].nil? and not (options[:flatten] or options[:single])
      extra = parts
      extra.delete_at(id_pos)
      max_cols = extra.size if extra.size > (max_cols || 0)
    else
      if extra_pos.nil?
        extra = parts
        extra.delete_at id_pos
      else
        extra = parts.values_at(*extra_pos)
      end
    end

    extra.collect!{|value| parse_fields(value, options[:sep2])}
    extra.collect!{|values| values.first} if options[:unique]
    extra.flatten! if options[:flatten]
    extra = extra.first if options[:single]

    # The first id owns the entry; the remaining ids become aliases to it.
    main_entry = ids.shift
    ids.each do |id|
      data[id] = "__Ref:#{main_entry}"
    end

    case
    when options[:overwrite]
      data[main_entry] = extra
    when (options[:single] or options[:unique])
      data[main_entry] ||= extra
    when options[:flatten]
      if PersistenceHash === data
        data[main_entry] = (data[main_entry] || []).concat extra
      else
        data[main_entry] ||= []
        data[main_entry].concat extra
      end
    else
      entry = data[main_entry] || []
      # Only Strings can be alias references. The String check also keeps =~
      # from being sent to an Array, which raises NoMethodError since
      # Object#=~ was removed in Ruby 3.2.
      while String === entry && entry =~ /__Ref:(.*)/ do
        entry = data[$1]
      end

      extra.each_with_index do |fields, i|
        if fields.empty?
          next unless options[:keep_empty]
          fields = [""]
        end
        entry[i] ||= []
        entry[i] = entry[i].concat fields
      end

      data[main_entry] = entry
    end
  end

  # Pad every row up to the widest one seen.
  if options[:keep_empty] and not max_cols.nil?
    data.each do |key,values|
      # Alias entries are "__Ref:..." Strings, not rows. Indexing them with
      # []= raised TypeError as soon as max_cols exceeded the string length.
      next unless Array === values

      new_values = values
      max_cols.times do |i|
        new_values[i] ||= [""]
      end
      data[key] = new_values
    end
  end


  # Save header information
  key_field = nil
  fields = nil
  if header_fields && header_fields.any?
    key_field = header_fields[id_pos]
    if extra_pos.nil?
      fields = header_fields
      fields.delete_at(id_pos)
    else
      fields = header_fields.values_at(*extra_pos)
    end
  end

  data.read if PersistenceHash === data

  [key_field, fields]
end
334
+
335
+ attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
336
# Builds a TSV from one of several sources:
#
#   * another TSV: its data, field information and flags are shared
#   * a Hash or PersistenceHash: used directly as the data store
#   * a File, or the path of an existing file: parsed with TSV.parse
#   * any other object that responds to #gets (e.g. a StringIO): parsed too
#
# Anything else raises RuntimeError ("File ... not found").
#
# With options[:persistence] the parsed data is kept in a PersistenceHash
# file under the cache directory and reloaded from there on later calls; the
# :persistence key is removed from the +options+ hash that was passed in.
# The remaining options are handed to TSV.parse; options[:grep] additionally
# filters the input through Open.grep first.
#
# The input stream is closed before returning, including streams passed in
# by the caller.
def initialize(file = {}, options = {})
  @case_insensitive = options[:case_insensitive] == true
  @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)

  case
  when TSV === file
    @filename = file.filename
    @data = file.data
    @key_field = file.key_field
    @fields = file.fields
    @case_insensitive = file.case_insensitive
    # The accessor is `list`; the previous `is_list` is not defined anywhere
    # in this file, so copying a TSV raised NoMethodError.
    @list = file.list
    return self
  when (Hash === file or PersistenceHash === file)
    @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
    @data = file
    return self
  when File === file
    @filename = File.expand_path file.path
  when String === file && File.exist?(file)
    @filename = File.expand_path file
    file = Open.open(file)
  when file.respond_to?(:gets)
    # StringIO and other IO-like objects are read as they are. The previous
    # `when StringIO` had no case subject and was therefore always true, so
    # invalid input never reached the error below.
  else
    raise "File #{file} not found"
  end

  begin
    if options[:persistence]
      options.delete :persistence
      persistence_file = TSV.get_persistence_file @filename, "file:#{ @filename }:", options

      if File.exist? persistence_file
        TSV.log "Loading Persistence for #{ @filename } in #{persistence_file}"
        @data = PersistenceHash.get(persistence_file, false)
        @key_field = @data.key_field
        @fields = @data.fields
      else
        @data = PersistenceHash.get(persistence_file, true)
        file = Open.grep(file, options[:grep]) if options[:grep]

        TSV.log "Persistent Parsing for #{ @filename } in #{persistence_file}"
        @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
        @data.key_field = @key_field
        @data.fields = @fields
        @data.read
      end
    else
      TSV.log "Non-persistent parsing for #{ @filename }"
      @data = {}
      file = Open.grep(file, options[:grep]) if options[:grep]
      @key_field, @fields = TSV.parse(@data, file, options)
    end
  ensure
    # Close the stream even when parsing fails.
    file.close
  end

  @case_insensitive = options[:case_insensitive] == true
end
392
+
393
+
394
# Serializes the table as tab separated text.
#
# When field names are known, the first line is "#" + key field + field
# names. Each entry then gives one line: the key alone for nil values, the
# key and value.to_s for non-Array values, columns joined with "|" inside
# and tabs between them for lists of lists, and tab-joined values otherwise.
def to_s
  str = ""

  str << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  each do |key, values|
    str << key.dup

    unless values.nil?
      cells =
        if !(Array === values)
          values.to_s
        elsif Array === values.first
          values.collect{|list| (list || []) * "|"} * "\t"
        else
          values * "\t"
        end
      str << "\t" << cells
    end

    str << "\n"
  end

  str
end
416
+
417
+ #{{{ New
418
+
419
# True when +field+ is the key field or one of +fields+, false otherwise.
def self.fields_include(key_field, fields, field)
  (field == key_field or fields.include?(field)) ? true : false
end
423
+
424
# Maps each entry of +selected+ to a column position:
#   nil, :main or the key field name -> -1
#   an Integer                       -> itself
#   anything else                    -> Misc.field_position(fields, entry)
def self.field_positions(key_field, fields, *selected)
  selected.collect do |sel|
    if sel.nil? or sel == :main or sel == key_field
      -1
    elsif Integer === sel
      sel
    else
      Misc.field_position fields, sel
    end
  end
end
436
+
437
# True when +field+ is this table's key field or one of its fields
# (delegates to TSV.fields_include).
def fields_include(field)
  TSV.fields_include(key_field, fields, field)
end
440
+
441
# Column positions of +selected+ in this table (see TSV.field_positions).
# Returns nil when the only selection given is nil.
def field_positions(*selected)
  if selected.nil? or selected == [nil]
    nil
  else
    TSV.field_positions(key_field, fields, *selected)
  end
end
445
+
446
# Names of the fields at +positions+. The key field is appended after the
# regular fields, so position -1 names the key field. Returns nil when the
# table has no field names or when the only position given is nil.
def fields_at(*positions)
  return nil if fields.nil? or positions.nil? or positions == [nil]

  names = fields + [key_field]
  names.values_at(*positions)
end
451
+
452
# Walks the table as if it were keyed by +new_key_field+ and restricted to
# +new_fields+, yielding each (key, values) pair to the block. Returns
# [key field name, field names] describing what was yielded.
#
# With the current key (nil, :main or the key field name) the entries are
# yielded as stored, or narrowed to +new_fields+. With another key field,
# every value found in that column becomes a key; when +new_fields+ is nil
# the yielded values are the old key followed by the remaining columns.
def through(new_key_field = nil, new_fields = nil, &block)
  new_key_position = (field_positions(new_key_field) || [-1]).first

  if new_key_position == -1

    if new_fields.nil? or new_fields == fields
      each(&block)
      return [key_field, fields]
    else
      new_field_positions = field_positions(*new_fields)
      each do |key, values|
        yield key, values.values_at(*new_field_positions)
      end
      return [key_field, fields_at(*new_field_positions)]
    end

  else
    # This code was written for Ruby 1.8, where `*nil` passed a single nil
    # argument and the helpers answered nil ("no explicit selection"). Since
    # Ruby 1.9 `*nil` expands to no arguments, which silently selected zero
    # fields, so the nil case is spelled out here.
    new_field_positions = new_fields.nil? ? nil : field_positions(*new_fields)
    new_field_names     = new_field_positions.nil? ? nil : fields_at(*new_field_positions)

    if new_field_names.nil? and fields
      new_field_names = fields.dup
      new_field_names.delete_at new_key_position
      new_field_names.unshift key_field
    end

    each do |key, values|
      # The old key is appended so that position -1 can select it.
      if list
        tmp_values = values + [[key]]
      else
        tmp_values = values + [key]
      end

      if new_field_positions.nil?
        new_values = values.dup
        new_values.delete_at new_key_position
        new_values.unshift [key]
      else
        new_values = tmp_values.values_at(*new_field_positions)
      end

      new_keys = tmp_values[new_key_position]
      # Non-list tables hold a single String here; String#each no longer
      # exists (Ruby >= 1.9), so it is wrapped to be iterated like a list.
      new_keys = [new_keys] if String === new_keys

      new_keys.each do |new_key|
        if new_field_names
          yield new_key, NamedArray.name(new_values, new_field_names)
        else
          yield new_key, new_values
        end
      end
    end
    return [(fields_at(new_key_position) || [nil]).first, new_field_names]
  end
end
504
+
505
# Replaces, in place, the content of +field+ in every entry with the result
# of the block, which receives the current field value, the key and the whole
# list of values. Entries where the field is nil are left alone.
def process(field)
  through do |key, values|
    current = values[field]
    next if current.nil?
    current.replace yield(current, key, values)
  end
end
510
+
511
+
512
# Returns a new TSV keyed by +new_key_field+ and holding +new_fields+ (see
# #through for how both are interpreted).
#
# With options[:persistence_file] the result is stored in that
# PersistenceHash file; when the file already exists it is loaded and
# returned without recomputing anything.
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options
  persistence_file = options[:persistence_file]

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if persistence_file and File.exist?(persistence_file)
    return TSV.new(PersistenceHash.get(persistence_file, false), :case_insensitive => case_insensitive)
  end

  new = {}
  new_key_field, new_fields = through new_key_field, new_fields do |key, values|
    if new[key].nil?
      new[key] = values
    else
      # Same key seen again: pair up the columns; flattened below.
      new[key] = new[key].zip(values)
    end
  end

  new.each do |key,values|
    values.each{|list| list.flatten! if Array === list}
  end

  if persistence_file
    reordered = TSV.new(PersistenceHash.get(persistence_file, false), :case_insensitive => case_insensitive)
    reordered.merge! new
  else
    reordered = TSV.new(new, :case_insensitive => case_insensitive)
  end

  reordered.key_field = new_key_field
  reordered.fields = new_fields

  reordered
end
541
+
542
# Returns a new TSV with the same key field but only +new_fields+ as values.
# The +options+ argument is accepted but not used.
def slice(new_fields, options = {})
  reorder :main, new_fields
end
545
+
546
# Builds a lookup TSV that maps every value found in the table to the key(s)
# of the entries containing it; every key also maps to itself.
#
# Options read here:
#   :field             field to use as key of the index (passed to #through)
#   :others            fields whose values are indexed (passed to #through)
#   :order             when true, the keys found for a value are grouped by
#                      the column they were found in, the key itself first
#   :case_insensitive  downcase the indexed values
#   :persistence       store the index in a file derived from the table's
#                      filename and these options
#   :persistence_file  explicit file for the stored index; when it already
#                      exists it is loaded instead of being rebuilt
def index(options = {})
  options = Misc.add_defaults options, :order => false

  if options[:persistence] and ! options[:persistence_file]
    options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
  end

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if options[:persistence_file] and File.exist?(options[:persistence_file])
    return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
  end

  new = {}
  if options[:order]
    new_key_field, new_fields = through options[:field], options[:others] do |key, values|

      values.each_with_index do |list, i|
        next if list.nil? or list.empty?

        list = [list] unless Array === list

        list.each do |value|
          next if value.nil? or value.empty?
          value = value.downcase if options[:case_insensitive]
          # Slot 0 is reserved for the key itself, so column i uses slot i + 1.
          new[value]        ||= []
          new[value][i + 1] ||= []
          new[value][i + 1] << key
        end
        new[key]    ||= []
        new[key][0] = key
      end

    end

    # Collapse the per-column slots into one flat list without gaps.
    new.each do |key, values|
      values.flatten!
      values.compact!
    end

  else
    new_key_field, new_fields = through options[:field], options[:others] do |key, values|
      new[key] ||= []
      new[key] << key
      values.each do |list|
        next if list.nil?
        if Array === list
          list.each do |value|
            value = value.downcase if options[:case_insensitive]
            new[value] ||= []
            new[value] << key
          end
        else
          next if list.empty?
          value = list
          value = value.downcase if options[:case_insensitive]
          new[value] ||= []
          new[value] << key
        end
      end
    end
  end

  if options[:persistence_file]
    index = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
    index.merge! new
  else
    index = TSV.new(new, :case_insensitive => options[:case_insensitive])
  end

  index.key_field = new_key_field
  index.fields = new_fields
  index
end
618
+
619
# Merges the entries of +other+ into this table, in place.
#
# +match+ selects how entries of both tables are paired:
#   * a TSV: used as an index translating this table's codes
#   * an Array: [index, field of this table holding the codes]
#   * "through:<file>" (optionally "through:<file>#using:<field>"): an index
#     is built from that file
#   * otherwise: a field of this table holding the codes to look up
#
# When both tables have field names, values of common fields are combined
# (list tables only) and +new_fields+ (default: the fields of +other+ that
# this table lacks) are appended; self.fields is extended accordingly.
# Without field names the values of +other+ are appended, except those equal
# to the matching code.
def smart_merge(other, match = nil, new_fields = nil)

  new_fields = [new_fields] if String === new_fields
  if self.fields and other.fields
    common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
    new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)

    common_fields.delete match if String === match
    common_fields.delete_at match if Integer === match

    this_common_field_positions = self.field_positions(*common_fields)
    other_common_field_positions = other.field_positions(*common_fields)
    other_new_field_positions = other.field_positions(*new_fields)
  else
    nofieldinfo = true
  end

  case
  when TSV === match
    match_index = match
    matching_code_position = nil

  when Array === match
    match_index = match.first
    matching_code_position = field_positions(match.last).first

  # The String check keeps =~ from being sent to an Integer match, which
  # raises NoMethodError since Object#=~ was removed in Ruby 3.2.
  when (String === match && match =~ /^through:(.*)/)
    through_file = $1
    if through_file =~ /(.*)#using:(.*)/
      through_file, using_field = $1, $2
      matching_code_position = field_positions(using_field).first
    else
      matching_code_position = nil
    end
    index_fields = TSV.headers(through_file)
    target_field = index_fields.select{|field| other.fields_include field}.first
    Log.debug "Target Field: #{ target_field }"
    match_index = TSV.open_file(through_file).index(:field => target_field)

  when field_positions(match).first
    matching_code_position = field_positions(match).first
    match_index = nil
  end

  # No explicit position: use the first field of the index that this table
  # also has.
  if matching_code_position.nil? and match_index.fields
    match_index.fields.each do |field|
      if matching_code_position = field_positions(field).first
        break
      end
    end
  end

  if match_index and match_index.key_field == other.key_field
    other_index = nil
  else
    # Was `match === String`, which compares the String with the class and is
    # always false. The `other.fields` guard avoids calling include? on nil.
    # NOTE(review): #index reads :field/:others, not :other - confirm which
    # option was intended here.
    other_index = (String === match and other.fields and other.fields_include(match)) ?
      other.index(:other => match, :order => true) : other.index(:order => true)
  end

  each do |key,values|
    Log.debug "Key: #{ key }. Values: #{values * ", "}"
    if matching_code_position.nil? or matching_code_position == -1
      matching_codes = [key]
    else
      matching_codes = values[matching_code_position]
      matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
    end
    Log.debug "Matching codes: #{matching_codes}"

    next if matching_codes.nil?

    matching_codes.each do |matching_code|
      if match_index
        if match_index[matching_code]
          matching_code_fix = match_index[matching_code].first
        else
          matching_code_fix = nil
        end
      else
        matching_code_fix = matching_code
      end

      Log.debug "Matching code (fix): #{matching_code_fix}"
      next if matching_code_fix.nil?

      if other_index
        Log.debug "Using other_index"
        other_codes = other_index[matching_code_fix]
      else
        other_codes = matching_code_fix
      end
      Log.debug "Other codes: #{other_codes}"

      next if other_codes.nil? or other_codes.empty?
      other_code = other_codes.first

      if nofieldinfo
        next if other[other_code].nil?
        if list
          other_values = [[other_code]] + other[other_code]
        else
          other_values = [other_code] + other[other_code]
        end
        # Drop the columns of +other+ that just repeat the matching code.
        other_values.delete_if do |cell|
          cell = [cell] unless Array === cell
          cell.collect{|e| case_insensitive ? e.downcase : e }.
            select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
        end

        new_values = values + other_values
      else
        if other[other_code].nil?
          if list
            other_values = [[]] * other.fields.length
          else
            # NOTE(review): [] * n is always [] - confirm whether n
            # placeholder values were intended.
            other_values = [] * other.fields.length
          end
        else
          if list
            other_values = other[other_code] + [[other_code]]
          else
            other_values = other[other_code] + [other_code]
          end
        end


        new_values = values.dup

        if list
          this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
            new_values_tops = new_values[tpos]

            if other.list
              new_values_tops += other_values[opos]
            else
              new_values_tops += [other_values[opos]]
            end

            new_values[tpos] = new_values_tops.uniq
          end
        end

        new_values.concat other_values.values_at(*other_new_field_positions)
      end

      self[key] = new_values
    end
  end

  self.fields = self.fields + new_fields unless nofieldinfo
end
770
+
771
+ #{{{ Helpers
772
+
773
# Opens +file+ as a TSV and returns its index (see #index).
#
# Options are split in two: the ones used to load the data (everything but
# :field and :persistence; :data_persistence turns on persistence for the
# data itself) and the ones used to build the index. With
# options[:persistence] the index is stored in a file derived from +file+
# and the options, and reloaded from it when it already exists.
def self.index(file, options = {})
  opt_data = options.dup
  opt_index = options.dup
  opt_data.delete :field
  opt_data.delete :persistence
  opt_index.delete :persistence

  opt_data[:persistence] = true if options[:data_persistence]

  opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index) if options[:persistence]

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if ! opt_index[:persistence_file].nil? && File.exist?(opt_index[:persistence_file])
    TSV.log "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
    TSV.new(PersistenceHash.get(opt_index[:persistence_file], false), opt_index)
  else
    TSV.log "Creating index for #{ file }: #{opt_index[:persistence_file]}"
    data = TSV.new(file, opt_data)
    data.index(opt_index)
  end
end
793
+
794
# Opens a TSV from a "path#options" specification: the text after the first
# "#" is turned into an options Hash with Misc.string2hash. Without a "#" the
# file is opened with no options.
def self.open_file(file)
  options = {}

  if file =~ /(.*?)#(.*)/
    path, option_string = $1, $2.to_s
    options = Misc.string2hash(option_string)
    file = path
  end

  TSV.new(file, options)
end
803
+
804
+ end