rbbt-util 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,804 @@
1
+ require 'rbbt/util/misc'
2
+ require 'rbbt/util/open'
3
+ require 'rbbt/util/tc_hash'
4
+ require 'rbbt/util/tmpfile'
5
+ require 'digest'
6
+ require 'fileutils'
7
+
8
# Returns a copy of +options+ completed with +defaults+.
#
# A default is applied only when the current value for its key is nil
# (missing or explicitly nil); values such as false are kept. The +options+
# argument itself is never modified.
def add_defaults(options, defaults = {})
  defaults.inject(options.dup) do |merged, (name, fallback)|
    merged[name] = fallback if merged[name].nil?
    merged
  end
end
15
+
16
+ class TSV
17
# Error class for missing fields.
# NOTE(review): nothing in this section raises it; presumably used by the
# field lookup helpers defined elsewhere - confirm.
class FieldNotFoundError < StandardError; end

#{{{ Persistence

# Hash-like backend used to persist parsed data (TCHash comes from
# rbbt/util/tc_hash).
PersistenceHash = TCHash

# Directory where persistence files are stored. The string is deliberately
# left mutable: TSV.cachedir= changes the location in place with
# String#replace.
CACHEDIR = "/tmp/tsv_persistent_cache"

# mkdir_p (as TSV.cachedir= already does) instead of mkdir: it also creates
# missing parent directories and does not fail if the directory appears
# between the existence check and the creation.
FileUtils.mkdir_p CACHEDIR unless File.exist? CACHEDIR
25
+
26
# Changes the directory used for persistence files and creates it (with any
# missing parents) when nothing exists at that path yet.
#
# CACHEDIR is updated in place, so code holding a reference to the constant
# sees the new location.
def self.cachedir=(cachedir)
  CACHEDIR.replace(cachedir)
  return if File.exist?(CACHEDIR)
  FileUtils.mkdir_p(CACHEDIR)
end
30
+
31
# Directory currently used for persistence files (the CACHEDIR constant
# itself, not a copy).
def self.cachedir
  return CACHEDIR
end
34
+
35
# Builds the path of the persistence file for +file+ parsed with +options+.
#
# The file name is +prefix+ (whitespace replaced by '_' and '/' by '>', so
# it is a single path component) followed by the MD5 hex digest of the
# inspected [file, options] pair. The path is placed under CACHEDIR.
def self.get_persistence_file(file, prefix, options = {})
  safe_prefix = prefix.gsub(/\s/,'_').gsub(/\//,'>')
  fingerprint = Digest::MD5.hexdigest([file, options].inspect)
  File.join(CACHEDIR, safe_prefix + fingerprint)
end
38
+
39
# Debug logging is enabled at load time when TSV_DEBUG is exactly "true".
@debug = (ENV['TSV_DEBUG'] == "true")

# Writes +message+ to STDERR, but only while the debug flag is exactly
# +true+ (other truthy values do not enable logging).
def self.log(message)
  return unless @debug == true
  STDERR.puts message
end

# Sets the debug flag read by TSV.log.
def self.debug=(value)
  @debug = value
end
47
+
48
# Reads the header line of +file+ and returns its field names, or nil when
# the first line does not start with the header marker (or the file is
# empty).
#
# +file+ may be given as "path#options": when the part before the last '#'
# is an existing file, the part after it is turned into options with
# Misc.string2hash and merged over +options+.
#
# Options (defaults): :sep ("\t") field separator, :header_hash ("#")
# marker that starts a header line. :header_hash is interpolated into a
# regular expression as is.
#
# The +options+ hash passed by the caller is not modified.
def self.headers(file, options = {})
  # File.exist? rather than File.exists?, which was removed in Ruby 3.2
  if file =~ /(.*)#(.*)/ and File.exist? $1
    path, inline_options = $1, $2
    # merge into a new hash instead of merge!, so the caller's hash is untouched
    options = options.merge(Misc.string2hash(inline_options))
    file = path
  end

  options = Misc.add_defaults options, :sep => "\t", :header_hash => "#"

  io = Open.open(file)
  begin
    line = io.gets
  ensure
    # close the stream even if reading fails
    io.close
  end

  return nil unless line =~ /^#{options[:header_hash]}/
  line.chomp.sub(/^#{options[:header_hash]}/,'').split(options[:sep])
end
65
+
66
+ #{{{ Accesor Methods
67
+
68
# Keys of the underlying data store, exactly as stored.
def keys; @data.keys; end

# Raw values of the underlying data store (references are not followed).
def values; @data.values; end

# Number of entries in the underlying data store.
def size; @data.size; end
79
+
80
+ # Write
81
+
82
# Stores +value+ under +key+; the key is downcased first when the table is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
86
+
87
+
88
# Copies every key/value pair of +new_data+ into this table through []=,
# so case-insensitive key handling applies. Existing keys are overwritten.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
93
+
94
+ # Read
95
+
96
# Resolves a raw stored value.
#
# A String containing "__Ref:<key>" is an alias: the entry stored under
# <key> is looked up (through #[]) and returned. An Array is wrapped with
# NamedArray.name when the table has field names. Anything else is
# returned unchanged.
def follow(value)
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return value unless Array === value && fields
  NamedArray.name value, fields
end
104
# Looks up +key+.
#
# For a single key the key is downcased when the table is case
# insensitive and the stored value is resolved with #follow.
#
# For an Array of keys, an entry stored under the array itself is returned
# as is (raw, not followed); otherwise the first non-nil result of looking
# up each element in turn is returned, or nil when none matches.
def [](key)
  unless Array === key
    key = key.downcase if @case_insensitive
    return follow(@data[key])
  end

  direct = @data[key]
  return direct if direct != nil

  key.each do |candidate|
    found = self[candidate]
    return found unless found.nil?
  end
  nil
end
114
+
115
# Returns the entries for each of +keys+, in order, looked up through #[]
# (missing keys give nil).
def values_at(*keys)
  keys.map { |key| self[key] }
end
120
+
121
# Calls +block+ with every key and its value resolved through #follow.
# Returns the underlying data store.
def each(&block)
  @data.each { |key, raw| block.call(key, follow(raw)) }
end
126
+
127
# Maps over the entries with their values resolved through #follow.
#
# With a block, returns the block results for each (key, value). Without
# one, returns an array of [key, value] pairs.
def collect
  return @data.collect { |key, value| [key, follow(value)] } unless block_given?

  @data.collect { |key, value| yield key, follow(value) }
end
139
+
140
# Returns the [key, value] pairs of the table sorted (optionally with
# +block+ as comparator). Values are wrapped with NamedArray.name when the
# table has field names.
def sort(&block)
  ordered = collect.sort(&block)
  ordered.collect do |key, value|
    [key, fields ? NamedArray.name(value, fields) : value]
  end
end
147
+
148
# Returns the [key, value] pairs of the table ordered by the result of
# +block+, which receives each pair.
def sort_by(&block)
  pairs = collect
  pairs.sort_by(&block)
end
151
+
152
+ #{{{ Parsing
153
+
154
# Splits the string +io+ on +delimiter+, keeping trailing empty fields
# (split limit -1). Returns [] for nil.
def self.parse_fields(io, delimiter = "\t")
  io.nil? ? [] : io.split(delimiter, -1)
end
159
+
160
# Transposes a list of parallel value lists: the i-th result holds the
# i-th element of every list (nil where a later list is shorter than the
# first one).
#
# When +fields+ is given, or +list+ responds to #fields, each resulting
# tuple is wrapped with NamedArray.name. Returns [] for nil or empty input.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?
  fields ||= list.fields if list.respond_to? :fields

  first, rest = list[0], list[1..-1]
  tuples = first.zip(*rest)
  return tuples unless fields

  tuples.collect { |tuple| NamedArray.name(tuple, fields) }
end
167
+
168
# Parses the lines of +file+ (anything responding to #gets) into +data+ (a
# Hash or PersistenceHash) and returns [key_field, fields] taken from the
# header line (both nil when there is no header).
#
# Options (defaults):
# :sep ("\t")::          field separator
# :sep2 ("|")::          separator for multiple values inside one field
# :native (0)::          field used as key, resolved with Misc.field_position
# :extra (nil)::         field or fields to keep as values; nil keeps all
#                        fields except the key
# :fix (nil)::           callable applied to each line before processing
# :exclude (nil)::       callable; lines for which it is true are skipped
# :select (nil)::        callable; lines for which it is false are skipped
# :single (false)::      keep only the first value of a row (implies :flatten)
# :unique (false)::      keep only the first value of each field
# :flatten (false)::     store all values of a row as one flat list
# :overwrite (false)::   a repeated key replaces the previous entry
#                        instead of being combined with it
# :keep_empty (true)::   represent empty fields as [""] and pad rows
# :case_insensitive (false):: downcase keys
# :header_hash ('#')::   marker that starts the header line
# :grep, :persistence_file:: accepted but not used by this method
#
# When the key field holds several ids (separated by :sep2), the first one
# owns the entry and the others are stored as "__Ref:<first id>" strings.
#
# Raises RuntimeError ("Empty content") when +file+ has no lines.
def self.parse(data, file, options = {})

  # Prepare options
  options = add_defaults options,
    :sep              => "\t",
    :sep2             => "|",
    :native           => 0,
    :extra            => nil,
    :fix              => nil,
    :exclude          => nil,
    :select           => nil,
    :grep             => nil,
    :single           => false,
    :unique           => false,
    :flatten          => false,
    :overwrite        => false,
    :keep_empty       => true,
    :case_insensitive => false,
    :header_hash      => '#' ,
    :persistence_file => nil

  options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
  options[:flatten] = true if options[:single]

  #{{{ Process first line

  line = file.gets
  raise "Empty content" if line.nil?
  line.chomp!

  if line =~ /^#{options[:header_hash]}/
    header_fields = parse_fields(line, options[:sep])
    header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
    line = file.gets
  else
    header_fields = nil
  end

  id_pos = Misc.field_position(header_fields, options[:native])

  # max_cols tracks the widest row seen so rows can be padded at the end;
  # it is only tracked when all non-key fields are kept.
  if options[:extra].nil?
    extra_pos = nil
    max_cols = 0
  else
    extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
    max_cols = nil
  end

  #{{{ Process rest
  while line do
    line.chomp!

    line = options[:fix].call line if options[:fix]

    # Select and fix lines
    if (options[:exclude] and options[:exclude].call(line)) or
       (options[:select] and not options[:select].call(line))
      line = file.gets
      next
    end

    ### Process line

    # Chunk fields
    parts = parse_fields(line, options[:sep])

    # Get next line (done here so the 'next' statements below can skip a row)
    line = file.gets

    # Get id field
    next if parts[id_pos].nil? || parts[id_pos].empty?
    ids = parse_fields(parts[id_pos], options[:sep2])
    ids.collect!{|id| id.downcase } if options[:case_insensitive]

    # Get extra fields
    if extra_pos.nil?
      extra = parts
      extra.delete_at(id_pos)
      unless options[:flatten] or options[:single]
        max_cols = extra.size if extra.size > (max_cols || 0)
      end
    else
      extra = parts.values_at(*extra_pos)
    end

    extra.collect!{|value| parse_fields(value, options[:sep2])}
    extra.collect!{|values| values.first} if options[:unique]
    extra.flatten! if options[:flatten]
    extra = extra.first if options[:single]

    # The first id owns the entry; the remaining ids become references to it
    main_entry = ids.shift
    ids.each do |id|
      data[id] = "__Ref:#{main_entry}"
    end

    case
    when options[:overwrite]
      data[main_entry] = extra
    when (options[:single] or options[:unique])
      data[main_entry] ||= extra
    when options[:flatten]
      if PersistenceHash === data
        # values read from a PersistenceHash have to be assigned back
        data[main_entry] = (data[main_entry] || []).concat extra
      else
        data[main_entry] ||= []
        data[main_entry].concat extra
      end
    else
      entry = data[main_entry] || []
      # Only strings can be references. Calling =~ on the usual Array entry
      # relied on Object#=~, which was removed in Ruby 3.2.
      while String === entry && entry =~ /__Ref:(.*)/ do
        entry = data[$1] || []
      end

      extra.each_with_index do |cell, i|
        if cell.empty?
          next unless options[:keep_empty]
          cell = [""]
        end
        entry[i] ||= []
        entry[i] = entry[i].concat cell
      end

      data[main_entry] = entry
    end
  end

  # Pad every row to the widest row seen
  if options[:keep_empty] and not max_cols.nil?
    data.each do |key, values|
      # "__Ref:" strings (and single values) are not rows; indexing into
      # them used to raise a TypeError once max_cols exceeded their length
      next unless Array === values
      max_cols.times do |i|
        values[i] ||= [""]
      end
      data[key] = values
    end
  end

  # Save header information
  key_field = nil
  fields = nil
  if header_fields && header_fields.any?
    key_field = header_fields[id_pos]
    if extra_pos.nil?
      fields = header_fields
      fields.delete_at(id_pos)
    else
      fields = header_fields.values_at(*extra_pos)
    end
  end

  data.read if PersistenceHash === data

  [key_field, fields]
end
334
+
335
+ attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
336
# Builds a table from +file+, which may be:
# * another TSV: its data, field information and flags are shared;
# * a Hash or PersistenceHash: used directly as the data store;
# * a File, the path of an existing file, or any other object responding
#   to #gets: its content is parsed with TSV.parse.
#
# Anything else (for instance the path of a missing file) raises a
# RuntimeError "File ... not found".
#
# Options: :case_insensitive, :flatten, :single and :unique set the table
# flags; :grep filters the input through Open.grep; :persistence stores
# the parsed data in (or reloads it from) a persistence file. All options
# are also passed on to TSV.parse. The caller's +options+ hash is not
# modified. The input stream is closed after parsing.
def initialize(file = {}, options = {})
  @case_insensitive = options[:case_insensitive] == true
  @list = ! (options[:flatten] == true || options[:single] == true || options[:unique] == true)

  case
  when TSV === file
    @filename = file.filename
    @data = file.data
    @key_field = file.key_field
    @fields = file.fields
    @case_insensitive = file.case_insensitive
    # the accessor is #list; there is no #is_list method
    @list = file.list
    return self
  when (Hash === file or PersistenceHash === file)
    @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
    @data = file
    return self
  when File === file
    @filename = File.expand_path file.path
  when String === file && File.exist?(file) # File.exists? was removed in Ruby 3.2
    @filename = File.expand_path file
    file = Open.open(file)
  when file.respond_to?(:gets)
    # Other IO-like input (StringIO, pipes...); it has no file name. The
    # original bare 'when StringIO' was always true, which made the error
    # below unreachable.
  else
    raise "File #{file} not found"
  end

  begin
    if options[:persistence]
      # :persistence is left out of the options used to name the
      # persistence file; work on a copy to keep the caller's hash intact
      options = options.reject { |name, _| name == :persistence }
      persistence_file = TSV.get_persistence_file @filename, "file:#{ @filename }:", options

      if File.exist? persistence_file
        TSV.log "Loading Persistence for #{ @filename } in #{persistence_file}"
        @data = PersistenceHash.get(persistence_file, false)
        @key_field = @data.key_field
        @fields = @data.fields
      else
        @data = PersistenceHash.get(persistence_file, true)
        file = Open.grep(file, options[:grep]) if options[:grep]

        TSV.log "Persistent Parsing for #{ @filename } in #{persistence_file}"
        @key_field, @fields = TSV.parse(@data, file, options.merge(:persistence_file => persistence_file))
        @data.key_field = @key_field
        @data.fields = @fields
        @data.read
      end
    else
      TSV.log "Non-persistent parsing for #{ @filename }"
      @data = {}
      file = Open.grep(file, options[:grep]) if options[:grep]
      @key_field, @fields = TSV.parse(@data, file, options)
    end
  ensure
    # release the stream even when parsing fails
    file.close
  end
end
392
+
393
+
394
# Renders the table as tab-separated text.
#
# When field names are known the first line is "#<key field>" followed by
# the field names. Each entry then gets one line: the key alone for nil
# values, key and value for a non-array value, key and tab-joined values
# for a flat list, and for a list of lists each inner list joined with "|".
def to_s
  str = ""

  str << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  each do |key, values|
    str << key.dup

    if values.nil?
      # key only
    elsif not Array === values
      str << "\t" << values.to_s
    elsif Array === values.first
      str << "\t" << values.collect{|list| (list || []) * "|"} * "\t"
    else
      str << "\t" << values * "\t"
    end

    str << "\n"
  end

  str
end
416
+
417
+ #{{{ New
418
+
419
# True when +field+ is the key field or one of +fields+, false otherwise.
def self.fields_include(key_field, fields, field)
  (field == key_field or fields.include? field) ? true : false
end
423
+
424
# Translates each of +selected+ into a position:
# * nil, :main or the key field name give -1;
# * integers are returned as they are;
# * anything else is resolved against +fields+ with Misc.field_position.
def self.field_positions(key_field, fields, *selected)
  selected.collect do |sel|
    if sel.nil? or sel == :main or sel == key_field
      -1
    elsif Integer === sel
      sel
    else
      Misc.field_position fields, sel
    end
  end
end
436
+
437
# True when +field+ is this table's key field or one of its fields
# (see TSV.fields_include).
def fields_include(field)
  TSV.fields_include(key_field, fields, field)
end
440
+
441
# Positions of the +selected+ fields in this table (see
# TSV.field_positions). Returns nil when the only selection is nil.
def field_positions(*selected)
  if selected.nil? or selected == [nil]
    nil
  else
    TSV.field_positions(key_field, fields, *selected)
  end
end
445
+
446
# Names of the fields at +positions+. The key field is appended after the
# regular fields, so position -1 gives the key field name.
#
# Returns nil when the table has no field names or when the only position
# is nil.
def fields_at(*positions)
  return nil if fields.nil? or positions.nil? or positions == [nil]

  names = fields + [key_field]
  names.values_at(*positions)
end
451
+
452
# Iterates over the table as if it were keyed by +new_key_field+ and
# restricted to +new_fields+, yielding (key, values) pairs. Returns
# [key field name, field names] describing what was yielded.
#
# * With the current key (nil, :main or the key field name) and no field
#   selection, this is the same as #each.
# * With the current key and a selection, only the selected values are
#   yielded.
# * With another field as key, every value found in that field becomes a
#   key in turn. The yielded values are the selected fields or, when
#   +new_fields+ is nil, the old key followed by the remaining fields.
#   Values are wrapped with NamedArray.name when field names are known.
def through(new_key_field = nil, new_fields = nil, &block)
  new_key_position = (field_positions(new_key_field) || [-1]).first

  if new_key_position == -1
    if new_fields.nil? or new_fields == fields
      each(&block)
      return [key_field, fields]
    end

    new_field_positions = field_positions(*new_fields)
    each do |key, values|
      yield key, values.values_at(*new_field_positions)
    end
    return [key_field, fields_at(*new_field_positions)]
  end

  # Since Ruby 1.9 splatting nil passes no arguments, so
  # field_positions(*nil) returns [] rather than nil. That silently
  # selected no fields at all; a missing selection is handled explicitly.
  new_field_positions = new_fields.nil? ? nil : field_positions(*new_fields)
  new_field_names = new_field_positions.nil? ? nil : fields_at(*new_field_positions)

  if new_field_names.nil? and fields
    # default layout: old key first, then every field but the new key
    new_field_names = fields.dup
    new_field_names.delete_at new_key_position
    new_field_names.unshift key_field
  end

  each do |key, values|
    # the old key is appended so that position -1 refers to it
    tmp_values = list ? values + [[key]] : values + [key]

    if new_field_positions.nil?
      new_values = values.dup
      new_values.delete_at new_key_position
      new_values.unshift [key]
    else
      new_values = tmp_values.values_at(*new_field_positions)
    end

    new_keys = tmp_values[new_key_position]
    # tables that are not lists hold plain strings, and String#each no
    # longer exists
    new_keys = [new_keys] if String === new_keys

    new_keys.each do |new_key|
      if new_field_names
        yield new_key, NamedArray.name(new_values, new_field_names)
      else
        yield new_key, new_values
      end
    end
  end

  [(fields_at(new_key_position) || [nil]).first, new_field_names]
end
504
+
505
# Replaces, in place, the content of +field+ in every entry with the
# result of the block, which receives (current field value, key, all
# values). Entries where the field is nil are skipped.
def process(field)
  through do |key, values|
    current = values[field]
    next if current.nil?
    current.replace yield(current, key, values)
  end
end
510
+
511
+
512
# Returns a new TSV keyed by +new_key_field+ and holding +new_fields+ (see
# #through for how both are interpreted). Entries that end up under the
# same key are combined field by field.
#
# With options[:persistence_file] the result is loaded from that file when
# it already exists; otherwise it is built and merged into the
# PersistenceHash opened on that file.
# NOTE(review): the PersistenceHash is opened with +false+ as second
# argument even when the file is being created, whereas #initialize passes
# +true+ in that situation - confirm against TCHash.
def reorder(new_key_field, new_fields = nil, options = {})
  options = Misc.add_defaults options
  persistence_file = options[:persistence_file]

  # File.exist? rather than File.exists?, which was removed in Ruby 3.2
  if persistence_file and File.exist?(persistence_file)
    return TSV.new(PersistenceHash.get(persistence_file, false), :case_insensitive => case_insensitive)
  end

  regrouped = {}
  new_key_field, new_fields = through new_key_field, new_fields do |key, values|
    if regrouped[key].nil?
      regrouped[key] = values
    else
      # pair the existing values with the new ones, position by position
      regrouped[key] = regrouped[key].zip(values)
    end
  end

  # flatten the pairs produced by zip back into one list per field
  regrouped.each do |key, values|
    values.each{|list| list.flatten! if Array === list}
  end

  if persistence_file
    reordered = TSV.new(PersistenceHash.get(persistence_file, false), :case_insensitive => case_insensitive)
    reordered.merge! regrouped
  else
    reordered = TSV.new(regrouped, :case_insensitive => case_insensitive)
  end

  reordered.key_field = new_key_field
  reordered.fields = new_fields

  reordered
end
541
+
542
# Returns a new TSV with the same key that only holds +new_fields+.
# +options+ is accepted but not used.
def slice(new_fields, options = {})
  reorder :main, new_fields
end
545
+
546
# Builds an index TSV that maps every value found in the table (and every
# key) to the list of keys of the entries it appears in.
#
# Options:
# :field::            field to use as key of the indexed entries (passed
#                     to #through as new key field)
# :others::           fields whose values are indexed (passed to #through)
# :order (false)::    when true, the keys listed for a value are grouped
#                     by the position of the field where the value was
#                     found, and empty values are skipped
# :case_insensitive:: downcase the indexed values
# :persistence::      derive a persistence file name for the index
# :persistence_file:: load the index from this file when it exists,
#                     otherwise build it and merge it into the
#                     PersistenceHash opened on that file
def index(options = {})
  options = Misc.add_defaults options, :order => false

  if options[:persistence] and ! options[:persistence_file]
    options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
  end

  # File.exist? rather than File.exists?, which was removed in Ruby 3.2
  if options[:persistence_file] and File.exist?(options[:persistence_file])
    return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
  end

  entries = {}
  if options[:order]
    new_key_field, new_fields = through options[:field], options[:others] do |key, values|

      values.each_with_index do |list, i|
        next if list.nil? or list.empty?

        list = [list] unless Array === list

        list.each do |value|
          next if value.nil? or value.empty?
          value = value.downcase if options[:case_insensitive]
          # slot 0 is reserved for the key itself, slot i + 1 for field i
          entries[value] ||= []
          entries[value][i + 1] ||= []
          entries[value][i + 1] << key
        end
        entries[key] ||= []
        entries[key][0] = key
      end

    end

    # collapse the per-field slots into one ordered list
    entries.each do |key, values|
      values.flatten!
      values.compact!
    end

  else
    new_key_field, new_fields = through options[:field], options[:others] do |key, values|
      entries[key] ||= []
      entries[key] << key
      values.each do |list|
        next if list.nil?
        if Array === list
          list.each do |value|
            value = value.downcase if options[:case_insensitive]
            entries[value] ||= []
            entries[value] << key
          end
        else
          next if list.empty?
          value = list
          value = value.downcase if options[:case_insensitive]
          entries[value] ||= []
          entries[value] << key
        end
      end
    end
  end

  if options[:persistence_file]
    index = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
    index.merge! entries
  else
    index = TSV.new(entries, :case_insensitive => options[:case_insensitive])
  end

  index.key_field = new_key_field
  index.fields = new_fields
  index
end
618
+
619
+ def smart_merge(other, match = nil, new_fields = nil)
620
+
621
+ new_fields = [new_fields] if String === new_fields
622
+ if self.fields and other.fields
623
+ common_fields = ([self.key_field] + self.fields) & ([other.key_field] + other.fields)
624
+ new_fields ||= ([other.key_field] + other.fields) - ([self.key_field] + self.fields)
625
+
626
+ common_fields.delete match if String === match
627
+ common_fields.delete_at match if Integer === match
628
+
629
+ this_common_field_positions = self.field_positions *common_fields
630
+ other_common_field_positions = other.field_positions *common_fields
631
+ other_new_field_positions = other.field_positions *new_fields
632
+ else
633
+ nofieldinfo = true
634
+ end
635
+
636
+ case
637
+ when TSV === match
638
+ match_index = match
639
+ matching_code_position = nil
640
+
641
+ when Array === match
642
+ match_index = match.first
643
+ matching_code_position = field_positions(match.last).first
644
+
645
+ when match =~ /^through:(.*)/
646
+ through = $1
647
+ if through =~ /(.*)#using:(.*)/
648
+ through = $1
649
+ matching_code_position = field_positions($2).first
650
+ else
651
+ matching_code_position = nil
652
+ end
653
+ index_fields = TSV.headers(through)
654
+ target_field = index_fields.select{|field| other.fields_include field}.first
655
+ Log.debug "Target Field: #{ target_field }"
656
+ match_index = TSV.open_file(through).index(:field => target_field)
657
+
658
+ when field_positions(match).first
659
+ matching_code_position = field_positions(match).first
660
+ match_index = nil
661
+ end
662
+
663
+ if matching_code_position.nil? and match_index.fields
664
+ match_index.fields.each do |field|
665
+ if matching_code_position = field_positions(field).first
666
+ break
667
+ end
668
+ end
669
+ end
670
+
671
+ if match_index and match_index.key_field == other.key_field
672
+ other_index = nil
673
+ else
674
+ other_index = (match === String and other.fields_include(match)) ?
675
+ other.index(:other => match, :order => true) : other.index(:order => true)
676
+ end
677
+
678
+ each do |key,values|
679
+ Log.debug "Key: #{ key }. Values: #{values * ", "}"
680
+ if matching_code_position.nil? or matching_code_position == -1
681
+ matching_codes = [key]
682
+ else
683
+ matching_codes = values[matching_code_position]
684
+ matching_codes = [matching_codes] unless matching_codes.nil? or Array === matching_codes
685
+ end
686
+ Log.debug "Matching codes: #{matching_codes}"
687
+
688
+ next if matching_codes.nil?
689
+
690
+ matching_codes.each do |matching_code|
691
+ if match_index
692
+ if match_index[matching_code]
693
+ matching_code_fix = match_index[matching_code].first
694
+ else
695
+ matching_code_fix = nil
696
+ end
697
+ else
698
+ matching_code_fix = matching_code
699
+ end
700
+
701
+ Log.debug "Matching code (fix): #{matching_code_fix}"
702
+ next if matching_code_fix.nil?
703
+
704
+ if other_index
705
+ Log.debug "Using other_index"
706
+ other_codes = other_index[matching_code_fix]
707
+ else
708
+ other_codes = matching_code_fix
709
+ end
710
+ Log.debug "Other codes: #{other_codes}"
711
+
712
+ next if other_codes.nil? or other_codes.empty?
713
+ other_code = other_codes.first
714
+
715
+ if nofieldinfo
716
+ next if other[other_code].nil?
717
+ if list
718
+ other_values = [[other_code]] + other[other_code]
719
+ else
720
+ other_values = [other_code] + other[other_code]
721
+ end
722
+ other_values.delete_if do |list|
723
+ list = [list] unless Array === list
724
+ list.collect{|e| case_insensitive ? e.downcase : e }.
725
+ select{|e| case_insensitive ? e == matching_code.downcase : e == matching_code }.any?
726
+ end
727
+
728
+ new_values = values + other_values
729
+ else
730
+ if other[other_code].nil?
731
+ if list
732
+ other_values = [[]] * other.fields.length
733
+ else
734
+ other_values = [] * other.fields.length
735
+ end
736
+ else
737
+ if list
738
+ other_values = other[other_code] + [[other_code]]
739
+ else
740
+ other_values = other[other_code] + [other_code]
741
+ end
742
+ end
743
+
744
+
745
+ new_values = values.dup
746
+
747
+ if list
748
+ this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
749
+ new_values_tops = new_values[tpos]
750
+
751
+ if other.list
752
+ new_values_tops += other_values[opos]
753
+ else
754
+ new_values_tops += [other_values[opos]]
755
+ end
756
+
757
+ new_values[tpos] = new_values_tops.uniq
758
+ end
759
+ end
760
+
761
+ new_values.concat other_values.values_at *other_new_field_positions
762
+ end
763
+
764
+ self[key] = new_values
765
+ end
766
+ end
767
+
768
+ self.fields = self.fields + new_fields unless nofieldinfo
769
+ end
770
+
771
+ #{{{ Helpers
772
+
773
# Builds, or reloads from its persistence file, an index for +file+.
#
# +options+ is split in two: the options used to load the data (without
# :field and :persistence; :data_persistence turns persistence on for the
# data itself) and the options used to build the index (without
# :persistence; with :persistence a :persistence_file is derived from the
# file name, the field and the remaining index options).
#
# When the index persistence file exists it is loaded directly; otherwise
# the file is parsed and indexed.
def self.index(file, options = {})
  opt_data = options.dup
  opt_index = options.dup
  opt_data.delete :field
  opt_data.delete :persistence
  opt_index.delete :persistence

  opt_data[:persistence] = true if options[:data_persistence]

  if options[:persistence]
    opt_index.merge! :persistence_file => get_persistence_file(file, "index:#{ file }_#{options[:field]}:", opt_index)
  end

  persistence_file = opt_index[:persistence_file]

  # File.exist? rather than File.exists?, which was removed in Ruby 3.2
  if ! persistence_file.nil? && File.exist?(persistence_file)
    TSV.log "Reloading persistent index for #{ file }: #{persistence_file}"
    TSV.new(PersistenceHash.get(persistence_file, false), opt_index)
  else
    TSV.log "Creating index for #{ file }: #{persistence_file}"
    data = TSV.new(file, opt_data)
    data.index(opt_index)
  end
end
793
+
794
# Opens +file+ as a TSV. The name may carry options after the first '#'
# ("path#options"); they are turned into a hash with Misc.string2hash and
# passed to TSV.new.
def self.open_file(file)
  options = {}

  if file =~ /(.*?)#(.*)/
    path, spec = $1, $2
    file = path
    options = Misc.string2hash(spec.to_s)
  end

  TSV.new(file, options)
end
803
+
804
+ end