rbbt-util 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,13 @@ class TCHash < TokyoCabinet::HDB
6
6
 
7
7
  Serializer = Marshal
8
8
 
9
- FIELD_INFO_ENTRIES = {:fields => '__tokyocabinet_hash_fields', :key_field => '__tokyocabinet_hash_native_field'}
9
+ FIELD_INFO_ENTRIES = {
10
+ :fields => '__tokyocabinet_hash_fields',
11
+ :key_field => '__tokyocabinet_hash_key_field',
12
+ :filename => '__tokyocabinet_hash_filename',
13
+ :type => '__tokyocabinet_hash_type',
14
+ :case_insensitive => '__tokyocabinet_hash_case_insensitive'
15
+ }
10
16
  CONNECTIONS = {}
11
17
 
12
18
  FIELD_INFO_ENTRIES.each do |entry, key|
data/lib/rbbt/util/tsv.rb CHANGED
@@ -3,17 +3,10 @@ require 'rbbt/util/open'
3
3
  require 'rbbt/util/tc_hash'
4
4
  require 'rbbt/util/tmpfile'
5
5
  require 'rbbt/util/log'
6
+ require 'rbbt/util/persistence'
6
7
  require 'digest'
7
8
  require 'fileutils'
8
9
 
9
- def add_defaults(options, defaults = {})
10
- new_options = options.dup
11
- defaults.each do |key, value|
12
- new_options[key] = value if new_options[key].nil?
13
- end
14
- new_options
15
- end
16
-
17
10
  class TSV
18
11
  class FieldNotFoundError < StandardError;end
19
12
 
@@ -26,8 +19,6 @@ class TSV
26
19
 
27
20
  #{{{ Persistence
28
21
 
29
- PersistenceHash = TCHash
30
-
31
22
  CACHEDIR="/tmp/tsv_persistent_cache"
32
23
  FileUtils.mkdir CACHEDIR unless File.exist? CACHEDIR
33
24
 
@@ -40,10 +31,7 @@ class TSV
40
31
  CACHEDIR
41
32
  end
42
33
 
43
- def self.get_persistence_file(file, prefix, options = {})
44
- File.join(CACHEDIR, prefix.gsub(/\s/,'_').gsub(/\//,'>') + Digest::MD5.hexdigest([file, options].inspect))
45
- end
46
-
34
+
47
35
  #{{{ Headers and Field Stuff
48
36
 
49
37
  def self.headers(file, options = {})
@@ -130,7 +118,7 @@ class TSV
130
118
  end
131
119
 
132
120
  each do |key, values|
133
- if list
121
+ if type == :double
134
122
  tmp_values = values + [[key]]
135
123
  else
136
124
  tmp_values = values + [key]
@@ -144,11 +132,15 @@ class TSV
144
132
  new_values = tmp_values.values_at(*new_field_positions)
145
133
  end
146
134
 
147
- tmp_values[new_key_position].each do |new_key|
148
- if new_field_names
149
- yield new_key, NamedArray.name(new_values, new_field_names)
150
- else
151
- yield new_key, new_values
135
+ if not Array === tmp_values[new_key_position]
136
+ yield tmp_values[new_key_position], NamedArray.name(new_values, new_field_names)
137
+ else
138
+ tmp_values[new_key_position].each do |new_key|
139
+ if new_field_names
140
+ yield new_key, NamedArray.name(new_values, new_field_names)
141
+ else
142
+ yield new_key, new_values
143
+ end
152
144
  end
153
145
  end
154
146
  end
@@ -165,7 +157,7 @@ class TSV
165
157
 
166
158
  def reorder(new_key_field, new_fields = nil, options = {})
167
159
  options = Misc.add_defaults options
168
- return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
160
+ return TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive) if options[:persistence_file] and File.exists?(options[:persistence_file])
169
161
 
170
162
  new = {}
171
163
  new_key_field, new_fields = through new_key_field, new_fields do |key, values|
@@ -181,7 +173,7 @@ class TSV
181
173
  end
182
174
 
183
175
  if options[:persistence_file]
184
- reordered = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
176
+ reordered = TSV.new(Persistence::TSV.get(options[:persistence_file], false), :case_insensitive => case_insensitive)
185
177
  reordered.merge! new
186
178
  else
187
179
  reordered = TSV.new(new, :case_insensitive => case_insensitive)
@@ -199,12 +191,12 @@ class TSV
199
191
 
200
192
  def add_field(name = nil)
201
193
  each do |key, values|
202
- self[key] = values << yield(key, values)
194
+ self[key] = values + [yield(key, values)]
203
195
  end
204
196
 
205
- fields << name if list
206
- if PersistenceHash === @data
207
- @data.fields = fields
197
+ if fields != nil
198
+ new_fields = fields + [name]
199
+ self.fields = new_fields
208
200
  end
209
201
  end
210
202
 
@@ -212,6 +204,9 @@ class TSV
212
204
  new = TSV.new({})
213
205
  new.key_field = key_field
214
206
  new.fields = fields.dup
207
+ new.type = type
208
+ new.filename = filename + "#Select: #{method.inspect}"
209
+ new.case_insensitive = case_insensitive
215
210
 
216
211
  case
217
212
  when Array === method
@@ -222,6 +217,10 @@ class TSV
222
217
  through do |key, values|
223
218
  new[key] = values if [key,values].flatten.select{|v| v =~ method}.any?
224
219
  end
220
+ when String === method
221
+ through do |key, values|
222
+ new[key] = values if [key,values].flatten.select{|v| v == method}.any?
223
+ end
225
224
  when Hash === method
226
225
  key = method.keys.first
227
226
  method = method.values.first
@@ -230,89 +229,87 @@ class TSV
230
229
  method.each{|item| if values = self[item]; then new[item] = values; end}
231
230
  when Array === method
232
231
  through :main, key do |key, values|
233
- new[key] = values if (values.flatten & method).any?
232
+ new[key] = self[key] if (values.flatten & method).any?
234
233
  end
235
234
  when Regexp === method
236
235
  through :main, key do |key, values|
237
- new[key] = values if values.flatten.select{|v| v =~ method}.any?
236
+ new[key] = self[key] if values.flatten.select{|v| v =~ method}.any?
237
+ end
238
+ when String === method
239
+ through :main, key do |key, values|
240
+ new[key] = self[key] if values.flatten.select{|v| v == method}.any?
238
241
  end
239
242
  end
240
243
  end
241
244
 
245
+
242
246
  new
243
247
  end
244
248
 
245
249
  def index(options = {})
246
- options = Misc.add_defaults options, :order => false
250
+ options = Misc.add_defaults options, :order => false, :persistence => false
247
251
 
248
- if options[:persistence] and ! options[:persistence_file]
249
- options[:persistence_file] = TSV.get_persistence_file(filename, "index:#{ filename }_#{options[:field]}:", options)
250
- end
252
+ new, extra = Persistence.persist(filename, :Index, :tsv, options) do |filename, options|
253
+ new = {}
254
+ if options[:order]
255
+ new_key_field, new_fields = through options[:target], options[:others] do |key, values|
251
256
 
252
- if options[:persistence_file] and File.exists?(options[:persistence_file])
253
- return TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
254
- end
257
+ values.each_with_index do |list, i|
258
+ next if list.nil? or list.empty?
255
259
 
256
- new = {}
257
- if options[:order]
258
- new_key_field, new_fields = through options[:field], options[:others] do |key, values|
259
-
260
- values.each_with_index do |list, i|
261
- next if list.nil? or list.empty?
262
-
263
- list = [list] unless Array === list
260
+ list = [list] unless Array === list
264
261
 
265
- list.each do |value|
266
- next if value.nil? or value.empty?
267
- value = value.downcase if options[:case_insensitive]
268
- new[value] ||= []
269
- new[value][i + 1] ||= []
270
- new[value][i + 1] << key
271
- end
262
+ list.each do |value|
263
+ next if value.nil? or value.empty?
264
+ value = value.downcase if options[:case_insensitive]
265
+ new[value] ||= []
266
+ new[value][i + 1] ||= []
267
+ new[value][i + 1] << key
268
+ end
272
269
  new[key] ||= []
273
270
  new[key][0] = key
274
- end
271
+ end
275
272
 
276
- end
273
+ end
277
274
 
278
- new.each do |key, values|
279
- values.flatten!
280
- values.compact!
281
- end
275
+ new.each do |key, values|
276
+ values.flatten!
277
+ values.compact!
278
+ end
282
279
 
283
- else
284
- new_key_field, new_fields = through options[:field], options[:others] do |key, values|
285
- new[key] ||= []
286
- new[key] << key
287
- values.each do |list|
288
- next if list.nil?
289
- if Array === list
290
- list.each do |value|
280
+ else
281
+ new_key_field, new_fields = through options[:target], options[:others] do |key, values|
282
+ new[key] ||= []
283
+ new[key] << key
284
+ values.each do |list|
285
+ next if list.nil?
286
+ if Array === list
287
+ list.each do |value|
288
+ value = value.downcase if options[:case_insensitive]
289
+ new[value] ||= []
290
+ new[value] << key
291
+ end
292
+ else
293
+ next if list.empty?
294
+ value = list
291
295
  value = value.downcase if options[:case_insensitive]
292
296
  new[value] ||= []
293
297
  new[value] << key
294
298
  end
295
- else
296
- next if list.empty?
297
- value = list
298
- value = value.downcase if options[:case_insensitive]
299
- new[value] ||= []
300
- new[value] << key
301
299
  end
302
300
  end
303
301
  end
304
- end
305
302
 
306
- if options[:persistence_file]
307
- index = TSV.new(PersistenceHash.get(options[:persistence_file], false), :case_insensitive => options[:case_insensitive])
308
- index.merge! new
309
- else
310
- index = TSV.new(new, :case_insensitive => options[:case_insensitive])
303
+ [new, {:key_field => new_key_field, :fields => new_fields, :type => :double, :case_insensitive => options[:case_insensitive]}]
311
304
  end
312
305
 
313
- index.key_field = new_key_field
314
- index.fields = new_fields
315
- index
306
+ new = TSV.new(new)
307
+ new.filename = "Index: " + filename + options.inspect
308
+ new.fields = extra[:fields]
309
+ new.key_field = extra[:key_field]
310
+ new.case_insensitive = extra[:case_insensitive]
311
+ new.type = extra[:type]
312
+ new
316
313
  end
317
314
 
318
315
  def smart_merge(other, match = nil, new_fields = nil)
@@ -413,7 +410,7 @@ class TSV
413
410
 
414
411
  if nofieldinfo
415
412
  next if other[other_code].nil?
416
- if list
413
+ if type == :double
417
414
  other_values = [[other_code]] + other[other_code]
418
415
  else
419
416
  other_values = [other_code] + other[other_code]
@@ -427,13 +424,13 @@ class TSV
427
424
  new_values = values + other_values
428
425
  else
429
426
  if other[other_code].nil?
430
- if list
427
+ if type == :double
431
428
  other_values = [[]] * other.fields.length
432
429
  else
433
430
  other_values = [] * other.fields.length
434
431
  end
435
432
  else
436
- if list
433
+ if type == :double
437
434
  other_values = other[other_code] + [[other_code]]
438
435
  else
439
436
  other_values = other[other_code] + [other_code]
@@ -443,11 +440,11 @@ class TSV
443
440
 
444
441
  new_values = values.dup
445
442
 
446
- if list
443
+ if type == :double
447
444
  this_common_field_positions.zip(other_common_field_positions).each do |tpos, opos|
448
445
  new_values_tops = new_values[tpos]
449
446
 
450
- if other.list
447
+ if other.type == :double
451
448
  new_values_tops += other_values[opos]
452
449
  else
453
450
  new_values_tops += [other_values[opos]]
@@ -466,10 +463,58 @@ class TSV
466
463
 
467
464
  self.fields = self.fields + new_fields unless nofieldinfo
468
465
  end
466
+
467
+
468
+ def self.field_matches(tsv, values)
469
+ if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
470
+ return {}
471
+ end
472
+
473
+ key_field = tsv.key_field
474
+ fields = tsv.fields
475
+
476
+ field_values = {}
477
+ fields.each{|field|
478
+ field_values[field] = []
479
+ }
480
+
481
+ tsv.through do |key,entry_values|
482
+ fields.zip(entry_values).each do |field,entry_field_values|
483
+ field_values[field].concat entry_field_values
484
+ end
485
+ end
486
+
487
+ field_values.each do |field,field_value_list|
488
+ field_value_list.replace(values & field_value_list.flatten.uniq)
489
+ end
490
+
491
+ field_values[key_field] = values & tsv.keys
492
+
493
+ field_values
494
+ end
495
+
496
+ def field_matches(values)
497
+ TSV.field_matches(self, values)
498
+ end
499
+
500
+
469
501
 
470
502
  #{{{ Helpers
471
503
 
472
504
  def self.index(file, options = {})
505
+ options = Misc.add_defaults options, :data_persistence => true, :persistence => true
506
+ persistence, persistence_file = Misc.process_options options, :persistence, :persistence_file
507
+ options[:persistence], options[:persistence_file] = options.values_at :data_persistence, :data_persistence_file
508
+ options.delete :data_persistence
509
+ options.delete :data_persistence_file
510
+
511
+ index, extra = Persistence.persist(file, :Index, :tsv, options) do |file, options, filename|
512
+ TSV.new(file, :double, options).index
513
+ end
514
+ index
515
+ end
516
+
517
+ def self.index2(file, options = {})
473
518
  opt_data = options.dup
474
519
  opt_index = options.dup
475
520
  opt_data.delete :field
@@ -482,7 +527,7 @@ class TSV
482
527
 
483
528
  if ! opt_index[:persistence_file].nil? && File.exists?(opt_index[:persistence_file])
484
529
  Log.low "Reloading persistent index for #{ file }: #{opt_index[:persistence_file]}"
485
- TSV.new(PersistenceHash.get(opt_index[:persistence_file], false), opt_index)
530
+ TSV.new(Persistence::TSV.get(opt_index[:persistence_file], false), opt_index)
486
531
  else
487
532
  Log.low "Creating index for #{ file }: #{opt_index[:persistence_file]}"
488
533
  data = TSV.new(file, opt_data)
@@ -501,6 +546,23 @@ class TSV
501
546
  end
502
547
 
503
548
  #{{{ Accesor Methods
549
+ attr_accessor :filename, :type, :case_insensitive, :key_field, :fields, :data
550
+
551
+ def fields
552
+ return nil if @fields.nil?
553
+ fields = @fields
554
+ fields.each do |f| f.extend Field end if Array === fields
555
+ fields
556
+ end
557
+
558
+ def fields=(new_fields)
559
+ @fields = new_fields
560
+ if Persistence::TSV === @data
561
+ @data.fields = new_fields
562
+ end
563
+ end
564
+
565
+
504
566
 
505
567
  def keys
506
568
  @data.keys
@@ -531,6 +593,7 @@ class TSV
531
593
  # Read
532
594
 
533
595
  def follow(value)
596
+ return nil if value.nil?
534
597
  if String === value && value =~ /__Ref:(.*)/
535
598
  return self[$1]
536
599
  else
@@ -546,7 +609,7 @@ class TSV
546
609
  return nil
547
610
  end
548
611
 
549
- key = key.downcase if @case_insensitive
612
+ key = key.downcase if @case_insensitive and key !~ /^__Ref:/
550
613
  follow @data[key]
551
614
  end
552
615
 
@@ -587,23 +650,37 @@ class TSV
587
650
  collect.sort_by &block
588
651
  end
589
652
 
590
- def to_s
653
+ def values_to_s(values)
654
+ case
655
+ when (values.nil? and fields.nil?)
656
+ "\n"
657
+ when (values.nil? and not fields.nil?)
658
+ "\t" << ([""] * fields.length) * "\t" << "\n"
659
+ when (not Array === values)
660
+ "\t" << values.to_s << "\n"
661
+ when Array === values.first
662
+ "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
663
+ else
664
+ "\t" << values * "\t" << "\n"
665
+ end
666
+ end
667
+
668
+ def to_s(keys = nil)
591
669
  str = ""
592
670
 
593
671
  if fields
594
672
  str << "#" << key_field << "\t" << fields * "\t" << "\n"
595
673
  end
596
674
 
597
- each do |key, values|
598
- case
599
- when values.nil?
600
- str << key.dup << "\n"
601
- when (not Array === values)
602
- str << key.dup << "\t" << values.to_s << "\n"
603
- when Array === values.first
604
- str << key.dup << "\t" << values.collect{|list| (list || []) * "|"} * "\t" << "\n"
605
- else
606
- str << key.dup << "\t" << values * "\t" << "\n"
675
+ if keys.nil?
676
+ each do |key, values|
677
+ key = key.to_s if Symbol === key
678
+ str << key.dup << values_to_s(values)
679
+ end
680
+ else
681
+ keys.zip(values_at(*keys)).each do |key, values|
682
+ key = key.to_s if Symbol === key
683
+ str << key.dup << values_to_s(values)
607
684
  end
608
685
  end
609
686
 
@@ -625,11 +702,301 @@ class TSV
625
702
  zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
626
703
  zipped
627
704
  end
628
-
629
- def self.parse(data, file, options = {})
630
705
 
706
+ def self.key_order(file, options = {})
631
707
  # Prepare options
632
708
  options = add_defaults options,
709
+ :sep => "\t",
710
+ :sep2 => "|",
711
+ :native => 0,
712
+ :fix => nil,
713
+ :exclude => nil,
714
+ :select => nil,
715
+ :grep => nil,
716
+ :case_insensitive => false,
717
+ :header_hash => '#'
718
+
719
+ options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
720
+
721
+ if String === file and File.exists? file
722
+ file = File.open(file)
723
+ end
724
+
725
+ #{{{ Process first line
726
+
727
+ line = file.gets
728
+ raise "Empty content" if line.nil?
729
+ line.chomp!
730
+
731
+ if line =~ /^#{options[:header_hash]}/
732
+ header_fields = parse_fields(line, options[:sep])
733
+ header_fields[0] = header_fields[0][(0 + options[:header_hash].length)..-1] # Remove initial hash character
734
+ line = file.gets
735
+ else
736
+ header_fields = nil
737
+ end
738
+
739
+ id_pos = Misc.field_position(header_fields, options[:native])
740
+
741
+ if options[:extra].nil?
742
+ extra_pos = nil
743
+ max_cols = 0
744
+ else
745
+ extra_pos = options[:extra].collect{|pos| Misc.field_position(header_fields, pos) }
746
+ end
747
+
748
+ ids = []
749
+ #{{{ Process rest
750
+ while line do
751
+ line.chomp!
752
+
753
+ line = options[:fix].call line if options[:fix]
754
+ break if not line
755
+
756
+ # Select and fix lines
757
+ if line.empty? or
758
+ (options[:exclude] and options[:exclude].call(line)) or
759
+ (options[:select] and not options[:select].call(line))
760
+
761
+ line = file.gets
762
+ next
763
+ end
764
+
765
+ ### Process line
766
+
767
+ # Chunk fields
768
+ parts = parse_fields(line, options[:sep])
769
+
770
+ # Get next line
771
+ line = file.gets
772
+
773
+ # Get id field
774
+ next if parts[id_pos].nil? || parts[id_pos].empty?
775
+ ids << parts[id_pos]
776
+ end
777
+
778
+ ids
779
+ end
780
+
781
+ def self.parse_header(stream, sep, header_hash)
782
+ fields, key_field = nil
783
+ options = {}
784
+
785
+ line = stream.gets
786
+
787
+ if line and line =~ /^#{header_hash}: (.*)/
788
+ options = Misc.string2hash $1
789
+ line = stream.gets
790
+ end
791
+
792
+ sep = options[:sep] if options[:sep]
793
+
794
+ if line and line =~ /^#{header_hash}/
795
+ line.chomp!
796
+ fields = parse_fields(line, sep)
797
+ key_field = fields.shift
798
+ key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
799
+ line = stream.gets
800
+ end
801
+
802
+ raise "Empty content" if line.nil?
803
+ return key_field, fields, options, line
804
+ end
805
+
806
+ def self.parse(stream, options = {})
807
+ # Prepare options
808
+ options = Misc.add_defaults options,
809
+ :case_insensitive => false,
810
+ :type => :double,
811
+
812
+ :merge => false,
813
+ :keep_empty => true,
814
+ :cast => nil,
815
+
816
+ :sep => "\t",
817
+ :sep2 => "|",
818
+ :header_hash => '#',
819
+
820
+ :key => 0,
821
+ :fields => nil,
822
+
823
+ :fix => nil,
824
+ :exclude => nil,
825
+ :select => nil,
826
+ :grep => nil
827
+
828
+
829
+ sep, header_hash =
830
+ Misc.process_options options, :sep, :header_hash
831
+
832
+ key_field, other_fields, more_options, line = TSV.parse_header(stream, sep, header_hash)
833
+
834
+ sep = more_options[:sep] if more_options[:sep]
835
+ options = Misc.add_defaults options, more_options
836
+ sep2 = Misc.process_options options, :sep2
837
+
838
+ key, others =
839
+ Misc.process_options options, :key, :others
840
+
841
+ if key_field.nil?
842
+ key_pos = key
843
+ key_field, fields = nil
844
+ else
845
+ all_fields = [key_field].concat other_fields
846
+
847
+ key_pos = Misc.field_position(all_fields, key)
848
+
849
+ if String === others or Symbol === others
850
+ others = [others]
851
+ end
852
+
853
+ if others.nil?
854
+ other_pos = (0..(all_fields.length - 1)).to_a
855
+ other_pos.delete key_pos
856
+ else
857
+ other_pos = Misc.field_position(all_fields, *others)
858
+ end
859
+
860
+ key_field = all_fields[key_pos]
861
+ fields = all_fields.values_at *other_pos
862
+ end
863
+
864
+ case_insensitive, type, merge, keep_empty, cast =
865
+ Misc.process_options options, :case_insensitive, :type, :merge, :keep_empty, :cast
866
+ fix, exclude, select, grep =
867
+ Misc.process_options options, :fix, :exclude, :select, :grep
868
+
869
+ #{{{ Process rest
870
+ data = {}
871
+ single = type.to_sym != :double
872
+ max_cols = 0
873
+ while line do
874
+ line.chomp!
875
+
876
+ line = fix.call line if fix
877
+ break if not line
878
+
879
+ if header_hash and line =~ /^#{header_hash}/
880
+ line = stream.gets
881
+ next
882
+ end
883
+
884
+ if line.empty? or
885
+ (exclude and exclude.call(line)) or
886
+ (select and not select.call(line))
887
+
888
+ line = stream.gets
889
+ next
890
+ end
891
+
892
+ # Chunk fields
893
+ parts = parse_fields(line, sep)
894
+
895
+ # Get next line
896
+ line = stream.gets
897
+
898
+ # Get id field
899
+ next if parts[key_pos].nil? || parts[key_pos].empty?
900
+
901
+ if single
902
+ ids = parse_fields(parts[key_pos], sep2)
903
+ ids.collect!{|id| id.downcase} if case_insensitive
904
+
905
+ id = ids.shift
906
+ ids.each do |id2| data[id2] = "__Ref:#{id}" end
907
+
908
+ if key_field.nil?
909
+ other_pos = (0..(parts.length - 1)).to_a
910
+ other_pos.delete key_pos
911
+ end
912
+
913
+ extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
914
+ extra.collect! do |elem|
915
+ case
916
+ when String === cast
917
+ elem.send(cast)
918
+ when Proc === cast
919
+ cast.call elem
920
+ end
921
+ end if cast
922
+
923
+ max_cols = extra.size if extra.size > (max_cols || 0)
924
+ case type
925
+ when :list
926
+ data[id] = extra unless data.include? id
927
+ when :flat
928
+ data[id] = extra.flatten unless data.include? id
929
+ when :single
930
+ data[id] = extra.flatten.first unless data.include? id
931
+ end
932
+
933
+ else
934
+ ids = parse_fields(parts[key_pos], sep2)
935
+ ids.collect!{|id| id.downcase} if case_insensitive
936
+
937
+ id = ids.shift
938
+ ids.each do |id2| data[id2] = "__Ref:#{id}" end
939
+
940
+ if key_field.nil?
941
+ other_pos = (0..(parts.length - 1)).to_a
942
+ other_pos.delete key_pos
943
+ end
944
+
945
+ extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
946
+ extra.collect! do |list|
947
+ case
948
+ when String === cast
949
+ list.collect{|elem| elem.send(cast)}
950
+ when Proc === cast
951
+ list.collect{|elem| cast.call elem}
952
+ end
953
+ end if cast
954
+
955
+ max_cols = extra.size if extra.size > (max_cols || 0)
956
+ if merge
957
+ data[id] = extra unless data.include? id
958
+ else
959
+ if not data.include? id
960
+ data[id] = extra
961
+ else
962
+ entry = data[id]
963
+ while entry =~ /__Ref:(.*)/ do entry = data[$1] end
964
+ extra.each_with_index do |f, i|
965
+ if f.empty?
966
+ next unless keep_empty
967
+ f= [""]
968
+ end
969
+ entry[i] ||= []
970
+ entry[i] = entry[i].concat f
971
+ end
972
+ data[id] = entry
973
+ end
974
+ end
975
+ end
976
+ end
977
+
978
+ if keep_empty and max_cols > 0
979
+ data.each do |key, values|
980
+ next if values =~ /__Ref:/
981
+ new_values = values
982
+ max_cols.times do |i|
983
+ if type == :double
984
+ new_values[i] = [""] if new_values[i].nil? or new_values[i].empty?
985
+ else
986
+ new_values[i] = "" if new_values[i].nil?
987
+ end
988
+ end
989
+ data[key] = new_values
990
+ end
991
+ end
992
+
993
+ [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive}]
994
+ end
995
+
996
+ def self.parse2(data, file, options = {})
997
+
998
+ # Prepare options
999
+ options = Misc.add_defaults options,
633
1000
  :sep => "\t",
634
1001
  :sep2 => "|",
635
1002
  :native => 0,
@@ -640,18 +1007,19 @@ class TSV
640
1007
  :grep => nil,
641
1008
  :single => false,
642
1009
  :unique => false,
1010
+ :merge => false,
643
1011
  :flatten => false,
644
- :overwrite => false,
645
1012
  :keep_empty => true,
646
1013
  :case_insensitive => false,
647
1014
  :header_hash => '#' ,
1015
+ :cast => nil,
648
1016
  :persistence_file => nil
1017
+
649
1018
 
1019
+ options[:unique] = options[:uniq] if options[:unique].nil?
650
1020
  options[:extra] = [options[:extra]] if options[:extra] != nil && ! (Array === options[:extra])
651
1021
  options[:flatten] = true if options[:single]
652
1022
 
653
-
654
-
655
1023
  #{{{ Process first line
656
1024
 
657
1025
  line = file.gets
@@ -680,10 +1048,18 @@ class TSV
680
1048
  line.chomp!
681
1049
 
682
1050
  line = options[:fix].call line if options[:fix]
1051
+ break if not line
1052
+
1053
+ if options[:header_hash] && line =~ /^#{options[:header_hash]}/
1054
+ line = file.gets
1055
+ next
1056
+ end
683
1057
 
684
1058
  # Select and fix lines
685
- if (options[:exclude] and options[:exclude].call(line)) or
1059
+ if line.empty? or
1060
+ (options[:exclude] and options[:exclude].call(line)) or
686
1061
  (options[:select] and not options[:select].call(line))
1062
+
687
1063
  line = file.gets
688
1064
  next
689
1065
  end
@@ -721,35 +1097,48 @@ class TSV
721
1097
  extra.flatten! if options[:flatten]
722
1098
  extra = extra.first if options[:single]
723
1099
 
724
- if options[:overwrite]
725
- main_entry = ids.shift
726
- ids.each do |id|
727
- data[id] = "__Ref:#{main_entry}"
1100
+ if options[:cast]
1101
+ if Array === extra[0]
1102
+ e = extra
1103
+ else
1104
+ e = [extra]
728
1105
  end
729
1106
 
730
- data[main_entry] = extra
731
- else
732
- main_entry = ids.shift
733
- ids.each do |id|
734
- data[id] = "__Ref:#{main_entry}"
1107
+ e.each do |list|
1108
+ case
1109
+ when String === options[:cast]
1110
+ list.collect!{|elem| elem.send(options[:cast])}
1111
+ when Proc === options[:cast]
1112
+ list.collect!{|elem| options[:cast].call elem}
1113
+ end
735
1114
  end
1115
+ end
736
1116
 
737
- case
738
- when (options[:single] or options[:unique])
739
- data[main_entry] ||= extra
740
- when options[:flatten]
741
- if PersistenceHash === data
742
- data[main_entry] = (data[main_entry] || []).concat extra
1117
+ main_entry = ids.shift
1118
+ ids.each do |id| data[id] = "__Ref:#{main_entry}" end
1119
+
1120
+ case
1121
+ when (options[:single] or options[:unique] or not options[:merge])
1122
+ data[main_entry] = extra unless data.include? main_entry
1123
+ when options[:flatten]
1124
+ entry = data[main_entry]
1125
+
1126
+ if entry.nil?
1127
+ data[main_entry] = extra
1128
+ else
1129
+ while entry =~ /__Ref:(.*)/ do entry = data[$1] end
1130
+ if Persistence::TSV === data
1131
+ data[main_entry] = entry.concat extra
743
1132
  else
744
- data[main_entry] ||= []
745
1133
  data[main_entry].concat extra
746
1134
  end
1135
+ end
1136
+ else
1137
+ entry = data[main_entry]
1138
+ if entry.nil?
1139
+ data[main_entry] = extra
747
1140
  else
748
- entry = data[main_entry] || []
749
- while entry =~ /__Ref:(.*)/ do
750
- entry = data[$1]
751
- end
752
-
1141
+ while entry =~ /__Ref:(.*)/ do entry = data[$1] end
753
1142
  extra.each_with_index do |fields, i|
754
1143
  if fields.empty?
755
1144
  next unless options[:keep_empty]
@@ -758,7 +1147,6 @@ class TSV
758
1147
  entry[i] ||= []
759
1148
  entry[i] = entry[i].concat fields
760
1149
  end
761
-
762
1150
  data[main_entry] = entry
763
1151
  end
764
1152
  end
@@ -774,7 +1162,6 @@ class TSV
774
1162
  end
775
1163
  end
776
1164
 
777
-
778
1165
  # Save header information
779
1166
  key_field = nil
780
1167
  fields = nil
@@ -788,19 +1175,61 @@ class TSV
788
1175
  end
789
1176
  end
790
1177
 
791
- data.read if PersistenceHash === data
1178
+ data.read if Persistence::TSV === data
792
1179
 
793
1180
  [key_field, fields]
794
1181
  end
1182
+ def initialize(file = {}, type = :double, options = {})
1183
+ if Hash === type
1184
+ options = type
1185
+ type = :double
1186
+ end
795
1187
 
796
- attr_accessor :data, :key_field, :fields, :list, :case_insensitive, :filename
797
- def fields
798
- fields = @fields
799
- fields.each do |f| f.extend Field end if Array === fields
800
- fields
1188
+ if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
1189
+ options = Misc.add_defaults options, Misc.string2hash($2)
1190
+ file = $1
1191
+ end
1192
+
1193
+ options = Misc.add_defaults options, :persistence => false, :case_insensitive => false, :type => type
1194
+
1195
+ @filename = Misc.process_options options, :filename
1196
+ @filename ||= case
1197
+ when (String === file and File.exists? file)
1198
+ File.expand_path file
1199
+ when File === file
1200
+ File.expand_path file.path
1201
+ else
1202
+ Digest::MD5.hexdigest(file.inspect)
1203
+ end
1204
+
1205
+ if block_given?
1206
+ @data, extra = Persistence.persist(@filename, :TSV, :tsv, options) do |filename, options| yield filename, options end
1207
+ else
1208
+ @data, extra = Persistence.persist(@filename, :TSV, :tsv, options) do |filename, options|
1209
+ data, extra = nil
1210
+ case
1211
+ when String === file
1212
+ File.open(file) do |f|
1213
+ data, extra = TSV.parse(f, options)
1214
+ end
1215
+ when File === file
1216
+ data, extra = TSV.parse(file, options)
1217
+ when Hash === file
1218
+ data = file
1219
+ extra = {:case_insensitive => options[:case_insensitive], :type => type}
1220
+ end
1221
+
1222
+ [data, extra]
1223
+ end
1224
+ end
1225
+
1226
+ @type = extra[:type]
1227
+ @key_field = extra[:key_field]
1228
+ @fields = extra[:fields]
1229
+ @case_insensitive = extra[:case_insensitive]
801
1230
  end
802
1231
 
803
- def initialize(file = {}, options = {})
1232
+ def initialize2(file = {}, options = {})
804
1233
  options = Misc.add_defaults options
805
1234
  options[:persistence] = true if options[:persistence_file]
806
1235
 
@@ -817,7 +1246,7 @@ class TSV
817
1246
  Log.low "Copying TSV"
818
1247
  @filename = file.filename
819
1248
 
820
- if options[:persistence] and not PersistenceHash === file.data
1249
+ if options[:persistence] and not Persistence::TSV === file.data
821
1250
  persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
822
1251
  Log.low "Making persistance #{ persistence_file }"
823
1252
  @data = TCHash.get(persistence_file)
@@ -834,7 +1263,7 @@ class TSV
834
1263
  @list = file.list
835
1264
  return self
836
1265
  when Hash === file
837
- Log.low "Encapsulating Hash"
1266
+ Log.low "Encapsulating Hash in TSV object"
838
1267
  @filename = "Hash:" + Digest::MD5.hexdigest(file.inspect)
839
1268
  if options[:persistence]
840
1269
  persistence_file = options.delete(:persistence_file) || TSV.get_persistence_file(@filename, "file:#{ @filename }:", options)
@@ -845,9 +1274,9 @@ class TSV
845
1274
  @data = file
846
1275
  end
847
1276
  return self
848
- when PersistenceHash === file
849
- Log.low "Encapsulating PersistenceHash"
850
- @filename = "PersistenceHash:" + Digest::MD5.hexdigest(file.inspect)
1277
+ when Persistence::TSV === file
1278
+ Log.low "Encapsulating Persistence::TSV"
1279
+ @filename = "Persistence::TSV:" + Digest::MD5.hexdigest(file.inspect)
851
1280
  @data = file
852
1281
  @key_field = file.key_field
853
1282
  @fields = file.fields
@@ -860,7 +1289,7 @@ class TSV
860
1289
  when StringIO
861
1290
  else
862
1291
  raise "File #{file} not found"
863
- end
1292
+ end
864
1293
 
865
1294
  if options[:persistence]
866
1295
  options.delete :persistence
@@ -868,11 +1297,11 @@ class TSV
868
1297
 
869
1298
  if File.exists? persistence_file
870
1299
  Log.low "Loading Persistence for #{ @filename } in #{persistence_file}"
871
- @data = PersistenceHash.get(persistence_file, false)
1300
+ @data = Persistence::TSV.get(persistence_file, false)
872
1301
  @key_field = @data.key_field
873
1302
  @fields = @data.fields
874
1303
  else
875
- @data = PersistenceHash.get(persistence_file, true)
1304
+ @data = Persistence::TSV.get(persistence_file, true)
876
1305
  file = Open.grep(file, options[:grep]) if options[:grep]
877
1306
 
878
1307
  Log.low "Persistent Parsing for #{ @filename } in #{persistence_file}"
@@ -897,17 +1326,17 @@ end
897
1326
  #{{{ CacheHelper
898
1327
  require 'rbbt/util/cachehelper'
899
1328
  module CacheHelper
900
- def self.tsv_cache(name, key = [])
901
- cache_file = CacheHelper.build_filename name, key
902
-
903
- if File.exists? cache_file
904
- Log.debug "TSV cache file '#{cache_file}' found"
905
- hash = TCHash.get(cache_file)
906
- TSV.new(hash)
907
- else
908
- Log.debug "Producing TSV cache file '#{cache_file}'"
909
- data = yield
910
- TSV.new(data, :persistence_file => cache_file)
911
- end
1329
+ def self.tsv_cache(name, key = [])
1330
+ cache_file = CacheHelper.build_filename name, key
1331
+
1332
+ if File.exists? cache_file
1333
+ Log.debug "TSV cache file '#{cache_file}' found"
1334
+ hash = TCHash.get(cache_file)
1335
+ TSV.new(hash)
1336
+ else
1337
+ Log.debug "Producing TSV cache file '#{cache_file}'"
1338
+ data = yield
1339
+ TSV.new(data, :persistence_file => cache_file)
1340
+ end
912
1341
  end
913
1342
  end