rbbt-util 3.0.3 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rbbt/util/tsv.rb CHANGED
@@ -41,9 +41,6 @@ class TSV
41
41
  end
42
42
  end
43
43
 
44
- def self.encapsulate_persistence(file, options)
45
- end
46
-
47
44
  def initialize(file = {}, type = nil, options = {})
48
45
  # Process Options
49
46
 
@@ -58,7 +55,7 @@ class TSV
58
55
  file = $1
59
56
  end
60
57
 
61
- options = Misc.add_defaults options, :persistence => false, :type => type
58
+ options = Misc.add_defaults options, :persistence => false, :type => type, :in_situ_persistence => true
62
59
 
63
60
  # Extract Filename
64
61
 
@@ -114,50 +111,79 @@ class TSV
114
111
  end
115
112
  end
116
113
  else
117
- @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename|
114
+ in_situ_persistence = Misc.process_options(options, :in_situ_persistence)
115
+ @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename, persistence_file|
118
116
  data, extra = nil
119
117
 
120
- case
121
- ## Parse source
122
- when Resource::Path === file #(String === file and file.respond_to? :open)
123
- data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
124
- extra[:namespace] ||= file.namespace
125
- extra[:datadir] ||= file.datadir
126
- when StringIO === file
127
- data, extra = TSV.parse(file, options)
128
- when Open.can_open?(file)
129
- Open.open(file, :grep => options[:grep]) do |f|
130
- data, extra = TSV.parse(f, options)
131
- end
132
- when File === file
133
- path = file.path
134
- file = Open.grep(file, options[:grep]) if options[:grep]
135
- data, extra = TSV.parse(file, options)
136
- when IO === file
137
- file = Open.grep(file, options[:grep]) if options[:grep]
138
- data, extra = TSV.parse(file, options)
139
- when block_given?
140
- data
141
- else
142
- raise "Unknown input in TSV.new #{file.inspect}"
118
+ if in_situ_persistence and persistence_file
119
+
120
+ cast = options[:cast]
121
+ type = options[:type]
122
+ serializer = case
123
+ when ((cast == "to_i" or cast == :to_i) and type == :single)
124
+ :integer
125
+ when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
126
+ :integer_array
127
+ when type == :double
128
+ :double
129
+ when type == :list
130
+ :list
131
+ when type == :single
132
+ :single
133
+ else
134
+ :marshal
135
+ end
136
+
137
+ options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, serializer)
143
138
  end
144
139
 
145
- extra[:filename] = filename
140
+ begin
141
+ case
142
+ ## Parse source
143
+ when Resource::Path === file #(String === file and file.respond_to? :open)
144
+ data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
145
+ extra[:namespace] ||= file.namespace
146
+ extra[:datadir] ||= file.datadir
147
+ when StringIO === file
148
+ data, extra = TSV.parse(file, options)
149
+ when Open.can_open?(file)
150
+ Open.open(file, :grep => options[:grep]) do |f|
151
+ data, extra = TSV.parse(f, options)
152
+ end
153
+ when File === file
154
+ path = file.path
155
+ file = Open.grep(file, options[:grep]) if options[:grep]
156
+ data, extra = TSV.parse(file, options)
157
+ when IO === file
158
+ file = Open.grep(file, options[:grep]) if options[:grep]
159
+ data, extra = TSV.parse(file, options)
160
+ when block_given?
161
+ data
162
+ else
163
+ raise "Unknown input in TSV.new #{file.inspect}"
164
+ end
165
+
166
+ extra[:filename] = filename
167
+ rescue Exception
168
+ FileUtils.rm persistence_file if persistence_file and File.exists?(persistence_file)
169
+ raise $!
170
+ end
146
171
 
147
172
  [data, extra]
148
173
  end
149
174
  end
150
175
  end
151
176
 
152
- if not extra.nil?
177
+ if not extra.nil?
153
178
  %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
154
179
  if extra.include? key.to_sym
155
180
  self.send("#{key}=".to_sym, extra[key.to_sym])
156
- if @data.respond_to? "#{key}=".to_sym
157
- @data.send("#{key}=".to_sym, extra[key.to_sym])
158
- end
181
+ #if @data.respond_to? "#{key}=".to_sym
182
+ # @data.send("#{key}=".to_sym, extra[key.to_sym])
183
+ #end
159
184
  end
160
185
  end
186
+ @data.read if Persistence::TSV === @data
161
187
  end
162
188
  end
163
189
 
@@ -85,6 +85,13 @@ class TSV
85
85
  fields.select{|f| f.namespace.nil? or f.namespace == namespace}
86
86
  end
87
87
 
88
+ def key_field
89
+ return nil if @key_field.nil?
90
+ k = @key_field.dup
91
+ k.extend Field
92
+ k
93
+ end
94
+
88
95
  def fields
89
96
  return nil if @fields.nil?
90
97
  fds = @fields
@@ -113,7 +120,11 @@ class TSV
113
120
 
114
121
  def self.identify_field(key, fields, field)
115
122
  return field if Integer === field
116
- return :key if field.nil? or field == 0 or field.to_sym == :key or key == field
123
+ if String === field
124
+ field = field.dup
125
+ field.extend Field
126
+ end
127
+ return :key if field.nil? or field == 0 or field.to_sym == :key or field == key
117
128
  return nil if fields.nil?
118
129
  return fields.collect{|f| f.to_s}.index field if fields.collect{|f| f.to_s}.index field
119
130
  return fields.index field
@@ -136,7 +147,7 @@ class TSV
136
147
  end
137
148
  end if Array === new_fields
138
149
  @fields = new_fields
139
- @data.fields = new_fields if @data.respond_to? :fields=
150
+ @data.fields = new_fields if @data.respond_to? :fields= and @data.write?
140
151
  end
141
152
 
142
153
  def old_fields=(new_fields)
@@ -196,6 +207,10 @@ class TSV
196
207
  follow @data[key]
197
208
  end
198
209
 
210
+ def delete(key)
211
+ @data.delete(key)
212
+ end
213
+
199
214
  def values_at(*keys)
200
215
  keys.collect{|k|
201
216
  self[k]
@@ -258,6 +273,10 @@ class TSV
258
273
  keys = nil
259
274
  end
260
275
 
276
+ if keys == :sort
277
+ keys = self.keys.sort
278
+ end
279
+
261
280
  str = ""
262
281
 
263
282
  str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n" unless no_options
@@ -279,4 +298,10 @@ class TSV
279
298
 
280
299
  str
281
300
  end
301
+
302
+ def value_peek
303
+ peek = {}
304
+ keys[0..10].zip(values[0..10]).each do |k,v| peek[k] = v end
305
+ peek
306
+ end
282
307
  end
@@ -47,6 +47,8 @@ class TSV
47
47
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
48
48
  when (String === file1 or StringIO === file1)
49
49
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
50
+ when TSV === file1
51
+ file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
50
52
  end
51
53
 
52
54
  case
@@ -54,6 +56,8 @@ class TSV
54
56
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
55
57
  when (String === file2 or StringIO === file2)
56
58
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
59
+ when TSV === file2
60
+ file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
57
61
  end
58
62
 
59
63
  output = File.open(output, 'w') if String === output
@@ -115,110 +119,6 @@ class TSV
115
119
 
116
120
  output.close
117
121
  end
118
-
119
- def self.paste(file1, file2, output, sep = "\t")
120
- case
121
- when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
122
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } ", :pipe => true)
123
- when String === file1
124
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file1, :pipe => true)
125
- end
126
-
127
- case
128
- when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
129
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } ", :pipe => true)
130
- when String === file2
131
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file2, :pipe => true)
132
- end
133
-
134
- output = File.open(output, 'w') if String === output
135
-
136
- cols1 = nil
137
- cols2 = nil
138
-
139
- done1 = false
140
- done2 = false
141
-
142
- while (line1 = file1.gets) =~ /#/; end
143
- line1.strip!
144
- parts1 = line1.split(sep)
145
- key1 = parts1.shift
146
- cols1 = parts1.length
147
-
148
- while (line2 = file2.gets) =~ /#/; end
149
- line2.strip!
150
- parts2 = line2.split(sep)
151
- key2 = parts2.shift
152
- cols2 = parts2.length
153
- while not (done1 or done2)
154
- case
155
- when key1 < key2
156
- output.puts [key1, parts1, [""] * cols2] * sep
157
- if file1.eof?
158
- done1 = true
159
- else
160
- line1 = file1.gets
161
- line1.strip!
162
- parts1 = line1.split(sep)
163
- key1 = parts1.shift
164
- end
165
- when key2 < key1
166
- output.puts [key2, [""] * cols1, parts2] * sep
167
- if file2.eof?
168
- done2 = true
169
- else
170
- line2 = file2.gets
171
- line2.strip!
172
- parts2 = line2.split(sep)
173
- key2 = parts2.shift
174
- end
175
- when key1 == key2
176
- output.puts [key1, parts1, parts2] * sep
177
- if file1.eof?
178
- done1 = true
179
- else
180
- line1 = file1.gets
181
- line1.strip!
182
- parts1 = line1.split(sep)
183
- key1 = parts1.shift
184
- end
185
- if file2.eof?
186
- done2 = true
187
- else
188
- line2 = file2.gets
189
- line2.strip!
190
- parts2 = line2.split(sep)
191
- key2 = parts2.shift
192
- end
193
- end
194
- end
195
-
196
- while not done1
197
- output.puts [key1, parts1, [""] * cols2] * sep
198
- if file1.eof?
199
- done1 = true
200
- else
201
- line1 = file1.gets
202
- line1.strip!
203
- parts1 = line1.split(sep)
204
- key1 = parts1.shift
205
- end
206
- end
207
-
208
- while not done2
209
- output.puts [key2, [""] * cols1, parts2] * sep
210
- if file2.eof?
211
- done2 = true
212
- else
213
- line2 = file2.gets
214
- line2.strip!
215
- parts2 = line2.split(sep)
216
- key2 = parts2.shift
217
- end
218
- end
219
-
220
- output.close
221
- end
222
122
  #{{{ Attach Methods
223
123
 
224
124
  def attach_same_key(other, fields = nil)
@@ -296,6 +196,8 @@ class TSV
296
196
  field_positions = fields.collect{|field| other.identify_field field}
297
197
  field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
298
198
 
199
+
200
+ length = self.fields.length
299
201
  through do |key, values|
300
202
  source_keys = index[key]
301
203
  if source_keys.nil? or source_keys.empty?
@@ -315,7 +217,7 @@ class TSV
315
217
  other[source_key][pos]
316
218
  end
317
219
  end
318
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
220
+ new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
319
221
  new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
320
222
  all_new_values << new_values
321
223
  end
@@ -323,17 +225,28 @@ class TSV
323
225
 
324
226
  if all_new_values.empty?
325
227
  if type == :double
326
- self[key] = self[key].concat [[]] * field_positions.length
228
+ all_new_values = [[[]] * field_positions.length]
327
229
  else
328
- self[key] = self[key].concat [""] * field_positions.length
230
+ all_new_values = [[""] * field_positions.length]
329
231
  end
232
+ end
233
+
234
+ current = self[key]
235
+
236
+ if current.length > length
237
+ all_new_values << current.slice!(length..current.length - 1)
238
+ end
239
+
240
+ if type == :double
241
+ all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
330
242
  else
331
- if type == :double
332
- self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
333
- else
334
- self[key] = self[key].concat all_new_values.first
335
- end
243
+ all_new_values = all_new_values.first
336
244
  end
245
+
246
+ current += all_new_values
247
+
248
+ self[key] = current
249
+
337
250
  end
338
251
 
339
252
  self.fields = self.fields.concat field_names
@@ -385,13 +298,13 @@ class TSV
385
298
  Log.medium "Found Traversal: #{traversal_ids * " => "}"
386
299
 
387
300
  data_key, data_file = path.shift
388
- if data_key == data_file.key_field
389
- Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
390
- data_index = nil
391
- else
392
- Log.debug "Data index required"
393
- data_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
394
- end
301
+ data_index = if data_key == data_file.key_field
302
+ Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
303
+ nil
304
+ else
305
+ Log.debug "Data index required"
306
+ data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
307
+ end
395
308
 
396
309
  current_index = data_index
397
310
  current_key = data_key
@@ -402,7 +315,7 @@ class TSV
402
315
  current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
403
316
  else
404
317
  next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
405
- current_index.process current_index.fields.first do |key, values, values|
318
+ current_index.process current_index.fields.first do |values|
406
319
  if values.nil?
407
320
  nil
408
321
  else
@@ -411,6 +324,7 @@ class TSV
411
324
  end
412
325
  current_index.fields = [next_key]
413
326
  end
327
+ current_key = next_key
414
328
  end
415
329
 
416
330
  current_index
@@ -470,6 +384,8 @@ class TSV
470
384
  attach_index other, index, fields
471
385
  end
472
386
  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
387
+
388
+ self
473
389
  end
474
390
 
475
391
  def detach(file)
@@ -489,10 +405,21 @@ class TSV
489
405
  if self.fields and other.fields
490
406
  new.fields = self.fields + other.fields
491
407
  end
492
-
408
+
493
409
  FileUtils.rm tmpfile if File.exists? tmpfile
494
410
 
495
411
  new
496
412
  end
497
413
 
414
+
415
+ def paste(other, options = {})
416
+ TmpFile.with_file do |output|
417
+ TSV.paste_merge(self, other, output, options[:sep] || "\t")
418
+ TSV.new output, options
419
+ end
420
+ end
421
+
422
+ def self.fast_paste(files, delim = "$")
423
+ CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
424
+ end
498
425
  end
@@ -34,6 +34,7 @@ class TSV
34
34
  list = [list] unless Array === list
35
35
  i += 1 if fields.nil?
36
36
  list.each do |elem|
37
+ next if elem.empty?
37
38
  elem.downcase if case_insensitive
38
39
  new[elem] ||= []
39
40
  new[elem][i] ||= []
@@ -73,6 +74,7 @@ class TSV
73
74
  end
74
75
  list.collect!{|e| e.downcase} if case_insensitive
75
76
  list.each do |elem|
77
+ next if elem.empty?
76
78
  new[elem] ||= []
77
79
  if double_keys
78
80
  new[elem].concat key
@@ -233,6 +235,8 @@ class TSV
233
235
 
234
236
  def self.field_matches(tsv, values)
235
237
  values = [values] if not Array === values
238
+ Log.debug "Matcing #{values.length} values to #{tsv.filename}"
239
+
236
240
  if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
237
241
  return {}
238
242
  end