rbbt-util 3.0.3 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbbt/util/tsv.rb CHANGED
@@ -41,9 +41,6 @@ class TSV
41
41
  end
42
42
  end
43
43
 
44
- def self.encapsulate_persistence(file, options)
45
- end
46
-
47
44
  def initialize(file = {}, type = nil, options = {})
48
45
  # Process Options
49
46
 
@@ -58,7 +55,7 @@ class TSV
58
55
  file = $1
59
56
  end
60
57
 
61
- options = Misc.add_defaults options, :persistence => false, :type => type
58
+ options = Misc.add_defaults options, :persistence => false, :type => type, :in_situ_persistence => true
62
59
 
63
60
  # Extract Filename
64
61
 
@@ -114,50 +111,79 @@ class TSV
114
111
  end
115
112
  end
116
113
  else
117
- @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename|
114
+ in_situ_persistence = Misc.process_options(options, :in_situ_persistence)
115
+ @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename, persistence_file|
118
116
  data, extra = nil
119
117
 
120
- case
121
- ## Parse source
122
- when Resource::Path === file #(String === file and file.respond_to? :open)
123
- data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
124
- extra[:namespace] ||= file.namespace
125
- extra[:datadir] ||= file.datadir
126
- when StringIO === file
127
- data, extra = TSV.parse(file, options)
128
- when Open.can_open?(file)
129
- Open.open(file, :grep => options[:grep]) do |f|
130
- data, extra = TSV.parse(f, options)
131
- end
132
- when File === file
133
- path = file.path
134
- file = Open.grep(file, options[:grep]) if options[:grep]
135
- data, extra = TSV.parse(file, options)
136
- when IO === file
137
- file = Open.grep(file, options[:grep]) if options[:grep]
138
- data, extra = TSV.parse(file, options)
139
- when block_given?
140
- data
141
- else
142
- raise "Unknown input in TSV.new #{file.inspect}"
118
+ if in_situ_persistence and persistence_file
119
+
120
+ cast = options[:cast]
121
+ type = options[:type]
122
+ serializer = case
123
+ when ((cast == "to_i" or cast == :to_i) and type == :single)
124
+ :integer
125
+ when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
126
+ :integer_array
127
+ when type == :double
128
+ :double
129
+ when type == :list
130
+ :list
131
+ when type == :single
132
+ :single
133
+ else
134
+ :marshal
135
+ end
136
+
137
+ options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, serializer)
143
138
  end
144
139
 
145
- extra[:filename] = filename
140
+ begin
141
+ case
142
+ ## Parse source
143
+ when Resource::Path === file #(String === file and file.respond_to? :open)
144
+ data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
145
+ extra[:namespace] ||= file.namespace
146
+ extra[:datadir] ||= file.datadir
147
+ when StringIO === file
148
+ data, extra = TSV.parse(file, options)
149
+ when Open.can_open?(file)
150
+ Open.open(file, :grep => options[:grep]) do |f|
151
+ data, extra = TSV.parse(f, options)
152
+ end
153
+ when File === file
154
+ path = file.path
155
+ file = Open.grep(file, options[:grep]) if options[:grep]
156
+ data, extra = TSV.parse(file, options)
157
+ when IO === file
158
+ file = Open.grep(file, options[:grep]) if options[:grep]
159
+ data, extra = TSV.parse(file, options)
160
+ when block_given?
161
+ data
162
+ else
163
+ raise "Unknown input in TSV.new #{file.inspect}"
164
+ end
165
+
166
+ extra[:filename] = filename
167
+ rescue Exception
168
+ FileUtils.rm persistence_file if persistence_file and File.exists?(persistence_file)
169
+ raise $!
170
+ end
146
171
 
147
172
  [data, extra]
148
173
  end
149
174
  end
150
175
  end
151
176
 
152
- if not extra.nil?
177
+ if not extra.nil?
153
178
  %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
154
179
  if extra.include? key.to_sym
155
180
  self.send("#{key}=".to_sym, extra[key.to_sym])
156
- if @data.respond_to? "#{key}=".to_sym
157
- @data.send("#{key}=".to_sym, extra[key.to_sym])
158
- end
181
+ #if @data.respond_to? "#{key}=".to_sym
182
+ # @data.send("#{key}=".to_sym, extra[key.to_sym])
183
+ #end
159
184
  end
160
185
  end
186
+ @data.read if Persistence::TSV === @data
161
187
  end
162
188
  end
163
189
 
@@ -85,6 +85,13 @@ class TSV
85
85
  fields.select{|f| f.namespace.nil? or f.namespace == namespace}
86
86
  end
87
87
 
88
+ def key_field
89
+ return nil if @key_field.nil?
90
+ k = @key_field.dup
91
+ k.extend Field
92
+ k
93
+ end
94
+
88
95
  def fields
89
96
  return nil if @fields.nil?
90
97
  fds = @fields
@@ -113,7 +120,11 @@ class TSV
113
120
 
114
121
  def self.identify_field(key, fields, field)
115
122
  return field if Integer === field
116
- return :key if field.nil? or field == 0 or field.to_sym == :key or key == field
123
+ if String === field
124
+ field = field.dup
125
+ field.extend Field
126
+ end
127
+ return :key if field.nil? or field == 0 or field.to_sym == :key or field == key
117
128
  return nil if fields.nil?
118
129
  return fields.collect{|f| f.to_s}.index field if fields.collect{|f| f.to_s}.index field
119
130
  return fields.index field
@@ -136,7 +147,7 @@ class TSV
136
147
  end
137
148
  end if Array === new_fields
138
149
  @fields = new_fields
139
- @data.fields = new_fields if @data.respond_to? :fields=
150
+ @data.fields = new_fields if @data.respond_to? :fields= and @data.write?
140
151
  end
141
152
 
142
153
  def old_fields=(new_fields)
@@ -196,6 +207,10 @@ class TSV
196
207
  follow @data[key]
197
208
  end
198
209
 
210
+ def delete(key)
211
+ @data.delete(key)
212
+ end
213
+
199
214
  def values_at(*keys)
200
215
  keys.collect{|k|
201
216
  self[k]
@@ -258,6 +273,10 @@ class TSV
258
273
  keys = nil
259
274
  end
260
275
 
276
+ if keys == :sort
277
+ keys = self.keys.sort
278
+ end
279
+
261
280
  str = ""
262
281
 
263
282
  str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n" unless no_options
@@ -279,4 +298,10 @@ class TSV
279
298
 
280
299
  str
281
300
  end
301
+
302
+ def value_peek
303
+ peek = {}
304
+ keys[0..10].zip(values[0..10]).each do |k,v| peek[k] = v end
305
+ peek
306
+ end
282
307
  end
@@ -47,6 +47,8 @@ class TSV
47
47
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
48
48
  when (String === file1 or StringIO === file1)
49
49
  file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
50
+ when TSV === file1
51
+ file1 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
50
52
  end
51
53
 
52
54
  case
@@ -54,6 +56,8 @@ class TSV
54
56
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
55
57
  when (String === file2 or StringIO === file2)
56
58
  file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
59
+ when TSV === file2
60
+ file2 = CMD.cmd("sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
57
61
  end
58
62
 
59
63
  output = File.open(output, 'w') if String === output
@@ -115,110 +119,6 @@ class TSV
115
119
 
116
120
  output.close
117
121
  end
118
-
119
- def self.paste(file1, file2, output, sep = "\t")
120
- case
121
- when (String === file1 and not file1.index("\n") and file1.length < 250 and File.exists?(file1))
122
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } ", :pipe => true)
123
- when String === file1
124
- file1 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file1, :pipe => true)
125
- end
126
-
127
- case
128
- when (String === file2 and not file2.index("\n") and file2.length < 250 and File.exists?(file2))
129
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file2 } ", :pipe => true)
130
- when String === file2
131
- file2 = CMD.cmd("sort -k1,1 -t'#{sep}'", :in => file2, :pipe => true)
132
- end
133
-
134
- output = File.open(output, 'w') if String === output
135
-
136
- cols1 = nil
137
- cols2 = nil
138
-
139
- done1 = false
140
- done2 = false
141
-
142
- while (line1 = file1.gets) =~ /#/; end
143
- line1.strip!
144
- parts1 = line1.split(sep)
145
- key1 = parts1.shift
146
- cols1 = parts1.length
147
-
148
- while (line2 = file2.gets) =~ /#/; end
149
- line2.strip!
150
- parts2 = line2.split(sep)
151
- key2 = parts2.shift
152
- cols2 = parts2.length
153
- while not (done1 or done2)
154
- case
155
- when key1 < key2
156
- output.puts [key1, parts1, [""] * cols2] * sep
157
- if file1.eof?
158
- done1 = true
159
- else
160
- line1 = file1.gets
161
- line1.strip!
162
- parts1 = line1.split(sep)
163
- key1 = parts1.shift
164
- end
165
- when key2 < key1
166
- output.puts [key2, [""] * cols1, parts2] * sep
167
- if file2.eof?
168
- done2 = true
169
- else
170
- line2 = file2.gets
171
- line2.strip!
172
- parts2 = line2.split(sep)
173
- key2 = parts2.shift
174
- end
175
- when key1 == key2
176
- output.puts [key1, parts1, parts2] * sep
177
- if file1.eof?
178
- done1 = true
179
- else
180
- line1 = file1.gets
181
- line1.strip!
182
- parts1 = line1.split(sep)
183
- key1 = parts1.shift
184
- end
185
- if file2.eof?
186
- done2 = true
187
- else
188
- line2 = file2.gets
189
- line2.strip!
190
- parts2 = line2.split(sep)
191
- key2 = parts2.shift
192
- end
193
- end
194
- end
195
-
196
- while not done1
197
- output.puts [key1, parts1, [""] * cols2] * sep
198
- if file1.eof?
199
- done1 = true
200
- else
201
- line1 = file1.gets
202
- line1.strip!
203
- parts1 = line1.split(sep)
204
- key1 = parts1.shift
205
- end
206
- end
207
-
208
- while not done2
209
- output.puts [key2, [""] * cols1, parts2] * sep
210
- if file2.eof?
211
- done2 = true
212
- else
213
- line2 = file2.gets
214
- line2.strip!
215
- parts2 = line2.split(sep)
216
- key2 = parts2.shift
217
- end
218
- end
219
-
220
- output.close
221
- end
222
122
  #{{{ Attach Methods
223
123
 
224
124
  def attach_same_key(other, fields = nil)
@@ -296,6 +196,8 @@ class TSV
296
196
  field_positions = fields.collect{|field| other.identify_field field}
297
197
  field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
298
198
 
199
+
200
+ length = self.fields.length
299
201
  through do |key, values|
300
202
  source_keys = index[key]
301
203
  if source_keys.nil? or source_keys.empty?
@@ -315,7 +217,7 @@ class TSV
315
217
  other[source_key][pos]
316
218
  end
317
219
  end
318
- new_values.collect!{|v| [v]} if type == :double and not other.type == :double
220
+ new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
319
221
  new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
320
222
  all_new_values << new_values
321
223
  end
@@ -323,17 +225,28 @@ class TSV
323
225
 
324
226
  if all_new_values.empty?
325
227
  if type == :double
326
- self[key] = self[key].concat [[]] * field_positions.length
228
+ all_new_values = [[[]] * field_positions.length]
327
229
  else
328
- self[key] = self[key].concat [""] * field_positions.length
230
+ all_new_values = [[""] * field_positions.length]
329
231
  end
232
+ end
233
+
234
+ current = self[key]
235
+
236
+ if current.length > length
237
+ all_new_values << current.slice!(length..current.length - 1)
238
+ end
239
+
240
+ if type == :double
241
+ all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
330
242
  else
331
- if type == :double
332
- self[key] = self[key].concat TSV.zip_fields(all_new_values).collect{|l| l.flatten}
333
- else
334
- self[key] = self[key].concat all_new_values.first
335
- end
243
+ all_new_values = all_new_values.first
336
244
  end
245
+
246
+ current += all_new_values
247
+
248
+ self[key] = current
249
+
337
250
  end
338
251
 
339
252
  self.fields = self.fields.concat field_names
@@ -385,13 +298,13 @@ class TSV
385
298
  Log.medium "Found Traversal: #{traversal_ids * " => "}"
386
299
 
387
300
  data_key, data_file = path.shift
388
- if data_key == data_file.key_field
389
- Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
390
- data_index = nil
391
- else
392
- Log.debug "Data index required"
393
- data_index = data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
394
- end
301
+ data_index = if data_key == data_file.key_field
302
+ Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
303
+ nil
304
+ else
305
+ Log.debug "Data index required"
306
+ data_file.index :target => data_key, :fields => data_file.key_field, :persistence => false
307
+ end
395
308
 
396
309
  current_index = data_index
397
310
  current_key = data_key
@@ -402,7 +315,7 @@ class TSV
402
315
  current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
403
316
  else
404
317
  next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
405
- current_index.process current_index.fields.first do |key, values, values|
318
+ current_index.process current_index.fields.first do |values|
406
319
  if values.nil?
407
320
  nil
408
321
  else
@@ -411,6 +324,7 @@ class TSV
411
324
  end
412
325
  current_index.fields = [next_key]
413
326
  end
327
+ current_key = next_key
414
328
  end
415
329
 
416
330
  current_index
@@ -470,6 +384,8 @@ class TSV
470
384
  attach_index other, index, fields
471
385
  end
472
386
  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
387
+
388
+ self
473
389
  end
474
390
 
475
391
  def detach(file)
@@ -489,10 +405,21 @@ class TSV
489
405
  if self.fields and other.fields
490
406
  new.fields = self.fields + other.fields
491
407
  end
492
-
408
+
493
409
  FileUtils.rm tmpfile if File.exists? tmpfile
494
410
 
495
411
  new
496
412
  end
497
413
 
414
+
415
+ def paste(other, options = {})
416
+ TmpFile.with_file do |output|
417
+ TSV.paste_merge(self, other, output, options[:sep] || "\t")
418
+ TSV.new output, options
419
+ end
420
+ end
421
+
422
+ def self.fast_paste(files, delim = "$")
423
+ CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
424
+ end
498
425
  end
@@ -34,6 +34,7 @@ class TSV
34
34
  list = [list] unless Array === list
35
35
  i += 1 if fields.nil?
36
36
  list.each do |elem|
37
+ next if elem.empty?
37
38
  elem.downcase if case_insensitive
38
39
  new[elem] ||= []
39
40
  new[elem][i] ||= []
@@ -73,6 +74,7 @@ class TSV
73
74
  end
74
75
  list.collect!{|e| e.downcase} if case_insensitive
75
76
  list.each do |elem|
77
+ next if elem.empty?
76
78
  new[elem] ||= []
77
79
  if double_keys
78
80
  new[elem].concat key
@@ -233,6 +235,8 @@ class TSV
233
235
 
234
236
  def self.field_matches(tsv, values)
235
237
  values = [values] if not Array === values
238
+ Log.debug "Matcing #{values.length} values to #{tsv.filename}"
239
+
236
240
  if values.flatten.sort[0..9].compact.collect{|n| n.to_i} == (1..10).to_a
237
241
  return {}
238
242
  end