rbbt-util 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rbbt/util/tsv.rb CHANGED
@@ -119,23 +119,34 @@ class TSV
119
119
  @data = Hash[file.collect{|v|
120
120
  [v,[]]
121
121
  }]
122
- @data.key_field = key_field if key_field
123
- @data.fields = fields if fields
122
+ self.key_field = options[:key]
123
+ self.fields = options[:fields]
124
+ self.type = options[:type] || :double
124
125
  when Hash === file
125
126
  @data = file
126
- @data.key_field = key_field if key_field
127
- @data.fields = fields if fields
127
+ self.key_field = options[:key]
128
+ self.fields = options[:fields]
129
+ self.type = options[:type] || :double
128
130
  when TSV === file
129
131
  @data = file.data
130
- @data.key_field = key_field if key_field
131
- @data.fields = fields if fields
132
+ self.key_field = options[:key] || file.key_field
133
+ self.fields = options[:fields] || file.fields
134
+ self.type = options[:type] || file.type
132
135
  when Persistence::TSV === file
136
+ Log.debug("Reopening persistence file #{ file.path_to_db }")
133
137
  @data = file
134
138
  %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
135
139
  if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
136
140
  self.send "#{key}=".to_sym, @data.send(key.to_sym)
137
141
  end
138
142
  end
143
+ self.key_field = options[:key] || file.key_field
144
+ self.fields = options[:fields] || file.fields
145
+ self.type = options[:type] || file.type
146
+
147
+ file.key_field = self.key_field
148
+ file.fields = self.fields
149
+ file.type = self.type
139
150
  else
140
151
  in_situ_persistence = Misc.process_options(options, :in_situ_persistence)
141
152
  @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename, persistence_file|
@@ -178,7 +189,7 @@ class TSV
178
189
  end
179
190
 
180
191
  if Persistence::TSV === data
181
- %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
192
+ %w(case_insensitive namespace identifiers fields key_field type filename cast).each do |key|
182
193
  if extra.include? key.to_sym
183
194
  if data.respond_to? "#{key}=".to_sym
184
195
  data.send("#{key}=".to_sym, extra[key.to_sym])
@@ -194,7 +205,7 @@ class TSV
194
205
  end
195
206
 
196
207
  if not extra.nil?
197
- %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
208
+ %w(case_insensitive namespace identifiers fields key_field type filename cast).each do |key|
198
209
  if extra.include? key.to_sym
199
210
  self.send("#{key}=".to_sym, extra[key.to_sym])
200
211
  #if @data.respond_to? "#{key}=".to_sym
@@ -217,5 +228,9 @@ class TSV
217
228
  @data.write? if @data.respond_to? :write
218
229
  end
219
230
 
231
+ def clear
232
+ @data.clear
233
+ end
234
+
220
235
 
221
236
  end
@@ -3,8 +3,8 @@ require 'rbbt/util/misc'
3
3
 
4
4
  class TSV
5
5
  ## Make sure we overwrite the methods declared by attr_accessor
6
- MAIN_ACCESSORS = :data, :key_field, :fields, :cast
7
- EXTRA_ACCESSORS = :filename, :identifiers, :namespace, :datadir, :type, :case_insensitive
6
+ MAIN_ACCESSORS = :data, :key_field, :fields, :cast
7
+ EXTRA_ACCESSORS = :filename, :identifiers, :namespace, :type, :case_insensitive
8
8
  attr_accessor *(MAIN_ACCESSORS + EXTRA_ACCESSORS)
9
9
 
10
10
  def self.zip_fields(list, fields = nil)
@@ -33,7 +33,7 @@ class TSV
33
33
 
34
34
  def fullname
35
35
  return self if self =~ /:/ or namespace.nil?
36
- namespace + ":" << self
36
+ namespace.to_s + ":" << self
37
37
  end
38
38
 
39
39
  def ==(string)
@@ -143,8 +143,8 @@ class TSV
143
143
  def fields=(new_fields)
144
144
  new_fields.collect! do |field|
145
145
  if Field === field
146
- if field !~ /:/ and field.namespace != nil and field.namespace != namespace
147
- field.namespace + ":" + field.to_s
146
+ if field !~ /:/ and field.namespace != nil and field.namespace.to_s != namespace.to_s
147
+ field.namespace.to_s + ":" + field.to_s
148
148
  else
149
149
  field
150
150
  end
@@ -286,6 +286,8 @@ class TSV
286
286
  str << "#" << key_field << "\t" << fields * "\t" << "\n"
287
287
  end
288
288
 
289
+ saved_unnamed = unnamed
290
+ unnamed = false
289
291
  if keys.nil?
290
292
  each do |key, values|
291
293
  key = key.to_s if Symbol === key
@@ -298,6 +300,7 @@ class TSV
298
300
  end
299
301
  end
300
302
 
303
+ unnamed = saved_unnamed
301
304
  str
302
305
  end
303
306
 
@@ -1,11 +1,27 @@
1
1
  require 'rbbt/util/misc'
2
2
  module Filtered
3
3
 
4
# Groups several filters so they can be addressed as a single object: it
# exposes the merged ids of its members and relays every other message to
# each member in turn.
class FilterArray
  # The member filters. Each member is expected to respond to +ids+.
  attr_accessor :filters

  # Merges the +ids+ of every member filter, left to right, through
  # Misc.merge_sorted_arrays. Each list is duplicated before use so the
  # merge never works on a filter's own array.
  #
  # Returns the merged list, or nil when +filters+ is empty.
  def ids
    filters.inject(nil) do |merged, filter|
      merged.nil? ? filter.ids.dup : Misc.merge_sorted_arrays(merged, filter.ids.dup)
    end
  end

  # Relays any message this class does not define to every member filter,
  # passing along the arguments and the caller's block.
  #
  # Returns +filters+ (the value of the +each+ call).
  def method_missing(name, *args, &block)
    filters.each do |filter|
      filter.send(name, *args, &block)
    end
  end

  # Keeps respond_to? in line with method_missing: a relayed message is
  # reported as supported only when there is at least one member and every
  # member responds to it.
  def respond_to_missing?(name, include_private = false)
    members = filters || []
    (!members.empty? && members.all? { |filter| filter.respond_to?(name, include_private) }) || super
  end
end
17
+
4
18
  class Filter
5
19
  attr_accessor :data, :match, :fieldnum, :value, :list, :unsaved
6
20
  attr_accessor :persistence
21
+
7
22
  def initialize(data, match, value, persistence = nil)
8
23
  @data = data
24
+ @match = match
9
25
  @value = value
10
26
  @unsaved = []
11
27
 
@@ -19,11 +35,15 @@ module Filtered
19
35
 
20
36
  @list = nil
21
37
  case
22
- when match.match(/field:(.*)/)
23
- field_num = data.identify_field $1
24
- Misc.add_method(self, :match) do |entry|
25
- entry[field_num] == value
26
- end
38
+ when @match.match(/field:(.*)/)
39
+ @fieldnum = data.identify_field $1
40
+ class << self
41
+ self
42
+ end.class_eval <<-EOC
43
+ def match_entry(entry)
44
+ entry[@fieldnum] == @value
45
+ end
46
+ EOC
27
47
  end
28
48
  end
29
49
 
@@ -53,7 +73,7 @@ module Filtered
53
73
  def update
54
74
  ids = []
55
75
  data.unfiltered_each do |key, entry|
56
- ids << key if match(entry)
76
+ ids << key if match_entry(entry)
57
77
  end
58
78
  save(ids.sort)
59
79
  end
@@ -88,7 +108,20 @@ module Filtered
88
108
  unsaved.push id
89
109
  end
90
110
 
111
# Discards the stored result of this filter.
#
# add_unsaved is called first (presumably it flushes the ids queued in
# +unsaved+ -- its body is not visible here). Then, when a persistence
# store exists and holds this filter's key, that key is deleted from the
# store; otherwise the in-memory @list is dropped.
#
# The store is switched to write mode only if it was not already writable,
# and in that case it is switched back to read mode afterwards. The switch
# back happens in an +ensure+ so a failing delete cannot leave the store
# writable.
#
# Returns whatever persistence.read returned when the mode was restored,
# nil otherwise. Errors raised by the store propagate to the caller.
def clean
  add_unsaved

  if persistence and persistence.include? self.key
    was_writable = persistence.write?
    persistence.write unless was_writable

    restored = nil
    begin
      persistence.delete self.key
    ensure
      # Only undo the mode change this method made itself
      restored = persistence.read unless was_writable
    end
    restored
  else
    @list = nil
  end
end
122
+
91
123
  def reset
124
+ add_unsaved
92
125
  if persistence
93
126
  persistence.clear
94
127
  else
@@ -107,7 +140,7 @@ module Filtered
107
140
  self.send(:unfiltered_set, key, value)
108
141
  else
109
142
  filters.each do |filter|
110
- filter.add key if filter.match value
143
+ filter.add key if filter.match_entry value
111
144
  end
112
145
  self.send(:unfiltered_set, key, value)
113
146
  end
@@ -117,7 +150,7 @@ module Filtered
117
150
  if filters.empty?
118
151
  self.send(:unfiltered_keys)
119
152
  else
120
- filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
153
+ filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
121
154
  end
122
155
  end
123
156
 
@@ -125,7 +158,7 @@ module Filtered
125
158
  if filters.empty?
126
159
  self.send(:unfiltered_values)
127
160
  else
128
- ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
161
+ ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
129
162
  self.send :values_at, *ids
130
163
  end
131
164
  end
@@ -134,7 +167,7 @@ module Filtered
134
167
  if filters.empty?
135
168
  self.send(:unfiltered_each, &block)
136
169
  else
137
- ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
170
+ ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
138
171
  new = self.dup
139
172
  new.data = {}
140
173
 
@@ -150,7 +183,8 @@ module Filtered
150
183
  if filters.empty?
151
184
  self.send(:unfiltered_collect, &block)
152
185
  else
153
- ids = filters.inject(nil){|list,filter| list = (list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids))}
186
+ ids = filters.inject(nil){|list,filter| list = (list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup))}
187
+
154
188
  new = self.dup
155
189
  new.data = {}
156
190
  ids.zip(self.send(:values_at, *ids)).each do |id, values|
@@ -159,25 +193,28 @@ module Filtered
159
193
  new.send :collect, &block
160
194
  end
161
195
  end
162
- end
163
196
 
164
- def filter_name(match, value)
165
- @filename + "&F[#{match}=#{value}]"
197
+ Misc.redefine_method base, :delete, :unfiltered_delete do |key|
198
+ if filters.empty?
199
+ self.send(:unfiltered_delete, key)
200
+ else
201
+ reset_filters
202
+ self.send :unfiltered_delete, key
203
+ end
204
+ end
166
205
  end
167
206
 
168
207
  def add_filter(match, value, persistence = nil)
169
208
  if persistence.nil? and filter_dir
170
- persistence = File.join(filter_dir, match.to_s)
209
+ persistence = File.join(filter_dir, match.to_s + '.filter')
171
210
  end
172
211
 
173
- @filename = filter_name(match, value) if @filename
174
-
175
- filters.push Filter.new self, match, value, persistence
212
+ filter = Filter.new self, match, value, persistence
213
+ filters.push filter
176
214
  end
177
215
 
178
216
# Removes the most recently added filter and calls add_unsaved on it
# (presumably so ids it still has queued are stored -- add_unsaved is not
# visible here).
#
# Returns the result of add_unsaved, or nil when no filter is installed;
# popping from an empty list is a no-op rather than a NoMethodError on nil.
def pop_filter
  filter = filters.pop
  filter.add_unsaved unless filter.nil?
end
182
219
 
183
220
  end
@@ -189,5 +226,16 @@ class TSV
189
226
  self.filters = []
190
227
  self
191
228
  end
229
+
230
# Discards the stored results of the filters.
#
# Without a filter_dir (nil or empty) every filter in +filters+ is asked to
# reset itself and nil is returned. With a filter_dir, every '*.filter'
# file directly inside that directory is deleted and the list of deleted
# paths is returned.
#
# Deletion uses core File.delete, so this method does not rely on
# FileUtils having been loaded by some other file. A path that cannot be
# deleted raises the corresponding SystemCallError.
def reset_filters
  if filter_dir.nil? or filter_dir.empty?
    filters.each { |filter| filter.reset }
    return
  end

  Dir.glob(File.join(filter_dir, '*.filter')).each do |path|
    File.delete path
  end
end
192
240
  end
193
241
 
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/util/tsv/manipulate'
2
+ require 'rbbt/util/tsv/filters'
2
3
  require 'rbbt/util/fix_width_table'
3
4
 
4
5
  class TSV
@@ -281,116 +282,138 @@ class TSV
281
282
  field_matches(values).sort_by{|field, matches| matches.uniq.length}.last
282
283
  end
283
284
 
284
- def sorted_index(pos_start = nil, pos_end = nil)
285
- raise "Please specify indexing fields" if (pos_start.nil? and fields.length > 2)
285
+ def pos_index(pos_field = nil, options = {})
286
+ pos_field ||= "Position"
286
287
 
287
- case
288
- when (pos_start.nil? and pos_end.nil? and fields.length == 2)
289
- pos_start = fields.first
290
- pos_end = fields.last
291
- when (pos_start.nil? and pos_end.nil? and fields.length == 1)
292
- pos_start = fields.first
293
- end
288
+ options = Misc.add_defaults options,
289
+ :persistence => true, :persistence_file => nil, :persistence_update => false
294
290
 
295
- range = ! pos_end.nil?
296
-
297
- index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
298
- pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
299
- data = case
300
- when (type == :double and range)
301
- collect do |key, values|
302
- p_start, p_end = values.values_at pos_start, pos_end
303
- next if p_start.nil? or p_end.nil? or p_start.empty? or p_end.empty?
304
- [[p_start.first, p_end.first], key]
305
- end
306
- when (type == :double and not range)
307
- collect do |key, values|
308
- p_start = values.values_at pos_start
309
- next if p_start.nil? or p_start.empty?
310
- [p_start.first, key]
311
- end
312
- when range
313
- slice [pos_start, pos_end]
314
- else
315
- slice pos_start
316
- end
317
- data
291
+ prefix = "Pos[#{pos_field}]"
292
+
293
+ Persistence.persist(filename, prefix, :fwt, options.merge({
294
+ :pos_field => pos_field,
295
+ :filters => (self.respond_to?(:filters)? filters.collect{|f| [f.match, f.value]} : [])
296
+ })) do |file, options, filename|
297
+ pos_field = options[:pos_field]
298
+ value_size = 0
299
+ index_data = []
300
+
301
+ through :key, pos_field do |key, values|
302
+ value_size = key.length if key.length > value_size
303
+
304
+ pos = values.first
305
+ if Array === pos
306
+ pos.each do |p|
307
+ index_data << [key, p.to_i]
308
+ end
309
+ else
310
+ index_data << [key, pos.to_i]
311
+ end
312
+ end
313
+
314
+ index = FixWidthTable.get(:memory, value_size, false)
315
+ index.add_point index_data
316
+ index.read
317
+ index
318
318
  end
319
+ end
320
+
321
+ def self.pos_index(file, pos_field = nil, options = {})
322
+ options = Misc.add_defaults options,
323
+ :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
324
+ :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
325
+
326
+ options_data = {
327
+ :persistence => Misc.process_options(options, :data_persistence),
328
+ :persistence_file => Misc.process_options(options, :data_persistence_file),
329
+ :persistence_update => Misc.process_options(options, :data_persistence_update),
330
+ :persistence_source => Misc.process_options(options, :data_persistence_source),
331
+ }
332
+
333
+
334
+ prefix = "Pos[#{pos_field}]"
335
+
336
+ new = Persistence.persist(file, prefix, :fwt, options.merge({:pos_field => pos_field})) do |file, options, filename|
337
+ tsv = TSV.new(file, :list, options_data)
338
+
339
+ if options.include?(:filters) and Array === options[:filters] and not options[:filters].empty?
340
+ tsv.filter
341
+ options[:filters].each do |match, value, persistence|
342
+ tsv.add_filter(match, value, persistence)
343
+ end
344
+ end
319
345
 
320
- index
346
+ tsv.pos_index options[:pos_field], options.merge(:persistence => false, :persistence_file => nil)
347
+ end
321
348
  end
322
349
 
323
- def pos_index(pos_field, file = nil, update = false)
324
- value_size = 0
325
- index_data = []
350
+ def range_index(start_field = nil, end_field = nil, options = {})
351
+ start_field ||= "Start"
352
+ end_field ||= "End"
353
+ options = Misc.add_defaults options,
354
+ :persistence => true, :persistence_file => nil, :persistence_update => false
355
+
356
+ prefix = "Range[#{start_field}-#{end_field}]"
326
357
 
327
- file ||= filename + "-PosIndex[#{ pos_field }]" if filename
328
-
329
- through :key, pos_field do |key, values|
330
- value_size = key.length if key.length > value_size
358
+ Persistence.persist(filename, prefix, :fwt, options.merge({
359
+ :start_field => start_field, :end_field => end_field,
360
+ :filters => (self.respond_to?(:filters)? filters.collect{|f| [f.match, f.value]} : [])
361
+ })) do |file, options, filename|
362
+ start_field, end_field = options.values_at :start_field, :end_field
331
363
 
332
- pos = values.first
333
- if Array === pos
334
- pos.each do |p|
335
- index_data << [key, p.to_i]
364
+ value_size = 0
365
+ index_data = []
366
+
367
+ through :key, [start_field, end_field] do |key, values|
368
+ value_size = key.length if key.length > value_size
369
+
370
+ start_pos, end_pos = values
371
+
372
+ if Array === start_pos
373
+ start_pos.zip(end_pos).each do |s,e|
374
+ index_data << [key, [s.to_i, e.to_i]]
375
+ end
376
+ else
377
+ index_data << [key, [start_pos.to_i, end_pos.to_i]]
336
378
  end
337
- else
338
- index_data << [key, pos.to_i]
339
379
  end
380
+
381
+ index = FixWidthTable.get(:memory, value_size, true)
382
+ index.add_range index_data
383
+ index.read
384
+ index
340
385
  end
341
-
342
- pos_index = case
343
- when file == :memory
344
- index = FixWidthTable.new(file, value_size, false)
345
- index.add_point index_data
346
- index
347
- when (update or not File.exists? file)
348
- index = FixWidthTable.new(file, value_size, false, true)
349
- index.add_point index_data
350
- index
351
- else
352
- FixWidthTable.new(file, value_size, false)
353
- end
354
-
355
- pos_index
356
386
  end
357
387
 
358
- def range_index(start_field, end_field, file = nil, update = false)
359
- value_size = 0
360
- index_data = []
388
+ def self.range_index(file, start_field = nil, end_field = nil, options = {})
389
+ options = Misc.add_defaults options,
390
+ :persistence => true, :persistence_file => nil, :persistence_update => false, :persistence_source => file, :tsv_serializer => :list,
391
+ :data_persistence => false, :data_persistence_file => nil, :data_persistence_update => false, :data_persistence_source => file
361
392
 
362
- file ||= filename + "-PosIndex[#{ start_field }-#{end_field}]" if filename
363
-
364
- through :key, [start_field, end_field] do |key, values|
365
- value_size = key.length if key.length > value_size
393
+ options_data = {
394
+ :persistence => Misc.process_options(options, :data_persistence),
395
+ :persistence_file => Misc.process_options(options, :data_persistence_file),
396
+ :persistence_update => Misc.process_options(options, :data_persistence_update),
397
+ :persistence_source => Misc.process_options(options, :data_persistence_source),
398
+ }
366
399
 
367
- start_pos, end_pos = values
368
-
369
- if Array === start_pos
370
- start_pos.zip(end_pos).each do |s,e|
371
- index_data << [key, [s.to_i, e.to_i]]
400
+ prefix = "Range[#{start_field}-#{end_field}]"
401
+
402
+ options_data[:type] = :flat if options[:order] == false
403
+
404
+ Persistence.persist(file, prefix, :fwt, options.merge({:start_field => start_field, :end_field => end_field})) do |file, options, filename|
405
+ tsv = TSV.new(file, :list, options_data)
406
+
407
+ if options.include?(:filters) and Array === options[:filters] and not options[:filters].empty?
408
+ tsv.filter
409
+ options[:filters].each do |match, value, persistence|
410
+ tsv.add_filter(match, value, persistence)
372
411
  end
373
- else
374
- index_data << [key, [start_pos.to_i, end_pos.to_i]]
375
412
  end
413
+
414
+ tsv.range_index options[:start_field], options[:end_field], options.merge(:persistence => false, :persistence_file => nil)
376
415
  end
377
-
378
- pos_index = case
379
- when file == :memory
380
- index = FixWidthTable.get(file, value_size, true)
381
- index.add_range index_data
382
- index.read
383
- index
384
- when (update or not File.exists?(file))
385
- index = FixWidthTable.get(file, value_size, true, true)
386
- index.add_range index_data
387
- index.read
388
- index
389
- else
390
- FixWidthTable.get(file, value_size, true)
391
- end
392
-
393
- pos_index
394
416
  end
395
417
 
396
418
  end
419
+