rbbt-util 3.1.0 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/rbbt_query.rb +1 -1
- data/lib/rbbt/util/cmd.rb +115 -67
- data/lib/rbbt/util/fix_width_table.rb +18 -3
- data/lib/rbbt/util/misc.rb +106 -6
- data/lib/rbbt/util/open.rb +9 -7
- data/lib/rbbt/util/persistence.rb +17 -14
- data/lib/rbbt/util/resource.rb +10 -3
- data/lib/rbbt/util/task.rb +2 -2
- data/lib/rbbt/util/task/job.rb +16 -3
- data/lib/rbbt/util/tc_hash.rb +64 -27
- data/lib/rbbt/util/tsv.rb +44 -21
- data/lib/rbbt/util/tsv/accessor.rb +8 -6
- data/lib/rbbt/util/tsv/attach.rb +19 -28
- data/lib/rbbt/util/tsv/filters.rb +193 -0
- data/lib/rbbt/util/tsv/index.rb +80 -8
- data/lib/rbbt/util/tsv/manipulate.rb +17 -6
- data/lib/rbbt/util/tsv/misc.rb +10 -0
- data/lib/rbbt/util/tsv/parse.rb +18 -1
- data/lib/rbbt/util/workflow.rb +12 -3
- data/lib/rbbt/util/workflow/soap.rb +0 -1
- data/share/install/software/lib/install_helpers +0 -2
- data/share/lib/R/util.R +3 -3
- data/test/rbbt/util/test_cmd.rb +23 -0
- data/test/rbbt/util/test_excel2tsv.rb +1 -1
- data/test/rbbt/util/test_misc.rb +41 -11
- data/test/rbbt/util/test_open.rb +2 -2
- data/test/rbbt/util/test_persistence.rb +2 -2
- data/test/rbbt/util/test_resource.rb +4 -20
- data/test/rbbt/util/test_tc_hash.rb +38 -0
- data/test/rbbt/util/test_tmpfile.rb +1 -1
- data/test/rbbt/util/test_tsv.rb +6 -0
- data/test/rbbt/util/test_workflow.rb +14 -10
- data/test/rbbt/util/tsv/test_accessor.rb +42 -0
- data/test/rbbt/util/tsv/test_filters.rb +141 -0
- data/test/rbbt/util/tsv/test_index.rb +32 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
- data/test/test_helper.rb +3 -1
- metadata +41 -38
@@ -89,6 +89,7 @@ class TSV
|
|
89
89
|
return nil if @key_field.nil?
|
90
90
|
k = @key_field.dup
|
91
91
|
k.extend Field
|
92
|
+
k.namespace = namespace unless namespace.nil?
|
92
93
|
k
|
93
94
|
end
|
94
95
|
|
@@ -134,6 +135,11 @@ class TSV
|
|
134
135
|
TSV.identify_field(key_field, fields, field)
|
135
136
|
end
|
136
137
|
|
138
|
+
def key_field=(new_key_field)
|
139
|
+
@key_field = new_key_field
|
140
|
+
@data.key_field = new_key_field if @data.respond_to? :key_field= and @data.write?
|
141
|
+
end
|
142
|
+
|
137
143
|
def fields=(new_fields)
|
138
144
|
new_fields.collect! do |field|
|
139
145
|
if Field === field
|
@@ -150,11 +156,6 @@ class TSV
|
|
150
156
|
@data.fields = new_fields if @data.respond_to? :fields= and @data.write?
|
151
157
|
end
|
152
158
|
|
153
|
-
def old_fields=(new_fields)
|
154
|
-
@fields = new_fields
|
155
|
-
@data.fields = new_fields if @data.respond_to? :fields=
|
156
|
-
end
|
157
|
-
|
158
159
|
def keys
|
159
160
|
@data.keys
|
160
161
|
end
|
@@ -183,13 +184,14 @@ class TSV
|
|
183
184
|
|
184
185
|
# Read
|
185
186
|
|
187
|
+
attr_accessor :unnamed
|
186
188
|
def follow(value)
|
187
189
|
return nil if value.nil?
|
188
190
|
if String === value && value =~ /__Ref:(.*)/
|
189
191
|
return self[$1]
|
190
192
|
else
|
191
193
|
|
192
|
-
if Array === value and
|
194
|
+
if Array === value and not unnamed
|
193
195
|
value = NamedArray.name value, fields
|
194
196
|
end
|
195
197
|
value
|
data/lib/rbbt/util/tsv/attach.rb
CHANGED
@@ -128,7 +128,7 @@ class TSV
|
|
128
128
|
if other.include? key
|
129
129
|
new_values = other[key].values_at *fields
|
130
130
|
new_values.collect!{|v| [v]} if type == :double and not other.type == :double
|
131
|
-
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
131
|
+
new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
|
132
132
|
self[key] = self[key].concat new_values
|
133
133
|
else
|
134
134
|
if type == :double
|
@@ -151,6 +151,7 @@ class TSV
|
|
151
151
|
|
152
152
|
through do |key, values|
|
153
153
|
source_keys = values[source]
|
154
|
+
source_keys = [source_keys] unless Array === source_keys
|
154
155
|
if source_keys.nil? or source_keys.empty?
|
155
156
|
all_new_values = []
|
156
157
|
else
|
@@ -165,8 +166,8 @@ class TSV
|
|
165
166
|
end
|
166
167
|
end
|
167
168
|
|
168
|
-
new_values.collect!{|v| [v]}
|
169
|
-
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
169
|
+
new_values.collect!{|v| [v]} if type == :double and not other.type == :double
|
170
|
+
new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
|
170
171
|
all_new_values << new_values
|
171
172
|
end
|
172
173
|
end
|
@@ -191,12 +192,12 @@ class TSV
|
|
191
192
|
|
192
193
|
def attach_index(other, index, fields = nil)
|
193
194
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
195
|
+
fields = [fields] unless Array === fields
|
194
196
|
|
195
197
|
other = other.tsv unless TSV === other
|
196
198
|
field_positions = fields.collect{|field| other.identify_field field}
|
197
199
|
field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
|
198
200
|
|
199
|
-
|
200
201
|
length = self.fields.length
|
201
202
|
through do |key, values|
|
202
203
|
source_keys = index[key]
|
@@ -218,7 +219,7 @@ class TSV
|
|
218
219
|
end
|
219
220
|
end
|
220
221
|
new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
|
221
|
-
new_values.collect!{|v| v.nil? ? nil : v.first} if not type == :double and other.type == :double
|
222
|
+
new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
|
222
223
|
all_new_values << new_values
|
223
224
|
end
|
224
225
|
end
|
@@ -227,7 +228,7 @@ class TSV
|
|
227
228
|
if type == :double
|
228
229
|
all_new_values = [[[]] * field_positions.length]
|
229
230
|
else
|
230
|
-
all_new_values =
|
231
|
+
all_new_values = [[""] * field_positions.length]
|
231
232
|
end
|
232
233
|
end
|
233
234
|
|
@@ -260,7 +261,11 @@ class TSV
|
|
260
261
|
in_namespace = options[:in_namespace]
|
261
262
|
|
262
263
|
if in_namespace
|
263
|
-
|
264
|
+
if files.first.all_fields.include? in_namespace
|
265
|
+
ids = [[in_namespace]]
|
266
|
+
else
|
267
|
+
ids = [files.first.all_namespace_fields(in_namespace)]
|
268
|
+
end
|
264
269
|
ids += files[1..-1].collect{|f| f.all_fields}
|
265
270
|
else
|
266
271
|
ids = files.collect{|f| f.all_fields}
|
@@ -276,7 +281,7 @@ class TSV
|
|
276
281
|
id_list << match.first
|
277
282
|
end
|
278
283
|
|
279
|
-
if id_list.last
|
284
|
+
if id_list.last != files.last.all_fields.first
|
280
285
|
id_list << files.last.all_fields.first
|
281
286
|
id_list.zip(files)
|
282
287
|
else
|
@@ -312,14 +317,14 @@ class TSV
|
|
312
317
|
next_key, next_file = path.shift
|
313
318
|
|
314
319
|
if current_index.nil?
|
315
|
-
current_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
320
|
+
current_index = next_file.index :target => next_key, :fields => current_key, :persistence => (persist_input and path.empty?)
|
316
321
|
else
|
317
322
|
next_index = next_file.index :target => next_key, :fields => current_key, :persistence => persist_input
|
318
323
|
current_index.process current_index.fields.first do |values|
|
319
324
|
if values.nil?
|
320
325
|
nil
|
321
326
|
else
|
322
|
-
next_index.values_at(*values).flatten.collect
|
327
|
+
next_index.values_at(*values).flatten.collect.to_a
|
323
328
|
end
|
324
329
|
end
|
325
330
|
current_index.fields = [next_key]
|
@@ -395,27 +400,13 @@ class TSV
|
|
395
400
|
reorder :key, detached_fields
|
396
401
|
end
|
397
402
|
|
398
|
-
def paste(other, options = {})
|
399
|
-
tmpfile = TmpFile.tmp_file
|
400
|
-
TSV.paste(self.to_s, other.to_s, tmpfile)
|
401
|
-
|
402
|
-
new = TSV.new(tmpfile, options)
|
403
|
-
|
404
|
-
new.key_field = self.key_field unless self.key_field.nil?
|
405
|
-
if self.fields and other.fields
|
406
|
-
new.fields = self.fields + other.fields
|
407
|
-
end
|
408
|
-
|
409
|
-
FileUtils.rm tmpfile if File.exists? tmpfile
|
410
|
-
|
411
|
-
new
|
412
|
-
end
|
413
|
-
|
414
|
-
|
415
403
|
def paste(other, options = {})
|
416
404
|
TmpFile.with_file do |output|
|
417
405
|
TSV.paste_merge(self, other, output, options[:sep] || "\t")
|
418
|
-
TSV.new output, options
|
406
|
+
tsv = TSV.new output, options
|
407
|
+
tsv.key_field = self.key_field unless self.key_field.nil?
|
408
|
+
tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
|
409
|
+
tsv
|
419
410
|
end
|
420
411
|
end
|
421
412
|
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
module Filtered
|
3
|
+
|
4
|
+
class Filter
|
5
|
+
attr_accessor :data, :match, :fieldnum, :value, :list, :unsaved
|
6
|
+
attr_accessor :persistence
|
7
|
+
def initialize(data, match, value, persistence = nil)
|
8
|
+
@data = data
|
9
|
+
@value = value
|
10
|
+
@unsaved = []
|
11
|
+
|
12
|
+
case
|
13
|
+
when Hash === persistence
|
14
|
+
@persistence = persistence
|
15
|
+
when String === persistence
|
16
|
+
@persistence = TSV.new TCHash.get(persistence)
|
17
|
+
@persistence.read
|
18
|
+
end
|
19
|
+
|
20
|
+
@list = nil
|
21
|
+
case
|
22
|
+
when match.match(/field:(.*)/)
|
23
|
+
field_num = data.identify_field $1
|
24
|
+
Misc.add_method(self, :match) do |entry|
|
25
|
+
entry[field_num] == value
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def key
|
31
|
+
case
|
32
|
+
when String === value
|
33
|
+
value
|
34
|
+
else
|
35
|
+
Marshal.dump(value)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def save(ids)
|
40
|
+
if persistence
|
41
|
+
persistence.write
|
42
|
+
persistence[self.key] = ids
|
43
|
+
persistence.read
|
44
|
+
else
|
45
|
+
if list.nil?
|
46
|
+
@list = ids
|
47
|
+
else
|
48
|
+
@list.replace ids
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def update
|
54
|
+
ids = []
|
55
|
+
data.unfiltered_each do |key, entry|
|
56
|
+
ids << key if match(entry)
|
57
|
+
end
|
58
|
+
save(ids.sort)
|
59
|
+
end
|
60
|
+
|
61
|
+
def saved
|
62
|
+
if persistence.nil?
|
63
|
+
return nil if list.nil?
|
64
|
+
list
|
65
|
+
else
|
66
|
+
return nil if not persistence.include?(self.key)
|
67
|
+
persistence[self.key]
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def add_unsaved
|
72
|
+
save(Misc.merge_sorted_arrays(unsaved.sort, saved || [])) if unsaved.any?
|
73
|
+
unsaved.clear
|
74
|
+
end
|
75
|
+
|
76
|
+
def ids
|
77
|
+
add_unsaved
|
78
|
+
|
79
|
+
list = saved
|
80
|
+
if list.nil?
|
81
|
+
update
|
82
|
+
list = saved
|
83
|
+
end
|
84
|
+
list
|
85
|
+
end
|
86
|
+
|
87
|
+
def add(id)
|
88
|
+
unsaved.push id
|
89
|
+
end
|
90
|
+
|
91
|
+
def reset
|
92
|
+
if persistence
|
93
|
+
persistence.clear
|
94
|
+
else
|
95
|
+
@list = nil
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.extended(base)
|
101
|
+
class << base
|
102
|
+
attr_accessor :filter_dir, :filters
|
103
|
+
end
|
104
|
+
|
105
|
+
Misc.redefine_method base, :[]=, :unfiltered_set do |key,value|
|
106
|
+
if filters.empty?
|
107
|
+
self.send(:unfiltered_set, key, value)
|
108
|
+
else
|
109
|
+
filters.each do |filter|
|
110
|
+
filter.add key if filter.match value
|
111
|
+
end
|
112
|
+
self.send(:unfiltered_set, key, value)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
Misc.redefine_method base, :keys, :unfiltered_keys do
|
117
|
+
if filters.empty?
|
118
|
+
self.send(:unfiltered_keys)
|
119
|
+
else
|
120
|
+
filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
Misc.redefine_method base, :values, :unfiltered_values do
|
125
|
+
if filters.empty?
|
126
|
+
self.send(:unfiltered_values)
|
127
|
+
else
|
128
|
+
ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
|
129
|
+
self.send :values_at, *ids
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
Misc.redefine_method base, :each, :unfiltered_each do |&block|
|
134
|
+
if filters.empty?
|
135
|
+
self.send(:unfiltered_each, &block)
|
136
|
+
else
|
137
|
+
ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
|
138
|
+
new = self.dup
|
139
|
+
new.data = {}
|
140
|
+
|
141
|
+
ids.zip(self.send(:values_at, *ids)).each do |id, values|
|
142
|
+
new[id] = values
|
143
|
+
end
|
144
|
+
|
145
|
+
new.send :each, &block
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
Misc.redefine_method base, :collect, :unfiltered_collect do |&block|
|
150
|
+
if filters.empty?
|
151
|
+
self.send(:unfiltered_collect, &block)
|
152
|
+
else
|
153
|
+
ids = filters.inject(nil){|list,filter| list = (list.nil? ? filter.ids : Misc.intersect_sorted_arrays(list, filter.ids))}
|
154
|
+
new = self.dup
|
155
|
+
new.data = {}
|
156
|
+
ids.zip(self.send(:values_at, *ids)).each do |id, values|
|
157
|
+
new[id] = values
|
158
|
+
end
|
159
|
+
new.send :collect, &block
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
def filter_name(match, value)
|
165
|
+
@filename + "&F[#{match}=#{value}]"
|
166
|
+
end
|
167
|
+
|
168
|
+
def add_filter(match, value, persistence = nil)
|
169
|
+
if persistence.nil? and filter_dir
|
170
|
+
persistence = File.join(filter_dir, match.to_s)
|
171
|
+
end
|
172
|
+
|
173
|
+
@filename = filter_name(match, value) if @filename
|
174
|
+
|
175
|
+
filters.push Filter.new self, match, value, persistence
|
176
|
+
end
|
177
|
+
|
178
|
+
def pop_filter
|
179
|
+
@filename = @filename.sub(/&F\[[^\]]*\]$/, '') if @filename
|
180
|
+
filters.pop
|
181
|
+
end
|
182
|
+
|
183
|
+
end
|
184
|
+
|
185
|
+
class TSV
|
186
|
+
def filter(filter_dir = nil)
|
187
|
+
self.extend Filtered
|
188
|
+
self.filter_dir = filter_dir
|
189
|
+
self.filters = []
|
190
|
+
self
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
data/lib/rbbt/util/tsv/index.rb
CHANGED
@@ -13,7 +13,7 @@ class TSV
|
|
13
13
|
"Index[:key]"
|
14
14
|
end
|
15
15
|
|
16
|
-
|
16
|
+
Persistence.persist(self, prefix, :tsv, options) do |tsv, options, filename|
|
17
17
|
order, target, fields, case_insensitive = Misc.process_options options, :order, :target, :fields, :case_insensitive
|
18
18
|
|
19
19
|
new = {}
|
@@ -34,7 +34,7 @@ class TSV
|
|
34
34
|
list = [list] unless Array === list
|
35
35
|
i += 1 if fields.nil?
|
36
36
|
list.each do |elem|
|
37
|
-
next if elem.empty?
|
37
|
+
next if elem.nil? or elem.empty?
|
38
38
|
elem.downcase if case_insensitive
|
39
39
|
new[elem] ||= []
|
40
40
|
new[elem][i] ||= []
|
@@ -56,10 +56,9 @@ class TSV
|
|
56
56
|
# flatten
|
57
57
|
|
58
58
|
new.each do |key, values|
|
59
|
-
values.flatten
|
60
|
-
values.compact!
|
59
|
+
new[key] = values.flatten.compact
|
61
60
|
end
|
62
|
-
|
61
|
+
|
63
62
|
## Not ordered
|
64
63
|
else
|
65
64
|
double_keys = true unless type != :double or identify_field(target) == :key
|
@@ -74,7 +73,7 @@ class TSV
|
|
74
73
|
end
|
75
74
|
list.collect!{|e| e.downcase} if case_insensitive
|
76
75
|
list.each do |elem|
|
77
|
-
next if elem.empty?
|
76
|
+
next if elem.nil? or elem.empty?
|
78
77
|
new[elem] ||= []
|
79
78
|
if double_keys
|
80
79
|
new[elem].concat key
|
@@ -103,7 +102,9 @@ class TSV
|
|
103
102
|
else
|
104
103
|
[new_key_field]
|
105
104
|
end
|
105
|
+
|
106
106
|
new = TSV.new([new, {:namespace => namespace, :key_field => key_field, :fields => fields, :type => :flat, :filename => (filename.nil? ? nil : "Index:" + filename), :case_insensitive => case_insensitive}])
|
107
|
+
|
107
108
|
new
|
108
109
|
end
|
109
110
|
end
|
@@ -293,7 +294,7 @@ class TSV
|
|
293
294
|
|
294
295
|
range = ! pos_end.nil?
|
295
296
|
|
296
|
-
index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end: pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
|
297
|
+
index = Persistence.persist(filename, "SortedIndex[#{range ? pos_start + ":" + pos_end : pos_start}]", :fwt, :start => pos_start, :end => pos_end, :range => range) do |filename, options|
|
297
298
|
pos_start, pos_end, range = Misc.process_options options, :start, :end, :range
|
298
299
|
data = case
|
299
300
|
when (type == :double and range)
|
@@ -319,6 +320,77 @@ class TSV
|
|
319
320
|
index
|
320
321
|
end
|
321
322
|
|
323
|
+
def pos_index(pos_field, file = nil, update = false)
|
324
|
+
value_size = 0
|
325
|
+
index_data = []
|
322
326
|
|
323
|
-
|
327
|
+
file ||= filename + "-PosIndex[#{ pos_field }]" if filename
|
328
|
+
|
329
|
+
through :key, pos_field do |key, values|
|
330
|
+
value_size = key.length if key.length > value_size
|
331
|
+
|
332
|
+
pos = values.first
|
333
|
+
if Array === pos
|
334
|
+
pos.each do |p|
|
335
|
+
index_data << [key, p.to_i]
|
336
|
+
end
|
337
|
+
else
|
338
|
+
index_data << [key, pos.to_i]
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
pos_index = case
|
343
|
+
when file == :memory
|
344
|
+
index = FixWidthTable.new(file, value_size, false)
|
345
|
+
index.add_point index_data
|
346
|
+
index
|
347
|
+
when (update or not File.exists? file)
|
348
|
+
index = FixWidthTable.new(file, value_size, false, true)
|
349
|
+
index.add_point index_data
|
350
|
+
index
|
351
|
+
else
|
352
|
+
FixWidthTable.new(file, value_size, false)
|
353
|
+
end
|
354
|
+
|
355
|
+
pos_index
|
356
|
+
end
|
324
357
|
|
358
|
+
def range_index(start_field, end_field, file = nil, update = false)
|
359
|
+
value_size = 0
|
360
|
+
index_data = []
|
361
|
+
|
362
|
+
file ||= filename + "-PosIndex[#{ start_field }-#{end_field}]" if filename
|
363
|
+
|
364
|
+
through :key, [start_field, end_field] do |key, values|
|
365
|
+
value_size = key.length if key.length > value_size
|
366
|
+
|
367
|
+
start_pos, end_pos = values
|
368
|
+
|
369
|
+
if Array === start_pos
|
370
|
+
start_pos.zip(end_pos).each do |s,e|
|
371
|
+
index_data << [key, [s.to_i, e.to_i]]
|
372
|
+
end
|
373
|
+
else
|
374
|
+
index_data << [key, [start_pos.to_i, end_pos.to_i]]
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
378
|
+
pos_index = case
|
379
|
+
when file == :memory
|
380
|
+
index = FixWidthTable.get(file, value_size, true)
|
381
|
+
index.add_range index_data
|
382
|
+
index.read
|
383
|
+
index
|
384
|
+
when (update or not File.exists?(file))
|
385
|
+
index = FixWidthTable.get(file, value_size, true, true)
|
386
|
+
index.add_range index_data
|
387
|
+
index.read
|
388
|
+
index
|
389
|
+
else
|
390
|
+
FixWidthTable.get(file, value_size, true)
|
391
|
+
end
|
392
|
+
|
393
|
+
pos_index
|
394
|
+
end
|
395
|
+
|
396
|
+
end
|