rbbt-util 4.0.2 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +7 -0
- data/lib/rbbt/annotations.rb +147 -10
- data/lib/rbbt/persist.rb +5 -1
- data/lib/rbbt/persist/tsv.rb +4 -3
- data/lib/rbbt/resource/path.rb +8 -1
- data/lib/rbbt/tsv.rb +3 -2
- data/lib/rbbt/tsv/accessor.rb +140 -51
- data/lib/rbbt/tsv/attach/util.rb +124 -106
- data/lib/rbbt/tsv/filter.rb +4 -2
- data/lib/rbbt/tsv/manipulate.rb +68 -13
- data/lib/rbbt/tsv/parser.rb +110 -20
- data/lib/rbbt/tsv/serializers.rb +6 -0
- data/lib/rbbt/tsv/util.rb +35 -1
- data/lib/rbbt/util/chain_methods.rb +25 -10
- data/lib/rbbt/util/misc.rb +109 -27
- data/lib/rbbt/util/open.rb +15 -4
- data/lib/rbbt/workflow.rb +18 -3
- data/lib/rbbt/workflow/annotate.rb +6 -1
- data/lib/rbbt/workflow/soap.rb +1 -1
- data/lib/rbbt/workflow/step.rb +13 -3
- data/lib/rbbt/workflow/task.rb +2 -2
- data/share/install/software/lib/install_helpers +6 -0
- data/share/lib/R/util.R +6 -1
- data/test/rbbt/test_annotations.rb +7 -0
- data/test/rbbt/test_persist.rb +32 -0
- data/test/rbbt/test_tsv.rb +101 -2
- data/test/rbbt/test_workflow.rb +11 -0
- data/test/rbbt/tsv/test_accessor.rb +15 -0
- data/test/rbbt/tsv/test_attach.rb +1 -1
- data/test/rbbt/tsv/test_manipulate.rb +37 -3
- data/test/rbbt/tsv/test_util.rb +25 -0
- data/test/rbbt/util/test_misc.rb +8 -0
- metadata +7 -4
- data/lib/rbbt/util/persistence.rb +0 -406
data/lib/rbbt/tsv/attach/util.rb
CHANGED
@@ -3,17 +3,22 @@ module TSV
|
|
3
3
|
def attach_same_key(other, fields = nil)
|
4
4
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
6
|
+
field_positions = fields.collect{|field| other.identify_field field}
|
7
|
+
other.with_unnamed do
|
8
|
+
with_unnamed do
|
9
|
+
through do |key, values|
|
10
|
+
if other.include? key
|
11
|
+
new_values = other[key].values_at *field_positions
|
12
|
+
new_values.collect!{|v| [v]} if type == :double and not other.type == :double
|
13
|
+
new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
|
14
|
+
self[key] = self[key].concat new_values
|
15
|
+
else
|
16
|
+
if type == :double
|
17
|
+
self[key] = self[key].concat [[]] * fields.length
|
18
|
+
else
|
19
|
+
self[key] = self[key].concat [""] * fields.length
|
20
|
+
end
|
21
|
+
end
|
17
22
|
end
|
18
23
|
end
|
19
24
|
end
|
@@ -33,59 +38,61 @@ module TSV
|
|
33
38
|
|
34
39
|
source_pos = identify_field source
|
35
40
|
|
36
|
-
with_unnamed do
|
37
|
-
|
38
|
-
|
41
|
+
other.with_unnamed do
|
42
|
+
with_unnamed do
|
43
|
+
through do |key, values|
|
44
|
+
source_keys = values[source_pos]
|
45
|
+
|
46
|
+
case
|
47
|
+
when (source_keys.nil? or (Array === source_keys and source_keys.empty?))
|
48
|
+
if type == :double
|
49
|
+
self[key] = values.concat field_positions.collect{|v| []}
|
50
|
+
else
|
51
|
+
self[key] = values.concat [nil] * field_positions
|
52
|
+
end
|
53
|
+
when Array === source_keys
|
54
|
+
all_new_values = source_keys.collect do |source_key|
|
55
|
+
positions = field_positions.collect do |pos|
|
56
|
+
if pos == :key
|
57
|
+
[source_key]
|
58
|
+
else
|
59
|
+
if other.include? source_key
|
60
|
+
v = other[source_key][pos]
|
61
|
+
Array === v ? v : [v]
|
62
|
+
else
|
63
|
+
[nil]
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
positions.collect!{|v| v[0..0]} if one2one
|
69
|
+
positions
|
70
|
+
end
|
71
|
+
|
72
|
+
new = Misc.zip_fields(all_new_values).each{|field_entry|
|
73
|
+
field_entry.flatten!
|
74
|
+
}
|
39
75
|
|
40
|
-
|
41
|
-
when (source_keys.nil? or (Array === source_keys and source_keys.empty?))
|
42
|
-
if type == :double
|
43
|
-
self[key] = values.concat field_positions.collect{|v| []}
|
76
|
+
self[key] = values.concat new
|
44
77
|
else
|
45
|
-
|
46
|
-
|
47
|
-
when Array === source_keys
|
48
|
-
all_new_values = source_keys.collect do |source_key|
|
49
|
-
positions = field_positions.collect do |pos|
|
78
|
+
source_key = source_keys
|
79
|
+
all_new_values = field_positions.collect do |pos|
|
50
80
|
if pos == :key
|
51
|
-
|
81
|
+
source_key
|
52
82
|
else
|
53
83
|
if other.include? source_key
|
54
84
|
v = other[source_key][pos]
|
55
|
-
Array === v ? v :
|
85
|
+
Array === v ? v.first : v
|
56
86
|
else
|
57
|
-
|
87
|
+
nil
|
58
88
|
end
|
59
89
|
end
|
60
90
|
end
|
61
91
|
|
62
|
-
|
63
|
-
positions
|
64
|
-
end
|
65
|
-
|
66
|
-
new = Misc.zip_fields(all_new_values).each{|field_entry|
|
67
|
-
field_entry.flatten!
|
68
|
-
}
|
69
|
-
|
70
|
-
self[key] = values.concat new
|
71
|
-
else
|
72
|
-
source_key = source_keys
|
73
|
-
all_new_values = field_positions.collect do |pos|
|
74
|
-
if pos == :key
|
75
|
-
source_key
|
76
|
-
else
|
77
|
-
if other.include? source_key
|
78
|
-
v = other[source_key][pos]
|
79
|
-
Array === v ? v.first : v
|
80
|
-
else
|
81
|
-
nil
|
82
|
-
end
|
83
|
-
end
|
92
|
+
self[key] = values.concat all_new_values
|
84
93
|
end
|
85
94
|
|
86
|
-
self[key] = values.concat all_new_values
|
87
95
|
end
|
88
|
-
|
89
96
|
end
|
90
97
|
end
|
91
98
|
|
@@ -102,55 +109,61 @@ module TSV
|
|
102
109
|
field_names = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }
|
103
110
|
|
104
111
|
length = self.fields.length
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
112
|
+
other.with_unnamed do
|
113
|
+
index.with_unnamed do
|
114
|
+
with_unnamed do
|
115
|
+
through do |key, values|
|
116
|
+
source_keys = index[key]
|
117
|
+
if source_keys.nil? or source_keys.empty?
|
118
|
+
all_new_values = []
|
119
|
+
else
|
120
|
+
all_new_values = []
|
121
|
+
source_keys.each do |source_key|
|
122
|
+
next unless other.include? source_key
|
123
|
+
new_values = field_positions.collect do |pos|
|
124
|
+
if pos == :key
|
125
|
+
if other.type == :double
|
126
|
+
[source_key]
|
127
|
+
else
|
128
|
+
source_key
|
129
|
+
end
|
130
|
+
else
|
131
|
+
other[source_key][pos]
|
132
|
+
end
|
133
|
+
end
|
134
|
+
new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
|
135
|
+
new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
|
136
|
+
all_new_values << new_values
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
if all_new_values.empty?
|
141
|
+
if type == :double
|
142
|
+
all_new_values = [[[]] * field_positions.length]
|
117
143
|
else
|
118
|
-
|
144
|
+
all_new_values = [[""] * field_positions.length]
|
119
145
|
end
|
120
|
-
else
|
121
|
-
other[source_key][pos]
|
122
146
|
end
|
123
|
-
end
|
124
|
-
new_values.collect!{|v| v.nil? ? [[]] : [v]} if type == :double and not other.type == :double
|
125
|
-
new_values.collect!{|v| v.nil? ? nil : (other.type == :single ? v : v.first)} if not type == :double and other.type == :double
|
126
|
-
all_new_values << new_values
|
127
|
-
end
|
128
|
-
end
|
129
147
|
|
130
|
-
|
131
|
-
if type == :double
|
132
|
-
all_new_values = [[[]] * field_positions.length]
|
133
|
-
else
|
134
|
-
all_new_values = [[""] * field_positions.length]
|
135
|
-
end
|
136
|
-
end
|
148
|
+
current = self[key] || [[]] * fields.length
|
137
149
|
|
138
|
-
|
150
|
+
if current.length > length
|
151
|
+
all_new_values << current.slice!(length..current.length - 1)
|
152
|
+
end
|
139
153
|
|
140
|
-
|
141
|
-
|
142
|
-
|
154
|
+
if type == :double
|
155
|
+
all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
|
156
|
+
else
|
157
|
+
all_new_values = all_new_values.first
|
158
|
+
end
|
143
159
|
|
144
|
-
|
145
|
-
all_new_values = TSV.zip_fields(all_new_values).collect{|l| l.flatten}
|
146
|
-
else
|
147
|
-
all_new_values = all_new_values.first
|
148
|
-
end
|
149
|
-
|
150
|
-
current += all_new_values
|
160
|
+
current += all_new_values
|
151
161
|
|
152
|
-
|
162
|
+
self[key] = current
|
153
163
|
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
154
167
|
end
|
155
168
|
|
156
169
|
self.fields = self.fields.concat field_names
|
@@ -184,7 +197,7 @@ module TSV
|
|
184
197
|
return nil if match.empty?
|
185
198
|
id_list << match.first
|
186
199
|
end
|
187
|
-
|
200
|
+
|
188
201
|
if id_list.last != files.last.all_fields.first
|
189
202
|
id_list << files.last.all_fields.first
|
190
203
|
id_list.zip(files)
|
@@ -201,18 +214,18 @@ module TSV
|
|
201
214
|
path = find_path(files, options)
|
202
215
|
|
203
216
|
return nil if path.nil?
|
204
|
-
|
217
|
+
|
205
218
|
traversal_ids = path.collect{|p| p.first}
|
206
219
|
|
207
220
|
Log.low "Found Traversal: #{traversal_ids * " => "}"
|
208
|
-
|
221
|
+
|
209
222
|
data_key, data_file = path.shift
|
210
223
|
data_index = if data_key == data_file.key_field
|
211
224
|
Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
|
212
225
|
nil
|
213
226
|
else
|
214
227
|
Log.debug "Data index required"
|
215
|
-
data_file.index :target => data_key, :fields => data_file.key_field, :persist => false
|
228
|
+
data_file.index :target => data_key, :fields => [data_file.key_field], :persist => false
|
216
229
|
end
|
217
230
|
|
218
231
|
current_index = data_index
|
@@ -221,27 +234,32 @@ module TSV
|
|
221
234
|
next_key, next_file = path.shift
|
222
235
|
|
223
236
|
if current_index.nil?
|
224
|
-
current_index = next_file.index(:target => next_key, :fields => current_key, :persist => persist_input)
|
237
|
+
current_index = next_file.index(:target => next_key, :fields => [current_key], :persist => persist_input)
|
225
238
|
else
|
226
|
-
next_index = next_file.index :target => next_key, :fields => current_key, :persist => persist_input
|
239
|
+
next_index = next_file.index :target => next_key, :fields => [current_key], :persist => persist_input
|
227
240
|
|
228
241
|
if TokyoCabinet::HDB === current_index
|
229
|
-
tmp = TSV.setup({}, :key_field => current_index.key_field, :fields => current_index.fields, :serializer => current_index.serializer, :type => current_index.type, :filename => current_index.filename)
|
230
|
-
current_index.
|
231
|
-
|
232
|
-
|
242
|
+
tmp = TSV.setup({}, :key_field => current_index.key_field, :fields => [current_index.fields], :serializer => current_index.serializer, :type => current_index.type, :filename => current_index.filename)
|
243
|
+
current_index.with_unnamed do
|
244
|
+
current_index.each do |key,value|
|
245
|
+
tmp.tsv_clean_set_brackets(key, current_index.tsv_clean_get_brackets(key))
|
246
|
+
end
|
233
247
|
end
|
234
248
|
current_index = tmp
|
235
249
|
end
|
236
250
|
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
251
|
+
next_index.with_unnamed do
|
252
|
+
current_index.with_unnamed do
|
253
|
+
current_index.process current_index.fields.first do |values|
|
254
|
+
if values.nil?
|
255
|
+
nil
|
256
|
+
else
|
257
|
+
next_index.values_at(*values).flatten.collect
|
258
|
+
end
|
259
|
+
end
|
260
|
+
current_index.fields = [next_key]
|
242
261
|
end
|
243
262
|
end
|
244
|
-
current_index.fields = [next_key]
|
245
263
|
end
|
246
264
|
current_key = next_key
|
247
265
|
end
|
data/lib/rbbt/tsv/filter.rb
CHANGED
@@ -76,8 +76,10 @@ module Filtered
|
|
76
76
|
def update
|
77
77
|
ids = []
|
78
78
|
|
79
|
-
data.
|
80
|
-
|
79
|
+
data.with_unnamed do
|
80
|
+
data.unfiltered_each do |key, entry|
|
81
|
+
ids << key if match_entry(entry)
|
82
|
+
end
|
81
83
|
end
|
82
84
|
|
83
85
|
save(ids.sort)
|
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -53,6 +53,12 @@ module TSV
|
|
53
53
|
]
|
54
54
|
end
|
55
55
|
|
56
|
+
def process_reorder_flat(key, values)
|
57
|
+
[ values,
|
58
|
+
@new_fields.collect{|field| field == :key ?
|
59
|
+
[key] : values[field] }.flatten
|
60
|
+
]
|
61
|
+
end
|
56
62
|
def initialize(key_field, fields, new_key_field, new_fields, type, uniq)
|
57
63
|
@new_key_field = TSV.identify_field(key_field, fields, new_key_field)
|
58
64
|
|
@@ -128,12 +134,15 @@ module TSV
|
|
128
134
|
end
|
129
135
|
end
|
130
136
|
else
|
131
|
-
|
137
|
+
case type
|
138
|
+
when :double
|
132
139
|
if uniq
|
133
140
|
self.instance_eval do alias process process_reorder_double_uniq end
|
134
141
|
else
|
135
142
|
self.instance_eval do alias process process_reorder_double end
|
136
143
|
end
|
144
|
+
when :flat
|
145
|
+
self.instance_eval do alias process process_reorder_flat end
|
137
146
|
else
|
138
147
|
self.instance_eval do alias process process_reorder_list end
|
139
148
|
end
|
@@ -144,9 +153,9 @@ module TSV
|
|
144
153
|
|
145
154
|
#{{{ Methods
|
146
155
|
|
147
|
-
def through(new_key_field = nil, new_fields = nil, uniq = false)
|
156
|
+
def through(new_key_field = nil, new_fields = nil, uniq = false, zipped = false)
|
148
157
|
|
149
|
-
traverser = Traverser.new
|
158
|
+
traverser = Traverser.new key_field, fields, new_key_field, new_fields, type, uniq
|
150
159
|
|
151
160
|
if @monitor
|
152
161
|
desc = "Iterating TSV"
|
@@ -164,25 +173,71 @@ module TSV
|
|
164
173
|
progress_monitor.tick if progress_monitor
|
165
174
|
|
166
175
|
keys, value = traverser.process(key, value)
|
176
|
+
|
177
|
+
# Annotated with Entity and NamedArray
|
178
|
+
if not @unnamed
|
179
|
+
if not traverser.new_field_names.nil?
|
180
|
+
case type
|
181
|
+
when :double, :list
|
182
|
+
NamedArray.setup value, traverser.new_field_names
|
183
|
+
when :flat, :single
|
184
|
+
Entity.formats[traverser.new_field_names.first].setup(value, :format => traverser.new_field_names.first) if defined?(Entity) and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_field_names
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
167
189
|
next if keys.nil?
|
168
|
-
|
169
|
-
|
170
|
-
|
190
|
+
|
191
|
+
if zipped
|
192
|
+
|
193
|
+
keys.each_with_index do |k,i|
|
194
|
+
v = value.collect{|v|
|
195
|
+
r = v[i]
|
196
|
+
r = v[0] if r.nil?
|
197
|
+
r
|
198
|
+
}
|
199
|
+
|
200
|
+
if not @unnamed and defined?(Entity) and not traverser.new_key_field_name.nil? and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_key_field_name
|
201
|
+
k = Entity.formats[traverser.new_key_field_name].setup(k.dup, :format => traverser.new_key_field_name)
|
202
|
+
end
|
203
|
+
v.key = k if NamedArray === v
|
204
|
+
yield k, v
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
else
|
209
|
+
keys.each do |key|
|
210
|
+
if not @unnamed and defined?(Entity) and not traverser.new_key_field_name.nil? and Entity.respond_to?(:formats) and Entity.formats.include? traverser.new_key_field_name
|
211
|
+
key = Entity.formats[traverser.new_key_field_name].setup(key.dup, :format => traverser.new_key_field_name)
|
212
|
+
end
|
213
|
+
value.key = key if NamedArray === value
|
214
|
+
yield key, value
|
215
|
+
end
|
171
216
|
end
|
172
217
|
end
|
173
218
|
|
174
219
|
[traverser.new_key_field_name, traverser.new_field_names]
|
175
220
|
end
|
176
221
|
|
177
|
-
def reorder(new_key_field = nil, new_fields = nil,
|
178
|
-
|
222
|
+
def reorder(new_key_field = nil, new_fields = nil, options = {})
|
223
|
+
zipped, uniq = Misc.process_options options, :zipped, :uniq
|
224
|
+
|
225
|
+
persist_options = Misc.pull_keys options, :persist
|
226
|
+
persist_options[:prefix] = "Reorder"
|
227
|
+
|
228
|
+
Persist.persist_tsv self, self.filename, {:key_field => new_key_field, :fields => new_fields}, persist_options do |data|
|
179
229
|
|
180
230
|
with_unnamed do
|
181
|
-
new_key_field_name, new_field_names = through new_key_field, new_fields do |key, value|
|
182
|
-
if data.include?(key) and
|
183
|
-
|
231
|
+
new_key_field_name, new_field_names = through new_key_field, new_fields, uniq, zipped do |key, value|
|
232
|
+
if data.include?(key) and not zipped
|
233
|
+
case type
|
234
|
+
when :double
|
235
|
+
data[key] = data[key].zip(value).collect do |old_list, new_list| old_list + new_list end
|
236
|
+
when :flat
|
237
|
+
data[key].concat value
|
238
|
+
end
|
184
239
|
else
|
185
|
-
data[key] = value
|
240
|
+
data[key] = value.dup
|
186
241
|
end
|
187
242
|
end
|
188
243
|
|
@@ -367,7 +422,7 @@ module TSV
|
|
367
422
|
when type == :flat
|
368
423
|
self[key] = new_values
|
369
424
|
else
|
370
|
-
values[
|
425
|
+
values[field_pos].replace new_values
|
371
426
|
self[key] = values
|
372
427
|
end
|
373
428
|
end
|