rbbt-util 5.0.1 → 5.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +17 -7
- data/lib/rbbt/annotations.rb +186 -484
- data/lib/rbbt/annotations/annotated_array.rb +174 -0
- data/lib/rbbt/annotations/util.rb +206 -0
- data/lib/rbbt/tsv/accessor.rb +53 -15
- data/lib/rbbt/tsv/manipulate.rb +2 -2
- data/lib/rbbt/tsv/parser.rb +1 -1
- data/lib/rbbt/util/misc.rb +4 -3
- data/lib/rbbt/util/named_array.rb +28 -2
- data/lib/rbbt/workflow/accessor.rb +1 -1
- data/lib/rbbt/workflow/step.rb +14 -3
- data/test/rbbt/test_annotations.rb +7 -3
- data/test/rbbt/tsv/test_index.rb +0 -1
- metadata +4 -2
@@ -0,0 +1,174 @@
|
|
1
|
+
# Mixin for Arrays whose elements should carry the same annotations as the
# array itself. Element access and iteration hand back elements that have
# been passed through +annotate+ and that record where they came from
# (+container+ and +container_index+); list-producing methods return new
# arrays that are annotated and extended with AnnotatedArray again.
#
# The extended object must provide +annotate+, and the objects returned by
# +annotate+ must respond to +container=+ and +container_index=+. None of
# those are defined in this module.
module AnnotatedArray

  # True when the first element, read without annotating it, is itself an
  # AnnotatedArray.
  def double_array
    AnnotatedArray === self.send(:[], 0, true)
  end

  # First element, annotated (see #[]).
  def first
    self[0]
  end

  # Last element, annotated (see #[]).
  def last
    self[-1]
  end

  # Element at +pos+. With +clean+ set the raw stored value is returned;
  # otherwise the value is annotated and linked back to this array. Note that
  # the second argument is the +clean+ flag, not a length.
  def [](pos, clean = false)
    value = super(pos)
    return value if value.nil? or clean

    annotate_array_element(value, pos)
  end

  # Yields every element, annotated and linked back to this array; nil
  # elements are yielded untouched. +container_index+ is the real position of
  # the element in the array, so it agrees with #[] even when the array holds
  # nils. Without a block an Enumerator is returned.
  def each(&block)
    return to_enum(:each) unless block

    pos = 0
    super do |value|
      value = annotate_array_element(value, pos) unless value.nil?
      pos += 1
      block.call value
    end
  end

  # Plain Array with the block's result for each annotated element, or with
  # the annotated elements themselves when no block is given.
  def collect(&block)
    res = []

    if block_given?
      each { |value| res << yield(value) }
    else
      each { |value| res << value }
    end

    res
  end

  # Annotated array with the selected elements.
  #
  # With +method+, it is sent to the array (with +args+) and is expected to
  # return one result per element; elements whose result is truthy are kept.
  # Otherwise elements for which the block is truthy are kept. With neither a
  # method nor a block the array itself is returned.
  def select(method = nil, *args)
    if method
      res = self.zip( self.send(method, *args) ).
        select{|e,result| result }.
        collect{|element,r| element }
    else
      return self unless block_given?

      res = []
      each do |value|
        res << value if yield(value)
      end
    end

    annotate(res)
    res.extend AnnotatedArray

    res
  end

  # Annotated array with the elements for which the block is not truthy.
  def reject
    res = []

    each do |value|
      res << value unless yield(value)
    end

    annotate(res)
    res.extend AnnotatedArray

    res
  end

  # Annotated array with the elements also present in +list+.
  def subset(list)
    res = (self & list)

    annotate(res)
    res.extend AnnotatedArray

    res
  end

  # Annotated array without the elements present in +list+.
  def remove(list)
    res = (self - list)

    annotate(res)
    res.extend AnnotatedArray

    res
  end

  # Annotated array with the elements sorted (optionally by +block+). The
  # sorted elements are copies: +clean_annotations.dup+ when the element
  # responds to +clean_annotations+, a plain +dup+ otherwise.
  def sort(&block)
    res = self.collect.sort(&block).collect{|value| value.respond_to?(:clean_annotations) ? value.clean_annotations.dup : value.dup }

    annotate(res)
    res.extend AnnotatedArray

    res
  end

  # Wrap the Array methods that return a new list so that the result is
  # annotated as well. A string eval is used because the bare +super+ has to
  # forward the caller's arguments and block.
  %w(compact uniq flatten reverse sort_by).each do |method|
    self.module_eval <<-EOC, __FILE__, __LINE__ + 1
      def #{method}
        res = super

        annotate(res)
        res.extend AnnotatedArray

        res
      end
    EOC
  end

  private

  # Annotates +value+ (a copy of it when it is frozen) like this array and
  # records this array and +pos+ as its origin.
  def annotate_array_element(value, pos)
    value = value.dup if value.frozen?

    value = annotate(value)
    value.extend AnnotatedArray if Array === value

    value.container = self
    value.container_index = pos

    value
  end
end
|
173
|
+
|
174
|
+
|
@@ -0,0 +1,206 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Helpers to serialize annotated objects into TSV rows and to rebuild them.
# This part of the module relies on +info+, +annotation_types+, +annotations+
# and +id+ being provided elsewhere, and on the Misc and TSV helpers.
module Annotated

  # Restores annotations on +object+ from the +info+ hash and returns it.
  #
  # The keys +annotation_types+, +annotated_array+ and +entity_id+ (Symbol or
  # String) are consumed from +info+, which is modified in place. Annotation
  # types may be modules, module names, or a single "|"-separated String of
  # names. When there are no annotation types the object is returned
  # untouched and the other keys are left in +info+.
  def self.load(object, info)
    annotation_types = info.delete(:annotation_types) || info.delete("annotation_types") || []
    annotation_types = annotation_types.split("|") if String === annotation_types

    return object if annotation_types.empty?

    annotated_array = (info.delete(:annotated_array) || info.delete("annotated_array")).to_s == "true"
    entity_id = info.delete(:entity_id) || info.delete("entity_id")

    annotation_types.each do |mod|
      mod = Misc.string2const(mod) if String === mod
      object.extend mod
    end

    # Whatever is left in info are the annotation values themselves
    object.instance_variable_set(:@annotation_values, info)

    object.instance_variable_set(:@id, entity_id) if entity_id

    object.extend AnnotatedArray if annotated_array

    object
  end

  # Decodes the "Array:a|b|c" encoding produced by #tsv_values into an Array.
  # Anything else (including non-String entries) is returned as is.
  def self.resolve_array(entry)
    # Only Strings can carry the prefix; asking other objects with =~ fails
    # on recent rubies, and ^ would also match after an embedded newline.
    if String === entry and entry.start_with?("Array:")
      entry["Array:".length..-1].split("|")
    else
      entry
    end
  end

  # Rebuilds an annotated object from a TSV row.
  #
  # The object is the value of the "literal" field when present, otherwise a
  # copy of +id+. A "JSON" field is parsed and merged into the annotation
  # info; any other field is stored under its name. When +values+ holds
  # lists, they are zipped with Misc.zip_fields and applied in order, so later
  # entries overwrite earlier ones.
  def self.load_tsv_values(id, values, *fields)
    fields = fields.flatten
    info = {}
    literal_pos = fields.index "literal"

    object = literal_pos ? values[literal_pos] : id.dup
    object = resolve_array(object)

    rows = Array === values.first ? Misc.zip_fields(values) : [values]

    rows.each do |row|
      fields.each_with_index do |field, i|
        next if field == "literal"

        if field == "JSON"
          JSON.parse(row[i]).each do |key, value|
            info[key.to_sym] = value
          end
        else
          info[field.to_sym] = resolve_array(row[i])
        end
      end
    end

    self.load(object, info)

    object
  end

  # Rebuilds the annotated objects stored in +tsv+. When the key field is
  # "List" the TSV holds a single object and that object is returned;
  # otherwise an Array with one object per row.
  def self.load_tsv(tsv)
    tsv.with_unnamed do
      annotated_entities = tsv.collect do |id, values|
        Annotated.load_tsv_values(id, values, tsv.fields)
      end

      case tsv.key_field
      when "List"
        annotated_entities.first
      else
        annotated_entities
      end
    end
  end

  # Builds a TSV describing +annotations+, either a single annotated object
  # or an Array of them. Returns nil for nil.
  #
  # +fields+ selects the columns. With no fields, the annotations plus
  # +annotation_types+ are used; +:all+ adds the literal value as well (and
  # +annotated_array+ for an AnnotatedArray). An empty list of annotations
  # gets the columns +annotation_types+ and +literal+.
  #
  # Raises a RuntimeError when +:all+ is requested for an Array whose first
  # element is not annotated, or when +annotations+ is neither annotated nor
  # an Array.
  def self.tsv(annotations, *fields)
    return nil if annotations.nil?

    fields = case
             when (fields.compact.empty? and not annotations.empty?)
               base = AnnotatedArray === annotations ? annotations.annotations : annotations.compact.first.annotations
               # Build a new list instead of appending to the one we were handed
               base + [:annotation_types]

             when (fields == [:literal] and not annotations.empty?)
               # NOTE(review): this yields the "literal" column twice; kept
               # as is since the intent cannot be confirmed from this file.
               fields << :literal

             when (fields == [:all] and Annotated === annotations)
               all = [:annotation_types] + annotations.annotations
               all << :annotated_array if AnnotatedArray === annotations
               all << :literal

             when (fields == [:all] and not annotations.empty?)
               # Annotated inputs were handled above, so the elements must be
               first = annotations.compact.first
               unless Annotated === first
                 raise "Input array must be annotated or its elements must be" unless Array === first
                 raise "Input array must be annotated or its elements must be. No double arrays of singly annotated entities."
               end
               [:annotation_types] + first.annotations + [:literal]

             when annotations.empty?
               [:annotation_types, :literal]

             else
               fields.flatten
             end

    fields = fields.collect{|f| f.to_s}

    case
    when (Annotated === annotations and not (AnnotatedArray === annotations and annotations.double_array))
      # A single object, or a simple annotated list: one row under its id
      tsv = TSV.setup({}, :key_field => "List", :fields => fields, :type => :list, :unnamed => true)

      annot_id = annotations.id
      tsv[annot_id] = annotations.tsv_values(*fields).dup

    when Array === annotations
      # One row per element; the position is appended to keep keys distinct
      tsv = TSV.setup({}, :key_field => "ID", :fields => fields, :type => :list, :unnamed => true)

      annotations.compact.each_with_index do |annotation, i|
        tsv[annotation.id + ":" + i.to_s] = annotation.tsv_values(*fields).dup
      end

    else
      raise "Annotations need to be an Array to create TSV"
    end

    tsv
  end

  # Values of this object for each of +fields+, in order, as they are stored
  # in a TSV row.
  #
  # A field can be a Proc (called with the object), "JSON" (the whole info as
  # JSON), "annotation_types", "annotated_array", "literal" (the object
  # itself with newlines and tabs replaced by spaces), an annotation name, or
  # the name of a method the object responds to. Array values are encoded as
  # "Array:a|b|c". Unknown fields produce nil.
  #
  # For an Array that is not an AnnotatedArray, or that is a double array,
  # the values of each non-nil element are collected and zipped by field.
  def tsv_values(*fields)
    if Array === self and (not AnnotatedArray === self or self.double_array)
      Misc.zip_fields(self.compact.collect{|e| e.tsv_values(fields)})
    else
      fields = fields.flatten
      info = self.info

      fields.collect do |field|
        case
        when Proc === field
          field.call(self)

        when field == "JSON"
          if AnnotatedArray === self
            info.merge(:annotated_array => true).to_json
          else
            info.to_json
          end

        when field == "annotation_types"
          annotation_types.collect{|t| t.to_s} * "|"

        when field == "annotated_array"
          AnnotatedArray === self

        when field == "literal"
          (Array === self ? "Array:" + (self * "|") : self).gsub(/\n|\t/, ' ')

        when info.include?(field.to_sym)
          res = info[field.to_sym]
          Array === res ? "Array:" + (res * "|") : res

        when self.respond_to?(field)
          res = self.send(field)
          Array === res ? "Array:" + (res * "|") : res
        end
      end
    end
  end
end
|
206
|
+
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -9,11 +9,37 @@ module TSV
|
|
9
9
|
attr_accessor :unnamed, :serializer_module, :entity_options
|
10
10
|
|
11
11
|
# Options handed to the entity setup code, built on first use: the namespace
# is passed both as :namespace and as :organism, or the options are empty
# when there is no namespace. Building them also discards the cached entity
# templates.
def entity_options
  return @entity_options unless @entity_options.nil?

  current = namespace
  @entity_options = if current
                      {:namespace => current, :organism => current}
                    else
                      {}
                    end
  @entity_templates = nil

  @entity_options
end
|
18
|
+
|
19
|
+
# Annotates +entity+ as an entity of type +field+ and returns it.
#
# A template for each field is built once through Misc.prepare_entity and
# cached in @entity_templates; a nil entry records that the field produced no
# annotated template, so the lookup is not repeated. Entities are returned
# untouched when they are nil, when Entity is not defined, or when there is
# no template for the field. Frozen entities are copied before annotating.
#
# entity:: the value, or Array of values, to annotate
# field::  field name used to find the entity type
# options:: passed on to Misc.prepare_entity when the template is built.
#           The :dup_array key is removed from this hash and has no other
#           effect.
def prepare_entity(entity, field, options = {})
  return entity if entity.nil?
  return entity unless defined? Entity

  # NOTE(review): the option is only stripped so that it does not reach
  # Misc.prepare_entity; the array itself is not duplicated. This matches the
  # previous behaviour, confirm whether a copy was intended.
  options.delete :dup_array

  @entity_templates ||= {}

  unless @entity_templates.include? field
    template = Misc.prepare_entity("TEMPLATE", field, options)
    @entity_templates[field] = Annotated === template ? template : nil
  end

  template = @entity_templates[field]
  return entity unless template

  entity = template.annotate(entity.frozen? ? entity.dup : entity)
  entity.extend AnnotatedArray if Array === entity
  entity
end
|
19
45
|
|
@@ -87,7 +113,7 @@ module TSV
|
|
87
113
|
when :flat, :single
|
88
114
|
value = value.dup if value.frozen?
|
89
115
|
|
90
|
-
value =
|
116
|
+
value = prepare_entity(value, fields.first, entity_options)
|
91
117
|
end
|
92
118
|
value
|
93
119
|
end
|
@@ -100,7 +126,7 @@ module TSV
|
|
100
126
|
keys = tsv_clean_keys - ENTRY_KEYS
|
101
127
|
return keys if @unnamed or key_field.nil?
|
102
128
|
|
103
|
-
|
129
|
+
prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
104
130
|
end
|
105
131
|
|
106
132
|
def tsv_values
|
@@ -111,7 +137,7 @@ module TSV
|
|
111
137
|
when :double, :list
|
112
138
|
values.each{|value| NamedArray.setup value, fields, nil, entity_options}
|
113
139
|
when :flat, :single
|
114
|
-
values = values.collect{|v|
|
140
|
+
values = values.collect{|v| prepare_entity(v, fields.first, entity_options)}
|
115
141
|
end
|
116
142
|
|
117
143
|
values
|
@@ -135,10 +161,10 @@ module TSV
|
|
135
161
|
when :double, :list
|
136
162
|
NamedArray.setup value, fields, key, entity_options if Array === value
|
137
163
|
when :flat, :single
|
138
|
-
|
164
|
+
prepare_entity(value, fields.first, entity_options)
|
139
165
|
end
|
140
166
|
end
|
141
|
-
key =
|
167
|
+
key = prepare_entity(key, key_field, entity_options)
|
142
168
|
end
|
143
169
|
|
144
170
|
yield key, value if block_given?
|
@@ -162,10 +188,10 @@ module TSV
|
|
162
188
|
when :double, :list
|
163
189
|
NamedArray.setup value, fields, key, entity_options if Array === value
|
164
190
|
when :flat, :single
|
165
|
-
value =
|
191
|
+
value = prepare_entity(value, fields.first, entity_options)
|
166
192
|
end
|
167
193
|
end
|
168
|
-
key =
|
194
|
+
key = prepare_entity(key, key_field, entity_options)
|
169
195
|
end
|
170
196
|
|
171
197
|
|
@@ -229,14 +255,14 @@ module TSV
|
|
229
255
|
if fields == :all
|
230
256
|
if just_keys
|
231
257
|
keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
|
232
|
-
keys =
|
258
|
+
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
233
259
|
else
|
234
260
|
elems.sort_by{|key, value| key }
|
235
261
|
end
|
236
262
|
else
|
237
263
|
if just_keys
|
238
264
|
keys = elems.sort_by{|key, value| value }.collect{|key, value| key}
|
239
|
-
keys =
|
265
|
+
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
240
266
|
keys
|
241
267
|
else
|
242
268
|
elems.sort_by{|key, value| value }.collect{|key, value| [key, self[key]]}
|
@@ -345,8 +371,8 @@ end
|
|
345
371
|
end
|
346
372
|
|
347
373
|
entry :key_field,
|
348
|
-
:fields,
|
349
374
|
:type,
|
375
|
+
:fields,
|
350
376
|
:cast,
|
351
377
|
:identifiers,
|
352
378
|
:namespace,
|
@@ -358,10 +384,22 @@ end
|
|
358
384
|
if @fields.nil? or @unnamed
|
359
385
|
@fields
|
360
386
|
else
|
361
|
-
NamedArray.setup @fields, @fields, nil, entity_options
|
387
|
+
@named_fields ||= NamedArray.setup @fields, @fields, nil, entity_options
|
362
388
|
end
|
363
389
|
end
|
364
390
|
|
391
|
+
# Sets the namespace: stores its YAML form (NIL_YAML for nil) under the
# "__tsv_hash_namespace" key, updates @namespace and clears the cached
# entity options.
def namespace=(value)
  serialized = if value.nil?
                 NIL_YAML
               else
                 value.to_yaml
               end
  tsv_clean_set_brackets("__tsv_hash_namespace", serialized)

  @namespace = value
  @entity_options = nil
end
|
396
|
+
|
397
|
+
# Sets the field names: stores their YAML form (NIL_YAML for nil) under the
# "__tsv_hash_fields" key, updates @fields and clears the cached named
# fields.
def fields=(value)
  serialized = if value.nil?
                 NIL_YAML
               else
                 value.to_yaml
               end
  tsv_clean_set_brackets("__tsv_hash_fields", serialized)

  @fields = value
  @named_fields = nil
end
|
402
|
+
|
365
403
|
def self.zip_fields(list, fields = nil)
|
366
404
|
return [] if list.nil? || list.empty?
|
367
405
|
fields ||= list.fields if list.respond_to? :fields
|