rbbt-util 5.3.4 → 5.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/rbbt/annotations/annotated_array.rb +4 -2
- data/lib/rbbt/association.rb +256 -0
- data/lib/rbbt/resource/path.rb +7 -2
- data/lib/rbbt/tsv/accessor.rb +11 -0
- data/lib/rbbt/tsv/attach/util.rb +18 -13
- data/lib/rbbt/tsv/filter.rb +3 -3
- data/lib/rbbt/tsv/manipulate.rb +1 -1
- data/lib/rbbt/tsv/parser.rb +15 -1
- data/lib/rbbt/tsv/util.rb +56 -3
- data/lib/rbbt/tsv.rb +1 -1
- data/lib/rbbt/util/misc.rb +3 -3
- data/lib/rbbt/util/named_array.rb +2 -1
- data/lib/rbbt/workflow/step.rb +5 -0
- data/share/rbbt_commands/app/start +1 -1
- data/test/rbbt/test_association.rb +76 -0
- data/test/rbbt/tsv/test_attach.rb +28 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzQxM2Y1NGU5NjBmNjc1M2IzZWExM2ZiNzU0OWIxNzRiZjYyMjdkNg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MjUwNjI3OTFhODA5YTQ0ZThkYzM5OWNiZjFjZGRmMDU1MWY3ZjI0NQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Y2Y5ZjBmNWVkOTkwZDNiNmI5NTkyMzJkZjZkN2M1OTY1ZmQzZWQyMWY2Zjkz
|
10
|
+
YTM3MTIyODJkMmRhZjQwNzI3NTIxZjNiNWQzY2Q0ODlmZDg2ODU1OWUxYTkx
|
11
|
+
YTIyOTRjNzA2MGNhZjNmOGVhYmEwMjJjMzZkYjM0ZDNhM2QwZjc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDI1MmRlNzhmZGI5ZDJmZGJiNzMyMTkzNjY1ODUyYjQ2ZTM1ZTZlZTBkOTZk
|
14
|
+
N2ZlMDAzOGRmOWY3ZmUyZDExZGVlNDVhOTIzMDQzMjhiNTc3MjgzMjA1Y2Rk
|
15
|
+
YjUxN2MxMjM3NDEyNTk4MGZjYTI2YjlhODZkOTY0YTg0NjFlOTU=
|
data/lib/rbbt/annotations/annotated_array.rb
CHANGED
@@ -24,8 +24,10 @@ module AnnotatedArray
|
|
24
24
|
|
25
25
|
value.extend AnnotatedArray if Array === value
|
26
26
|
|
27
|
-
value.container
|
28
|
-
|
27
|
+
if value.respond_to? :container
|
28
|
+
value.container = self
|
29
|
+
value.container_index = pos
|
30
|
+
end
|
29
31
|
|
30
32
|
value
|
31
33
|
end
|
data/lib/rbbt/association.rb
ADDED
@@ -0,0 +1,256 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
|
3
|
+
module Association
|
4
|
+
class << self
|
5
|
+
attr_accessor :databases
|
6
|
+
def databases
|
7
|
+
@databases ||= {}
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.register(database, file, options = {})
|
12
|
+
self.databases[database.to_s] = [file, options]
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.get_database(database)
|
16
|
+
self.databases[database.to_s]
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.open_database(database, options = {}, persist_options = {})
|
20
|
+
file, database_options = get_database database
|
21
|
+
open(file, database_options.merge(options), persist_options)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.index_database(database, options = {}, persist_options = {})
|
25
|
+
file, database_options = databases[database.to_s]
|
26
|
+
index(file, database_options.merge(options), persist_options)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.parse_field_specification(spec, fields)
|
30
|
+
spec = spec.split "=>" unless Array === spec
|
31
|
+
field_part, final_type = spec
|
32
|
+
|
33
|
+
field, type = field_part.split "=~"
|
34
|
+
|
35
|
+
[field, type, final_type]
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.resolve_field(name, fields)
|
39
|
+
type = Entity.formats[name]
|
40
|
+
return "Field #{ name } could not be resolved: #{fields}" if type.nil?
|
41
|
+
field = fields.select{|f| Entity.formats[f] == type}.first
|
42
|
+
[field, nil, name]
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.add_reciprocal(tsv)
|
46
|
+
new_tsv = {}
|
47
|
+
tsv.with_unnamed do
|
48
|
+
|
49
|
+
tsv.through do |target,v|
|
50
|
+
source_values = tsv.type == :double ? Misc.zip_fields(v) : [v]
|
51
|
+
|
52
|
+
source_values.each do |values|
|
53
|
+
source = values.shift
|
54
|
+
values.unshift target
|
55
|
+
current = new_tsv[source] || tsv[source]
|
56
|
+
|
57
|
+
case tsv.type
|
58
|
+
when :double
|
59
|
+
new = current ? current.zip(values).collect{|p| p.flatten} : values.collect{|p| [p]}
|
60
|
+
when :flat
|
61
|
+
new = current ? (current + values).compact.uniq : values
|
62
|
+
end
|
63
|
+
|
64
|
+
new_tsv[source] = new
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
tsv.merge! new_tsv
|
69
|
+
end
|
70
|
+
|
71
|
+
tsv
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.load_tsv(file, options)
|
75
|
+
key_field = TSV.parse_header(file, options).key_field
|
76
|
+
fields = TSV.parse_header(file, options).fields
|
77
|
+
all_fields = TSV.parse_header(file, options).all_fields
|
78
|
+
|
79
|
+
source = options[:source] || options[:source_type]
|
80
|
+
source = TSV.identify_field key_field, fields, options[:key_field] if source.nil? and options[:key_field]
|
81
|
+
source = all_fields[source] if Fixnum === source
|
82
|
+
source = key_field if source == :key or source.nil?
|
83
|
+
|
84
|
+
target = options[:target]
|
85
|
+
target = TSV.identify_field key_field, fields, options[:fields].first if target.nil? and options[:fields]
|
86
|
+
target = all_fields[target] if Fixnum === target
|
87
|
+
target = key_field if target == :key
|
88
|
+
|
89
|
+
zipped = options[:zipped]
|
90
|
+
undirected = options[:undirected]
|
91
|
+
|
92
|
+
source, source_header, source_final_type = parse_field_specification source, all_fields
|
93
|
+
target, target_header, target_final_type = parse_field_specification target, all_fields if target
|
94
|
+
|
95
|
+
if source and not all_fields.include? source
|
96
|
+
Log.debug("Resolving source: #{ source }")
|
97
|
+
source, source_header, source_final_type = resolve_field source, all_fields
|
98
|
+
Log.debug([source, source_header, source_final_type] * ", ")
|
99
|
+
end
|
100
|
+
|
101
|
+
if target and not all_fields.include? target
|
102
|
+
Log.debug("Resolving target: #{ target }")
|
103
|
+
target, target_header, target_final_type = resolve_field target, all_fields
|
104
|
+
Log.debug([target, target_header, target_final_type] * ", ")
|
105
|
+
end
|
106
|
+
|
107
|
+
source_final_type ||= options[:source_type] if options[:source_type]
|
108
|
+
target_final_type ||= options[:target_type] if options[:target_type]
|
109
|
+
|
110
|
+
Log.debug("Loading associations from: #{ file }")
|
111
|
+
Log.debug("sources: #{[source, source_header, source_final_type] * ", "}")
|
112
|
+
Log.debug("targets: #{[target, target_header, target_final_type] * ", "}")
|
113
|
+
if source != all_fields.first or (target and target != all_fields[1])
|
114
|
+
fields = ([target] + (all_fields - [source, target])).compact
|
115
|
+
open_options = options.merge({:key_field => source, :fields => fields})
|
116
|
+
tsv = TSV.open(file, open_options)
|
117
|
+
else
|
118
|
+
tsv = TSV.open(file, options)
|
119
|
+
end
|
120
|
+
|
121
|
+
if source_header and tsv.key_field != source_header
|
122
|
+
tsv.key_field = source_header
|
123
|
+
end
|
124
|
+
|
125
|
+
if source_final_type and tsv.key_field != source_final_type
|
126
|
+
Log.debug("Changing source type from #{tsv.key_field} to #{source_final_type}")
|
127
|
+
tsv.with_unnamed do
|
128
|
+
tsv = TSVWorkflow.job(:change_id, tsv.filename, :tsv => tsv, :format => source_final_type, :organism => tsv.namespace).exec
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
if target_header and tsv.fields.first != target_header
|
133
|
+
tsv.fields = tsv.fields.collect{|f| f == target ? target_header : f }
|
134
|
+
end
|
135
|
+
|
136
|
+
if target_final_type and tsv.fields.first != target_final_type and
|
137
|
+
Entity.formats[tsv.fields.first] and
|
138
|
+
Entity.formats[tsv.fields.first] == Entity.formats[target_final_type]
|
139
|
+
|
140
|
+
Log.debug("Changing target type from #{tsv.fields.first} to #{source_final_type}")
|
141
|
+
save_key_field = tsv.key_field
|
142
|
+
tsv.key_field = "MASKED"
|
143
|
+
tsv.with_unnamed do
|
144
|
+
tsv = TSVWorkflow.job(:swap_id, tsv.filename, :tsv => tsv, :field => tsv.fields.first, :format => target_final_type, :organism => tsv.namespace).exec
|
145
|
+
end
|
146
|
+
tsv.key_field = save_key_field
|
147
|
+
end
|
148
|
+
|
149
|
+
if undirected
|
150
|
+
tsv = add_reciprocal tsv
|
151
|
+
end
|
152
|
+
|
153
|
+
tsv
|
154
|
+
end
|
155
|
+
|
156
|
+
def self.open(file, options = {}, persist_options = {})
|
157
|
+
options = {} if options.nil?
|
158
|
+
persist_options = {} if persist_options.nil?
|
159
|
+
|
160
|
+
namespace = options[:namespace]
|
161
|
+
old_file, file = file, file.sub('NAMESPACE', namespace) if namespace and String === file
|
162
|
+
old_file.annotate file if Path === old_file
|
163
|
+
|
164
|
+
Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association"}.merge(persist_options)) do |data|
|
165
|
+
options = options.clone
|
166
|
+
|
167
|
+
tsv = load_tsv(file, options)
|
168
|
+
|
169
|
+
tsv.annotate(data)
|
170
|
+
data.serializer = tsv.type if TokyoCabinet::HDB === data
|
171
|
+
data.merge! tsv
|
172
|
+
tsv.annotate data
|
173
|
+
|
174
|
+
data
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
def self.index(file, options = {}, persist_options = {})
|
179
|
+
options = {} if options.nil?
|
180
|
+
persist_options = {} if persist_options.nil?
|
181
|
+
|
182
|
+
Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association Index"}.merge(persist_options).merge(:engine => TokyoCabinet::BDB, :serializer => :clean)) do |assocs|
|
183
|
+
undirected = options[:undirected]
|
184
|
+
tsv = TSV === file ? file : Association.open(file, options, persist_options.merge(:persist => false))
|
185
|
+
|
186
|
+
key_field = [tsv.key_field, tsv.fields.first.split(":").last, undirected ? "undirected" : nil].compact * "~"
|
187
|
+
|
188
|
+
TSV.setup(assocs, :key_field => key_field, :fields => tsv.fields[1..-1], :type => :list, :serializer => :list)
|
189
|
+
|
190
|
+
tsv.with_unnamed do
|
191
|
+
tsv.with_monitor :desc => "Extracting annotations" do
|
192
|
+
case tsv.type
|
193
|
+
when :flat
|
194
|
+
tsv.through do |source, targets|
|
195
|
+
next if source.nil? or source.empty? or targets.nil? or targets.empty?
|
196
|
+
|
197
|
+
targets.each do |target|
|
198
|
+
next if target.nil? or target.empty?
|
199
|
+
key = [source, target] * "~"
|
200
|
+
assocs[key] = nil
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
when :double
|
205
|
+
tsv.through do |source, values|
|
206
|
+
next if values.empty?
|
207
|
+
next if source.nil?
|
208
|
+
next if values.empty?
|
209
|
+
targets = values.first
|
210
|
+
rest = Misc.zip_fields values[1..-1]
|
211
|
+
|
212
|
+
annotations = rest.length > 1 ?
|
213
|
+
targets.zip(rest) :
|
214
|
+
targets.zip(rest * targets.length)
|
215
|
+
|
216
|
+
annotations.each do |target, info|
|
217
|
+
next if target.nil?
|
218
|
+
key = [source, target] * "~"
|
219
|
+
assocs[key] = info
|
220
|
+
end
|
221
|
+
end
|
222
|
+
else
|
223
|
+
raise "Type not supported: #{tsv.type}"
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
227
|
+
assocs.close
|
228
|
+
|
229
|
+
assocs
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
def self.connections(repo, entities)
|
234
|
+
source_field, target_field, undirected = repo.key_field.split("~")
|
235
|
+
|
236
|
+
source_type = Entity.formats[source_field].to_s
|
237
|
+
target_type = Entity.formats[target_field].to_s
|
238
|
+
|
239
|
+
source_entities = entities[source_type] || entities[source_field]
|
240
|
+
target_entities = entities[target_type] || entities[target_field]
|
241
|
+
|
242
|
+
return [] if source_entities.nil? or target_entities.nil?
|
243
|
+
|
244
|
+
source_entities.collect do |entity|
|
245
|
+
keys = repo.prefix(entity + "~")
|
246
|
+
keys.collect do |key|
|
247
|
+
source, target = key.split("~")
|
248
|
+
next unless target_entities.include? target
|
249
|
+
next if undirected and target > source
|
250
|
+
info = Hash[*repo.fields.zip(repo[key]).flatten]
|
251
|
+
|
252
|
+
{:source => source, :target => target, :info => info}
|
253
|
+
end.compact
|
254
|
+
end.flatten
|
255
|
+
end
|
256
|
+
end
|
data/lib/rbbt/resource/path.rb
CHANGED
@@ -10,11 +10,16 @@ module Path
|
|
10
10
|
string.resource = resource
|
11
11
|
string
|
12
12
|
end
|
13
|
+
|
14
|
+
def annotate(name)
|
15
|
+
Path.setup name.to_s, @pkgdir, @resource
|
16
|
+
end
|
17
|
+
|
13
18
|
def join(name)
|
14
19
|
if self.empty?
|
15
|
-
|
20
|
+
self.annotate name.to_s
|
16
21
|
else
|
17
|
-
|
22
|
+
self.annotate File.join(self, name.to_s)
|
18
23
|
end
|
19
24
|
end
|
20
25
|
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -540,6 +540,17 @@ end
|
|
540
540
|
peek
|
541
541
|
end
|
542
542
|
|
543
|
+
def summary
|
544
|
+
<<-EOF
|
545
|
+
Key field = #{key_field}
|
546
|
+
Fields = #{fields * ", "}
|
547
|
+
Type = #{type}
|
548
|
+
Example:
|
549
|
+
- #{key = keys.first}: #{self[key].inspect}
|
550
|
+
|
551
|
+
EOF
|
552
|
+
end
|
553
|
+
|
543
554
|
def to_hash
|
544
555
|
new = self.dup
|
545
556
|
ENTRY_KEYS.each{|entry| new.delete entry}
|
data/lib/rbbt/tsv/attach/util.rb
CHANGED
@@ -237,19 +237,7 @@ module TSV
|
|
237
237
|
end
|
238
238
|
end
|
239
239
|
|
240
|
-
def self.build_traverse_index(files, options = {})
|
241
|
-
options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
|
242
|
-
in_namespace = options[:in_namespace]
|
243
|
-
persist_input = options[:persist_input]
|
244
|
-
|
245
|
-
path = find_path(files, options)
|
246
|
-
|
247
|
-
return nil if path.nil?
|
248
|
-
|
249
|
-
traversal_ids = path.collect{|p| p.first}
|
250
|
-
|
251
|
-
Log.debug "Found Traversal: #{traversal_ids * " => "}"
|
252
|
-
|
240
|
+
def self.index_for_traversal(path, persist_input = false)
|
253
241
|
data_key, data_file = path.shift
|
254
242
|
data_index = if data_key == data_file.key_field
|
255
243
|
Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
|
@@ -292,6 +280,23 @@ module TSV
|
|
292
280
|
end
|
293
281
|
|
294
282
|
current_index
|
283
|
+
|
284
|
+
end
|
285
|
+
|
286
|
+
def self.build_traverse_index(files, options = {})
|
287
|
+
options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
|
288
|
+
in_namespace = options[:in_namespace]
|
289
|
+
persist_input = options[:persist_input]
|
290
|
+
|
291
|
+
path = find_path(files, options)
|
292
|
+
|
293
|
+
return nil if path.nil?
|
294
|
+
|
295
|
+
traversal_ids = path.collect{|p| p.first}
|
296
|
+
|
297
|
+
Log.debug "Found Traversal: #{traversal_ids * " => "}"
|
298
|
+
|
299
|
+
index_for_traversal path, persist_input
|
295
300
|
end
|
296
301
|
|
297
302
|
|
data/lib/rbbt/tsv/filter.rb
CHANGED
@@ -187,14 +187,14 @@ module Filtered
|
|
187
187
|
end
|
188
188
|
end
|
189
189
|
|
190
|
-
def filtered_set(key, value)
|
190
|
+
def filtered_set(key, value, clean = false)
|
191
191
|
if filters.empty?
|
192
|
-
self.send(:unfiltered_set, key, value)
|
192
|
+
self.send(:unfiltered_set, key, value, clean)
|
193
193
|
else
|
194
194
|
filters.each do |filter|
|
195
195
|
filter.add key if filter.match_entry key, value
|
196
196
|
end
|
197
|
-
self.send(:unfiltered_set, key, value)
|
197
|
+
self.send(:unfiltered_set, key, value, clean)
|
198
198
|
end
|
199
199
|
end
|
200
200
|
|
data/lib/rbbt/tsv/manipulate.rb
CHANGED
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -187,6 +187,12 @@ module TSV
|
|
187
187
|
|
188
188
|
def add_to_data_merge_zipped(data, keys, values)
|
189
189
|
num = keys.length
|
190
|
+
|
191
|
+
if values.first.length > 1 and num == 1
|
192
|
+
keys = keys * values.first.length
|
193
|
+
num = keys.length
|
194
|
+
end
|
195
|
+
|
190
196
|
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
191
197
|
all = values.unshift keys
|
192
198
|
Misc.zip_fields(all).each do |values|
|
@@ -204,6 +210,12 @@ module TSV
|
|
204
210
|
|
205
211
|
def add_to_data_zipped(data, keys, values)
|
206
212
|
num = keys.length
|
213
|
+
|
214
|
+
if values.first.length > 1 and num == 1
|
215
|
+
keys = keys * values.first.length
|
216
|
+
num = keys.length
|
217
|
+
end
|
218
|
+
|
207
219
|
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
208
220
|
all = values.unshift keys
|
209
221
|
Misc.zip_fields(all).each do |values|
|
@@ -309,7 +321,6 @@ module TSV
|
|
309
321
|
options = header_options.merge options
|
310
322
|
|
311
323
|
@type = Misc.process_options(options, :type) || :double
|
312
|
-
merge = Misc.process_options(options, :merge) || false
|
313
324
|
|
314
325
|
@sep2 = Misc.process_options(options, :sep2) || "|"
|
315
326
|
@cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
|
@@ -318,6 +329,9 @@ module TSV
|
|
318
329
|
@select= Misc.process_options options, :select
|
319
330
|
@zipped = Misc.process_options options, :zipped
|
320
331
|
@namespace = Misc.process_options options, :namespace
|
332
|
+
merge = Misc.process_options(options, :merge)
|
333
|
+
merge = @zipped if merge.nil?
|
334
|
+
merge = false if merge.nil?
|
321
335
|
|
322
336
|
case @type
|
323
337
|
when :double
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -62,13 +62,16 @@ module TSV
|
|
62
62
|
case
|
63
63
|
when Integer === field
|
64
64
|
field
|
65
|
-
when (field.nil? or field == :key
|
65
|
+
when (field.nil? or field == :key)
|
66
66
|
:key
|
67
|
-
when String === field
|
68
|
-
raise "No fields specified in TSV.identify_field" if fields.nil?
|
67
|
+
when (String === field and not fields.nil?)
|
69
68
|
pos = fields.index field
|
69
|
+
pos ||= :key if key_field == field
|
70
70
|
Log.medium "Field #{ field } was not found. Options: #{fields * ", "}" if pos.nil?
|
71
71
|
pos
|
72
|
+
else
|
73
|
+
raise "No fields specified in TSV.identify_field" if fields.nil?
|
74
|
+
Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
|
72
75
|
end
|
73
76
|
end
|
74
77
|
|
@@ -76,5 +79,55 @@ module TSV
|
|
76
79
|
TSV.identify_field(key_field, fields, field)
|
77
80
|
end
|
78
81
|
|
82
|
+
def to_double
|
83
|
+
new = {}
|
84
|
+
case type
|
85
|
+
when :double
|
86
|
+
self
|
87
|
+
when :flat
|
88
|
+
through do |k,v|
|
89
|
+
new[k] = [v]
|
90
|
+
end
|
91
|
+
when :single
|
92
|
+
through do |k,v|
|
93
|
+
new[k] = [[v]]
|
94
|
+
end
|
95
|
+
when :list
|
96
|
+
through do |k,v|
|
97
|
+
new[k] = v.collect{|e| [e]}
|
98
|
+
end
|
99
|
+
end
|
100
|
+
self.annotate(new)
|
101
|
+
new.type = :double
|
102
|
+
new
|
103
|
+
end
|
104
|
+
|
105
|
+
def to_flat(field = nil)
|
106
|
+
new = {}
|
107
|
+
case type
|
108
|
+
when :double
|
109
|
+
if field.nil?
|
110
|
+
through do |k,v| new[k] = v.first end
|
111
|
+
else
|
112
|
+
pos = identify_field field
|
113
|
+
through do |k,v| new[k] = v[pos] end
|
114
|
+
end
|
115
|
+
when :flat
|
116
|
+
self
|
117
|
+
when :single
|
118
|
+
through do |k,v|
|
119
|
+
new[k] = [v]
|
120
|
+
end
|
121
|
+
when :list
|
122
|
+
through do |k,v|
|
123
|
+
new[k] = [v.first]
|
124
|
+
end
|
125
|
+
end
|
126
|
+
self.annotate(new)
|
127
|
+
new.type = :flat
|
128
|
+
new
|
129
|
+
end
|
130
|
+
|
131
|
+
|
79
132
|
|
80
133
|
end
|
data/lib/rbbt/tsv.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -184,15 +184,15 @@ module Misc
|
|
184
184
|
ARRAY_MAX_LENGTH = 1000
|
185
185
|
STRING_MAX_LENGTH = ARRAY_MAX_LENGTH * 10
|
186
186
|
|
187
|
-
def self.sanitize_filename(filename, length =
|
187
|
+
def self.sanitize_filename(filename, length = 254)
|
188
188
|
if filename.length > length
|
189
|
-
if filename =~ /(\..{2,
|
189
|
+
if filename =~ /(\..{2,9})$/
|
190
190
|
extension = $1
|
191
191
|
else
|
192
192
|
extension = ''
|
193
193
|
end
|
194
194
|
|
195
|
-
post_fix = "
|
195
|
+
post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
|
196
196
|
|
197
197
|
filename = filename[0..(length - post_fix.length - 1)] << post_fix
|
198
198
|
else
|
data/lib/rbbt/util/named_array.rb
CHANGED
@@ -87,10 +87,11 @@ module NamedArray
|
|
87
87
|
def [](key, clean = false)
|
88
88
|
pos = Misc.field_position(fields, key)
|
89
89
|
elem = super(pos)
|
90
|
+
return elem if clean
|
90
91
|
|
91
92
|
return elem if @fields.nil? or @fields.empty?
|
92
93
|
|
93
|
-
field = NamedArray === @fields ? @fields
|
94
|
+
field = NamedArray === @fields ? @fields[pos, true] : @fields[pos]
|
94
95
|
elem = prepare_entity(elem, field, entity_options)
|
95
96
|
elem
|
96
97
|
end
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -12,6 +12,7 @@ class Step
|
|
12
12
|
class Aborted < Exception; end
|
13
13
|
|
14
14
|
def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil)
|
15
|
+
path = Misc.sanitize_filename path
|
15
16
|
@path = Path.setup(path)
|
16
17
|
@task = task
|
17
18
|
@bindings = bindings
|
@@ -172,6 +173,10 @@ class Step
|
|
172
173
|
FileUtils.mkdir_p File.dirname(path) unless Open.exists? File.dirname(path)
|
173
174
|
begin
|
174
175
|
run(true)
|
176
|
+
rescue Step::Aborted
|
177
|
+
Log.debug("Forked process aborted: #{@path}")
|
178
|
+
log :aborted, "Aborted"
|
179
|
+
exit -1
|
175
180
|
rescue Exception
|
176
181
|
Log.debug("Exception caught on forked process: #{$!.message}")
|
177
182
|
exit -1
|
data/share/rbbt_commands/app/start
CHANGED
@@ -15,7 +15,7 @@ app_dir = app_dir[app]
|
|
15
15
|
server = options[:server] || 'thin'
|
16
16
|
Misc.in_dir(app_dir) do
|
17
17
|
require 'rack'
|
18
|
-
ENV["RBBT_FINDER"] = true if options.include?(:finder)
|
18
|
+
ENV["RBBT_FINDER"] = "true" if options.include?(:finder)
|
19
19
|
ENV["RACK_ENV"] = options[:environment] if options.include?(:environment)
|
20
20
|
Rack::Server.start(options.merge(:config => 'config.ru'))
|
21
21
|
end
|
data/test/rbbt/test_association.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/association'
|
4
|
+
require 'rbbt/entity'
|
5
|
+
require 'rbbt/util/tmpfile'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
Workflow.require_workflow "Genomics"
|
9
|
+
require 'rbbt/entity/gene'
|
10
|
+
|
11
|
+
Workflow.require_workflow "TSVWorkflow"
|
12
|
+
|
13
|
+
TEST_ASSOCIATIONS =<<-EOF
|
14
|
+
#: :sep=" "#:namespace=Hsa/jan2013
|
15
|
+
#Entity1 Entity2 Score Entity3 Gene
|
16
|
+
a A 1 aa TP53
|
17
|
+
b B 2 bb KRAS
|
18
|
+
c C|K 3|4 cc|kk PTEN|PTEN
|
19
|
+
EOF
|
20
|
+
|
21
|
+
class TestAssociations < Test::Unit::TestCase
|
22
|
+
|
23
|
+
FAssocs = ""
|
24
|
+
DAssocs = ""
|
25
|
+
def setup
|
26
|
+
FAssocs.replace TmpFile.tmp_file
|
27
|
+
DAssocs.replace TmpFile.tmp_file
|
28
|
+
Open.write(FAssocs, TEST_ASSOCIATIONS)
|
29
|
+
end
|
30
|
+
def teardown
|
31
|
+
FileUtils.rm FAssocs
|
32
|
+
FileUtils.rm_rf DAssocs
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_simple_open
|
36
|
+
database = Association.open(FAssocs, {}, :dir => DAssocs)
|
37
|
+
assert_equal ["C", "K"], database["c"]["Entity2"]
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_source_open
|
41
|
+
database = Association.open(FAssocs, {:source => "Entity2", :zipped => true}, :dir => DAssocs)
|
42
|
+
assert_equal ["c", "3", 'cc', "PTEN"], database["C"].flatten
|
43
|
+
assert_equal ["c", "4", 'kk', "PTEN"], database["K"].flatten
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_target_open
|
47
|
+
database = Association.open(FAssocs, {:source => "Entity2", :target => "Entity3", :zipped => true}, :dir => DAssocs)
|
48
|
+
assert_equal ["cc", "c", "3", "PTEN"], database["C"].flatten
|
49
|
+
assert_equal ["kk", "c", "4", "PTEN"], database["K"].flatten
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_gene_open
|
53
|
+
database = Association.open(FAssocs, {:source => "Gene=~Associated Gene Name", :target => "Entity3", :zipped => true}, :dir => DAssocs)
|
54
|
+
assert_equal ["aa"], database["TP53"].first
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_gene_open_translate
|
58
|
+
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013")
|
59
|
+
database = Association.open(FAssocs, {:source => "Gene=~Associated Gene Name", :source_type => "Ensembl Gene ID", :target => "Entity3", :zipped => true}, :dir => DAssocs)
|
60
|
+
assert_equal ["aa"], database[tp53.ensembl].first
|
61
|
+
end
|
62
|
+
|
63
|
+
def test_gene_target_open_translate
|
64
|
+
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013")
|
65
|
+
database = Association.open(FAssocs, {:target => "Gene=~Associated Gene Name=>Ensembl Gene ID", :source => "Entity3", :zipped => true}, :dir => DAssocs)
|
66
|
+
assert_equal [tp53.ensembl], database["aa"].first
|
67
|
+
end
|
68
|
+
|
69
|
+
def test_undirected
|
70
|
+
require 'rbbt/sources/pina'
|
71
|
+
require 'rbbt/gene_associations'
|
72
|
+
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013")
|
73
|
+
index = Association.index_database('pina', {:source_type => "Ensembl Gene ID", :target_type => "Ensembl Gene ID", :undirected => true}, {:dir => DAssocs})
|
74
|
+
assert Association.connections(index, "Gene" => tp53.pina_interactors.ensembl.compact).any?
|
75
|
+
end
|
76
|
+
end
|
data/test/rbbt/tsv/test_attach.rb
CHANGED
@@ -446,5 +446,33 @@ bb Id4
|
|
446
446
|
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
447
447
|
assert_equal %w(Id1 Id4), tsv1["row3"]["OtherID"]
|
448
448
|
end
|
449
|
+
|
450
|
+
def test_attach_flat
|
451
|
+
content1 =<<-EOF
|
452
|
+
#Id ValueA ValueB
|
453
|
+
row1 a|aa|aaa b
|
454
|
+
row2 A B
|
455
|
+
EOF
|
456
|
+
|
457
|
+
content2 =<<-EOF
|
458
|
+
#ValueA OtherID
|
459
|
+
a Id1|Id2
|
460
|
+
A Id3
|
461
|
+
EOF
|
462
|
+
|
463
|
+
tsv1 = tsv2 = index = nil
|
464
|
+
TmpFile.with_file(content1) do |filename|
|
465
|
+
tsv1 = TSV.open(File.open(filename), :flat, :fields => ["ValueA"], :sep => /\s+/)
|
466
|
+
puts tsv1
|
467
|
+
end
|
468
|
+
|
469
|
+
TmpFile.with_file(content2) do |filename|
|
470
|
+
tsv2 = TSV.open(File.open(filename), :double, :sep => /\s+/)
|
471
|
+
end
|
472
|
+
|
473
|
+
res = tsv1.attach tsv2, :fields => ["OtherID"]
|
474
|
+
puts res
|
475
|
+
|
476
|
+
end
|
449
477
|
end
|
450
478
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.3.4
|
4
|
+
version: 5.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -173,6 +173,7 @@ files:
|
|
173
173
|
- lib/rbbt/annotations.rb
|
174
174
|
- lib/rbbt/annotations/annotated_array.rb
|
175
175
|
- lib/rbbt/annotations/util.rb
|
176
|
+
- lib/rbbt/association.rb
|
176
177
|
- lib/rbbt/entity.rb
|
177
178
|
- lib/rbbt/fix_width_table.rb
|
178
179
|
- lib/rbbt/persist.rb
|
@@ -240,6 +241,7 @@ files:
|
|
240
241
|
- test/rbbt/tsv/test_attach.rb
|
241
242
|
- test/rbbt/tsv/test_manipulate.rb
|
242
243
|
- test/rbbt/test_fix_width_table.rb
|
244
|
+
- test/rbbt/test_association.rb
|
243
245
|
- test/rbbt/test_workflow.rb
|
244
246
|
- test/rbbt/workflow/test_step.rb
|
245
247
|
- test/rbbt/workflow/test_task.rb
|
@@ -303,6 +305,7 @@ test_files:
|
|
303
305
|
- test/rbbt/tsv/test_attach.rb
|
304
306
|
- test/rbbt/tsv/test_manipulate.rb
|
305
307
|
- test/rbbt/test_fix_width_table.rb
|
308
|
+
- test/rbbt/test_association.rb
|
306
309
|
- test/rbbt/test_workflow.rb
|
307
310
|
- test/rbbt/workflow/test_step.rb
|
308
311
|
- test/rbbt/workflow/test_task.rb
|