rbbt-util 5.3.4 → 5.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/rbbt/annotations/annotated_array.rb +4 -2
- data/lib/rbbt/association.rb +256 -0
- data/lib/rbbt/resource/path.rb +7 -2
- data/lib/rbbt/tsv/accessor.rb +11 -0
- data/lib/rbbt/tsv/attach/util.rb +18 -13
- data/lib/rbbt/tsv/filter.rb +3 -3
- data/lib/rbbt/tsv/manipulate.rb +1 -1
- data/lib/rbbt/tsv/parser.rb +15 -1
- data/lib/rbbt/tsv/util.rb +56 -3
- data/lib/rbbt/tsv.rb +1 -1
- data/lib/rbbt/util/misc.rb +3 -3
- data/lib/rbbt/util/named_array.rb +2 -1
- data/lib/rbbt/workflow/step.rb +5 -0
- data/share/rbbt_commands/app/start +1 -1
- data/test/rbbt/test_association.rb +76 -0
- data/test/rbbt/tsv/test_attach.rb +28 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzQxM2Y1NGU5NjBmNjc1M2IzZWExM2ZiNzU0OWIxNzRiZjYyMjdkNg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MjUwNjI3OTFhODA5YTQ0ZThkYzM5OWNiZjFjZGRmMDU1MWY3ZjI0NQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Y2Y5ZjBmNWVkOTkwZDNiNmI5NTkyMzJkZjZkN2M1OTY1ZmQzZWQyMWY2Zjkz
|
10
|
+
YTM3MTIyODJkMmRhZjQwNzI3NTIxZjNiNWQzY2Q0ODlmZDg2ODU1OWUxYTkx
|
11
|
+
YTIyOTRjNzA2MGNhZjNmOGVhYmEwMjJjMzZkYjM0ZDNhM2QwZjc=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDI1MmRlNzhmZGI5ZDJmZGJiNzMyMTkzNjY1ODUyYjQ2ZTM1ZTZlZTBkOTZk
|
14
|
+
N2ZlMDAzOGRmOWY3ZmUyZDExZGVlNDVhOTIzMDQzMjhiNTc3MjgzMjA1Y2Rk
|
15
|
+
YjUxN2MxMjM3NDEyNTk4MGZjYTI2YjlhODZkOTY0YTg0NjFlOTU=
|
@@ -24,8 +24,10 @@ module AnnotatedArray
|
|
24
24
|
|
25
25
|
value.extend AnnotatedArray if Array === value
|
26
26
|
|
27
|
-
value.container
|
28
|
-
|
27
|
+
if value.respond_to? :container
|
28
|
+
value.container = self
|
29
|
+
value.container_index = pos
|
30
|
+
end
|
29
31
|
|
30
32
|
value
|
31
33
|
end
|
@@ -0,0 +1,256 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
|
3
|
+
# Registry and loader for "association" databases: TSV files whose key is a
# source entity and whose first field holds the target entities, optionally
# followed by annotation fields describing each source/target pair.
module Association
  class << self
    # Only the writer is generated; the reader below lazily creates the registry.
    attr_writer :databases

    # Registry of known databases, keyed by name (String) with
    # +[file, options]+ pairs as values.
    def databases
      @databases ||= {}
    end
  end

  # Registers +file+ (with its default loading +options+) under the name
  # +database+. Names are normalized with +to_s+.
  def self.register(database, file, options = {})
    self.databases[database.to_s] = [file, options]
  end

  # Returns the +[file, options]+ pair registered under +database+, or nil
  # when nothing was registered under that name.
  def self.get_database(database)
    self.databases[database.to_s]
  end

  # Like get_database, but raises a descriptive error for unknown names
  # instead of letting callers fail later on a nil.
  def self.registered_database(database)
    entry = get_database(database)
    raise "Association database not registered: #{database}" if entry.nil?
    entry
  end
  private_class_method :registered_database

  # Opens a registered database. +options+ override the options given at
  # registration time; +persist_options+ are forwarded to Association.open.
  # Raises RuntimeError when +database+ was never registered.
  def self.open_database(database, options = {}, persist_options = {})
    file, database_options = registered_database database
    open(file, (database_options || {}).merge(options || {}), persist_options)
  end

  # Builds (or loads) the pair index of a registered database. +options+
  # override the options given at registration time.
  # Raises RuntimeError when +database+ was never registered.
  def self.index_database(database, options = {}, persist_options = {})
    file, database_options = registered_database database
    index(file, (database_options || {}).merge(options || {}), persist_options)
  end

  # Splits a field specification of the form "Field=~Header=>FinalType"
  # (both the "=~Header" and "=>FinalType" parts are optional) into
  # +[field, header, final_type]+. +spec+ may also be given pre-split as
  # an Array +[field_part, final_type]+. +fields+ is accepted for signature
  # compatibility but is not consulted.
  def self.parse_field_specification(spec, fields)
    spec = spec.split "=>" unless Array === spec
    field_part, final_type = spec

    field, type = field_part.split "=~"

    [field, type, final_type]
  end

  # Finds the first of +fields+ whose entity type (per Entity.formats) matches
  # the entity type of +name+, and returns +[field, nil, name]+ so that +name+
  # becomes the final type to translate into.
  # Raises RuntimeError when +name+ is not a known entity format.
  def self.resolve_field(name, fields)
    type = Entity.formats[name]
    # This used to *return* the message, which callers then destructured as
    # if it were a field name; failing loudly is the evident intent.
    raise "Field #{ name } could not be resolved: #{fields}" if type.nil?
    field = fields.select{|f| Entity.formats[f] == type}.first
    [field, nil, name]
  end

  # Adds the reverse direction of every association to +tsv+ (in place) so
  # that targets also appear as keys. Only :double and :flat TSVs get a
  # merged value; for other types the reciprocal entry is set to nil.
  # Returns +tsv+.
  def self.add_reciprocal(tsv)
    new_tsv = {}
    tsv.with_unnamed do

      tsv.through do |target,v|
        source_values = tsv.type == :double ? Misc.zip_fields(v) : [v]

        source_values.each do |values|
          # The first value is the association partner; it becomes the new
          # key and the original key takes its place in the value list.
          source = values.shift
          values.unshift target
          current = new_tsv[source] || tsv[source]

          merged = case tsv.type
                   when :double
                     current ? current.zip(values).collect{|p| p.flatten} : values.collect{|p| [p]}
                   when :flat
                     current ? (current + values).compact.uniq : values
                   end

          new_tsv[source] = merged
        end
      end

      tsv.merge! new_tsv
    end

    tsv
  end

  # Loads +file+ as a TSV keyed by the source field, with the target field
  # first among the remaining fields.
  #
  # Options read here: :source (or :source_type, or :key_field), :target
  # (or the first of :fields), :source_type, :target_type and :undirected.
  # Source and target accept the specification syntax understood by
  # parse_field_specification. All options are also forwarded to TSV.open.
  def self.load_tsv(file, options)
    # Parse the header once and reuse it for all three attributes.
    header = TSV.parse_header(file, options)
    key_field = header.key_field
    fields = header.fields
    all_fields = header.all_fields

    source = options[:source] || options[:source_type]
    source = TSV.identify_field key_field, fields, options[:key_field] if source.nil? and options[:key_field]
    # NOTE(review): positions coming from TSV.identify_field look like they are
    # relative to +fields+, while they are resolved against +all_fields+ here —
    # confirm the two agree.
    source = all_fields[source] if Integer === source
    source = key_field if source == :key or source.nil?

    target = options[:target]
    target = TSV.identify_field key_field, fields, options[:fields].first if target.nil? and options[:fields]
    target = all_fields[target] if Integer === target
    target = key_field if target == :key

    undirected = options[:undirected]

    source, source_header, source_final_type = parse_field_specification source, all_fields
    target, target_header, target_final_type = parse_field_specification target, all_fields if target

    # Names that are not literal columns are treated as entity formats and
    # matched against the columns by entity type.
    if source and not all_fields.include? source
      Log.debug("Resolving source: #{ source }")
      source, source_header, source_final_type = resolve_field source, all_fields
      Log.debug([source, source_header, source_final_type] * ", ")
    end

    if target and not all_fields.include? target
      Log.debug("Resolving target: #{ target }")
      target, target_header, target_final_type = resolve_field target, all_fields
      Log.debug([target, target_header, target_final_type] * ", ")
    end

    source_final_type ||= options[:source_type]
    target_final_type ||= options[:target_type]

    Log.debug("Loading associations from: #{ file }")
    Log.debug("sources: #{[source, source_header, source_final_type] * ", "}")
    Log.debug("targets: #{[target, target_header, target_final_type] * ", "}")
    # Reorder columns only when the file is not already laid out as
    # source, target, rest.
    if source != all_fields.first or (target and target != all_fields[1])
      fields = ([target] + (all_fields - [source, target])).compact
      open_options = options.merge({:key_field => source, :fields => fields})
      tsv = TSV.open(file, open_options)
    else
      tsv = TSV.open(file, options)
    end

    if source_header and tsv.key_field != source_header
      tsv.key_field = source_header
    end

    if source_final_type and tsv.key_field != source_final_type
      Log.debug("Changing source type from #{tsv.key_field} to #{source_final_type}")
      tsv.with_unnamed do
        tsv = TSVWorkflow.job(:change_id, tsv.filename, :tsv => tsv, :format => source_final_type, :organism => tsv.namespace).exec
      end
    end

    if target_header and tsv.fields.first != target_header
      tsv.fields = tsv.fields.collect{|f| f == target ? target_header : f }
    end

    if target_final_type and tsv.fields.first != target_final_type and
      Entity.formats[tsv.fields.first] and
      Entity.formats[tsv.fields.first] == Entity.formats[target_final_type]

      Log.debug("Changing target type from #{tsv.fields.first} to #{target_final_type}")
      # The key field is masked while the target column is translated and
      # restored afterwards.
      save_key_field = tsv.key_field
      tsv.key_field = "MASKED"
      tsv.with_unnamed do
        tsv = TSVWorkflow.job(:swap_id, tsv.filename, :tsv => tsv, :field => tsv.fields.first, :format => target_final_type, :organism => tsv.namespace).exec
      end
      tsv.key_field = save_key_field
    end

    if undirected
      tsv = add_reciprocal tsv
    end

    tsv
  end

  # Opens +file+ as an association TSV through Persist.persist_tsv
  # (persistence enabled by default, prefix "Association").
  # When options[:namespace] is set and +file+ is a String, the literal
  # 'NAMESPACE' in the file name is replaced by the namespace.
  def self.open(file, options = {}, persist_options = {})
    options = {} if options.nil?
    persist_options = {} if persist_options.nil?

    namespace = options[:namespace]
    old_file, file = file, file.sub('NAMESPACE', namespace) if namespace and String === file
    # NOTE(review): presumably re-attaches the Path annotations of the original
    # file to the substituted name — confirm Path#annotate works in place.
    old_file.annotate file if Path === old_file

    Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association"}.merge(persist_options)) do |data|
      options = options.clone

      tsv = load_tsv(file, options)

      tsv.annotate(data)
      data.serializer = tsv.type if TokyoCabinet::HDB === data
      data.merge! tsv
      tsv.annotate data

      data
    end
  end

  # Builds a pair index for +file+ (a file, or an already loaded TSV): each
  # key is "source~target" and each value holds the annotation fields of that
  # pair. The index key_field is "source_field~target_field", with
  # "~undirected" appended when options[:undirected] is set.
  # Raises RuntimeError for TSV types other than :flat and :double.
  def self.index(file, options = {}, persist_options = {})
    options = {} if options.nil?
    persist_options = {} if persist_options.nil?

    Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association Index"}.merge(persist_options).merge(:engine => TokyoCabinet::BDB, :serializer => :clean)) do |assocs|
      undirected = options[:undirected]
      tsv = TSV === file ? file : Association.open(file, options, persist_options.merge(:persist => false))

      key_field = [tsv.key_field, tsv.fields.first.split(":").last, undirected ? "undirected" : nil].compact * "~"

      TSV.setup(assocs, :key_field => key_field, :fields => tsv.fields[1..-1], :type => :list, :serializer => :list)

      tsv.with_unnamed do
        tsv.with_monitor :desc => "Extracting annotations" do
          case tsv.type
          when :flat
            tsv.through do |source, targets|
              next if source.nil? or source.empty? or targets.nil? or targets.empty?

              targets.each do |target|
                next if target.nil? or target.empty?
                key = [source, target] * "~"
                assocs[key] = nil
              end
            end

          when :double
            tsv.through do |source, values|
              next if values.empty?
              next if source.nil?
              targets = values.first
              rest = Misc.zip_fields values[1..-1]

              # With a single annotation row, the same row is repeated for
              # every target.
              annotations = rest.length > 1 ?
                targets.zip(rest) :
                targets.zip(rest * targets.length)

              annotations.each do |target, info|
                next if target.nil?
                key = [source, target] * "~"
                assocs[key] = info
              end
            end
          else
            raise "Type not supported: #{tsv.type}"
          end
        end
      end
      assocs.close

      assocs
    end
  end

  # Lists the associations stored in the index +repo+ whose source and target
  # both belong to +entities+, a Hash from entity type (or field name) to a
  # list of entity identifiers. Returns an Array of
  # +{:source, :target, :info}+ Hashes, or +[]+ when +entities+ lacks either
  # side. For undirected indices only pairs with target <= source are kept.
  def self.connections(repo, entities)
    source_field, target_field, undirected = repo.key_field.split("~")

    source_type = Entity.formats[source_field].to_s
    target_type = Entity.formats[target_field].to_s

    source_entities = entities[source_type] || entities[source_field]
    target_entities = entities[target_type] || entities[target_field]

    return [] if source_entities.nil? or target_entities.nil?

    source_entities.collect do |entity|
      keys = repo.prefix(entity + "~")
      keys.collect do |key|
        source, target = key.split("~")
        next unless target_entities.include? target
        next if undirected and target > source
        info = Hash[*repo.fields.zip(repo[key]).flatten]

        {:source => source, :target => target, :info => info}
      end.compact
    end.flatten
  end
end
|
data/lib/rbbt/resource/path.rb
CHANGED
@@ -10,11 +10,16 @@ module Path
|
|
10
10
|
string.resource = resource
|
11
11
|
string
|
12
12
|
end
|
13
|
+
|
14
|
+
# Sets up +name+ (converted with +to_s+) as a Path that shares this path's
# package directory and resource, and returns the result of Path.setup.
def annotate(name)
  path_name = name.to_s
  Path.setup(path_name, @pkgdir, @resource)
end
|
17
|
+
|
13
18
|
def join(name)
|
14
19
|
if self.empty?
|
15
|
-
|
20
|
+
self.annotate name.to_s
|
16
21
|
else
|
17
|
-
|
22
|
+
self.annotate File.join(self, name.to_s)
|
18
23
|
end
|
19
24
|
end
|
20
25
|
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -540,6 +540,17 @@ end
|
|
540
540
|
peek
|
541
541
|
end
|
542
542
|
|
543
|
+
# Returns a short multi-line description of this TSV: key field, field
# names, type, and the first entry as an example.
def summary
  <<-EOF
Key field = #{key_field}
Fields = #{fields * ", "}
Type = #{type}
Example:
- #{example_key = keys.first}: #{self[example_key].inspect}

  EOF
end
|
553
|
+
|
543
554
|
def to_hash
|
544
555
|
new = self.dup
|
545
556
|
ENTRY_KEYS.each{|entry| new.delete entry}
|
data/lib/rbbt/tsv/attach/util.rb
CHANGED
@@ -237,19 +237,7 @@ module TSV
|
|
237
237
|
end
|
238
238
|
end
|
239
239
|
|
240
|
-
def self.build_traverse_index(files, options = {})
|
241
|
-
options = Misc.add_defaults options, :in_namespace => false, :persist_input => true
|
242
|
-
in_namespace = options[:in_namespace]
|
243
|
-
persist_input = options[:persist_input]
|
244
|
-
|
245
|
-
path = find_path(files, options)
|
246
|
-
|
247
|
-
return nil if path.nil?
|
248
|
-
|
249
|
-
traversal_ids = path.collect{|p| p.first}
|
250
|
-
|
251
|
-
Log.debug "Found Traversal: #{traversal_ids * " => "}"
|
252
|
-
|
240
|
+
def self.index_for_traversal(path, persist_input = false)
|
253
241
|
data_key, data_file = path.shift
|
254
242
|
data_index = if data_key == data_file.key_field
|
255
243
|
Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
|
@@ -292,6 +280,23 @@ module TSV
|
|
292
280
|
end
|
293
281
|
|
294
282
|
current_index
|
283
|
+
|
284
|
+
end
|
285
|
+
|
286
|
+
# Finds a traversal path across +files+ (via find_path) and builds the index
# for it with index_for_traversal. Returns nil when no path is found.
#
# Defaults applied to +options+: :in_namespace => false and
# :persist_input => true.
def self.build_traverse_index(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => true

  traversal = find_path(files, options)
  return nil if traversal.nil?

  # Each step of the path starts with the identifier it traverses through.
  step_ids = traversal.collect{|step| step.first}
  Log.debug "Found Traversal: #{step_ids * " => "}"

  index_for_traversal traversal, options[:persist_input]
end
|
296
301
|
|
297
302
|
|
data/lib/rbbt/tsv/filter.rb
CHANGED
@@ -187,14 +187,14 @@ module Filtered
|
|
187
187
|
end
|
188
188
|
end
|
189
189
|
|
190
|
-
def filtered_set(key, value)
|
190
|
+
def filtered_set(key, value, clean = false)
|
191
191
|
if filters.empty?
|
192
|
-
self.send(:unfiltered_set, key, value)
|
192
|
+
self.send(:unfiltered_set, key, value, clean)
|
193
193
|
else
|
194
194
|
filters.each do |filter|
|
195
195
|
filter.add key if filter.match_entry key, value
|
196
196
|
end
|
197
|
-
self.send(:unfiltered_set, key, value)
|
197
|
+
self.send(:unfiltered_set, key, value, clean)
|
198
198
|
end
|
199
199
|
end
|
200
200
|
|
data/lib/rbbt/tsv/manipulate.rb
CHANGED
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -187,6 +187,12 @@ module TSV
|
|
187
187
|
|
188
188
|
def add_to_data_merge_zipped(data, keys, values)
|
189
189
|
num = keys.length
|
190
|
+
|
191
|
+
if values.first.length > 1 and num == 1
|
192
|
+
keys = keys * values.first.length
|
193
|
+
num = keys.length
|
194
|
+
end
|
195
|
+
|
190
196
|
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
191
197
|
all = values.unshift keys
|
192
198
|
Misc.zip_fields(all).each do |values|
|
@@ -204,6 +210,12 @@ module TSV
|
|
204
210
|
|
205
211
|
def add_to_data_zipped(data, keys, values)
|
206
212
|
num = keys.length
|
213
|
+
|
214
|
+
if values.first.length > 1 and num == 1
|
215
|
+
keys = keys * values.first.length
|
216
|
+
num = keys.length
|
217
|
+
end
|
218
|
+
|
207
219
|
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
208
220
|
all = values.unshift keys
|
209
221
|
Misc.zip_fields(all).each do |values|
|
@@ -309,7 +321,6 @@ module TSV
|
|
309
321
|
options = header_options.merge options
|
310
322
|
|
311
323
|
@type = Misc.process_options(options, :type) || :double
|
312
|
-
merge = Misc.process_options(options, :merge) || false
|
313
324
|
|
314
325
|
@sep2 = Misc.process_options(options, :sep2) || "|"
|
315
326
|
@cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
|
@@ -318,6 +329,9 @@ module TSV
|
|
318
329
|
@select= Misc.process_options options, :select
|
319
330
|
@zipped = Misc.process_options options, :zipped
|
320
331
|
@namespace = Misc.process_options options, :namespace
|
332
|
+
merge = Misc.process_options(options, :merge)
|
333
|
+
merge = @zipped if merge.nil?
|
334
|
+
merge = false if merge.nil?
|
321
335
|
|
322
336
|
case @type
|
323
337
|
when :double
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -62,13 +62,16 @@ module TSV
|
|
62
62
|
case
|
63
63
|
when Integer === field
|
64
64
|
field
|
65
|
-
when (field.nil? or field == :key
|
65
|
+
when (field.nil? or field == :key)
|
66
66
|
:key
|
67
|
-
when String === field
|
68
|
-
raise "No fields specified in TSV.identify_field" if fields.nil?
|
67
|
+
when (String === field and not fields.nil?)
|
69
68
|
pos = fields.index field
|
69
|
+
pos ||= :key if key_field == field
|
70
70
|
Log.medium "Field #{ field } was not found. Options: #{fields * ", "}" if pos.nil?
|
71
71
|
pos
|
72
|
+
else
|
73
|
+
raise "No fields specified in TSV.identify_field" if fields.nil?
|
74
|
+
Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
|
72
75
|
end
|
73
76
|
end
|
74
77
|
|
@@ -76,5 +79,55 @@ module TSV
|
|
76
79
|
TSV.identify_field(key_field, fields, field)
|
77
80
|
end
|
78
81
|
|
82
|
+
# Returns this TSV's content in :double form (every field value wrapped in
# a list). A TSV that is already :double is returned as is; otherwise a new
# annotated hash is built:
#   :flat   -> the value list becomes the single field
#   :single -> the value becomes a one-element list in a single field
#   :list   -> each value becomes a one-element list
def to_double
  # Previously the :double branch evaluated +self+ but fell through, so an
  # empty hash was returned instead of the receiver.
  return self if type == :double

  new = {}
  case type
  when :flat
    through do |k,v|
      new[k] = [v]
    end
  when :single
    through do |k,v|
      new[k] = [[v]]
    end
  when :list
    through do |k,v|
      new[k] = v.collect{|e| [e]}
    end
  end
  self.annotate(new)
  new.type = :double
  new
end
|
104
|
+
|
105
|
+
# Returns this TSV's content in :flat form (one list of values per key).
# A TSV that is already :flat is returned as is; otherwise a new annotated
# hash is built:
#   :double -> the values of +field+ (located with identify_field), or of
#              the first field when +field+ is nil
#   :single -> the value wrapped in a list
#   :list   -> a list holding only the first value
def to_flat(field = nil)
  # Previously the :flat branch evaluated +self+ but fell through, so an
  # empty hash was returned instead of the receiver.
  return self if type == :flat

  new = {}
  case type
  when :double
    if field.nil?
      through do |k,v| new[k] = v.first end
    else
      pos = identify_field field
      through do |k,v| new[k] = v[pos] end
    end
  when :single
    through do |k,v|
      new[k] = [v]
    end
  when :list
    through do |k,v|
      new[k] = [v.first]
    end
  end
  self.annotate(new)
  new.type = :flat
  new
end
|
130
|
+
|
131
|
+
|
79
132
|
|
80
133
|
end
|
data/lib/rbbt/tsv.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -184,15 +184,15 @@ module Misc
|
|
184
184
|
ARRAY_MAX_LENGTH = 1000
|
185
185
|
STRING_MAX_LENGTH = ARRAY_MAX_LENGTH * 10
|
186
186
|
|
187
|
-
def self.sanitize_filename(filename, length =
|
187
|
+
def self.sanitize_filename(filename, length = 254)
|
188
188
|
if filename.length > length
|
189
|
-
if filename =~ /(\..{2,
|
189
|
+
if filename =~ /(\..{2,9})$/
|
190
190
|
extension = $1
|
191
191
|
else
|
192
192
|
extension = ''
|
193
193
|
end
|
194
194
|
|
195
|
-
post_fix = "
|
195
|
+
post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension
|
196
196
|
|
197
197
|
filename = filename[0..(length - post_fix.length - 1)] << post_fix
|
198
198
|
else
|
@@ -87,10 +87,11 @@ module NamedArray
|
|
87
87
|
def [](key, clean = false)
|
88
88
|
pos = Misc.field_position(fields, key)
|
89
89
|
elem = super(pos)
|
90
|
+
return elem if clean
|
90
91
|
|
91
92
|
return elem if @fields.nil? or @fields.empty?
|
92
93
|
|
93
|
-
field = NamedArray === @fields ? @fields
|
94
|
+
field = NamedArray === @fields ? @fields[pos, true] : @fields[pos]
|
94
95
|
elem = prepare_entity(elem, field, entity_options)
|
95
96
|
elem
|
96
97
|
end
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -12,6 +12,7 @@ class Step
|
|
12
12
|
class Aborted < Exception; end
|
13
13
|
|
14
14
|
def initialize(path, task = nil, inputs = nil, dependencies = nil, bindings = nil)
|
15
|
+
path = Misc.sanitize_filename path
|
15
16
|
@path = Path.setup(path)
|
16
17
|
@task = task
|
17
18
|
@bindings = bindings
|
@@ -172,6 +173,10 @@ class Step
|
|
172
173
|
FileUtils.mkdir_p File.dirname(path) unless Open.exists? File.dirname(path)
|
173
174
|
begin
|
174
175
|
run(true)
|
176
|
+
rescue Step::Aborted
|
177
|
+
Log.debug("Forked process aborted: #{@path}")
|
178
|
+
log :aborted, "Aborted"
|
179
|
+
exit -1
|
175
180
|
rescue Exception
|
176
181
|
Log.debug("Exception caught on forked process: #{$!.message}")
|
177
182
|
exit -1
|
@@ -15,7 +15,7 @@ app_dir = app_dir[app]
|
|
15
15
|
server = options[:server] || 'thin'
|
16
16
|
Misc.in_dir(app_dir) do
|
17
17
|
require 'rack'
|
18
|
-
ENV["RBBT_FINDER"] = true if options.include?(:finder)
|
18
|
+
ENV["RBBT_FINDER"] = "true" if options.include?(:finder)
|
19
19
|
ENV["RACK_ENV"] = options[:environment] if options.include?(:environment)
|
20
20
|
Rack::Server.start(options.merge(:config => 'config.ru'))
|
21
21
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
|
2
|
+
require 'rbbt/workflow'
|
3
|
+
require 'rbbt/association'
|
4
|
+
require 'rbbt/entity'
|
5
|
+
require 'rbbt/util/tmpfile'
|
6
|
+
require 'test/unit'
|
7
|
+
|
8
|
+
Workflow.require_workflow "Genomics"
|
9
|
+
require 'rbbt/entity/gene'
|
10
|
+
|
11
|
+
Workflow.require_workflow "TSVWorkflow"
|
12
|
+
|
13
|
+
TEST_ASSOCIATIONS =<<-EOF
|
14
|
+
#: :sep=" "#:namespace=Hsa/jan2013
|
15
|
+
#Entity1 Entity2 Score Entity3 Gene
|
16
|
+
a A 1 aa TP53
|
17
|
+
b B 2 bb KRAS
|
18
|
+
c C|K 3|4 cc|kk PTEN|PTEN
|
19
|
+
EOF
|
20
|
+
|
21
|
+
# Exercises Association.open / Association.index_database against a small
# in-line association table (TEST_ASSOCIATIONS) and the 'pina' database.
class TestAssociations < Test::Unit::TestCase

  # Mutable path holders: each test run overwrites their content in setup
  # with fresh temporary locations.
  FAssocs = ""
  DAssocs = ""

  # Writes the association table to a fresh temporary file and picks a fresh
  # persistence directory.
  def setup
    FAssocs.replace TmpFile.tmp_file
    DAssocs.replace TmpFile.tmp_file
    Open.write(FAssocs, TEST_ASSOCIATIONS)
  end

  # Removes the table file and the persistence directory.
  def teardown
    FileUtils.rm FAssocs
    FileUtils.rm_rf DAssocs
  end

  def test_simple_open
    database = Association.open(FAssocs, {}, :dir => DAssocs)
    assert_equal ["C", "K"], database["c"]["Entity2"]
  end

  def test_source_open
    open_options = {:source => "Entity2", :zipped => true}
    database = Association.open(FAssocs, open_options, :dir => DAssocs)
    assert_equal ["c", "3", 'cc', "PTEN"], database["C"].flatten
    assert_equal ["c", "4", 'kk', "PTEN"], database["K"].flatten
  end

  def test_target_open
    open_options = {:source => "Entity2", :target => "Entity3", :zipped => true}
    database = Association.open(FAssocs, open_options, :dir => DAssocs)
    assert_equal ["cc", "c", "3", "PTEN"], database["C"].flatten
    assert_equal ["kk", "c", "4", "PTEN"], database["K"].flatten
  end

  def test_gene_open
    open_options = {:source => "Gene=~Associated Gene Name", :target => "Entity3", :zipped => true}
    database = Association.open(FAssocs, open_options, :dir => DAssocs)
    assert_equal ["aa"], database["TP53"].first
  end

  def test_gene_open_translate
    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013")
    open_options = {:source => "Gene=~Associated Gene Name", :source_type => "Ensembl Gene ID", :target => "Entity3", :zipped => true}
    database = Association.open(FAssocs, open_options, :dir => DAssocs)
    assert_equal ["aa"], database[tp53.ensembl].first
  end

  def test_gene_target_open_translate
    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013")
    open_options = {:target => "Gene=~Associated Gene Name=>Ensembl Gene ID", :source => "Entity3", :zipped => true}
    database = Association.open(FAssocs, open_options, :dir => DAssocs)
    assert_equal [tp53.ensembl], database["aa"].first
  end

  def test_undirected
    require 'rbbt/sources/pina'
    require 'rbbt/gene_associations'
    tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013")
    index_options = {:source_type => "Ensembl Gene ID", :target_type => "Ensembl Gene ID", :undirected => true}
    index = Association.index_database('pina', index_options, {:dir => DAssocs})
    assert Association.connections(index, "Gene" => tp53.pina_interactors.ensembl.compact).any?
  end
end
|
@@ -446,5 +446,33 @@ bb Id4
|
|
446
446
|
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
447
447
|
assert_equal %w(Id1 Id4), tsv1["row3"]["OtherID"]
|
448
448
|
end
|
449
|
+
|
450
|
+
# Attaches a :double TSV to a :flat one and prints both the flat TSV and the
# attach result; the test makes no assertions, so it only checks that the
# calls complete.
def test_attach_flat
  content1 =<<-EOF
#Id ValueA ValueB
row1 a|aa|aaa b
row2 A B
  EOF

  content2 =<<-EOF
#ValueA OtherID
a Id1|Id2
A Id3
  EOF

  tsv1 = tsv2 = index = nil

  # Flat TSV restricted to the ValueA column.
  TmpFile.with_file(content1) do |flat_file|
    tsv1 = TSV.open(File.open(flat_file), :flat, :fields => ["ValueA"], :sep => /\s+/)
    puts tsv1
  end

  # Double TSV that maps ValueA to OtherID.
  TmpFile.with_file(content2) do |double_file|
    tsv2 = TSV.open(File.open(double_file), :double, :sep => /\s+/)
  end

  res = tsv1.attach tsv2, :fields => ["OtherID"]
  puts res

end
|
449
477
|
end
|
450
478
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.3.4
|
4
|
+
version: 5.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -173,6 +173,7 @@ files:
|
|
173
173
|
- lib/rbbt/annotations.rb
|
174
174
|
- lib/rbbt/annotations/annotated_array.rb
|
175
175
|
- lib/rbbt/annotations/util.rb
|
176
|
+
- lib/rbbt/association.rb
|
176
177
|
- lib/rbbt/entity.rb
|
177
178
|
- lib/rbbt/fix_width_table.rb
|
178
179
|
- lib/rbbt/persist.rb
|
@@ -240,6 +241,7 @@ files:
|
|
240
241
|
- test/rbbt/tsv/test_attach.rb
|
241
242
|
- test/rbbt/tsv/test_manipulate.rb
|
242
243
|
- test/rbbt/test_fix_width_table.rb
|
244
|
+
- test/rbbt/test_association.rb
|
243
245
|
- test/rbbt/test_workflow.rb
|
244
246
|
- test/rbbt/workflow/test_step.rb
|
245
247
|
- test/rbbt/workflow/test_task.rb
|
@@ -303,6 +305,7 @@ test_files:
|
|
303
305
|
- test/rbbt/tsv/test_attach.rb
|
304
306
|
- test/rbbt/tsv/test_manipulate.rb
|
305
307
|
- test/rbbt/test_fix_width_table.rb
|
308
|
+
- test/rbbt/test_association.rb
|
306
309
|
- test/rbbt/test_workflow.rb
|
307
310
|
- test/rbbt/workflow/test_step.rb
|
308
311
|
- test/rbbt/workflow/test_task.rb
|