rbbt-util 5.6.0 → 5.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/accessor.rb +2 -4
- data/lib/rbbt/tsv/attach.rb +35 -10
- data/lib/rbbt/tsv/parser.rb +1 -1
- data/lib/rbbt/tsv/util.rb +47 -0
- data/lib/rbbt/util/filecache.rb +32 -0
- data/lib/rbbt/util/log.rb +4 -0
- data/lib/rbbt/util/misc.rb +18 -0
- data/lib/rbbt/util/simpleopt.rb +11 -1
- data/lib/rbbt/workflow.rb +7 -1
- data/lib/rbbt/workflow/accessor.rb +17 -7
- data/lib/rbbt/workflow/step.rb +5 -1
- data/share/rbbt_commands/workflow/task +50 -34
- data/test/rbbt/association/test_index.rb +1 -1
- data/test/rbbt/test_fix_width_table.rb +1 -1
- data/test/rbbt/test_knowledge_base.rb +3 -44
- data/test/rbbt/tsv/test_accessor.rb +1 -0
- data/test/rbbt/tsv/test_attach.rb +12 -6
- data/test/rbbt/util/test_misc.rb +55 -33
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3656ab04698a077becda559ac837570042688fa8
|
4
|
+
data.tar.gz: f54aa65f719933df04767b54e896c74adb805e34
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 929e4be6771df3db4f8507a9af317cb9bd841107d4170ec2ff198f1e1ed2aca0762e5b32851b1653336afdae49dc2729b0143fd65938cedd11660db2a5d687fc
|
7
|
+
data.tar.gz: 63ce2e0a7e12c9e6c6204a5d79764e9691ff781ae68e27586b135c524710ce557ce89a958aff4b7c101aa3cfd67897fdcb0ec415b5b09f315dad8a27ce437199
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -484,10 +484,8 @@ module TSV
|
|
484
484
|
|
485
485
|
str = ""
|
486
486
|
|
487
|
-
|
488
|
-
|
489
|
-
str << "#" << key_field << "\t" << fields * "\t" << "\n"
|
490
|
-
end
|
487
|
+
entry_hash = no_options ? {} : (ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}
|
488
|
+
str = TSV.header_lines(key_field, fields, entry_hash)
|
491
489
|
|
492
490
|
with_unnamed do
|
493
491
|
if keys.nil?
|
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -3,7 +3,11 @@ require 'rbbt/tsv/attach/util'
|
|
3
3
|
module TSV
|
4
4
|
|
5
5
|
# Merge columns from different rows of a file
|
6
|
-
def self.merge_row_fields(input, output,
|
6
|
+
def self.merge_row_fields(input, output, options = {})
|
7
|
+
options = Misc.add_defaults options, :sep => "\t"
|
8
|
+
key_field, fields = Misc.process_options options, :key_field, :fields
|
9
|
+
sep = options[:sep]
|
10
|
+
|
7
11
|
is = case
|
8
12
|
when (String === input and not input.index("\n") and input.length < 250 and File.exists?(input))
|
9
13
|
CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
|
@@ -12,16 +16,25 @@ module TSV
|
|
12
16
|
else
|
13
17
|
input
|
14
18
|
end
|
19
|
+
|
20
|
+
if key_field.nil? or fields.nil?
|
21
|
+
parser = TSV::Parser.new(is, options.dup)
|
22
|
+
fields ||= parser.fields
|
23
|
+
key_field ||= parser.key_field
|
24
|
+
line = parser.first_line
|
25
|
+
else
|
26
|
+
line = is.gets
|
27
|
+
end
|
15
28
|
|
16
29
|
current_key = nil
|
17
30
|
current_parts = []
|
18
31
|
|
19
32
|
done = false
|
20
33
|
Open.write(output) do |os|
|
34
|
+
os.puts TSV.header_lines(key_field, fields, options)
|
21
35
|
|
22
|
-
|
23
|
-
|
24
|
-
key, *parts = is.gets.sub("\n",'').split(sep, -1)
|
36
|
+
while line
|
37
|
+
key, *parts = line.sub("\n",'').split(sep, -1)
|
25
38
|
current_key ||= key
|
26
39
|
case
|
27
40
|
when key.nil?
|
@@ -39,7 +52,7 @@ module TSV
|
|
39
52
|
current_parts = parts
|
40
53
|
end
|
41
54
|
|
42
|
-
|
55
|
+
line = is.gets
|
43
56
|
end
|
44
57
|
|
45
58
|
os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
|
@@ -48,10 +61,14 @@ module TSV
|
|
48
61
|
end
|
49
62
|
|
50
63
|
# Merge two files with the same keys and different fields
|
51
|
-
def self.merge_different_fields(file1, file2, output,
|
64
|
+
def self.merge_different_fields(file1, file2, output, options = {})
|
65
|
+
options = Misc.add_defaults options, :sep => "\t"
|
66
|
+
monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
|
67
|
+
sep = options[:sep] || "\t"
|
68
|
+
|
52
69
|
case
|
53
70
|
when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exists?(file1))
|
54
|
-
size = CMD.cmd("wc -
|
71
|
+
size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
|
55
72
|
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
|
56
73
|
when (String === file1 or StringIO === file1)
|
57
74
|
size = file1.length if monitor
|
@@ -80,19 +97,27 @@ module TSV
|
|
80
97
|
|
81
98
|
key1 = key2 = nil
|
82
99
|
while key1.nil?
|
83
|
-
while (line1 = file1.gets) =~
|
100
|
+
while (line1 = file1.gets) =~ /^#/
|
101
|
+
key_field1, *fields1 = line1.strip.sub('#','').split(sep)
|
102
|
+
end
|
84
103
|
key1, *parts1 = line1.sub("\n",'').split(sep, -1)
|
85
104
|
cols1 = parts1.length
|
86
105
|
end
|
87
106
|
|
88
107
|
while key2.nil?
|
89
|
-
while (line2 = file2.gets) =~
|
108
|
+
while (line2 = file2.gets) =~ /^#/
|
109
|
+
key_field2, *fields2 = line2.strip.sub('#','').split(sep)
|
110
|
+
end
|
90
111
|
key2, *parts2 = line2.sub("\n",'').split(sep, -1)
|
91
112
|
cols2 = parts2.length
|
92
113
|
end
|
93
114
|
|
94
115
|
progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
|
95
116
|
|
117
|
+
entry_hash = options
|
118
|
+
entry_hash.delete :sep if entry_hash[:sep] == "\t"
|
119
|
+
output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
|
120
|
+
|
96
121
|
key = key1 < key2 ? key1 : key2
|
97
122
|
parts = [""] * (cols1 + cols2)
|
98
123
|
while not (done1 and done2)
|
@@ -189,7 +214,7 @@ module TSV
|
|
189
214
|
|
190
215
|
def merge_different_fields(other, options = {})
|
191
216
|
TmpFile.with_file do |output|
|
192
|
-
TSV.merge_different_fields(self, other, output, options
|
217
|
+
TSV.merge_different_fields(self, other, output, options)
|
193
218
|
tsv = TSV.open output, options
|
194
219
|
tsv.key_field = self.key_field unless self.key_field.nil?
|
195
220
|
tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
|
data/lib/rbbt/tsv/parser.rb
CHANGED
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -1,6 +1,40 @@
|
|
1
1
|
require 'rbbt/resource/path'
|
2
2
|
module TSV
|
3
3
|
|
4
|
+
def self.reorder_stream(stream, positions, sep = "\t")
|
5
|
+
Misc.open_pipe do |sin|
|
6
|
+
line = stream.gets
|
7
|
+
while line =~ /^#\:/
|
8
|
+
sin.puts line
|
9
|
+
line = stream.gets
|
10
|
+
end
|
11
|
+
while line =~ /^#/
|
12
|
+
if Hash === positions
|
13
|
+
new = (0..line.split(sep).length-1).to_a
|
14
|
+
positions.each do |k,v|
|
15
|
+
new[k] = v
|
16
|
+
new[v] = k
|
17
|
+
end
|
18
|
+
positions = new
|
19
|
+
end
|
20
|
+
sin.puts "#" + line.sub!(/^#/,'').strip.split(sep).values_at(*positions).compact * sep
|
21
|
+
line = stream.gets
|
22
|
+
end
|
23
|
+
while line
|
24
|
+
if Hash === positions
|
25
|
+
new = (0..line.split(sep).length-1).to_a
|
26
|
+
positions.each do |k,v|
|
27
|
+
new[k] = v
|
28
|
+
new[v] = k
|
29
|
+
end
|
30
|
+
positions = new
|
31
|
+
end
|
32
|
+
sin.puts line.strip.split(sep).values_at(*positions) * sep
|
33
|
+
line = stream.gets
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
4
38
|
def self.field_match_counts(file, values, options = {})
|
5
39
|
options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
|
6
40
|
persist_options = Misc.pull_keys options, :persist
|
@@ -52,6 +86,7 @@ module TSV
|
|
52
86
|
when String === file
|
53
87
|
Open.open(file, open_options)
|
54
88
|
when file.respond_to?(:gets)
|
89
|
+
file.rewind if file.respond_to?(:rewind) and file.eof?
|
55
90
|
file
|
56
91
|
else
|
57
92
|
raise "Cannot get stream from: #{file.inspect}"
|
@@ -76,6 +111,18 @@ module TSV
|
|
76
111
|
Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
|
77
112
|
end
|
78
113
|
end
|
114
|
+
|
115
|
+
def self.header_lines(key_field, fields, entry_hash = {})
|
116
|
+
sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
|
117
|
+
|
118
|
+
str = ""
|
119
|
+
str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
|
120
|
+
if fields
|
121
|
+
str << "#" << key_field << sep << fields * sep << "\n"
|
122
|
+
end
|
123
|
+
|
124
|
+
str
|
125
|
+
end
|
79
126
|
|
80
127
|
def identify_field(field)
|
81
128
|
TSV.identify_field(key_field, fields, field)
|
data/lib/rbbt/util/filecache.rb
CHANGED
@@ -55,4 +55,36 @@ module FileCache
|
|
55
55
|
|
56
56
|
FileUtils.rm path if File.exist? path
|
57
57
|
end
|
58
|
+
|
59
|
+
def self.cache_online_elements(ids, pattern = nil, &block)
|
60
|
+
ids = [ids] unless Array === ids
|
61
|
+
|
62
|
+
result_files = {}
|
63
|
+
missing = []
|
64
|
+
ids.each do |id|
|
65
|
+
filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
|
66
|
+
|
67
|
+
if FileCache.found(filename)
|
68
|
+
result_files[id] = FileCache.path(filename)
|
69
|
+
else
|
70
|
+
missing << id
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
yield(missing).each do |id, content|
|
75
|
+
filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
|
76
|
+
path = FileCache.path(filename)
|
77
|
+
Open.write(path, content)
|
78
|
+
result_files[id] = content
|
79
|
+
end
|
80
|
+
|
81
|
+
missing.each do |id|
|
82
|
+
filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
|
83
|
+
result = yield id
|
84
|
+
File.open{|f| f.write(path = FileCache.path(filename)) }
|
85
|
+
result_files[id] = path
|
86
|
+
end
|
87
|
+
|
88
|
+
result_files
|
89
|
+
end
|
58
90
|
end
|
data/lib/rbbt/util/log.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -1318,6 +1318,7 @@ end
|
|
1318
1318
|
# Divides the array into +num+ chunks of the same size by placing one
|
1319
1319
|
# element in each chunk iteratively.
|
1320
1320
|
def self.divide(array, num)
|
1321
|
+
num = 1 if num == 0
|
1321
1322
|
chunks = []
|
1322
1323
|
num.to_i.times do chunks << [] end
|
1323
1324
|
array.each_with_index{|e, i|
|
@@ -1341,6 +1342,23 @@ end
|
|
1341
1342
|
chunks
|
1342
1343
|
end
|
1343
1344
|
|
1345
|
+
def self.open_pipe
|
1346
|
+
sout, sin = IO.pipe
|
1347
|
+
raise "No block given" unless block_given?
|
1348
|
+
Thread.new{
|
1349
|
+
begin
|
1350
|
+
yield sin
|
1351
|
+
rescue
|
1352
|
+
Log.exception $!
|
1353
|
+
raise $!
|
1354
|
+
ensure
|
1355
|
+
sin.close
|
1356
|
+
end
|
1357
|
+
}
|
1358
|
+
sout
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
|
1344
1362
|
def self.zip_fields(array)
|
1345
1363
|
return [] if array.empty?
|
1346
1364
|
array[0].zip(*array[1..-1])
|
data/lib/rbbt/util/simpleopt.rb
CHANGED
@@ -28,11 +28,21 @@ module SOPT
|
|
28
28
|
@description ||= "Missing"
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
31
|
def self.shortcuts
|
33
32
|
@shortcuts ||= []
|
34
33
|
end
|
35
34
|
|
35
|
+
def self.delete_inputs(inputs)
|
36
|
+
inputs.each do |input|
|
37
|
+
input = input.to_s
|
38
|
+
self.shortcuts.delete self.input_shortcuts.delete(input)
|
39
|
+
self.inputs.delete input
|
40
|
+
self.input_types.delete input
|
41
|
+
self.input_defaults.delete input
|
42
|
+
self.input_descriptions.delete input
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
36
46
|
def self.all
|
37
47
|
@all ||= {}
|
38
48
|
end
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -28,7 +28,7 @@ module Workflow
|
|
28
28
|
def self.load_workflow_file(filename)
|
29
29
|
begin
|
30
30
|
$LOAD_PATH.unshift(File.join(File.dirname(File.expand_path(filename)), 'lib'))
|
31
|
-
require filename
|
31
|
+
require File.expand_path(filename)
|
32
32
|
Log.debug{"Workflow loaded from: #{ filename }"}
|
33
33
|
return true
|
34
34
|
rescue Exception
|
@@ -99,6 +99,12 @@ module Workflow
|
|
99
99
|
|
100
100
|
# Load locally
|
101
101
|
|
102
|
+
if wf_name =~ /::\w+$/
|
103
|
+
clean_name = wf_name.sub(/::.*/,'')
|
104
|
+
Log.info{"Looking for '#{wf_name}' in '#{clean_name}'"}
|
105
|
+
wf_name = clean_name
|
106
|
+
end
|
107
|
+
|
102
108
|
Log.info{"Loading workflow #{wf_name}"}
|
103
109
|
require_local_workflow(wf_name) or
|
104
110
|
require_local_workflow(Misc.snake_case(wf_name)) or
|
@@ -184,7 +184,18 @@ class Step
|
|
184
184
|
end
|
185
185
|
end
|
186
186
|
|
187
|
-
|
187
|
+
def provenance
|
188
|
+
provenance = {}
|
189
|
+
dependencies.each do |dep|
|
190
|
+
next unless dep.path.exists?
|
191
|
+
if File.exists? dep.info_file
|
192
|
+
provenance[dep.path] = dep.provenance if File.exists? dep.path
|
193
|
+
else
|
194
|
+
provenance[dep.path] = nil
|
195
|
+
end
|
196
|
+
end
|
197
|
+
{:inputs => info[:inputs], :provenance => provenance}
|
198
|
+
end
|
188
199
|
end
|
189
200
|
|
190
201
|
module Workflow
|
@@ -264,16 +275,15 @@ module Workflow
|
|
264
275
|
[taskname].concat(rec_dependencies(taskname)).inject({}){|acc, tn| acc.merge tasks[tn.to_sym].input_options}
|
265
276
|
end
|
266
277
|
|
267
|
-
|
268
278
|
def real_dependencies(task, jobname, inputs, dependencies)
|
269
279
|
real_dependencies = []
|
270
280
|
dependencies.each do |dependency|
|
271
|
-
real_dependencies << case
|
272
|
-
when Step
|
281
|
+
real_dependencies << case dependency
|
282
|
+
when Step
|
273
283
|
dependency
|
274
|
-
when Symbol
|
284
|
+
when Symbol
|
275
285
|
job(dependency, jobname, inputs)
|
276
|
-
when Proc
|
286
|
+
when Proc
|
277
287
|
dependency.call jobname, inputs
|
278
288
|
end
|
279
289
|
end
|
@@ -303,7 +313,6 @@ module Workflow
|
|
303
313
|
}
|
304
314
|
end
|
305
315
|
|
306
|
-
|
307
316
|
def id_for(path)
|
308
317
|
if workdir.respond_to? :find
|
309
318
|
workdir_find = workdir.find
|
@@ -322,4 +331,5 @@ module Workflow
|
|
322
331
|
|
323
332
|
Misc.path_relative_to(workdir_find, File.dirname(path)).sub(/([^\/]+)\/.*/,'\1')
|
324
333
|
end
|
334
|
+
|
325
335
|
end
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -6,7 +6,7 @@ require 'rbbt/workflow/usage'
|
|
6
6
|
|
7
7
|
YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
|
8
8
|
|
9
|
-
def usage(workflow = nil, task = nil, error = nil)
|
9
|
+
def usage(workflow = nil, task = nil, error = nil, backtrace = nil)
|
10
10
|
puts SOPT.doc
|
11
11
|
puts "## WORKFLOW"
|
12
12
|
puts
|
@@ -27,7 +27,8 @@ def usage(workflow = nil, task = nil, error = nil)
|
|
27
27
|
workflow.doc(task)
|
28
28
|
if error
|
29
29
|
puts
|
30
|
-
puts "Error: " << error
|
30
|
+
puts Term::ANSIColor.red("Error: ") << error
|
31
|
+
puts Term::ANSIColor.red("Backtrace: ") << backtrace * "\n"
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -112,8 +113,10 @@ options = SOPT.get <<EOF
|
|
112
113
|
-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
|
113
114
|
-jn--jobname* Job name to use. The name 'Default' is used by default:
|
114
115
|
-pn--printname Print the name of the job and exit without starting it:
|
116
|
+
-jf--job_file* Output one of the job produced files:
|
115
117
|
-wd--workdir* Change the working directory of the workflow:
|
116
118
|
-O--output* Save job result into file:
|
119
|
+
--provenance Report the jobs provenance:
|
117
120
|
--fork Run job asyncronously:
|
118
121
|
EOF
|
119
122
|
|
@@ -178,13 +181,13 @@ workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir))) if opt
|
|
178
181
|
job = workflow.job(task.name, name, job_options)
|
179
182
|
|
180
183
|
# clean job
|
181
|
-
if clean
|
184
|
+
if clean
|
182
185
|
job.clean
|
183
186
|
sleep 1
|
184
187
|
job = workflow.job(task.name, name, job_options)
|
185
188
|
end
|
186
189
|
|
187
|
-
if recursive_clean
|
190
|
+
if recursive_clean
|
188
191
|
job.recursive_clean
|
189
192
|
sleep 1
|
190
193
|
job = workflow.job(task.name, name, job_options)
|
@@ -192,46 +195,59 @@ end
|
|
192
195
|
|
193
196
|
# run
|
194
197
|
begin
|
195
|
-
if do_exec
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
198
|
+
if do_exec
|
199
|
+
res = job.exec
|
200
|
+
case
|
201
|
+
when Array === res
|
202
|
+
puts res * "\n"
|
203
|
+
when TSV === res
|
204
|
+
puts res
|
205
|
+
when Hash === res
|
206
|
+
puts res.to_yaml
|
207
|
+
else
|
208
|
+
puts res
|
209
|
+
end
|
210
|
+
exit 0
|
211
|
+
end
|
212
|
+
|
213
|
+
if do_fork
|
214
|
+
job.fork
|
215
|
+
while not job.done?
|
216
|
+
message = job.messages ? job.messages.last : "no message"
|
217
|
+
Log.debug "#{job.status}: #{message}"
|
218
|
+
sleep 2
|
219
|
+
end
|
220
|
+
raise job.messages.last if job.error?
|
221
|
+
res = job.load
|
204
222
|
else
|
205
|
-
|
223
|
+
res = job.run(true)
|
206
224
|
end
|
207
|
-
exit 0
|
208
|
-
end
|
209
225
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
sleep 2
|
226
|
+
|
227
|
+
if options.delete(:provenance)
|
228
|
+
require 'pp'
|
229
|
+
pp job.provenance
|
230
|
+
exit 0
|
216
231
|
end
|
217
|
-
raise job.messages.last if job.error?
|
218
|
-
res = job.load
|
219
|
-
else
|
220
|
-
res = job.run(true)
|
221
|
-
end
|
222
232
|
|
223
|
-
if options.delete(:printname)
|
224
|
-
|
225
|
-
|
226
|
-
else
|
227
|
-
|
228
|
-
end
|
233
|
+
if options.delete(:printname)
|
234
|
+
puts job.name
|
235
|
+
exit 0
|
236
|
+
else
|
237
|
+
Log.low "Job name: #{job.name}"
|
238
|
+
end
|
229
239
|
rescue ParameterException
|
230
|
-
|
240
|
+
SOPT.delete_inputs(workflow.rec_inputs(task.name))
|
241
|
+
usage(workflow, task, $!.message, $!.backtrace)
|
231
242
|
end
|
232
243
|
|
233
244
|
out = options.include?(:output) ? File.open(options[:output], 'wb') : STDOUT
|
234
245
|
|
246
|
+
if job_file = options.delete(:job_file)
|
247
|
+
out.puts res.file(job_file).read
|
248
|
+
exit 0
|
249
|
+
end
|
250
|
+
|
235
251
|
if Step === res
|
236
252
|
out.puts Open.read(res.path) if File.exists? res.path
|
237
253
|
else
|
@@ -55,53 +55,12 @@ class TestKnowledgeBase < Test::Unit::TestCase
|
|
55
55
|
assert @kb.all_databases.include? "pina"
|
56
56
|
end
|
57
57
|
|
58
|
-
def
|
59
|
-
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
|
60
|
-
kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
|
61
|
-
kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
|
62
|
-
end
|
63
|
-
|
64
|
-
def __test_subset
|
58
|
+
def test_subset
|
65
59
|
gene = "TP53"
|
66
60
|
found = Genomics.knowledge_base.identify :pina, gene
|
67
|
-
p53_interactors =
|
61
|
+
p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity
|
68
62
|
|
69
|
-
|
70
|
-
Misc.profile do
|
71
|
-
puts Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).length
|
72
|
-
end
|
73
|
-
ddd 2
|
74
|
-
#assert Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).target_entities.name.include? "MDM2"
|
75
|
-
end
|
76
|
-
|
77
|
-
def test_benchmark
|
78
|
-
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
|
79
|
-
kb = KnowledgeBase.new Rbbt.tmp.test.kb2
|
80
|
-
kb.namespace = "Hsa/jan2013"
|
81
|
-
|
82
|
-
require 'rbbt/sources/COSMIC'
|
83
|
-
require 'rbbt/entity/genomic_mutation'
|
84
|
-
mutations = tp53.COSMIC_mutations
|
85
|
-
Misc.benchmark(10) do
|
86
|
-
name = "mutations"
|
87
|
-
kb.add_index name, "Ensembl Gene ID", "Genomic Mutation", "Change"
|
88
|
-
kb.write name do
|
89
|
-
mutations.each do |gm|
|
90
|
-
kb.add name, tp53, gm, gm.base
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
def test_benchmark2
|
97
|
-
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
|
98
|
-
kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
|
99
|
-
kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
|
100
|
-
l = nil
|
101
|
-
Misc.benchmark(1000) do
|
102
|
-
l = tp53.transcripts.length
|
103
|
-
end
|
104
|
-
assert l > 0
|
63
|
+
assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
|
105
64
|
end
|
106
65
|
|
107
66
|
def test_syndication
|
@@ -87,7 +87,6 @@ B Id3
|
|
87
87
|
|
88
88
|
tsv1.attach_source_key tsv2, "ValueB"
|
89
89
|
|
90
|
-
|
91
90
|
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
92
91
|
assert_equal "Id1", tsv1["row1"]["OtherID"]
|
93
92
|
end
|
@@ -290,7 +289,7 @@ row6 dd dd ee
|
|
290
289
|
EOF
|
291
290
|
|
292
291
|
TmpFile.with_file do |f|
|
293
|
-
TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, " ")
|
292
|
+
TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, :sep => " ")
|
294
293
|
assert_equal result, Open.read(f)
|
295
294
|
end
|
296
295
|
end
|
@@ -327,6 +326,7 @@ row6 dd dd ee
|
|
327
326
|
|
328
327
|
def test_merge_different_rows_split_lines
|
329
328
|
file1 =<<-EOF
|
329
|
+
#ID,letterA,letterB,letterC
|
330
330
|
row6,dd,dd,ee
|
331
331
|
row1,a,b,c
|
332
332
|
row1,aa,bb,cc
|
@@ -334,6 +334,7 @@ row2,A,B,C
|
|
334
334
|
row3,1,2,3
|
335
335
|
EOF
|
336
336
|
file2 =<<-EOF
|
337
|
+
#ID,letterD,letterE
|
337
338
|
row20,rr,rr
|
338
339
|
row1,d,e
|
339
340
|
row2,D,E
|
@@ -342,6 +343,8 @@ row4,x,y
|
|
342
343
|
|
343
344
|
# Might be slightly different ...
|
344
345
|
result1 =<<-EOF
|
346
|
+
#: :sep=,
|
347
|
+
#ID,letterA,letterB,letterC,letterD,letterE
|
345
348
|
row1,aa|a,bb|b,cc|c,d,e
|
346
349
|
row2,A,B,C,D,E
|
347
350
|
row20,,,,rr,rr
|
@@ -350,6 +353,8 @@ row4,,,,x,y
|
|
350
353
|
row6,dd,dd,ee,,
|
351
354
|
EOF
|
352
355
|
result2 =<<-EOF
|
356
|
+
#: :sep=,
|
357
|
+
#ID,letterA,letterB,letterC,letterD,letterE
|
353
358
|
row1,a|aa,b|bb,c|cc,d,e
|
354
359
|
row2,A,B,C,D,E
|
355
360
|
row20,,,,rr,rr
|
@@ -359,8 +364,7 @@ row6,dd,dd,ee,,
|
|
359
364
|
EOF
|
360
365
|
|
361
366
|
TmpFile.with_file do |f|
|
362
|
-
TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, ','
|
363
|
-
|
367
|
+
TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, :sep => ','
|
364
368
|
# ... so check for either
|
365
369
|
assert(Open.read(f) == result1 || Open.read(f) == result2)
|
366
370
|
end
|
@@ -403,6 +407,7 @@ row6,dd,dd,ee,,
|
|
403
407
|
|
404
408
|
def test_merge_rows
|
405
409
|
file1 =<<-EOF
|
410
|
+
#ID,letterA,letterB,letterC
|
406
411
|
row1,a,b,c
|
407
412
|
row1,aa,bb,cc
|
408
413
|
row2,A,B,C
|
@@ -410,8 +415,9 @@ row3,1,2,3
|
|
410
415
|
EOF
|
411
416
|
TmpFile.with_file(file1) do |input|
|
412
417
|
TmpFile.with_file() do |output|
|
413
|
-
TSV.merge_row_fields Open.open(input), output
|
414
|
-
assert Open.read(output) =~
|
418
|
+
TSV.merge_row_fields Open.open(input), output, :sep => ','
|
419
|
+
assert Open.read(output) =~ /^#ID,letterA,letterB,letterC$/
|
420
|
+
assert Open.read(output).index "a|aa"
|
415
421
|
end
|
416
422
|
end
|
417
423
|
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -6,11 +6,11 @@ require 'rbbt/entity'
|
|
6
6
|
|
7
7
|
class TestMisc < Test::Unit::TestCase
|
8
8
|
|
9
|
-
def
|
9
|
+
def test_humanize
|
10
10
|
assert_equal "mutation_enrichment", Misc.humanize("MutationEnrichment")
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
13
|
+
def test_fixutf8
|
14
14
|
string = "abc\xffdef"
|
15
15
|
string = string.force_encoding("UTF-8") if string.respond_to? :force_encoding
|
16
16
|
assert(! string.valid_encoding?) if string.respond_to? :valid_encoding?
|
@@ -19,37 +19,37 @@ class TestMisc < Test::Unit::TestCase
|
|
19
19
|
assert( Misc.fixutf8(string).valid_encoding) if string.respond_to? :valid_encoding
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def test_colors_for
|
23
23
|
colors, used = Misc.colors_for([1,2,2,1,2,1,2,2,3,3,2,3,2])
|
24
24
|
assert_equal Misc::COLOR_LIST[1], used[2]
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
27
|
+
def test_total_length
|
28
28
|
ranges = [(0..100), (50..150), (120..160)]
|
29
29
|
ranges = [(0..100), (50..150), (120..160), (51..70)]
|
30
30
|
assert_equal 161, Misc.total_length(ranges)
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
33
|
+
def test_id_filename?
|
34
34
|
TmpFile.with_file("") do |file|
|
35
35
|
assert Misc.is_filename?(file)
|
36
36
|
assert ! Misc.is_filename?("TEST STRING")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
40
|
+
def test_merge_sorted_arrays
|
41
41
|
assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
44
|
+
def test_intersect_sorted_arrays
|
45
45
|
assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
|
46
46
|
end
|
47
47
|
|
48
|
-
def
|
48
|
+
def test_sorted_array_matches
|
49
49
|
assert_equal [1,3], Misc.sorted_array_hits(%w(a b c d e), %w(b d))
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
52
|
+
def test_binary_include?
|
53
53
|
a = %w(a b c d e).sort
|
54
54
|
assert Misc.binary_include?(a, "a")
|
55
55
|
assert(!Misc.binary_include?(a, "z"))
|
@@ -58,24 +58,24 @@ class TestMisc < Test::Unit::TestCase
|
|
58
58
|
assert(Misc.binary_include?(a, "d"))
|
59
59
|
end
|
60
60
|
|
61
|
-
def
|
61
|
+
def test_process_to_hash
|
62
62
|
list = [1,2,3,4]
|
63
63
|
assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
|
64
64
|
end
|
65
65
|
|
66
|
-
# def
|
66
|
+
# def test_pdf2text_example
|
67
67
|
# assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
|
68
68
|
# end
|
69
69
|
#
|
70
|
-
# def
|
70
|
+
# def test_pdf2text_EPAR
|
71
71
|
# assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
|
72
72
|
# end
|
73
73
|
#
|
74
|
-
# def
|
74
|
+
# def test_pdf2text_wrong
|
75
75
|
# assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
|
76
76
|
# end
|
77
77
|
|
78
|
-
def
|
78
|
+
def test_string2hash
|
79
79
|
assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
|
80
80
|
assert_equal(true, Misc.string2hash(":true")[:true])
|
81
81
|
assert_equal(true, Misc.string2hash("true")["true"])
|
@@ -93,17 +93,17 @@ class TestMisc < Test::Unit::TestCase
|
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
|
-
def
|
96
|
+
def test_named_array
|
97
97
|
a = NamedArray.setup([1,2,3,4], %w(a b c d))
|
98
98
|
assert_equal(1, a['a'])
|
99
99
|
end
|
100
100
|
|
101
|
-
# def
|
101
|
+
# def test_path_relative_to
|
102
102
|
# assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
|
103
103
|
# end
|
104
104
|
|
105
|
-
# def
|
106
|
-
#
|
105
|
+
# def test_chunk
|
106
|
+
# test =<<-EOF
|
107
107
|
#This is an example file. Entries are separated by Entry
|
108
108
|
#-- Entry
|
109
109
|
#1
|
@@ -118,7 +118,7 @@ class TestMisc < Test::Unit::TestCase
|
|
118
118
|
# assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
|
119
119
|
# end
|
120
120
|
|
121
|
-
def
|
121
|
+
def test_hash2string
|
122
122
|
hash = {}
|
123
123
|
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
124
124
|
|
@@ -136,14 +136,14 @@ class TestMisc < Test::Unit::TestCase
|
|
136
136
|
|
137
137
|
end
|
138
138
|
|
139
|
-
def
|
139
|
+
def test_merge
|
140
140
|
a = [[1],[2]]
|
141
141
|
a = NamedArray.setup a, %w(1 2)
|
142
142
|
a.merge [3,4]
|
143
143
|
assert_equal [1,3], a[0]
|
144
144
|
end
|
145
145
|
|
146
|
-
def
|
146
|
+
def test_indiferent_hash
|
147
147
|
a = {:a => 1, "b" => 2}
|
148
148
|
a.extend IndiferentHash
|
149
149
|
|
@@ -153,7 +153,7 @@ class TestMisc < Test::Unit::TestCase
|
|
153
153
|
assert_equal 2, a[:b]
|
154
154
|
end
|
155
155
|
|
156
|
-
def
|
156
|
+
def test_lockfile
|
157
157
|
|
158
158
|
TmpFile.with_file do |tmpfile|
|
159
159
|
pids = []
|
@@ -177,7 +177,7 @@ class TestMisc < Test::Unit::TestCase
|
|
177
177
|
end
|
178
178
|
end
|
179
179
|
|
180
|
-
def
|
180
|
+
def test_positions2hash
|
181
181
|
inputs = Misc.positional2hash([:one, :two, :three], 1, :two => 2, :four => 4)
|
182
182
|
assert_equal 1, inputs[:one]
|
183
183
|
assert_equal 2, inputs[:two]
|
@@ -185,7 +185,7 @@ class TestMisc < Test::Unit::TestCase
|
|
185
185
|
assert_equal nil, inputs[:four]
|
186
186
|
end
|
187
187
|
|
188
|
-
def
|
188
|
+
def test_mean
|
189
189
|
assert_equal 2, Misc.mean([1,2,3])
|
190
190
|
assert_equal 3, Misc.mean([1,2,3,4,5])
|
191
191
|
end
|
@@ -194,32 +194,32 @@ class TestMisc < Test::Unit::TestCase
|
|
194
194
|
assert_equal Math.sqrt(2), Misc.sd([1,3])
|
195
195
|
end
|
196
196
|
|
197
|
-
def
|
197
|
+
def test_align_small
|
198
198
|
reference = "AABCDEBD"
|
199
199
|
sequence = "ABCD"
|
200
200
|
assert_equal '-ABCD---', Misc.fast_align(reference, sequence).last
|
201
201
|
end
|
202
202
|
|
203
|
-
def
|
203
|
+
def test_align_real
|
204
204
|
reference = "SGNECNKAIDGNKDTFWHTFYGANGDPKPPPHTYTIDMKTTQNVNGLSMLPRQDGNQNGWIGRHEVYLSSDGTNW"
|
205
205
|
sequence = "TYTIDMKTTQNVNGLSML"
|
206
206
|
assert_equal "--------------------------------TYTIDMKTTQNVNGLSML-------------------------", Misc.fast_align(reference, sequence).last
|
207
207
|
end
|
208
208
|
|
209
|
-
def
|
209
|
+
def test_divide
|
210
210
|
assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
|
211
211
|
end
|
212
212
|
|
213
|
-
def
|
213
|
+
def test_ordered_divide
|
214
214
|
assert_equal 5, Misc.ordered_divide(%w(1 2 3 4 5 6 7 8 9),2).length
|
215
215
|
end
|
216
216
|
|
217
|
-
def
|
217
|
+
def test_collapse_ranges
|
218
218
|
ranges = [(0..100), (50..150), (51..61),(200..250), (300..324),(320..350)]
|
219
219
|
assert_equal [(0..150),(200..250), (300..350)], Misc.collapse_ranges(ranges)
|
220
220
|
end
|
221
221
|
|
222
|
-
def
|
222
|
+
def test_humanize
|
223
223
|
str1 = "test_string"
|
224
224
|
str2 = "TEST_string"
|
225
225
|
str3 = "test"
|
@@ -229,18 +229,18 @@ class TestMisc < Test::Unit::TestCase
|
|
229
229
|
assert_equal "Test", Misc.humanize(str3)
|
230
230
|
end
|
231
231
|
|
232
|
-
def
|
232
|
+
def test_snake_case
|
233
233
|
str1 = "ACRONIMTest"
|
234
234
|
str2 = "ACRONIM_test"
|
235
235
|
assert_equal "ACRONIM_test", Misc.snake_case(str1)
|
236
236
|
assert_equal "ACRONIM_test", Misc.snake_case(str2)
|
237
237
|
end
|
238
238
|
|
239
|
-
def
|
239
|
+
def test_correct_vcf_mutations
|
240
240
|
assert_equal [737407, ["-----", "-----G", "-----GTTAAT"]], Misc.correct_vcf_mutation(737406, "GTTAAT", "G,GG,GGTTAAT")
|
241
241
|
end
|
242
242
|
|
243
|
-
def
|
243
|
+
def test_fingerprint
|
244
244
|
puts Misc.fingerprint({:a => 1})
|
245
245
|
end
|
246
246
|
|
@@ -253,4 +253,26 @@ class TestMisc < Test::Unit::TestCase
|
|
253
253
|
end
|
254
254
|
end
|
255
255
|
end
|
256
|
+
|
257
|
+
def test_pipe
|
258
|
+
t = 5
|
259
|
+
stream = Misc.open_pipe do |sin|
|
260
|
+
t.times do |i|
|
261
|
+
puts "Calculating line #{ i }"
|
262
|
+
sleep 0.5
|
263
|
+
sin.puts "LINE #{ i }"
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
time = Time.now
|
268
|
+
lines = []
|
269
|
+
while line = stream.gets
|
270
|
+
lines << line.strip
|
271
|
+
end
|
272
|
+
time_spent = Time.new - time
|
273
|
+
|
274
|
+
assert time_spent >= t * 0.5
|
275
|
+
assert time_spent <= (t+1) * 0.5
|
276
|
+
assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
|
277
|
+
end
|
256
278
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.6.0
|
4
|
+
version: 5.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|