rbbt-util 5.6.0 → 5.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/accessor.rb +2 -4
- data/lib/rbbt/tsv/attach.rb +35 -10
- data/lib/rbbt/tsv/parser.rb +1 -1
- data/lib/rbbt/tsv/util.rb +47 -0
- data/lib/rbbt/util/filecache.rb +32 -0
- data/lib/rbbt/util/log.rb +4 -0
- data/lib/rbbt/util/misc.rb +18 -0
- data/lib/rbbt/util/simpleopt.rb +11 -1
- data/lib/rbbt/workflow.rb +7 -1
- data/lib/rbbt/workflow/accessor.rb +17 -7
- data/lib/rbbt/workflow/step.rb +5 -1
- data/share/rbbt_commands/workflow/task +50 -34
- data/test/rbbt/association/test_index.rb +1 -1
- data/test/rbbt/test_fix_width_table.rb +1 -1
- data/test/rbbt/test_knowledge_base.rb +3 -44
- data/test/rbbt/tsv/test_accessor.rb +1 -0
- data/test/rbbt/tsv/test_attach.rb +12 -6
- data/test/rbbt/util/test_misc.rb +55 -33
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3656ab04698a077becda559ac837570042688fa8
|
4
|
+
data.tar.gz: f54aa65f719933df04767b54e896c74adb805e34
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 929e4be6771df3db4f8507a9af317cb9bd841107d4170ec2ff198f1e1ed2aca0762e5b32851b1653336afdae49dc2729b0143fd65938cedd11660db2a5d687fc
|
7
|
+
data.tar.gz: 63ce2e0a7e12c9e6c6204a5d79764e9691ff781ae68e27586b135c524710ce557ce89a958aff4b7c101aa3cfd67897fdcb0ec415b5b09f315dad8a27ce437199
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -484,10 +484,8 @@ module TSV
|
|
484
484
|
|
485
485
|
str = ""
|
486
486
|
|
487
|
-
|
488
|
-
|
489
|
-
str << "#" << key_field << "\t" << fields * "\t" << "\n"
|
490
|
-
end
|
487
|
+
entry_hash = no_options ? {} : (ENTRIES - ["key_field", "fields"]).collect{|key| [key.to_sym, self.send(key)]}
|
488
|
+
str = TSV.header_lines(key_field, fields, entry_hash)
|
491
489
|
|
492
490
|
with_unnamed do
|
493
491
|
if keys.nil?
|
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -3,7 +3,11 @@ require 'rbbt/tsv/attach/util'
|
|
3
3
|
module TSV
|
4
4
|
|
5
5
|
# Merge columns from different rows of a file
|
6
|
-
def self.merge_row_fields(input, output,
|
6
|
+
def self.merge_row_fields(input, output, options = {})
|
7
|
+
options = Misc.add_defaults options, :sep => "\t"
|
8
|
+
key_field, fields = Misc.process_options options, :key_field, :fields
|
9
|
+
sep = options[:sep]
|
10
|
+
|
7
11
|
is = case
|
8
12
|
when (String === input and not input.index("\n") and input.length < 250 and File.exists?(input))
|
9
13
|
CMD.cmd("sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
|
@@ -12,16 +16,25 @@ module TSV
|
|
12
16
|
else
|
13
17
|
input
|
14
18
|
end
|
19
|
+
|
20
|
+
if key_field.nil? or fields.nil?
|
21
|
+
parser = TSV::Parser.new(is, options.dup)
|
22
|
+
fields ||= parser.fields
|
23
|
+
key_field ||= parser.key_field
|
24
|
+
line = parser.first_line
|
25
|
+
else
|
26
|
+
line = is.gets
|
27
|
+
end
|
15
28
|
|
16
29
|
current_key = nil
|
17
30
|
current_parts = []
|
18
31
|
|
19
32
|
done = false
|
20
33
|
Open.write(output) do |os|
|
34
|
+
os.puts TSV.header_lines(key_field, fields, options)
|
21
35
|
|
22
|
-
|
23
|
-
|
24
|
-
key, *parts = is.gets.sub("\n",'').split(sep, -1)
|
36
|
+
while line
|
37
|
+
key, *parts = line.sub("\n",'').split(sep, -1)
|
25
38
|
current_key ||= key
|
26
39
|
case
|
27
40
|
when key.nil?
|
@@ -39,7 +52,7 @@ module TSV
|
|
39
52
|
current_parts = parts
|
40
53
|
end
|
41
54
|
|
42
|
-
|
55
|
+
line = is.gets
|
43
56
|
end
|
44
57
|
|
45
58
|
os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
|
@@ -48,10 +61,14 @@ module TSV
|
|
48
61
|
end
|
49
62
|
|
50
63
|
# Merge two files with the same keys and different fields
|
51
|
-
def self.merge_different_fields(file1, file2, output,
|
64
|
+
def self.merge_different_fields(file1, file2, output, options = {})
|
65
|
+
options = Misc.add_defaults options, :sep => "\t"
|
66
|
+
monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
|
67
|
+
sep = options[:sep] || "\t"
|
68
|
+
|
52
69
|
case
|
53
70
|
when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exists?(file1))
|
54
|
-
size = CMD.cmd("wc -
|
71
|
+
size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
|
55
72
|
file1 = CMD.cmd("sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
|
56
73
|
when (String === file1 or StringIO === file1)
|
57
74
|
size = file1.length if monitor
|
@@ -80,19 +97,27 @@ module TSV
|
|
80
97
|
|
81
98
|
key1 = key2 = nil
|
82
99
|
while key1.nil?
|
83
|
-
while (line1 = file1.gets) =~
|
100
|
+
while (line1 = file1.gets) =~ /^#/
|
101
|
+
key_field1, *fields1 = line1.strip.sub('#','').split(sep)
|
102
|
+
end
|
84
103
|
key1, *parts1 = line1.sub("\n",'').split(sep, -1)
|
85
104
|
cols1 = parts1.length
|
86
105
|
end
|
87
106
|
|
88
107
|
while key2.nil?
|
89
|
-
while (line2 = file2.gets) =~
|
108
|
+
while (line2 = file2.gets) =~ /^#/
|
109
|
+
key_field2, *fields2 = line2.strip.sub('#','').split(sep)
|
110
|
+
end
|
90
111
|
key2, *parts2 = line2.sub("\n",'').split(sep, -1)
|
91
112
|
cols2 = parts2.length
|
92
113
|
end
|
93
114
|
|
94
115
|
progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
|
95
116
|
|
117
|
+
entry_hash = options
|
118
|
+
entry_hash.delete :sep if entry_hash[:sep] == "\t"
|
119
|
+
output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
|
120
|
+
|
96
121
|
key = key1 < key2 ? key1 : key2
|
97
122
|
parts = [""] * (cols1 + cols2)
|
98
123
|
while not (done1 and done2)
|
@@ -189,7 +214,7 @@ module TSV
|
|
189
214
|
|
190
215
|
def merge_different_fields(other, options = {})
|
191
216
|
TmpFile.with_file do |output|
|
192
|
-
TSV.merge_different_fields(self, other, output, options
|
217
|
+
TSV.merge_different_fields(self, other, output, options)
|
193
218
|
tsv = TSV.open output, options
|
194
219
|
tsv.key_field = self.key_field unless self.key_field.nil?
|
195
220
|
tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
|
data/lib/rbbt/tsv/parser.rb
CHANGED
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -1,6 +1,40 @@
|
|
1
1
|
require 'rbbt/resource/path'
|
2
2
|
module TSV
|
3
3
|
|
4
|
+
def self.reorder_stream(stream, positions, sep = "\t")
|
5
|
+
Misc.open_pipe do |sin|
|
6
|
+
line = stream.gets
|
7
|
+
while line =~ /^#\:/
|
8
|
+
sin.puts line
|
9
|
+
line = stream.gets
|
10
|
+
end
|
11
|
+
while line =~ /^#/
|
12
|
+
if Hash === positions
|
13
|
+
new = (0..line.split(sep).length-1).to_a
|
14
|
+
positions.each do |k,v|
|
15
|
+
new[k] = v
|
16
|
+
new[v] = k
|
17
|
+
end
|
18
|
+
positions = new
|
19
|
+
end
|
20
|
+
sin.puts "#" + line.sub!(/^#/,'').strip.split(sep).values_at(*positions).compact * sep
|
21
|
+
line = stream.gets
|
22
|
+
end
|
23
|
+
while line
|
24
|
+
if Hash === positions
|
25
|
+
new = (0..line.split(sep).length-1).to_a
|
26
|
+
positions.each do |k,v|
|
27
|
+
new[k] = v
|
28
|
+
new[v] = k
|
29
|
+
end
|
30
|
+
positions = new
|
31
|
+
end
|
32
|
+
sin.puts line.strip.split(sep).values_at(*positions) * sep
|
33
|
+
line = stream.gets
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
4
38
|
def self.field_match_counts(file, values, options = {})
|
5
39
|
options = Misc.add_defaults options, :persist_prefix => "Field_Matches"
|
6
40
|
persist_options = Misc.pull_keys options, :persist
|
@@ -52,6 +86,7 @@ module TSV
|
|
52
86
|
when String === file
|
53
87
|
Open.open(file, open_options)
|
54
88
|
when file.respond_to?(:gets)
|
89
|
+
file.rewind if file.respond_to?(:rewind) and file.eof?
|
55
90
|
file
|
56
91
|
else
|
57
92
|
raise "Cannot get stream from: #{file.inspect}"
|
@@ -76,6 +111,18 @@ module TSV
|
|
76
111
|
Log.medium "Field #{ field } was not found. Options: (#{key_field}), #{fields * ", "}"
|
77
112
|
end
|
78
113
|
end
|
114
|
+
|
115
|
+
def self.header_lines(key_field, fields, entry_hash = {})
|
116
|
+
sep = (Hash === entry_hash and entry_hash[:sep]) ? entry_hash[:sep] : "\t"
|
117
|
+
|
118
|
+
str = ""
|
119
|
+
str << "#: " << Misc.hash2string(entry_hash) << "\n" if entry_hash and entry_hash.any?
|
120
|
+
if fields
|
121
|
+
str << "#" << key_field << sep << fields * sep << "\n"
|
122
|
+
end
|
123
|
+
|
124
|
+
str
|
125
|
+
end
|
79
126
|
|
80
127
|
def identify_field(field)
|
81
128
|
TSV.identify_field(key_field, fields, field)
|
data/lib/rbbt/util/filecache.rb
CHANGED
@@ -55,4 +55,36 @@ module FileCache
|
|
55
55
|
|
56
56
|
FileUtils.rm path if File.exist? path
|
57
57
|
end
|
58
|
+
|
59
|
+
def self.cache_online_elements(ids, pattern = nil, &block)
|
60
|
+
ids = [ids] unless Array === ids
|
61
|
+
|
62
|
+
result_files = {}
|
63
|
+
missing = []
|
64
|
+
ids.each do |id|
|
65
|
+
filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
|
66
|
+
|
67
|
+
if FileCache.found(filename)
|
68
|
+
result_files[id] = FileCache.path(filename)
|
69
|
+
else
|
70
|
+
missing << id
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
yield(missing).each do |id, content|
|
75
|
+
filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
|
76
|
+
path = FileCache.path(filename)
|
77
|
+
Open.write(path, content)
|
78
|
+
result_files[id] = content
|
79
|
+
end
|
80
|
+
|
81
|
+
missing.each do |id|
|
82
|
+
filename = pattern ? pattern.sub("{ID}", id.to_s) : id.to_s
|
83
|
+
result = yield id
|
84
|
+
File.open{|f| f.write(path = FileCache.path(filename)) }
|
85
|
+
result_files[id] = path
|
86
|
+
end
|
87
|
+
|
88
|
+
result_files
|
89
|
+
end
|
58
90
|
end
|
data/lib/rbbt/util/log.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
@@ -1318,6 +1318,7 @@ end
|
|
1318
1318
|
# Divides the array into +num+ chunks of the same size by placing one
|
1319
1319
|
# element in each chunk iteratively.
|
1320
1320
|
def self.divide(array, num)
|
1321
|
+
num = 1 if num == 0
|
1321
1322
|
chunks = []
|
1322
1323
|
num.to_i.times do chunks << [] end
|
1323
1324
|
array.each_with_index{|e, i|
|
@@ -1341,6 +1342,23 @@ end
|
|
1341
1342
|
chunks
|
1342
1343
|
end
|
1343
1344
|
|
1345
|
+
def self.open_pipe
|
1346
|
+
sout, sin = IO.pipe
|
1347
|
+
raise "No block given" unless block_given?
|
1348
|
+
Thread.new{
|
1349
|
+
begin
|
1350
|
+
yield sin
|
1351
|
+
rescue
|
1352
|
+
Log.exception $!
|
1353
|
+
raise $!
|
1354
|
+
ensure
|
1355
|
+
sin.close
|
1356
|
+
end
|
1357
|
+
}
|
1358
|
+
sout
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
|
1344
1362
|
def self.zip_fields(array)
|
1345
1363
|
return [] if array.empty?
|
1346
1364
|
array[0].zip(*array[1..-1])
|
data/lib/rbbt/util/simpleopt.rb
CHANGED
@@ -28,11 +28,21 @@ module SOPT
|
|
28
28
|
@description ||= "Missing"
|
29
29
|
end
|
30
30
|
|
31
|
-
|
32
31
|
def self.shortcuts
|
33
32
|
@shortcuts ||= []
|
34
33
|
end
|
35
34
|
|
35
|
+
def self.delete_inputs(inputs)
|
36
|
+
inputs.each do |input|
|
37
|
+
input = input.to_s
|
38
|
+
self.shortcuts.delete self.input_shortcuts.delete(input)
|
39
|
+
self.inputs.delete input
|
40
|
+
self.input_types.delete input
|
41
|
+
self.input_defaults.delete input
|
42
|
+
self.input_descriptions.delete input
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
36
46
|
def self.all
|
37
47
|
@all ||= {}
|
38
48
|
end
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -28,7 +28,7 @@ module Workflow
|
|
28
28
|
def self.load_workflow_file(filename)
|
29
29
|
begin
|
30
30
|
$LOAD_PATH.unshift(File.join(File.dirname(File.expand_path(filename)), 'lib'))
|
31
|
-
require filename
|
31
|
+
require File.expand_path(filename)
|
32
32
|
Log.debug{"Workflow loaded from: #{ filename }"}
|
33
33
|
return true
|
34
34
|
rescue Exception
|
@@ -99,6 +99,12 @@ module Workflow
|
|
99
99
|
|
100
100
|
# Load locally
|
101
101
|
|
102
|
+
if wf_name =~ /::\w+$/
|
103
|
+
clean_name = wf_name.sub(/::.*/,'')
|
104
|
+
Log.info{"Looking for '#{wf_name}' in '#{clean_name}'"}
|
105
|
+
wf_name = clean_name
|
106
|
+
end
|
107
|
+
|
102
108
|
Log.info{"Loading workflow #{wf_name}"}
|
103
109
|
require_local_workflow(wf_name) or
|
104
110
|
require_local_workflow(Misc.snake_case(wf_name)) or
|
@@ -184,7 +184,18 @@ class Step
|
|
184
184
|
end
|
185
185
|
end
|
186
186
|
|
187
|
-
|
187
|
+
def provenance
|
188
|
+
provenance = {}
|
189
|
+
dependencies.each do |dep|
|
190
|
+
next unless dep.path.exists?
|
191
|
+
if File.exists? dep.info_file
|
192
|
+
provenance[dep.path] = dep.provenance if File.exists? dep.path
|
193
|
+
else
|
194
|
+
provenance[dep.path] = nil
|
195
|
+
end
|
196
|
+
end
|
197
|
+
{:inputs => info[:inputs], :provenance => provenance}
|
198
|
+
end
|
188
199
|
end
|
189
200
|
|
190
201
|
module Workflow
|
@@ -264,16 +275,15 @@ module Workflow
|
|
264
275
|
[taskname].concat(rec_dependencies(taskname)).inject({}){|acc, tn| acc.merge tasks[tn.to_sym].input_options}
|
265
276
|
end
|
266
277
|
|
267
|
-
|
268
278
|
def real_dependencies(task, jobname, inputs, dependencies)
|
269
279
|
real_dependencies = []
|
270
280
|
dependencies.each do |dependency|
|
271
|
-
real_dependencies << case
|
272
|
-
when Step
|
281
|
+
real_dependencies << case dependency
|
282
|
+
when Step
|
273
283
|
dependency
|
274
|
-
when Symbol
|
284
|
+
when Symbol
|
275
285
|
job(dependency, jobname, inputs)
|
276
|
-
when Proc
|
286
|
+
when Proc
|
277
287
|
dependency.call jobname, inputs
|
278
288
|
end
|
279
289
|
end
|
@@ -303,7 +313,6 @@ module Workflow
|
|
303
313
|
}
|
304
314
|
end
|
305
315
|
|
306
|
-
|
307
316
|
def id_for(path)
|
308
317
|
if workdir.respond_to? :find
|
309
318
|
workdir_find = workdir.find
|
@@ -322,4 +331,5 @@ module Workflow
|
|
322
331
|
|
323
332
|
Misc.path_relative_to(workdir_find, File.dirname(path)).sub(/([^\/]+)\/.*/,'\1')
|
324
333
|
end
|
334
|
+
|
325
335
|
end
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -6,7 +6,7 @@ require 'rbbt/workflow/usage'
|
|
6
6
|
|
7
7
|
YAML::ENGINE.yamler = 'syck' if defined? YAML::ENGINE and YAML::ENGINE.respond_to? :yamler
|
8
8
|
|
9
|
-
def usage(workflow = nil, task = nil, error = nil)
|
9
|
+
def usage(workflow = nil, task = nil, error = nil, backtrace = nil)
|
10
10
|
puts SOPT.doc
|
11
11
|
puts "## WORKFLOW"
|
12
12
|
puts
|
@@ -27,7 +27,8 @@ def usage(workflow = nil, task = nil, error = nil)
|
|
27
27
|
workflow.doc(task)
|
28
28
|
if error
|
29
29
|
puts
|
30
|
-
puts "Error: " << error
|
30
|
+
puts Term::ANSIColor.red("Error: ") << error
|
31
|
+
puts Term::ANSIColor.red("Backtrace: ") << backtrace * "\n"
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
@@ -112,8 +113,10 @@ options = SOPT.get <<EOF
|
|
112
113
|
-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
|
113
114
|
-jn--jobname* Job name to use. The name 'Default' is used by default:
|
114
115
|
-pn--printname Print the name of the job and exit without starting it:
|
116
|
+
-jf--job_file* Output one of the job produced files:
|
115
117
|
-wd--workdir* Change the working directory of the workflow:
|
116
118
|
-O--output* Save job result into file:
|
119
|
+
--provenance Report the jobs provenance:
|
117
120
|
--fork Run job asyncronously:
|
118
121
|
EOF
|
119
122
|
|
@@ -178,13 +181,13 @@ workflow.workdir = Path.setup(File.expand_path(options.delete(:workdir))) if opt
|
|
178
181
|
job = workflow.job(task.name, name, job_options)
|
179
182
|
|
180
183
|
# clean job
|
181
|
-
if clean
|
184
|
+
if clean
|
182
185
|
job.clean
|
183
186
|
sleep 1
|
184
187
|
job = workflow.job(task.name, name, job_options)
|
185
188
|
end
|
186
189
|
|
187
|
-
if recursive_clean
|
190
|
+
if recursive_clean
|
188
191
|
job.recursive_clean
|
189
192
|
sleep 1
|
190
193
|
job = workflow.job(task.name, name, job_options)
|
@@ -192,46 +195,59 @@ end
|
|
192
195
|
|
193
196
|
# run
|
194
197
|
begin
|
195
|
-
if do_exec
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
198
|
+
if do_exec
|
199
|
+
res = job.exec
|
200
|
+
case
|
201
|
+
when Array === res
|
202
|
+
puts res * "\n"
|
203
|
+
when TSV === res
|
204
|
+
puts res
|
205
|
+
when Hash === res
|
206
|
+
puts res.to_yaml
|
207
|
+
else
|
208
|
+
puts res
|
209
|
+
end
|
210
|
+
exit 0
|
211
|
+
end
|
212
|
+
|
213
|
+
if do_fork
|
214
|
+
job.fork
|
215
|
+
while not job.done?
|
216
|
+
message = job.messages ? job.messages.last : "no message"
|
217
|
+
Log.debug "#{job.status}: #{message}"
|
218
|
+
sleep 2
|
219
|
+
end
|
220
|
+
raise job.messages.last if job.error?
|
221
|
+
res = job.load
|
204
222
|
else
|
205
|
-
|
223
|
+
res = job.run(true)
|
206
224
|
end
|
207
|
-
exit 0
|
208
|
-
end
|
209
225
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
sleep 2
|
226
|
+
|
227
|
+
if options.delete(:provenance)
|
228
|
+
require 'pp'
|
229
|
+
pp job.provenance
|
230
|
+
exit 0
|
216
231
|
end
|
217
|
-
raise job.messages.last if job.error?
|
218
|
-
res = job.load
|
219
|
-
else
|
220
|
-
res = job.run(true)
|
221
|
-
end
|
222
232
|
|
223
|
-
if options.delete(:printname)
|
224
|
-
|
225
|
-
|
226
|
-
else
|
227
|
-
|
228
|
-
end
|
233
|
+
if options.delete(:printname)
|
234
|
+
puts job.name
|
235
|
+
exit 0
|
236
|
+
else
|
237
|
+
Log.low "Job name: #{job.name}"
|
238
|
+
end
|
229
239
|
rescue ParameterException
|
230
|
-
|
240
|
+
SOPT.delete_inputs(workflow.rec_inputs(task.name))
|
241
|
+
usage(workflow, task, $!.message, $!.backtrace)
|
231
242
|
end
|
232
243
|
|
233
244
|
out = options.include?(:output) ? File.open(options[:output], 'wb') : STDOUT
|
234
245
|
|
246
|
+
if job_file = options.delete(:job_file)
|
247
|
+
out.puts res.file(job_file).read
|
248
|
+
exit 0
|
249
|
+
end
|
250
|
+
|
235
251
|
if Step === res
|
236
252
|
out.puts Open.read(res.path) if File.exists? res.path
|
237
253
|
else
|
@@ -55,53 +55,12 @@ class TestKnowledgeBase < Test::Unit::TestCase
|
|
55
55
|
assert @kb.all_databases.include? "pina"
|
56
56
|
end
|
57
57
|
|
58
|
-
def
|
59
|
-
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
|
60
|
-
kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
|
61
|
-
kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
|
62
|
-
end
|
63
|
-
|
64
|
-
def __test_subset
|
58
|
+
def test_subset
|
65
59
|
gene = "TP53"
|
66
60
|
found = Genomics.knowledge_base.identify :pina, gene
|
67
|
-
p53_interactors =
|
61
|
+
p53_interactors = Genomics.knowledge_base.children(:pina, found).target_entity
|
68
62
|
|
69
|
-
|
70
|
-
Misc.profile do
|
71
|
-
puts Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).length
|
72
|
-
end
|
73
|
-
ddd 2
|
74
|
-
#assert Genomics.knowledge_base.subset(:pina,{"Gene" => p53_interactors}).target_entities.name.include? "MDM2"
|
75
|
-
end
|
76
|
-
|
77
|
-
def test_benchmark
|
78
|
-
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
|
79
|
-
kb = KnowledgeBase.new Rbbt.tmp.test.kb2
|
80
|
-
kb.namespace = "Hsa/jan2013"
|
81
|
-
|
82
|
-
require 'rbbt/sources/COSMIC'
|
83
|
-
require 'rbbt/entity/genomic_mutation'
|
84
|
-
mutations = tp53.COSMIC_mutations
|
85
|
-
Misc.benchmark(10) do
|
86
|
-
name = "mutations"
|
87
|
-
kb.add_index name, "Ensembl Gene ID", "Genomic Mutation", "Change"
|
88
|
-
kb.write name do
|
89
|
-
mutations.each do |gm|
|
90
|
-
kb.add name, tp53, gm, gm.base
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
def test_benchmark2
|
97
|
-
tp53 = Gene.setup("TP53", "Associated Gene Name", "Hsa/jan2013").ensembl
|
98
|
-
kb = KnowledgeBase.new Rbbt.tmp.test.kb2, "Hsa/jan2013"
|
99
|
-
kb.index('g2t', Organism.gene_transcripts("Hsa/jan2013"), :target => "Ensembl Transcript ID")
|
100
|
-
l = nil
|
101
|
-
Misc.benchmark(1000) do
|
102
|
-
l = tp53.transcripts.length
|
103
|
-
end
|
104
|
-
assert l > 0
|
63
|
+
assert Genomics.knowledge_base.subset(:pina, {"Gene" => p53_interactors}).target_entity.name.include? "MDM2"
|
105
64
|
end
|
106
65
|
|
107
66
|
def test_syndication
|
@@ -87,7 +87,6 @@ B Id3
|
|
87
87
|
|
88
88
|
tsv1.attach_source_key tsv2, "ValueB"
|
89
89
|
|
90
|
-
|
91
90
|
assert_equal %w(ValueA ValueB OtherID), tsv1.fields
|
92
91
|
assert_equal "Id1", tsv1["row1"]["OtherID"]
|
93
92
|
end
|
@@ -290,7 +289,7 @@ row6 dd dd ee
|
|
290
289
|
EOF
|
291
290
|
|
292
291
|
TmpFile.with_file do |f|
|
293
|
-
TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, " ")
|
292
|
+
TSV.merge_different_fields(StringIO.new(file1), StringIO.new(file2), f, :sep => " ")
|
294
293
|
assert_equal result, Open.read(f)
|
295
294
|
end
|
296
295
|
end
|
@@ -327,6 +326,7 @@ row6 dd dd ee
|
|
327
326
|
|
328
327
|
def test_merge_different_rows_split_lines
|
329
328
|
file1 =<<-EOF
|
329
|
+
#ID,letterA,letterB,letterC
|
330
330
|
row6,dd,dd,ee
|
331
331
|
row1,a,b,c
|
332
332
|
row1,aa,bb,cc
|
@@ -334,6 +334,7 @@ row2,A,B,C
|
|
334
334
|
row3,1,2,3
|
335
335
|
EOF
|
336
336
|
file2 =<<-EOF
|
337
|
+
#ID,letterD,letterE
|
337
338
|
row20,rr,rr
|
338
339
|
row1,d,e
|
339
340
|
row2,D,E
|
@@ -342,6 +343,8 @@ row4,x,y
|
|
342
343
|
|
343
344
|
# Might be slightly different ...
|
344
345
|
result1 =<<-EOF
|
346
|
+
#: :sep=,
|
347
|
+
#ID,letterA,letterB,letterC,letterD,letterE
|
345
348
|
row1,aa|a,bb|b,cc|c,d,e
|
346
349
|
row2,A,B,C,D,E
|
347
350
|
row20,,,,rr,rr
|
@@ -350,6 +353,8 @@ row4,,,,x,y
|
|
350
353
|
row6,dd,dd,ee,,
|
351
354
|
EOF
|
352
355
|
result2 =<<-EOF
|
356
|
+
#: :sep=,
|
357
|
+
#ID,letterA,letterB,letterC,letterD,letterE
|
353
358
|
row1,a|aa,b|bb,c|cc,d,e
|
354
359
|
row2,A,B,C,D,E
|
355
360
|
row20,,,,rr,rr
|
@@ -359,8 +364,7 @@ row6,dd,dd,ee,,
|
|
359
364
|
EOF
|
360
365
|
|
361
366
|
TmpFile.with_file do |f|
|
362
|
-
TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, ','
|
363
|
-
|
367
|
+
TSV.merge_different_fields StringIO.new(file1), StringIO.new(file2), f, :sep => ','
|
364
368
|
# ... so check for either
|
365
369
|
assert(Open.read(f) == result1 || Open.read(f) == result2)
|
366
370
|
end
|
@@ -403,6 +407,7 @@ row6,dd,dd,ee,,
|
|
403
407
|
|
404
408
|
def test_merge_rows
|
405
409
|
file1 =<<-EOF
|
410
|
+
#ID,letterA,letterB,letterC
|
406
411
|
row1,a,b,c
|
407
412
|
row1,aa,bb,cc
|
408
413
|
row2,A,B,C
|
@@ -410,8 +415,9 @@ row3,1,2,3
|
|
410
415
|
EOF
|
411
416
|
TmpFile.with_file(file1) do |input|
|
412
417
|
TmpFile.with_file() do |output|
|
413
|
-
TSV.merge_row_fields Open.open(input), output
|
414
|
-
assert Open.read(output) =~
|
418
|
+
TSV.merge_row_fields Open.open(input), output, :sep => ','
|
419
|
+
assert Open.read(output) =~ /^#ID,letterA,letterB,letterC$/
|
420
|
+
assert Open.read(output).index "a|aa"
|
415
421
|
end
|
416
422
|
end
|
417
423
|
|
data/test/rbbt/util/test_misc.rb
CHANGED
@@ -6,11 +6,11 @@ require 'rbbt/entity'
|
|
6
6
|
|
7
7
|
class TestMisc < Test::Unit::TestCase
|
8
8
|
|
9
|
-
def
|
9
|
+
def test_humanize
|
10
10
|
assert_equal "mutation_enrichment", Misc.humanize("MutationEnrichment")
|
11
11
|
end
|
12
12
|
|
13
|
-
def
|
13
|
+
def test_fixutf8
|
14
14
|
string = "abc\xffdef"
|
15
15
|
string = string.force_encoding("UTF-8") if string.respond_to? :force_encoding
|
16
16
|
assert(! string.valid_encoding?) if string.respond_to? :valid_encoding?
|
@@ -19,37 +19,37 @@ class TestMisc < Test::Unit::TestCase
|
|
19
19
|
assert( Misc.fixutf8(string).valid_encoding) if string.respond_to? :valid_encoding
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def test_colors_for
|
23
23
|
colors, used = Misc.colors_for([1,2,2,1,2,1,2,2,3,3,2,3,2])
|
24
24
|
assert_equal Misc::COLOR_LIST[1], used[2]
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
27
|
+
def test_total_length
|
28
28
|
ranges = [(0..100), (50..150), (120..160)]
|
29
29
|
ranges = [(0..100), (50..150), (120..160), (51..70)]
|
30
30
|
assert_equal 161, Misc.total_length(ranges)
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
33
|
+
def test_id_filename?
|
34
34
|
TmpFile.with_file("") do |file|
|
35
35
|
assert Misc.is_filename?(file)
|
36
36
|
assert ! Misc.is_filename?("TEST STRING")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
def
|
40
|
+
def test_merge_sorted_arrays
|
41
41
|
assert_equal [1,2,3,4], Misc.merge_sorted_arrays([1,3], [2,4])
|
42
42
|
end
|
43
43
|
|
44
|
-
def
|
44
|
+
def test_intersect_sorted_arrays
|
45
45
|
assert_equal [2,4], Misc.intersect_sorted_arrays([1,2,3,4], [2,4])
|
46
46
|
end
|
47
47
|
|
48
|
-
def
|
48
|
+
def test_sorted_array_matches
|
49
49
|
assert_equal [1,3], Misc.sorted_array_hits(%w(a b c d e), %w(b d))
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
52
|
+
def test_binary_include?
|
53
53
|
a = %w(a b c d e).sort
|
54
54
|
assert Misc.binary_include?(a, "a")
|
55
55
|
assert(!Misc.binary_include?(a, "z"))
|
@@ -58,24 +58,24 @@ class TestMisc < Test::Unit::TestCase
|
|
58
58
|
assert(Misc.binary_include?(a, "d"))
|
59
59
|
end
|
60
60
|
|
61
|
-
def
|
61
|
+
def test_process_to_hash
|
62
62
|
list = [1,2,3,4]
|
63
63
|
assert_equal 4, Misc.process_to_hash(list){|l| l.collect{|e| e * 2}}[2]
|
64
64
|
end
|
65
65
|
|
66
|
-
# def
|
66
|
+
# def test_pdf2text_example
|
67
67
|
# assert PDF2Text.pdf2text(datafile_test('example.pdf')).read =~ /An Example Paper/i
|
68
68
|
# end
|
69
69
|
#
|
70
|
-
# def
|
70
|
+
# def test_pdf2text_EPAR
|
71
71
|
# assert PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB/document_library/EPAR_-_Scientific_Discussion/human/000402/WC500033103.pdf").read =~ /Tamiflu/i
|
72
72
|
# end
|
73
73
|
#
|
74
|
-
# def
|
74
|
+
# def test_pdf2text_wrong
|
75
75
|
# assert_raise CMD::CMDError do PDF2Text.pdf2text("http://www.ema.europa.eu/docs/en_GB#").read end
|
76
76
|
# end
|
77
77
|
|
78
|
-
def
|
78
|
+
def test_string2hash
|
79
79
|
assert(Misc.string2hash("--user-agent=firefox").include? "--user-agent")
|
80
80
|
assert_equal(true, Misc.string2hash(":true")[:true])
|
81
81
|
assert_equal(true, Misc.string2hash("true")["true"])
|
@@ -93,17 +93,17 @@ class TestMisc < Test::Unit::TestCase
|
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
|
-
def
|
96
|
+
def test_named_array
|
97
97
|
a = NamedArray.setup([1,2,3,4], %w(a b c d))
|
98
98
|
assert_equal(1, a['a'])
|
99
99
|
end
|
100
100
|
|
101
|
-
# def
|
101
|
+
# def test_path_relative_to
|
102
102
|
# assert_equal "test/foo", Misc.path_relative_to('test/test/foo', 'test')
|
103
103
|
# end
|
104
104
|
|
105
|
-
# def
|
106
|
-
#
|
105
|
+
# def test_chunk
|
106
|
+
# test =<<-EOF
|
107
107
|
#This is an example file. Entries are separated by Entry
|
108
108
|
#-- Entry
|
109
109
|
#1
|
@@ -118,7 +118,7 @@ class TestMisc < Test::Unit::TestCase
|
|
118
118
|
# assert_equal "1\n2\n3", Misc.chunk(test, /^-- Entry/).first.strip
|
119
119
|
# end
|
120
120
|
|
121
|
-
def
|
121
|
+
def test_hash2string
|
122
122
|
hash = {}
|
123
123
|
assert_equal hash, Misc.string2hash(Misc.hash2string(hash))
|
124
124
|
|
@@ -136,14 +136,14 @@ class TestMisc < Test::Unit::TestCase
|
|
136
136
|
|
137
137
|
end
|
138
138
|
|
139
|
-
def
|
139
|
+
def test_merge
|
140
140
|
a = [[1],[2]]
|
141
141
|
a = NamedArray.setup a, %w(1 2)
|
142
142
|
a.merge [3,4]
|
143
143
|
assert_equal [1,3], a[0]
|
144
144
|
end
|
145
145
|
|
146
|
-
def
|
146
|
+
def test_indiferent_hash
|
147
147
|
a = {:a => 1, "b" => 2}
|
148
148
|
a.extend IndiferentHash
|
149
149
|
|
@@ -153,7 +153,7 @@ class TestMisc < Test::Unit::TestCase
|
|
153
153
|
assert_equal 2, a[:b]
|
154
154
|
end
|
155
155
|
|
156
|
-
def
|
156
|
+
def test_lockfile
|
157
157
|
|
158
158
|
TmpFile.with_file do |tmpfile|
|
159
159
|
pids = []
|
@@ -177,7 +177,7 @@ class TestMisc < Test::Unit::TestCase
|
|
177
177
|
end
|
178
178
|
end
|
179
179
|
|
180
|
-
def
|
180
|
+
def test_positions2hash
|
181
181
|
inputs = Misc.positional2hash([:one, :two, :three], 1, :two => 2, :four => 4)
|
182
182
|
assert_equal 1, inputs[:one]
|
183
183
|
assert_equal 2, inputs[:two]
|
@@ -185,7 +185,7 @@ class TestMisc < Test::Unit::TestCase
|
|
185
185
|
assert_equal nil, inputs[:four]
|
186
186
|
end
|
187
187
|
|
188
|
-
def
|
188
|
+
def test_mean
|
189
189
|
assert_equal 2, Misc.mean([1,2,3])
|
190
190
|
assert_equal 3, Misc.mean([1,2,3,4,5])
|
191
191
|
end
|
@@ -194,32 +194,32 @@ class TestMisc < Test::Unit::TestCase
|
|
194
194
|
assert_equal Math.sqrt(2), Misc.sd([1,3])
|
195
195
|
end
|
196
196
|
|
197
|
-
def
|
197
|
+
def test_align_small
|
198
198
|
reference = "AABCDEBD"
|
199
199
|
sequence = "ABCD"
|
200
200
|
assert_equal '-ABCD---', Misc.fast_align(reference, sequence).last
|
201
201
|
end
|
202
202
|
|
203
|
-
def
|
203
|
+
def test_align_real
|
204
204
|
reference = "SGNECNKAIDGNKDTFWHTFYGANGDPKPPPHTYTIDMKTTQNVNGLSMLPRQDGNQNGWIGRHEVYLSSDGTNW"
|
205
205
|
sequence = "TYTIDMKTTQNVNGLSML"
|
206
206
|
assert_equal "--------------------------------TYTIDMKTTQNVNGLSML-------------------------", Misc.fast_align(reference, sequence).last
|
207
207
|
end
|
208
208
|
|
209
|
-
def
|
209
|
+
def test_divide
|
210
210
|
assert_equal 2, Misc.divide(%w(1 2 3 4 5 6 7 8 9),2).length
|
211
211
|
end
|
212
212
|
|
213
|
-
def
|
213
|
+
def test_ordered_divide
|
214
214
|
assert_equal 5, Misc.ordered_divide(%w(1 2 3 4 5 6 7 8 9),2).length
|
215
215
|
end
|
216
216
|
|
217
|
-
def
|
217
|
+
def test_collapse_ranges
|
218
218
|
ranges = [(0..100), (50..150), (51..61),(200..250), (300..324),(320..350)]
|
219
219
|
assert_equal [(0..150),(200..250), (300..350)], Misc.collapse_ranges(ranges)
|
220
220
|
end
|
221
221
|
|
222
|
-
def
|
222
|
+
def test_humanize
|
223
223
|
str1 = "test_string"
|
224
224
|
str2 = "TEST_string"
|
225
225
|
str3 = "test"
|
@@ -229,18 +229,18 @@ class TestMisc < Test::Unit::TestCase
|
|
229
229
|
assert_equal "Test", Misc.humanize(str3)
|
230
230
|
end
|
231
231
|
|
232
|
-
def
|
232
|
+
def test_snake_case
|
233
233
|
str1 = "ACRONIMTest"
|
234
234
|
str2 = "ACRONIM_test"
|
235
235
|
assert_equal "ACRONIM_test", Misc.snake_case(str1)
|
236
236
|
assert_equal "ACRONIM_test", Misc.snake_case(str2)
|
237
237
|
end
|
238
238
|
|
239
|
-
def
|
239
|
+
def test_correct_vcf_mutations
|
240
240
|
assert_equal [737407, ["-----", "-----G", "-----GTTAAT"]], Misc.correct_vcf_mutation(737406, "GTTAAT", "G,GG,GGTTAAT")
|
241
241
|
end
|
242
242
|
|
243
|
-
def
|
243
|
+
def test_fingerprint
|
244
244
|
puts Misc.fingerprint({:a => 1})
|
245
245
|
end
|
246
246
|
|
@@ -253,4 +253,26 @@ class TestMisc < Test::Unit::TestCase
|
|
253
253
|
end
|
254
254
|
end
|
255
255
|
end
|
256
|
+
|
257
|
+
def test_pipe
|
258
|
+
t = 5
|
259
|
+
stream = Misc.open_pipe do |sin|
|
260
|
+
t.times do |i|
|
261
|
+
puts "Calculating line #{ i }"
|
262
|
+
sleep 0.5
|
263
|
+
sin.puts "LINE #{ i }"
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
267
|
+
time = Time.now
|
268
|
+
lines = []
|
269
|
+
while line = stream.gets
|
270
|
+
lines << line.strip
|
271
|
+
end
|
272
|
+
time_spent = Time.new - time
|
273
|
+
|
274
|
+
assert time_spent >= t * 0.5
|
275
|
+
assert time_spent <= (t+1) * 0.5
|
276
|
+
assert_equal (0..t-1).to_a.collect{|i| "LINE #{ i }"}, lines
|
277
|
+
end
|
256
278
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.6.0
|
4
|
+
version: 5.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|