rbbt-util 5.19.16 → 5.19.17
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/entity/identifiers.rb +1 -1
- data/lib/rbbt/knowledge_base/traverse.rb +69 -17
- data/lib/rbbt/persist.rb +5 -0
- data/lib/rbbt/tsv/dumper.rb +2 -2
- data/lib/rbbt/tsv/util.rb +4 -1
- data/lib/rbbt/util/docker.rb +2 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +8 -0
- data/lib/rbbt/workflow/accessor.rb +5 -1
- data/share/rbbt_commands/stat/density +1 -4
- data/share/rbbt_commands/tsv/assemble_pdf_table +25 -12
- data/share/rbbt_commands/workflow/task +21 -19
- data/test/rbbt/knowledge_base/test_traverse.rb +37 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7250e5af7af076517a13c322c1fe43506d9ece3d
|
4
|
+
data.tar.gz: c9ec9ec07c60be34a6f95c9bde5fd2deae7b3bbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef2ad33be33fbdef8d78a4461d31efbf25171133c553b0be00b67c4e776c5ca225a90609c1d0c5bae80ee4a827d9da1e073955317fca9a7ae4c736c7fa688f50
|
7
|
+
data.tar.gz: 8e98021bdafe0d005fea0047826a5068709dc009e84c941feaae4d9d1d48c4feac291c2915faf70c213ed71560ee74296fe02420c9656b325a38f6becbe0a357
|
data/lib/rbbt/entity/identifiers.rb
CHANGED
@@ -38,7 +38,7 @@ module Entity
|
|
38
38
|
|
39
39
|
begin
|
40
40
|
index = TSV.translation_index(identifier_files, format, source, :persist => true)
|
41
|
-
raise "No index from #{ source } to #{ format }: #{Misc.fingerprint identifier_files}" if index.nil?
|
41
|
+
raise "No index from #{ Misc.fingerprint source } to #{ Misc.fingerprint format }: #{Misc.fingerprint identifier_files}" if index.nil?
|
42
42
|
index.unnamed = true
|
43
43
|
index
|
44
44
|
rescue
|
data/lib/rbbt/knowledge_base/traverse.rb
CHANGED
@@ -51,6 +51,7 @@ class KnowledgeBase
|
|
51
51
|
|
52
52
|
rules.zip(all_matches).each do |rule, matches|
|
53
53
|
source, db, target = rule.split /\s+/
|
54
|
+
next if matches.nil?
|
54
55
|
|
55
56
|
if is_wildcard? source
|
56
57
|
assigned = assignments[source] || []
|
@@ -74,19 +75,22 @@ class KnowledgeBase
|
|
74
75
|
rule, *rest = rules
|
75
76
|
source, db, target = rule.split /\s+/
|
76
77
|
|
78
|
+
wildcard_source = is_wildcard? source
|
79
|
+
wildcard_target = is_wildcard? target
|
80
|
+
|
77
81
|
paths = {}
|
78
82
|
matches = clean_matches[rule]
|
79
83
|
Annotated.purge(matches).each do |match|
|
80
84
|
new_assignments = nil
|
81
85
|
match_source, _sep, match_target = match.partition "~"
|
82
86
|
|
83
|
-
if is_wildcard? source
|
87
|
+
if wildcard_source
|
84
88
|
next if assignments[source] and assignments[source] != match_source
|
85
89
|
new_assignments ||= assignments.dup
|
86
90
|
new_assignments[source] = match_source
|
87
91
|
end
|
88
92
|
|
89
|
-
if is_wildcard? target
|
93
|
+
if wildcard_target
|
90
94
|
next if assignments[target] and assignments[target] != match_target
|
91
95
|
new_assignments ||= assignments.dup
|
92
96
|
new_assignments[target] = match_target
|
@@ -136,6 +140,7 @@ class KnowledgeBase
|
|
136
140
|
source_entities, target_entities = identify db, source, target
|
137
141
|
|
138
142
|
options = {:source => source_entities, :target => target_entities}
|
143
|
+
Log.debug "Traversing #{ db }: #{Misc.fingerprint options}"
|
139
144
|
matches = kb.subset(db, options)
|
140
145
|
|
141
146
|
if conditions
|
@@ -152,29 +157,41 @@ class KnowledgeBase
|
|
152
157
|
matches
|
153
158
|
end
|
154
159
|
|
160
|
+
def id_dbs(db)
|
161
|
+
if db.include? '?'
|
162
|
+
all_dbs = kb.registry.keys
|
163
|
+
_name, _sep, _kb = db.partition("@")
|
164
|
+
case
|
165
|
+
when _name[0] == '?'
|
166
|
+
dbs = all_dbs.select{|_db|
|
167
|
+
n,_s,d=_db.partition("@");
|
168
|
+
d.nil? or d.empty? or (d == _kd and assignments[_name].include?(n))
|
169
|
+
}
|
170
|
+
when _kb[0] == '?'
|
171
|
+
dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
|
172
|
+
end
|
173
|
+
else
|
174
|
+
dbs = [db]
|
175
|
+
end
|
176
|
+
|
177
|
+
dbs
|
178
|
+
end
|
155
179
|
|
156
180
|
def traverse
|
157
181
|
all_matches = []
|
158
|
-
|
182
|
+
path_rules = []
|
183
|
+
acc_var = nil
|
159
184
|
rules.each do |rule|
|
160
185
|
rule = rule.strip
|
161
186
|
next if rule.empty?
|
162
187
|
|
163
|
-
if m = rule.match(/([^\s]+)\s+([^\s]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
|
188
|
+
if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
|
189
|
+
Log.debug "Traverse rule: #{rule}"
|
190
|
+
path_rules << rule
|
164
191
|
|
165
192
|
source, db, target, conditions = m.captures
|
166
|
-
|
167
|
-
|
168
|
-
_name, _sep, _kb = db.partition("@")
|
169
|
-
case
|
170
|
-
when _kb[0] == '?'
|
171
|
-
dbs = all_dbs.select{|_db| _db.partition("@").first == _name}
|
172
|
-
when _name[0] == '?'
|
173
|
-
dbs = all_dbs.select{|_db| _db.include?("@") ? db.partition("@").last == _kb : true}
|
174
|
-
end
|
175
|
-
else
|
176
|
-
dbs = [db]
|
177
|
-
end
|
193
|
+
|
194
|
+
dbs = id_dbs(db)
|
178
195
|
|
179
196
|
rule_matches = []
|
180
197
|
dbs.each do |_db|
|
@@ -197,17 +214,52 @@ class KnowledgeBase
|
|
197
214
|
matches.each do |m|
|
198
215
|
rule_matches << m
|
199
216
|
end
|
217
|
+
|
218
|
+
assignments.each{|k,v| v.uniq! if v}
|
200
219
|
end
|
201
220
|
|
202
221
|
reassign rule_matches, source, target
|
203
222
|
|
204
223
|
all_matches << rule_matches
|
224
|
+
|
225
|
+
elsif m = rule.match(/([^\s=]+)\s*=([^\s]*)\s*(.*)/)
|
226
|
+
Log.debug "Assign rule: #{rule}"
|
227
|
+
var, db, value_str = m.captures
|
228
|
+
names = value_str.split(",").collect{|v| v.strip}
|
229
|
+
if db.empty?
|
230
|
+
ids = names
|
231
|
+
else
|
232
|
+
dbs = id_dbs(db)
|
233
|
+
ids = names.collect{|name|
|
234
|
+
id = nil
|
235
|
+
dbs.each do |db|
|
236
|
+
sid, tid = identify db, name, name
|
237
|
+
id = (sid + tid).compact.first
|
238
|
+
break if id
|
239
|
+
end
|
240
|
+
id
|
241
|
+
}
|
242
|
+
end
|
243
|
+
assignments[var] = ids
|
244
|
+
|
245
|
+
elsif m = rule.match(/(\?[^\s{]+)\s*{/)
|
246
|
+
acc_var = m.captures.first
|
247
|
+
Log.debug "Start assign block: #{acc_var}"
|
248
|
+
elsif m = rule.match(/^\s*}\s*$/)
|
249
|
+
Log.debug "Close assign block: #{acc_var}"
|
250
|
+
saved_assign = assignments[acc_var]
|
251
|
+
assignments.clear
|
252
|
+
assignments[acc_var] = saved_assign
|
253
|
+
all_matches = []
|
254
|
+
path_rules = []
|
205
255
|
else
|
206
256
|
raise "Rule not understood: #{rule}"
|
207
257
|
end
|
208
258
|
end
|
209
259
|
|
210
|
-
|
260
|
+
Log.debug "Finding paths: #{all_matches.length}"
|
261
|
+
paths = find_paths path_rules, all_matches, assignments
|
262
|
+
Log.debug "Found paths: #{paths.length}"
|
211
263
|
|
212
264
|
[assignments, paths]
|
213
265
|
end
|
data/lib/rbbt/persist.rb
CHANGED
@@ -237,6 +237,11 @@ module Persist
|
|
237
237
|
callback = stream.respond_to?(:callback)? stream.callback : nil
|
238
238
|
abort_callback = stream.respond_to?(:abort_callback)? stream.abort_callback : nil
|
239
239
|
|
240
|
+
# This is to avoid calling the callbacks twice, since they have been
|
241
|
+
# moved to the new 'res' stream
|
242
|
+
stream.callback = nil
|
243
|
+
stream.abort_callback = nil
|
244
|
+
|
240
245
|
res = tee_stream(stream, path, type, callback, abort_callback, lockfile)
|
241
246
|
|
242
247
|
res.lockfile = lockfile
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -38,11 +38,11 @@ module TSV
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def init
|
41
|
+
def init(init_options = {})
|
42
42
|
options = @options.dup
|
43
43
|
key_field, fields = Misc.process_options options, :key_field, :fields
|
44
44
|
|
45
|
-
str = TSV.header_lines(key_field, fields, options)
|
45
|
+
str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
|
46
46
|
|
47
47
|
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
|
48
48
|
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -179,14 +179,17 @@ module TSV
|
|
179
179
|
if Hash === entry_hash
|
180
180
|
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
181
181
|
preamble = entry_hash[:preamble]
|
182
|
+
header_hash = entry_hash[:header_hash]
|
182
183
|
end
|
183
184
|
|
185
|
+
header_hash = "#" if header_hash.nil?
|
186
|
+
|
184
187
|
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
185
188
|
|
186
189
|
str = ""
|
187
190
|
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
188
191
|
if fields
|
189
|
-
str << "#" << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
192
|
+
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
190
193
|
end
|
191
194
|
|
192
195
|
str
|
data/lib/rbbt/util/docker.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
module Docker
|
2
|
-
def self.run(image,cmd, options)
|
2
|
+
def self.run(image, cmd, options)
|
3
3
|
mounts, job_inputs, directory, pipe = Misc.process_options options, :mounts, :job_inputs, :directory, :pipe
|
4
4
|
|
5
5
|
if mounts
|
6
|
+
mounts.each{|t,s| FileUtils.mkdir_p s unless File.exists? s}
|
6
7
|
mount_cmd = mounts.sort.collect{|t,s| "-v " + ["'" + s + "'", "'" + t + "'"] * ":" } * " "
|
7
8
|
else
|
8
9
|
mount_cmd = ""
|
data/share/rbbt_commands/stat/density
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
1
2
|
|
2
3
|
require 'rbbt/util/R'
|
3
4
|
|
@@ -8,10 +9,6 @@ Calculate density
|
|
8
9
|
|
9
10
|
$ rbbt stat density <file>
|
10
11
|
|
11
|
-
Display summary information. Works with Tokyocabinet HDB and BDB as well.
|
12
|
-
|
13
|
-
-tch--tokyocabinet File is a TC HDB
|
14
|
-
-tcb--tokyocabinet_bd File is a TC BDB
|
15
12
|
-h--help Help
|
16
13
|
EOF
|
17
14
|
|
data/share/rbbt_commands/tsv/assemble_pdf_table
CHANGED
@@ -13,14 +13,18 @@ $ rbbt tsv assemble_pdf_table file.txt
|
|
13
13
|
When extracting tables from PDF they are often laid out one column at a time, divided by pages.
|
14
14
|
This command takes a file with the following structure:
|
15
15
|
|
16
|
-
1 A few lines containing table headers, one per line
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
1. A few lines containing table headers, one per line
|
17
|
+
|
18
|
+
2. A group of lines containing the values for the first column of the first page, ending in an empty line
|
19
|
+
|
20
|
+
3. More groups of lines corresponding to other columns
|
21
|
+
|
22
|
+
4. Repetitions of 2 and 3 for more pages
|
20
23
|
|
21
24
|
This script will take care of matching the columns read with the headers specified
|
22
25
|
|
23
26
|
-h--help Help
|
27
|
+
-r--row Each block of lines is a row, not a column
|
24
28
|
EOF
|
25
29
|
|
26
30
|
SOPT.usage if options[:help]
|
@@ -50,19 +54,28 @@ while lines and lines.any?
|
|
50
54
|
lines = lines[block_size+1..-1]
|
51
55
|
columns[0] << first_block
|
52
56
|
(1..num_columns-1).each do |pos|
|
57
|
+
next if lines.nil?
|
53
58
|
block = lines[0..block_size-1]
|
54
59
|
lines = lines[block_size+1..-1]
|
55
60
|
columns[pos] << block
|
56
61
|
end
|
57
62
|
end
|
58
63
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
end
|
64
|
+
if options[:row]
|
65
|
+
columns.each do |n,list|
|
66
|
+
list.each do |values|
|
67
|
+
puts values * "\t"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
else
|
71
|
+
full_columns = []
|
72
|
+
num_columns.times do |i|
|
73
|
+
column = columns[i]
|
74
|
+
full_columns << column.flatten
|
75
|
+
end
|
64
76
|
|
65
|
-
puts "#" << fields * "\t"
|
66
|
-
Misc.zip_fields(full_columns).zip do |values|
|
67
|
-
|
77
|
+
puts "#" << fields * "\t"
|
78
|
+
Misc.zip_fields(full_columns).zip do |values|
|
79
|
+
puts values * "\t"
|
80
|
+
end
|
68
81
|
end
|
data/share/rbbt_commands/workflow/task
CHANGED
@@ -31,8 +31,10 @@ def usage(workflow = nil, task = nil, exception=nil)
|
|
31
31
|
else
|
32
32
|
puts Log.color :magenta, workflow.to_s
|
33
33
|
puts Log.color :magenta, "=" * workflow.to_s.length
|
34
|
-
|
35
|
-
|
34
|
+
if workflow.documentation[:description] and not workflow.documentation[:description].empty?
|
35
|
+
puts
|
36
|
+
puts workflow.documentation[:description]
|
37
|
+
end
|
36
38
|
puts
|
37
39
|
workflow.doc(task)
|
38
40
|
end
|
@@ -170,24 +172,24 @@ The first time a job is executed it will save the result. Once the job is done
|
|
170
172
|
you can re-doit using the `clean` parameter. The `recursive_clean` cleans all
|
171
173
|
the job dependencies recursively.
|
172
174
|
|
173
|
-
-h--help Show this help
|
174
|
-
-wd--workdir* Change the working directory of the workflow
|
175
|
-
-as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
|
176
|
-
-fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
|
177
|
-
-jn--jobname* Job name to use. The name 'Default' is used by default
|
178
|
-
-pn--printname Print the name of the job and exit without starting it
|
179
|
-
-pf--printpath Print the path of the job result
|
180
|
-
-cl--clean Clean the last step of the job so that it gets recomputed
|
181
|
-
-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
|
182
|
-
--fork Run job asyncronously and monitor progress. It monitors detached processes as well
|
183
|
-
--detach Run job asyncronously and detach process
|
184
|
-
--exec Run job with no persistence
|
185
|
-
-O--output* Save job result into file
|
186
|
-
-jf--job_file* Output one of the job produced files
|
187
|
-
-ljf--list_job_files List all the files produced in that step
|
175
|
+
-h--help Show this help
|
176
|
+
-wd--workdir* Change the working directory of the workflow
|
177
|
+
-as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
|
178
|
+
-fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
|
179
|
+
-jn--jobname* Job name to use. The name 'Default' is used by default
|
180
|
+
-pn--printname Print the name of the job and exit without starting it
|
181
|
+
-pf--printpath Print the path of the job result
|
182
|
+
-cl--clean Clean the last step of the job so that it gets recomputed
|
183
|
+
-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
|
184
|
+
--fork Run job asyncronously and monitor progress. It monitors detached processes as well
|
185
|
+
--detach Run job asyncronously and detach process
|
186
|
+
--exec Run job with no persistence
|
187
|
+
-O--output* Save job result into file
|
188
|
+
-jf--job_file* Output one of the job produced files
|
189
|
+
-ljf--list_job_files List all the files produced in that step
|
188
190
|
--load_inputs* Load inputs from a directory
|
189
|
-
--info Show the job info
|
190
|
-
--provenance Report the jobs provenance
|
191
|
+
--info Show the job info
|
192
|
+
--provenance Report the jobs provenance
|
191
193
|
-W--workflows* Load a list of workflows
|
192
194
|
-R--requires* Require a list of files
|
193
195
|
EOF
|
data/test/rbbt/knowledge_base/test_traverse.rb
CHANGED
@@ -17,6 +17,43 @@ class TestKnowledgeBaseTraverse < Test::Unit::TestCase
|
|
17
17
|
assert res.first.include? "?1"
|
18
18
|
end
|
19
19
|
|
20
|
+
def test_traverse2
|
21
|
+
rules = []
|
22
|
+
rules << "?target =pina SF3B1"
|
23
|
+
rules << "?1 pina ?target - Method=MI:0006"
|
24
|
+
rules << "TP53 pina ?2"
|
25
|
+
rules << "?2 pina ?1"
|
26
|
+
res = kb.traverse rules
|
27
|
+
assert res.first.include? "?1"
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_traverse3
|
31
|
+
rules = []
|
32
|
+
rules << "?target = ENSG00000115524"
|
33
|
+
rules << "?1 pina ?target - Method=MI:0006"
|
34
|
+
rules << "TP53 pina ?2"
|
35
|
+
rules << "?2 pina ?1"
|
36
|
+
res = kb.traverse rules
|
37
|
+
assert res.first.include? "?1"
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
def test_traverse_acc
|
42
|
+
Log.severity = 0
|
43
|
+
rules_str=<<-EOF
|
44
|
+
?target{
|
45
|
+
?target pina SF3B1
|
46
|
+
}
|
47
|
+
?1 pina TP53
|
48
|
+
?1 pina ?target
|
49
|
+
EOF
|
50
|
+
rules = rules_str.split "\n"
|
51
|
+
res = kb.traverse rules
|
52
|
+
iii res
|
53
|
+
assert res.first.include? "?1"
|
54
|
+
end
|
55
|
+
|
56
|
+
|
20
57
|
def test_path
|
21
58
|
rules = []
|
22
59
|
rules << "?1 pina ARPC2"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.19.16
|
4
|
+
version: 5.19.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|