rbbt-util 5.19.16 → 5.19.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/entity/identifiers.rb +1 -1
- data/lib/rbbt/knowledge_base/traverse.rb +69 -17
- data/lib/rbbt/persist.rb +5 -0
- data/lib/rbbt/tsv/dumper.rb +2 -2
- data/lib/rbbt/tsv/util.rb +4 -1
- data/lib/rbbt/util/docker.rb +2 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +8 -0
- data/lib/rbbt/workflow/accessor.rb +5 -1
- data/share/rbbt_commands/stat/density +1 -4
- data/share/rbbt_commands/tsv/assemble_pdf_table +25 -12
- data/share/rbbt_commands/workflow/task +21 -19
- data/test/rbbt/knowledge_base/test_traverse.rb +37 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7250e5af7af076517a13c322c1fe43506d9ece3d
|
4
|
+
data.tar.gz: c9ec9ec07c60be34a6f95c9bde5fd2deae7b3bbd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef2ad33be33fbdef8d78a4461d31efbf25171133c553b0be00b67c4e776c5ca225a90609c1d0c5bae80ee4a827d9da1e073955317fca9a7ae4c736c7fa688f50
|
7
|
+
data.tar.gz: 8e98021bdafe0d005fea0047826a5068709dc009e84c941feaae4d9d1d48c4feac291c2915faf70c213ed71560ee74296fe02420c9656b325a38f6becbe0a357
|
@@ -38,7 +38,7 @@ module Entity
|
|
38
38
|
|
39
39
|
begin
|
40
40
|
index = TSV.translation_index(identifier_files, format, source, :persist => true)
|
41
|
-
raise "No index from #{ source } to #{ format }: #{Misc.fingerprint identifier_files}" if index.nil?
|
41
|
+
raise "No index from #{ Misc.fingerprint source } to #{ Misc.fingerprint format }: #{Misc.fingerprint identifier_files}" if index.nil?
|
42
42
|
index.unnamed = true
|
43
43
|
index
|
44
44
|
rescue
|
@@ -51,6 +51,7 @@ class KnowledgeBase
|
|
51
51
|
|
52
52
|
rules.zip(all_matches).each do |rule, matches|
|
53
53
|
source, db, target = rule.split /\s+/
|
54
|
+
next if matches.nil?
|
54
55
|
|
55
56
|
if is_wildcard? source
|
56
57
|
assigned = assignments[source] || []
|
@@ -74,19 +75,22 @@ class KnowledgeBase
|
|
74
75
|
rule, *rest = rules
|
75
76
|
source, db, target = rule.split /\s+/
|
76
77
|
|
78
|
+
wildcard_source = is_wildcard? source
|
79
|
+
wildcard_target = is_wildcard? target
|
80
|
+
|
77
81
|
paths = {}
|
78
82
|
matches = clean_matches[rule]
|
79
83
|
Annotated.purge(matches).each do |match|
|
80
84
|
new_assignments = nil
|
81
85
|
match_source, _sep, match_target = match.partition "~"
|
82
86
|
|
83
|
-
if is_wildcard? source
|
87
|
+
if wildcard_source
|
84
88
|
next if assignments[source] and assignments[source] != match_source
|
85
89
|
new_assignments ||= assignments.dup
|
86
90
|
new_assignments[source] = match_source
|
87
91
|
end
|
88
92
|
|
89
|
-
if is_wildcard? target
|
93
|
+
if wildcard_target
|
90
94
|
next if assignments[target] and assignments[target] != match_target
|
91
95
|
new_assignments ||= assignments.dup
|
92
96
|
new_assignments[target] = match_target
|
@@ -136,6 +140,7 @@ class KnowledgeBase
|
|
136
140
|
source_entities, target_entities = identify db, source, target
|
137
141
|
|
138
142
|
options = {:source => source_entities, :target => target_entities}
|
143
|
+
Log.debug "Traversing #{ db }: #{Misc.fingerprint options}"
|
139
144
|
matches = kb.subset(db, options)
|
140
145
|
|
141
146
|
if conditions
|
@@ -152,29 +157,41 @@ class KnowledgeBase
|
|
152
157
|
matches
|
153
158
|
end
|
154
159
|
|
160
|
+
def id_dbs(db)
|
161
|
+
if db.include? '?'
|
162
|
+
all_dbs = kb.registry.keys
|
163
|
+
_name, _sep, _kb = db.partition("@")
|
164
|
+
case
|
165
|
+
when _name[0] == '?'
|
166
|
+
dbs = all_dbs.select{|_db|
|
167
|
+
n,_s,d=_db.partition("@");
|
168
|
+
d.nil? or d.empty? or (d == _kd and assignments[_name].include?(n))
|
169
|
+
}
|
170
|
+
when _kb[0] == '?'
|
171
|
+
dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
|
172
|
+
end
|
173
|
+
else
|
174
|
+
dbs = [db]
|
175
|
+
end
|
176
|
+
|
177
|
+
dbs
|
178
|
+
end
|
155
179
|
|
156
180
|
def traverse
|
157
181
|
all_matches = []
|
158
|
-
|
182
|
+
path_rules = []
|
183
|
+
acc_var = nil
|
159
184
|
rules.each do |rule|
|
160
185
|
rule = rule.strip
|
161
186
|
next if rule.empty?
|
162
187
|
|
163
|
-
if m = rule.match(/([^\s]+)\s+([^\s]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
|
188
|
+
if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
|
189
|
+
Log.debug "Traverse rule: #{rule}"
|
190
|
+
path_rules << rule
|
164
191
|
|
165
192
|
source, db, target, conditions = m.captures
|
166
|
-
|
167
|
-
|
168
|
-
_name, _sep, _kb = db.partition("@")
|
169
|
-
case
|
170
|
-
when _kb[0] == '?'
|
171
|
-
dbs = all_dbs.select{|_db| _db.partition("@").first == _name}
|
172
|
-
when _name[0] == '?'
|
173
|
-
dbs = all_dbs.select{|_db| _db.include?("@") ? db.partition("@").last == _kb : true}
|
174
|
-
end
|
175
|
-
else
|
176
|
-
dbs = [db]
|
177
|
-
end
|
193
|
+
|
194
|
+
dbs = id_dbs(db)
|
178
195
|
|
179
196
|
rule_matches = []
|
180
197
|
dbs.each do |_db|
|
@@ -197,17 +214,52 @@ class KnowledgeBase
|
|
197
214
|
matches.each do |m|
|
198
215
|
rule_matches << m
|
199
216
|
end
|
217
|
+
|
218
|
+
assignments.each{|k,v| v.uniq! if v}
|
200
219
|
end
|
201
220
|
|
202
221
|
reassign rule_matches, source, target
|
203
222
|
|
204
223
|
all_matches << rule_matches
|
224
|
+
|
225
|
+
elsif m = rule.match(/([^\s=]+)\s*=([^\s]*)\s*(.*)/)
|
226
|
+
Log.debug "Assign rule: #{rule}"
|
227
|
+
var, db, value_str = m.captures
|
228
|
+
names = value_str.split(",").collect{|v| v.strip}
|
229
|
+
if db.empty?
|
230
|
+
ids = names
|
231
|
+
else
|
232
|
+
dbs = id_dbs(db)
|
233
|
+
ids = names.collect{|name|
|
234
|
+
id = nil
|
235
|
+
dbs.each do |db|
|
236
|
+
sid, tid = identify db, name, name
|
237
|
+
id = (sid + tid).compact.first
|
238
|
+
break if id
|
239
|
+
end
|
240
|
+
id
|
241
|
+
}
|
242
|
+
end
|
243
|
+
assignments[var] = ids
|
244
|
+
|
245
|
+
elsif m = rule.match(/(\?[^\s{]+)\s*{/)
|
246
|
+
acc_var = m.captures.first
|
247
|
+
Log.debug "Start assign block: #{acc_var}"
|
248
|
+
elsif m = rule.match(/^\s*}\s*$/)
|
249
|
+
Log.debug "Close assign block: #{acc_var}"
|
250
|
+
saved_assign = assignments[acc_var]
|
251
|
+
assignments.clear
|
252
|
+
assignments[acc_var] = saved_assign
|
253
|
+
all_matches = []
|
254
|
+
path_rules = []
|
205
255
|
else
|
206
256
|
raise "Rule not understood: #{rule}"
|
207
257
|
end
|
208
258
|
end
|
209
259
|
|
210
|
-
|
260
|
+
Log.debug "Finding paths: #{all_matches.length}"
|
261
|
+
paths = find_paths path_rules, all_matches, assignments
|
262
|
+
Log.debug "Found paths: #{paths.length}"
|
211
263
|
|
212
264
|
[assignments, paths]
|
213
265
|
end
|
data/lib/rbbt/persist.rb
CHANGED
@@ -237,6 +237,11 @@ module Persist
|
|
237
237
|
callback = stream.respond_to?(:callback)? stream.callback : nil
|
238
238
|
abort_callback = stream.respond_to?(:abort_callback)? stream.abort_callback : nil
|
239
239
|
|
240
|
+
# This is to avoid calling the callbacks twice, since they have been
|
241
|
+
# moved to the new 'res' stream
|
242
|
+
stream.callback = nil
|
243
|
+
stream.abort_callback = nil
|
244
|
+
|
240
245
|
res = tee_stream(stream, path, type, callback, abort_callback, lockfile)
|
241
246
|
|
242
247
|
res.lockfile = lockfile
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -38,11 +38,11 @@ module TSV
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
def init
|
41
|
+
def init(init_options = {})
|
42
42
|
options = @options.dup
|
43
43
|
key_field, fields = Misc.process_options options, :key_field, :fields
|
44
44
|
|
45
|
-
str = TSV.header_lines(key_field, fields, options)
|
45
|
+
str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
|
46
46
|
|
47
47
|
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
|
48
48
|
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -179,14 +179,17 @@ module TSV
|
|
179
179
|
if Hash === entry_hash
|
180
180
|
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
181
181
|
preamble = entry_hash[:preamble]
|
182
|
+
header_hash = entry_hash[:header_hash]
|
182
183
|
end
|
183
184
|
|
185
|
+
header_hash = "#" if header_hash.nil?
|
186
|
+
|
184
187
|
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
185
188
|
|
186
189
|
str = ""
|
187
190
|
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
188
191
|
if fields
|
189
|
-
str << "#" << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
192
|
+
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
190
193
|
end
|
191
194
|
|
192
195
|
str
|
data/lib/rbbt/util/docker.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
module Docker
|
2
|
-
def self.run(image,cmd, options)
|
2
|
+
def self.run(image, cmd, options)
|
3
3
|
mounts, job_inputs, directory, pipe = Misc.process_options options, :mounts, :job_inputs, :directory, :pipe
|
4
4
|
|
5
5
|
if mounts
|
6
|
+
mounts.each{|t,s| FileUtils.mkdir_p s unless File.exists? s}
|
6
7
|
mount_cmd = mounts.sort.collect{|t,s| "-v " + ["'" + s + "'", "'" + t + "'"] * ":" } * " "
|
7
8
|
else
|
8
9
|
mount_cmd = ""
|
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
1
2
|
|
2
3
|
require 'rbbt/util/R'
|
3
4
|
|
@@ -8,10 +9,6 @@ Calculate density
|
|
8
9
|
|
9
10
|
$ rbbt stat density <file>
|
10
11
|
|
11
|
-
Display summary information. Works with Tokyocabinet HDB and BDB as well.
|
12
|
-
|
13
|
-
-tch--tokyocabinet File is a TC HDB
|
14
|
-
-tcb--tokyocabinet_bd File is a TC BDB
|
15
12
|
-h--help Help
|
16
13
|
EOF
|
17
14
|
|
@@ -13,14 +13,18 @@ $ rbbt tsv assemble_pdf_table file.txt
|
|
13
13
|
When extracting tables from PDF they are often laid out one column at a time, divided by pages.
|
14
14
|
This command takes a file with the following structure:
|
15
15
|
|
16
|
-
1 A few lines containing table headers, one per line
|
17
|
-
|
18
|
-
|
19
|
-
|
16
|
+
1. A few lines containing table headers, one per line
|
17
|
+
|
18
|
+
2. A group of lines containing the values for the first column of the first page, ending in an empty line
|
19
|
+
|
20
|
+
3. More groups of lines corresponding to other columns
|
21
|
+
|
22
|
+
4. Repetitions of 2 and 3 for more pages
|
20
23
|
|
21
24
|
This script will take care of matching the columns read with the headers specified
|
22
25
|
|
23
26
|
-h--help Help
|
27
|
+
-r--row Each block of lines is a row, not a column
|
24
28
|
EOF
|
25
29
|
|
26
30
|
SOPT.usage if options[:help]
|
@@ -50,19 +54,28 @@ while lines and lines.any?
|
|
50
54
|
lines = lines[block_size+1..-1]
|
51
55
|
columns[0] << first_block
|
52
56
|
(1..num_columns-1).each do |pos|
|
57
|
+
next if lines.nil?
|
53
58
|
block = lines[0..block_size-1]
|
54
59
|
lines = lines[block_size+1..-1]
|
55
60
|
columns[pos] << block
|
56
61
|
end
|
57
62
|
end
|
58
63
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
end
|
64
|
+
if options[:row]
|
65
|
+
columns.each do |n,list|
|
66
|
+
list.each do |values|
|
67
|
+
puts values * "\t"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
else
|
71
|
+
full_columns = []
|
72
|
+
num_columns.times do |i|
|
73
|
+
column = columns[i]
|
74
|
+
full_columns << column.flatten
|
75
|
+
end
|
64
76
|
|
65
|
-
puts "#" << fields * "\t"
|
66
|
-
Misc.zip_fields(full_columns).zip do |values|
|
67
|
-
|
77
|
+
puts "#" << fields * "\t"
|
78
|
+
Misc.zip_fields(full_columns).zip do |values|
|
79
|
+
puts values * "\t"
|
80
|
+
end
|
68
81
|
end
|
@@ -31,8 +31,10 @@ def usage(workflow = nil, task = nil, exception=nil)
|
|
31
31
|
else
|
32
32
|
puts Log.color :magenta, workflow.to_s
|
33
33
|
puts Log.color :magenta, "=" * workflow.to_s.length
|
34
|
-
|
35
|
-
|
34
|
+
if workflow.documentation[:description] and not workflow.documentation[:description].empty?
|
35
|
+
puts
|
36
|
+
puts workflow.documentation[:description]
|
37
|
+
end
|
36
38
|
puts
|
37
39
|
workflow.doc(task)
|
38
40
|
end
|
@@ -170,24 +172,24 @@ The first time a job is executed it will save the result. Once the job is done
|
|
170
172
|
you can re-doit using the `clean` parameter. The `recursive_clean` cleans all
|
171
173
|
the job dependencies recursively.
|
172
174
|
|
173
|
-
-h--help Show this help
|
174
|
-
-wd--workdir* Change the working directory of the workflow
|
175
|
-
-as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
|
176
|
-
-fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
|
177
|
-
-jn--jobname* Job name to use. The name 'Default' is used by default
|
178
|
-
-pn--printname Print the name of the job and exit without starting it
|
179
|
-
-pf--printpath Print the path of the job result
|
180
|
-
-cl--clean Clean the last step of the job so that it gets recomputed
|
181
|
-
-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
|
182
|
-
--fork Run job asyncronously and monitor progress. It monitors detached processes as well
|
183
|
-
--detach Run job asyncronously and detach process
|
184
|
-
--exec Run job with no persistence
|
185
|
-
-O--output* Save job result into file
|
186
|
-
-jf--job_file* Output one of the job produced files
|
187
|
-
-ljf--list_job_files List all the files produced in that step
|
175
|
+
-h--help Show this help
|
176
|
+
-wd--workdir* Change the working directory of the workflow
|
177
|
+
-as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
|
178
|
+
-fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
|
179
|
+
-jn--jobname* Job name to use. The name 'Default' is used by default
|
180
|
+
-pn--printname Print the name of the job and exit without starting it
|
181
|
+
-pf--printpath Print the path of the job result
|
182
|
+
-cl--clean Clean the last step of the job so that it gets recomputed
|
183
|
+
-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
|
184
|
+
--fork Run job asyncronously and monitor progress. It monitors detached processes as well
|
185
|
+
--detach Run job asyncronously and detach process
|
186
|
+
--exec Run job with no persistence
|
187
|
+
-O--output* Save job result into file
|
188
|
+
-jf--job_file* Output one of the job produced files
|
189
|
+
-ljf--list_job_files List all the files produced in that step
|
188
190
|
--load_inputs* Load inputs from a directory
|
189
|
-
--info Show the job info
|
190
|
-
--provenance Report the jobs provenance
|
191
|
+
--info Show the job info
|
192
|
+
--provenance Report the jobs provenance
|
191
193
|
-W--workflows* Load a list of workflows
|
192
194
|
-R--requires* Require a list of files
|
193
195
|
EOF
|
@@ -17,6 +17,43 @@ class TestKnowledgeBaseTraverse < Test::Unit::TestCase
|
|
17
17
|
assert res.first.include? "?1"
|
18
18
|
end
|
19
19
|
|
20
|
+
def test_traverse2
|
21
|
+
rules = []
|
22
|
+
rules << "?target =pina SF3B1"
|
23
|
+
rules << "?1 pina ?target - Method=MI:0006"
|
24
|
+
rules << "TP53 pina ?2"
|
25
|
+
rules << "?2 pina ?1"
|
26
|
+
res = kb.traverse rules
|
27
|
+
assert res.first.include? "?1"
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_traverse3
|
31
|
+
rules = []
|
32
|
+
rules << "?target = ENSG00000115524"
|
33
|
+
rules << "?1 pina ?target - Method=MI:0006"
|
34
|
+
rules << "TP53 pina ?2"
|
35
|
+
rules << "?2 pina ?1"
|
36
|
+
res = kb.traverse rules
|
37
|
+
assert res.first.include? "?1"
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
def test_traverse_acc
|
42
|
+
Log.severity = 0
|
43
|
+
rules_str=<<-EOF
|
44
|
+
?target{
|
45
|
+
?target pina SF3B1
|
46
|
+
}
|
47
|
+
?1 pina TP53
|
48
|
+
?1 pina ?target
|
49
|
+
EOF
|
50
|
+
rules = rules_str.split "\n"
|
51
|
+
res = kb.traverse rules
|
52
|
+
iii res
|
53
|
+
assert res.first.include? "?1"
|
54
|
+
end
|
55
|
+
|
56
|
+
|
20
57
|
def test_path
|
21
58
|
rules = []
|
22
59
|
rules << "?1 pina ARPC2"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.19.16
|
4
|
+
version: 5.19.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|