rbbt-util 5.19.16 → 5.19.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 30d1f9a2c70b373c0146ba34c922b2577af62b62
-  data.tar.gz: 80a304d5cd33966a92e79f249ec08502e83cd306
+  metadata.gz: 7250e5af7af076517a13c322c1fe43506d9ece3d
+  data.tar.gz: c9ec9ec07c60be34a6f95c9bde5fd2deae7b3bbd
 SHA512:
-  metadata.gz: fbda93383628d898f22a69ba89beedda4fb527287a3beef8428f915fe6441b76cb94d41e19ee507d44a4cf89243bd1379ccdb698589e88e20692f2443c5817fe
-  data.tar.gz: 4888a0652f9f80275ea0aa0773839fb8ba6f58582ba1344c8ae1e5486c8f6839e4af16559272fdd796877a600ba0eec83d947b51d82821e0db6ff5e023f8ee36
+  metadata.gz: ef2ad33be33fbdef8d78a4461d31efbf25171133c553b0be00b67c4e776c5ca225a90609c1d0c5bae80ee4a827d9da1e073955317fca9a7ae4c736c7fa688f50
+  data.tar.gz: 8e98021bdafe0d005fea0047826a5068709dc009e84c941feaae4d9d1d48c4feac291c2915faf70c213ed71560ee74296fe02420c9656b325a38f6becbe0a357
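Only the recorded digests change in this file; each release lists SHA1 and SHA512 sums for the gem's metadata.gz and data.tar.gz archives. A minimal sketch of recomputing such digests locally with Ruby's standard digest library (the file paths are placeholders for an unpacked gem):

    require 'digest'

    # Recompute the digests that checksums.yaml records for a gem's inner
    # archives; run from a directory holding the unpacked files.
    %w(metadata.gz data.tar.gz).each do |file|
      next unless File.exist?(file)
      data = File.binread(file)
      puts "#{file} SHA1:   #{Digest::SHA1.hexdigest(data)}"
      puts "#{file} SHA512: #{Digest::SHA512.hexdigest(data)}"
    end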
@@ -38,7 +38,7 @@ module Entity

   begin
     index = TSV.translation_index(identifier_files, format, source, :persist => true)
-    raise "No index from #{ source } to #{ format }: #{Misc.fingerprint identifier_files}" if index.nil?
+    raise "No index from #{ Misc.fingerprint source } to #{ Misc.fingerprint format }: #{Misc.fingerprint identifier_files}" if index.nil?
     index.unnamed = true
     index
   rescue
@@ -51,6 +51,7 @@ class KnowledgeBase

     rules.zip(all_matches).each do |rule, matches|
       source, db, target = rule.split /\s+/
+      next if matches.nil?

       if is_wildcard? source
         assigned = assignments[source] || []
@@ -74,19 +75,22 @@ class KnowledgeBase
       rule, *rest = rules
       source, db, target = rule.split /\s+/

+      wildcard_source = is_wildcard? source
+      wildcard_target = is_wildcard? target
+
       paths = {}
       matches = clean_matches[rule]
       Annotated.purge(matches).each do |match|
         new_assignments = nil
         match_source, _sep, match_target = match.partition "~"

-        if is_wildcard? source
+        if wildcard_source
           next if assignments[source] and assignments[source] != match_source
           new_assignments ||= assignments.dup
           new_assignments[source] = match_source
         end

-        if is_wildcard? target
+        if wildcard_target
           next if assignments[target] and assignments[target] != match_target
           new_assignments ||= assignments.dup
           new_assignments[target] = match_target
@@ -136,6 +140,7 @@ class KnowledgeBase
       source_entities, target_entities = identify db, source, target

       options = {:source => source_entities, :target => target_entities}
+      Log.debug "Traversing #{ db }: #{Misc.fingerprint options}"
       matches = kb.subset(db, options)

       if conditions
@@ -152,29 +157,41 @@ class KnowledgeBase
     matches
   end

+  def id_dbs(db)
+    if db.include? '?'
+      all_dbs = kb.registry.keys
+      _name, _sep, _kb = db.partition("@")
+      case
+      when _name[0] == '?'
+        dbs = all_dbs.select{|_db|
+          n,_s,d=_db.partition("@");
+          d.nil? or d.empty? or (d == _kd and assignments[_name].include?(n))
+        }
+      when _kb[0] == '?'
+        dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
+      end
+    else
+      dbs = [db]
+    end
+
+    dbs
+  end

   def traverse
     all_matches = []
-
+    path_rules = []
+    acc_var = nil
     rules.each do |rule|
       rule = rule.strip
       next if rule.empty?

-      if m = rule.match(/([^\s]+)\s+([^\s]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
+      if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
+        Log.debug "Traverse rule: #{rule}"
+        path_rules << rule

         source, db, target, conditions = m.captures
-        if db.include? '?'
-          all_dbs = kb.registry.keys
-          _name, _sep, _kb = db.partition("@")
-          case
-          when _kb[0] == '?'
-            dbs = all_dbs.select{|_db| _db.partition("@").first == _name}
-          when _name[0] == '?'
-            dbs = all_dbs.select{|_db| _db.include?("@") ? db.partition("@").last == _kb : true}
-          end
-        else
-          dbs = [db]
-        end
+
+        dbs = id_dbs(db)

         rule_matches = []
         dbs.each do |_db|
@@ -197,17 +214,52 @@ class KnowledgeBase
           matches.each do |m|
             rule_matches << m
           end
+
+          assignments.each{|k,v| v.uniq! if v}
         end

         reassign rule_matches, source, target

         all_matches << rule_matches
+
+      elsif m = rule.match(/([^\s=]+)\s*=([^\s]*)\s*(.*)/)
+        Log.debug "Assign rule: #{rule}"
+        var, db, value_str = m.captures
+        names = value_str.split(",").collect{|v| v.strip}
+        if db.empty?
+          ids = names
+        else
+          dbs = id_dbs(db)
+          ids = names.collect{|name|
+            id = nil
+            dbs.each do |db|
+              sid, tid = identify db, name, name
+              id = (sid + tid).compact.first
+              break if id
+            end
+            id
+          }
+        end
+        assignments[var] = ids
+
+      elsif m = rule.match(/(\?[^\s{]+)\s*{/)
+        acc_var = m.captures.first
+        Log.debug "Start assign block: #{acc_var}"
+      elsif m = rule.match(/^\s*}\s*$/)
+        Log.debug "Close assign block: #{acc_var}"
+        saved_assign = assignments[acc_var]
+        assignments.clear
+        assignments[acc_var] = saved_assign
+        all_matches = []
+        path_rules = []
       else
         raise "Rule not understood: #{rule}"
       end
     end

-    paths = find_paths rules, all_matches, assignments
+    Log.debug "Finding paths: #{all_matches.length}"
+    paths = find_paths path_rules, all_matches, assignments
+    Log.debug "Found paths: #{paths.length}"

     [assignments, paths]
   end
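The new id_dbs helper resolves database specifiers of the form "name@knowledgebase" when either half is a '?' wildcard bound earlier by the rules (the comparison against _kd in the first branch looks like a typo for _kb). A standalone sketch of the same matching idea, using made-up registry keys and bindings instead of the gem's kb.registry and assignments:

    # Sketch of wildcard database resolution, loosely mirroring id_dbs above.
    # Registry keys look like "name@kb"; a leading '?' marks a variable that
    # must already be bound in `assignments`.
    def resolve_dbs(spec, registry_keys, assignments)
      return [spec] unless spec.include?('?')

      name, _sep, kb = spec.partition('@')
      registry_keys.select do |key|
        n, _s, k = key.partition('@')
        if name.start_with?('?')   # "?var@kb": any name bound to ?var in that kb
          (k.empty? || k == kb) && Array(assignments[name]).include?(n)
        else                       # "name@?var": fixed name, kb bound to ?var
          n == name && Array(assignments[kb]).include?(k)
        end
      end
    end

    registry = %w(pina@genes string@genes reactome@pathways)
    bindings = { '?db' => %w(pina string) }
    p resolve_dbs('?db@genes', registry, bindings)  # => ["pina@genes", "string@genes"]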
data/lib/rbbt/persist.rb CHANGED
@@ -237,6 +237,11 @@ module Persist
     callback = stream.respond_to?(:callback)? stream.callback : nil
     abort_callback = stream.respond_to?(:abort_callback)? stream.abort_callback : nil

+    # This is to avoid calling the callbacks twice, since they have been
+    # moved to the new 'res' stream
+    stream.callback = nil
+    stream.abort_callback = nil
+
     res = tee_stream(stream, path, type, callback, abort_callback, lockfile)

     res.lockfile = lockfile
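The added comment states the intent: the callbacks are handed to the tee'd 'res' stream, so they are cleared on the original stream to keep them from firing twice. A plain-Ruby sketch of that hand-over pattern (ToyStream is illustrative, not the rbbt stream API):

    # "Move the callback, then clear it": only the stream that will actually
    # finish the work keeps the callback.
    class ToyStream
      attr_accessor :callback
      def finish
        callback.call if callback
      end
    end

    upstream = ToyStream.new
    upstream.callback = -> { puts "done" }

    tee = ToyStream.new
    tee.callback = upstream.callback   # hand the callback over...
    upstream.callback = nil            # ...and clear it so it cannot fire twice

    upstream.finish   # prints nothing
    tee.finish        # prints "done"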
@@ -38,11 +38,11 @@ module TSV
     end
   end

-  def init
+  def init(init_options = {})
     options = @options.dup
     key_field, fields = Misc.process_options options, :key_field, :fields

-    str = TSV.header_lines(key_field, fields, options)
+    str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))

     Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?

data/lib/rbbt/tsv/util.rb CHANGED
@@ -179,14 +179,17 @@ module TSV
     if Hash === entry_hash
       sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
       preamble = entry_hash[:preamble]
+      header_hash = entry_hash[:header_hash]
     end

+    header_hash = "#" if header_hash.nil?
+
     preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?

     str = ""
     str << preamble.strip << "\n" if preamble and not preamble.empty?
     if fields
-      str << "#" << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
+      str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
     end

     str
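header_lines now honours a :header_hash entry, letting callers replace or drop the leading '#' of the header row while keeping the old default when the option is absent. A small standalone sketch of that defaulting logic (a stand-in function, not TSV.header_lines itself):

    # Build a TSV-style header line; header_hash defaults to "#" as above.
    def header_line(key_field, fields, sep: "\t", header_hash: nil)
      header_hash = "#" if header_hash.nil?
      header_hash + ([key_field || "ID", *fields] * sep) + "\n"
    end

    print header_line("Gene", %w(Ensembl UniProt))                   # "#Gene\tEnsembl\tUniProt"
    print header_line("Gene", %w(Ensembl UniProt), header_hash: "")  # header row without the '#'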
@@ -1,8 +1,9 @@
 module Docker
-  def self.run(image,cmd, options)
+  def self.run(image, cmd, options)
     mounts, job_inputs, directory, pipe = Misc.process_options options, :mounts, :job_inputs, :directory, :pipe

     if mounts
+      mounts.each{|t,s| FileUtils.mkdir_p s unless File.exists? s}
       mount_cmd = mounts.sort.collect{|t,s| "-v " + ["'" + s + "'", "'" + t + "'"] * ":" } * " "
     else
       mount_cmd = ""
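The added line creates each host-side mount directory before the docker command is assembled, so missing paths do not make the run fail or end up created by the Docker daemon. A sketch of the same preparation step, keeping the { container_path => host_path } layout used above (paths are placeholders):

    require 'fileutils'

    # Pre-create host directories for bind mounts and build the -v flags.
    mounts = { "/data" => "/tmp/rbbt_example/data", "/work" => "/tmp/rbbt_example/work" }

    mounts.each { |_target, source| FileUtils.mkdir_p(source) unless File.exist?(source) }

    mount_cmd = mounts.sort.map { |target, source| "-v '#{source}':'#{target}'" }.join(" ")
    puts "docker run #{mount_cmd} some/image"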
@@ -171,4 +171,12 @@ module ConcurrentStream
     end
   end

+  def add_callback(&block)
+    old_callback = callback
+    @callback = Proc.new do
+      old_callback.call if old_callback
+      block.call
+    end
+  end
+
 end
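add_callback chains a new block onto whatever callback is already registered, so several cleanup actions can be attached to one stream and run oldest-first. The same idiom in isolation, on a toy class rather than the ConcurrentStream module:

    class ToyConcurrentStream
      attr_accessor :callback

      # Wrap the previous callback so every registered block still runs.
      def add_callback(&block)
        old_callback = callback
        @callback = Proc.new do
          old_callback.call if old_callback
          block.call
        end
      end
    end

    s = ToyConcurrentStream.new
    s.add_callback { puts "close file" }
    s.add_callback { puts "notify waiters" }
    s.callback.call
    # => close file
    #    notify waiters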
@@ -295,7 +295,11 @@ class Step
     return nil if info[:pid].nil?

     pid = @pid || info[:pid]
-    return Misc.pid_exists?(pid)
+    if Misc.pid_exists?(pid)
+      pid
+    else
+      false
+    end
   end

   def error?
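The running check now returns the pid itself when the process is alive and false when it is gone, so callers can use the return value both as a boolean and as the pid to act on. A standalone sketch of that pid-or-false idiom, with pid_alive? standing in for Misc.pid_exists?:

    # Signal 0 performs the existence/permission check without sending anything.
    def pid_alive?(pid)
      Process.kill(0, pid)
      true
    rescue Errno::ESRCH
      false
    rescue Errno::EPERM
      true  # the process exists but belongs to another user
    end

    def running(pid)
      pid_alive?(pid) ? pid : false
    end

    p running(Process.pid)   # => the current pid
    p running(2**22 - 1)     # almost certainly => false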
@@ -1,3 +1,4 @@
+#!/usr/bin/env ruby

 require 'rbbt/util/R'

@@ -8,10 +9,6 @@ Calculate density

 $ rbbt stat density <file>

- Display summary information. Works with Tokyocabinet HDB and BDB as well.
-
- -tch--tokyocabinet File is a TC HDB
- -tcb--tokyocabinet_bd File is a TC BDB
 -h--help Help
 EOF

@@ -13,14 +13,18 @@ $ rbbt tsv assemble_pdf_table file.txt
 When extracting tables from PDF they are often laid out one column at a time, divided by pages.
 This command takes a file with the following structure:

- 1 A few lines containing table headers, one per line
- 2 A group of lines containing the values for the first column of the first page, ending in an empty line
- 3 More groups of lines corresponding to other columns
- 4 Repetitions of 2 and 3 for more pages
+ 1. A few lines containing table headers, one per line
+
+ 2. A group of lines containing the values for the first column of the first page, ending in an empty line
+
+ 3. More groups of lines corresponding to other columns
+
+ 4. Repetitions of 2 and 3 for more pages

 This script will take care of matching the columns read with the headers specified

 -h--help Help
+-r--row Each block of lines is a row, not a column
 EOF

 SOPT.usage if options[:help]
@@ -50,19 +54,28 @@ while lines and lines.any?
   lines = lines[block_size+1..-1]
   columns[0] << first_block
   (1..num_columns-1).each do |pos|
+    next if lines.nil?
     block = lines[0..block_size-1]
     lines = lines[block_size+1..-1]
     columns[pos] << block
   end
 end

-full_columns = []
-num_columns.times do |i|
-  column = columns[i]
-  full_columns << column.flatten
-end
+if options[:row]
+  columns.each do |n,list|
+    list.each do |values|
+      puts values * "\t"
+    end
+  end
+else
+  full_columns = []
+  num_columns.times do |i|
+    column = columns[i]
+    full_columns << column.flatten
+  end

-puts "#" << fields * "\t"
-Misc.zip_fields(full_columns).zip do |values|
-  puts values * "\t"
+  puts "#" << fields * "\t"
+  Misc.zip_fields(full_columns).zip do |values|
+    puts values * "\t"
+  end
 end
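With the new -r/--row flag the collected blocks are printed directly as rows instead of being flattened into columns and zipped back together. A small standalone illustration of the two output modes over the same blocks (the data is made up; this is not the script itself):

    blocks = [%w(a1 a2 a3), %w(b1 b2 b3)]

    # Column mode: each block is a column, so rows come from zipping them.
    blocks[0].zip(*blocks[1..-1]).each { |row| puts row * "\t" }
    # a1  b1
    # a2  b2
    # a3  b3

    # Row mode (-r): each block already is a row.
    blocks.each { |row| puts row * "\t" }
    # a1  a2  a3
    # b1  b2  b3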
@@ -31,8 +31,10 @@ def usage(workflow = nil, task = nil, exception=nil)
   else
     puts Log.color :magenta, workflow.to_s
     puts Log.color :magenta, "=" * workflow.to_s.length
-    puts
-    puts workflow.documentation[:description]
+    if workflow.documentation[:description] and not workflow.documentation[:description].empty?
+      puts
+      puts workflow.documentation[:description]
+    end
     puts
     workflow.doc(task)
   end
@@ -170,24 +172,24 @@ The first time a job is executed it will save the result. Once the job is done
 you can re-doit using the `clean` parameter. The `recursive_clean` cleans all
 the job dependencies recursively.

- -h--help Show this help:
- -wd--workdir* Change the working directory of the workflow:
- -as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default:
- -fs--field_separator* Change the character that separates fields of TSV files '\\t' by default:
- -jn--jobname* Job name to use. The name 'Default' is used by default:
- -pn--printname Print the name of the job and exit without starting it:
- -pf--printpath Print the path of the job result:
- -cl--clean Clean the last step of the job so that it gets recomputed:
- -rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
- --fork Run job asyncronously and monitor progress. It monitors detached processes as well:
- --detach Run job asyncronously and detach process:
- --exec Run job with no persistence:
- -O--output* Save job result into file:
- -jf--job_file* Output one of the job produced files:
- -ljf--list_job_files List all the files produced in that step:
+ -h--help Show this help
+ -wd--workdir* Change the working directory of the workflow
+ -as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
+ -fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
+ -jn--jobname* Job name to use. The name 'Default' is used by default
+ -pn--printname Print the name of the job and exit without starting it
+ -pf--printpath Print the path of the job result
+ -cl--clean Clean the last step of the job so that it gets recomputed
+ -rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
+ --fork Run job asyncronously and monitor progress. It monitors detached processes as well
+ --detach Run job asyncronously and detach process
+ --exec Run job with no persistence
+ -O--output* Save job result into file
+ -jf--job_file* Output one of the job produced files
+ -ljf--list_job_files List all the files produced in that step
 --load_inputs* Load inputs from a directory
- --info Show the job info:
- --provenance Report the jobs provenance:
+ --info Show the job info
+ --provenance Report the jobs provenance
 -W--workflows* Load a list of workflows
 -R--requires* Require a list of files
 EOF
@@ -17,6 +17,43 @@ class TestKnowledgeBaseTraverse < Test::Unit::TestCase
     assert res.first.include? "?1"
   end

+  def test_traverse2
+    rules = []
+    rules << "?target =pina SF3B1"
+    rules << "?1 pina ?target - Method=MI:0006"
+    rules << "TP53 pina ?2"
+    rules << "?2 pina ?1"
+    res = kb.traverse rules
+    assert res.first.include? "?1"
+  end
+
+  def test_traverse3
+    rules = []
+    rules << "?target = ENSG00000115524"
+    rules << "?1 pina ?target - Method=MI:0006"
+    rules << "TP53 pina ?2"
+    rules << "?2 pina ?1"
+    res = kb.traverse rules
+    assert res.first.include? "?1"
+  end
+
+
+  def test_traverse_acc
+    Log.severity = 0
+    rules_str=<<-EOF
+    ?target{
+    ?target pina SF3B1
+    }
+    ?1 pina TP53
+    ?1 pina ?target
+    EOF
+    rules = rules_str.split "\n"
+    res = kb.traverse rules
+    iii res
+    assert res.first.include? "?1"
+  end
+
+
   def test_path
     rules = []
     rules << "?1 pina ARPC2"
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-util
 version: !ruby/object:Gem::Version
-  version: 5.19.16
+  version: 5.19.17
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-02-09 00:00:00.000000000 Z
+date: 2016-02-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake