rbbt-util 5.19.16 → 5.19.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 30d1f9a2c70b373c0146ba34c922b2577af62b62
4
- data.tar.gz: 80a304d5cd33966a92e79f249ec08502e83cd306
3
+ metadata.gz: 7250e5af7af076517a13c322c1fe43506d9ece3d
4
+ data.tar.gz: c9ec9ec07c60be34a6f95c9bde5fd2deae7b3bbd
5
5
  SHA512:
6
- metadata.gz: fbda93383628d898f22a69ba89beedda4fb527287a3beef8428f915fe6441b76cb94d41e19ee507d44a4cf89243bd1379ccdb698589e88e20692f2443c5817fe
7
- data.tar.gz: 4888a0652f9f80275ea0aa0773839fb8ba6f58582ba1344c8ae1e5486c8f6839e4af16559272fdd796877a600ba0eec83d947b51d82821e0db6ff5e023f8ee36
6
+ metadata.gz: ef2ad33be33fbdef8d78a4461d31efbf25171133c553b0be00b67c4e776c5ca225a90609c1d0c5bae80ee4a827d9da1e073955317fca9a7ae4c736c7fa688f50
7
+ data.tar.gz: 8e98021bdafe0d005fea0047826a5068709dc009e84c941feaae4d9d1d48c4feac291c2915faf70c213ed71560ee74296fe02420c9656b325a38f6becbe0a357
@@ -38,7 +38,7 @@ module Entity
38
38
 
39
39
  begin
40
40
  index = TSV.translation_index(identifier_files, format, source, :persist => true)
41
- raise "No index from #{ source } to #{ format }: #{Misc.fingerprint identifier_files}" if index.nil?
41
+ raise "No index from #{ Misc.fingerprint source } to #{ Misc.fingerprint format }: #{Misc.fingerprint identifier_files}" if index.nil?
42
42
  index.unnamed = true
43
43
  index
44
44
  rescue
@@ -51,6 +51,7 @@ class KnowledgeBase
51
51
 
52
52
  rules.zip(all_matches).each do |rule, matches|
53
53
  source, db, target = rule.split /\s+/
54
+ next if matches.nil?
54
55
 
55
56
  if is_wildcard? source
56
57
  assigned = assignments[source] || []
@@ -74,19 +75,22 @@ class KnowledgeBase
74
75
  rule, *rest = rules
75
76
  source, db, target = rule.split /\s+/
76
77
 
78
+ wildcard_source = is_wildcard? source
79
+ wildcard_target = is_wildcard? target
80
+
77
81
  paths = {}
78
82
  matches = clean_matches[rule]
79
83
  Annotated.purge(matches).each do |match|
80
84
  new_assignments = nil
81
85
  match_source, _sep, match_target = match.partition "~"
82
86
 
83
- if is_wildcard? source
87
+ if wildcard_source
84
88
  next if assignments[source] and assignments[source] != match_source
85
89
  new_assignments ||= assignments.dup
86
90
  new_assignments[source] = match_source
87
91
  end
88
92
 
89
- if is_wildcard? target
93
+ if wildcard_target
90
94
  next if assignments[target] and assignments[target] != match_target
91
95
  new_assignments ||= assignments.dup
92
96
  new_assignments[target] = match_target
@@ -136,6 +140,7 @@ class KnowledgeBase
136
140
  source_entities, target_entities = identify db, source, target
137
141
 
138
142
  options = {:source => source_entities, :target => target_entities}
143
+ Log.debug "Traversing #{ db }: #{Misc.fingerprint options}"
139
144
  matches = kb.subset(db, options)
140
145
 
141
146
  if conditions
@@ -152,29 +157,41 @@ class KnowledgeBase
152
157
  matches
153
158
  end
154
159
 
160
+ def id_dbs(db)
161
+ if db.include? '?'
162
+ all_dbs = kb.registry.keys
163
+ _name, _sep, _kb = db.partition("@")
164
+ case
165
+ when _name[0] == '?'
166
+ dbs = all_dbs.select{|_db|
167
+ n,_s,d=_db.partition("@");
168
+ d.nil? or d.empty? or (d == _kd and assignments[_name].include?(n))
169
+ }
170
+ when _kb[0] == '?'
171
+ dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
172
+ end
173
+ else
174
+ dbs = [db]
175
+ end
176
+
177
+ dbs
178
+ end
155
179
 
156
180
  def traverse
157
181
  all_matches = []
158
-
182
+ path_rules = []
183
+ acc_var = nil
159
184
  rules.each do |rule|
160
185
  rule = rule.strip
161
186
  next if rule.empty?
162
187
 
163
- if m = rule.match(/([^\s]+)\s+([^\s]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
188
+ if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
189
+ Log.debug "Traverse rule: #{rule}"
190
+ path_rules << rule
164
191
 
165
192
  source, db, target, conditions = m.captures
166
- if db.include? '?'
167
- all_dbs = kb.registry.keys
168
- _name, _sep, _kb = db.partition("@")
169
- case
170
- when _kb[0] == '?'
171
- dbs = all_dbs.select{|_db| _db.partition("@").first == _name}
172
- when _name[0] == '?'
173
- dbs = all_dbs.select{|_db| _db.include?("@") ? db.partition("@").last == _kb : true}
174
- end
175
- else
176
- dbs = [db]
177
- end
193
+
194
+ dbs = id_dbs(db)
178
195
 
179
196
  rule_matches = []
180
197
  dbs.each do |_db|
@@ -197,17 +214,52 @@ class KnowledgeBase
197
214
  matches.each do |m|
198
215
  rule_matches << m
199
216
  end
217
+
218
+ assignments.each{|k,v| v.uniq! if v}
200
219
  end
201
220
 
202
221
  reassign rule_matches, source, target
203
222
 
204
223
  all_matches << rule_matches
224
+
225
+ elsif m = rule.match(/([^\s=]+)\s*=([^\s]*)\s*(.*)/)
226
+ Log.debug "Assign rule: #{rule}"
227
+ var, db, value_str = m.captures
228
+ names = value_str.split(",").collect{|v| v.strip}
229
+ if db.empty?
230
+ ids = names
231
+ else
232
+ dbs = id_dbs(db)
233
+ ids = names.collect{|name|
234
+ id = nil
235
+ dbs.each do |db|
236
+ sid, tid = identify db, name, name
237
+ id = (sid + tid).compact.first
238
+ break if id
239
+ end
240
+ id
241
+ }
242
+ end
243
+ assignments[var] = ids
244
+
245
+ elsif m = rule.match(/(\?[^\s{]+)\s*{/)
246
+ acc_var = m.captures.first
247
+ Log.debug "Start assign block: #{acc_var}"
248
+ elsif m = rule.match(/^\s*}\s*$/)
249
+ Log.debug "Close assign block: #{acc_var}"
250
+ saved_assign = assignments[acc_var]
251
+ assignments.clear
252
+ assignments[acc_var] = saved_assign
253
+ all_matches = []
254
+ path_rules = []
205
255
  else
206
256
  raise "Rule not understood: #{rule}"
207
257
  end
208
258
  end
209
259
 
210
- paths = find_paths rules, all_matches, assignments
260
+ Log.debug "Finding paths: #{all_matches.length}"
261
+ paths = find_paths path_rules, all_matches, assignments
262
+ Log.debug "Found paths: #{paths.length}"
211
263
 
212
264
  [assignments, paths]
213
265
  end
data/lib/rbbt/persist.rb CHANGED
@@ -237,6 +237,11 @@ module Persist
237
237
  callback = stream.respond_to?(:callback)? stream.callback : nil
238
238
  abort_callback = stream.respond_to?(:abort_callback)? stream.abort_callback : nil
239
239
 
240
+ # This is to avoid calling the callbacks twice, since they have been
241
+ # moved to the new 'res' stream
242
+ stream.callback = nil
243
+ stream.abort_callback = nil
244
+
240
245
  res = tee_stream(stream, path, type, callback, abort_callback, lockfile)
241
246
 
242
247
  res.lockfile = lockfile
@@ -38,11 +38,11 @@ module TSV
38
38
  end
39
39
  end
40
40
 
41
- def init
41
+ def init(init_options = {})
42
42
  options = @options.dup
43
43
  key_field, fields = Misc.process_options options, :key_field, :fields
44
44
 
45
- str = TSV.header_lines(key_field, fields, options)
45
+ str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
46
46
 
47
47
  Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
48
48
 
data/lib/rbbt/tsv/util.rb CHANGED
@@ -179,14 +179,17 @@ module TSV
179
179
  if Hash === entry_hash
180
180
  sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
181
181
  preamble = entry_hash[:preamble]
182
+ header_hash = entry_hash[:header_hash]
182
183
  end
183
184
 
185
+ header_hash = "#" if header_hash.nil?
186
+
184
187
  preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
185
188
 
186
189
  str = ""
187
190
  str << preamble.strip << "\n" if preamble and not preamble.empty?
188
191
  if fields
189
- str << "#" << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
192
+ str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
190
193
  end
191
194
 
192
195
  str
@@ -1,8 +1,9 @@
1
1
  module Docker
2
- def self.run(image,cmd, options)
2
+ def self.run(image, cmd, options)
3
3
  mounts, job_inputs, directory, pipe = Misc.process_options options, :mounts, :job_inputs, :directory, :pipe
4
4
 
5
5
  if mounts
6
+ mounts.each{|t,s| FileUtils.mkdir_p s unless File.exists? s}
6
7
  mount_cmd = mounts.sort.collect{|t,s| "-v " + ["'" + s + "'", "'" + t + "'"] * ":" } * " "
7
8
  else
8
9
  mount_cmd = ""
@@ -171,4 +171,12 @@ module ConcurrentStream
171
171
  end
172
172
  end
173
173
 
174
+ def add_callback(&block)
175
+ old_callback = callback
176
+ @callback = Proc.new do
177
+ old_callback.call if old_callback
178
+ block.call
179
+ end
180
+ end
181
+
174
182
  end
@@ -295,7 +295,11 @@ class Step
295
295
  return nil if info[:pid].nil?
296
296
 
297
297
  pid = @pid || info[:pid]
298
- return Misc.pid_exists?(pid)
298
+ if Misc.pid_exists?(pid)
299
+ pid
300
+ else
301
+ false
302
+ end
299
303
  end
300
304
 
301
305
  def error?
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env ruby
1
2
 
2
3
  require 'rbbt/util/R'
3
4
 
@@ -8,10 +9,6 @@ Calculate density
8
9
 
9
10
  $ rbbt stat density <file>
10
11
 
11
- Display summary information. Works with Tokyocabinet HDB and BDB as well.
12
-
13
- -tch--tokyocabinet File is a TC HDB
14
- -tcb--tokyocabinet_bd File is a TC BDB
15
12
  -h--help Help
16
13
  EOF
17
14
 
@@ -13,14 +13,18 @@ $ rbbt tsv assemble_pdf_table file.txt
13
13
  When extracting tables from PDF they are often laid out one column at a time, divided by pages.
14
14
  This command takes a file with the following structure:
15
15
 
16
- 1 A few lines containing table headers, one per line
17
- 2 A group of lines containing the values for the first column of the first page, ending in an empty line
18
- 3 More groups of lines corresponding to other columns
19
- 4 Repetitions of 2 and 3 for more pages
16
+ 1. A few lines containing table headers, one per line
17
+
18
+ 2. A group of lines containing the values for the first column of the first page, ending in an empty line
19
+
20
+ 3. More groups of lines corresponding to other columns
21
+
22
+ 4. Repetitions of 2 and 3 for more pages
20
23
 
21
24
  This script will take care of matching the columns read with the headers specified
22
25
 
23
26
  -h--help Help
27
+ -r--row Each block of lines is a row, not a column
24
28
  EOF
25
29
 
26
30
  SOPT.usage if options[:help]
@@ -50,19 +54,28 @@ while lines and lines.any?
50
54
  lines = lines[block_size+1..-1]
51
55
  columns[0] << first_block
52
56
  (1..num_columns-1).each do |pos|
57
+ next if lines.nil?
53
58
  block = lines[0..block_size-1]
54
59
  lines = lines[block_size+1..-1]
55
60
  columns[pos] << block
56
61
  end
57
62
  end
58
63
 
59
- full_columns = []
60
- num_columns.times do |i|
61
- column = columns[i]
62
- full_columns << column.flatten
63
- end
64
+ if options[:row]
65
+ columns.each do |n,list|
66
+ list.each do |values|
67
+ puts values * "\t"
68
+ end
69
+ end
70
+ else
71
+ full_columns = []
72
+ num_columns.times do |i|
73
+ column = columns[i]
74
+ full_columns << column.flatten
75
+ end
64
76
 
65
- puts "#" << fields * "\t"
66
- Misc.zip_fields(full_columns).zip do |values|
67
- puts values * "\t"
77
+ puts "#" << fields * "\t"
78
+ Misc.zip_fields(full_columns).zip do |values|
79
+ puts values * "\t"
80
+ end
68
81
  end
@@ -31,8 +31,10 @@ def usage(workflow = nil, task = nil, exception=nil)
31
31
  else
32
32
  puts Log.color :magenta, workflow.to_s
33
33
  puts Log.color :magenta, "=" * workflow.to_s.length
34
- puts
35
- puts workflow.documentation[:description]
34
+ if workflow.documentation[:description] and not workflow.documentation[:description].empty?
35
+ puts
36
+ puts workflow.documentation[:description]
37
+ end
36
38
  puts
37
39
  workflow.doc(task)
38
40
  end
@@ -170,24 +172,24 @@ The first time a job is executed it will save the result. Once the job is done
170
172
  you can re-doit using the `clean` parameter. The `recursive_clean` cleans all
171
173
  the job dependencies recursively.
172
174
 
173
- -h--help Show this help:
174
- -wd--workdir* Change the working directory of the workflow:
175
- -as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default:
176
- -fs--field_separator* Change the character that separates fields of TSV files '\\t' by default:
177
- -jn--jobname* Job name to use. The name 'Default' is used by default:
178
- -pn--printname Print the name of the job and exit without starting it:
179
- -pf--printpath Print the path of the job result:
180
- -cl--clean Clean the last step of the job so that it gets recomputed:
181
- -rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
182
- --fork Run job asyncronously and monitor progress. It monitors detached processes as well:
183
- --detach Run job asyncronously and detach process:
184
- --exec Run job with no persistence:
185
- -O--output* Save job result into file:
186
- -jf--job_file* Output one of the job produced files:
187
- -ljf--list_job_files List all the files produced in that step:
175
+ -h--help Show this help
176
+ -wd--workdir* Change the working directory of the workflow
177
+ -as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
178
+ -fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
179
+ -jn--jobname* Job name to use. The name 'Default' is used by default
180
+ -pn--printname Print the name of the job and exit without starting it
181
+ -pf--printpath Print the path of the job result
182
+ -cl--clean Clean the last step of the job so that it gets recomputed
183
+ -rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
184
+ --fork Run job asyncronously and monitor progress. It monitors detached processes as well
185
+ --detach Run job asyncronously and detach process
186
+ --exec Run job with no persistence
187
+ -O--output* Save job result into file
188
+ -jf--job_file* Output one of the job produced files
189
+ -ljf--list_job_files List all the files produced in that step
188
190
  --load_inputs* Load inputs from a directory
189
- --info Show the job info:
190
- --provenance Report the jobs provenance:
191
+ --info Show the job info
192
+ --provenance Report the jobs provenance
191
193
  -W--workflows* Load a list of workflows
192
194
  -R--requires* Require a list of files
193
195
  EOF
@@ -17,6 +17,43 @@ class TestKnowledgeBaseTraverse < Test::Unit::TestCase
17
17
  assert res.first.include? "?1"
18
18
  end
19
19
 
20
+ def test_traverse2
21
+ rules = []
22
+ rules << "?target =pina SF3B1"
23
+ rules << "?1 pina ?target - Method=MI:0006"
24
+ rules << "TP53 pina ?2"
25
+ rules << "?2 pina ?1"
26
+ res = kb.traverse rules
27
+ assert res.first.include? "?1"
28
+ end
29
+
30
+ def test_traverse3
31
+ rules = []
32
+ rules << "?target = ENSG00000115524"
33
+ rules << "?1 pina ?target - Method=MI:0006"
34
+ rules << "TP53 pina ?2"
35
+ rules << "?2 pina ?1"
36
+ res = kb.traverse rules
37
+ assert res.first.include? "?1"
38
+ end
39
+
40
+
41
+ def test_traverse_acc
42
+ Log.severity = 0
43
+ rules_str=<<-EOF
44
+ ?target{
45
+ ?target pina SF3B1
46
+ }
47
+ ?1 pina TP53
48
+ ?1 pina ?target
49
+ EOF
50
+ rules = rules_str.split "\n"
51
+ res = kb.traverse rules
52
+ iii res
53
+ assert res.first.include? "?1"
54
+ end
55
+
56
+
20
57
  def test_path
21
58
  rules = []
22
59
  rules << "?1 pina ARPC2"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.19.16
4
+ version: 5.19.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-09 00:00:00.000000000 Z
11
+ date: 2016-02-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake