RubyGems - rbbt-util - Versions diffs - 5.19.16 → 5.19.17 - Mend

rbbt-util 5.19.16 → 5.19.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/lib/rbbt/entity/identifiers.rb +1 -1
data/lib/rbbt/knowledge_base/traverse.rb +69 -17
data/lib/rbbt/persist.rb +5 -0
data/lib/rbbt/tsv/dumper.rb +2 -2
data/lib/rbbt/tsv/util.rb +4 -1
data/lib/rbbt/util/docker.rb +2 -1
data/lib/rbbt/util/misc/concurrent_stream.rb +8 -0
data/lib/rbbt/workflow/accessor.rb +5 -1
data/share/rbbt_commands/stat/density +1 -4
data/share/rbbt_commands/tsv/assemble_pdf_table +25 -12
data/share/rbbt_commands/workflow/task +21 -19
data/test/rbbt/knowledge_base/test_traverse.rb +37 -0
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 30d1f9a2c70b373c0146ba34c922b2577af62b62
-  data.tar.gz: 80a304d5cd33966a92e79f249ec08502e83cd306
+  metadata.gz: 7250e5af7af076517a13c322c1fe43506d9ece3d
+  data.tar.gz: c9ec9ec07c60be34a6f95c9bde5fd2deae7b3bbd
 SHA512:
-  metadata.gz: fbda93383628d898f22a69ba89beedda4fb527287a3beef8428f915fe6441b76cb94d41e19ee507d44a4cf89243bd1379ccdb698589e88e20692f2443c5817fe
-  data.tar.gz: 4888a0652f9f80275ea0aa0773839fb8ba6f58582ba1344c8ae1e5486c8f6839e4af16559272fdd796877a600ba0eec83d947b51d82821e0db6ff5e023f8ee36
+  metadata.gz: ef2ad33be33fbdef8d78a4461d31efbf25171133c553b0be00b67c4e776c5ca225a90609c1d0c5bae80ee4a827d9da1e073955317fca9a7ae4c736c7fa688f50
+  data.tar.gz: 8e98021bdafe0d005fea0047826a5068709dc009e84c941feaae4d9d1d48c4feac291c2915faf70c213ed71560ee74296fe02420c9656b325a38f6becbe0a357

data/lib/rbbt/entity/identifiers.rb CHANGED

@@ -38,7 +38,7 @@ module Entity
             begin
               index = TSV.translation_index(identifier_files, format, source, :persist => true)
-              raise "No index from #{ source } to #{ format }: #{Misc.fingerprint identifier_files}" if index.nil?
+              raise "No index from #{ Misc.fingerprint source } to #{ Misc.fingerprint format }: #{Misc.fingerprint identifier_files}" if index.nil?
               index.unnamed = true
               index
             rescue

data/lib/rbbt/knowledge_base/traverse.rb CHANGED

@@ -51,6 +51,7 @@ class KnowledgeBase
       rules.zip(all_matches).each do |rule, matches|
         source, db, target = rule.split /\s+/
+        next if matches.nil?
         if is_wildcard? source
           assigned = assignments[source] || []
@@ -74,19 +75,22 @@ class KnowledgeBase
       rule, *rest = rules
       source, db, target = rule.split /\s+/
+      wildcard_source = is_wildcard? source
+      wildcard_target = is_wildcard? target
       paths = {}
       matches = clean_matches[rule]
       Annotated.purge(matches).each do |match|
         new_assignments = nil
         match_source, _sep, match_target = match.partition "~"
-        if is_wildcard? source
+        if wildcard_source
           next if assignments[source] and assignments[source]  != match_source
           new_assignments ||= assignments.dup
           new_assignments[source] = match_source
         end
-        if is_wildcard? target
+        if wildcard_target
           next if assignments[target] and assignments[target]  != match_target
           new_assignments ||= assignments.dup
           new_assignments[target] = match_target
@@ -136,6 +140,7 @@ class KnowledgeBase
       source_entities, target_entities = identify db, source, target
       options = {:source => source_entities, :target => target_entities}
+      Log.debug "Traversing #{ db }: #{Misc.fingerprint options}"
       matches = kb.subset(db, options)
       if conditions
@@ -152,29 +157,41 @@ class KnowledgeBase
       matches
     end
+    def id_dbs(db)
+      if db.include? '?'
+        all_dbs = kb.registry.keys
+        _name, _sep, _kb = db.partition("@")
+        case
+        when _name[0] == '?'
+          dbs = all_dbs.select{|_db|
+            n,_s,d=_db.partition("@");
+            d.nil? or d.empty? or (d == _kd and assignments[_name].include?(n))
+          }
+        when _kb[0] == '?'
+          dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
+        end
+      else
+        dbs = [db]
+      end
+      dbs
+    end
     def traverse
       all_matches = []
+      path_rules = []
+      acc_var = nil
       rules.each do |rule|
         rule = rule.strip
         next if rule.empty?
-        if m = rule.match(/([^\s]+)\s+([^\s]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
+        if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
+          Log.debug "Traverse rule: #{rule}"
+          path_rules << rule
           source, db, target, conditions = m.captures
-          if db.include? '?'
-            all_dbs = kb.registry.keys
-            _name, _sep, _kb = db.partition("@")
-            case
-            when _kb[0] == '?'
-              dbs = all_dbs.select{|_db| _db.partition("@").first == _name}
-            when _name[0] == '?'
-              dbs = all_dbs.select{|_db| _db.include?("@") ? db.partition("@").last == _kb : true}
-            end
-          else
-            dbs = [db]
-          end
+          dbs = id_dbs(db)
           rule_matches = []
           dbs.each do |_db|
@@ -197,17 +214,52 @@ class KnowledgeBase
             matches.each do |m|
               rule_matches << m
             end
+            assignments.each{|k,v| v.uniq! if v}
           end
           reassign rule_matches, source, target
           all_matches << rule_matches
+        elsif m = rule.match(/([^\s=]+)\s*=([^\s]*)\s*(.*)/)
+          Log.debug "Assign rule: #{rule}"
+          var, db, value_str = m.captures
+          names = value_str.split(",").collect{|v| v.strip}
+          if db.empty?
+            ids = names
+          else
+            dbs = id_dbs(db)
+            ids = names.collect{|name|
+              id = nil
+              dbs.each do |db|
+                sid, tid = identify db, name, name
+                id = (sid + tid).compact.first
+                break if id
+              end
+              id
+            }
+          end
+          assignments[var] = ids
+        elsif m = rule.match(/(\?[^\s{]+)\s*{/)
+          acc_var = m.captures.first
+          Log.debug "Start assign block: #{acc_var}"
+        elsif m = rule.match(/^\s*}\s*$/)
+          Log.debug "Close assign block: #{acc_var}"
+          saved_assign = assignments[acc_var]
+          assignments.clear
+          assignments[acc_var] = saved_assign
+          all_matches = []
+          path_rules = []
         else
           raise "Rule not understood: #{rule}"
         end
       end
-      paths = find_paths rules, all_matches, assignments
+      Log.debug "Finding paths: #{all_matches.length}"
+      paths = find_paths path_rules, all_matches, assignments
+      Log.debug "Found paths: #{paths.length}"
       [assignments, paths]
     end

data/lib/rbbt/persist.rb CHANGED

@@ -237,6 +237,11 @@ module Persist
         callback = stream.respond_to?(:callback)? stream.callback : nil
         abort_callback = stream.respond_to?(:abort_callback)? stream.abort_callback : nil
+        # This is to avoid calling the callbacks twice, since they have been
+        # moved to the new 'res' stream
+        stream.callback = nil
+        stream.abort_callback = nil
         res = tee_stream(stream, path, type, callback, abort_callback, lockfile)
         res.lockfile = lockfile

data/lib/rbbt/tsv/dumper.rb CHANGED

@@ -38,11 +38,11 @@ module TSV
       end
     end
-    def init
+    def init(init_options = {})
       options = @options.dup
       key_field, fields = Misc.process_options options, :key_field, :fields
-      str = TSV.header_lines(key_field, fields, options)
+      str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
       Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?

data/lib/rbbt/tsv/util.rb CHANGED

@@ -179,14 +179,17 @@ module TSV
     if Hash === entry_hash
       sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
       preamble = entry_hash[:preamble]
+      header_hash = entry_hash[:header_hash]
     end
+    header_hash = "#" if header_hash.nil?
     preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
     str = ""
     str << preamble.strip << "\n" if preamble and not preamble.empty?
     if fields
-      str << "#" << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
+      str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
     end
     str

data/lib/rbbt/util/docker.rb CHANGED

@@ -1,8 +1,9 @@
 module Docker
-  def self.run(image,cmd, options)
+  def self.run(image, cmd, options)
     mounts, job_inputs, directory, pipe = Misc.process_options options, :mounts, :job_inputs, :directory, :pipe
     if mounts
+      mounts.each{|t,s| FileUtils.mkdir_p s unless File.exists? s}
       mount_cmd = mounts.sort.collect{|t,s| "-v " + ["'" + s + "'", "'" + t + "'"] * ":" } * " "
     else
       mount_cmd = ""

data/lib/rbbt/util/misc/concurrent_stream.rb CHANGED

@@ -171,4 +171,12 @@ module ConcurrentStream
     end
   end
+  def add_callback(&block)
+    old_callback = callback
+    @callback = Proc.new do
+      old_callback.call if old_callback
+      block.call
+    end
+  end
 end

data/lib/rbbt/workflow/accessor.rb CHANGED

@@ -295,7 +295,11 @@ class Step
     return nil if info[:pid].nil?
     pid = @pid || info[:pid]
-    return Misc.pid_exists?(pid)
+    if Misc.pid_exists?(pid)
+      pid
+    else
+      false
+    end
   end
   def error?

data/share/rbbt_commands/stat/density CHANGED

@@ -1,3 +1,4 @@
+#!/usr/bin/env ruby
 require 'rbbt/util/R'
@@ -8,10 +9,6 @@ Calculate density
 $ rbbt stat density <file>
-Display summary information. Works with Tokyocabinet HDB and BDB as well.
--tch--tokyocabinet File is a TC HDB
--tcb--tokyocabinet_bd File is a TC BDB
 -h--help Help
 EOF

data/share/rbbt_commands/tsv/assemble_pdf_table CHANGED

@@ -13,14 +13,18 @@ $ rbbt tsv assemble_pdf_table file.txt
 When extracting tables from PDF they are often laid out one column at a time, divided by pages.
 This command takes a file with the following structure:
-1 A few lines containing table headers, one per line
-2 A group of lines containing the values for the first column of the first page, ending in an empty line
-3 More groups of lines corresponding to other columns
-4 Repetitions of 2 and 3 for more pages
+1. A few lines containing table headers, one per line
+2. A group of lines containing the values for the first column of the first page, ending in an empty line
+3. More groups of lines corresponding to other columns
+4. Repetitions of 2 and 3 for more pages
 This script will take care of matching the columns read with the headers specified
 -h--help Help
+-r--row Each block of lines is a row, not a column
 EOF
 SOPT.usage if options[:help]
@@ -50,19 +54,28 @@ while lines and lines.any?
   lines = lines[block_size+1..-1]
   columns[0] << first_block
   (1..num_columns-1).each do |pos|
+    next if lines.nil?
     block = lines[0..block_size-1]
     lines = lines[block_size+1..-1]
     columns[pos] << block
   end
 end
-full_columns = []
-num_columns.times do |i|
-  column = columns[i]
-  full_columns << column.flatten
-end
+if options[:row]
+  columns.each do |n,list|
+    list.each do |values|
+      puts values * "\t"
+    end
+  end
+else
+  full_columns = []
+  num_columns.times do |i|
+    column = columns[i]
+    full_columns << column.flatten
+  end
-puts "#" << fields * "\t"
-Misc.zip_fields(full_columns).zip do |values|
-  puts values * "\t"
+  puts "#" << fields * "\t"
+  Misc.zip_fields(full_columns).zip do |values|
+    puts values * "\t"
+  end
 end

data/share/rbbt_commands/workflow/task CHANGED

@@ -31,8 +31,10 @@ def usage(workflow = nil, task = nil, exception=nil)
   else
     puts Log.color :magenta, workflow.to_s
     puts Log.color :magenta, "=" * workflow.to_s.length
-    puts
-    puts workflow.documentation[:description]
+    if workflow.documentation[:description] and not workflow.documentation[:description].empty?
+      puts
+      puts workflow.documentation[:description]
+    end
     puts
     workflow.doc(task)
   end
@@ -170,24 +172,24 @@ The first time a job is executed it will save the result. Once the job is done
 you can re-doit using the `clean` parameter. The `recursive_clean` cleans all
 the job dependencies recursively.
--h--help Show this help:
--wd--workdir* Change the working directory of the workflow:
--as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default:
--fs--field_separator* Change the character that separates fields of TSV files '\\t' by default:
--jn--jobname* Job name to use. The name 'Default' is used by default:
--pn--printname Print the name of the job and exit without starting it:
--pf--printpath Print the path of the job result:
--cl--clean Clean the last step of the job so that it gets recomputed:
--rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely:
---fork Run job asyncronously and monitor progress. It monitors detached processes as well:
---detach Run job asyncronously and detach process:
---exec Run job with no persistence:
--O--output* Save job result into file:
--jf--job_file* Output one of the job produced files:
--ljf--list_job_files List all the files produced in that step:
+-h--help Show this help
+-wd--workdir* Change the working directory of the workflow
+-as--array_separator* Change the character that separates elements of Arrays, ',', '|', or '\\n' by default
+-fs--field_separator* Change the character that separates fields of TSV files '\\t' by default
+-jn--jobname* Job name to use. The name 'Default' is used by default
+-pn--printname Print the name of the job and exit without starting it
+-pf--printpath Print the path of the job result
+-cl--clean Clean the last step of the job so that it gets recomputed
+-rcl--recursive_clean Clean the last step and its dependencies to recompute the job completely
+--fork Run job asyncronously and monitor progress. It monitors detached processes as well
+--detach Run job asyncronously and detach process
+--exec Run job with no persistence
+-O--output* Save job result into file
+-jf--job_file* Output one of the job produced files
+-ljf--list_job_files List all the files produced in that step
 --load_inputs* Load inputs from a directory
---info Show the job info:
---provenance Report the jobs provenance:
+--info Show the job info
+--provenance Report the jobs provenance
 -W--workflows* Load a list of workflows
 -R--requires* Require a list of files
 EOF

data/test/rbbt/knowledge_base/test_traverse.rb CHANGED

@@ -17,6 +17,43 @@ class TestKnowledgeBaseTraverse < Test::Unit::TestCase
     assert res.first.include? "?1"
   end
+  def test_traverse2
+    rules = []
+    rules << "?target =pina SF3B1"
+    rules << "?1 pina ?target - Method=MI:0006"
+    rules << "TP53 pina ?2"
+    rules << "?2 pina ?1"
+    res =  kb.traverse rules
+    assert res.first.include? "?1"
+  end
+  def test_traverse3
+    rules = []
+    rules << "?target = ENSG00000115524"
+    rules << "?1 pina ?target - Method=MI:0006"
+    rules << "TP53 pina ?2"
+    rules << "?2 pina ?1"
+    res =  kb.traverse rules
+    assert res.first.include? "?1"
+  end
+  def test_traverse_acc
+    Log.severity = 0
+    rules_str=<<-EOF
+?target{
+  ?target pina SF3B1
+}
+?1 pina TP53
+?1 pina ?target
+    EOF
+    rules = rules_str.split "\n"
+    res =  kb.traverse rules
+    iii res
+    assert res.first.include? "?1"
+  end
   def test_path
     rules = []
     rules << "?1 pina ARPC2"

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rbbt-util
 version: !ruby/object:Gem::Version
-  version: 5.19.16
+  version: 5.19.17
 platform: ruby
 authors:
 - Miguel Vazquez
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-02-09 00:00:00.000000000 Z
+date: 2016-02-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake