rbbt-util 5.19.18 → 5.19.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64ccfc0af909dbc006460786c31cd03ecb5a09d4
4
- data.tar.gz: 221e3508c946239991fbb3fa8e4f9a4649cc954a
3
+ metadata.gz: a1c106d7f3ec1535abec1883fd465cee9c60ce1f
4
+ data.tar.gz: 31d9d1fc96bf50f6fa6c9f5b566d82fb10bc9f06
5
5
  SHA512:
6
- metadata.gz: 585970e6c3d50f7edd9b9188e5e45a7938b821bd0d6ed69b03b08b8fd6ee893cdff167d7804005ea96c7e5f5f3dea68445e4755d6be8724b3a8d5e03e31e94e3
7
- data.tar.gz: 3f3d63792bdd57aee6b3e7b863c7af2ed65e77706edcc55b1fda8b185dc0207bfd2e193b197d67f91bbf5afc7c8fbf0bc384654f4aeb2c510f5fde11d49e0ce5
6
+ metadata.gz: b7f0d24616ceed78fa2671f40ce3e0a9221b6739a0a65f09783dece72d74b661ca54f480b930059bda643c446f1a28e9c81b9deba534fe09ae23dd0a8f8955f8
7
+ data.tar.gz: 0802e21fe07e9f3fedfe244c3456106edfb74733cfbad4e092faea47e1734faa212605169222e7bccadc68d05c7f9a3794912af4c294ff462d1a246bf8db8f7b
@@ -29,7 +29,7 @@ module Association
29
29
 
30
30
  key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"
31
31
 
32
- TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
32
+ TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list, :namespace => database.namespace)
33
33
 
34
34
  data.key_field = key_field
35
35
  data.fields = fields[1..-1]
@@ -69,6 +69,8 @@ module Association
69
69
 
70
70
  annotations.each do |target, info|
71
71
  next if target.nil? or target.empty?
72
+ source.gsub!('~','-..-')
73
+ target.gsub!('~','-..-')
72
74
  key = [source, target] * "~"
73
75
 
74
76
  if data[key].nil? or info.nil?
@@ -83,7 +83,8 @@ module AssociationItem
83
83
  end
84
84
  property :value => :array2single do
85
85
  index = index(database)
86
- value = (reverse ? index.reverse : index).chunked_values_at self
86
+ #value = (reverse ? index.reverse : index).chunked_values_at self
87
+ value = index.chunked_values_at self
87
88
  value.collect{|v| NamedArray.setup(v, index.fields)}
88
89
  end
89
90
 
@@ -23,6 +23,8 @@ class KnowledgeBase
23
23
  IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
24
24
  options = entity_options[type.to_s] || entity_options[Entity.formats[type.to_s].to_s] || {}
25
25
  options[:format] = @format[type] if @format.include? :type
26
+ namespace = self.namespace
27
+ namespace = db_namespace(database_name) if namespace.nil? and database_name
26
28
  options = {:organism => namespace}.merge(options)
27
29
  if database_name
28
30
  database = get_database(database_name)
@@ -67,12 +69,17 @@ class KnowledgeBase
67
69
  get_database(name).identifier_files.dup
68
70
  end
69
71
 
72
+ def db_namespace(name)
73
+ get_database(name).namespace
74
+ end
75
+
70
76
  def source_index(name)
71
77
  Persist.memory("Source index #{name}: KB directory #{dir}") do
72
78
  identifier_files = identifier_files(name)
73
79
  identifier_files.concat Entity.identifier_files(source(name)) if defined? Entity
74
80
  identifier_files.uniq!
75
81
  identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
82
+ identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if not namespace and db_namespace(name)
76
83
  identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
77
84
  TSV.translation_index identifier_files, source(name), nil, :persist => true
78
85
  end
@@ -83,7 +90,8 @@ class KnowledgeBase
83
90
  identifier_files = identifier_files(name)
84
91
  identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
85
92
  identifier_files.uniq!
86
- identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
93
+ identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if self.namespace
94
+ identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if namespace.nil? and db_namespace(name)
87
95
  identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
88
96
  TSV.translation_index identifier_files, target(name), nil, :persist => true
89
97
  end
@@ -60,7 +60,9 @@ class KnowledgeBase
60
60
 
61
61
  def parents(name, entity)
62
62
  entity = identify_target(name, entity)
63
- setup(name, _parents(name, entity), true)
63
+ matches = _parents(name, entity)
64
+ matches.each{|m| m.replace(m.partition("~").reverse*"") } unless undirected(name)
65
+ setup(name, matches, true)
64
66
  end
65
67
 
66
68
  def _neighbours(name, entity)
@@ -9,7 +9,7 @@ class KnowledgeBase
9
9
  Log.debug("Registering #{ name } from code block")
10
10
  @registry[name] = [block, options]
11
11
  else
12
- Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
12
+ Log.debug("Registering #{ name }: #{ Misc.fingerprint file } #{Misc.fingerprint options}")
13
13
  @registry[name] = [file, options]
14
14
  end
15
15
  end
@@ -40,20 +40,26 @@ class KnowledgeBase
40
40
 
41
41
  def get_index(name, options = {})
42
42
  name = name.to_s
43
- options[:organism] ||= options[:namespace] ||= self.namespace
44
43
  @indices[[name, options]] ||=
45
44
  begin
46
- fp = Misc.hash2md5(options)
47
- key = name.to_s + "_" + fp
45
+ if options.empty?
46
+ key = name.to_s
47
+ else
48
+ fp = Misc.hash2md5(options)
49
+ key = name.to_s + "_" + fp
50
+ end
48
51
 
49
52
  Persist.memory("Index:" << [key, dir] * "@") do
50
53
  options = options.dup
54
+
55
+ options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
56
+
51
57
  persist_dir = dir
52
58
  persist_file = persist_dir[key].find
53
59
  file, registered_options = registry[name]
54
60
 
55
61
  options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
56
- options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true
62
+ options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :format => format, :persist => true
57
63
 
58
64
  if entity_options
59
65
  options[:entity_options] ||= {}
@@ -75,7 +81,7 @@ class KnowledgeBase
75
81
  Association.index(file, options, persist_options.dup)
76
82
  end
77
83
 
78
- index.namespace = self.namespace
84
+ index.namespace = self.namespace unless self.namespace
79
85
 
80
86
  index
81
87
  end
@@ -84,19 +90,30 @@ class KnowledgeBase
84
90
 
85
91
  def get_database(name, options = {})
86
92
  name = name.to_s
87
- options[:organism] ||= options[:namespace] ||= self.namespace
93
+
88
94
  @databases[[name, options]] ||=
89
95
  begin
90
96
  fp = Misc.fingerprint([name,options])
91
- key = name.to_s + "_" + Misc.digest(fp) + '.database'
97
+
98
+ if options.empty?
99
+ key = name.to_s
100
+ else
101
+ fp = Misc.hash2md5(options)
102
+ key = name.to_s + "_" + fp
103
+ end
104
+
105
+ key += '.database'
92
106
  Persist.memory("Database:" << [key, dir] * "@") do
93
107
  options = options.dup
108
+
109
+ options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
110
+
94
111
  persist_dir = dir
95
112
  persist_file = persist_dir[key].find
96
113
  file, registered_options = registry[name]
97
114
 
98
115
  options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
99
- options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true
116
+ options = Misc.add_defaults options, :persist_file => persist_file, :format => format, :persist => true
100
117
 
101
118
  if entity_options
102
119
  options[:entity_options] ||= {}
@@ -118,7 +135,7 @@ class KnowledgeBase
118
135
  Association.open(file, options, persist_options)
119
136
  end
120
137
 
121
- database.namespace = self.namespace
138
+ database.namespace = self.namespace if self.namespace
122
139
 
123
140
  database
124
141
  end
@@ -129,4 +146,34 @@ class KnowledgeBase
129
146
  get_index(name).fields
130
147
  end
131
148
 
149
+ def produce(name, *rest,&block)
150
+ register(name, *rest, &block)
151
+ get_index(name)
152
+ end
153
+
154
+ def info(name)
155
+
156
+ source = self.source(name)
157
+ target = self.target(name)
158
+ source_type = self.source_type(name)
159
+ target_type = self.target_type(name)
160
+ fields = self.fields(name)
161
+ source_entity_options = self.entity_options_for source_type, name
162
+ target_entity_options = self.entity_options_for target_type, name
163
+ undirected = self.undirected(name) == 'undirected'
164
+
165
+ info = {
166
+ :source => source,
167
+ :target => target,
168
+ :source_type => source_type,
169
+ :target_type => target_type,
170
+ :source_entity_options => source_entity_options,
171
+ :target_entity_options => target_entity_options,
172
+ :fields => fields,
173
+ :undirected => undirected,
174
+ }
175
+
176
+ info
177
+ end
178
+
132
179
  end
@@ -25,6 +25,10 @@ class KnowledgeBase
25
25
  @databases = {}
26
26
  end
27
27
 
28
+ def self.load(dir)
29
+ KnowledgeBase.new dir
30
+ end
31
+
28
32
  def setup(name, matches, reverse = false)
29
33
  AssociationItem.setup matches, self, name, reverse
30
34
  end
@@ -633,15 +633,17 @@ module TSV
633
633
  break
634
634
  end
635
635
 
636
+ filename = Path === filename ? filename.find : (filename || "No filename")
637
+ filename + " [" + persistence_path + "]" if respond_to?(:persistence_path) and persistence_path
636
638
  with_unnamed do
637
639
  <<-EOF
638
- Filename = #{Path === filename ? filename.find : (filename || "No filename")}
640
+ Filename = #{filename}
639
641
  Key field = #{key_field || "*No key field*"}
640
642
  Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
641
643
  Type = #{type}
642
644
  Serializer = #{serializer.inspect}
643
645
  Size = #{size}
644
- namespace = #{namespace}
646
+ namespace = #{Misc.fingerprint namespace}
645
647
  identifiers = #{Misc.fingerprint identifiers}
646
648
  Example:
647
649
  - #{key} -- #{Misc.fingerprint values }
@@ -8,6 +8,8 @@ module TSV
8
8
 
9
9
  identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
10
10
 
11
+ identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
12
+
11
13
  if not tsv.fields.include? format
12
14
  new = {}
13
15
  tsv.each do |k,v|
data/lib/rbbt/tsv/util.rb CHANGED
@@ -61,6 +61,7 @@ module TSV
61
61
  fields = nil
62
62
  tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
63
63
  names.zip(fields).each do |list, format|
64
+ list = [list] unless Array === list
64
65
  list.delete_if do |name| name.empty? end
65
66
  next if list.empty?
66
67
  text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
@@ -255,7 +256,7 @@ module TSV
255
256
  new = {}
256
257
  case type
257
258
  when :double
258
- self
259
+ return self
259
260
  when :flat
260
261
  through do |k,v|
261
262
  new[k] = [v]
@@ -81,7 +81,11 @@ module R
81
81
  args << "--RS-pidfile"
82
82
  args << "'#{pid_file}'"
83
83
 
84
- bin_path = File.join(ENV["R_HOME"], "bin/Rserve")
84
+ if ENV["R_HOME"]
85
+ bin_path = File.join(ENV["R_HOME"], "bin/Rserve")
86
+ else
87
+ bin_path = "Rserve"
88
+ end
85
89
  cmd = bin_path + " " + args*" "
86
90
  $stdout.reopen File.new('/dev/null', 'w')
87
91
  exec(ENV, cmd)
@@ -14,6 +14,7 @@ Display summary information. Works with Tokyocabinet HDB and BDB as well.
14
14
 
15
15
  -hh--header_hash* Change the character used to mark the header line (defaults to #)
16
16
  -f--field* Field to sort by (name or number)
17
+ -a--absolute Sort as absolute values
17
18
  -h--help Help
18
19
  EOF
19
20
 
@@ -23,19 +24,33 @@ file = ARGV.shift
23
24
 
24
25
  file = STDIN if file == '-' or file.nil?
25
26
 
26
- field = options[:field] || '2'
27
- if field =~ /^\d+$/
28
- field_pos = field.to_i
27
+ if options[:absolute]
28
+ tsv = TSV.open(file, options)
29
+ dumper = TSV::Dumper.new tsv.options
30
+ dumper.init
31
+ field = options[:field] || 1
32
+ keys = tsv.sort(field) do |a| a = a.first if Array === a; a.to_f.abs end
33
+ TSV.traverse keys, :type => :array, :into => dumper do |key|
34
+ values = tsv[key]
35
+ [key,values]
36
+ end
37
+ stream = dumper.stream
29
38
  else
30
- parser = TSV.parse_header(file, options)
31
- field_pos = parser.all_fields.index(field) + 1
32
- saved_line = parser.first_line
33
- end
39
+ field = options[:field] || '2'
40
+
41
+ if field =~ /^\d+$/
42
+ field_pos = field.to_i
43
+ else
44
+ parser = TSV.parse_header(file, options)
45
+ field_pos = parser.all_fields.index(field) + 1
46
+ saved_line = parser.first_line
47
+ end
34
48
 
35
- rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
49
+ rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
36
50
 
37
- file_io = TSV.get_stream(file)
38
- stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
51
+ file_io = TSV.get_stream(file)
52
+ stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
53
+ end
39
54
 
40
55
  begin
41
56
  while line = stream.gets
@@ -111,5 +111,17 @@ class TestKnowledgeBase < Test::Unit::TestCase
111
111
  end
112
112
  end
113
113
  end
114
+
115
+ def test_knowledge_base_reuse
116
+ organism = Organism.default_code("Hsa")
117
+ Log.severity = 0
118
+ TmpFile.with_file(nil, false) do |tmpdir|
119
+ Path.setup(tmpdir)
120
+ Association.index(TFacts.regulators, :persist_file => tmpdir.tfacts, :format => {"Gene" => "Ensembl Gene ID"}, :namespace => Organism.default_code("Hsa"))
121
+
122
+ kb = KnowledgeBase.load(tmpdir)
123
+ assert kb.identify_source('tfacts', "TP53") =~ /ENSG/
124
+ end
125
+ end
114
126
  end
115
127
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.19.18
4
+ version: 5.19.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-27 00:00:00.000000000 Z
11
+ date: 2016-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake