rbbt-util 5.19.18 → 5.19.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64ccfc0af909dbc006460786c31cd03ecb5a09d4
4
- data.tar.gz: 221e3508c946239991fbb3fa8e4f9a4649cc954a
3
+ metadata.gz: a1c106d7f3ec1535abec1883fd465cee9c60ce1f
4
+ data.tar.gz: 31d9d1fc96bf50f6fa6c9f5b566d82fb10bc9f06
5
5
  SHA512:
6
- metadata.gz: 585970e6c3d50f7edd9b9188e5e45a7938b821bd0d6ed69b03b08b8fd6ee893cdff167d7804005ea96c7e5f5f3dea68445e4755d6be8724b3a8d5e03e31e94e3
7
- data.tar.gz: 3f3d63792bdd57aee6b3e7b863c7af2ed65e77706edcc55b1fda8b185dc0207bfd2e193b197d67f91bbf5afc7c8fbf0bc384654f4aeb2c510f5fde11d49e0ce5
6
+ metadata.gz: b7f0d24616ceed78fa2671f40ce3e0a9221b6739a0a65f09783dece72d74b661ca54f480b930059bda643c446f1a28e9c81b9deba534fe09ae23dd0a8f8955f8
7
+ data.tar.gz: 0802e21fe07e9f3fedfe244c3456106edfb74733cfbad4e092faea47e1734faa212605169222e7bccadc68d05c7f9a3794912af4c294ff462d1a246bf8db8f7b
@@ -29,7 +29,7 @@ module Association
29
29
 
30
30
  key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"
31
31
 
32
- TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
32
+ TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list, :namespace => database.namespace)
33
33
 
34
34
  data.key_field = key_field
35
35
  data.fields = fields[1..-1]
@@ -69,6 +69,8 @@ module Association
69
69
 
70
70
  annotations.each do |target, info|
71
71
  next if target.nil? or target.empty?
72
+ source.gsub!('~','-..-')
73
+ target.gsub!('~','-..-')
72
74
  key = [source, target] * "~"
73
75
 
74
76
  if data[key].nil? or info.nil?
@@ -83,7 +83,8 @@ module AssociationItem
83
83
  end
84
84
  property :value => :array2single do
85
85
  index = index(database)
86
- value = (reverse ? index.reverse : index).chunked_values_at self
86
+ #value = (reverse ? index.reverse : index).chunked_values_at self
87
+ value = index.chunked_values_at self
87
88
  value.collect{|v| NamedArray.setup(v, index.fields)}
88
89
  end
89
90
 
@@ -23,6 +23,8 @@ class KnowledgeBase
23
23
  IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
24
24
  options = entity_options[type.to_s] || entity_options[Entity.formats[type.to_s].to_s] || {}
25
25
  options[:format] = @format[type] if @format.include? :type
26
+ namespace = self.namespace
27
+ namespace = db_namespace(database_name) if namespace.nil? and database_name
26
28
  options = {:organism => namespace}.merge(options)
27
29
  if database_name
28
30
  database = get_database(database_name)
@@ -67,12 +69,17 @@ class KnowledgeBase
67
69
  get_database(name).identifier_files.dup
68
70
  end
69
71
 
72
+ def db_namespace(name)
73
+ get_database(name).namespace
74
+ end
75
+
70
76
  def source_index(name)
71
77
  Persist.memory("Source index #{name}: KB directory #{dir}") do
72
78
  identifier_files = identifier_files(name)
73
79
  identifier_files.concat Entity.identifier_files(source(name)) if defined? Entity
74
80
  identifier_files.uniq!
75
81
  identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
82
+ identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if not namespace and db_namespace(name)
76
83
  identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
77
84
  TSV.translation_index identifier_files, source(name), nil, :persist => true
78
85
  end
@@ -83,7 +90,8 @@ class KnowledgeBase
83
90
  identifier_files = identifier_files(name)
84
91
  identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
85
92
  identifier_files.uniq!
86
- identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
93
+ identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if self.namespace
94
+ identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if namespace.nil? and db_namespace(name)
87
95
  identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
88
96
  TSV.translation_index identifier_files, target(name), nil, :persist => true
89
97
  end
@@ -60,7 +60,9 @@ class KnowledgeBase
60
60
 
61
61
  def parents(name, entity)
62
62
  entity = identify_target(name, entity)
63
- setup(name, _parents(name, entity), true)
63
+ matches = _parents(name, entity)
64
+ matches.each{|m| m.replace(m.partition("~").reverse*"") } unless undirected(name)
65
+ setup(name, matches, true)
64
66
  end
65
67
 
66
68
  def _neighbours(name, entity)
@@ -9,7 +9,7 @@ class KnowledgeBase
9
9
  Log.debug("Registering #{ name } from code block")
10
10
  @registry[name] = [block, options]
11
11
  else
12
- Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
12
+ Log.debug("Registering #{ name }: #{ Misc.fingerprint file } #{Misc.fingerprint options}")
13
13
  @registry[name] = [file, options]
14
14
  end
15
15
  end
@@ -40,20 +40,26 @@ class KnowledgeBase
40
40
 
41
41
  def get_index(name, options = {})
42
42
  name = name.to_s
43
- options[:organism] ||= options[:namespace] ||= self.namespace
44
43
  @indices[[name, options]] ||=
45
44
  begin
46
- fp = Misc.hash2md5(options)
47
- key = name.to_s + "_" + fp
45
+ if options.empty?
46
+ key = name.to_s
47
+ else
48
+ fp = Misc.hash2md5(options)
49
+ key = name.to_s + "_" + fp
50
+ end
48
51
 
49
52
  Persist.memory("Index:" << [key, dir] * "@") do
50
53
  options = options.dup
54
+
55
+ options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
56
+
51
57
  persist_dir = dir
52
58
  persist_file = persist_dir[key].find
53
59
  file, registered_options = registry[name]
54
60
 
55
61
  options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
56
- options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true
62
+ options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :format => format, :persist => true
57
63
 
58
64
  if entity_options
59
65
  options[:entity_options] ||= {}
@@ -75,7 +81,7 @@ class KnowledgeBase
75
81
  Association.index(file, options, persist_options.dup)
76
82
  end
77
83
 
78
- index.namespace = self.namespace
84
+ index.namespace = self.namespace unless self.namespace
79
85
 
80
86
  index
81
87
  end
@@ -84,19 +90,30 @@ class KnowledgeBase
84
90
 
85
91
  def get_database(name, options = {})
86
92
  name = name.to_s
87
- options[:organism] ||= options[:namespace] ||= self.namespace
93
+
88
94
  @databases[[name, options]] ||=
89
95
  begin
90
96
  fp = Misc.fingerprint([name,options])
91
- key = name.to_s + "_" + Misc.digest(fp) + '.database'
97
+
98
+ if options.empty?
99
+ key = name.to_s
100
+ else
101
+ fp = Misc.hash2md5(options)
102
+ key = name.to_s + "_" + fp
103
+ end
104
+
105
+ key += '.database'
92
106
  Persist.memory("Database:" << [key, dir] * "@") do
93
107
  options = options.dup
108
+
109
+ options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
110
+
94
111
  persist_dir = dir
95
112
  persist_file = persist_dir[key].find
96
113
  file, registered_options = registry[name]
97
114
 
98
115
  options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
99
- options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true
116
+ options = Misc.add_defaults options, :persist_file => persist_file, :format => format, :persist => true
100
117
 
101
118
  if entity_options
102
119
  options[:entity_options] ||= {}
@@ -118,7 +135,7 @@ class KnowledgeBase
118
135
  Association.open(file, options, persist_options)
119
136
  end
120
137
 
121
- database.namespace = self.namespace
138
+ database.namespace = self.namespace if self.namespace
122
139
 
123
140
  database
124
141
  end
@@ -129,4 +146,34 @@ class KnowledgeBase
129
146
  get_index(name).fields
130
147
  end
131
148
 
149
+ def produce(name, *rest,&block)
150
+ register(name, *rest, &block)
151
+ get_index(name)
152
+ end
153
+
154
+ def info(name)
155
+
156
+ source = self.source(name)
157
+ target = self.target(name)
158
+ source_type = self.source_type(name)
159
+ target_type = self.target_type(name)
160
+ fields = self.fields(name)
161
+ source_entity_options = self.entity_options_for source_type, name
162
+ target_entity_options = self.entity_options_for target_type, name
163
+ undirected = self.undirected(name) == 'undirected'
164
+
165
+ info = {
166
+ :source => source,
167
+ :target => target,
168
+ :source_type => source_type,
169
+ :target_type => target_type,
170
+ :source_entity_options => source_entity_options,
171
+ :target_entity_options => target_entity_options,
172
+ :fields => fields,
173
+ :undirected => undirected,
174
+ }
175
+
176
+ info
177
+ end
178
+
132
179
  end
@@ -25,6 +25,10 @@ class KnowledgeBase
25
25
  @databases = {}
26
26
  end
27
27
 
28
+ def self.load(dir)
29
+ KnowledgeBase.new dir
30
+ end
31
+
28
32
  def setup(name, matches, reverse = false)
29
33
  AssociationItem.setup matches, self, name, reverse
30
34
  end
@@ -633,15 +633,17 @@ module TSV
633
633
  break
634
634
  end
635
635
 
636
+ filename = Path === filename ? filename.find : (filename || "No filename")
637
+ filename + " [" + persistence_path + "]" if respond_to?(:persistence_path) and persistence_path
636
638
  with_unnamed do
637
639
  <<-EOF
638
- Filename = #{Path === filename ? filename.find : (filename || "No filename")}
640
+ Filename = #{filename}
639
641
  Key field = #{key_field || "*No key field*"}
640
642
  Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
641
643
  Type = #{type}
642
644
  Serializer = #{serializer.inspect}
643
645
  Size = #{size}
644
- namespace = #{namespace}
646
+ namespace = #{Misc.fingerprint namespace}
645
647
  identifiers = #{Misc.fingerprint identifiers}
646
648
  Example:
647
649
  - #{key} -- #{Misc.fingerprint values }
@@ -8,6 +8,8 @@ module TSV
8
8
 
9
9
  identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
10
10
 
11
+ identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
12
+
11
13
  if not tsv.fields.include? format
12
14
  new = {}
13
15
  tsv.each do |k,v|
data/lib/rbbt/tsv/util.rb CHANGED
@@ -61,6 +61,7 @@ module TSV
61
61
  fields = nil
62
62
  tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
63
63
  names.zip(fields).each do |list, format|
64
+ list = [list] unless Array === list
64
65
  list.delete_if do |name| name.empty? end
65
66
  next if list.empty?
66
67
  text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
@@ -255,7 +256,7 @@ module TSV
255
256
  new = {}
256
257
  case type
257
258
  when :double
258
- self
259
+ return self
259
260
  when :flat
260
261
  through do |k,v|
261
262
  new[k] = [v]
@@ -81,7 +81,11 @@ module R
81
81
  args << "--RS-pidfile"
82
82
  args << "'#{pid_file}'"
83
83
 
84
- bin_path = File.join(ENV["R_HOME"], "bin/Rserve")
84
+ if ENV["R_HOME"]
85
+ bin_path = File.join(ENV["R_HOME"], "bin/Rserve")
86
+ else
87
+ bin_path = "Rserve"
88
+ end
85
89
  cmd = bin_path + " " + args*" "
86
90
  $stdout.reopen File.new('/dev/null', 'w')
87
91
  exec(ENV, cmd)
@@ -14,6 +14,7 @@ Display summary information. Works with Tokyocabinet HDB and BDB as well.
14
14
 
15
15
  -hh--header_hash* Change the character used to mark the header line (defaults to #)
16
16
  -f--field* Field to sort by (name or number)
17
+ -a--absolute Sort as absolute values
17
18
  -h--help Help
18
19
  EOF
19
20
 
@@ -23,19 +24,33 @@ file = ARGV.shift
23
24
 
24
25
  file = STDIN if file == '-' or file.nil?
25
26
 
26
- field = options[:field] || '2'
27
- if field =~ /^\d+$/
28
- field_pos = field.to_i
27
+ if options[:absolute]
28
+ tsv = TSV.open(file, options)
29
+ dumper = TSV::Dumper.new tsv.options
30
+ dumper.init
31
+ field = options[:field] || 1
32
+ keys = tsv.sort(field) do |a| a = a.first if Array === a; a.to_f.abs end
33
+ TSV.traverse keys, :type => :array, :into => dumper do |key|
34
+ values = tsv[key]
35
+ [key,values]
36
+ end
37
+ stream = dumper.stream
29
38
  else
30
- parser = TSV.parse_header(file, options)
31
- field_pos = parser.all_fields.index(field) + 1
32
- saved_line = parser.first_line
33
- end
39
+ field = options[:field] || '2'
40
+
41
+ if field =~ /^\d+$/
42
+ field_pos = field.to_i
43
+ else
44
+ parser = TSV.parse_header(file, options)
45
+ field_pos = parser.all_fields.index(field) + 1
46
+ saved_line = parser.first_line
47
+ end
34
48
 
35
- rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
49
+ rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
36
50
 
37
- file_io = TSV.get_stream(file)
38
- stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
51
+ file_io = TSV.get_stream(file)
52
+ stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
53
+ end
39
54
 
40
55
  begin
41
56
  while line = stream.gets
@@ -111,5 +111,17 @@ class TestKnowledgeBase < Test::Unit::TestCase
111
111
  end
112
112
  end
113
113
  end
114
+
115
+ def test_knowledge_base_reuse
116
+ organism = Organism.default_code("Hsa")
117
+ Log.severity = 0
118
+ TmpFile.with_file(nil, false) do |tmpdir|
119
+ Path.setup(tmpdir)
120
+ Association.index(TFacts.regulators, :persist_file => tmpdir.tfacts, :format => {"Gene" => "Ensembl Gene ID"}, :namespace => Organism.default_code("Hsa"))
121
+
122
+ kb = KnowledgeBase.load(tmpdir)
123
+ assert kb.identify_source('tfacts', "TP53") =~ /ENSG/
124
+ end
125
+ end
114
126
  end
115
127
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.19.18
4
+ version: 5.19.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-27 00:00:00.000000000 Z
11
+ date: 2016-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake