rbbt-util 5.19.18 → 5.19.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/association/index.rb +3 -1
- data/lib/rbbt/association/item.rb +2 -1
- data/lib/rbbt/knowledge_base/entity.rb +9 -1
- data/lib/rbbt/knowledge_base/query.rb +3 -1
- data/lib/rbbt/knowledge_base/registry.rb +57 -10
- data/lib/rbbt/knowledge_base.rb +4 -0
- data/lib/rbbt/tsv/accessor.rb +4 -2
- data/lib/rbbt/tsv/change_id.rb +2 -0
- data/lib/rbbt/tsv/util.rb +2 -1
- data/lib/rbbt/util/R/eval.rb +5 -1
- data/share/rbbt_commands/tsv/sort +25 -10
- data/test/rbbt/test_knowledge_base.rb +12 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1c106d7f3ec1535abec1883fd465cee9c60ce1f
|
4
|
+
data.tar.gz: 31d9d1fc96bf50f6fa6c9f5b566d82fb10bc9f06
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b7f0d24616ceed78fa2671f40ce3e0a9221b6739a0a65f09783dece72d74b661ca54f480b930059bda643c446f1a28e9c81b9deba534fe09ae23dd0a8f8955f8
|
7
|
+
data.tar.gz: 0802e21fe07e9f3fedfe244c3456106edfb74733cfbad4e092faea47e1734faa212605169222e7bccadc68d05c7f9a3794912af4c294ff462d1a246bf8db8f7b
|
@@ -29,7 +29,7 @@ module Association
|
|
29
29
|
|
30
30
|
key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"
|
31
31
|
|
32
|
-
TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
|
32
|
+
TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list, :namespace => database.namespace)
|
33
33
|
|
34
34
|
data.key_field = key_field
|
35
35
|
data.fields = fields[1..-1]
|
@@ -69,6 +69,8 @@ module Association
|
|
69
69
|
|
70
70
|
annotations.each do |target, info|
|
71
71
|
next if target.nil? or target.empty?
|
72
|
+
source.gsub!('~','-..-')
|
73
|
+
target.gsub!('~','-..-')
|
72
74
|
key = [source, target] * "~"
|
73
75
|
|
74
76
|
if data[key].nil? or info.nil?
|
@@ -83,7 +83,8 @@ module AssociationItem
|
|
83
83
|
end
|
84
84
|
property :value => :array2single do
|
85
85
|
index = index(database)
|
86
|
-
value = (reverse ? index.reverse : index).chunked_values_at self
|
86
|
+
#value = (reverse ? index.reverse : index).chunked_values_at self
|
87
|
+
value = index.chunked_values_at self
|
87
88
|
value.collect{|v| NamedArray.setup(v, index.fields)}
|
88
89
|
end
|
89
90
|
|
@@ -23,6 +23,8 @@ class KnowledgeBase
|
|
23
23
|
IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
|
24
24
|
options = entity_options[type.to_s] || entity_options[Entity.formats[type.to_s].to_s] || {}
|
25
25
|
options[:format] = @format[type] if @format.include? :type
|
26
|
+
namespace = self.namespace
|
27
|
+
namespace = db_namespace(database_name) if namespace.nil? and database_name
|
26
28
|
options = {:organism => namespace}.merge(options)
|
27
29
|
if database_name
|
28
30
|
database = get_database(database_name)
|
@@ -67,12 +69,17 @@ class KnowledgeBase
|
|
67
69
|
get_database(name).identifier_files.dup
|
68
70
|
end
|
69
71
|
|
72
|
+
def db_namespace(name)
|
73
|
+
get_database(name).namespace
|
74
|
+
end
|
75
|
+
|
70
76
|
def source_index(name)
|
71
77
|
Persist.memory("Source index #{name}: KB directory #{dir}") do
|
72
78
|
identifier_files = identifier_files(name)
|
73
79
|
identifier_files.concat Entity.identifier_files(source(name)) if defined? Entity
|
74
80
|
identifier_files.uniq!
|
75
81
|
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
|
82
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if not namespace and db_namespace(name)
|
76
83
|
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
77
84
|
TSV.translation_index identifier_files, source(name), nil, :persist => true
|
78
85
|
end
|
@@ -83,7 +90,8 @@ class KnowledgeBase
|
|
83
90
|
identifier_files = identifier_files(name)
|
84
91
|
identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
|
85
92
|
identifier_files.uniq!
|
86
|
-
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
|
93
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if self.namespace
|
94
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if namespace.nil? and db_namespace(name)
|
87
95
|
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
88
96
|
TSV.translation_index identifier_files, target(name), nil, :persist => true
|
89
97
|
end
|
@@ -60,7 +60,9 @@ class KnowledgeBase
|
|
60
60
|
|
61
61
|
def parents(name, entity)
|
62
62
|
entity = identify_target(name, entity)
|
63
|
-
|
63
|
+
matches = _parents(name, entity)
|
64
|
+
matches.each{|m| m.replace(m.partition("~").reverse*"") } unless undirected(name)
|
65
|
+
setup(name, matches, true)
|
64
66
|
end
|
65
67
|
|
66
68
|
def _neighbours(name, entity)
|
@@ -9,7 +9,7 @@ class KnowledgeBase
|
|
9
9
|
Log.debug("Registering #{ name } from code block")
|
10
10
|
@registry[name] = [block, options]
|
11
11
|
else
|
12
|
-
Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
|
12
|
+
Log.debug("Registering #{ name }: #{ Misc.fingerprint file } #{Misc.fingerprint options}")
|
13
13
|
@registry[name] = [file, options]
|
14
14
|
end
|
15
15
|
end
|
@@ -40,20 +40,26 @@ class KnowledgeBase
|
|
40
40
|
|
41
41
|
def get_index(name, options = {})
|
42
42
|
name = name.to_s
|
43
|
-
options[:organism] ||= options[:namespace] ||= self.namespace
|
44
43
|
@indices[[name, options]] ||=
|
45
44
|
begin
|
46
|
-
|
47
|
-
|
45
|
+
if options.empty?
|
46
|
+
key = name.to_s
|
47
|
+
else
|
48
|
+
fp = Misc.hash2md5(options)
|
49
|
+
key = name.to_s + "_" + fp
|
50
|
+
end
|
48
51
|
|
49
52
|
Persist.memory("Index:" << [key, dir] * "@") do
|
50
53
|
options = options.dup
|
54
|
+
|
55
|
+
options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
|
56
|
+
|
51
57
|
persist_dir = dir
|
52
58
|
persist_file = persist_dir[key].find
|
53
59
|
file, registered_options = registry[name]
|
54
60
|
|
55
61
|
options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
|
56
|
-
options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :
|
62
|
+
options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :format => format, :persist => true
|
57
63
|
|
58
64
|
if entity_options
|
59
65
|
options[:entity_options] ||= {}
|
@@ -75,7 +81,7 @@ class KnowledgeBase
|
|
75
81
|
Association.index(file, options, persist_options.dup)
|
76
82
|
end
|
77
83
|
|
78
|
-
index.namespace = self.namespace
|
84
|
+
index.namespace = self.namespace unless self.namespace
|
79
85
|
|
80
86
|
index
|
81
87
|
end
|
@@ -84,19 +90,30 @@ class KnowledgeBase
|
|
84
90
|
|
85
91
|
def get_database(name, options = {})
|
86
92
|
name = name.to_s
|
87
|
-
|
93
|
+
|
88
94
|
@databases[[name, options]] ||=
|
89
95
|
begin
|
90
96
|
fp = Misc.fingerprint([name,options])
|
91
|
-
|
97
|
+
|
98
|
+
if options.empty?
|
99
|
+
key = name.to_s
|
100
|
+
else
|
101
|
+
fp = Misc.hash2md5(options)
|
102
|
+
key = name.to_s + "_" + fp
|
103
|
+
end
|
104
|
+
|
105
|
+
key += '.database'
|
92
106
|
Persist.memory("Database:" << [key, dir] * "@") do
|
93
107
|
options = options.dup
|
108
|
+
|
109
|
+
options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
|
110
|
+
|
94
111
|
persist_dir = dir
|
95
112
|
persist_file = persist_dir[key].find
|
96
113
|
file, registered_options = registry[name]
|
97
114
|
|
98
115
|
options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
|
99
|
-
options = Misc.add_defaults options, :persist_file => persist_file, :
|
116
|
+
options = Misc.add_defaults options, :persist_file => persist_file, :format => format, :persist => true
|
100
117
|
|
101
118
|
if entity_options
|
102
119
|
options[:entity_options] ||= {}
|
@@ -118,7 +135,7 @@ class KnowledgeBase
|
|
118
135
|
Association.open(file, options, persist_options)
|
119
136
|
end
|
120
137
|
|
121
|
-
database.namespace = self.namespace
|
138
|
+
database.namespace = self.namespace if self.namespace
|
122
139
|
|
123
140
|
database
|
124
141
|
end
|
@@ -129,4 +146,34 @@ class KnowledgeBase
|
|
129
146
|
get_index(name).fields
|
130
147
|
end
|
131
148
|
|
149
|
+
def produce(name, *rest,&block)
|
150
|
+
register(name, *rest, &block)
|
151
|
+
get_index(name)
|
152
|
+
end
|
153
|
+
|
154
|
+
def info(name)
|
155
|
+
|
156
|
+
source = self.source(name)
|
157
|
+
target = self.target(name)
|
158
|
+
source_type = self.source_type(name)
|
159
|
+
target_type = self.target_type(name)
|
160
|
+
fields = self.fields(name)
|
161
|
+
source_entity_options = self.entity_options_for source_type, name
|
162
|
+
target_entity_options = self.entity_options_for target_type, name
|
163
|
+
undirected = self.undirected(name) == 'undirected'
|
164
|
+
|
165
|
+
info = {
|
166
|
+
:source => source,
|
167
|
+
:target => target,
|
168
|
+
:source_type => source_type,
|
169
|
+
:target_type => target_type,
|
170
|
+
:source_entity_options => source_entity_options,
|
171
|
+
:target_entity_options => target_entity_options,
|
172
|
+
:fields => fields,
|
173
|
+
:undirected => undirected,
|
174
|
+
}
|
175
|
+
|
176
|
+
info
|
177
|
+
end
|
178
|
+
|
132
179
|
end
|
data/lib/rbbt/knowledge_base.rb
CHANGED
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -633,15 +633,17 @@ module TSV
|
|
633
633
|
break
|
634
634
|
end
|
635
635
|
|
636
|
+
filename = Path === filename ? filename.find : (filename || "No filename")
|
637
|
+
filename + " [" + persistence_path + "]" if respond_to?(:persistence_path) and persistence_path
|
636
638
|
with_unnamed do
|
637
639
|
<<-EOF
|
638
|
-
Filename = #{
|
640
|
+
Filename = #{filename}
|
639
641
|
Key field = #{key_field || "*No key field*"}
|
640
642
|
Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
|
641
643
|
Type = #{type}
|
642
644
|
Serializer = #{serializer.inspect}
|
643
645
|
Size = #{size}
|
644
|
-
namespace = #{namespace}
|
646
|
+
namespace = #{Misc.fingerprint namespace}
|
645
647
|
identifiers = #{Misc.fingerprint identifiers}
|
646
648
|
Example:
|
647
649
|
- #{key} -- #{Misc.fingerprint values }
|
data/lib/rbbt/tsv/change_id.rb
CHANGED
@@ -8,6 +8,8 @@ module TSV
|
|
8
8
|
|
9
9
|
identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
|
10
10
|
|
11
|
+
identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
|
12
|
+
|
11
13
|
if not tsv.fields.include? format
|
12
14
|
new = {}
|
13
15
|
tsv.each do |k,v|
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -61,6 +61,7 @@ module TSV
|
|
61
61
|
fields = nil
|
62
62
|
tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
63
63
|
names.zip(fields).each do |list, format|
|
64
|
+
list = [list] unless Array === list
|
64
65
|
list.delete_if do |name| name.empty? end
|
65
66
|
next if list.empty?
|
66
67
|
text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
@@ -255,7 +256,7 @@ module TSV
|
|
255
256
|
new = {}
|
256
257
|
case type
|
257
258
|
when :double
|
258
|
-
self
|
259
|
+
return self
|
259
260
|
when :flat
|
260
261
|
through do |k,v|
|
261
262
|
new[k] = [v]
|
data/lib/rbbt/util/R/eval.rb
CHANGED
@@ -81,7 +81,11 @@ module R
|
|
81
81
|
args << "--RS-pidfile"
|
82
82
|
args << "'#{pid_file}'"
|
83
83
|
|
84
|
-
|
84
|
+
if ENV["R_HOME"]
|
85
|
+
bin_path = File.join(ENV["R_HOME"], "bin/Rserve")
|
86
|
+
else
|
87
|
+
bin_path = "Rserve"
|
88
|
+
end
|
85
89
|
cmd = bin_path + " " + args*" "
|
86
90
|
$stdout.reopen File.new('/dev/null', 'w')
|
87
91
|
exec(ENV, cmd)
|
@@ -14,6 +14,7 @@ Display summary information. Works with Tokyocabinet HDB and BDB as well.
|
|
14
14
|
|
15
15
|
-hh--header_hash* Change the character used to mark the header line (defaults to #)
|
16
16
|
-f--field* Field to sort by (name or number)
|
17
|
+
-a--absolute Sort as absolute values
|
17
18
|
-h--help Help
|
18
19
|
EOF
|
19
20
|
|
@@ -23,19 +24,33 @@ file = ARGV.shift
|
|
23
24
|
|
24
25
|
file = STDIN if file == '-' or file.nil?
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
if options[:absolute]
|
28
|
+
tsv = TSV.open(file, options)
|
29
|
+
dumper = TSV::Dumper.new tsv.options
|
30
|
+
dumper.init
|
31
|
+
field = options[:field] || 1
|
32
|
+
keys = tsv.sort(field) do |a| a = a.first if Array === a; a.to_f.abs end
|
33
|
+
TSV.traverse keys, :type => :array, :into => dumper do |key|
|
34
|
+
values = tsv[key]
|
35
|
+
[key,values]
|
36
|
+
end
|
37
|
+
stream = dumper.stream
|
29
38
|
else
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
39
|
+
field = options[:field] || '2'
|
40
|
+
|
41
|
+
if field =~ /^\d+$/
|
42
|
+
field_pos = field.to_i
|
43
|
+
else
|
44
|
+
parser = TSV.parse_header(file, options)
|
45
|
+
field_pos = parser.all_fields.index(field) + 1
|
46
|
+
saved_line = parser.first_line
|
47
|
+
end
|
34
48
|
|
35
|
-
rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
|
49
|
+
rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
|
36
50
|
|
37
|
-
file_io = TSV.get_stream(file)
|
38
|
-
stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
|
51
|
+
file_io = TSV.get_stream(file)
|
52
|
+
stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
|
53
|
+
end
|
39
54
|
|
40
55
|
begin
|
41
56
|
while line = stream.gets
|
@@ -111,5 +111,17 @@ class TestKnowledgeBase < Test::Unit::TestCase
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
end
|
114
|
+
|
115
|
+
def test_knowledge_base_reuse
|
116
|
+
organism = Organism.default_code("Hsa")
|
117
|
+
Log.severity = 0
|
118
|
+
TmpFile.with_file(nil, false) do |tmpdir|
|
119
|
+
Path.setup(tmpdir)
|
120
|
+
Association.index(TFacts.regulators, :persist_file => tmpdir.tfacts, :format => {"Gene" => "Ensembl Gene ID"}, :namespace => Organism.default_code("Hsa"))
|
121
|
+
|
122
|
+
kb = KnowledgeBase.load(tmpdir)
|
123
|
+
assert kb.identify_source('tfacts', "TP53") =~ /ENSG/
|
124
|
+
end
|
125
|
+
end
|
114
126
|
end
|
115
127
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.19.18
|
4
|
+
version: 5.19.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|