rbbt-util 5.19.18 → 5.19.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/association/index.rb +3 -1
- data/lib/rbbt/association/item.rb +2 -1
- data/lib/rbbt/knowledge_base/entity.rb +9 -1
- data/lib/rbbt/knowledge_base/query.rb +3 -1
- data/lib/rbbt/knowledge_base/registry.rb +57 -10
- data/lib/rbbt/knowledge_base.rb +4 -0
- data/lib/rbbt/tsv/accessor.rb +4 -2
- data/lib/rbbt/tsv/change_id.rb +2 -0
- data/lib/rbbt/tsv/util.rb +2 -1
- data/lib/rbbt/util/R/eval.rb +5 -1
- data/share/rbbt_commands/tsv/sort +25 -10
- data/test/rbbt/test_knowledge_base.rb +12 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a1c106d7f3ec1535abec1883fd465cee9c60ce1f
|
4
|
+
data.tar.gz: 31d9d1fc96bf50f6fa6c9f5b566d82fb10bc9f06
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b7f0d24616ceed78fa2671f40ce3e0a9221b6739a0a65f09783dece72d74b661ca54f480b930059bda643c446f1a28e9c81b9deba534fe09ae23dd0a8f8955f8
|
7
|
+
data.tar.gz: 0802e21fe07e9f3fedfe244c3456106edfb74733cfbad4e092faea47e1734faa212605169222e7bccadc68d05c7f9a3794912af4c294ff462d1a246bf8db8f7b
|
data/lib/rbbt/association/index.rb
CHANGED
@@ -29,7 +29,7 @@ module Association
|
|
29
29
|
|
30
30
|
key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"
|
31
31
|
|
32
|
-
TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
|
32
|
+
TSV.setup(data, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list, :namespace => database.namespace)
|
33
33
|
|
34
34
|
data.key_field = key_field
|
35
35
|
data.fields = fields[1..-1]
|
@@ -69,6 +69,8 @@ module Association
|
|
69
69
|
|
70
70
|
annotations.each do |target, info|
|
71
71
|
next if target.nil? or target.empty?
|
72
|
+
source.gsub!('~','-..-')
|
73
|
+
target.gsub!('~','-..-')
|
72
74
|
key = [source, target] * "~"
|
73
75
|
|
74
76
|
if data[key].nil? or info.nil?
|
data/lib/rbbt/association/item.rb
CHANGED
@@ -83,7 +83,8 @@ module AssociationItem
|
|
83
83
|
end
|
84
84
|
property :value => :array2single do
|
85
85
|
index = index(database)
|
86
|
-
value = (reverse ? index.reverse : index).chunked_values_at self
|
86
|
+
#value = (reverse ? index.reverse : index).chunked_values_at self
|
87
|
+
value = index.chunked_values_at self
|
87
88
|
value.collect{|v| NamedArray.setup(v, index.fields)}
|
88
89
|
end
|
89
90
|
|
data/lib/rbbt/knowledge_base/entity.rb
CHANGED
@@ -23,6 +23,8 @@ class KnowledgeBase
|
|
23
23
|
IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
|
24
24
|
options = entity_options[type.to_s] || entity_options[Entity.formats[type.to_s].to_s] || {}
|
25
25
|
options[:format] = @format[type] if @format.include? :type
|
26
|
+
namespace = self.namespace
|
27
|
+
namespace = db_namespace(database_name) if namespace.nil? and database_name
|
26
28
|
options = {:organism => namespace}.merge(options)
|
27
29
|
if database_name
|
28
30
|
database = get_database(database_name)
|
@@ -67,12 +69,17 @@ class KnowledgeBase
|
|
67
69
|
get_database(name).identifier_files.dup
|
68
70
|
end
|
69
71
|
|
72
|
+
def db_namespace(name)
|
73
|
+
get_database(name).namespace
|
74
|
+
end
|
75
|
+
|
70
76
|
def source_index(name)
|
71
77
|
Persist.memory("Source index #{name}: KB directory #{dir}") do
|
72
78
|
identifier_files = identifier_files(name)
|
73
79
|
identifier_files.concat Entity.identifier_files(source(name)) if defined? Entity
|
74
80
|
identifier_files.uniq!
|
75
81
|
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
|
82
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if not namespace and db_namespace(name)
|
76
83
|
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
77
84
|
TSV.translation_index identifier_files, source(name), nil, :persist => true
|
78
85
|
end
|
@@ -83,7 +90,8 @@ class KnowledgeBase
|
|
83
90
|
identifier_files = identifier_files(name)
|
84
91
|
identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
|
85
92
|
identifier_files.uniq!
|
86
|
-
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
|
93
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if self.namespace
|
94
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if namespace.nil? and db_namespace(name)
|
87
95
|
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
88
96
|
TSV.translation_index identifier_files, target(name), nil, :persist => true
|
89
97
|
end
|
data/lib/rbbt/knowledge_base/query.rb
CHANGED
@@ -60,7 +60,9 @@ class KnowledgeBase
|
|
60
60
|
|
61
61
|
def parents(name, entity)
|
62
62
|
entity = identify_target(name, entity)
|
63
|
-
|
63
|
+
matches = _parents(name, entity)
|
64
|
+
matches.each{|m| m.replace(m.partition("~").reverse*"") } unless undirected(name)
|
65
|
+
setup(name, matches, true)
|
64
66
|
end
|
65
67
|
|
66
68
|
def _neighbours(name, entity)
|
data/lib/rbbt/knowledge_base/registry.rb
CHANGED
@@ -9,7 +9,7 @@ class KnowledgeBase
|
|
9
9
|
Log.debug("Registering #{ name } from code block")
|
10
10
|
@registry[name] = [block, options]
|
11
11
|
else
|
12
|
-
Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
|
12
|
+
Log.debug("Registering #{ name }: #{ Misc.fingerprint file } #{Misc.fingerprint options}")
|
13
13
|
@registry[name] = [file, options]
|
14
14
|
end
|
15
15
|
end
|
@@ -40,20 +40,26 @@ class KnowledgeBase
|
|
40
40
|
|
41
41
|
def get_index(name, options = {})
|
42
42
|
name = name.to_s
|
43
|
-
options[:organism] ||= options[:namespace] ||= self.namespace
|
44
43
|
@indices[[name, options]] ||=
|
45
44
|
begin
|
46
|
-
|
47
|
-
|
45
|
+
if options.empty?
|
46
|
+
key = name.to_s
|
47
|
+
else
|
48
|
+
fp = Misc.hash2md5(options)
|
49
|
+
key = name.to_s + "_" + fp
|
50
|
+
end
|
48
51
|
|
49
52
|
Persist.memory("Index:" << [key, dir] * "@") do
|
50
53
|
options = options.dup
|
54
|
+
|
55
|
+
options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
|
56
|
+
|
51
57
|
persist_dir = dir
|
52
58
|
persist_file = persist_dir[key].find
|
53
59
|
file, registered_options = registry[name]
|
54
60
|
|
55
61
|
options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
|
56
|
-
options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :
|
62
|
+
options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :format => format, :persist => true
|
57
63
|
|
58
64
|
if entity_options
|
59
65
|
options[:entity_options] ||= {}
|
@@ -75,7 +81,7 @@ class KnowledgeBase
|
|
75
81
|
Association.index(file, options, persist_options.dup)
|
76
82
|
end
|
77
83
|
|
78
|
-
index.namespace = self.namespace
|
84
|
+
index.namespace = self.namespace unless self.namespace
|
79
85
|
|
80
86
|
index
|
81
87
|
end
|
@@ -84,19 +90,30 @@ class KnowledgeBase
|
|
84
90
|
|
85
91
|
def get_database(name, options = {})
|
86
92
|
name = name.to_s
|
87
|
-
|
93
|
+
|
88
94
|
@databases[[name, options]] ||=
|
89
95
|
begin
|
90
96
|
fp = Misc.fingerprint([name,options])
|
91
|
-
|
97
|
+
|
98
|
+
if options.empty?
|
99
|
+
key = name.to_s
|
100
|
+
else
|
101
|
+
fp = Misc.hash2md5(options)
|
102
|
+
key = name.to_s + "_" + fp
|
103
|
+
end
|
104
|
+
|
105
|
+
key += '.database'
|
92
106
|
Persist.memory("Database:" << [key, dir] * "@") do
|
93
107
|
options = options.dup
|
108
|
+
|
109
|
+
options[:organism] ||= options[:namespace] ||= self.namespace unless self.namespace.nil?
|
110
|
+
|
94
111
|
persist_dir = dir
|
95
112
|
persist_file = persist_dir[key].find
|
96
113
|
file, registered_options = registry[name]
|
97
114
|
|
98
115
|
options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
|
99
|
-
options = Misc.add_defaults options, :persist_file => persist_file, :
|
116
|
+
options = Misc.add_defaults options, :persist_file => persist_file, :format => format, :persist => true
|
100
117
|
|
101
118
|
if entity_options
|
102
119
|
options[:entity_options] ||= {}
|
@@ -118,7 +135,7 @@ class KnowledgeBase
|
|
118
135
|
Association.open(file, options, persist_options)
|
119
136
|
end
|
120
137
|
|
121
|
-
database.namespace = self.namespace
|
138
|
+
database.namespace = self.namespace if self.namespace
|
122
139
|
|
123
140
|
database
|
124
141
|
end
|
@@ -129,4 +146,34 @@ class KnowledgeBase
|
|
129
146
|
get_index(name).fields
|
130
147
|
end
|
131
148
|
|
149
|
+
def produce(name, *rest,&block)
|
150
|
+
register(name, *rest, &block)
|
151
|
+
get_index(name)
|
152
|
+
end
|
153
|
+
|
154
|
+
def info(name)
|
155
|
+
|
156
|
+
source = self.source(name)
|
157
|
+
target = self.target(name)
|
158
|
+
source_type = self.source_type(name)
|
159
|
+
target_type = self.target_type(name)
|
160
|
+
fields = self.fields(name)
|
161
|
+
source_entity_options = self.entity_options_for source_type, name
|
162
|
+
target_entity_options = self.entity_options_for target_type, name
|
163
|
+
undirected = self.undirected(name) == 'undirected'
|
164
|
+
|
165
|
+
info = {
|
166
|
+
:source => source,
|
167
|
+
:target => target,
|
168
|
+
:source_type => source_type,
|
169
|
+
:target_type => target_type,
|
170
|
+
:source_entity_options => source_entity_options,
|
171
|
+
:target_entity_options => target_entity_options,
|
172
|
+
:fields => fields,
|
173
|
+
:undirected => undirected,
|
174
|
+
}
|
175
|
+
|
176
|
+
info
|
177
|
+
end
|
178
|
+
|
132
179
|
end
|
data/lib/rbbt/knowledge_base.rb
CHANGED
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -633,15 +633,17 @@ module TSV
|
|
633
633
|
break
|
634
634
|
end
|
635
635
|
|
636
|
+
filename = Path === filename ? filename.find : (filename || "No filename")
|
637
|
+
filename + " [" + persistence_path + "]" if respond_to?(:persistence_path) and persistence_path
|
636
638
|
with_unnamed do
|
637
639
|
<<-EOF
|
638
|
-
Filename = #{
|
640
|
+
Filename = #{filename}
|
639
641
|
Key field = #{key_field || "*No key field*"}
|
640
642
|
Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
|
641
643
|
Type = #{type}
|
642
644
|
Serializer = #{serializer.inspect}
|
643
645
|
Size = #{size}
|
644
|
-
namespace = #{namespace}
|
646
|
+
namespace = #{Misc.fingerprint namespace}
|
645
647
|
identifiers = #{Misc.fingerprint identifiers}
|
646
648
|
Example:
|
647
649
|
- #{key} -- #{Misc.fingerprint values }
|
data/lib/rbbt/tsv/change_id.rb
CHANGED
@@ -8,6 +8,8 @@ module TSV
|
|
8
8
|
|
9
9
|
identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
|
10
10
|
|
11
|
+
identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
|
12
|
+
|
11
13
|
if not tsv.fields.include? format
|
12
14
|
new = {}
|
13
15
|
tsv.each do |k,v|
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -61,6 +61,7 @@ module TSV
|
|
61
61
|
fields = nil
|
62
62
|
tsv.tap{|e| e.unnamed = true; fields = e.fields}.through do |gene, names|
|
63
63
|
names.zip(fields).each do |list, format|
|
64
|
+
list = [list] unless Array === list
|
64
65
|
list.delete_if do |name| name.empty? end
|
65
66
|
next if list.empty?
|
66
67
|
text << list.collect{|name| [name, format] * "\t"} * "\n" << "\n"
|
@@ -255,7 +256,7 @@ module TSV
|
|
255
256
|
new = {}
|
256
257
|
case type
|
257
258
|
when :double
|
258
|
-
self
|
259
|
+
return self
|
259
260
|
when :flat
|
260
261
|
through do |k,v|
|
261
262
|
new[k] = [v]
|
data/lib/rbbt/util/R/eval.rb
CHANGED
@@ -81,7 +81,11 @@ module R
|
|
81
81
|
args << "--RS-pidfile"
|
82
82
|
args << "'#{pid_file}'"
|
83
83
|
|
84
|
-
|
84
|
+
if ENV["R_HOME"]
|
85
|
+
bin_path = File.join(ENV["R_HOME"], "bin/Rserve")
|
86
|
+
else
|
87
|
+
bin_path = "Rserve"
|
88
|
+
end
|
85
89
|
cmd = bin_path + " " + args*" "
|
86
90
|
$stdout.reopen File.new('/dev/null', 'w')
|
87
91
|
exec(ENV, cmd)
|
data/share/rbbt_commands/tsv/sort
CHANGED
@@ -14,6 +14,7 @@ Display summary information. Works with Tokyocabinet HDB and BDB as well.
|
|
14
14
|
|
15
15
|
-hh--header_hash* Change the character used to mark the header line (defaults to #)
|
16
16
|
-f--field* Field to sort by (name or number)
|
17
|
+
-a--absolute Sort as absolute values
|
17
18
|
-h--help Help
|
18
19
|
EOF
|
19
20
|
|
@@ -23,19 +24,33 @@ file = ARGV.shift
|
|
23
24
|
|
24
25
|
file = STDIN if file == '-' or file.nil?
|
25
26
|
|
26
|
-
|
27
|
-
|
28
|
-
|
27
|
+
if options[:absolute]
|
28
|
+
tsv = TSV.open(file, options)
|
29
|
+
dumper = TSV::Dumper.new tsv.options
|
30
|
+
dumper.init
|
31
|
+
field = options[:field] || 1
|
32
|
+
keys = tsv.sort(field) do |a| a = a.first if Array === a; a.to_f.abs end
|
33
|
+
TSV.traverse keys, :type => :array, :into => dumper do |key|
|
34
|
+
values = tsv[key]
|
35
|
+
[key,values]
|
36
|
+
end
|
37
|
+
stream = dumper.stream
|
29
38
|
else
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
39
|
+
field = options[:field] || '2'
|
40
|
+
|
41
|
+
if field =~ /^\d+$/
|
42
|
+
field_pos = field.to_i
|
43
|
+
else
|
44
|
+
parser = TSV.parse_header(file, options)
|
45
|
+
field_pos = parser.all_fields.index(field) + 1
|
46
|
+
saved_line = parser.first_line
|
47
|
+
end
|
34
48
|
|
35
|
-
rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
|
49
|
+
rest = ARGV.inject("-t\"\t\" -g -k#{field_pos}"){|acc,e| acc << " '#{e}'" }
|
36
50
|
|
37
|
-
file_io = TSV.get_stream(file)
|
38
|
-
stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
|
51
|
+
file_io = TSV.get_stream(file)
|
52
|
+
stream = Misc.sort_stream(file_io, options[:header_hash] || "#", rest)
|
53
|
+
end
|
39
54
|
|
40
55
|
begin
|
41
56
|
while line = stream.gets
|
data/test/rbbt/test_knowledge_base.rb
CHANGED
@@ -111,5 +111,17 @@ class TestKnowledgeBase < Test::Unit::TestCase
|
|
111
111
|
end
|
112
112
|
end
|
113
113
|
end
|
114
|
+
|
115
|
+
def test_knowledge_base_reuse
|
116
|
+
organism = Organism.default_code("Hsa")
|
117
|
+
Log.severity = 0
|
118
|
+
TmpFile.with_file(nil, false) do |tmpdir|
|
119
|
+
Path.setup(tmpdir)
|
120
|
+
Association.index(TFacts.regulators, :persist_file => tmpdir.tfacts, :format => {"Gene" => "Ensembl Gene ID"}, :namespace => Organism.default_code("Hsa"))
|
121
|
+
|
122
|
+
kb = KnowledgeBase.load(tmpdir)
|
123
|
+
assert kb.identify_source('tfacts', "TP53") =~ /ENSG/
|
124
|
+
end
|
125
|
+
end
|
114
126
|
end
|
115
127
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.19.18
|
4
|
+
version: 5.19.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|