rbbt-util 5.14.28 → 5.14.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/association/index.rb +6 -2
- data/lib/rbbt/association.rb +13 -4
- data/lib/rbbt/knowledge_base.rb +15 -13
- data/lib/rbbt/persist.rb +2 -1
- data/lib/rbbt/resource/rake.rb +2 -1
- data/lib/rbbt/resource.rb +6 -1
- data/lib/rbbt/tsv/accessor.rb +1 -1
- data/lib/rbbt/tsv/attach.rb +1 -1
- data/lib/rbbt/tsv/dumper.rb +1 -0
- data/lib/rbbt/tsv/manipulate.rb +3 -6
- data/lib/rbbt/tsv/matrix.rb +20 -0
- data/lib/rbbt/tsv/parser.rb +7 -2
- data/lib/rbbt/tsv/util.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -1
- data/lib/rbbt/util/misc/bgzf.rb +6 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +6 -4
- data/lib/rbbt/util/open.rb +1 -1
- data/lib/rbbt/workflow.rb +4 -0
- data/share/rbbt_commands/check_bgzf +27 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ed65d89a02680821214ab3141ef167565045291
|
4
|
+
data.tar.gz: 95402199abefb0228dc7788731a1c2b0487c9bf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8643b977a908ea2447d070aef682d4f5bafc3360f380cbc3b43be992ba6bad689cfba5ee2ed546fce64672f133fb5f2676b13e4088d6a60f0c91ba2bef741108
|
7
|
+
data.tar.gz: 25b4bf073c68ee563eb17d77b00af6a38f9d25d10520f834167bb94e6b8daea866c048264f97a9b6afc2917d7d32b16c4b41a52aa123176541b3e84f8bd83b3b
|
@@ -70,7 +70,7 @@ module Association
|
|
70
70
|
source_type = Entity.formats[source_field]
|
71
71
|
target_type = Entity.formats[target_field]
|
72
72
|
|
73
|
-
source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
|
73
|
+
source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
|
74
74
|
target_entities = entities[:target] || entities[target_field] || entities[Entity.formats[target_field].to_s]
|
75
75
|
|
76
76
|
[source_entities, target_entities]
|
@@ -89,7 +89,11 @@ module Association
|
|
89
89
|
end
|
90
90
|
|
91
91
|
matches = source.uniq.inject([]){|acc,e|
|
92
|
-
|
92
|
+
if block_given?
|
93
|
+
acc.concat(match(e))
|
94
|
+
else
|
95
|
+
acc.concat(match(e))
|
96
|
+
end
|
93
97
|
}
|
94
98
|
|
95
99
|
return matches if target == :all or target == "all"
|
data/lib/rbbt/association.rb
CHANGED
@@ -133,12 +133,13 @@ module Association
|
|
133
133
|
target_field = tsv.fields.first
|
134
134
|
|
135
135
|
if source_final_format and source_field != source_final_format and
|
136
|
-
Entity.formats[source_field]
|
137
|
-
|
136
|
+
Entity.formats[source_field]
|
137
|
+
|
138
138
|
Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")
|
139
139
|
|
140
140
|
tsv.with_unnamed do
|
141
|
-
|
141
|
+
identifiers = tsv.identifiers || Organism.identifiers(tsv.namespace)
|
142
|
+
tsv = tsv.change_key source_final_format, :identifiers => identifiers, :persist => true
|
142
143
|
end
|
143
144
|
end
|
144
145
|
|
@@ -221,7 +222,14 @@ module Association
|
|
221
222
|
|
222
223
|
case file
|
223
224
|
when Proc
|
224
|
-
|
225
|
+
res = file.call
|
226
|
+
tsv = case res
|
227
|
+
when TSV, Path
|
228
|
+
return load_tsv(res, options)
|
229
|
+
else
|
230
|
+
tsv = TSV.open(res, :unnamed => true)
|
231
|
+
return load_tsv(tsv, options)
|
232
|
+
end
|
225
233
|
when TSV
|
226
234
|
key_field, *fields = all_fields = file.all_fields
|
227
235
|
else
|
@@ -257,6 +265,7 @@ module Association
|
|
257
265
|
tsv = load_tsv(file, options)
|
258
266
|
|
259
267
|
tsv.annotate(data)
|
268
|
+
|
260
269
|
data.serializer = tsv.type
|
261
270
|
|
262
271
|
tsv.with_unnamed do
|
data/lib/rbbt/knowledge_base.rb
CHANGED
@@ -358,7 +358,7 @@ class KnowledgeBase
|
|
358
358
|
end
|
359
359
|
end
|
360
360
|
|
361
|
-
def subset(name, entities)
|
361
|
+
def subset(name, entities, &block)
|
362
362
|
entities = case entities
|
363
363
|
when :all
|
364
364
|
{:target => :all, :source => :all}
|
@@ -374,18 +374,20 @@ class KnowledgeBase
|
|
374
374
|
|
375
375
|
repo = get_index name
|
376
376
|
|
377
|
-
begin
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
377
|
+
matches = begin
|
378
|
+
s = repo.subset_entities(entities, &block)
|
379
|
+
setup(name, s)
|
380
|
+
rescue Exception
|
381
|
+
target = entities[:target]
|
382
|
+
source = entities[:source]
|
383
|
+
if target or source
|
384
|
+
entities[:target] = source
|
385
|
+
entities[:source] = target
|
386
|
+
end
|
387
|
+
setup(name, repo.reverse.subset_entities(entities, &block), true)
|
388
|
+
end
|
389
|
+
|
390
|
+
block_given? ? matches.select(&block) : matches
|
389
391
|
end
|
390
392
|
|
391
393
|
def translate(entities, type)
|
data/lib/rbbt/persist.rb
CHANGED
@@ -218,7 +218,7 @@ module Persist
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def self.get_result(path, type, persist_options, lockfile, &block)
|
221
|
-
res = yield
|
221
|
+
res = yield path
|
222
222
|
stream = res if IO === res
|
223
223
|
stream = res.stream if res.respond_to? :stream
|
224
224
|
|
@@ -317,6 +317,7 @@ module Persist
|
|
317
317
|
if FalseClass === persist_options[:persist]
|
318
318
|
yield
|
319
319
|
else
|
320
|
+
persist_options[:update] ||= true if persist_options[:persist].to_s == "update"
|
320
321
|
other_options = Misc.process_options persist_options, :other
|
321
322
|
path = persistence_path(name, persist_options, other_options || {})
|
322
323
|
|
data/lib/rbbt/resource/rake.rb
CHANGED
data/lib/rbbt/resource.rb
CHANGED
@@ -122,7 +122,12 @@ module Resource
|
|
122
122
|
raise "Resource is missing and does not seem to be claimed: #{ self } -- #{ path } "
|
123
123
|
end
|
124
124
|
|
125
|
-
|
125
|
+
if path.respond_to?(:find)
|
126
|
+
final_path = force ? path.find(:default) : path.find
|
127
|
+
else
|
128
|
+
final_path = path
|
129
|
+
end
|
130
|
+
|
126
131
|
if not File.exists? final_path or force
|
127
132
|
Log.medium "Producing: #{ final_path }"
|
128
133
|
lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -9,7 +9,7 @@ module TSV
|
|
9
9
|
attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
|
10
10
|
|
11
11
|
def info
|
12
|
-
{:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed}.delete_if{|k,v| v.nil? }
|
12
|
+
{:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed, :cast => cast}.delete_if{|k,v| v.nil? }
|
13
13
|
end
|
14
14
|
|
15
15
|
def annotate(tsv)
|
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -182,7 +182,7 @@ module TSV
|
|
182
182
|
unless TSV === other
|
183
183
|
other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
|
184
184
|
other = TSV.open(other, :persist => options[:persist_input] == true)
|
185
|
-
other.identifiers
|
185
|
+
other.identifiers ||= other_identifier_file
|
186
186
|
end
|
187
187
|
|
188
188
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil? or fields == :all
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -631,10 +631,11 @@ module TSV
|
|
631
631
|
self
|
632
632
|
end
|
633
633
|
|
634
|
-
def transpose(key_field)
|
634
|
+
def transpose(key_field="Unkown ID")
|
635
635
|
raise "Transposing only works for TSVs of type :list" unless type == :list
|
636
636
|
new_fields = keys
|
637
|
-
new =
|
637
|
+
new = self.annotate({})
|
638
|
+
TSV.setup(new, :key_field => key_field, :fields => new_fields, :type => type, :filename => filename, :identifiers => identifiers)
|
638
639
|
|
639
640
|
through do |key, values|
|
640
641
|
fields.zip(values) do |new_key, value|
|
@@ -643,10 +644,6 @@ module TSV
|
|
643
644
|
end
|
644
645
|
end
|
645
646
|
|
646
|
-
new.entity_options = entity_options
|
647
|
-
new.entity_templates = entity_templates
|
648
|
-
new.namespace = namespace
|
649
|
-
|
650
647
|
new
|
651
648
|
end
|
652
649
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module TSV
|
2
|
+
def self.read_matrix(tsv, field_format = "ID", value_format = "Value")
|
3
|
+
tsv = TSV.open(tsv) unless TSV === tsv
|
4
|
+
|
5
|
+
key_field, *fields = tsv.all_fields
|
6
|
+
options = tsv.options.merge(:key_field => key_field, :fields => [field_format, value_format], :type => :double, :cast => nil)
|
7
|
+
|
8
|
+
options[:filename] ||= tsv.filename
|
9
|
+
options[:identifiers] ||= tsv.identifier_files.first
|
10
|
+
|
11
|
+
dumper = TSV::Dumper.new(options)
|
12
|
+
|
13
|
+
dumper.init
|
14
|
+
TSV.traverse tsv, :into => dumper do |key, values|
|
15
|
+
[key, [fields, values]]
|
16
|
+
end
|
17
|
+
|
18
|
+
dumper.stream
|
19
|
+
end
|
20
|
+
end
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
2
|
module TSV
|
3
3
|
class Parser
|
4
|
-
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble
|
4
|
+
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers
|
5
5
|
|
6
6
|
class SKIP_LINE < Exception; end
|
7
7
|
class END_PARSING < Exception; end
|
@@ -392,6 +392,8 @@ module TSV
|
|
392
392
|
@type ||= Misc.process_options(options, :type) || :double
|
393
393
|
@type ||= :double
|
394
394
|
|
395
|
+
@identifiers = Misc.process_options(options, :identifiers)
|
396
|
+
|
395
397
|
@filename = Misc.process_options(options, :filename)
|
396
398
|
@filename ||= stream.filename if stream.respond_to? :filename
|
397
399
|
|
@@ -472,6 +474,7 @@ module TSV
|
|
472
474
|
data.fields = @fields
|
473
475
|
data.namespace = @namespace
|
474
476
|
data.filename = @filename
|
477
|
+
data.identifiers = @identifiers
|
475
478
|
data.cast = @cast if Symbol === @cast
|
476
479
|
data
|
477
480
|
end
|
@@ -559,6 +562,8 @@ module TSV
|
|
559
562
|
break
|
560
563
|
end
|
561
564
|
rescue END_PARSING
|
565
|
+
stream.close unless stream.closed?
|
566
|
+
begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
|
562
567
|
break
|
563
568
|
rescue Errno::EPIPE
|
564
569
|
Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
|
@@ -572,7 +577,7 @@ module TSV
|
|
572
577
|
end
|
573
578
|
ensure
|
574
579
|
stream.close unless stream.closed?
|
575
|
-
stream.join if stream.respond_to? :join
|
580
|
+
stream.join if stream.respond_to? :join and not stream.joined?
|
576
581
|
end
|
577
582
|
|
578
583
|
self
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -167,7 +167,7 @@ module TSV
|
|
167
167
|
str = ""
|
168
168
|
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
169
169
|
if fields
|
170
|
-
str << "#" << key_field << sep << fields * sep << "\n"
|
170
|
+
str << "#" << (key_field || "ID") << sep << fields * sep << "\n"
|
171
171
|
end
|
172
172
|
|
173
173
|
str
|
data/lib/rbbt/tsv.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -26,7 +26,7 @@ source('#{UTIL}');
|
|
26
26
|
Log.debug{"R Script:\n#{ cmd }"}
|
27
27
|
|
28
28
|
if options.delete :monitor
|
29
|
-
io = CMD.cmd('R --vanilla
|
29
|
+
io = CMD.cmd('R --vanilla', options.merge(:in => cmd, :pipe => true, :log => true))
|
30
30
|
while line = io.gets
|
31
31
|
puts line
|
32
32
|
end
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -104,7 +104,8 @@ module CMD
|
|
104
104
|
pipe = options.delete(:pipe)
|
105
105
|
post = options.delete(:post)
|
106
106
|
log = options.delete(:log)
|
107
|
-
|
107
|
+
no_fail = options.delete(:no_fail)
|
108
|
+
dont_close_in = options.delete(:dont_close_in)
|
108
109
|
|
109
110
|
log = true if log.nil?
|
110
111
|
|
@@ -205,7 +206,7 @@ module CMD
|
|
205
206
|
end
|
206
207
|
|
207
208
|
#SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
|
208
|
-
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true
|
209
|
+
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true, :no_fail => no_fail
|
209
210
|
|
210
211
|
sout
|
211
212
|
else
|
@@ -217,7 +218,7 @@ module CMD
|
|
217
218
|
serr.close
|
218
219
|
end
|
219
220
|
|
220
|
-
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true
|
221
|
+
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true, :no_fail => no_fail
|
221
222
|
out = StringIO.new sout.read
|
222
223
|
sout.close unless sout.closed?
|
223
224
|
|
data/lib/rbbt/util/log.rb
CHANGED
data/lib/rbbt/util/misc/bgzf.rb
CHANGED
@@ -38,7 +38,12 @@ module Bgzf
|
|
38
38
|
pos = 0
|
39
39
|
while true do
|
40
40
|
blockdata_offset = tell
|
41
|
-
block =
|
41
|
+
block = begin
|
42
|
+
read_block
|
43
|
+
rescue Exception
|
44
|
+
raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if $!.message =~ /BGFZ.*expected/
|
45
|
+
raise $!
|
46
|
+
end
|
42
47
|
break unless block
|
43
48
|
index << [pos, blockdata_offset]
|
44
49
|
pos += block.length
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module ConcurrentStream
|
2
|
-
attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined, :aborted, :autojoin, :lockfile
|
2
|
+
attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined, :aborted, :autojoin, :lockfile, :no_fail
|
3
3
|
|
4
4
|
def self.setup(stream, options = {}, &block)
|
5
|
-
threads, pids, callback, filename, autojoin, lockfile = Misc.process_options options, :threads, :pids, :callback, :filename, :autojoin, :lockfile
|
5
|
+
threads, pids, callback, filename, autojoin, lockfile, no_fail = Misc.process_options options, :threads, :pids, :callback, :filename, :autojoin, :lockfile, :no_fail
|
6
6
|
stream.extend ConcurrentStream unless ConcurrentStream === stream
|
7
7
|
|
8
8
|
stream.threads ||= []
|
@@ -10,6 +10,7 @@ module ConcurrentStream
|
|
10
10
|
stream.threads.concat(Array === threads ? threads : [threads]) unless threads.nil?
|
11
11
|
stream.pids.concat(Array === pids ? pids : [pids]) unless pids.nil? or pids.empty?
|
12
12
|
stream.autojoin = autojoin
|
13
|
+
stream.no_fail = no_fail
|
13
14
|
|
14
15
|
callback = block if block_given?
|
15
16
|
if stream.callback and callback
|
@@ -60,7 +61,7 @@ module ConcurrentStream
|
|
60
61
|
@pids.each do |pid|
|
61
62
|
begin
|
62
63
|
Process.waitpid(pid, Process::WUNTRACED)
|
63
|
-
raise ProcessFailed.new "Error joining process #{pid} in #{self.inspect}" unless $?.success?
|
64
|
+
raise ProcessFailed.new "Error joining process #{pid} in #{self.inspect}" unless $?.success? or no_fail
|
64
65
|
rescue Errno::ECHILD
|
65
66
|
end
|
66
67
|
end
|
@@ -76,12 +77,13 @@ module ConcurrentStream
|
|
76
77
|
end
|
77
78
|
|
78
79
|
def join
|
80
|
+
@joined = true
|
81
|
+
|
79
82
|
join_threads
|
80
83
|
join_pids
|
81
84
|
|
82
85
|
join_callback
|
83
86
|
|
84
|
-
@joined = true
|
85
87
|
lockfile.unlock if lockfile and lockfile.locked?
|
86
88
|
close unless closed?
|
87
89
|
end
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -132,7 +132,7 @@ module Open
|
|
132
132
|
CMD.cmd("grep #{invert ? '-v' : ''}", "-w" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
|
133
133
|
end
|
134
134
|
else
|
135
|
-
CMD.cmd("grep #{invert ? '-v ' : ''} '#{grep}' -", :in => stream, :pipe => true, :post => proc{stream.force_close if stream.respond_to?
|
135
|
+
CMD.cmd("grep #{invert ? '-v ' : ''} '#{grep}' -", :in => stream, :pipe => true, :post => proc{begin stream.force_close; rescue Exception; end if stream.respond_to?(:force_close)})
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
data/lib/rbbt/workflow.rb
CHANGED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt-util'
|
4
|
+
require 'rbbt/util/simpleopt'
|
5
|
+
|
6
|
+
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
7
|
+
|
8
|
+
options = SOPT.setup <<EOF
|
9
|
+
|
10
|
+
Check the integrity of a BGZF file
|
11
|
+
|
12
|
+
$ rbbt check_bgzf <filename.bgz>
|
13
|
+
|
14
|
+
-h--help Print this help
|
15
|
+
|
16
|
+
EOF
|
17
|
+
|
18
|
+
rbbt_usage and exit 0 if defined? rbbt_usage and (options[:help] or ARGV.empty?)
|
19
|
+
|
20
|
+
file = ARGV.shift
|
21
|
+
|
22
|
+
begin
|
23
|
+
Open.read(file)
|
24
|
+
rescue Exception
|
25
|
+
puts Log.color :red, "#{ file } is Broken"
|
26
|
+
exit -1
|
27
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.14.28
|
4
|
+
version: 5.14.29
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -176,6 +176,7 @@ files:
|
|
176
176
|
- lib/rbbt/tsv/filter.rb
|
177
177
|
- lib/rbbt/tsv/index.rb
|
178
178
|
- lib/rbbt/tsv/manipulate.rb
|
179
|
+
- lib/rbbt/tsv/matrix.rb
|
179
180
|
- lib/rbbt/tsv/melt.rb
|
180
181
|
- lib/rbbt/tsv/parallel.rb
|
181
182
|
- lib/rbbt/tsv/parallel/through.rb
|
@@ -254,6 +255,7 @@ files:
|
|
254
255
|
- share/rbbt_commands/benchmark/pthrough
|
255
256
|
- share/rbbt_commands/benchmark/throughput
|
256
257
|
- share/rbbt_commands/benchmark/tsv
|
258
|
+
- share/rbbt_commands/check_bgzf
|
257
259
|
- share/rbbt_commands/color
|
258
260
|
- share/rbbt_commands/conf/web_user/add
|
259
261
|
- share/rbbt_commands/conf/web_user/list
|