rbbt-util 5.14.28 → 5.14.29
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/association/index.rb +6 -2
- data/lib/rbbt/association.rb +13 -4
- data/lib/rbbt/knowledge_base.rb +15 -13
- data/lib/rbbt/persist.rb +2 -1
- data/lib/rbbt/resource/rake.rb +2 -1
- data/lib/rbbt/resource.rb +6 -1
- data/lib/rbbt/tsv/accessor.rb +1 -1
- data/lib/rbbt/tsv/attach.rb +1 -1
- data/lib/rbbt/tsv/dumper.rb +1 -0
- data/lib/rbbt/tsv/manipulate.rb +3 -6
- data/lib/rbbt/tsv/matrix.rb +20 -0
- data/lib/rbbt/tsv/parser.rb +7 -2
- data/lib/rbbt/tsv/util.rb +1 -1
- data/lib/rbbt/tsv.rb +1 -0
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +4 -3
- data/lib/rbbt/util/log.rb +1 -1
- data/lib/rbbt/util/misc/bgzf.rb +6 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +6 -4
- data/lib/rbbt/util/open.rb +1 -1
- data/lib/rbbt/workflow.rb +4 -0
- data/share/rbbt_commands/check_bgzf +27 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ed65d89a02680821214ab3141ef167565045291
|
4
|
+
data.tar.gz: 95402199abefb0228dc7788731a1c2b0487c9bf6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8643b977a908ea2447d070aef682d4f5bafc3360f380cbc3b43be992ba6bad689cfba5ee2ed546fce64672f133fb5f2676b13e4088d6a60f0c91ba2bef741108
|
7
|
+
data.tar.gz: 25b4bf073c68ee563eb17d77b00af6a38f9d25d10520f834167bb94e6b8daea866c048264f97a9b6afc2917d7d32b16c4b41a52aa123176541b3e84f8bd83b3b
|
data/lib/rbbt/association/index.rb
CHANGED
@@ -70,7 +70,7 @@ module Association
|
|
70
70
|
source_type = Entity.formats[source_field]
|
71
71
|
target_type = Entity.formats[target_field]
|
72
72
|
|
73
|
-
source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
|
73
|
+
source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s]
|
74
74
|
target_entities = entities[:target] || entities[target_field] || entities[Entity.formats[target_field].to_s]
|
75
75
|
|
76
76
|
[source_entities, target_entities]
|
@@ -89,7 +89,11 @@ module Association
|
|
89
89
|
end
|
90
90
|
|
91
91
|
matches = source.uniq.inject([]){|acc,e|
|
92
|
-
|
92
|
+
if block_given?
|
93
|
+
acc.concat(match(e))
|
94
|
+
else
|
95
|
+
acc.concat(match(e))
|
96
|
+
end
|
93
97
|
}
|
94
98
|
|
95
99
|
return matches if target == :all or target == "all"
|
data/lib/rbbt/association.rb
CHANGED
@@ -133,12 +133,13 @@ module Association
|
|
133
133
|
target_field = tsv.fields.first
|
134
134
|
|
135
135
|
if source_final_format and source_field != source_final_format and
|
136
|
-
Entity.formats[source_field]
|
137
|
-
|
136
|
+
Entity.formats[source_field]
|
137
|
+
|
138
138
|
Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")
|
139
139
|
|
140
140
|
tsv.with_unnamed do
|
141
|
-
|
141
|
+
identifiers = tsv.identifiers || Organism.identifiers(tsv.namespace)
|
142
|
+
tsv = tsv.change_key source_final_format, :identifiers => identifiers, :persist => true
|
142
143
|
end
|
143
144
|
end
|
144
145
|
|
@@ -221,7 +222,14 @@ module Association
|
|
221
222
|
|
222
223
|
case file
|
223
224
|
when Proc
|
224
|
-
|
225
|
+
res = file.call
|
226
|
+
tsv = case res
|
227
|
+
when TSV, Path
|
228
|
+
return load_tsv(res, options)
|
229
|
+
else
|
230
|
+
tsv = TSV.open(res, :unnamed => true)
|
231
|
+
return load_tsv(tsv, options)
|
232
|
+
end
|
225
233
|
when TSV
|
226
234
|
key_field, *fields = all_fields = file.all_fields
|
227
235
|
else
|
@@ -257,6 +265,7 @@ module Association
|
|
257
265
|
tsv = load_tsv(file, options)
|
258
266
|
|
259
267
|
tsv.annotate(data)
|
268
|
+
|
260
269
|
data.serializer = tsv.type
|
261
270
|
|
262
271
|
tsv.with_unnamed do
|
data/lib/rbbt/knowledge_base.rb
CHANGED
@@ -358,7 +358,7 @@ class KnowledgeBase
|
|
358
358
|
end
|
359
359
|
end
|
360
360
|
|
361
|
-
def subset(name, entities)
|
361
|
+
def subset(name, entities, &block)
|
362
362
|
entities = case entities
|
363
363
|
when :all
|
364
364
|
{:target => :all, :source => :all}
|
@@ -374,18 +374,20 @@ class KnowledgeBase
|
|
374
374
|
|
375
375
|
repo = get_index name
|
376
376
|
|
377
|
-
begin
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
377
|
+
matches = begin
|
378
|
+
s = repo.subset_entities(entities, &block)
|
379
|
+
setup(name, s)
|
380
|
+
rescue Exception
|
381
|
+
target = entities[:target]
|
382
|
+
source = entities[:source]
|
383
|
+
if target or source
|
384
|
+
entities[:target] = source
|
385
|
+
entities[:source] = target
|
386
|
+
end
|
387
|
+
setup(name, repo.reverse.subset_entities(entities, &block), true)
|
388
|
+
end
|
389
|
+
|
390
|
+
block_given? ? matches.select(&block) : matches
|
389
391
|
end
|
390
392
|
|
391
393
|
def translate(entities, type)
|
data/lib/rbbt/persist.rb
CHANGED
@@ -218,7 +218,7 @@ module Persist
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def self.get_result(path, type, persist_options, lockfile, &block)
|
221
|
-
res = yield
|
221
|
+
res = yield path
|
222
222
|
stream = res if IO === res
|
223
223
|
stream = res.stream if res.respond_to? :stream
|
224
224
|
|
@@ -317,6 +317,7 @@ module Persist
|
|
317
317
|
if FalseClass === persist_options[:persist]
|
318
318
|
yield
|
319
319
|
else
|
320
|
+
persist_options[:update] ||= true if persist_options[:persist].to_s == "update"
|
320
321
|
other_options = Misc.process_options persist_options, :other
|
321
322
|
path = persistence_path(name, persist_options, other_options || {})
|
322
323
|
|
data/lib/rbbt/resource/rake.rb
CHANGED
data/lib/rbbt/resource.rb
CHANGED
@@ -122,7 +122,12 @@ module Resource
|
|
122
122
|
raise "Resource is missing and does not seem to be claimed: #{ self } -- #{ path } "
|
123
123
|
end
|
124
124
|
|
125
|
-
|
125
|
+
if path.respond_to?(:find)
|
126
|
+
final_path = force ? path.find(:default) : path.find
|
127
|
+
else
|
128
|
+
final_path = path
|
129
|
+
end
|
130
|
+
|
126
131
|
if not File.exists? final_path or force
|
127
132
|
Log.medium "Producing: #{ final_path }"
|
128
133
|
lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -9,7 +9,7 @@ module TSV
|
|
9
9
|
attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
|
10
10
|
|
11
11
|
def info
|
12
|
-
{:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed}.delete_if{|k,v| v.nil? }
|
12
|
+
{:key_field => key_field, :fields => fields, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed, :cast => cast}.delete_if{|k,v| v.nil? }
|
13
13
|
end
|
14
14
|
|
15
15
|
def annotate(tsv)
|
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -182,7 +182,7 @@ module TSV
|
|
182
182
|
unless TSV === other
|
183
183
|
other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
|
184
184
|
other = TSV.open(other, :persist => options[:persist_input] == true)
|
185
|
-
other.identifiers
|
185
|
+
other.identifiers ||= other_identifier_file
|
186
186
|
end
|
187
187
|
|
188
188
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil? or fields == :all
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -631,10 +631,11 @@ module TSV
|
|
631
631
|
self
|
632
632
|
end
|
633
633
|
|
634
|
-
def transpose(key_field)
|
634
|
+
def transpose(key_field="Unkown ID")
|
635
635
|
raise "Transposing only works for TSVs of type :list" unless type == :list
|
636
636
|
new_fields = keys
|
637
|
-
new =
|
637
|
+
new = self.annotate({})
|
638
|
+
TSV.setup(new, :key_field => key_field, :fields => new_fields, :type => type, :filename => filename, :identifiers => identifiers)
|
638
639
|
|
639
640
|
through do |key, values|
|
640
641
|
fields.zip(values) do |new_key, value|
|
@@ -643,10 +644,6 @@ module TSV
|
|
643
644
|
end
|
644
645
|
end
|
645
646
|
|
646
|
-
new.entity_options = entity_options
|
647
|
-
new.entity_templates = entity_templates
|
648
|
-
new.namespace = namespace
|
649
|
-
|
650
647
|
new
|
651
648
|
end
|
652
649
|
end
|
data/lib/rbbt/tsv/matrix.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module TSV
|
2
|
+
def self.read_matrix(tsv, field_format = "ID", value_format = "Value")
|
3
|
+
tsv = TSV.open(tsv) unless TSV === tsv
|
4
|
+
|
5
|
+
key_field, *fields = tsv.all_fields
|
6
|
+
options = tsv.options.merge(:key_field => key_field, :fields => [field_format, value_format], :type => :double, :cast => nil)
|
7
|
+
|
8
|
+
options[:filename] ||= tsv.filename
|
9
|
+
options[:identifiers] ||= tsv.identifier_files.first
|
10
|
+
|
11
|
+
dumper = TSV::Dumper.new(options)
|
12
|
+
|
13
|
+
dumper.init
|
14
|
+
TSV.traverse tsv, :into => dumper do |key, values|
|
15
|
+
[key, [fields, values]]
|
16
|
+
end
|
17
|
+
|
18
|
+
dumper.stream
|
19
|
+
end
|
20
|
+
end
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
2
|
module TSV
|
3
3
|
class Parser
|
4
|
-
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble
|
4
|
+
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers
|
5
5
|
|
6
6
|
class SKIP_LINE < Exception; end
|
7
7
|
class END_PARSING < Exception; end
|
@@ -392,6 +392,8 @@ module TSV
|
|
392
392
|
@type ||= Misc.process_options(options, :type) || :double
|
393
393
|
@type ||= :double
|
394
394
|
|
395
|
+
@identifiers = Misc.process_options(options, :identifiers)
|
396
|
+
|
395
397
|
@filename = Misc.process_options(options, :filename)
|
396
398
|
@filename ||= stream.filename if stream.respond_to? :filename
|
397
399
|
|
@@ -472,6 +474,7 @@ module TSV
|
|
472
474
|
data.fields = @fields
|
473
475
|
data.namespace = @namespace
|
474
476
|
data.filename = @filename
|
477
|
+
data.identifiers = @identifiers
|
475
478
|
data.cast = @cast if Symbol === @cast
|
476
479
|
data
|
477
480
|
end
|
@@ -559,6 +562,8 @@ module TSV
|
|
559
562
|
break
|
560
563
|
end
|
561
564
|
rescue END_PARSING
|
565
|
+
stream.close unless stream.closed?
|
566
|
+
begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
|
562
567
|
break
|
563
568
|
rescue Errno::EPIPE
|
564
569
|
Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
|
@@ -572,7 +577,7 @@ module TSV
|
|
572
577
|
end
|
573
578
|
ensure
|
574
579
|
stream.close unless stream.closed?
|
575
|
-
stream.join if stream.respond_to? :join
|
580
|
+
stream.join if stream.respond_to? :join and not stream.joined?
|
576
581
|
end
|
577
582
|
|
578
583
|
self
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -167,7 +167,7 @@ module TSV
|
|
167
167
|
str = ""
|
168
168
|
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
169
169
|
if fields
|
170
|
-
str << "#" << key_field << sep << fields * sep << "\n"
|
170
|
+
str << "#" << (key_field || "ID") << sep << fields * sep << "\n"
|
171
171
|
end
|
172
172
|
|
173
173
|
str
|
data/lib/rbbt/tsv.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -26,7 +26,7 @@ source('#{UTIL}');
|
|
26
26
|
Log.debug{"R Script:\n#{ cmd }"}
|
27
27
|
|
28
28
|
if options.delete :monitor
|
29
|
-
io = CMD.cmd('R --vanilla
|
29
|
+
io = CMD.cmd('R --vanilla', options.merge(:in => cmd, :pipe => true, :log => true))
|
30
30
|
while line = io.gets
|
31
31
|
puts line
|
32
32
|
end
|
data/lib/rbbt/util/cmd.rb
CHANGED
@@ -104,7 +104,8 @@ module CMD
|
|
104
104
|
pipe = options.delete(:pipe)
|
105
105
|
post = options.delete(:post)
|
106
106
|
log = options.delete(:log)
|
107
|
-
|
107
|
+
no_fail = options.delete(:no_fail)
|
108
|
+
dont_close_in = options.delete(:dont_close_in)
|
108
109
|
|
109
110
|
log = true if log.nil?
|
110
111
|
|
@@ -205,7 +206,7 @@ module CMD
|
|
205
206
|
end
|
206
207
|
|
207
208
|
#SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
|
208
|
-
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true
|
209
|
+
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true, :no_fail => no_fail
|
209
210
|
|
210
211
|
sout
|
211
212
|
else
|
@@ -217,7 +218,7 @@ module CMD
|
|
217
218
|
serr.close
|
218
219
|
end
|
219
220
|
|
220
|
-
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true
|
221
|
+
ConcurrentStream.setup sout, :pids => [pid], :autojoin => true, :no_fail => no_fail
|
221
222
|
out = StringIO.new sout.read
|
222
223
|
sout.close unless sout.closed?
|
223
224
|
|
data/lib/rbbt/util/log.rb
CHANGED
data/lib/rbbt/util/misc/bgzf.rb
CHANGED
@@ -38,7 +38,12 @@ module Bgzf
|
|
38
38
|
pos = 0
|
39
39
|
while true do
|
40
40
|
blockdata_offset = tell
|
41
|
-
block =
|
41
|
+
block = begin
|
42
|
+
read_block
|
43
|
+
rescue Exception
|
44
|
+
raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if $!.message =~ /BGFZ.*expected/
|
45
|
+
raise $!
|
46
|
+
end
|
42
47
|
break unless block
|
43
48
|
index << [pos, blockdata_offset]
|
44
49
|
pos += block.length
|
data/lib/rbbt/util/misc/concurrent_stream.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module ConcurrentStream
|
2
|
-
attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined, :aborted, :autojoin, :lockfile
|
2
|
+
attr_accessor :threads, :pids, :callback, :abort_callback, :filename, :joined, :aborted, :autojoin, :lockfile, :no_fail
|
3
3
|
|
4
4
|
def self.setup(stream, options = {}, &block)
|
5
|
-
threads, pids, callback, filename, autojoin, lockfile = Misc.process_options options, :threads, :pids, :callback, :filename, :autojoin, :lockfile
|
5
|
+
threads, pids, callback, filename, autojoin, lockfile, no_fail = Misc.process_options options, :threads, :pids, :callback, :filename, :autojoin, :lockfile, :no_fail
|
6
6
|
stream.extend ConcurrentStream unless ConcurrentStream === stream
|
7
7
|
|
8
8
|
stream.threads ||= []
|
@@ -10,6 +10,7 @@ module ConcurrentStream
|
|
10
10
|
stream.threads.concat(Array === threads ? threads : [threads]) unless threads.nil?
|
11
11
|
stream.pids.concat(Array === pids ? pids : [pids]) unless pids.nil? or pids.empty?
|
12
12
|
stream.autojoin = autojoin
|
13
|
+
stream.no_fail = no_fail
|
13
14
|
|
14
15
|
callback = block if block_given?
|
15
16
|
if stream.callback and callback
|
@@ -60,7 +61,7 @@ module ConcurrentStream
|
|
60
61
|
@pids.each do |pid|
|
61
62
|
begin
|
62
63
|
Process.waitpid(pid, Process::WUNTRACED)
|
63
|
-
raise ProcessFailed.new "Error joining process #{pid} in #{self.inspect}" unless $?.success?
|
64
|
+
raise ProcessFailed.new "Error joining process #{pid} in #{self.inspect}" unless $?.success? or no_fail
|
64
65
|
rescue Errno::ECHILD
|
65
66
|
end
|
66
67
|
end
|
@@ -76,12 +77,13 @@ module ConcurrentStream
|
|
76
77
|
end
|
77
78
|
|
78
79
|
def join
|
80
|
+
@joined = true
|
81
|
+
|
79
82
|
join_threads
|
80
83
|
join_pids
|
81
84
|
|
82
85
|
join_callback
|
83
86
|
|
84
|
-
@joined = true
|
85
87
|
lockfile.unlock if lockfile and lockfile.locked?
|
86
88
|
close unless closed?
|
87
89
|
end
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -132,7 +132,7 @@ module Open
|
|
132
132
|
CMD.cmd("grep #{invert ? '-v' : ''}", "-w" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
|
133
133
|
end
|
134
134
|
else
|
135
|
-
CMD.cmd("grep #{invert ? '-v ' : ''} '#{grep}' -", :in => stream, :pipe => true, :post => proc{stream.force_close if stream.respond_to?
|
135
|
+
CMD.cmd("grep #{invert ? '-v ' : ''} '#{grep}' -", :in => stream, :pipe => true, :post => proc{begin stream.force_close; rescue Exception; end if stream.respond_to?(:force_close)})
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
data/lib/rbbt/workflow.rb
CHANGED
data/share/rbbt_commands/check_bgzf
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rbbt-util'
|
4
|
+
require 'rbbt/util/simpleopt'
|
5
|
+
|
6
|
+
$0 = "rbbt #{$previous_commands*""} #{ File.basename(__FILE__) }" if $previous_commands
|
7
|
+
|
8
|
+
options = SOPT.setup <<EOF
|
9
|
+
|
10
|
+
Check the integrity of a BGZF file
|
11
|
+
|
12
|
+
$ rbbt check_bgzf <filename.bgz>
|
13
|
+
|
14
|
+
-h--help Print this help
|
15
|
+
|
16
|
+
EOF
|
17
|
+
|
18
|
+
rbbt_usage and exit 0 if defined? rbbt_usage and (options[:help] or ARGV.empty?)
|
19
|
+
|
20
|
+
file = ARGV.shift
|
21
|
+
|
22
|
+
begin
|
23
|
+
Open.read(file)
|
24
|
+
rescue Exception
|
25
|
+
puts Log.color :red, "#{ file } is Broken"
|
26
|
+
exit -1
|
27
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.14.28
|
4
|
+
version: 5.14.29
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -176,6 +176,7 @@ files:
|
|
176
176
|
- lib/rbbt/tsv/filter.rb
|
177
177
|
- lib/rbbt/tsv/index.rb
|
178
178
|
- lib/rbbt/tsv/manipulate.rb
|
179
|
+
- lib/rbbt/tsv/matrix.rb
|
179
180
|
- lib/rbbt/tsv/melt.rb
|
180
181
|
- lib/rbbt/tsv/parallel.rb
|
181
182
|
- lib/rbbt/tsv/parallel/through.rb
|
@@ -254,6 +255,7 @@ files:
|
|
254
255
|
- share/rbbt_commands/benchmark/pthrough
|
255
256
|
- share/rbbt_commands/benchmark/throughput
|
256
257
|
- share/rbbt_commands/benchmark/tsv
|
258
|
+
- share/rbbt_commands/check_bgzf
|
257
259
|
- share/rbbt_commands/color
|
258
260
|
- share/rbbt_commands/conf/web_user/add
|
259
261
|
- share/rbbt_commands/conf/web_user/list
|