rbbt-util 3.1.0 → 3.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
data/bin/rbbt_query.rb CHANGED
@@ -9,7 +9,7 @@ options = SOPT.get("-i--identifiers*:-f--format*:-o--organism*:-p--persistence:-
9
9
  file = ARGV[0]
10
10
 
11
11
  if not File.exists? file
12
- base, path = file.match(/(.*)?\.(.*)/).values_at 1, 2
12
+ base, path = file.match(/([^.]*)\.(.*)/).values_at 1, 2
13
13
  require 'rbbt/sources/' << base.to_s.downcase
14
14
  klass = Misc.string2const base
15
15
  file = klass[path].find
data/lib/rbbt/util/cmd.rb CHANGED
@@ -3,54 +3,74 @@ require 'rbbt/util/log'
3
3
  require 'stringio'
4
4
 
5
5
  module CMD
6
- class CMDError < RBBTError; end
7
6
 
7
+ class CMDError < RBBTError; end
8
8
  module SmartIO
9
- def self.tie(io, pid = nil, cmd = "", post = nil)
10
- io.instance_eval{
11
- @pid = pid
12
- @cmd = cmd
13
- @post = post
14
- alias original_close close
15
- def close
16
- begin
17
- self.original_read unless self.closed? or self.eof?
18
- Process.waitpid(@pid) if @pid
19
- rescue
20
- end
21
-
22
- if $? and not $?.success?
23
- Log.debug "Raising exception"
24
- exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
25
- raise exception
26
- end
9
+ attr_accessor :pid, :cmd, :post, :in, :out, :err
10
+ def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
11
+ io.extend SmartIO
12
+ io.pid = pid
13
+ io.cmd = cmd
14
+ io.in = sin
15
+ io.out = out
16
+ io.err = err
17
+ io.post = post
18
+
19
+ io.class.send(:alias_method, :original_close, :close)
20
+ io.class.send(:alias_method, :original_read, :read)
21
+ io
22
+ end
27
23
 
28
- @post.call if @post
29
- original_close
24
+ def wait_and_status
25
+ if @pid
26
+ begin
27
+ Process.waitpid(@pid)
28
+ rescue
30
29
  end
31
30
 
32
- def force_close
33
- if @pid
34
- Log.debug "Forcing close by killing '#{@pid}'"
35
- Process.kill("KILL", @pid)
36
- Process.waitpid(@pid)
37
- end
38
- @post.call if @post
31
+ Log.debug "Process #{ cmd } succeded" if $? and $?.success?
32
+
33
+ if $? and not $?.success?
34
+ Log.debug "Raising exception"
35
+ exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
39
36
  original_close
37
+ raise exception
40
38
  end
41
-
42
- alias original_read read
43
- def read
44
- data = Misc.fixutf8(original_read)
45
- self.close unless self.closed?
46
- data
47
- end
39
+ end
40
+ end
48
41
 
49
- }
50
- io
42
+ def close
43
+ self.original_read unless self.eof?
44
+
45
+ wait_and_status
46
+
47
+ @post.call if @post
48
+
49
+ original_close unless self.closed?
50
+ end
51
+
52
+ def force_close
53
+ if @pid
54
+ Log.debug "Forcing close by killing '#{@pid}'"
55
+ Process.kill("KILL", @pid)
56
+ Process.waitpid(@pid)
57
+ end
58
+
59
+ @post.call if @post
60
+
61
+ original_close
62
+ end
63
+
64
+ def read(*args)
65
+ data = original_read(*args)
66
+
67
+ self.close if self.eof?
68
+
69
+ data
51
70
  end
52
71
 
53
- end
72
+ end
73
+
54
74
 
55
75
  def self.process_cmd_options(options = {})
56
76
  string = ""
@@ -91,79 +111,107 @@ module CMD
91
111
  cmd << " " << cmd_options
92
112
  end
93
113
 
94
- sout, serr = IO.pipe, IO.pipe
95
-
96
- case
97
- when (false and (IO === in_content and not StringIO === in_content))
98
- sin = [in_content, nil]
99
- else
100
- sin = IO.pipe
101
- end
114
+ in_content = StringIO.new in_content if String === in_content
102
115
 
116
+ sout, serr, sin = IO.pipe, IO.pipe, IO.pipe
103
117
 
104
118
  pid = fork {
105
119
  begin
120
+ sin.last.close
121
+ sout.first.close
122
+ serr.first.close
123
+
124
+ io = in_content
125
+ while IO === io
126
+ if SmartIO === io
127
+ io.original_close unless io.closed?
128
+ io.out.close unless io.out.nil? or io.out.closed?
129
+ io.err.close unless io.err.nil? or io.err.closed?
130
+ io = io.in
131
+ else
132
+ io.close unless io.closed?
133
+ io = nil
134
+ end
135
+ end
106
136
 
107
- sin.last.close if sin.last
108
137
  STDIN.reopen sin.first
109
138
  sin.first.close
110
139
 
111
- serr.first.close
112
140
  STDERR.reopen serr.last
113
141
  serr.last.close
114
142
 
115
- sout.first.close
116
143
  STDOUT.reopen sout.last
117
144
  sout.last.close
118
145
 
119
146
  STDOUT.sync = STDERR.sync = true
147
+
120
148
  exec(cmd)
149
+
150
+ exit(-1)
121
151
  rescue Exception
152
+ Log.debug("CMDError: #{$!.message}")
153
+ ddd $!.backtrace
122
154
  raise CMDError, $!.message
123
155
  end
124
156
  }
157
+
125
158
  sin.first.close
126
159
  sout.last.close
127
160
  serr.last.close
128
161
 
162
+ sin = sin.last
163
+ sout = sout.first
164
+ serr = serr.first
165
+
129
166
 
130
167
  Log.debug "CMD: [#{pid}] #{cmd}"
131
168
 
132
- case
133
- when String === in_content
134
- sin.last.write in_content
135
- sin.last.close
136
- when in_content.respond_to?(:gets)
169
+ if in_content.respond_to?(:read)
137
170
  Thread.new do
138
- while not in_content.eof?
139
- sin.last.write in_content.gets
171
+ begin
172
+ loop do
173
+ break if in_content.closed?
174
+ block = in_content.read 1024
175
+ break if block.nil? or block.empty?
176
+ sin.write block
177
+ end
178
+
179
+ sin.close unless sin.closed?
180
+ in_content.close unless in_content.closed?
181
+ rescue
182
+ Process.kill "INT", pid
183
+ raise $!
140
184
  end
141
- sin.last.close
142
185
  end
186
+ else
187
+ sin.close
143
188
  end
144
189
 
145
190
  if pipe
146
191
  Thread.new do
147
- while l = serr.first.gets
148
- Log.log l, stderr if Integer === stderr
192
+ while line = serr.gets
193
+ Log.log line, stderr if Integer === stderr
149
194
  end
150
- serr.first.close
195
+ serr.close
196
+ Thread.exit
151
197
  end
152
198
 
153
- SmartIO.tie sout.first, pid, cmd, post
154
- sout.first
199
+ SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
155
200
 
201
+ sout
156
202
  else
157
203
  err = ""
158
204
  Thread.new do
159
- while l = serr.first.gets
160
- err << l if Integer === stderr
205
+ while not serr.eof?
206
+ err << serr.gets if Integer === stderr
161
207
  end
162
- serr.first.close
208
+ serr.close
209
+ Thread.exit
163
210
  end
164
211
 
165
- out = StringIO.new sout.first.read
166
- SmartIO.tie out, pid, cmd, post
212
+ out = StringIO.new sout.read
213
+ sout.close unless sout.closed?
214
+ SmartIO.tie out, pid, cmd, post, in_content, sin, serr
167
215
 
168
216
  Process.waitpid pid
169
217
 
@@ -1,17 +1,16 @@
1
-
2
1
  class FixWidthTable
3
2
 
4
3
  attr_accessor :filename, :file, :value_size, :record_size, :range, :size
5
4
  def initialize(filename, value_size = nil, range = nil, update = false)
6
5
  @filename = filename
7
6
 
8
- if update or %(memmory stringio).include?(filename.to_s.downcase) or not File.exists? filename
7
+ if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists? filename
9
8
  Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
10
9
  @value_size = value_size
11
10
  @range = range
12
11
  @record_size = @value_size + (@range ? 12 : 4)
13
12
 
14
- if %(memory stringio).include? filename.to_s.downcase
13
+ if %w(memory stringio).include? filename.to_s.downcase
15
14
  @file = StringIO.new
16
15
  else
17
16
  FileUtils.rm @filename if File.exists? @filename
@@ -31,6 +30,20 @@ class FixWidthTable
31
30
  end
32
31
  end
33
32
 
33
+
34
+ CONNECTIONS = {} unless defined? CONNECTIONS
35
+ def self.get(filename, value_size = nil, range = nil, update = false)
36
+ return self.new(filename, value_size, range, update) if filename == :memory
37
+ case
38
+ when (!File.exists?(filename) or update)
39
+ CONNECTIONS[filename] = self.new(filename, value_size, range, update)
40
+ when (not CONNECTIONS.include?(filename))
41
+ CONNECTIONS[filename] = self.new(filename, value_size, range, update)
42
+ end
43
+
44
+ CONNECTIONS[filename]
45
+ end
46
+
34
47
  def format(pos, value)
35
48
  padding = value_size - value.length
36
49
  if range
@@ -82,6 +95,7 @@ class FixWidthTable
82
95
  end
83
96
 
84
97
  def read
98
+ return if @filename == :memory
85
99
  @file.close unless @file.closed?
86
100
  @file = File.open(@filename, 'r')
87
101
  end
@@ -207,6 +221,7 @@ class FixWidthTable
207
221
  end
208
222
 
209
223
  def [](pos)
224
+ return [] if size == 0
210
225
  if range
211
226
  get_range(pos)
212
227
  else
@@ -33,13 +33,83 @@ end
33
33
  module Misc
34
34
  class FieldNotFoundError < StandardError;end
35
35
 
36
+ def self.intersect_sorted_arrays(a1, a2)
37
+ e1, e2 = a1.shift, a2.shift
38
+ intersect = []
39
+ while true
40
+ case
41
+ when (e1 and e2)
42
+ case e1 <=> e2
43
+ when 0
44
+ intersect << e1
45
+ e1, e2 = a1.shift, a2.shift
46
+ when -1
47
+ e1 = a1.shift
48
+ when 1
49
+ e2 = a2.shift
50
+ end
51
+ else
52
+ break
53
+ end
54
+ end
55
+ intersect
56
+ end
57
+
58
+ def self.merge_sorted_arrays(a1, a2)
59
+ e1, e2 = a1.shift, a2.shift
60
+ new = []
61
+ while true
62
+ case
63
+ when (e1 and e2)
64
+ case e1 <=> e2
65
+ when 0
66
+ new << e1
67
+ e1, e2 = a1.shift, a2.shift
68
+ when -1
69
+ new << e1
70
+ e1 = a1.shift
71
+ when 1
72
+ new << e2
73
+ e2 = a2.shift
74
+ end
75
+ when e2
76
+ new << e2
77
+ new.concat a2
78
+ break
79
+ when e1
80
+ new << e1
81
+ new.concat a1
82
+ break
83
+ else
84
+ break
85
+ end
86
+ end
87
+ new
88
+ end
89
+
90
+ def self.digest(text)
91
+ Digest::MD5.hexdigest(text)
92
+ end
93
+
94
+ def self.add_method(object, method_name, &block)
95
+ class << object
96
+ self
97
+ end.send :define_method, method_name, block
98
+ end
99
+
100
+ def self.redefine_method(object, old_method, new_method_name, &block)
101
+ metaclass = class << object; self end
102
+ metaclass.send :alias_method, new_method_name, old_method
103
+ metaclass.send :define_method, old_method, &block
104
+ end
105
+
36
106
  def self.filename?(filename)
37
107
  String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
38
108
  end
39
109
 
40
110
  def self.lock(file, *args)
41
111
  FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
42
- lockfile = Lockfile.new file + '.lock'
112
+ lockfile = Lockfile.new(file + '.lock')
43
113
  lockfile.lock do
44
114
  yield file, *args
45
115
  end
@@ -104,6 +174,24 @@ module Misc
104
174
  counts
105
175
  end
106
176
 
177
+ def self.benchmark(repeats = 1)
178
+ require 'benchmark'
179
+ res = nil
180
+ begin
181
+ measure = Benchmark.measure do
182
+ repeats.times do
183
+ res = yield
184
+ end
185
+ end
186
+ puts "Benchmark for #{ repeats } repeats"
187
+ puts measure
188
+ rescue Exception
189
+ puts "Benchmark aborted"
190
+ raise $!
191
+ end
192
+ res
193
+ end
194
+
107
195
  def self.profile
108
196
  require 'ruby-prof'
109
197
  RubyProf.start
@@ -279,7 +367,8 @@ module Misc
279
367
  # Divides the array into +num+ chunks of the same size by placing one
280
368
  # element in each chunk iteratively.
281
369
  def self.divide(array, num)
282
- chunks = [[]] * num
370
+ chunks = []
371
+ num.times do chunks << [] end
283
372
  array.each_with_index{|e, i|
284
373
  c = i % num
285
374
  chunks[c] << e
@@ -287,6 +376,15 @@ module Misc
287
376
  chunks
288
377
  end
289
378
 
379
+
380
+
381
+ def self.process_to_hash(list)
382
+ result = yield list
383
+ hash = {}
384
+ list.zip(result).each do |k,v| hash[k] = v end
385
+ hash
386
+ end
387
+
290
388
  IUPAC2BASE = {
291
389
  "A" => ["A"],
292
390
  "C" => ["C"],
@@ -324,9 +422,9 @@ module PDF2Text
324
422
  require 'rbbt/util/cmd'
325
423
  require 'rbbt/util/tmpfile'
326
424
  require 'rbbt/util/open'
327
- TmpFile.with_file(Open.read(filename)) do |pdf|
328
- CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
329
- end
425
+
426
+
427
+ CMD.cmd("pdftotext - -", :in => Open.open(filename, :nocache => true), :pipe => true, :stderr => true)
330
428
  end
331
429
  end
332
430
 
@@ -381,7 +479,7 @@ class NamedArray < Array
381
479
  def zip_fields
382
480
  return [] if self.empty?
383
481
  zipped = self[0].zip(*self[1..-1])
384
- zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
482
+ zipped = zipped.collect{|v| NamedArray.name(v, fields)}
385
483
  zipped
386
484
  end
387
485
 
@@ -400,6 +498,7 @@ class NamedArray < Array
400
498
  "* #{ field }: #{ Array === value ? value * "|" : value }"
401
499
  end * "\n"
402
500
  end
501
+
403
502
  end
404
503
 
405
504
  def benchmark(bench = true)
@@ -430,3 +529,4 @@ def profile(prof = true)
430
529
  yield
431
530
  end
432
531
  end
532
+