rbbt-util 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
data/bin/rbbt_query.rb CHANGED
@@ -9,7 +9,7 @@ options = SOPT.get("-i--identifiers*:-f--format*:-o--organism*:-p--persistence:-
9
9
  file = ARGV[0]
10
10
 
11
11
  if not File.exists? file
12
- base, path = file.match(/(.*)?\.(.*)/).values_at 1, 2
12
+ base, path = file.match(/([^.]*)\.(.*)/).values_at 1, 2
13
13
  require 'rbbt/sources/' << base.to_s.downcase
14
14
  klass = Misc.string2const base
15
15
  file = klass[path].find
data/lib/rbbt/util/cmd.rb CHANGED
@@ -3,54 +3,74 @@ require 'rbbt/util/log'
3
3
  require 'stringio'
4
4
 
5
5
  module CMD
6
- class CMDError < RBBTError; end
7
6
 
7
+ class CMDError < RBBTError; end
8
8
  module SmartIO
9
- def self.tie(io, pid = nil, cmd = "", post = nil)
10
- io.instance_eval{
11
- @pid = pid
12
- @cmd = cmd
13
- @post = post
14
- alias original_close close
15
- def close
16
- begin
17
- self.original_read unless self.closed? or self.eof?
18
- Process.waitpid(@pid) if @pid
19
- rescue
20
- end
21
-
22
- if $? and not $?.success?
23
- Log.debug "Raising exception"
24
- exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
25
- raise exception
26
- end
9
+ attr_accessor :pid, :cmd, :post, :in, :out, :err
10
+ def self.tie(io, pid = nil, cmd = "", post = nil, sin = nil, out = nil, err = nil)
11
+ io.extend SmartIO
12
+ io.pid = pid
13
+ io.cmd = cmd
14
+ io.in = sin
15
+ io.out = out
16
+ io.err = err
17
+ io.post = post
18
+
19
+ io.class.send(:alias_method, :original_close, :close)
20
+ io.class.send(:alias_method, :original_read, :read)
21
+ io
22
+ end
27
23
 
28
- @post.call if @post
29
- original_close
24
+ def wait_and_status
25
+ if @pid
26
+ begin
27
+ Process.waitpid(@pid)
28
+ rescue
30
29
  end
31
30
 
32
- def force_close
33
- if @pid
34
- Log.debug "Forcing close by killing '#{@pid}'"
35
- Process.kill("KILL", @pid)
36
- Process.waitpid(@pid)
37
- end
38
- @post.call if @post
31
+ Log.debug "Process #{ cmd } succeded" if $? and $?.success?
32
+
33
+ if $? and not $?.success?
34
+ Log.debug "Raising exception"
35
+ exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
39
36
  original_close
37
+ raise exception
40
38
  end
41
-
42
- alias original_read read
43
- def read
44
- data = Misc.fixutf8(original_read)
45
- self.close unless self.closed?
46
- data
47
- end
39
+ end
40
+ end
48
41
 
49
- }
50
- io
42
+ def close
43
+ self.original_read unless self.eof?
44
+
45
+ wait_and_status
46
+
47
+ @post.call if @post
48
+
49
+ original_close unless self.closed?
50
+ end
51
+
52
+ def force_close
53
+ if @pid
54
+ Log.debug "Forcing close by killing '#{@pid}'"
55
+ Process.kill("KILL", @pid)
56
+ Process.waitpid(@pid)
57
+ end
58
+
59
+ @post.call if @post
60
+
61
+ original_close
62
+ end
63
+
64
+ def read(*args)
65
+ data = original_read(*args)
66
+
67
+ self.close if self.eof?
68
+
69
+ data
51
70
  end
52
71
 
53
- end
72
+ end
73
+
54
74
 
55
75
  def self.process_cmd_options(options = {})
56
76
  string = ""
@@ -91,79 +111,107 @@ module CMD
91
111
  cmd << " " << cmd_options
92
112
  end
93
113
 
94
- sout, serr = IO.pipe, IO.pipe
95
-
96
- case
97
- when (false and (IO === in_content and not StringIO === in_content))
98
- sin = [in_content, nil]
99
- else
100
- sin = IO.pipe
101
- end
114
+ in_content = StringIO.new in_content if String === in_content
102
115
 
116
+ sout, serr, sin = IO.pipe, IO.pipe, IO.pipe
103
117
 
104
118
  pid = fork {
105
119
  begin
120
+ sin.last.close
121
+ sout.first.close
122
+ serr.first.close
123
+
124
+ io = in_content
125
+ while IO === io
126
+ if SmartIO === io
127
+ io.original_close unless io.closed?
128
+ io.out.close unless io.out.nil? or io.out.closed?
129
+ io.err.close unless io.err.nil? or io.err.closed?
130
+ io = io.in
131
+ else
132
+ io.close unless io.closed?
133
+ io = nil
134
+ end
135
+ end
106
136
 
107
- sin.last.close if sin.last
108
137
  STDIN.reopen sin.first
109
138
  sin.first.close
110
139
 
111
- serr.first.close
112
140
  STDERR.reopen serr.last
113
141
  serr.last.close
114
142
 
115
- sout.first.close
116
143
  STDOUT.reopen sout.last
117
144
  sout.last.close
118
145
 
119
146
  STDOUT.sync = STDERR.sync = true
147
+
120
148
  exec(cmd)
149
+
150
+ exit(-1)
121
151
  rescue Exception
152
+ Log.debug("CMDError: #{$!.message}")
153
+ ddd $!.backtrace
122
154
  raise CMDError, $!.message
123
155
  end
124
156
  }
157
+
125
158
  sin.first.close
126
159
  sout.last.close
127
160
  serr.last.close
128
161
 
162
+ sin = sin.last
163
+ sout = sout.first
164
+ serr = serr.first
165
+
129
166
 
130
167
  Log.debug "CMD: [#{pid}] #{cmd}"
131
168
 
132
- case
133
- when String === in_content
134
- sin.last.write in_content
135
- sin.last.close
136
- when in_content.respond_to?(:gets)
169
+ if in_content.respond_to?(:read)
137
170
  Thread.new do
138
- while not in_content.eof?
139
- sin.last.write in_content.gets
171
+ begin
172
+ loop do
173
+ break if in_content.closed?
174
+ block = in_content.read 1024
175
+ break if block.nil? or block.empty?
176
+ sin.write block
177
+ end
178
+
179
+ sin.close unless sin.closed?
180
+ in_content.close unless in_content.closed?
181
+ rescue
182
+ Process.kill "INT", pid
183
+ raise $!
140
184
  end
141
- sin.last.close
142
185
  end
186
+ else
187
+ sin.close
143
188
  end
144
189
 
145
190
  if pipe
146
191
  Thread.new do
147
- while l = serr.first.gets
148
- Log.log l, stderr if Integer === stderr
192
+ while line = serr.gets
193
+ Log.log line, stderr if Integer === stderr
149
194
  end
150
- serr.first.close
195
+ serr.close
196
+ Thread.exit
151
197
  end
152
198
 
153
- SmartIO.tie sout.first, pid, cmd, post
154
- sout.first
199
+ SmartIO.tie sout, pid, cmd, post, in_content, sin, serr
155
200
 
201
+ sout
156
202
  else
157
203
  err = ""
158
204
  Thread.new do
159
- while l = serr.first.gets
160
- err << l if Integer === stderr
205
+ while not serr.eof?
206
+ err << serr.gets if Integer === stderr
161
207
  end
162
- serr.first.close
208
+ serr.close
209
+ Thread.exit
163
210
  end
164
211
 
165
- out = StringIO.new sout.first.read
166
- SmartIO.tie out, pid, cmd, post
212
+ out = StringIO.new sout.read
213
+ sout.close unless sout.closed?
214
+ SmartIO.tie out, pid, cmd, post, in_content, sin, serr
167
215
 
168
216
  Process.waitpid pid
169
217
 
data/lib/rbbt/util/fix_width_table.rb CHANGED
@@ -1,17 +1,16 @@
1
-
2
1
  class FixWidthTable
3
2
 
4
3
  attr_accessor :filename, :file, :value_size, :record_size, :range, :size
5
4
  def initialize(filename, value_size = nil, range = nil, update = false)
6
5
  @filename = filename
7
6
 
8
- if update or %(memmory stringio).include?(filename.to_s.downcase) or not File.exists? filename
7
+ if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists? filename
9
8
  Log.debug "Writing FixWidthTable at #{ @filename.inspect }"
10
9
  @value_size = value_size
11
10
  @range = range
12
11
  @record_size = @value_size + (@range ? 12 : 4)
13
12
 
14
- if %(memory stringio).include? filename.to_s.downcase
13
+ if %w(memory stringio).include? filename.to_s.downcase
15
14
  @file = StringIO.new
16
15
  else
17
16
  FileUtils.rm @filename if File.exists? @filename
@@ -31,6 +30,20 @@ class FixWidthTable
31
30
  end
32
31
  end
33
32
 
33
+
34
+ CONNECTIONS = {} unless defined? CONNECTIONS
35
+ def self.get(filename, value_size = nil, range = nil, update = false)
36
+ return self.new(filename, value_size, range, update) if filename == :memory
37
+ case
38
+ when (!File.exists?(filename) or update)
39
+ CONNECTIONS[filename] = self.new(filename, value_size, range, update)
40
+ when (not CONNECTIONS.include?(filename))
41
+ CONNECTIONS[filename] = self.new(filename, value_size, range, update)
42
+ end
43
+
44
+ CONNECTIONS[filename]
45
+ end
46
+
34
47
  def format(pos, value)
35
48
  padding = value_size - value.length
36
49
  if range
@@ -82,6 +95,7 @@ class FixWidthTable
82
95
  end
83
96
 
84
97
  def read
98
+ return if @filename == :memory
85
99
  @file.close unless @file.closed?
86
100
  @file = File.open(@filename, 'r')
87
101
  end
@@ -207,6 +221,7 @@ class FixWidthTable
207
221
  end
208
222
 
209
223
  def [](pos)
224
+ return [] if size == 0
210
225
  if range
211
226
  get_range(pos)
212
227
  else
data/lib/rbbt/util/misc.rb CHANGED
@@ -33,13 +33,83 @@ end
33
33
  module Misc
34
34
  class FieldNotFoundError < StandardError;end
35
35
 
36
+ def self.intersect_sorted_arrays(a1, a2)
37
+ e1, e2 = a1.shift, a2.shift
38
+ intersect = []
39
+ while true
40
+ case
41
+ when (e1 and e2)
42
+ case e1 <=> e2
43
+ when 0
44
+ intersect << e1
45
+ e1, e2 = a1.shift, a2.shift
46
+ when -1
47
+ e1 = a1.shift
48
+ when 1
49
+ e2 = a2.shift
50
+ end
51
+ else
52
+ break
53
+ end
54
+ end
55
+ intersect
56
+ end
57
+
58
+ def self.merge_sorted_arrays(a1, a2)
59
+ e1, e2 = a1.shift, a2.shift
60
+ new = []
61
+ while true
62
+ case
63
+ when (e1 and e2)
64
+ case e1 <=> e2
65
+ when 0
66
+ new << e1
67
+ e1, e2 = a1.shift, a2.shift
68
+ when -1
69
+ new << e1
70
+ e1 = a1.shift
71
+ when 1
72
+ new << e2
73
+ e2 = a2.shift
74
+ end
75
+ when e2
76
+ new << e2
77
+ new.concat a2
78
+ break
79
+ when e1
80
+ new << e1
81
+ new.concat a1
82
+ break
83
+ else
84
+ break
85
+ end
86
+ end
87
+ new
88
+ end
89
+
90
+ def self.digest(text)
91
+ Digest::MD5.hexdigest(text)
92
+ end
93
+
94
+ def self.add_method(object, method_name, &block)
95
+ class << object
96
+ self
97
+ end.send :define_method, method_name, block
98
+ end
99
+
100
+ def self.redefine_method(object, old_method, new_method_name, &block)
101
+ metaclass = class << object; self end
102
+ metaclass.send :alias_method, new_method_name, old_method
103
+ metaclass.send :define_method, old_method, &block
104
+ end
105
+
36
106
  def self.filename?(filename)
37
107
  String === filename and filename.length < 1024 and filename.index("\n").nil? and File.exists? filename
38
108
  end
39
109
 
40
110
  def self.lock(file, *args)
41
111
  FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exists? File.dirname(File.expand_path(file))
42
- lockfile = Lockfile.new file + '.lock'
112
+ lockfile = Lockfile.new(file + '.lock')
43
113
  lockfile.lock do
44
114
  yield file, *args
45
115
  end
@@ -104,6 +174,24 @@ module Misc
104
174
  counts
105
175
  end
106
176
 
177
+ def self.benchmark(repeats = 1)
178
+ require 'benchmark'
179
+ res = nil
180
+ begin
181
+ measure = Benchmark.measure do
182
+ repeats.times do
183
+ res = yield
184
+ end
185
+ end
186
+ puts "Benchmark for #{ repeats } repeats"
187
+ puts measure
188
+ rescue Exception
189
+ puts "Benchmark aborted"
190
+ raise $!
191
+ end
192
+ res
193
+ end
194
+
107
195
  def self.profile
108
196
  require 'ruby-prof'
109
197
  RubyProf.start
@@ -279,7 +367,8 @@ module Misc
279
367
  # Divides the array into +num+ chunks of the same size by placing one
280
368
  # element in each chunk iteratively.
281
369
  def self.divide(array, num)
282
- chunks = [[]] * num
370
+ chunks = []
371
+ num.times do chunks << [] end
283
372
  array.each_with_index{|e, i|
284
373
  c = i % num
285
374
  chunks[c] << e
@@ -287,6 +376,15 @@ module Misc
287
376
  chunks
288
377
  end
289
378
 
379
+
380
+
381
+ def self.process_to_hash(list)
382
+ result = yield list
383
+ hash = {}
384
+ list.zip(result).each do |k,v| hash[k] = v end
385
+ hash
386
+ end
387
+
290
388
  IUPAC2BASE = {
291
389
  "A" => ["A"],
292
390
  "C" => ["C"],
@@ -324,9 +422,9 @@ module PDF2Text
324
422
  require 'rbbt/util/cmd'
325
423
  require 'rbbt/util/tmpfile'
326
424
  require 'rbbt/util/open'
327
- TmpFile.with_file(Open.read(filename)) do |pdf|
328
- CMD.cmd("pdftotext #{pdf} -", :pipe => false, :stderr => true)
329
- end
425
+
426
+
427
+ CMD.cmd("pdftotext - -", :in => Open.open(filename, :nocache => true), :pipe => true, :stderr => true)
330
428
  end
331
429
  end
332
430
 
@@ -381,7 +479,7 @@ class NamedArray < Array
381
479
  def zip_fields
382
480
  return [] if self.empty?
383
481
  zipped = self[0].zip(*self[1..-1])
384
- zipped = zipped.collect{|v| NamedArray.name(v, fields)} if fields
482
+ zipped = zipped.collect{|v| NamedArray.name(v, fields)}
385
483
  zipped
386
484
  end
387
485
 
@@ -400,6 +498,7 @@ class NamedArray < Array
400
498
  "* #{ field }: #{ Array === value ? value * "|" : value }"
401
499
  end * "\n"
402
500
  end
501
+
403
502
  end
404
503
 
405
504
  def benchmark(bench = true)
@@ -430,3 +529,4 @@ def profile(prof = true)
430
529
  yield
431
530
  end
432
531
  end
532
+