rbbt-util 3.2.0 → 3.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/util/workflow'
6
+ require 'pp'
7
+
8
# Print the usage text reported by +task+ on standard output and terminate
# the process with exit status -1.
def usage(task)
  $stdout.puts(task.usage)
  exit(-1)
end
12
+
13
# Build the option-definition string passed to SOPT.get for +task+.
#
# Every entry of task.option_summary (flattened) contributes one item of the
# form "-<first character of the name>--<name>", followed by '*' unless the
# entry's :type is :boolean. Items are joined with ':'.
def SOPT_options(task)
  definitions = task.option_summary.flatten.map do |info|
    long_name = info[:name]
    short     = long_name.to_s.chars.first
    marker    = info[:type] == :boolean ? '' : '*'

    "-#{short}--#{long_name}#{marker}"
  end

  definitions.join(":")
end
25
+
26
# Convert the raw values in +job_options+ according to the option types
# declared in task.option_summary and return them in a new hash.
#
#   :float   -> value.to_f
#   :integer -> value.to_i
#   :array   -> value split on ',' or '|'
#   :tsv     -> TSV read from STDIN when the value is '-', otherwise from the
#               value itself, rendered with to_s(:sort); if that raises, the
#               raw value is kept
#
# Any other type, including options missing from the summary, is passed
# through unchanged.
def fix_options(task, job_options)
  option_types = {}
  task.option_summary.flatten.each do |info|
    option_types[info[:name]] = info[:type]
  end

  cleaned = {}

  job_options.each do |name, value|
    cleaned[name] =
      case option_types[name]
      when :float
        value.to_f
      when :integer
        value.to_i
      when :array
        value.split(/[,|]/)
      when :tsv
        begin
          source = value == '-' ? STDIN : value
          TSV.new(source).to_s :sort
        rescue
          # Best effort: keep the value as given when it cannot be loaded.
          value
        end
      else
        value
      end
  end

  cleaned
end
57
+
58
# Global options of the command. The definition string uses the same format
# that SOPT_options generates ("-<short>--<long>", with a trailing '*' on
# every option that is not a boolean). --name carries a value (it becomes the
# job name below), so it needs the '*'; --recursive_clean needs the same
# double dash as every other long option.
options = SOPT.get "-t--task*:-l--log*:-h--help:-n--name*:-cl--clean:-rcl--recursive_clean"

# Set log, fork, clean, recursive_clean and help
Log.severity = options[:log].to_i if options.include? :log
help            = !!options.delete(:help)
# NOTE(review): no fork option is declared in the definition string above, so
# this is presumably always false -- confirm whether a flag should be added.
do_fork         = !!options.delete(:fork)
clean           = !!options.delete(:clean)
recursive_clean = !!options.delete(:recursive_clean)

# Get workflow
workflow = ARGV.first
WorkFlow.require_workflow workflow

# Set task
namespace, task = nil, nil

case
when (not options[:task])
  # No task requested: fall back to the last task
  workflow_usage if help
  task = self.last_task
  namespace = self
when (options[:task] =~ /\./)
  # "Namespace.task": resolve the namespace constant
  # NOTE(review): task stays a String in this branch -- confirm that this is
  # what the calls on task below expect.
  namespace, task = options.delete(:task).split('.')
  namespace = Misc.string2const(namespace)
else
  task_name = options.delete(:task)
  task = self.tasks[task_name]
end

usage(task) if help

name = options.delete(:name) || "Default"

# Get job args: parse the task specific options and convert their values
sopt_option_string = SOPT_options(task)
job_options = SOPT.get sopt_option_string
job_options = fix_options(task, job_options)

# Get job
job = task.job(name, job_options)

# Clean job
job.clean if clean
job.recursive_clean if recursive_clean

# Run, reporting the current step and last message every two seconds when
# the job has been forked
if do_fork
  job.fork
  until job.done?
    puts "#{job.step}: #{job.messages.last}"
    sleep 2
  end
else
  job.run
end

# Error
raise job.messages.last if job.error?

# Print
pp job.load
data/lib/rbbt-util.rb CHANGED
@@ -5,12 +5,9 @@ require 'rbbt/util/tmpfile'
5
5
  require 'rbbt/util/filecache'
6
6
  require 'rbbt/util/tsv'
7
7
  require 'rbbt/util/persistence'
8
- require 'rbbt/util/bed'
9
- require 'rbbt/util/cachehelper'
10
8
  require 'rbbt/util/misc'
11
9
 
12
10
  FileCache.cachedir = Rbbt.var.cache.filecache.find :user
13
11
  Open.cachedir = Rbbt.var.cache["open-remote"].find :user
14
12
  TmpFile.tmpdir = Rbbt.tmp.find :user
15
13
  Persistence.cachedir = Rbbt.var.cache.persistence.find :user
16
- Bed.cachedir = Rbbt.var.cache["bed-persistence"].find :user
@@ -11,6 +11,7 @@ class FixWidthTable
11
11
  @record_size = @value_size + (@range ? 12 : 4)
12
12
 
13
13
  if %w(memory stringio).include? filename.to_s.downcase
14
+ @filename = :memory
14
15
  @file = StringIO.new
15
16
  else
16
17
  FileUtils.rm @filename if File.exists? @filename
@@ -104,6 +105,12 @@ class FixWidthTable
104
105
  @file.close
105
106
  end
106
107
 
108
# Return everything the underlying @file yields when read from its start.
# The table's own #read is called first.
def dump
  read
  io = @file
  io.rewind
  io.read
end
113
+
107
114
  #{{{ Adding data
108
115
 
109
116
  def add_point(data)
@@ -33,6 +33,16 @@ end
33
33
  module Misc
34
34
  class FieldNotFoundError < StandardError;end
35
35
 
36
# Run the given block with +dir+ as the working directory and return the
# block's value. The previous working directory is restored afterwards, also
# when the block raises.
def self.in_dir(dir)
  origin = Dir.pwd
  begin
    Dir.chdir(dir)
    yield
  ensure
    Dir.chdir(origin)
  end
end
45
+
36
46
  def self.intersect_sorted_arrays(a1, a2)
37
47
  e1, e2 = a1.shift, a2.shift
38
48
  intersect = []
@@ -98,6 +108,7 @@ module Misc
98
108
  end
99
109
 
100
110
  def self.redefine_method(object, old_method, new_method_name, &block)
111
+ return if object.respond_to? new_method_name
101
112
  metaclass = class << object; self end
102
113
  metaclass.send :alias_method, new_method_name, old_method
103
114
  metaclass.send :define_method, old_method, &block
@@ -376,13 +387,16 @@ module Misc
376
387
  chunks
377
388
  end
378
389
 
390
# Pair the elements of +list1+ (keys) with the elements of +list2+ (values)
# by position and return the resulting hash. Keys without a counterpart in
# +list2+ map to nil; for repeated keys the last value wins.
def self.merge2hash(list1, list2)
  Hash[list1.zip(list2)]
end
379
395
 
380
396
 
381
397
  def self.process_to_hash(list)
382
398
  result = yield list
383
- hash = {}
384
- list.zip(result).each do |k,v| hash[k] = v end
385
- hash
399
+ merge2hash(list, result)
386
400
  end
387
401
 
388
402
  IUPAC2BASE = {
@@ -424,7 +438,9 @@ module PDF2Text
424
438
  require 'rbbt/util/open'
425
439
 
426
440
 
427
- CMD.cmd("pdftotext - -", :in => Open.open(filename, :nocache => true), :pipe => true, :stderr => true)
441
+ TmpFile.with_file(Open.open(filename, :nocache => true).read) do |pdf_file|
442
+ CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
443
+ end
428
444
  end
429
445
  end
430
446
 
@@ -23,6 +23,13 @@ module Persistence
23
23
  def self.get_persistence_file(file, prefix, options = {})
24
24
  persistence_dir = Misc.process_options options, :persistence_dir
25
25
  persistence_dir ||= CACHEDIR
26
+
27
+ if options.include? :filters
28
+ options[:filters].each do |match,value|
29
+ file = file + "&F[#{match}=#{Misc.digest(value.inspect)}]"
30
+ end
31
+ end
32
+
26
33
  name = prefix.to_s.dup << ":" << file.to_s << ":"
27
34
 
28
35
  options_md5 = Misc.hash2md5 options
@@ -171,22 +178,28 @@ module Persistence
171
178
  Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
172
179
 
173
180
  res = yield file, options, filename, persistence_file
181
+
174
182
  serializer = tsv_serializer res
175
183
 
176
- if File.exists? persistence_file
177
- Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
178
- FileUtils.rm persistence_file
179
- end
184
+ if TCHash === res
185
+ per = res
186
+ else
180
187
 
181
- Log.debug "Dump data into '#{persistence_file}'"
182
- per = Persistence::TSV.get persistence_file, true, serializer
188
+ if File.exists? persistence_file
189
+ Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
190
+ FileUtils.rm persistence_file
191
+ end
183
192
 
184
- per.write
185
- per.merge! res
193
+ Log.debug "Dump data into '#{persistence_file}'"
194
+ per = Persistence::TSV.get persistence_file, true, serializer
186
195
 
187
- Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
188
- if res.respond_to?(key.to_sym) and per.respond_to?("#{key}=".to_sym)
189
- per.send "#{key}=".to_sym, res.send(key.to_sym)
196
+ per.write
197
+ per.merge! res
198
+
199
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
200
+ if res.respond_to?(key.to_sym) and per.respond_to?("#{key}=".to_sym)
201
+ per.send "#{key}=".to_sym, res.send(key.to_sym)
202
+ end
190
203
  end
191
204
  end
192
205
 
@@ -293,27 +306,36 @@ module Persistence
293
306
  FileUtils.rm persistence_file
294
307
  end
295
308
 
296
- max_length = res.collect{|k,v| k.length}.max
297
309
 
298
- if range
299
- begin
300
- fwt = FixWidthTable.new persistence_file, max_length, true
301
- fwt.add_range res
302
- rescue
303
- FileUtils.rm persistence_file if File.exists? persistence_file
304
- raise $!
305
- end
310
+ if FixWidthTable === res and res.filename == :memory
311
+ Log.debug "Dumping memory FWT into #{ persistence_file }. Prefix = #{prefix}"
312
+ FileUtils.mkdir_p File.dirname(persistence_file) unless File.exists? File.dirname(persistence_file)
313
+ Open.write(persistence_file, res.dump)
314
+ fwt = FixWidthTable.get persistence_file
306
315
  else
307
- begin
308
- fwt = FixWidthTable.new persistence_file, max_length, false
309
- fwt.add_point res
310
- rescue
311
- FileUtils.rm persistence_file
312
- raise $!
316
+
317
+ max_length = res.collect{|k,v| k.length}.max
318
+
319
+ if range
320
+ begin
321
+ fwt = FixWidthTable.get persistence_file, max_length, true
322
+ fwt.add_range res
323
+ rescue
324
+ FileUtils.rm persistence_file if File.exists? persistence_file
325
+ raise $!
326
+ end
327
+ else
328
+ begin
329
+ fwt = FixWidthTable.get persistence_file, max_length, false
330
+ fwt.add_point res
331
+ rescue
332
+ FileUtils.rm persistence_file
333
+ raise $!
334
+ end
313
335
  end
314
- end
315
336
 
316
- fwt.read
337
+ fwt.read
338
+ end
317
339
 
318
340
  fwt
319
341
  else
@@ -327,6 +349,7 @@ module Persistence
327
349
 
328
350
  def self.persist(file, prefix = "", persistence_type = :string, options = {}, &block)
329
351
  options = Misc.add_defaults options, :persistence => true
352
+
330
353
  persistence =
331
354
  Misc.process_options options, :persistence
332
355
 
@@ -29,6 +29,7 @@ module Resource
29
29
  return dir if File.exists? File.join(dir, 'lib')
30
30
  file = File.dirname file
31
31
  end
32
+
32
33
  return nil
33
34
  end
34
35
 
@@ -55,10 +55,20 @@ class Task
55
55
  [run_options, args, optional_args]
56
56
  end
57
57
 
58
# For every option described in option_summary.first, in order, move its
# value out of the +optional_args+ hash and append it to +args+. Options
# missing from the hash are skipped. Both arguments are modified in place.
def pull_from_hash(args, optional_args)
  option_summary.first.each do |info|
    key = info[:name]
    next unless optional_args.include?(key)

    args.push(optional_args.delete(key))
  end
end
66
+
58
67
  def setup(jobname, args, optional_args, dependencies)
59
68
  previous_jobs = []
60
69
  required_files = []
61
70
 
71
+ pull_from_hash(args, optional_args)
62
72
  run_options, args, optional_args = process_options args, optional_args
63
73
 
64
74
  dependencies.each do |dependency|
@@ -89,6 +99,7 @@ class Task
89
99
  optional_args = {}
90
100
  end
91
101
 
102
+
92
103
  previous_jobs, required_files, run_options = setup(jobname, args, optional_args, dependencies)
93
104
 
94
105
  job_id = self.job_id jobname, run_options, previous_jobs
@@ -2,7 +2,7 @@ require 'rbbt/util/misc'
2
2
 
3
3
  class Task
4
4
  class Job
5
- attr_accessor :task, :id, :name, :options, :previsous_jobs, :required_files, :pid, :path, :previous_jobs, :input
5
+ attr_accessor :task, :id, :name, :options, :previous_jobs, :required_files, :pid, :path, :previous_jobs, :input
6
6
 
7
7
  IDSEP = "_"
8
8
 
@@ -285,6 +285,8 @@ class Task
285
285
  Marshal.load(Open.read(path))
286
286
  when :yaml
287
287
  YAML.load(Open.read(path))
288
+ when nil
289
+ nil
288
290
  end
289
291
  end
290
292
 
@@ -1,5 +1,6 @@
1
1
  require 'rbbt/util/misc'
2
2
  require 'tokyocabinet'
3
+ require 'set'
3
4
 
4
5
  class TCHash < TokyoCabinet::HDB
5
6
  class OpenError < StandardError;end
@@ -10,6 +11,11 @@ class TCHash < TokyoCabinet::HDB
10
11
  def self.load(str); str.unpack("l").first; end
11
12
  end
12
13
 
14
# Serializer that stores one number as a double using the "d" pack directive.
class FloatSerializer
  class << self
    # Pack +i+ into its binary string form.
    def dump(i)
      [i].pack("d")
    end

    # Unpack the number stored in +str+.
    def load(str)
      str.unpack("d").first
    end
  end
end
18
+
13
19
  class IntegerArraySerializer
14
20
  def self.dump(a); a.pack("l*"); end
15
21
  def self.load(str); str.unpack("l*"); end
@@ -40,8 +46,29 @@ class TCHash < TokyoCabinet::HDB
40
46
  end
41
47
  end
42
48
 
49
# Serializer for TSV values: they are stored as the text returned by to_s
# and rebuilt by handing that text to TSV.new through a StringIO.
class TSVSerializer
  # Text form of +tsv+.
  def self.dump(tsv)
    tsv.to_s
  end

  # Build a TSV from the stored +string+.
  def self.load(string)
    stream = StringIO.new(string)
    TSV.new stream
  end
end
43
58
 
44
- ALIAS = {:integer => IntegerSerializer, :integer_array => IntegerArraySerializer, :marshal => Marshal, nil => Marshal, :single => StringSerializer, :list => StringArraySerializer, :double => StringDoubleArraySerializer}
59
+
60
+
61
+ ALIAS = {
62
+ :integer => IntegerSerializer,
63
+ :float => FloatSerializer,
64
+ :integer_array => IntegerArraySerializer,
65
+ :marshal => Marshal,
66
+ :single => StringSerializer,
67
+ :string => StringSerializer,
68
+ :list => StringArraySerializer,
69
+ :double => StringDoubleArraySerializer,
70
+ :tsv => TSVSerializer
71
+ }
45
72
 
46
73
  CONNECTIONS = {}
47
74
 
@@ -49,7 +76,6 @@ class TCHash < TokyoCabinet::HDB
49
76
  :type => '__tokyocabinet_hash_type',
50
77
  :serializer => '__tokyocabinet_hash_serializer',
51
78
  :identifiers => '__tokyocabinet_hash_identifiers',
52
- :datadir => '__tokyocabinet_hash_datadir',
53
79
  :fields => '__tokyocabinet_hash_fields',
54
80
  :key_field => '__tokyocabinet_hash_key_field',
55
81
  :filename => '__tokyocabinet_hash_filename',
@@ -74,7 +100,13 @@ class TCHash < TokyoCabinet::HDB
74
100
  out(key) or raise "Not deleted"
75
101
  end
76
102
 
77
- attr_accessor :serializer
103
+ alias original_include? include?
104
# Membership test that only forwards String keys to the original include?;
# for any other kind of key it returns nil without querying.
def include?(key)
  String === key ? original_include?(key) : nil
end
108
+
109
+ attr_accessor :serializer, :path_to_db
78
110
  def serializer=(serializer)
79
111
 
80
112
  if ALIAS.include? serializer.to_sym
@@ -97,17 +129,20 @@ class TCHash < TokyoCabinet::HDB
97
129
 
98
130
  @write = write
99
131
 
100
- if self.include? FIELD_INFO_ENTRIES[:serializer]
101
- serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
132
+ if @serializer.nil?
102
133
 
103
- mod = Misc.string2const serializer_str
104
- @serializer = mod
134
+ if self.include? FIELD_INFO_ENTRIES[:serializer]
135
+ serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
105
136
 
106
- else
107
- raise "No serializer specified" if serializer.nil?
137
+ mod = Misc.string2const serializer_str
138
+ @serializer = mod
108
139
 
109
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
110
- @serializer = serializer
140
+ else
141
+ raise "No serializer specified" if (serializer || @serializer).nil?
142
+
143
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
144
+ @serializer = serializer
145
+ end
111
146
  end
112
147
  end
113
148
 
@@ -116,31 +151,44 @@ class TCHash < TokyoCabinet::HDB
116
151
  end
117
152
 
118
153
  def write
154
+ self.sync
119
155
  self.close
120
156
  self.open(true)
121
157
  end
122
158
 
123
159
  def read
160
+ self.sync
124
161
  self.close
125
162
  self.open(false)
126
163
  end
127
164
 
128
# Set up the database stored at +path+.
#
# path       - location of the database file
# write      - when true the database is opened with open(true, serializer);
#              the same happens when the file does not exist yet
# serializer - a key of ALIAS or a serializer object; when none is given and
#              the database is opened through the write branch, Marshal is
#              used
#
# Raises a RuntimeError ("Error setting cache") when setcache fails.
def initialize(path, write = false, serializer = nil)
  super()

  # Symbolic names are translated through ALIAS; anything else, including
  # nil, is kept as given.
  @serializer = ALIAS.include?(serializer) ? ALIAS[serializer] : serializer

  @path_to_db = path

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if write || !File.exist?(@path_to_db)
    @serializer = Marshal if @serializer.nil?
    self.setcache(100000) or raise "Error setting cache"
    self.open(true, @serializer)
    # Record the serializer in the database's special entry
    self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s)
  else
    self.open(false)
  end
end
142
185
 
143
- def self.get(path, write = false, serializer = Marshal)
186
+ def self.get(path, write = false, serializer = nil)
187
+ if not (TrueClass === write or FalseClass === write) and serializer.nil?
188
+ serializer = write
189
+ write = false
190
+ end
191
+
144
192
  if ALIAS.include? serializer
145
193
  serializer = ALIAS[serializer]
146
194
  else
@@ -213,6 +261,7 @@ class TCHash < TokyoCabinet::HDB
213
261
  values.collect{|v| serializer.load(v)}
214
262
  end
215
263
 
264
+ alias real_original_each each
216
265
  # This version of each fixes a problem in ruby 1.9. It also
217
266
  # removes the special entries
218
267
  def each(&block)
@@ -224,17 +273,26 @@ class TCHash < TokyoCabinet::HDB
224
273
  keys.zip(values.collect{|v| serializer.load v}).each &block
225
274
  end
226
275
 
276
# Iterate over the stored entries, passing each key and its deserialized
# value to the block. Entries whose key is one of the FIELD_INFO_ENTRIES
# values are skipped.
def each(&block)
  reserved = Set.new(FIELD_INFO_ENTRIES.values)
  real_original_each do |key, raw|
    next if reserved.include?(key)

    block.call(key, serializer.load(raw))
  end
end
282
+
227
283
  alias original_each each
228
284
 
229
# Return an array with one element per stored entry, skipping the entries
# whose key is one of the FIELD_INFO_ENTRIES values.
#
# With a block, each element is the block's result for the key and its
# deserialized value; without a block, each element is the
# [key, deserialized value] pair.
def collect(&block)
  skippable = Set.new(FIELD_INFO_ENTRIES.values)
  res = []
  real_original_each do |key, value|
    next if skippable.include? key

    loaded = serializer.load(value)
    # The block's result has to be stored, otherwise collect would always
    # return an empty array when a block is given.
    res << (block ? block.call(key, loaded) : [key, loaded])
  end
  res
end
240
298
 
@@ -251,5 +309,16 @@ class TCHash < TokyoCabinet::HDB
251
309
  raise "Transaction cannot initiate"
252
310
  end
253
311
  end
312
+
313
# Remove every record with vanish and then write back the special entries
# (the FIELD_INFO_ENTRIES values) that had a value before.
#
# When write? is false the database is switched with write before vanishing
# and switched back with read at the end.
def clear
  special_keys = FIELD_INFO_ENTRIES.values.sort
  saved_values = special_keys.collect { |key| original_get_brackets(key) }

  reopen = !write?
  write if reopen

  vanish

  special_keys.zip(saved_values).each do |key, value|
    original_set_brackets(key, value) unless value.nil?
  end

  read if reopen
end
254
323
 
255
324
  end