rbbt-util 3.2.0 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+ require 'rbbt/util/workflow'
6
+ require 'pp'
7
+
8
# Print the usage text of +task+ on standard output and terminate the
# script with exit status -1. This method never returns to its caller.
def usage(task)
  $stdout.puts(task.usage)
  exit(-1)
end
12
+
13
# Build the SOPT specification string for the options of +task+.
#
# Every entry of task.option_summary (flattened) contributes one item of
# the form "-<first letter of name>--<name>"; items for options whose
# :type is not :boolean get a trailing "*". Items are joined with ":".
def SOPT_options(task)
  specs = task.option_summary.flatten.map do |option|
    long_name  = option[:name]
    short_name = long_name.to_s.chars.first
    suffix     = option[:type] == :boolean ? '' : '*'

    "-#{short_name}--#{long_name}#{suffix}"
  end

  specs.join(":")
end
25
+
26
# Convert the raw values in +job_options+ according to the option types
# declared in task.option_summary, returning a new hash.
#
#   :float   -> value.to_f
#   :integer -> value.to_i
#   :array   -> value split on "," or "|"
#   :tsv     -> TSV built from STDIN (when the value is '-') or from the
#               value itself, rendered with to_s :sort; if that raises,
#               the original value is kept
#
# Values of any other (or unknown) type are passed through unchanged.
def fix_options(task, job_options)
  option_types = {}
  task.option_summary.flatten.each { |info| option_types[info[:name]] = info[:type] }

  job_options.inject({}) do |cleaned, (name, value)|
    cleaned[name] =
      case option_types[name]
      when :float   then value.to_f
      when :integer then value.to_i
      when :array   then value.split(/[,|]/)
      when :tsv
        begin
          source = value == '-' ? STDIN : value
          TSV.new(source).to_s :sort
        rescue
          # Best effort: fall back to the raw value when it cannot be loaded
          value
        end
      else
        value
      end
    cleaned
  end
end
57
+
58
# Options handled by this script itself; the options of the selected task
# are parsed separately further down. As in SOPT_options, a trailing "*"
# is used for options that are not boolean flags.
#
# --name carries the job name, so it is declared with "*" (without it the
# option is a flag and the job name would be `true`), and recursive_clean
# uses the same "-short--long" form as every other entry.
options = SOPT.get "-t--task*:-l--log*:-h--help:-n--name*:-cl--clean:-rcl--recursive_clean"

# Set log, fork, clean, recursive_clean and help
Log.severity = options[:log].to_i if options.include? :log
help = !!options.delete(:help)
# NOTE(review): no fork option is declared in the SOPT string above, so
# do_fork looks like it is always false -- confirm and declare one if the
# forked mode below is meant to be reachable.
do_fork = !!options.delete(:fork)
clean = !!options.delete(:clean)
recursive_clean = !!options.delete(:recursive_clean)

# Get workflow
workflow = ARGV.first
WorkFlow.require_workflow workflow

# Set task
namespace, task = nil, nil

case
when (not options[:task])
  # NOTE(review): workflow_usage is not defined in this script; presumably
  # it comes from the loaded workflow -- confirm.
  workflow_usage if help
  task = self.last_task
  namespace = self
when (options[:task] =~ /\./)
  # "Namespace.task": resolve the namespace constant.
  # NOTE(review): task is left as the String after the dot here, while the
  # code below calls task.usage / task.job -- verify this branch.
  namespace, task = options.delete(:task).split('.')
  namespace = Misc.string2const(namespace)
else
  task_name = options.delete(:task)
  task = self.tasks[task_name]
end

usage(task) if help

name = options.delete(:name) || "Default"

# get job args
sopt_option_string = SOPT_options(task)
job_options = SOPT.get sopt_option_string
job_options = fix_options(task, job_options)

#- get job
job = task.job(name, job_options)

# clean job
job.clean if clean
job.recursive_clean if recursive_clean

# run
if do_fork
  job.fork
  # Poll the forked job, reporting its current step and last message
  while not job.done?
    puts "#{job.step}: #{job.messages.last}"
    sleep 2
  end
else
  job.run
end

#- error
raise job.messages.last if job.error?

#print
pp job.load
data/lib/rbbt-util.rb CHANGED
@@ -5,12 +5,9 @@ require 'rbbt/util/tmpfile'
5
5
  require 'rbbt/util/filecache'
6
6
  require 'rbbt/util/tsv'
7
7
  require 'rbbt/util/persistence'
8
- require 'rbbt/util/bed'
9
- require 'rbbt/util/cachehelper'
10
8
  require 'rbbt/util/misc'
11
9
 
12
10
  FileCache.cachedir = Rbbt.var.cache.filecache.find :user
13
11
  Open.cachedir = Rbbt.var.cache["open-remote"].find :user
14
12
  TmpFile.tmpdir = Rbbt.tmp.find :user
15
13
  Persistence.cachedir = Rbbt.var.cache.persistence.find :user
16
- Bed.cachedir = Rbbt.var.cache["bed-persistence"].find :user
@@ -11,6 +11,7 @@ class FixWidthTable
11
11
  @record_size = @value_size + (@range ? 12 : 4)
12
12
 
13
13
  if %w(memory stringio).include? filename.to_s.downcase
14
+ @filename = :memory
14
15
  @file = StringIO.new
15
16
  else
16
17
  FileUtils.rm @filename if File.exists? @filename
@@ -104,6 +105,12 @@ class FixWidthTable
104
105
  @file.close
105
106
  end
106
107
 
108
# Return the whole content of @file as a string: calls +read+, rewinds
# @file and reads it from the start.
def dump
  read
  @file.tap { |io| io.rewind }.read
end
113
+
107
114
  #{{{ Adding data
108
115
 
109
116
  def add_point(data)
@@ -33,6 +33,16 @@ end
33
33
  module Misc
34
34
  class FieldNotFoundError < StandardError;end
35
35
 
36
# Run the given block with +dir+ as the working directory and return the
# block's value. The previous working directory is restored afterwards,
# also when changing directory or the block raises.
def self.in_dir(dir)
  original_dir = Dir.pwd
  begin
    Dir.chdir(dir)
    yield
  ensure
    Dir.chdir(original_dir)
  end
end
45
+
36
46
  def self.intersect_sorted_arrays(a1, a2)
37
47
  e1, e2 = a1.shift, a2.shift
38
48
  intersect = []
@@ -98,6 +108,7 @@ module Misc
98
108
  end
99
109
 
100
110
  def self.redefine_method(object, old_method, new_method_name, &block)
111
+ return if object.respond_to? new_method_name
101
112
  metaclass = class << object; self end
102
113
  metaclass.send :alias_method, new_method_name, old_method
103
114
  metaclass.send :define_method, old_method, &block
@@ -376,13 +387,16 @@ module Misc
376
387
  chunks
377
388
  end
378
389
 
390
# Build a hash that maps each element of +list1+ to the element of
# +list2+ at the same position. Positions missing from +list2+ map to
# nil; when a key is repeated the last occurrence wins.
def self.merge2hash(list1, list2)
  Hash[list1.zip(list2)]
end
379
395
 
380
396
 
381
397
# Yield +list+ to the block and pair each element of +list+ with the
# element at the same position in the block's result (see merge2hash).
def self.process_to_hash(list)
  merge2hash(list, yield(list))
end
387
401
 
388
402
  IUPAC2BASE = {
@@ -424,7 +438,9 @@ module PDF2Text
424
438
  require 'rbbt/util/open'
425
439
 
426
440
 
427
- CMD.cmd("pdftotext - -", :in => Open.open(filename, :nocache => true), :pipe => true, :stderr => true)
441
+ TmpFile.with_file(Open.open(filename, :nocache => true).read) do |pdf_file|
442
+ CMD.cmd("pdftotext #{pdf_file} -", :pipe => false, :stderr => true)
443
+ end
428
444
  end
429
445
  end
430
446
 
@@ -23,6 +23,13 @@ module Persistence
23
23
  def self.get_persistence_file(file, prefix, options = {})
24
24
  persistence_dir = Misc.process_options options, :persistence_dir
25
25
  persistence_dir ||= CACHEDIR
26
+
27
+ if options.include? :filters
28
+ options[:filters].each do |match,value|
29
+ file = file + "&F[#{match}=#{Misc.digest(value.inspect)}]"
30
+ end
31
+ end
32
+
26
33
  name = prefix.to_s.dup << ":" << file.to_s << ":"
27
34
 
28
35
  options_md5 = Misc.hash2md5 options
@@ -171,22 +178,28 @@ module Persistence
171
178
  Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
172
179
 
173
180
  res = yield file, options, filename, persistence_file
181
+
174
182
  serializer = tsv_serializer res
175
183
 
176
- if File.exists? persistence_file
177
- Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
178
- FileUtils.rm persistence_file
179
- end
184
+ if TCHash === res
185
+ per = res
186
+ else
180
187
 
181
- Log.debug "Dump data into '#{persistence_file}'"
182
- per = Persistence::TSV.get persistence_file, true, serializer
188
+ if File.exists? persistence_file
189
+ Log.debug "Erasing old #{ persistence_file }. Prefix = #{prefix}"
190
+ FileUtils.rm persistence_file
191
+ end
183
192
 
184
- per.write
185
- per.merge! res
193
+ Log.debug "Dump data into '#{persistence_file}'"
194
+ per = Persistence::TSV.get persistence_file, true, serializer
186
195
 
187
- Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
188
- if res.respond_to?(key.to_sym) and per.respond_to?("#{key}=".to_sym)
189
- per.send "#{key}=".to_sym, res.send(key.to_sym)
196
+ per.write
197
+ per.merge! res
198
+
199
+ Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
200
+ if res.respond_to?(key.to_sym) and per.respond_to?("#{key}=".to_sym)
201
+ per.send "#{key}=".to_sym, res.send(key.to_sym)
202
+ end
190
203
  end
191
204
  end
192
205
 
@@ -293,27 +306,36 @@ module Persistence
293
306
  FileUtils.rm persistence_file
294
307
  end
295
308
 
296
- max_length = res.collect{|k,v| k.length}.max
297
309
 
298
- if range
299
- begin
300
- fwt = FixWidthTable.new persistence_file, max_length, true
301
- fwt.add_range res
302
- rescue
303
- FileUtils.rm persistence_file if File.exists? persistence_file
304
- raise $!
305
- end
310
+ if FixWidthTable === res and res.filename == :memory
311
+ Log.debug "Dumping memory FWT into #{ persistence_file }. Prefix = #{prefix}"
312
+ FileUtils.mkdir_p File.dirname(persistence_file) unless File.exists? File.dirname(persistence_file)
313
+ Open.write(persistence_file, res.dump)
314
+ fwt = FixWidthTable.get persistence_file
306
315
  else
307
- begin
308
- fwt = FixWidthTable.new persistence_file, max_length, false
309
- fwt.add_point res
310
- rescue
311
- FileUtils.rm persistence_file
312
- raise $!
316
+
317
+ max_length = res.collect{|k,v| k.length}.max
318
+
319
+ if range
320
+ begin
321
+ fwt = FixWidthTable.get persistence_file, max_length, true
322
+ fwt.add_range res
323
+ rescue
324
+ FileUtils.rm persistence_file if File.exists? persistence_file
325
+ raise $!
326
+ end
327
+ else
328
+ begin
329
+ fwt = FixWidthTable.get persistence_file, max_length, false
330
+ fwt.add_point res
331
+ rescue
332
+ FileUtils.rm persistence_file
333
+ raise $!
334
+ end
313
335
  end
314
- end
315
336
 
316
- fwt.read
337
+ fwt.read
338
+ end
317
339
 
318
340
  fwt
319
341
  else
@@ -327,6 +349,7 @@ module Persistence
327
349
 
328
350
  def self.persist(file, prefix = "", persistence_type = :string, options = {}, &block)
329
351
  options = Misc.add_defaults options, :persistence => true
352
+
330
353
  persistence =
331
354
  Misc.process_options options, :persistence
332
355
 
@@ -29,6 +29,7 @@ module Resource
29
29
  return dir if File.exists? File.join(dir, 'lib')
30
30
  file = File.dirname file
31
31
  end
32
+
32
33
  return nil
33
34
  end
34
35
 
@@ -55,10 +55,20 @@ class Task
55
55
  [run_options, args, optional_args]
56
56
  end
57
57
 
58
# Move values out of the +optional_args+ hash and onto the end of +args+.
#
# For each entry in the first group of option_summary, in order, a value
# stored under that entry's :name in +optional_args+ is deleted from the
# hash and appended to +args+. Both arguments are modified in place.
def pull_from_hash(args, optional_args)
  option_summary.first.each do |option|
    key = option[:name]
    next unless optional_args.include?(key)

    args << optional_args.delete(key)
  end
end
66
+
58
67
  def setup(jobname, args, optional_args, dependencies)
59
68
  previous_jobs = []
60
69
  required_files = []
61
70
 
71
+ pull_from_hash(args, optional_args)
62
72
  run_options, args, optional_args = process_options args, optional_args
63
73
 
64
74
  dependencies.each do |dependency|
@@ -89,6 +99,7 @@ class Task
89
99
  optional_args = {}
90
100
  end
91
101
 
102
+
92
103
  previous_jobs, required_files, run_options = setup(jobname, args, optional_args, dependencies)
93
104
 
94
105
  job_id = self.job_id jobname, run_options, previous_jobs
@@ -2,7 +2,7 @@ require 'rbbt/util/misc'
2
2
 
3
3
  class Task
4
4
  class Job
5
- attr_accessor :task, :id, :name, :options, :previsous_jobs, :required_files, :pid, :path, :previous_jobs, :input
5
+ attr_accessor :task, :id, :name, :options, :previous_jobs, :required_files, :pid, :path, :previous_jobs, :input
6
6
 
7
7
  IDSEP = "_"
8
8
 
@@ -285,6 +285,8 @@ class Task
285
285
  Marshal.load(Open.read(path))
286
286
  when :yaml
287
287
  YAML.load(Open.read(path))
288
+ when nil
289
+ nil
288
290
  end
289
291
  end
290
292
 
@@ -1,5 +1,6 @@
1
1
  require 'rbbt/util/misc'
2
2
  require 'tokyocabinet'
3
+ require 'set'
3
4
 
4
5
  class TCHash < TokyoCabinet::HDB
5
6
  class OpenError < StandardError;end
@@ -10,6 +11,11 @@ class TCHash < TokyoCabinet::HDB
10
11
  def self.load(str); str.unpack("l").first; end
11
12
  end
12
13
 
14
# Serializer for a single float, stored with the "d" pack directive.
class FloatSerializer
  class << self
    # Pack +number+ into its binary string form.
    def dump(number)
      [number].pack("d")
    end

    # Unpack the float stored in +bytes+.
    def load(bytes)
      bytes.unpack("d")[0]
    end
  end
end
18
+
13
19
  class IntegerArraySerializer
14
20
  def self.dump(a); a.pack("l*"); end
15
21
  def self.load(str); str.unpack("l*"); end
@@ -40,8 +46,29 @@ class TCHash < TokyoCabinet::HDB
40
46
  end
41
47
  end
42
48
 
49
# Serializer that stores a TSV as its string form and rebuilds it by
# parsing that string again.
class TSVSerializer
  class << self
    # String form of +tsv+ (its to_s).
    def dump(tsv)
      tsv.to_s
    end

    # Build a TSV from +string+, handed over wrapped in a StringIO.
    def load(string)
      stream = StringIO.new(string)
      TSV.new stream
    end
  end
end
43
58
 
44
- ALIAS = {:integer => IntegerSerializer, :integer_array => IntegerArraySerializer, :marshal => Marshal, nil => Marshal, :single => StringSerializer, :list => StringArraySerializer, :double => StringDoubleArraySerializer}
59
+
60
+
61
+ ALIAS = {
62
+ :integer => IntegerSerializer,
63
+ :float => FloatSerializer,
64
+ :integer_array => IntegerArraySerializer,
65
+ :marshal => Marshal,
66
+ :single => StringSerializer,
67
+ :string => StringSerializer,
68
+ :list => StringArraySerializer,
69
+ :double => StringDoubleArraySerializer,
70
+ :tsv => TSVSerializer
71
+ }
45
72
 
46
73
  CONNECTIONS = {}
47
74
 
@@ -49,7 +76,6 @@ class TCHash < TokyoCabinet::HDB
49
76
  :type => '__tokyocabinet_hash_type',
50
77
  :serializer => '__tokyocabinet_hash_serializer',
51
78
  :identifiers => '__tokyocabinet_hash_identifiers',
52
- :datadir => '__tokyocabinet_hash_datadir',
53
79
  :fields => '__tokyocabinet_hash_fields',
54
80
  :key_field => '__tokyocabinet_hash_key_field',
55
81
  :filename => '__tokyocabinet_hash_filename',
@@ -74,7 +100,13 @@ class TCHash < TokyoCabinet::HDB
74
100
  out(key) or raise "Not deleted"
75
101
  end
76
102
 
77
- attr_accessor :serializer
103
alias original_include? include?

# Membership test that only consults the inherited include? for String
# keys; any other kind of key yields nil.
def include?(key)
  String === key ? original_include?(key) : nil
end
108
+
109
+ attr_accessor :serializer, :path_to_db
78
110
  def serializer=(serializer)
79
111
 
80
112
  if ALIAS.include? serializer.to_sym
@@ -97,17 +129,20 @@ class TCHash < TokyoCabinet::HDB
97
129
 
98
130
  @write = write
99
131
 
100
- if self.include? FIELD_INFO_ENTRIES[:serializer]
101
- serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
132
+ if @serializer.nil?
102
133
 
103
- mod = Misc.string2const serializer_str
104
- @serializer = mod
134
+ if self.include? FIELD_INFO_ENTRIES[:serializer]
135
+ serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
105
136
 
106
- else
107
- raise "No serializer specified" if serializer.nil?
137
+ mod = Misc.string2const serializer_str
138
+ @serializer = mod
108
139
 
109
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
110
- @serializer = serializer
140
+ else
141
+ raise "No serializer specified" if (serializer || @serializer).nil?
142
+
143
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
144
+ @serializer = serializer
145
+ end
111
146
  end
112
147
  end
113
148
 
@@ -116,31 +151,44 @@ class TCHash < TokyoCabinet::HDB
116
151
  end
117
152
 
118
153
# Sync and close the database, then open it again with the write flag set.
def write
  sync
  close
  open(true)
end
122
158
 
123
159
# Sync and close the database, then open it again with the write flag
# unset.
def read
  sync
  close
  open(false)
end
127
164
 
128
# Open the database stored at +path+.
#
# path::       location of the database file.
# write::      when true the database is opened through open(true, ...).
#              The same happens, regardless of this flag, when the file
#              does not exist yet.
# serializer:: a key of ALIAS or a serializer object; nil leaves the
#              choice open (Marshal is used when the database is opened
#              for writing, otherwise open(false) is left to resolve it).
#
# Raises RuntimeError ("Error setting cache") if setcache fails.
def initialize(path, write = false, serializer = nil)
  super()

  @serializer = ALIAS.include?(serializer) ? ALIAS[serializer] : serializer
  @path_to_db = path

  # File.exist? rather than File.exists?: the latter is deprecated and no
  # longer available on current Ruby versions.
  if write || !File.exist?(@path_to_db)
    @serializer = Marshal if @serializer.nil?
    self.setcache(100000) or raise "Error setting cache"
    self.open(true, @serializer)
    # Record the serializer in the database's own special entry
    self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s)
  else
    self.open(false)
  end
end
142
185
 
143
- def self.get(path, write = false, serializer = Marshal)
186
+ def self.get(path, write = false, serializer = nil)
187
+ if not (TrueClass === write or FalseClass === write) and serializer.nil?
188
+ serializer = write
189
+ write = false
190
+ end
191
+
144
192
  if ALIAS.include? serializer
145
193
  serializer = ALIAS[serializer]
146
194
  else
@@ -213,6 +261,7 @@ class TCHash < TokyoCabinet::HDB
213
261
  values.collect{|v| serializer.load(v)}
214
262
  end
215
263
 
264
+ alias real_original_each each
216
265
  # This version of each fixes a problem in ruby 1.9. It also
217
266
  # removes the special entries
218
267
  def each(&block)
@@ -224,17 +273,26 @@ class TCHash < TokyoCabinet::HDB
224
273
  keys.zip(values.collect{|v| serializer.load v}).each &block
225
274
  end
226
275
 
276
# Iterate over the stored entries through real_original_each, calling the
# block with each key and its value loaded through the serializer.
# Entries whose key is one of the FIELD_INFO_ENTRIES values are skipped.
def each(&block)
  special_keys = Set.new(FIELD_INFO_ENTRIES.values)

  real_original_each do |key, raw|
    next if special_keys.include?(key)

    block.call(key, serializer.load(raw))
  end
end
282
+
227
283
  alias original_each each
228
284
 
229
# Collect over the stored entries, skipping the entries whose key is one
# of the FIELD_INFO_ENTRIES values.
#
# With a block, returns an array with the block's result for every
# (key, loaded value) pair; without a block, returns the array of
# [key, loaded value] pairs. Values are always loaded through the
# serializer, matching what +each+ yields.
def collect(&block)
  skippable = Set.new(FIELD_INFO_ENTRIES.values)
  res = []

  real_original_each do |key, value|
    next if skippable.include? key

    loaded = serializer.load(value)
    # Append in both cases: the block's result must end up in the output
    res << (block ? block.call(key, loaded) : [key, loaded])
  end

  res
end
240
298
 
@@ -251,5 +309,16 @@ class TCHash < TokyoCabinet::HDB
251
309
  raise "Transaction cannot initiate"
252
310
  end
253
311
  end
312
+
313
# Remove every entry from the database while keeping the special entries
# named by the FIELD_INFO_ENTRIES values.
#
# The special values are read first; if write? is false the database is
# switched with +write+ for the duration of the call and switched back
# with +read+ at the end. After +vanish+, each special entry that had a
# non-nil value is stored again.
def clear
  special_keys = FIELD_INFO_ENTRIES.values.sort
  saved_values = special_keys.collect { |key| original_get_brackets(key) }

  was_read_only = !write?
  write if was_read_only

  vanish

  special_keys.each_with_index do |key, index|
    value = saved_values[index]
    original_set_brackets(key, value) unless value.nil?
  end

  read if was_read_only
end
254
323
 
255
324
  end