rbbt-util 3.1.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
@@ -101,7 +101,7 @@ module Open
101
101
  case
102
102
  when Array === grep
103
103
  TmpFile.with_file(grep * "\n", false) do |f|
104
- CMD.cmd("grep", "-E" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
104
+ CMD.cmd("grep", "-w" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
105
105
  end
106
106
  else
107
107
  CMD.cmd("grep '#{grep}' -", :in => stream, :pipe => true, :post => proc{stream.force_close if stream.respond_to? :force_close})
@@ -191,12 +191,16 @@ module Open
191
191
  f = open(file, options)
192
192
 
193
193
  if block_given?
194
- while l = Misc.fixutf8(f.gets)
194
+ while not f.eof?
195
+ l = f.gets
195
196
  l = fixutf8(l) if l.respond_to?(:valid_encoding?) && ! l.valid_encoding?
196
197
  yield l
197
198
  end
199
+ f.close
198
200
  else
199
- Misc.fixutf8(f.read)
201
+ text = Misc.fixutf8(f.read)
202
+ f.close unless f.closed?
203
+ text
200
204
  end
201
205
  end
202
206
 
@@ -206,9 +210,7 @@ module Open
206
210
  when content.nil?
207
211
  begin
208
212
  File.open(file, 'w') do |f|
209
- f.flock(File::LOCK_EX)
210
213
  yield f
211
- f.flock(File::LOCK_UN)
212
214
  end
213
215
  rescue Exception
214
216
  FileUtils.rm file if File.exists? file
@@ -224,8 +226,8 @@ module Open
224
226
  begin
225
227
  File.open(file, 'w') do |f|
226
228
  f.flock(File::LOCK_EX)
227
- while l = content.gets
228
- f.write l
229
+ while not content.eof?
230
+ f.write content.gets
229
231
  end
230
232
  f.flock(File::LOCK_UN)
231
233
  end
@@ -129,19 +129,18 @@ module Persistence
129
129
  end
130
130
 
131
131
  def self.tsv_serializer(data, extra = nil)
132
- if Object::TSV === data
132
+ return data.serializer if Persistence::TSV === data
133
+ if Object::TSV === data
133
134
  return :integer if (data.cast == "to_i" or data.cast == :to_i) and data.type == :single
134
135
  return :integer_array if (data.cast == "to_i" or data.cast == :to_i) and (data.type == :list or data.type == :flat)
135
136
 
136
137
  case
137
- when data.type == :double
138
- :double
139
- when data.type == :list
138
+ when (data.type == :list or data.type == :flat)
140
139
  :list
141
140
  when data.type == :single
142
141
  :single
143
142
  else
144
- :marshal
143
+ :double
145
144
  end
146
145
  else
147
146
  return :marshal if extra.nil?
@@ -149,14 +148,12 @@ module Persistence
149
148
  return :integer_array if (extra[:cast] == "to_i" or extra[:cast] == :to_i) and (extra[:type] == :list or extra[:type] == :flat)
150
149
 
151
150
  case
152
- when extra[:type] == :double
153
- :double
154
- when extra[:type] == :list
151
+ when (extra[:type] == :list or extra[:type] == :flat)
155
152
  :list
156
153
  when extra[:type] == :single
157
154
  :single
158
155
  else
159
- :marshal
156
+ :double
160
157
  end
161
158
  end
162
159
  end
@@ -201,7 +198,7 @@ module Persistence
201
198
  else
202
199
  Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
203
200
 
204
- per = Persistence::TSV.get persistence_file, true, serializer
201
+ per = Persistence::TSV.get persistence_file, false, serializer
205
202
  tsv = Object::TSV.new per
206
203
  Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
207
204
  if tsv.respond_to?(key.to_sym) and per.respond_to?(key.to_sym)
@@ -224,7 +221,7 @@ module Persistence
224
221
  filename ||= get_filename(file)
225
222
  persistence_file ||= get_persistence_file(filename, prefix, options)
226
223
 
227
- if persistence_update or not File.exists? persistence_file
224
+ if persistence_update or not File.exists?(persistence_file)
228
225
  Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
229
226
  res, extra = yield file, options, filename, persistence_file
230
227
 
@@ -267,7 +264,7 @@ module Persistence
267
264
  rescue Interrupt
268
265
  raise "Interrupted"
269
266
  rescue Exception
270
- per.close
267
+ per.close unless per.nil?
271
268
  raise $!
272
269
  end
273
270
 
@@ -353,9 +350,7 @@ module Persistence
353
350
  yield file, options, filename
354
351
  else
355
352
  Log.low "Persistent Loading for #{filename}. Prefix: #{prefix}. Type #{persistence_type.to_s}"
356
-
357
353
  Misc.lock(persistence_file, file, prefix, options, block) do |persistence_file,file,prefix,options,block|
358
-
359
354
  case persistence_type.to_sym
360
355
  when :string
361
356
  persist_string(file, prefix, options, &block)
@@ -375,6 +370,14 @@ module Persistence
375
370
  end
376
371
  end
377
372
  end
373
+ end
378
374
 
375
+ module LocalPersist
376
+
377
+ attr_accessor :local_persistence_dir
378
+ def local_persist(name, prefix, type, options= {}, &block)
379
+ Persistence.persist(name, prefix, type, options.merge({:persistence_dir => @local_persistence_dir}), &block)
380
+ end
379
381
 
380
382
  end
383
+
@@ -168,6 +168,7 @@ source "$INSTALL_HELPER_FILE"
168
168
  RakeHelper.run(rakefile, file, dir)
169
169
  resource
170
170
  end
171
+ resource
171
172
  end
172
173
 
173
174
  def relative_to(klass, path)
@@ -226,7 +227,9 @@ source "$INSTALL_HELPER_FILE"
226
227
  join name
227
228
  end
228
229
 
229
- def method_missing(name, prev = nil)
230
+ alias :old_method_missing :method_missing
231
+ def method_missing(name, prev = nil, *args)
232
+ old_method_missing(name, prev, *args) if name.to_s =~ /^to_/
230
233
  join prev unless prev.nil?
231
234
  join name
232
235
  end
@@ -267,7 +270,7 @@ source "$INSTALL_HELPER_FILE"
267
270
  end
268
271
 
269
272
  def define_as_proc(&block)
270
- Resource.define_resource(self, :proc, &block)
273
+ Resource.define_resource(self, :proc, block)
271
274
  end
272
275
 
273
276
 
@@ -307,7 +310,9 @@ source "$INSTALL_HELPER_FILE"
307
310
  end
308
311
  end
309
312
 
313
+ alias :old_method_missing :method_missing
310
314
  def method_missing(name, *args)
315
+ return old_method_missing(name, *args) if name.to_s =~ /^to_/
311
316
  if key
312
317
  klass.send(name, key, *args)
313
318
  else
@@ -333,7 +338,9 @@ source "$INSTALL_HELPER_FILE"
333
338
  Path.path(name, pkgdir, namespace, lib_dir)
334
339
  end
335
340
 
336
- def method_missing(name, prev = nil)
341
+ alias :old_method_missing :method_missing
342
+ def method_missing(name, prev = nil, *args)
343
+ return old_method_missing(name, prev, *args) if name.to_s =~ /^to_/
337
344
  if prev
338
345
  self[prev][name]
339
346
  else
@@ -145,6 +145,7 @@ class Task
145
145
  usage << "\nMandatory options:\n"
146
146
  usage << "\tTask\tName\tType \tDescription\n"
147
147
  usage << "\t----\t----\t---- \t-----------\n"
148
+
148
149
  options.each do |option|
149
150
  option_line = "\t[#{option[:source]}]\t#{option[:name]}"
150
151
  option_line << "\t#{option[:type] ? option[:type] : "Unspec."}"
@@ -154,8 +155,7 @@ class Task
154
155
  end
155
156
 
156
157
  if optional_options.any?
157
- usage << "\nOptional options:"
158
- usage << "Mandatory options:\n"
158
+ usage << "\nOptional options:\n"
159
159
  usage << "\tTask\tName\tDefault \tType \tDescription\n"
160
160
  usage << "\t----\t----\t------- \t---- \t-----------\n"
161
161
  optional_options.each do |option|
@@ -77,7 +77,7 @@ class Task
77
77
 
78
78
  def info
79
79
  return {} if not File.exists?(info_file)
80
- info = YAML.load(File.open(info_file))
80
+ info = YAML.load(File.open(info_file)) || {}
81
81
  info.extend IndiferentHash
82
82
  end
83
83
 
@@ -132,6 +132,10 @@ class Task
132
132
  step == :error or step == :aborted
133
133
  end
134
134
 
135
+ def aborted?
136
+ step == :aborted
137
+ end
138
+
135
139
  def arguments
136
140
  options.values_at *task.options
137
141
  end
@@ -142,7 +146,13 @@ class Task
142
146
 
143
147
  def run_dependencies
144
148
  required_files.each do |file| file.produce unless File.exists? file end unless required_files.nil?
145
- previous_jobs.each do |job| job.start unless File.exists? job.path; job.set_info(:step, :done) end unless previous_jobs.nil?
149
+ previous_jobs.each do |job|
150
+ if not job.recursive_done?
151
+ job.clean if job.error?
152
+ job.start
153
+ job.step :done unless job.step == :error or job.step == :aborted
154
+ end
155
+ end unless previous_jobs.nil?
146
156
  end
147
157
 
148
158
  def save_dependencies
@@ -185,6 +195,7 @@ class Task
185
195
  set_info(:end_time, Time.now)
186
196
  Log.medium("[#{task.name}] Finished Job '#{ name }'. Path: '#{ path }'")
187
197
  rescue Exception
198
+ set_info(:exception_backtrace, $!.backtrace)
188
199
  step(:error, "#{$!.class}: #{$!.message}")
189
200
  raise $!
190
201
  end
@@ -204,7 +215,7 @@ class Task
204
215
  end
205
216
 
206
217
  def recursive_done?
207
- previous_jobs.inject(true){|acc,j| acc and j.recursive_done?} and done?
218
+ (previous_jobs || []).inject(true){|acc,j| acc and j.recursive_done?} and done? and not error?
208
219
  end
209
220
 
210
221
  def run
@@ -280,6 +291,8 @@ class Task
280
291
  def clean
281
292
  FileUtils.rm path if File.exists? path
282
293
  FileUtils.rm info_file if File.exists? info_file
294
+ FileUtils.rm_rf path + '.files' if File.exists? path + '.files'
295
+ self
283
296
  end
284
297
 
285
298
  def recursive_clean
@@ -15,7 +15,6 @@ class TCHash < TokyoCabinet::HDB
15
15
  def self.load(str); str.unpack("l*"); end
16
16
  end
17
17
 
18
-
19
18
  class StringSerializer
20
19
  def self.dump(str); str.to_s; end
21
20
  def self.load(str); str; end
@@ -27,7 +26,7 @@ class TCHash < TokyoCabinet::HDB
27
26
  end
28
27
 
29
28
  def self.load(string)
30
- string.split(/\t/)
29
+ string.split("\t", -1)
31
30
  end
32
31
  end
33
32
 
@@ -37,7 +36,7 @@ class TCHash < TokyoCabinet::HDB
37
36
  end
38
37
 
39
38
  def self.load(string)
40
- string.split(/\t/).collect{|l| l.split("|")}
39
+ string.split("\t", -1).collect{|l| l.split("|", -1)}
41
40
  end
42
41
  end
43
42
 
@@ -66,17 +65,31 @@ class TCHash < TokyoCabinet::HDB
66
65
  end
67
66
  end
68
67
 
69
- def serializer
70
- @serializer
68
+ def size
69
+ keys.length
71
70
  end
72
71
 
73
- def serializer=(value)
74
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer],value) unless value.nil?
72
+ def delete(key)
73
+ raise "Cannot deleted key: closed connection" if not write?
74
+ out(key) or raise "Not deleted"
75
+ end
76
+
77
+ attr_accessor :serializer
78
+ def serializer=(serializer)
79
+
80
+ if ALIAS.include? serializer.to_sym
81
+ @serializer = ALIAS[serializer.to_sym]
82
+ else
83
+ @serializer = serializer
84
+ end
85
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s)
75
86
  end
76
87
 
77
88
  alias original_open open
78
- def open(write = false)
79
- flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
89
+ def open(write = false, serializer = nil)
90
+ flags = (write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER)
91
+
92
+ FileUtils.mkdir_p File.dirname(@path_to_db) unless File.exists?(File.dirname(@path_to_db))
80
93
  if !self.original_open(@path_to_db, flags)
81
94
  ecode = self.ecode
82
95
  raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
@@ -84,17 +97,17 @@ class TCHash < TokyoCabinet::HDB
84
97
 
85
98
  @write = write
86
99
 
87
- if write
88
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s) unless @serializer.nil?
89
- else
100
+ if self.include? FIELD_INFO_ENTRIES[:serializer]
90
101
  serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
91
102
 
92
- if serializer_str.nil? or serializer_str.empty?
93
- @serializer = Marshal
94
- else
95
- mod = Misc.string2const serializer_str
96
- @serializer = mod
97
- end
103
+ mod = Misc.string2const serializer_str
104
+ @serializer = mod
105
+
106
+ else
107
+ raise "No serializer specified" if serializer.nil?
108
+
109
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
110
+ @serializer = serializer
98
111
  end
99
112
  end
100
113
 
@@ -118,21 +131,37 @@ class TCHash < TokyoCabinet::HDB
118
131
  serializer = ALIAS[serializer] if ALIAS.include? serializer
119
132
 
120
133
  @path_to_db = path
121
- @serializer = serializer
122
134
 
123
135
  if write || ! File.exists?(@path_to_db)
124
136
  self.setcache(100000) or raise "Error setting cache"
125
- self.open(true)
137
+ self.open(true, serializer)
126
138
  else
127
139
  self.open(false)
128
140
  end
129
141
  end
130
142
 
131
143
  def self.get(path, write = false, serializer = Marshal)
132
- serializer = ALIAS[serializer] if ALIAS.include? serializer
133
- @serializer = serializer
134
- d = CONNECTIONS[path] ||= self.new(path, false, @serializer)
135
- write ? d.write : d.read
144
+ if ALIAS.include? serializer
145
+ serializer = ALIAS[serializer]
146
+ else
147
+ serializer = serializer
148
+ end
149
+
150
+ case
151
+ when !File.exists?(path)
152
+ CONNECTIONS[path] = self.new(path, true, serializer)
153
+ when (not CONNECTIONS.include?(path))
154
+ CONNECTIONS[path] = self.new(path, false, serializer)
155
+ end
156
+
157
+ d = CONNECTIONS[path]
158
+
159
+ if write
160
+ d.write unless d.write?
161
+ else
162
+ d.read if d.write?
163
+ end
164
+
136
165
  d
137
166
  end
138
167
 
@@ -142,14 +171,22 @@ class TCHash < TokyoCabinet::HDB
142
171
  def [](key)
143
172
  return nil unless String === key
144
173
  result = self.original_get_brackets(key)
145
- result ? @serializer.load(result) : nil
174
+ if result.nil? or (String === result and result =~ /__Ref:/)
175
+ result
176
+ else
177
+ @serializer.load(result)
178
+ end
146
179
  end
147
180
 
148
181
  alias original_set_brackets []=
149
182
  def []=(key,value)
150
183
  raise KeyFormatError, "Key must be a String, its #{key.class.to_s}" unless String === key
151
184
  raise "Closed TCHash connection" unless write?
152
- self.original_set_brackets(key, serializer.dump(value))
185
+ if String === value and value =~ /^__Ref/
186
+ self.original_set_brackets(key, value)
187
+ else
188
+ self.original_set_brackets(key, serializer.dump(value))
189
+ end
153
190
  end
154
191
 
155
192
  def values_at(*args)
@@ -204,7 +241,7 @@ class TCHash < TokyoCabinet::HDB
204
241
  def merge!(data)
205
242
  raise "Closed TCHash connection" unless write?
206
243
  serialized =
207
- data.collect{|key, values| [key.to_s, serializer.dump(values)]}
244
+ data.collect{|key, values| [key.to_s, serializer.dump(values)] }
208
245
  if tranbegin
209
246
  serialized.each do |key, values|
210
247
  self.putasync(key, values)
data/lib/rbbt/util/tsv.rb CHANGED
@@ -14,8 +14,28 @@ require 'rbbt/util/tsv/manipulate'
14
14
  require 'rbbt/util/tsv/index'
15
15
  require 'rbbt/util/tsv/attach'
16
16
  require 'rbbt/util/tsv/resource'
17
+
17
18
  class TSV
18
19
 
20
+ ESCAPES = {
21
+ "\n" => "[[NL]]",
22
+ "\t" => "[[TAB]]",
23
+ }
24
+
25
+ def self.escape(text)
26
+ ESCAPES.each do |char,replacement|
27
+ text = text.gsub(char, replacement)
28
+ end
29
+ text
30
+ end
31
+
32
+ def self.unescape(text)
33
+ ESCAPES.each do |char,replacement|
34
+ text = text.gsub(replacement, char)
35
+ end
36
+ text
37
+ end
38
+
19
39
  def self.headers(file, options = {})
20
40
 
21
41
  ## Remove options from filename
@@ -91,7 +111,7 @@ class TSV
91
111
  extra.each do |key, values|
92
112
  self.send("#{ key }=".to_sym, values) if self.respond_to? "#{ key }=".to_sym
93
113
  end if not extra.nil?
94
-
114
+
95
115
  else
96
116
 
97
117
  case
@@ -99,10 +119,16 @@ class TSV
99
119
  @data = Hash[file.collect{|v|
100
120
  [v,[]]
101
121
  }]
122
+ @data.key_field = key_field if key_field
123
+ @data.fields = fields if fields
102
124
  when Hash === file
103
125
  @data = file
126
+ @data.key_field = key_field if key_field
127
+ @data.fields = fields if fields
104
128
  when TSV === file
105
129
  @data = file.data
130
+ @data.key_field = key_field if key_field
131
+ @data.fields = fields if fields
106
132
  when Persistence::TSV === file
107
133
  @data = file
108
134
  %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
@@ -116,25 +142,7 @@ class TSV
116
142
  data, extra = nil
117
143
 
118
144
  if in_situ_persistence and persistence_file
119
-
120
- cast = options[:cast]
121
- type = options[:type]
122
- serializer = case
123
- when ((cast == "to_i" or cast == :to_i) and type == :single)
124
- :integer
125
- when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
126
- :integer_array
127
- when type == :double
128
- :double
129
- when type == :list
130
- :list
131
- when type == :single
132
- :single
133
- else
134
- :marshal
135
- end
136
-
137
- options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, serializer)
145
+ options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, :double)
138
146
  end
139
147
 
140
148
  begin
@@ -169,6 +177,17 @@ class TSV
169
177
  raise $!
170
178
  end
171
179
 
180
+ if Persistence::TSV === data
181
+ %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
182
+ if extra.include? key.to_sym
183
+ if data.respond_to? "#{key}=".to_sym
184
+ data.send("#{key}=".to_sym, extra[key.to_sym])
185
+ end
186
+ end
187
+ end
188
+ data.read
189
+ end
190
+
172
191
  [data, extra]
173
192
  end
174
193
  end
@@ -183,7 +202,6 @@ class TSV
183
202
  #end
184
203
  end
185
204
  end
186
- @data.read if Persistence::TSV === @data
187
205
  end
188
206
  end
189
207
 
@@ -195,4 +213,9 @@ class TSV
195
213
  @data.read if @data.respond_to? :read
196
214
  end
197
215
 
216
+ def write?
217
+ @data.write? if @data.respond_to? :write
218
+ end
219
+
220
+
198
221
  end