rbbt-util 3.1.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/bin/rbbt_query.rb +1 -1
  2. data/lib/rbbt/util/cmd.rb +115 -67
  3. data/lib/rbbt/util/fix_width_table.rb +18 -3
  4. data/lib/rbbt/util/misc.rb +106 -6
  5. data/lib/rbbt/util/open.rb +9 -7
  6. data/lib/rbbt/util/persistence.rb +17 -14
  7. data/lib/rbbt/util/resource.rb +10 -3
  8. data/lib/rbbt/util/task.rb +2 -2
  9. data/lib/rbbt/util/task/job.rb +16 -3
  10. data/lib/rbbt/util/tc_hash.rb +64 -27
  11. data/lib/rbbt/util/tsv.rb +44 -21
  12. data/lib/rbbt/util/tsv/accessor.rb +8 -6
  13. data/lib/rbbt/util/tsv/attach.rb +19 -28
  14. data/lib/rbbt/util/tsv/filters.rb +193 -0
  15. data/lib/rbbt/util/tsv/index.rb +80 -8
  16. data/lib/rbbt/util/tsv/manipulate.rb +17 -6
  17. data/lib/rbbt/util/tsv/misc.rb +10 -0
  18. data/lib/rbbt/util/tsv/parse.rb +18 -1
  19. data/lib/rbbt/util/workflow.rb +12 -3
  20. data/lib/rbbt/util/workflow/soap.rb +0 -1
  21. data/share/install/software/lib/install_helpers +0 -2
  22. data/share/lib/R/util.R +3 -3
  23. data/test/rbbt/util/test_cmd.rb +23 -0
  24. data/test/rbbt/util/test_excel2tsv.rb +1 -1
  25. data/test/rbbt/util/test_misc.rb +41 -11
  26. data/test/rbbt/util/test_open.rb +2 -2
  27. data/test/rbbt/util/test_persistence.rb +2 -2
  28. data/test/rbbt/util/test_resource.rb +4 -20
  29. data/test/rbbt/util/test_tc_hash.rb +38 -0
  30. data/test/rbbt/util/test_tmpfile.rb +1 -1
  31. data/test/rbbt/util/test_tsv.rb +6 -0
  32. data/test/rbbt/util/test_workflow.rb +14 -10
  33. data/test/rbbt/util/tsv/test_accessor.rb +42 -0
  34. data/test/rbbt/util/tsv/test_filters.rb +141 -0
  35. data/test/rbbt/util/tsv/test_index.rb +32 -0
  36. data/test/rbbt/util/tsv/test_manipulate.rb +18 -0
  37. data/test/test_helper.rb +3 -1
  38. metadata +41 -38
@@ -101,7 +101,7 @@ module Open
101
101
  case
102
102
  when Array === grep
103
103
  TmpFile.with_file(grep * "\n", false) do |f|
104
- CMD.cmd("grep", "-E" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
104
+ CMD.cmd("grep", "-w" => true, "-f" => f, :in => stream, :pipe => true, :post => proc{FileUtils.rm f})
105
105
  end
106
106
  else
107
107
  CMD.cmd("grep '#{grep}' -", :in => stream, :pipe => true, :post => proc{stream.force_close if stream.respond_to? :force_close})
@@ -191,12 +191,16 @@ module Open
191
191
  f = open(file, options)
192
192
 
193
193
  if block_given?
194
- while l = Misc.fixutf8(f.gets)
194
+ while not f.eof?
195
+ l = f.gets
195
196
  l = fixutf8(l) if l.respond_to?(:valid_encoding?) && ! l.valid_encoding?
196
197
  yield l
197
198
  end
199
+ f.close
198
200
  else
199
- Misc.fixutf8(f.read)
201
+ text = Misc.fixutf8(f.read)
202
+ f.close unless f.closed?
203
+ text
200
204
  end
201
205
  end
202
206
 
@@ -206,9 +210,7 @@ module Open
206
210
  when content.nil?
207
211
  begin
208
212
  File.open(file, 'w') do |f|
209
- f.flock(File::LOCK_EX)
210
213
  yield f
211
- f.flock(File::LOCK_UN)
212
214
  end
213
215
  rescue Exception
214
216
  FileUtils.rm file if File.exists? file
@@ -224,8 +226,8 @@ module Open
224
226
  begin
225
227
  File.open(file, 'w') do |f|
226
228
  f.flock(File::LOCK_EX)
227
- while l = content.gets
228
- f.write l
229
+ while not content.eof?
230
+ f.write content.gets
229
231
  end
230
232
  f.flock(File::LOCK_UN)
231
233
  end
@@ -129,19 +129,18 @@ module Persistence
129
129
  end
130
130
 
131
131
  def self.tsv_serializer(data, extra = nil)
132
- if Object::TSV === data
132
+ return data.serializer if Persistence::TSV === data
133
+ if Object::TSV === data
133
134
  return :integer if (data.cast == "to_i" or data.cast == :to_i) and data.type == :single
134
135
  return :integer_array if (data.cast == "to_i" or data.cast == :to_i) and (data.type == :list or data.type == :flat)
135
136
 
136
137
  case
137
- when data.type == :double
138
- :double
139
- when data.type == :list
138
+ when (data.type == :list or data.type == :flat)
140
139
  :list
141
140
  when data.type == :single
142
141
  :single
143
142
  else
144
- :marshal
143
+ :double
145
144
  end
146
145
  else
147
146
  return :marshal if extra.nil?
@@ -149,14 +148,12 @@ module Persistence
149
148
  return :integer_array if (extra[:cast] == "to_i" or extra[:cast] == :to_i) and (extra[:type] == :list or extra[:type] == :flat)
150
149
 
151
150
  case
152
- when extra[:type] == :double
153
- :double
154
- when extra[:type] == :list
151
+ when (extra[:type] == :list or extra[:type] == :flat)
155
152
  :list
156
153
  when extra[:type] == :single
157
154
  :single
158
155
  else
159
- :marshal
156
+ :double
160
157
  end
161
158
  end
162
159
  end
@@ -201,7 +198,7 @@ module Persistence
201
198
  else
202
199
  Log.debug "Loading #{ persistence_file }. Prefix = #{prefix}"
203
200
 
204
- per = Persistence::TSV.get persistence_file, true, serializer
201
+ per = Persistence::TSV.get persistence_file, false, serializer
205
202
  tsv = Object::TSV.new per
206
203
  Persistence::TSV::FIELD_INFO_ENTRIES.keys.each do |key|
207
204
  if tsv.respond_to?(key.to_sym) and per.respond_to?(key.to_sym)
@@ -224,7 +221,7 @@ module Persistence
224
221
  filename ||= get_filename(file)
225
222
  persistence_file ||= get_persistence_file(filename, prefix, options)
226
223
 
227
- if persistence_update or not File.exists? persistence_file
224
+ if persistence_update or not File.exists?(persistence_file)
228
225
  Log.debug "Creating #{ persistence_file }. Prefix = #{prefix}"
229
226
  res, extra = yield file, options, filename, persistence_file
230
227
 
@@ -267,7 +264,7 @@ module Persistence
267
264
  rescue Interrupt
268
265
  raise "Interrupted"
269
266
  rescue Exception
270
- per.close
267
+ per.close unless per.nil?
271
268
  raise $!
272
269
  end
273
270
 
@@ -353,9 +350,7 @@ module Persistence
353
350
  yield file, options, filename
354
351
  else
355
352
  Log.low "Persistent Loading for #{filename}. Prefix: #{prefix}. Type #{persistence_type.to_s}"
356
-
357
353
  Misc.lock(persistence_file, file, prefix, options, block) do |persistence_file,file,prefix,options,block|
358
-
359
354
  case persistence_type.to_sym
360
355
  when :string
361
356
  persist_string(file, prefix, options, &block)
@@ -375,6 +370,14 @@ module Persistence
375
370
  end
376
371
  end
377
372
  end
373
+ end
378
374
 
375
+ module LocalPersist
376
+
377
+ attr_accessor :local_persistence_dir
378
+ def local_persist(name, prefix, type, options= {}, &block)
379
+ Persistence.persist(name, prefix, type, options.merge({:persistence_dir => @local_persistence_dir}), &block)
380
+ end
379
381
 
380
382
  end
383
+
@@ -168,6 +168,7 @@ source "$INSTALL_HELPER_FILE"
168
168
  RakeHelper.run(rakefile, file, dir)
169
169
  resource
170
170
  end
171
+ resource
171
172
  end
172
173
 
173
174
  def relative_to(klass, path)
@@ -226,7 +227,9 @@ source "$INSTALL_HELPER_FILE"
226
227
  join name
227
228
  end
228
229
 
229
- def method_missing(name, prev = nil)
230
+ alias :old_method_missing :method_missing
231
+ def method_missing(name, prev = nil, *args)
232
+ old_method_missing(name, prev, *args) if name.to_s =~ /^to_/
230
233
  join prev unless prev.nil?
231
234
  join name
232
235
  end
@@ -267,7 +270,7 @@ source "$INSTALL_HELPER_FILE"
267
270
  end
268
271
 
269
272
  def define_as_proc(&block)
270
- Resource.define_resource(self, :proc, &block)
273
+ Resource.define_resource(self, :proc, block)
271
274
  end
272
275
 
273
276
 
@@ -307,7 +310,9 @@ source "$INSTALL_HELPER_FILE"
307
310
  end
308
311
  end
309
312
 
313
+ alias :old_method_missing :method_missing
310
314
  def method_missing(name, *args)
315
+ return old_method_missing(name, *args) if name.to_s =~ /^to_/
311
316
  if key
312
317
  klass.send(name, key, *args)
313
318
  else
@@ -333,7 +338,9 @@ source "$INSTALL_HELPER_FILE"
333
338
  Path.path(name, pkgdir, namespace, lib_dir)
334
339
  end
335
340
 
336
- def method_missing(name, prev = nil)
341
+ alias :old_method_missing :method_missing
342
+ def method_missing(name, prev = nil, *args)
343
+ return old_method_missing(name, prev, *args) if name.to_s =~ /^to_/
337
344
  if prev
338
345
  self[prev][name]
339
346
  else
@@ -145,6 +145,7 @@ class Task
145
145
  usage << "\nMandatory options:\n"
146
146
  usage << "\tTask\tName\tType \tDescription\n"
147
147
  usage << "\t----\t----\t---- \t-----------\n"
148
+
148
149
  options.each do |option|
149
150
  option_line = "\t[#{option[:source]}]\t#{option[:name]}"
150
151
  option_line << "\t#{option[:type] ? option[:type] : "Unspec."}"
@@ -154,8 +155,7 @@ class Task
154
155
  end
155
156
 
156
157
  if optional_options.any?
157
- usage << "\nOptional options:"
158
- usage << "Mandatory options:\n"
158
+ usage << "\nOptional options:\n"
159
159
  usage << "\tTask\tName\tDefault \tType \tDescription\n"
160
160
  usage << "\t----\t----\t------- \t---- \t-----------\n"
161
161
  optional_options.each do |option|
@@ -77,7 +77,7 @@ class Task
77
77
 
78
78
  def info
79
79
  return {} if not File.exists?(info_file)
80
- info = YAML.load(File.open(info_file))
80
+ info = YAML.load(File.open(info_file)) || {}
81
81
  info.extend IndiferentHash
82
82
  end
83
83
 
@@ -132,6 +132,10 @@ class Task
132
132
  step == :error or step == :aborted
133
133
  end
134
134
 
135
+ def aborted?
136
+ step == :aborted
137
+ end
138
+
135
139
  def arguments
136
140
  options.values_at *task.options
137
141
  end
@@ -142,7 +146,13 @@ class Task
142
146
 
143
147
  def run_dependencies
144
148
  required_files.each do |file| file.produce unless File.exists? file end unless required_files.nil?
145
- previous_jobs.each do |job| job.start unless File.exists? job.path; job.set_info(:step, :done) end unless previous_jobs.nil?
149
+ previous_jobs.each do |job|
150
+ if not job.recursive_done?
151
+ job.clean if job.error?
152
+ job.start
153
+ job.step :done unless job.step == :error or job.step == :aborted
154
+ end
155
+ end unless previous_jobs.nil?
146
156
  end
147
157
 
148
158
  def save_dependencies
@@ -185,6 +195,7 @@ class Task
185
195
  set_info(:end_time, Time.now)
186
196
  Log.medium("[#{task.name}] Finished Job '#{ name }'. Path: '#{ path }'")
187
197
  rescue Exception
198
+ set_info(:exception_backtrace, $!.backtrace)
188
199
  step(:error, "#{$!.class}: #{$!.message}")
189
200
  raise $!
190
201
  end
@@ -204,7 +215,7 @@ class Task
204
215
  end
205
216
 
206
217
  def recursive_done?
207
- previous_jobs.inject(true){|acc,j| acc and j.recursive_done?} and done?
218
+ (previous_jobs || []).inject(true){|acc,j| acc and j.recursive_done?} and done? and not error?
208
219
  end
209
220
 
210
221
  def run
@@ -280,6 +291,8 @@ class Task
280
291
  def clean
281
292
  FileUtils.rm path if File.exists? path
282
293
  FileUtils.rm info_file if File.exists? info_file
294
+ FileUtils.rm_rf path + '.files' if File.exists? path + '.files'
295
+ self
283
296
  end
284
297
 
285
298
  def recursive_clean
@@ -15,7 +15,6 @@ class TCHash < TokyoCabinet::HDB
15
15
  def self.load(str); str.unpack("l*"); end
16
16
  end
17
17
 
18
-
19
18
  class StringSerializer
20
19
  def self.dump(str); str.to_s; end
21
20
  def self.load(str); str; end
@@ -27,7 +26,7 @@ class TCHash < TokyoCabinet::HDB
27
26
  end
28
27
 
29
28
  def self.load(string)
30
- string.split(/\t/)
29
+ string.split("\t", -1)
31
30
  end
32
31
  end
33
32
 
@@ -37,7 +36,7 @@ class TCHash < TokyoCabinet::HDB
37
36
  end
38
37
 
39
38
  def self.load(string)
40
- string.split(/\t/).collect{|l| l.split("|")}
39
+ string.split("\t", -1).collect{|l| l.split("|", -1)}
41
40
  end
42
41
  end
43
42
 
@@ -66,17 +65,31 @@ class TCHash < TokyoCabinet::HDB
66
65
  end
67
66
  end
68
67
 
69
- def serializer
70
- @serializer
68
+ def size
69
+ keys.length
71
70
  end
72
71
 
73
- def serializer=(value)
74
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer],value) unless value.nil?
72
+ def delete(key)
73
+ raise "Cannot deleted key: closed connection" if not write?
74
+ out(key) or raise "Not deleted"
75
+ end
76
+
77
+ attr_accessor :serializer
78
+ def serializer=(serializer)
79
+
80
+ if ALIAS.include? serializer.to_sym
81
+ @serializer = ALIAS[serializer.to_sym]
82
+ else
83
+ @serializer = serializer
84
+ end
85
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s)
75
86
  end
76
87
 
77
88
  alias original_open open
78
- def open(write = false)
79
- flags = write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER
89
+ def open(write = false, serializer = nil)
90
+ flags = (write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER)
91
+
92
+ FileUtils.mkdir_p File.dirname(@path_to_db) unless File.exists?(File.dirname(@path_to_db))
80
93
  if !self.original_open(@path_to_db, flags)
81
94
  ecode = self.ecode
82
95
  raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
@@ -84,17 +97,17 @@ class TCHash < TokyoCabinet::HDB
84
97
 
85
98
  @write = write
86
99
 
87
- if write
88
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s) unless @serializer.nil?
89
- else
100
+ if self.include? FIELD_INFO_ENTRIES[:serializer]
90
101
  serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
91
102
 
92
- if serializer_str.nil? or serializer_str.empty?
93
- @serializer = Marshal
94
- else
95
- mod = Misc.string2const serializer_str
96
- @serializer = mod
97
- end
103
+ mod = Misc.string2const serializer_str
104
+ @serializer = mod
105
+
106
+ else
107
+ raise "No serializer specified" if serializer.nil?
108
+
109
+ self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
110
+ @serializer = serializer
98
111
  end
99
112
  end
100
113
 
@@ -118,21 +131,37 @@ class TCHash < TokyoCabinet::HDB
118
131
  serializer = ALIAS[serializer] if ALIAS.include? serializer
119
132
 
120
133
  @path_to_db = path
121
- @serializer = serializer
122
134
 
123
135
  if write || ! File.exists?(@path_to_db)
124
136
  self.setcache(100000) or raise "Error setting cache"
125
- self.open(true)
137
+ self.open(true, serializer)
126
138
  else
127
139
  self.open(false)
128
140
  end
129
141
  end
130
142
 
131
143
  def self.get(path, write = false, serializer = Marshal)
132
- serializer = ALIAS[serializer] if ALIAS.include? serializer
133
- @serializer = serializer
134
- d = CONNECTIONS[path] ||= self.new(path, false, @serializer)
135
- write ? d.write : d.read
144
+ if ALIAS.include? serializer
145
+ serializer = ALIAS[serializer]
146
+ else
147
+ serializer = serializer
148
+ end
149
+
150
+ case
151
+ when !File.exists?(path)
152
+ CONNECTIONS[path] = self.new(path, true, serializer)
153
+ when (not CONNECTIONS.include?(path))
154
+ CONNECTIONS[path] = self.new(path, false, serializer)
155
+ end
156
+
157
+ d = CONNECTIONS[path]
158
+
159
+ if write
160
+ d.write unless d.write?
161
+ else
162
+ d.read if d.write?
163
+ end
164
+
136
165
  d
137
166
  end
138
167
 
@@ -142,14 +171,22 @@ class TCHash < TokyoCabinet::HDB
142
171
  def [](key)
143
172
  return nil unless String === key
144
173
  result = self.original_get_brackets(key)
145
- result ? @serializer.load(result) : nil
174
+ if result.nil? or (String === result and result =~ /__Ref:/)
175
+ result
176
+ else
177
+ @serializer.load(result)
178
+ end
146
179
  end
147
180
 
148
181
  alias original_set_brackets []=
149
182
  def []=(key,value)
150
183
  raise KeyFormatError, "Key must be a String, its #{key.class.to_s}" unless String === key
151
184
  raise "Closed TCHash connection" unless write?
152
- self.original_set_brackets(key, serializer.dump(value))
185
+ if String === value and value =~ /^__Ref/
186
+ self.original_set_brackets(key, value)
187
+ else
188
+ self.original_set_brackets(key, serializer.dump(value))
189
+ end
153
190
  end
154
191
 
155
192
  def values_at(*args)
@@ -204,7 +241,7 @@ class TCHash < TokyoCabinet::HDB
204
241
  def merge!(data)
205
242
  raise "Closed TCHash connection" unless write?
206
243
  serialized =
207
- data.collect{|key, values| [key.to_s, serializer.dump(values)]}
244
+ data.collect{|key, values| [key.to_s, serializer.dump(values)] }
208
245
  if tranbegin
209
246
  serialized.each do |key, values|
210
247
  self.putasync(key, values)
data/lib/rbbt/util/tsv.rb CHANGED
@@ -14,8 +14,28 @@ require 'rbbt/util/tsv/manipulate'
14
14
  require 'rbbt/util/tsv/index'
15
15
  require 'rbbt/util/tsv/attach'
16
16
  require 'rbbt/util/tsv/resource'
17
+
17
18
  class TSV
18
19
 
20
+ ESCAPES = {
21
+ "\n" => "[[NL]]",
22
+ "\t" => "[[TAB]]",
23
+ }
24
+
25
+ def self.escape(text)
26
+ ESCAPES.each do |char,replacement|
27
+ text = text.gsub(char, replacement)
28
+ end
29
+ text
30
+ end
31
+
32
+ def self.unescape(text)
33
+ ESCAPES.each do |char,replacement|
34
+ text = text.gsub(replacement, char)
35
+ end
36
+ text
37
+ end
38
+
19
39
  def self.headers(file, options = {})
20
40
 
21
41
  ## Remove options from filename
@@ -91,7 +111,7 @@ class TSV
91
111
  extra.each do |key, values|
92
112
  self.send("#{ key }=".to_sym, values) if self.respond_to? "#{ key }=".to_sym
93
113
  end if not extra.nil?
94
-
114
+
95
115
  else
96
116
 
97
117
  case
@@ -99,10 +119,16 @@ class TSV
99
119
  @data = Hash[file.collect{|v|
100
120
  [v,[]]
101
121
  }]
122
+ @data.key_field = key_field if key_field
123
+ @data.fields = fields if fields
102
124
  when Hash === file
103
125
  @data = file
126
+ @data.key_field = key_field if key_field
127
+ @data.fields = fields if fields
104
128
  when TSV === file
105
129
  @data = file.data
130
+ @data.key_field = key_field if key_field
131
+ @data.fields = fields if fields
106
132
  when Persistence::TSV === file
107
133
  @data = file
108
134
  %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
@@ -116,25 +142,7 @@ class TSV
116
142
  data, extra = nil
117
143
 
118
144
  if in_situ_persistence and persistence_file
119
-
120
- cast = options[:cast]
121
- type = options[:type]
122
- serializer = case
123
- when ((cast == "to_i" or cast == :to_i) and type == :single)
124
- :integer
125
- when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
126
- :integer_array
127
- when type == :double
128
- :double
129
- when type == :list
130
- :list
131
- when type == :single
132
- :single
133
- else
134
- :marshal
135
- end
136
-
137
- options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, serializer)
145
+ options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, :double)
138
146
  end
139
147
 
140
148
  begin
@@ -169,6 +177,17 @@ class TSV
169
177
  raise $!
170
178
  end
171
179
 
180
+ if Persistence::TSV === data
181
+ %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
182
+ if extra.include? key.to_sym
183
+ if data.respond_to? "#{key}=".to_sym
184
+ data.send("#{key}=".to_sym, extra[key.to_sym])
185
+ end
186
+ end
187
+ end
188
+ data.read
189
+ end
190
+
172
191
  [data, extra]
173
192
  end
174
193
  end
@@ -183,7 +202,6 @@ class TSV
183
202
  #end
184
203
  end
185
204
  end
186
- @data.read if Persistence::TSV === @data
187
205
  end
188
206
  end
189
207
 
@@ -195,4 +213,9 @@ class TSV
195
213
  @data.read if @data.respond_to? :read
196
214
  end
197
215
 
216
+ def write?
217
+ @data.write? if @data.respond_to? :write
218
+ end
219
+
220
+
198
221
  end