rbbt-util 5.5.68 → 5.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/association.rb +1 -1
  3. data/lib/rbbt/association/index.rb +2 -1
  4. data/lib/rbbt/entity.rb +2 -0
  5. data/lib/rbbt/persist/tsv.rb +46 -232
  6. data/lib/rbbt/persist/tsv/cdb.rb +139 -0
  7. data/lib/rbbt/persist/tsv/kyotocabinet.rb +168 -0
  8. data/lib/rbbt/persist/tsv/leveldb.rb +121 -0
  9. data/lib/rbbt/persist/tsv/lmdb.rb +148 -0
  10. data/lib/rbbt/persist/tsv/tokyocabinet.rb +158 -0
  11. data/lib/rbbt/resource/rake.rb +2 -1
  12. data/lib/rbbt/tsv/accessor.rb +74 -101
  13. data/lib/rbbt/tsv/parser.rb +2 -5
  14. data/lib/rbbt/tsv/serializers.rb +6 -0
  15. data/lib/rbbt/tsv/util.rb +8 -2
  16. data/lib/rbbt/util/R.rb +6 -0
  17. data/lib/rbbt/util/cmd.rb +7 -4
  18. data/lib/rbbt/util/misc.rb +10 -4
  19. data/lib/rbbt/util/open.rb +8 -6
  20. data/lib/rbbt/util/simpleopt.rb +1 -1
  21. data/lib/rbbt/workflow.rb +17 -3
  22. data/lib/rbbt/workflow/accessor.rb +5 -1
  23. data/lib/rbbt/workflow/definition.rb +6 -0
  24. data/lib/rbbt/workflow/step.rb +10 -4
  25. data/lib/rbbt/workflow/task.rb +1 -1
  26. data/share/rbbt_commands/tsv/json +37 -0
  27. data/share/rbbt_commands/workflow/task +8 -2
  28. data/test/rbbt/persist/test_tsv.rb +77 -0
  29. data/test/rbbt/persist/tsv/test_cdb.rb +23 -0
  30. data/test/rbbt/persist/tsv/test_kyotocabinet.rb +33 -0
  31. data/test/rbbt/persist/tsv/test_leveldb.rb +22 -0
  32. data/test/rbbt/persist/tsv/test_lmdb.rb +22 -0
  33. data/test/rbbt/persist/tsv/test_tokyocabinet.rb +242 -0
  34. data/test/rbbt/test_persist.rb +1 -225
  35. data/test/rbbt/test_workflow.rb +0 -1
  36. data/test/rbbt/workflow/test_step.rb +14 -12
  37. data/test/test_helper.rb +4 -2
  38. metadata +20 -4
  39. data/test/rbbt/workflow/test_soap.rb +0 -105
@@ -47,7 +47,8 @@ module Rake
47
47
  Rake::FileTask.clear_files
48
48
  end
49
49
  rescue
50
- raise "Error in rake: #{$!.message}"
50
+ Log.error "Error in rake: #{$!.message}"
51
+ raise $!
51
52
  end
52
53
  #}
53
54
  #Process.wait(pid)
@@ -78,8 +78,6 @@ module TSV
78
78
  end
79
79
 
80
80
  def self.extended(data)
81
- #setup_chains(data)
82
-
83
81
  if not data.respond_to? :write
84
82
  class << data
85
83
  attr_accessor :writable
@@ -102,28 +100,79 @@ module TSV
102
100
  end
103
101
  end
104
102
  end
103
+ end
104
+
105
+ #{{{ TSV ENTRIES and ENTRY_KEYS
106
+
107
+ KEY_PREFIX = "__tsv_hash_"
108
+ ENTRIES = []
109
+ ENTRY_KEYS = Set.new
110
+ NIL_VALUE = "NIL_VALUE"
111
+
112
+ def self.entry(*entries)
113
+ entries = entries.collect{|entry| entry.to_s}
114
+ ENTRIES.concat entries
115
+ entries.each do |entry|
116
+ key = KEY_PREFIX + entry
117
+ ENTRY_KEYS << key
118
+ var_name = ("@" << entry).to_sym
119
+
120
+ TSV.send(:define_method, entry) do
121
+ return instance_variable_get(var_name) if instance_variables.include? var_name
122
+ svalue = self.send(:[], key, :entry_key)
123
+ value = (svalue.nil? or svalue == SERIALIZED_NIL) ? nil : TSV_SERIALIZER.load(svalue)
124
+ instance_variable_set(var_name, value)
125
+ value
126
+ end
127
+
128
+ TSV.send(:define_method, entry + "=") do |value|
129
+ instance_variable_set(var_name, value)
130
+ value = value.to_s if Path === value
131
+ self.send(:[]=, key, (value.nil? ? SERIALIZED_NIL : TSV_SERIALIZER.dump(value)), :entry_key)
132
+ value
133
+ end
105
134
 
106
- if not data.respond_to? :serialized_get
107
- #class << data
108
- # alias serialized_get []
109
- # alias serialized_set []=
110
- #end
111
135
  end
112
136
  end
113
137
 
114
- KEY_PREFIX = "__tsv_hash_"
138
+ entry :key_field,
139
+ :type,
140
+ :fields,
141
+ :cast,
142
+ :identifiers,
143
+ :namespace,
144
+ :filename,
145
+ :serializer
115
146
 
116
- ENTRIES = []
117
- ENTRY_KEYS = []
147
+ attr_reader :serializer_module
148
+
149
+ def serializer=(serializer)
150
+ @serializer = serializer
151
+ self.send(:[]=, KEY_PREFIX + 'serializer', (serializer.nil? ? SERIALIZED_NIL : TSV_SERIALIZER.dump(serializer)), :entry_key)
152
+ @serializar_module = serializer.nil? ? nil : SERIALIZER_ALIAS[serializer.to_sym]
153
+ end
154
+
155
+
156
+ def serializer_module
157
+ @serializar_module ||= begin
158
+ serializer = self.serializer
159
+ serializer.nil? ? TSV::CleanSerializer : SERIALIZER_ALIAS[serializer.to_sym]
160
+ end
161
+ end
118
162
 
119
- #{{{ Chained Methods
120
163
  def empty?
121
164
  length == 0
122
165
  end
123
166
 
167
+ #{{{ GETTERS AND SETTERS
168
+
169
+
124
170
  def [](key, clean = false)
125
- value = (self.respond_to?(:serialized_get) and not clean) ? serialized_get(key) : super(key)
126
- return value if value.nil? or @unnamed or clean == :entry_key or fields.nil?
171
+ value = super(key)
172
+ return value if clean or value.nil?
173
+
174
+ value = serializer_module.load(value) if serializer_module and not TSV::CleanSerializer === serializer_module
175
+ return value if @unnamed or fields.nil?
127
176
 
128
177
  case type
129
178
  when :double, :list
@@ -137,12 +186,15 @@ module TSV
137
186
  end
138
187
 
139
188
  def []=(key, value, clean = false)
140
- return super(key, value) if clean or not self.respond_to?(:serialized_set)
141
- serialized_set(key, value)
189
+ if clean or serializer_module.nil? or TSV::CleanSerializer === serializer_module or value.nil?
190
+ return super(key, value)
191
+ else
192
+ return super(key, serializer_module.dump(value))
193
+ end
142
194
  end
143
195
 
144
196
  def keys
145
- keys = super - ENTRY_KEYS
197
+ keys = super - ENTRY_KEYS.to_a
146
198
  return keys if @unnamed or key_field.nil?
147
199
 
148
200
  prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
@@ -168,12 +220,12 @@ module TSV
168
220
  fields = self.fields
169
221
 
170
222
  serializer = self.serializer
171
- serializer_module = SERIALIZER_ALIAS[serializer] unless serializer.nil?
223
+ serializer_module = self.serializer_module
172
224
  super do |key, value|
173
225
  next if ENTRY_KEYS.include? key
174
226
 
175
227
  # TODO Update this to be more efficient
176
- value = serializer_module.load(value) unless serializer.nil? or FalseClass === serializer
228
+ value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer === serializer_module
177
229
 
178
230
  # Annotated with Entity and NamedArray
179
231
  if not @unnamed
@@ -195,12 +247,12 @@ module TSV
195
247
 
196
248
  def collect
197
249
  serializer = self.serializer
198
- serializer_module = SERIALIZER_ALIAS[serializer] unless serializer.nil?
250
+ serializer_module = self.serializer_module
199
251
  super do |key, value|
200
252
  next if ENTRY_KEYS.include? key
201
253
 
202
254
  # TODO Update this to be more efficient
203
- value = serializer_module.load(value) unless serializer.nil?
255
+ value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer === serializer_module
204
256
 
205
257
  # Annotated with Entity and NamedArray
206
258
  if not @unnamed
@@ -338,86 +390,7 @@ module TSV
338
390
  end
339
391
 
340
392
 
341
- def self.entry(*entries)
342
- entries = entries.collect{|entry| entry.to_s}
343
- ENTRIES.concat entries
344
- entries.each do |entry|
345
- key = KEY_PREFIX + entry
346
- ENTRY_KEYS << key
347
- line = __LINE__; self.module_eval "
348
- attr_accessor :#{entry}
349
-
350
- def #{ entry }
351
- if not defined? @#{entry}
352
- # @#{entry} = (value = self.clean_get_brackets('#{key}')).nil? ? nil : TSV_SERIALIZER.load(value)
353
- @#{entry} = (value = self.send(:[], '#{key}', :entry_key)).nil? ? nil : TSV_SERIALIZER.load(value)
354
- end
355
- @#{entry}
356
- end
357
-
358
-
359
- if '#{entry}' == 'serializer'
360
-
361
- def #{ entry }=(value)
362
- @#{entry} = value
363
- #self.tsv_clean_set_brackets '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml
364
- self.send(:[]=, '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
365
-
366
- return if value.nil?
367
-
368
- self.serializer_module = SERIALIZER_ALIAS[value.to_sym]
369
-
370
- if serializer_module.nil?
371
- #class << self
372
- # alias serialized_get tsv_clean_get_brackets
373
- # alias serialized_set tsv_clean_set_brackets
374
- #end
375
-
376
- else
377
- class << self
378
-
379
- define_method :serialized_get do |key|
380
- return nil unless self.include? key
381
- res = self.send(:[], key, true)
382
- return res if res.nil?
383
- self.serializer_module.load(res)
384
- end
385
-
386
- define_method :serialized_set do |key, value|
387
- if value.nil?
388
- self.send(:[]=, key, value, true)
389
- #tsv_clean_set_brackets key, value
390
- else
391
- self.send(:[]=, key, self.serializer_module.dump(value), true)
392
- #tsv_clean_set_brackets key, self.serializer_module.dump(value)
393
- end
394
- end
395
- end
396
- end
397
-
398
- end
399
- else
400
- def #{ entry }=(value)
401
- @#{entry} = value
402
- self.send(:[]=, '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
403
- #self.tsv_clean_set_brackets '#{key}', value.nil? ? SERIALIZED_NIL : value.to_yaml
404
- end
405
- end
406
- ", __FILE__, line
407
- end
408
- end
409
-
410
- entry :key_field,
411
- :type,
412
- :fields,
413
- :cast,
414
- :identifiers,
415
- :namespace,
416
- :filename,
417
- :serializer
418
-
419
393
  def fields
420
- #@fields ||= TSV_SERIALIZER.load(self.tsv_clean_get_brackets("__tsv_hash_fields") || SERIALIZED_NIL)
421
394
  @fields ||= TSV_SERIALIZER.load(self.send(:[], "__tsv_hash_fields", :entry_key) || SERIALIZED_NIL)
422
395
  if true or @fields.nil? or @unnamed
423
396
  @fields
@@ -427,14 +400,12 @@ end
427
400
  end
428
401
 
429
402
  def namespace=(value)
430
- #self.tsv_clean_set_brackets "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : value.to_yaml
431
403
  self.send(:[]=, "__tsv_hash_namespace", value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
432
404
  @namespace = value
433
405
  @entity_options = nil
434
406
  end
435
407
 
436
408
  def fields=(value)
437
- #self.tsv_clean_set_brackets "__tsv_hash_fields", value.nil? ? SERIALIZED_NIL : value.to_yaml
438
409
  self.send(:[]=, "__tsv_hash_fields", value.nil? ? SERIALIZED_NIL : value.to_yaml, true)
439
410
  @fields = value
440
411
  @named_fields = nil
@@ -563,8 +534,10 @@ end
563
534
  Key field = #{key_field || "*No key field*"}
564
535
  Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
565
536
  Type = #{type}
537
+ Serializer = #{serializer.inspect}
566
538
  Size = #{size}
567
539
  namespace = #{namespace}
540
+ identifiers = #{Misc.fingerprint identifiers}
568
541
  Example:
569
542
  - #{key} -- #{Misc.fingerprint values }
570
543
  EOF
@@ -164,17 +164,14 @@ module TSV
164
164
 
165
165
  def add_to_data_no_merge_double(data, keys, values)
166
166
  keys.each do |key|
167
- data[key] = values unless data.include? key
167
+ next if data.include? key
168
+ data[key] = values
168
169
  end
169
170
  end
170
171
 
171
172
  def add_to_data_merge(data, keys, values)
172
173
  keys.uniq.each do |key|
173
174
  if data.include? key
174
- #data[key] = data[key].zip(values).collect do |old, new|
175
- # old.concat new
176
- # old
177
- #end
178
175
  new = data[key]
179
176
  new.each_with_index do |old, i|
180
177
  old.concat values[i]
@@ -1,4 +1,9 @@
1
1
  module TSV
2
+ class CleanSerializer
3
+ def self.dump(o); o end
4
+ def self.load(o); o end
5
+ end
6
+
2
7
  class IntegerSerializer
3
8
  def self.dump(i); [i].pack("l"); end
4
9
  def self.load(str); str.unpack("l").first; end
@@ -78,6 +83,7 @@ module TSV
78
83
  :list => StringArraySerializer,
79
84
  :flat => StringArraySerializer,
80
85
  :double => StringDoubleArraySerializer,
86
+ :clean => CleanSerializer,
81
87
  :tsv => TSVSerializer,
82
88
  :marshal_tsv => TSVMarshalSerializer
83
89
  }
data/lib/rbbt/tsv/util.rb CHANGED
@@ -85,8 +85,14 @@ module TSV
85
85
  new = {}
86
86
  case type
87
87
  when :double
88
- through do |k,v|
89
- new[k] = v.collect{|e| e.first}
88
+ if block_given?
89
+ through do |k,v|
90
+ new[k] = v.collect{|e| yield e}
91
+ end
92
+ else
93
+ through do |k,v|
94
+ new[k] = v.collect{|e| e.first}
95
+ end
90
96
  end
91
97
  when :flat
92
98
  through do |k,v|
data/lib/rbbt/util/R.rb CHANGED
@@ -68,6 +68,12 @@ source('#{UTIL}');
68
68
  end
69
69
  end
70
70
 
71
+ def self.tsv(file, options = {})
72
+ options = Misc.add_defaults :header_hash => '', :sep => / +/, :type => :list, :key_field => 'ID'
73
+ key_field = Misc.process_options options, :key_field
74
+ clean = CMD.cmd('grep -v WARNING', :in => file, :pipe => true)
75
+ TSV.open(clean, options).tap{|tsv| tsv.key_field = key_field }
76
+ end
71
77
  end
72
78
 
73
79
  module TSV
data/lib/rbbt/util/cmd.rb CHANGED
@@ -33,9 +33,12 @@ module CMD
33
33
 
34
34
  if $? and not $?.success?
35
35
  Log.debug{"Raising exception" if log}
36
- exception = CMDError.new "Command [#{@pid}] #{@cmd} failed with error status #{$?.exitstatus}"
37
- original_close
38
- raise exception
36
+ exception = CMDError.new "Command [#{@pid}] '#{@cmd}' failed with error status #{$?.exitstatus}"
37
+ begin
38
+ original_close
39
+ ensure
40
+ raise exception
41
+ end
39
42
  end
40
43
  end
41
44
  end
@@ -97,7 +100,7 @@ module CMD
97
100
  end
98
101
 
99
102
  def self.cmd(cmd, options = {}, &block)
100
- options[:stderr] ||= Log::DEBUG
103
+ options = Misc.add_defaults options, :stderr => Log::DEBUG
101
104
  in_content = options.delete(:in)
102
105
  stderr = options.delete(:stderr)
103
106
  pipe = options.delete(:pipe)
@@ -251,7 +251,7 @@ module Misc
251
251
  ":" << obj.to_s
252
252
  when String
253
253
  if obj.length > 100
254
- "'" << obj[0..20-1] << "<...#{obj.length}...>" << obj[-10..-1] << " " << "'"
254
+ "'" << obj.slice(0,20) << "<...#{obj.length}...>" << obj.slice(-10,10) << " " << "'"
255
255
  else
256
256
  "'" << obj << "'"
257
257
  end
@@ -259,7 +259,7 @@ module Misc
259
259
  "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
260
260
  when Array
261
261
  if (length = obj.length) > 10
262
- "[#{length} --" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
262
+ "[#{length}--" << (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
263
263
  else
264
264
  "[" << (obj.collect{|e| fingerprint(e) } * ",") << "]"
265
265
  end
@@ -709,7 +709,7 @@ end
709
709
  end
710
710
  end
711
711
 
712
- def self.benchmark(repeats = 1)
712
+ def self.benchmark(repeats = 1, message = nil)
713
713
  require 'benchmark'
714
714
  res = nil
715
715
  begin
@@ -718,7 +718,11 @@ end
718
718
  res = yield
719
719
  end
720
720
  end
721
- puts "Benchmark for #{ repeats } repeats"
721
+ if message
722
+ puts "#{message }: #{ repeats } repeats"
723
+ else
724
+ puts "Benchmark for #{ repeats } repeats"
725
+ end
722
726
  puts measure
723
727
  rescue Exception
724
728
  puts "Benchmark aborted"
@@ -1015,8 +1019,10 @@ end
1015
1019
  end
1016
1020
 
1017
1021
  def self.fixutf8(string)
1022
+ return nil if string.nil?
1018
1023
  return string if (string.respond_to? :valid_encoding? and string.valid_encoding?) or
1019
1024
  (string.respond_to? :valid_encoding and string.valid_encoding)
1025
+
1020
1026
  if string.respond_to?(:encode)
1021
1027
  string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?").encode('UTF-8')
1022
1028
  else
@@ -80,11 +80,11 @@ module Open
80
80
  end
81
81
 
82
82
  begin
83
- CMD.cmd("wget '#{ url }'", options.merge(
84
- '-O' => '-',
85
- :pipe => pipe,
86
- :stderr => stderr
87
- ))
83
+ wget_options = options.merge( '-O' => '-')
84
+ wget_options[:pipe] = pipe unless pipe.nil?
85
+ wget_options[:stderr] = stderr unless stderr.nil?
86
+
87
+ CMD.cmd("wget '#{ url }'", wget_options)
88
88
  rescue
89
89
  STDERR.puts $!.backtrace.inspect
90
90
  raise OpenURLError, "Error reading remote url: #{ url }.\n#{$!.message}"
@@ -384,7 +384,7 @@ module Open
384
384
 
385
385
  FileUtils.mkdir_p File.dirname(file)
386
386
  case
387
- when content.nil?
387
+ when block_given?
388
388
  begin
389
389
  File.open(file, mode) do |f|
390
390
  yield f
@@ -393,6 +393,8 @@ module Open
393
393
  FileUtils.rm file if File.exists? file
394
394
  raise $!
395
395
  end
396
+ when content.nil?
397
+ File.open(file, mode){|f| f.write "" }
396
398
  when String === content
397
399
  file_write(file, content, mode)
398
400
  else