rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -1,324 +0,0 @@
1
- require 'rbbt/util/misc'
2
- require 'tokyocabinet'
3
- require 'set'
4
-
5
- class TCHash < TokyoCabinet::HDB
6
- class OpenError < StandardError;end
7
- class KeyFormatError < StandardError;end
8
-
9
- class IntegerSerializer
10
- def self.dump(i); [i].pack("l"); end
11
- def self.load(str); str.unpack("l").first; end
12
- end
13
-
14
- class FloatSerializer
15
- def self.dump(i); [i].pack("d"); end
16
- def self.load(str); str.unpack("d").first; end
17
- end
18
-
19
- class IntegerArraySerializer
20
- def self.dump(a); a.pack("l*"); end
21
- def self.load(str); str.unpack("l*"); end
22
- end
23
-
24
- class StringSerializer
25
- def self.dump(str); str.to_s; end
26
- def self.load(str); str; end
27
- end
28
-
29
- class StringArraySerializer
30
- def self.dump(array)
31
- array.collect{|a| a.to_s} * "\t"
32
- end
33
-
34
- def self.load(string)
35
- string.split("\t", -1)
36
- end
37
- end
38
-
39
- class StringDoubleArraySerializer
40
- def self.dump(array)
41
- array.collect{|a| a.collect{|a| a.to_s} * "|"} * "\t"
42
- end
43
-
44
- def self.load(string)
45
- string.split("\t", -1).collect{|l| l.split("|", -1)}
46
- end
47
- end
48
-
49
- class TSVSerializer
50
- def self.dump(tsv)
51
- tsv.to_s
52
- end
53
-
54
- def self.load(string)
55
- TSV.new StringIO.new(string)
56
- end
57
- end
58
-
59
-
60
-
61
- ALIAS = {
62
- :integer => IntegerSerializer,
63
- :float => FloatSerializer,
64
- :integer_array => IntegerArraySerializer,
65
- :marshal => Marshal,
66
- :single => StringSerializer,
67
- :string => StringSerializer,
68
- :list => StringArraySerializer,
69
- :double => StringDoubleArraySerializer,
70
- :tsv => TSVSerializer
71
- }
72
-
73
- CONNECTIONS = {}
74
-
75
- FIELD_INFO_ENTRIES = {
76
- :type => '__tokyocabinet_hash_type',
77
- :serializer => '__tokyocabinet_hash_serializer',
78
- :identifiers => '__tokyocabinet_hash_identifiers',
79
- :fields => '__tokyocabinet_hash_fields',
80
- :key_field => '__tokyocabinet_hash_key_field',
81
- :filename => '__tokyocabinet_hash_filename',
82
- :namespace => '__tokyocabinet_hash_namspace',
83
- :type => '__tokyocabinet_hash_type',
84
- :case_insensitive => '__tokyocabinet_hash_case_insensitive'
85
- }
86
-
87
- FIELD_INFO_ENTRIES.each do |entry, key|
88
- class_eval do
89
- define_method entry.to_s, proc{v = self.original_get_brackets(key); v.nil? ? nil : Marshal.load(v)}
90
- define_method entry.to_s + "=", proc{|value| write unless write?; self.original_set_brackets key, Marshal.dump(value)}
91
- end
92
- end
93
-
94
- def size
95
- keys.length
96
- end
97
-
98
- def delete(key)
99
- raise "Cannot deleted key: closed connection" if not write?
100
- out(key) or raise "Not deleted"
101
- end
102
-
103
- alias original_include? include?
104
- def include?(key)
105
- return nil unless String === key
106
- original_include? key
107
- end
108
-
109
- attr_accessor :serializer, :path_to_db
110
- def serializer=(serializer)
111
-
112
- if ALIAS.include? serializer.to_sym
113
- @serializer = ALIAS[serializer.to_sym]
114
- else
115
- @serializer = serializer
116
- end
117
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s)
118
- end
119
-
120
- alias original_open open
121
- def open(write = false, serializer = nil)
122
- flags = (write ? TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT : TokyoCabinet::BDB::OREADER)
123
-
124
- FileUtils.mkdir_p File.dirname(@path_to_db) unless File.exists?(File.dirname(@path_to_db))
125
- if !self.original_open(@path_to_db, flags)
126
- ecode = self.ecode
127
- raise OpenError, "Open error: #{self.errmsg(ecode)}. Trying to open file #{@path_to_db}"
128
- end
129
-
130
- @write = write
131
-
132
- if @serializer.nil?
133
-
134
- if self.include? FIELD_INFO_ENTRIES[:serializer]
135
- serializer_str = self.original_get_brackets(FIELD_INFO_ENTRIES[:serializer])
136
-
137
- mod = Misc.string2const serializer_str
138
- @serializer = mod
139
-
140
- else
141
- raise "No serializer specified" if (serializer || @serializer).nil?
142
-
143
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], serializer.to_s) unless self.include? FIELD_INFO_ENTRIES[:serializer]
144
- @serializer = serializer
145
- end
146
- end
147
- end
148
-
149
- def write?
150
- @write
151
- end
152
-
153
- def write
154
- self.sync
155
- self.close
156
- self.open(true)
157
- end
158
-
159
- def read
160
- self.sync
161
- self.close
162
- self.open(false)
163
- end
164
-
165
- def initialize(path, write = false, serializer = nil)
166
- super()
167
-
168
- if ALIAS.include? serializer
169
- @serializer = ALIAS[serializer]
170
- else
171
- @serializer = serializer
172
- end
173
-
174
- @path_to_db = path
175
-
176
- if write || ! File.exists?(@path_to_db)
177
- @serializer = Marshal if @serializer.nil?
178
- self.setcache(100000) or raise "Error setting cache"
179
- self.open(true, @serializer)
180
- self.original_set_brackets(FIELD_INFO_ENTRIES[:serializer], @serializer.to_s)
181
- else
182
- self.open(false)
183
- end
184
- end
185
-
186
- def self.get(path, write = false, serializer = nil)
187
- if not (TrueClass === write or FalseClass === write) and serializer.nil?
188
- serializer = write
189
- write = false
190
- end
191
-
192
- if ALIAS.include? serializer
193
- serializer = ALIAS[serializer]
194
- else
195
- serializer = serializer
196
- end
197
-
198
- case
199
- when !File.exists?(path)
200
- CONNECTIONS[path] = self.new(path, true, serializer)
201
- when (not CONNECTIONS.include?(path))
202
- CONNECTIONS[path] = self.new(path, false, serializer)
203
- end
204
-
205
- d = CONNECTIONS[path]
206
-
207
- if write
208
- d.write unless d.write?
209
- else
210
- d.read if d.write?
211
- end
212
-
213
- d
214
- end
215
-
216
- #{{{ ACESSORS
217
-
218
- alias original_get_brackets []
219
- def [](key)
220
- return nil unless String === key
221
- result = self.original_get_brackets(key)
222
- if result.nil? or (String === result and result =~ /__Ref:/)
223
- result
224
- else
225
- @serializer.load(result)
226
- end
227
- end
228
-
229
- alias original_set_brackets []=
230
- def []=(key,value)
231
- raise KeyFormatError, "Key must be a String, its #{key.class.to_s}" unless String === key
232
- raise "Closed TCHash connection" unless write?
233
- if String === value and value =~ /^__Ref/
234
- self.original_set_brackets(key, value)
235
- else
236
- self.original_set_brackets(key, serializer.dump(value))
237
- end
238
- end
239
-
240
- def values_at(*args)
241
- args.collect do |key|
242
- self[key]
243
- end
244
- end
245
-
246
- alias original_keys keys
247
- def keys
248
- list = self.original_keys
249
- indexes = FIELD_INFO_ENTRIES.values.collect do |field| list.index(field) end.compact.sort.reverse
250
- indexes.each do |index| list.delete_at index end
251
- list
252
- end
253
-
254
- alias original_values values
255
- def values
256
- values = self.original_values
257
- keys = self.original_keys
258
- indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
259
- indexes.each do |index| values.delete_at index end
260
-
261
- values.collect{|v| serializer.load(v)}
262
- end
263
-
264
- alias real_original_each each
265
- # This version of each fixes a problem in ruby 1.9. It also
266
- # removes the special entries
267
- def each(&block)
268
- values = self.original_values
269
- keys = self.original_keys
270
- indexes = FIELD_INFO_ENTRIES.values.collect do |field| keys.index(field) end.compact.sort.reverse
271
- indexes.sort.reverse.each do |index| values.delete_at(index); keys.delete_at(index) end
272
-
273
- keys.zip(values.collect{|v| serializer.load v}).each &block
274
- end
275
-
276
- def each(&block)
277
- skippable = Set.new(FIELD_INFO_ENTRIES.values)
278
- real_original_each do |key, value|
279
- block.call(key, serializer.load(value)) unless skippable.include? key
280
- end
281
- end
282
-
283
- alias original_each each
284
-
285
- def collect(&block)
286
- skippable = Set.new(FIELD_INFO_ENTRIES.values)
287
- res = []
288
- real_original_each do |key,value|
289
- next if skippable.include? key
290
- if block_given?
291
- block.call(key, serializer.load(value))
292
- else
293
- res << [key, value]
294
- end
295
- end
296
- res
297
- end
298
-
299
- def merge!(data)
300
- raise "Closed TCHash connection" unless write?
301
- serialized =
302
- data.collect{|key, values| [key.to_s, serializer.dump(values)] }
303
- if tranbegin
304
- serialized.each do |key, values|
305
- self.putasync(key, values)
306
- end
307
- trancommit
308
- else
309
- raise "Transaction cannot initiate"
310
- end
311
- end
312
-
313
- def clear
314
- special_values = FIELD_INFO_ENTRIES.values.sort.collect{|k| self.original_get_brackets(k)}
315
- restore = ! write?
316
- write if restore
317
- vanish
318
- FIELD_INFO_ENTRIES.values.sort.zip(special_values).each{|k,v|
319
- self.original_set_brackets(k,v) unless v.nil?
320
- }
321
- read if restore
322
- end
323
-
324
- end
data/lib/rbbt/util/tsv.rb DELETED
@@ -1,236 +0,0 @@
1
- require 'rbbt/util/resource'
2
- require 'rbbt/util/misc'
3
- require 'rbbt/util/open'
4
- require 'rbbt/util/tc_hash'
5
- require 'rbbt/util/tmpfile'
6
- require 'rbbt/util/log'
7
- require 'rbbt/util/persistence'
8
- require 'digest'
9
- require 'fileutils'
10
-
11
- require 'rbbt/util/tsv/parse'
12
- require 'rbbt/util/tsv/accessor'
13
- require 'rbbt/util/tsv/manipulate'
14
- require 'rbbt/util/tsv/index'
15
- require 'rbbt/util/tsv/attach'
16
- require 'rbbt/util/tsv/resource'
17
-
18
- class TSV
19
-
20
- ESCAPES = {
21
- "\n" => "[[NL]]",
22
- "\t" => "[[TAB]]",
23
- }
24
-
25
- def self.escape(text)
26
- ESCAPES.each do |char,replacement|
27
- text = text.gsub(char, replacement)
28
- end
29
- text
30
- end
31
-
32
- def self.unescape(text)
33
- ESCAPES.each do |char,replacement|
34
- text = text.gsub(replacement, char)
35
- end
36
- text
37
- end
38
-
39
- def self.headers(file, options = {})
40
-
41
- ## Remove options from filename
42
- if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
43
- options = Misc.add_defaults options, Misc.string2hash($2)
44
- file = $1
45
- end
46
-
47
- fields = case
48
- when Open.can_open?(file)
49
- Open.open(file, :grep => options[:grep]) do |f| TSV.parse_header(f, options[:sep], options[:header_hash]).values_at(0, 1).flatten end
50
- when File === file
51
- file = Open.grep(file, options[:grep]) if options[:grep]
52
- TSV.parse_header(file, options[:sep], options[:header_hash]).values_at(0, 1).flatten
53
- else
54
- raise "File #{file.inspect} not found"
55
- end
56
-
57
- if fields.compact.empty?
58
- nil
59
- else
60
- fields
61
- end
62
- end
63
-
64
- def initialize(file = {}, type = nil, options = {})
65
- # Process Options
66
-
67
- if Hash === type
68
- options = type
69
- type = nil
70
- end
71
-
72
- ## Remove options from filename
73
- if String === file and file =~/(.*?)#(.*)/ and File.exists? $1
74
- options = Misc.add_defaults options, Misc.string2hash($2)
75
- file = $1
76
- end
77
-
78
- options = Misc.add_defaults options, :persistence => false, :type => type, :in_situ_persistence => true
79
-
80
- # Extract Filename
81
-
82
- file, extra = file if Array === file and file.length == 2 and Hash === file.last
83
-
84
- @filename = Misc.process_options options, :filename
85
- @filename ||= case
86
- when Resource::Path === file
87
- file
88
- when (String === file and File.exists? file)
89
- File.expand_path file
90
- when String === file
91
- file
92
- when File === file
93
- File.expand_path file.path
94
- when TSV === file
95
- File.expand_path file.filename
96
- when (Persistence::TSV === file and file.filename)
97
- File.expand_path file.filename
98
- else
99
- file.class.to_s
100
- end
101
-
102
- # Process With Persistence
103
- # Use filename to identify the persistence
104
- # Several inputs supported
105
- # Filename or File: Parsed
106
- # Hash: Encapsulated, empty info
107
- # TSV: Duplicate
108
- case
109
- when block_given?
110
- @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options.merge(:force_array => true)) do |file, options, filename| yield file, options, filename end
111
- extra.each do |key, values|
112
- self.send("#{ key }=".to_sym, values) if self.respond_to? "#{ key }=".to_sym
113
- end if not extra.nil?
114
-
115
- else
116
-
117
- case
118
- when Array === file
119
- @data = Hash[file.collect{|v|
120
- [v,[]]
121
- }]
122
- self.key_field = options[:key]
123
- self.fields = options[:fields]
124
- self.type = options[:type] || :double
125
- when Hash === file
126
- @data = file
127
- self.key_field = options[:key]
128
- self.fields = options[:fields]
129
- self.type = options[:type] || :double
130
- when TSV === file
131
- @data = file.data
132
- self.key_field = options[:key] || file.key_field
133
- self.fields = options[:fields] || file.fields
134
- self.type = options[:type] || file.type
135
- when Persistence::TSV === file
136
- Log.debug("Reopening persistence file #{ file.path_to_db }")
137
- @data = file
138
- %w(case_insensitive namespace identifiers datadir fields key_field type filename cast).each do |key|
139
- if @data.respond_to?(key.to_sym) and self.respond_to?("#{key}=".to_sym)
140
- self.send "#{key}=".to_sym, @data.send(key.to_sym)
141
- end
142
- end
143
- self.key_field = options[:key] || file.key_field
144
- self.fields = options[:fields] || file.fields
145
- self.type = options[:type] || file.type
146
-
147
- file.key_field = self.key_field
148
- file.fields = self.fields
149
- file.type = self.type
150
- else
151
- in_situ_persistence = Misc.process_options(options, :in_situ_persistence)
152
- @data, extra = Persistence.persist(file, :TSV, :tsv_extra, options) do |file, options, filename, persistence_file|
153
- data, extra = nil
154
-
155
- if in_situ_persistence and persistence_file
156
- options.merge! :persistence_data => Persistence::TSV.get(persistence_file, true, :double)
157
- end
158
-
159
- begin
160
- case
161
- ## Parse source
162
- when Resource::Path === file #(String === file and file.respond_to? :open)
163
- data, extra = TSV.parse(file.open(:grep => options[:grep]) , options)
164
- extra[:namespace] ||= file.namespace
165
- extra[:datadir] ||= file.datadir
166
- when StringIO === file
167
- data, extra = TSV.parse(file, options)
168
- when Open.can_open?(file)
169
- Open.open(file, :grep => options[:grep]) do |f|
170
- data, extra = TSV.parse(f, options)
171
- end
172
- when File === file
173
- path = file.path
174
- file = Open.grep(file, options[:grep]) if options[:grep]
175
- data, extra = TSV.parse(file, options)
176
- when IO === file
177
- file = Open.grep(file, options[:grep]) if options[:grep]
178
- data, extra = TSV.parse(file, options)
179
- when block_given?
180
- data
181
- else
182
- raise "Unknown input in TSV.new #{file.inspect}"
183
- end
184
-
185
- extra[:filename] = filename
186
- rescue Exception
187
- FileUtils.rm persistence_file if persistence_file and File.exists?(persistence_file)
188
- raise $!
189
- end
190
-
191
- if Persistence::TSV === data
192
- %w(case_insensitive namespace identifiers fields key_field type filename cast).each do |key|
193
- if extra.include? key.to_sym
194
- if data.respond_to? "#{key}=".to_sym
195
- data.send("#{key}=".to_sym, extra[key.to_sym])
196
- end
197
- end
198
- end
199
- data.read
200
- end
201
-
202
- [data, extra]
203
- end
204
- end
205
- end
206
-
207
- if not extra.nil?
208
- %w(case_insensitive namespace identifiers fields key_field type filename cast).each do |key|
209
- if extra.include? key.to_sym
210
- self.send("#{key}=".to_sym, extra[key.to_sym])
211
- #if @data.respond_to? "#{key}=".to_sym
212
- # @data.send("#{key}=".to_sym, extra[key.to_sym])
213
- #end
214
- end
215
- end
216
- end
217
- end
218
-
219
- def write
220
- @data.write if @data.respond_to? :write
221
- end
222
-
223
- def read
224
- @data.read if @data.respond_to? :read
225
- end
226
-
227
- def write?
228
- @data.write? if @data.respond_to? :write
229
- end
230
-
231
- def clear
232
- @data.clear
233
- end
234
-
235
-
236
- end