rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -1,41 +0,0 @@
1
- require 'rbbt/util/tsv'
2
-
3
- class TSV
4
- def self.keys(file, sep = "\t")
5
- CMD.cmd("cut -f 1 -d'#{sep}' '#{file}'|grep -v ^#").read.split("\n")
6
- end
7
-
8
- def self.field_match_counts(file, values)
9
- key_field, fields = TSV.parse_header(Open.open(file))
10
- fields.unshift key_field
11
-
12
- counts = {}
13
- TmpFile.with_file do |tmpfile|
14
- if Array === values
15
- Open.write(tmpfile, values * "\n")
16
- else
17
- FileUtils.ln_s values, tmpfile
18
- end
19
-
20
- fields.each_with_index do |field,i|
21
- counts[field] = begin
22
- CMD.cmd("cat #{ file } |grep -v ^#|cut -f #{i + 1}|tr '|' '\\n' |sort -u |grep [[:alpha:]]|grep -f #{tmpfile} -F -w").read.count("\n")
23
- rescue
24
- 0
25
- end
26
- end
27
- end
28
-
29
- counts
30
- end
31
- end
32
-
33
- module Open
34
- def self.tsv(file, *args)
35
- TSV.new file, *args
36
- end
37
-
38
- def self.index(file, *args)
39
- TSV.index file, *args
40
- end
41
- end
@@ -1,324 +0,0 @@
1
- require 'rbbt/util/misc'
2
- require 'progress-bar'
3
- class TSV
4
-
5
- def self.parse_fields(io, delimiter = "\t")
6
- return [] if io.nil?
7
-
8
- ## split with delimiter, do not remove empty
9
- fields = io.split(delimiter, -1)
10
-
11
- fields
12
- end
13
-
14
- def self.parse_header(stream, sep = nil, header_hash = nil)
15
- sep = /\t/ if sep.nil?
16
- header_hash = "#" if header_hash.nil?
17
-
18
- fields, key_field = nil
19
- options = {}
20
-
21
- # Get line
22
-
23
- line = stream.gets
24
- raise "Empty content" if line.nil?
25
-
26
- # Process options line
27
-
28
- if line and line =~ /^#{header_hash}: (.*)/
29
- options = Misc.string2hash $1
30
- line = stream.gets
31
- end
32
-
33
- # Determine separator
34
-
35
- sep = options[:sep] if options[:sep]
36
-
37
- # Process fields line
38
-
39
- if line and line =~ /^#{header_hash}/
40
- line.chomp!
41
- fields = parse_fields(line, sep)
42
- key_field = fields.shift
43
- key_field = key_field[(0 + header_hash.length)..-1] # Remove initial hash character
44
- line = stream.gets
45
- end
46
-
47
- # Return fields, options and first line
48
-
49
- return key_field, fields, options, line
50
- end
51
-
52
- def self.parse(stream, options = {})
53
-
54
- # Prepare options
55
-
56
- key_field, other_fields, more_options, line = TSV.parse_header(stream, options[:sep], options[:header_hash])
57
-
58
- options = Misc.add_defaults options, more_options
59
-
60
- options = Misc.add_defaults options,
61
- :monitor => false,
62
- :case_insensitive => false,
63
- :type => :double,
64
- :namespace => nil,
65
- :identifiers => nil,
66
-
67
- :merge => false,
68
- :keep_empty => (options[:type] != :flat and options[:type] != :single),
69
- :cast => nil,
70
-
71
- :header_hash => '#',
72
- :sep => "\t",
73
- :sep2 => "|",
74
-
75
- :key => 0,
76
- :fields => nil,
77
-
78
- :fix => nil,
79
- :exclude => nil,
80
- :select => nil,
81
- :grep => nil
82
-
83
- monitor = Misc.process_options options, :monitor
84
-
85
- header_hash, sep, sep2 =
86
- Misc.process_options options, :header_hash, :sep, :sep2
87
-
88
- key, fields =
89
- Misc.process_options options, :key, :fields
90
-
91
- if key_field.nil?
92
- key_pos = key
93
- other_pos = fields
94
- else
95
- all_fields = [key_field].concat other_fields
96
-
97
- key_pos = Misc.field_position(all_fields, key)
98
-
99
- if String === fields or Symbol === fields
100
- fields = [fields]
101
- end
102
-
103
- if fields.nil?
104
- other_pos = (0..(all_fields.length - 1)).to_a
105
- other_pos.delete key_pos
106
- else
107
- if Array === fields
108
- other_pos = fields.collect{|field| Misc.field_position(all_fields, field)}
109
- else
110
- other_pos = Misc.field_position(all_fields, fields)
111
- end
112
- end
113
-
114
- key_field = all_fields[key_pos]
115
- fields = all_fields.values_at *other_pos
116
- end
117
-
118
- case_insensitive, type, namespace, merge, keep_empty, cast =
119
- Misc.process_options options, :case_insensitive, :type, :namespace, :merge, :keep_empty, :cast
120
- fix, exclude, select, grep =
121
- Misc.process_options options, :fix, :exclude, :select, :grep
122
-
123
- exclude ||= Misc.process_options options, :reject if options.include? :reject
124
-
125
- if monitor and (stream.respond_to?(:size) or (stream.respond_to?(:stat) and stream.stat.respond_to? :size)) and stream.respond_to?(:pos)
126
- size = case
127
- when stream.respond_to?(:size)
128
- stream.size
129
- else
130
- stream.stat.size
131
- end
132
- desc = "Parsing Stream"
133
- step = 100
134
- if Hash === monitor
135
- desc = monitor[:desc] if monitor.include? :desc
136
- step = monitor[:step] if monitor.include? :step
137
- end
138
- progress_monitor = Progress::Bar.new(size, 0, step, desc)
139
- else
140
- progress_monitor = nil
141
- end
142
-
143
- #{{{ Process rest
144
- data = options[:persistence_data] || {}
145
- if Persistence::TSV === data
146
- serializer = case
147
- when ((cast == "to_i" or cast == :to_i) and type == :single)
148
- :integer
149
- when ((cast == "to_i" or cast == :to_i) and (type == :flat or type == :list))
150
- :integer_array
151
- when (type == :list or type == :flat)
152
- :list
153
- when type == :single
154
- :single
155
- else
156
- :double
157
- end
158
- data.serializer = serializer
159
- end
160
-
161
-
162
- single = type.to_sym != :double
163
- max_cols = 0
164
- while line do
165
- line.chomp!
166
-
167
- progress_monitor.tick(stream.pos) if progress_monitor
168
-
169
- if line.empty? or
170
- (exclude and exclude.call(line)) or
171
- (select and not select.call(line))
172
-
173
- line = stream.gets
174
- next
175
- end
176
-
177
- line = fix.call line if fix
178
- break if not line
179
-
180
-
181
- if header_hash and not header_hash.empty? and line =~ /^#{header_hash}/
182
- line = stream.gets
183
- next
184
- end
185
-
186
- # Chunk fields
187
- parts = parse_fields(line, sep)
188
-
189
- # Get next line
190
- line = stream.gets
191
-
192
- # Get id field
193
- next if parts[key_pos].nil? || parts[key_pos].empty?
194
-
195
- if single
196
- ids = parse_fields(parts[key_pos], sep2)
197
- ids.collect!{|id| id.downcase} if case_insensitive
198
- ids = ids.reject{|_id| _id.empty?}.uniq
199
-
200
- id = ids.shift
201
- ids.each do |id2| data[id2] = "__Ref:#{id}" unless data.include? id2 end
202
-
203
- next if data.include?(id) and type != :flat
204
-
205
- if other_pos.nil? or (fields == nil and type == :flat)
206
- other_pos = (0..(parts.length - 1)).to_a
207
- other_pos.delete key_pos
208
- end
209
-
210
- if type == :flat
211
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}.flatten
212
- else
213
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2).first}
214
- end
215
-
216
- extra.collect! do |elem|
217
- case
218
- when (String === cast or Symbol === cast)
219
- elem.send(cast.to_s)
220
- when Proc === cast
221
- cast.call elem
222
- end
223
- end if cast
224
-
225
- case
226
- when type == :single
227
- data[id] = extra.first
228
- when type == :flat
229
- if data.include? id
230
- data[id] = data[id] + extra
231
- else
232
- data[id] = extra
233
- end
234
- else
235
- data[id] = extra
236
- end
237
-
238
- max_cols = extra.size if extra.size > (max_cols || 0) unless type == :flat
239
- else
240
- ids = parse_fields(parts[key_pos], sep2)
241
- ids.collect!{|id| id.downcase} if case_insensitive
242
- ids = ids.reject{|_id| _id.empty?}.uniq
243
-
244
- next if ids.empty?
245
-
246
- id = ids.shift
247
- while data.include? id and data[id] =~ /__Ref:(.*)/
248
- data[id] = data[$1].collect{|e| e.dup}
249
- end
250
-
251
- all_ids = [id]
252
- ids.each do |id2|
253
- if data.include? id2
254
- while data[id2] =~ /__Ref:(.*)/
255
- data[id2] = data[$1].collect{|e| e.dup}
256
- end
257
- all_ids << id2
258
- else
259
- data[id2] = "__Ref:#{id}"
260
- end
261
- end
262
-
263
- if other_pos.nil? or (fields == nil and type == :flat)
264
- other_pos = (0..(parts.length - 1)).to_a
265
- other_pos.delete key_pos
266
- end
267
-
268
- extra = parts.values_at(*other_pos).collect{|f| parse_fields(f, sep2)}
269
- extra.collect! do |list|
270
- case
271
- when (String === cast or Symbol === cast)
272
- list.collect{|elem| elem.send(cast.to_s)}
273
- when Proc === cast
274
- list.collect{|elem| cast.call elem}
275
- end
276
- end if cast
277
-
278
- max_cols = extra.size if extra.size > (max_cols || 0)
279
-
280
- all_ids.each do |id|
281
- if not merge
282
- data[id] = extra unless data.include? id
283
- else
284
- if not data.include? id
285
- data[id] = extra
286
- else
287
- entry = data[id]
288
- while entry =~ /__Ref:(.*)/ do entry = data[$1] end
289
- extra.each_with_index do |f, i|
290
- if f.empty?
291
- next unless keep_empty
292
- f= [""]
293
- end
294
- entry[i] ||= []
295
- entry[i] = entry[i].concat f
296
- end
297
- data[id] = entry
298
- end
299
- end
300
- end
301
- end
302
- end
303
-
304
- if keep_empty and max_cols > 0 and not Persistence::TSV === data
305
- data.each do |key, values|
306
- next if values =~ /__Ref:/
307
- new_values = values
308
- max_cols.times do |i|
309
- if type == :double
310
- new_values[i] = [""] if new_values[i].nil? or new_values[i].empty?
311
- else
312
- new_values[i] = "" if new_values[i].nil?
313
- end
314
- end
315
- data[key] = new_values
316
- end
317
- end
318
-
319
- fields = nil if Fixnum === fields or (Array === fields and fields.select{|f| Fixnum === f}.any?)
320
- fields ||= other_fields
321
- [data, {:key_field => key_field, :fields => fields, :type => type, :case_insensitive => case_insensitive, :namespace => namespace, :identifiers => options[:identifiers], :cast => (cast.nil? ? false : cast)}]
322
- end
323
-
324
- end
@@ -1,88 +0,0 @@
1
- require 'rbbt/util/resource'
2
-
3
- module Resource
4
- module Path
5
- def tsv(type = nil, options = {})
6
- if options.empty? and Hash === type
7
- options, type = type, nil
8
- end
9
-
10
- tsv = TSV.new self, type, options
11
- tsv.namespace ||= namespace
12
- tsv
13
- end
14
-
15
- def namespace_or_dirname
16
- namespace || File.basename(File.dirname(self))
17
- end
18
-
19
- def to_yaml(opts = {})
20
- YAML.quick_emit( nil, opts ) { |out|
21
- out.scalar( taguri, self, :plain )
22
- }
23
- end
24
-
25
- def index(options = {})
26
- TSV.index self, options
27
- end
28
-
29
- def pos_index(pos_field, options = {})
30
- TSV.pos_index self, pos_field, options
31
- end
32
-
33
- def range_index(start_field, end_field, options = {})
34
- TSV.range_index self, start_field, end_field, options
35
- end
36
-
37
- def open(options = {})
38
- produce
39
- Open.open(self.find, options)
40
- end
41
-
42
- def read(options = {})
43
- produce
44
- Open.read(self.find, options)
45
- end
46
-
47
- def fields(sep = nil, header_hash = nil)
48
- produce
49
- key, fields, options, line = TSV.parse_header(self.open, sep, header_hash)
50
- namespace = options[:namespace] if options.include? namespace
51
- fields.collect{|f| f.extend TSV::Field; f.namespace = namespace || namespace_or_dirname ;f}
52
- end
53
-
54
- def all_fields(sep = nil, header_hash = nil)
55
- produce
56
- key, fields, options, line = TSV.parse_header(self.open, sep, header_hash)
57
- namespace = options[:namespace] if options.include? namespace
58
- [key,fields].flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace || namespace_or_dirname ;f}
59
- end
60
-
61
- def fields_in_namespace(sep = nil, header_hash = nil)
62
- produce
63
- TSV.parse_header(self.open, sep, header_hash)[1].collect{|f| f.extend TSV::Field; f.namespace = namespace ;f}.select{|f| f.namespace == namespace}
64
- end
65
-
66
- def all_namespace_fields(namespace, sep = /\t/, header_hash = "#")
67
- produce
68
- key_field, fields = TSV.parse_header(self.open, sep, header_hash).values_at(0, 1).flatten.collect{|f| f.extend TSV::Field; f.namespace = namespace; f}.select{|f| f.namespace == namespace}
69
- end
70
-
71
- def identifier_files
72
- dir = self.find.sub(self,'')
73
- if dir.nil? or dir.empty?
74
- path = File.join(File.dirname(self.find), 'identifiers')
75
- path.extend Path
76
- path.pkg_module = pkg_module
77
- if path.exists?
78
- [path]
79
- else
80
- []
81
- end
82
- else
83
- identifier_files = Misc.find_files_back_to(self.find, 'identifiers', dir)
84
- return identifier_files.collect{|f| Resource::Path.path(f)}
85
- end
86
- end
87
- end
88
- end