rbbt-util 3.2.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. data/README.rdoc +65 -0
  2. data/bin/run_workflow.rb +142 -69
  3. data/lib/rbbt-util.rb +3 -3
  4. data/lib/rbbt.rb +12 -3
  5. data/lib/rbbt/annotations.rb +215 -0
  6. data/lib/rbbt/{util/fix_width_table.rb → fix_width_table.rb} +17 -13
  7. data/lib/rbbt/persist.rb +164 -0
  8. data/lib/rbbt/persist/tsv.rb +135 -0
  9. data/lib/rbbt/resource.rb +100 -0
  10. data/lib/rbbt/resource/path.rb +180 -0
  11. data/lib/rbbt/resource/rake.rb +48 -0
  12. data/lib/rbbt/resource/util.rb +111 -0
  13. data/lib/rbbt/resource/with_key.rb +28 -0
  14. data/lib/rbbt/tsv.rb +134 -0
  15. data/lib/rbbt/tsv/accessor.rb +345 -0
  16. data/lib/rbbt/tsv/attach.rb +183 -0
  17. data/lib/rbbt/tsv/attach/util.rb +277 -0
  18. data/lib/rbbt/{util/tsv/filters.rb → tsv/filter.rb} +76 -37
  19. data/lib/rbbt/tsv/index.rb +453 -0
  20. data/lib/rbbt/tsv/manipulate.rb +361 -0
  21. data/lib/rbbt/tsv/parser.rb +231 -0
  22. data/lib/rbbt/tsv/serializers.rb +79 -0
  23. data/lib/rbbt/tsv/util.rb +67 -0
  24. data/lib/rbbt/util/R.rb +3 -3
  25. data/lib/rbbt/util/chain_methods.rb +64 -0
  26. data/lib/rbbt/util/cmd.rb +17 -13
  27. data/lib/rbbt/util/excel2tsv.rb +4 -3
  28. data/lib/rbbt/util/log.rb +1 -0
  29. data/lib/rbbt/util/misc.rb +296 -285
  30. data/lib/rbbt/util/open.rb +9 -2
  31. data/lib/rbbt/util/persistence.rb +1 -1
  32. data/lib/rbbt/util/task/job.rb +3 -1
  33. data/lib/rbbt/workflow.rb +193 -0
  34. data/lib/rbbt/workflow/accessor.rb +249 -0
  35. data/lib/rbbt/workflow/annotate.rb +60 -0
  36. data/lib/rbbt/workflow/soap.rb +100 -0
  37. data/lib/rbbt/workflow/step.rb +102 -0
  38. data/lib/rbbt/workflow/task.rb +76 -0
  39. data/test/rbbt/resource/test_path.rb +12 -0
  40. data/test/rbbt/test_annotations.rb +106 -0
  41. data/test/rbbt/{util/test_fix_width_table.rb → test_fix_width_table.rb} +8 -9
  42. data/test/rbbt/test_resource.rb +66 -0
  43. data/test/rbbt/test_tsv.rb +332 -0
  44. data/test/rbbt/test_workflow.rb +102 -0
  45. data/test/rbbt/tsv/test_accessor.rb +163 -0
  46. data/test/rbbt/{util/tsv → tsv}/test_attach.rb +86 -43
  47. data/test/rbbt/{util/tsv/test_filters.rb → tsv/test_filter.rb} +31 -13
  48. data/test/rbbt/tsv/test_index.rb +284 -0
  49. data/test/rbbt/{util/tsv → tsv}/test_manipulate.rb +35 -105
  50. data/test/rbbt/util/test_R.rb +1 -1
  51. data/test/rbbt/util/test_chain_methods.rb +22 -0
  52. data/test/rbbt/util/test_filecache.rb +0 -1
  53. data/test/rbbt/util/test_misc.rb +97 -79
  54. data/test/rbbt/util/test_open.rb +1 -0
  55. data/test/rbbt/util/test_tmpfile.rb +1 -1
  56. data/test/rbbt/workflow/test_soap.rb +103 -0
  57. data/test/rbbt/workflow/test_step.rb +142 -0
  58. data/test/rbbt/workflow/test_task.rb +84 -0
  59. data/test/test_helper.rb +7 -7
  60. metadata +80 -54
  61. data/lib/rbbt/util/rake.rb +0 -176
  62. data/lib/rbbt/util/resource.rb +0 -355
  63. data/lib/rbbt/util/task.rb +0 -183
  64. data/lib/rbbt/util/tc_hash.rb +0 -324
  65. data/lib/rbbt/util/tsv.rb +0 -236
  66. data/lib/rbbt/util/tsv/accessor.rb +0 -312
  67. data/lib/rbbt/util/tsv/attach.rb +0 -416
  68. data/lib/rbbt/util/tsv/index.rb +0 -419
  69. data/lib/rbbt/util/tsv/manipulate.rb +0 -300
  70. data/lib/rbbt/util/tsv/misc.rb +0 -41
  71. data/lib/rbbt/util/tsv/parse.rb +0 -324
  72. data/lib/rbbt/util/tsv/resource.rb +0 -88
  73. data/lib/rbbt/util/workflow.rb +0 -135
  74. data/lib/rbbt/util/workflow/soap.rb +0 -116
  75. data/test/rbbt/util/test_persistence.rb +0 -201
  76. data/test/rbbt/util/test_rake.rb +0 -54
  77. data/test/rbbt/util/test_resource.rb +0 -77
  78. data/test/rbbt/util/test_task.rb +0 -133
  79. data/test/rbbt/util/test_tc_hash.rb +0 -144
  80. data/test/rbbt/util/test_tsv.rb +0 -221
  81. data/test/rbbt/util/test_workflow.rb +0 -135
  82. data/test/rbbt/util/tsv/test_accessor.rb +0 -150
  83. data/test/rbbt/util/tsv/test_index.rb +0 -241
  84. data/test/rbbt/util/tsv/test_parse.rb +0 -87
  85. data/test/rbbt/util/tsv/test_resource.rb +0 -9
@@ -0,0 +1,277 @@
1
+ module TSV
2
+
3
# Appends columns taken from +other+ to every row of this table, matching
# rows by identical key.
#
# other  - table-like object; this method calls #fields, #type, #include?
#          and #[] on it.
# fields - selectors handed to +values_at+ on each row of +other+ and on
#          +other.fields+. Defaults to other's fields minus this table's
#          key field and current fields.
#
# Rows whose key is absent from +other+ are padded with one empty cell per
# attached field ([] when this table's type is :double, "" otherwise).
#
# Returns the value assigned to +self.fields+.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  double       = type == :double
  other_double = other.type == :double

  through do |key, values|
    if other.include? key
      new_values = other[key].values_at(*fields)
      # Adapt the cell shape of +other+ to the cell shape of this table.
      new_values.collect! { |v| [v] } if double && !other_double
      new_values.collect! { |v| v.nil? ? nil : v.first } if !double && other_double
      self[key] = self[key].concat new_values
    else
      # Build a fresh object for every padding cell. `[[]] * n` would repeat
      # one shared array, so a later in-place change to one cell would show
      # up in all of them.
      padding = if double
                  Array.new(fields.length) { [] }
                else
                  Array.new(fields.length) { "".dup }
                end
      self[key] = self[key].concat padding
    end
  end

  self.fields = self.fields.concat other.fields.values_at(*fields)
end
23
+
24
# Appends columns from +other+ to every row of this table, using the
# value(s) found in this table's +source+ field as keys into +other+.
#
# other   - table to read from; converted with +other.tsv+ when it is not
#           already a TSV.
# source  - field of this table holding the keys to look up in +other+.
# options - :fields  => fields of +other+ to attach (defaults to other's
#                       fields minus this table's key field and fields).
#           :one2one => when truthy, keep only the first value that each
#                       source key contributes to a field.
#
# Returns self.
def attach_source_key(other, source, options = {})
  fields  = Misc.process_options options, :fields
  one2one = Misc.process_options options, :one2one

  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv(:persistence => :no_create) unless TSV === other
  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  source_pos = identify_field source

  with_unnamed do
    through do |key, values|
      source_keys = values[source_pos]

      if source_keys.nil? || (Array === source_keys && source_keys.empty?)
        # Nothing to look up: pad the row with one empty cell per field.
        padding = if type == :double
                    field_positions.collect { [] }
                  else
                    # The original multiplied by the positions Array itself,
                    # which raises TypeError; the cell count is its length.
                    [nil] * field_positions.length
                  end
        self[key] = values.concat padding
      elsif Array === source_keys
        # One list of cells per source key, each cell wrapped in an Array.
        per_source_key = source_keys.collect do |source_key|
          cells = field_positions.collect do |pos|
            if pos == :key
              [source_key]
            elsif other.include? source_key
              v = other[source_key][pos]
              Array === v ? v : [v]
            else
              [nil]
            end
          end

          cells.collect! { |v| v[0..0] } if one2one
          cells
        end

        # Regroup by field, then merge what each source key contributed.
        merged = Misc.zip_fields(per_source_key).each { |field_entry| field_entry.flatten! }

        self[key] = values.concat merged
      else
        source_key = source_keys
        looked_up = field_positions.collect do |pos|
          if pos == :key
            source_key
          elsif other.include? source_key
            v = other[source_key][pos]
            Array === v ? v.first : v
          else
            nil
          end
        end

        self[key] = values.concat looked_up
      end
    end
  end

  self.fields = self.fields.concat field_names
  self
end
95
+
96
# Appends columns from +other+ to every row of this table, translating each
# key of this table into keys of +other+ through +index+.
#
# other  - table to read from; converted with +other.tsv+ when it is not
#          already a TSV.
# index  - object answering #[] with a list of keys of +other+ (or nil) for
#          a key of this table.
# fields - field or list of fields of +other+ to attach (defaults to other's
#          fields minus this table's key field and fields).
#
# Returns the value assigned to +self.fields+.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
  fields = [fields] unless Array === fields

  other = other.tsv unless TSV === other
  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  length = self.fields.length
  through do |key, values|
    source_keys = index[key]
    all_new_values = []

    unless source_keys.nil? || source_keys.empty?
      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          if pos == :key
            other.type == :double ? [source_key] : source_key
          else
            other[source_key][pos]
          end
        end

        # Adapt the cell shape of +other+ to the cell shape of this table.
        new_values.collect! { |v| v.nil? ? [[]] : [v] } if type == :double && other.type != :double
        new_values.collect! { |v| v.nil? ? nil : v.first } if type != :double && other.type == :double
        all_new_values << new_values
      end
    end

    if all_new_values.empty?
      # Nothing found: pad with one fresh empty cell per attached field.
      # `[x] * n` would repeat a single shared object in every cell.
      filler = if type == :double
                 Array.new(field_positions.length) { [] }
               else
                 Array.new(field_positions.length) { "".dup }
               end
      all_new_values = [filler]
    end

    # NOTE(review): the fallback row is sized by the number of attached
    # fields, as in the original; confirm this is not meant to be +length+.
    current = self[key] || Array.new(fields.length) { [] }

    # Cells beyond the fields this table had on entry are folded into the
    # values being attached.
    if current.length > length
      all_new_values << current.slice!(length..current.length - 1)
    end

    if type == :double
      all_new_values = TSV.zip_fields(all_new_values).collect { |l| l.flatten }
    else
      all_new_values = all_new_values.first
    end

    current += all_new_values

    self[key] = current
  end

  self.fields = self.fields.concat field_names
end
158
+
159
+ #{{{ Attach Helper
160
+
161
+ # May make an extra index!
162
# Finds, for each pair of consecutive files, a field name they share.
#
# files   - ordered list of objects answering #all_fields (the first one
#           also #all_namespace_fields when :in_namespace is given).
# options - :in_namespace => restricts the candidate fields of the first
#           file: to that single field when the file has a field with that
#           exact name, otherwise to whatever
#           +all_namespace_fields(in_namespace)+ returns.
#
# Returns a list of [field, file] pairs, or nil when +files+ is empty or
# two consecutive files share no field.
def self.find_path(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false
  in_namespace = options[:in_namespace]

  # With no files there is nothing to link; report "no path" instead of
  # failing on files.last below.
  return nil if files.empty?

  ids = if in_namespace
          first_ids = if files.first.all_fields.include? in_namespace
                        [in_namespace]
                      else
                        files.first.all_namespace_fields(in_namespace)
                      end
          [first_ids] + files[1..-1].collect { |f| f.all_fields }
        else
          files.collect { |f| f.all_fields }
        end

  id_list = []

  # Compare each file's fields with those of the file that follows it.
  ids.each_cons(2) do |list, following|
    match = list.select { |field| following.any? { |f| field == f } }
    return nil if match.empty?
    id_list << match.first
  end

  # Make sure the path ends on the first field of the last file.
  last_key = files.last.all_fields.first
  id_list << last_key if id_list.last != last_key

  # zip stops at the length of id_list, so trailing files without an id
  # are left out.
  id_list.zip(files)
end
195
+
196
# Builds an index by chaining per-file indexes along the path that
# +find_path+ returns for +files+.
#
# files   - ordered list of files, passed to +find_path+.
# options - :in_namespace  => forwarded to +find_path+ through +options+.
#           :persist_input => passed as :persist when indexing every file
#                             after the first one.
#
# Returns the resulting index, or nil when +find_path+ finds no path.
def self.build_traverse_index(files, options = {})
  options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
  persist_input = options[:persist_input]

  path = find_path(files, options)

  return nil if path.nil?

  traversal_ids = path.collect { |p| p.first }

  Log.low "Found Traversal: #{traversal_ids * " => "}"

  data_key, data_file = path.shift
  data_index = if data_key == data_file.key_field
                 Log.debug "Data index not required '#{data_file.key_field}' => '#{data_key}'"
                 nil
               else
                 Log.debug "Data index required"
                 data_file.index :target => data_key, :fields => data_file.key_field, :persist => false
               end

  current_index = data_index
  current_key = data_key
  until path.empty?
    next_key, next_file = path.shift

    if current_index.nil?
      current_index = next_file.index(:target => next_key, :fields => current_key, :persist => persist_input)
    else
      next_index = next_file.index :target => next_key, :fields => current_key, :persist => persist_input

      # Copy a TokyoCabinet-backed index into a plain Hash-based TSV before
      # its values are rewritten below.
      if TokyoCabinet::HDB === current_index
        tmp = TSV.setup({}, :key_field => current_index.key_field, :fields => current_index.fields, :serializer => current_index.serializer, :type => current_index.type, :filename => current_index.filename)
        current_index.unnamed = true
        current_index.each do |key, value|
          tmp.tsv_clean_set_brackets(key, current_index.tsv_clean_get_brackets(key))
        end
        current_index = tmp
      end

      # Replace each value with what next_index maps it to. The original
      # ended this chain with a block-less `collect`, which yields an
      # Enumerator instead of an Array of values.
      current_index.process current_index.fields.first do |values|
        values.nil? ? nil : next_index.values_at(*values).flatten
      end
      current_index.fields = [next_key]
    end
    current_key = next_key
  end

  current_index
end
251
+
252
+
253
# Searches for an index connecting +tsv1+ to +tsv2+, trying progressively
# longer chains made of each table followed by its identifier files.
#
# tsv1, tsv2 - objects answering #identifier_files (a list or nil).
# options    - passed to +build_traverse_index+ after :in_namespace has
#              been defaulted to false.
#
# Returns the first non-nil result of +build_traverse_index+, or nil when
# no combination produces one.
def self.find_traversal(tsv1, tsv2, options = {})
  options = Misc.add_defaults options, :in_namespace => false

  # Work on new arrays: the original unshifted onto and shifted from the
  # list returned by identifier_files, which can alter the caller's list.
  identifiers1 = [tsv1] + (tsv1.identifier_files || [])
  identifiers2 = [tsv2] + (tsv2.identifier_files || [])

  identifiers1.each_index do |i|
    files1 = identifiers1[0..i]
    identifiers2.each_index do |j|
      files2 = identifiers2[0..j]
      # The second chain is reversed so the combined list ends on tsv2.
      index = build_traverse_index(files1 + files2.reverse, options)
      return index unless index.nil?
    end
  end

  nil
end
276
+
277
+ end
@@ -15,6 +15,8 @@ module Filtered
15
15
  end
16
16
  end
17
17
 
18
+ #{{{ FILTER
19
+
18
20
  class Filter
19
21
  attr_accessor :data, :match, :fieldnum, :value, :list, :unsaved
20
22
  attr_accessor :persistence
@@ -29,7 +31,7 @@ module Filtered
29
31
  when Hash === persistence
30
32
  @persistence = persistence
31
33
  when String === persistence
32
- @persistence = TSV.new TCHash.get(persistence)
34
+ @persistence = TSV.setup Persist.open_tokyocabinet(persistence, false, :list)
33
35
  @persistence.read
34
36
  end
35
37
 
@@ -41,7 +43,8 @@ module Filtered
41
43
  self
42
44
  end.class_eval <<-EOC
43
45
  def match_entry(entry)
44
- entry[@fieldnum] == @value
46
+ value = entry[@fieldnum]
47
+ value == @value or (Array === value and value.include? @value)
45
48
  end
46
49
  EOC
47
50
  end
@@ -62,7 +65,7 @@ module Filtered
62
65
  persistence[self.key] = ids
63
66
  persistence.read
64
67
  else
65
- if list.nil?
68
+ if @list.nil?
66
69
  @list = ids
67
70
  else
68
71
  @list.replace ids
@@ -72,9 +75,11 @@ module Filtered
72
75
 
73
76
  def update
74
77
  ids = []
78
+
75
79
  data.unfiltered_each do |key, entry|
76
80
  ids << key if match_entry(entry)
77
81
  end
82
+
78
83
  save(ids.sort)
79
84
  end
80
85
 
@@ -130,78 +135,112 @@ module Filtered
130
135
  end
131
136
  end
132
137
 
138
+ #}}} FILTER
139
+
133
140
  def self.extended(base)
134
- class << base
135
- attr_accessor :filter_dir, :filters
136
- end
141
+ if not base.respond_to? :unfiltered_set
142
+ class << base
143
+ attr_accessor :filter_dir, :filters
144
+
145
+ alias unfiltered_set []=
146
+ alias []= filtered_set
137
147
 
138
- Misc.redefine_method base, :[]=, :unfiltered_set do |key,value|
139
- if filters.empty?
140
- self.send(:unfiltered_set, key, value)
141
- else
142
- filters.each do |filter|
143
- filter.add key if filter.match_entry value
144
- end
145
- self.send(:unfiltered_set, key, value)
148
+ alias unfiltered_filename filename
149
+ alias filename filtered_filename
150
+
151
+ alias unfiltered_keys keys
152
+ alias keys filtered_keys
153
+
154
+ alias unfiltered_values values
155
+ alias values filtered_values
156
+
157
+ alias unfiltered_each each
158
+ alias each filtered_each
159
+
160
+ alias unfiltered_collect collect
161
+ alias collect filtered_collect
162
+
163
+ alias unfiltered_delete delete
164
+ alias delete filtered_delete
146
165
  end
147
166
  end
167
+ base.filters = []
168
+ end
148
169
 
149
- Misc.redefine_method base, :keys, :unfiltered_keys do
170
+ def filtered_filename
150
171
  if filters.empty?
151
- self.send(:unfiltered_keys)
172
+ unfiltered_filename
152
173
  else
153
- filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
174
+ unfiltered_filename + ":Filtered[#{filters.collect{|f| [f.match, f.value] * "="} * ", "}]"
154
175
  end
176
+ end
177
+
178
+ def filtered_set(key, value)
179
+ if filters.empty?
180
+ self.send(:unfiltered_set, key, value)
181
+ else
182
+ filters.each do |filter|
183
+ filter.add key if filter.match_entry value
184
+ end
185
+ self.send(:unfiltered_set, key, value)
155
186
  end
187
+ end
156
188
 
157
- Misc.redefine_method base, :values, :unfiltered_values do
189
+ def filtered_keys
190
+ with_monitor(false) do
158
191
  if filters.empty?
159
- self.send(:unfiltered_values)
192
+ self.send(:unfiltered_keys)
160
193
  else
161
- ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
162
- self.send :values_at, *ids
194
+ filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
195
+ end
163
196
  end
197
+ end
198
+
199
+ def filtered_values
200
+ if filters.empty?
201
+ self.send(:unfiltered_values)
202
+ else
203
+ ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
204
+ self.send :values_at, *ids
164
205
  end
206
+ end
165
207
 
166
- Misc.redefine_method base, :each, :unfiltered_each do |&block|
208
+ def filtered_each(&block)
167
209
  if filters.empty?
168
210
  self.send(:unfiltered_each, &block)
169
211
  else
170
212
  ids = filters.inject(nil){|list,filter| list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup)}
171
- new = self.dup
172
- new.data = {}
173
213
 
174
- ids.zip(self.send(:values_at, *ids)).each do |id, values|
175
- new[id] = values
214
+ ids.each do |id|
215
+ value = self[id]
216
+ yield id, value if block_given?
217
+ [id, value]
176
218
  end
177
-
178
- new.send :each, &block
179
- end
180
219
  end
220
+ end
181
221
 
182
- Misc.redefine_method base, :collect, :unfiltered_collect do |&block|
222
+ def filtered_collect(&block)
183
223
  if filters.empty?
184
224
  self.send(:unfiltered_collect, &block)
185
225
  else
186
226
  ids = filters.inject(nil){|list,filter| list = (list.nil? ? filter.ids.dup : Misc.intersect_sorted_arrays(list, filter.ids.dup))}
187
227
 
188
- new = self.dup
189
- new.data = {}
228
+ new = TSV.setup({}, self.options)
229
+
190
230
  ids.zip(self.send(:values_at, *ids)).each do |id, values|
191
231
  new[id] = values
192
232
  end
193
233
  new.send :collect, &block
194
234
  end
195
- end
235
+ end
196
236
 
197
- Misc.redefine_method base, :delete, :unfiltered_delete do |key|
237
+ def filtered_delete(key)
198
238
  if filters.empty?
199
239
  self.send(:unfiltered_delete, key)
200
240
  else
201
241
  reset_filters
202
242
  self.send :unfiltered_delete, key
203
243
  end
204
- end
205
244
  end
206
245
 
207
246
  def add_filter(match, value, persistence = nil)
@@ -214,12 +253,12 @@ module Filtered
214
253
  end
215
254
 
216
255
  def pop_filter
217
- filters.pop.add_unsaved
256
+ filters.pop.add_unsaved if filters.any?
218
257
  end
219
258
 
220
259
  end
221
260
 
222
- class TSV
261
+ module TSV
223
262
  def filter(filter_dir = nil)
224
263
  self.extend Filtered
225
264
  self.filter_dir = filter_dir