rbbt-util 1.2.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,273 @@
1
+ require 'rbbt/util/misc'
2
+
3
+ class TSV
4
+ ## Make sure we overwrite the methods declared by attr_accessor
5
# Attribute names, split into the core table state and the extra metadata
# attributes (the extra list is also what #to_s writes in its "#: " header).
MAIN_ACCESSORS  = [:data, :key_field, :fields, :cast]
EXTRA_ACCESSORS = [:filename, :identifiers, :namespace, :datadir, :type, :case_insensitive]
attr_accessor(*(MAIN_ACCESSORS + EXTRA_ACCESSORS))
8
+
9
# Transposes a list of parallel value lists into per-position tuples.
#
# list   - Array of Arrays; the first entry drives the zip.
# fields - optional field names; when not given they are taken from
#          list.fields if the list responds to it.
#
# Returns [] for a nil or empty list. When field names are available every
# tuple is passed through NamedArray.name.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  if list.respond_to?(:fields)
    fields ||= list.fields
  end

  tuples = list[0].zip(*list[1..-1])
  return tuples unless fields

  tuples.collect { |tuple| NamedArray.name(tuple, fields) }
end
16
+
17
# Mixin for field-name strings. It adds an optional namespace and a relaxed
# equality that ignores case and accepts names with or without a
# "namespace:" prefix.
module Field
  # Only the writer is generated; the reader is defined below because a
  # namespace embedded in the name takes precedence over the stored one.
  attr_writer :namespace

  # Extends +field+ with Field, stores +namespace+ on it and returns it.
  def self.field(field, namespace = nil)
    field.extend Field
    field.namespace = namespace
    field
  end

  # Returns the prefix captured by a greedy /(.+):/ match on +string+, or nil
  # when the string has no such non-empty prefix.
  def self.namespace(string)
    found = string.match(/(.+):/)
    return nil unless found

    prefix = found[1]
    (prefix.nil? || prefix.empty?) ? nil : prefix
  end

  # The field itself when it already contains ":" or has no namespace,
  # otherwise a new string "namespace:field".
  def fullname
    return self if self =~ /:/

    prefix = namespace
    return self if prefix.nil?

    prefix + ":" + self
  end

  # Case-insensitive comparison against another String. Matches when the raw
  # names match, when the full names match, or when this name with everything
  # up to the last ":" removed matches +string+. Non-strings never match.
  def ==(string)
    return false unless String === string
    return true if casecmp(string) == 0

    other_name = Field === string ? string.fullname : string
    return true if fullname.casecmp(other_name) == 0

    sub(/.*:/, '').casecmp(string) == 0
  end

  # Namespace embedded in the name, falling back to the assigned one.
  def namespace
    embedded = Field.namespace(self)
    embedded.nil? ? @namespace : embedded
  end

  # True when this field has no namespace or shares it with +other+.
  def matching_namespaces(other)
    own = namespace
    own.nil? || own == other.namespace
  end
end
59
+
60
+ #{{{{ Field END
61
+
62
# Resolves the identifier files associated with this table.
#
# Returns
# * [identifiers] when identifiers is itself a TSV;
# * identifiers unchanged when it is an empty Array or an Array whose first
#   element is a TSV;
# * for any other Array, each entry resolved through
#   Path.path(entry, datadir, namespace);
# * [Path.path(identifiers, datadir)] for any other truthy value;
# * the identifier_files of Path.path(filename, datadir) when only a filename
#   is set;
# * [] otherwise.
def identifier_files
  if identifiers
    case identifiers
    when TSV
      [identifiers]
    when Array
      if identifiers.empty? or TSV === identifiers.first
        identifiers
      else
        # This has to be a real fallback branch: written as a bare `when`
        # with the expression on the following line, Ruby parses the
        # expression as the condition and the branch evaluates to nil.
        identifiers.collect { |file| Path.path(file, datadir, namespace) }
      end
    else
      [Path.path(identifiers, datadir)]
    end
  elsif filename
    Path.path(filename, datadir).identifier_files
  else
    []
  end
end
81
+
82
# Selects the fields that either have no namespace or belong to +namespace+.
#
# namespace - namespace to filter by; nil or true mean "use this table's
#             own namespace".
def fields_in_namespace(namespace = nil)
  if namespace == nil || TrueClass === namespace
    namespace = self.namespace
  end

  fields.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? || field_namespace == namespace
  end
end
86
+
87
# Field names of the table, or nil when none are set.
#
# As a side effect every stored field is extended with Field (when the
# fields are held in an Array) and, if the table has a namespace, gets that
# namespace assigned. The result is passed through NamedArray.name.
def fields
  return nil if @fields.nil?

  names = @fields
  names.each { |name| name.extend Field } if Array === @fields
  unless namespace.nil?
    names.each { |name| name.namespace = namespace }
  end
  NamedArray.name(names, @fields)
end
94
+
95
# Key field followed by the regular fields, or nil when no fields are set.
#
# Works on a copy of the stored field list, so the key field is not added to
# it; the individual name objects are still extended with Field and receive
# the table namespace when there is one.
def all_fields
  return nil if @fields.nil?

  names = @fields.dup
  names.unshift key_field
  names.each { |name| name.extend Field } if Array === @fields
  names.each { |name| name.namespace = namespace } unless namespace.nil?
  NamedArray.name(names, [key_field] + @fields)
  names
end
104
+
105
# Like all_fields, but restricted to fields without a namespace or in
# +namespace+.
#
# namespace - namespace to filter by; nil or true mean "use this table's
#             own namespace". When the effective namespace is nil no
#             filtering happens.
#
# Returns nil when the table has no fields.
def all_namespace_fields(namespace = nil)
  namespace = self.namespace if namespace == nil || TrueClass === namespace

  candidates = self.all_fields
  return candidates if candidates.nil? || namespace.nil?

  candidates.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? || field_namespace == namespace
  end
end
112
+
113
# Resolves a field reference to a position.
#
# key    - name of the key field.
# fields - list of field names (may be nil).
# field  - Integer position, nil, :key / "key", the key name or a field name.
#
# Returns the Integer unchanged when +field+ is an Integer; :key when +field+
# is nil, equals 0, symbolizes to :key or matches +key+; nil when +fields+ is
# nil or the name is unknown; otherwise the index of the field, looked up
# first among the stringified names and then among the names as stored.
def self.identify_field(key, fields, field)
  return field if Integer === field

  # Values that cannot be symbolized are simply looked up like any other name
  # instead of raising NoMethodError.
  names_key = field.respond_to?(:to_sym) && field.to_sym == :key
  return :key if field.nil? || field == 0 || names_key || key == field
  return nil if fields.nil?

  # Build the stringified list once; it is only needed for this lookup.
  position = fields.collect { |f| f.to_s }.index(field)
  position.nil? ? fields.index(field) : position
end
120
+
121
# Resolves +field+ against this table's key field and fields by delegating
# to TSV.identify_field.
def identify_field(field)
  TSV.identify_field key_field, fields, field
end
124
+
125
# Sets the field names of the table.
#
# When +new_fields+ is an Array it is rewritten in place: a Field entry whose
# name has no ":" and whose namespace is set and differs from the table
# namespace is replaced by "namespace:name". The list is then stored and
# forwarded to the data store if that accepts a fields= assignment.
def fields=(new_fields)
  if Array === new_fields
    new_fields.collect! do |field|
      foreign = Field === field &&
                field !~ /:/ &&
                field.namespace != nil &&
                field.namespace != namespace
      foreign ? field.namespace + ":" + field.to_s : field
    end
  end

  @fields = new_fields
  @data.fields = new_fields if @data.respond_to? :fields=
end
140
+
141
# Stores +new_fields+ as they are (no namespace rewriting) and forwards them
# to the data store when it accepts a fields= assignment.
def old_fields=(new_fields)
  @fields = new_fields
  return unless @data.respond_to?(:fields=)

  @data.fields = new_fields
end
145
+
146
# Keys of the underlying data store.
def keys
  @data.keys
end
149
+
150
# Values of the underlying data store, exactly as stored.
def values
  @data.values
end
153
+
154
# Number of entries in the underlying data store.
def size
  @data.size
end
157
+
158
+ # Write
159
+
160
# Stores +value+ under +key+; the key is downcased first when the table is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
164
+
165
+
166
# Copies every key/value pair yielded by new_data.each into this table
# through []=, so key normalisation applies to merged entries as well.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
171
+
172
+ # Read
173
+
174
# Post-processes a stored value before it is handed to callers.
#
# * nil stays nil;
# * a String matching /__Ref:(.*)/ is treated as a reference and replaced by
#   the entry stored under the captured key;
# * an Array is wrapped with NamedArray.name when the table has fields;
# * anything else is returned unchanged.
def follow(value)
  return nil if value.nil?
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return value unless Array === value && fields

  NamedArray.name value, fields
end
183
+
184
# Looks up +key+.
#
# An Array key is first tried as a literal key of the data store (the stored
# value is returned as is); otherwise its elements are tried in order and the
# first non-nil lookup wins. Any other key is downcased when the table is
# case insensitive (unless it starts with "__Ref:") and the stored value is
# passed through follow.
def [](key)
  if Array === key
    direct = @data[key]
    return direct unless direct.nil?

    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  key = key.downcase if @case_insensitive && key !~ /^__Ref:/
  follow(@data[key])
end
194
+
195
# Looks up every given key through [] and returns the results in order.
def values_at(*keys)
  keys.collect { |key| self[key] }
end
200
+
201
# Iterates over the table, passing each key together with its value after
# the value has gone through follow.
#
# Without a block an Enumerator over the same pairs is returned (a missing
# block used to fail with NoMethodError on nil as soon as the table had an
# entry). With a block the return value is whatever the data store's #each
# returns.
def each(&block)
  return enum_for(:each) unless block

  @data.each do |key, value|
    block.call(key, follow(value))
  end
end
206
+
207
# Maps over the entries of the table with every value passed through follow.
#
# With a block, returns the block results for each key/value pair; without
# one, returns the [key, value] pairs themselves.
def collect
  unless block_given?
    return @data.collect { |key, value| [key, follow(value)] }
  end

  @data.collect { |key, value| yield key, follow(value) }
end
219
+
220
# Sorts the [key, value] pairs produced by collect (optionally with a
# comparison block) and, when the table has fields, wraps every value with
# NamedArray.name.
def sort(&block)
  collect.sort(&block).collect do |key, value|
    value = NamedArray.name(value, fields) if fields
    [key, value]
  end
end
227
+
228
# Sorts the [key, value] pairs produced by collect by the block's result.
def sort_by(&block)
  collect.sort_by(&block)
end
231
+
232
# Renders the value part of a row: a leading tab, the tab-separated cells and
# a trailing newline.
#
# * nil values give just "\n" when the table has no fields, otherwise one
#   empty cell per field;
# * a non-Array value is rendered with to_s;
# * when any cell is itself an Array the cells are treated as lists and each
#   one is joined with "|" (nil cells become empty, scalar cells are kept as
#   they are);
# * otherwise the cells are joined directly.
#
# The result is always a newly built string.
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?
    return "\t" + ([""] * fields.length).join("\t") + "\n"
  end

  return "\t" + values.to_s + "\n" unless Array === values

  # Look at every cell, not only the first one: a row such as
  # [nil, ["a", "b"]] is still a row of lists and must keep one cell per list.
  cells =
    if values.any? { |cell| Array === cell }
      values.collect do |cell|
        list = cell.nil? ? [] : (Array === cell ? cell : [cell])
        list.join("|")
      end
    else
      values
    end

  "\t" + cells.join("\t") + "\n"
end
246
+
247
# Tells whether the data store has an entry for +key+.
#
# For case-insensitive tables the key is downcased first (unless it starts
# with "__Ref:"), the same normalisation [] applies before its lookup, so
# that include? and [] agree on which keys exist. Keys that cannot be
# downcased are looked up as given.
def include?(key)
  if @case_insensitive && key.respond_to?(:downcase) && key !~ /^__Ref:/
    key = key.downcase
  end
  data.include? key
end
250
+
251
# Serialises the table as text.
#
# The output starts with a "#: " line holding the EXTRA_ACCESSORS values
# encoded by Misc.hash2string, followed (when the table has fields) by a
# "#key<TAB>field..." header and then one line per entry built with
# values_to_s.
#
# keys - optional list of keys; when given only those entries are written,
#        in that order, otherwise every entry is written.
def to_s(keys = nil)
  str = "".dup

  header = EXTRA_ACCESSORS.collect { |name| [name, self.send(name)] }
  str << "#: " << Misc.hash2string(header) << "\n"
  str << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  # Keys are always rendered through to_s: String#<< given an Integer appends
  # the character with that code point rather than its digits.
  append_row = lambda { |key, values| str << key.to_s << values_to_s(values) }

  if keys.nil?
    each { |key, values| append_row.call(key, values) }
  else
    keys.zip(values_at(*keys)).each { |key, values| append_row.call(key, values) }
  end

  str
end
273
+ end
@@ -0,0 +1,228 @@
1
+ class TSV
2
+
3
+ #{{{ Attach Methods
4
+
5
# Appends fields from +other+ to this table when both use the same key.
#
# other  - table to take the values from.
# fields - fields of +other+ to attach; defaults to the fields of +other+
#          that are neither this table's key field nor one of its fields.
#
# For keys present in +other+ the selected values are appended, wrapped in or
# unwrapped from single-element lists when only one of the two tables is of
# type :double. Keys missing from +other+ get one empty placeholder per
# attached field. Finally the attached field names are appended to this
# table's fields.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  through do |key, values|
    if other.include? key
      new_values = other[key].values_at(*fields)
      new_values.collect! { |v| [v] }     if type == :double && other.type != :double
      new_values.collect! { |v| v.first } if type != :double && other.type == :double
    else
      # One fresh placeholder per cell: `[[]] * n` would put the very same
      # object in every cell, so changing one would change them all.
      new_values = Array.new(fields.length) { type == :double ? [] : "" }
    end

    self[key] = self[key].concat new_values
  end

  self.fields = self.fields.concat other.fields.values_at(*fields)
end
25
+
26
# Appends fields from +other+ using one of this table's fields as the key
# into +other+.
#
# other  - table (or something responding to #tsv) to take the values from.
# source - field of this table whose values are keys of +other+.
# fields - fields of +other+ to attach; defaults to the fields of +other+
#          that are neither this table's key field nor one of its fields.
#
# For every entry the source keys are looked up in +other+. With type :double
# the values found for all source keys are merged per field; otherwise the
# values of the first source key found are used. Entries without any match
# get one empty placeholder per attached field. Finally the attached field
# names are appended to this table's fields.
def attach_source_key(other, source, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv unless TSV === other
  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = values[source]
    all_new_values = []

    unless source_keys.nil? or source_keys.empty?
      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          pos == :key ? source_key : other[source_key][pos]
        end
        new_values.collect! { |v| [v] }     if type == :double && other.type != :double
        new_values.collect! { |v| v.first } if type != :double && other.type == :double
        all_new_values << new_values
      end
    end

    addition =
      if all_new_values.empty?
        # One fresh placeholder per cell: `[[]] * n` would put the very same
        # object in every cell, so changing one would change them all.
        Array.new(field_positions.length) { type == :double ? [] : "" }
      elsif type == :double
        TSV.zip_fields(all_new_values).collect { |l| l.flatten }
      else
        all_new_values.first
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
72
+
73
# Appends fields from +other+ using an index that maps this table's keys to
# keys of +other+.
#
# other  - table (or something responding to #tsv) to take the values from.
# index  - responds to [] with this table's keys and returns the matching
#          keys of +other+ (or nil / an empty collection).
# fields - fields of +other+ to attach; defaults to the fields of +other+
#          that are neither this table's key field nor one of its fields.
#
# With type :double the values found for all mapped keys are merged per
# field; otherwise the values of the first mapped key found are used. Entries
# without any match get one empty placeholder per attached field. Finally the
# attached field names are appended to this table's fields.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv unless TSV === other
  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = index[key]
    all_new_values = []

    unless source_keys.nil? or source_keys.empty?
      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          pos == :key ? source_key : other[source_key][pos]
        end
        new_values.collect! { |v| [v] }     if type == :double && other.type != :double
        new_values.collect! { |v| v.first } if type != :double && other.type == :double
        all_new_values << new_values
      end
    end

    addition =
      if all_new_values.empty?
        # One fresh placeholder per cell: `[[]] * n` would put the very same
        # object in every cell, so changing one would change them all.
        Array.new(field_positions.length) { type == :double ? [] : "" }
      elsif type == :double
        TSV.zip_fields(all_new_values).collect { |l| l.flatten }
      else
        all_new_values.first
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
118
+
119
+ #{{{ Attach Helper
120
+
121
+ # May make an extra index!
122
# Finds a chain of shared fields linking consecutive files.
#
# files        - list of objects responding to all_fields (the first one
#                also to all_namespace_fields when in_namespace is set).
# in_namespace - when truthy, only the namespace fields of the first file
#                are considered for it.
#
# For every pair of consecutive files the first field of the earlier file
# that also appears in the later one is picked; nil is returned as soon as a
# pair shares no field. When the last picked field does not lead to the first
# field of the last file, that field is appended as a final step.
#
# Returns the picked fields zipped with +files+ (one [field, file] pair per
# picked field).
def self.find_path(files, in_namespace = false)
  if in_namespace
    ids = [files.first.all_namespace_fields(in_namespace)]
    ids += files[1..-1].collect { |f| f.all_fields }
  else
    ids = files.collect { |f| f.all_fields }
  end

  id_list = []
  ids.each_cons(2) do |list, next_list|
    match = list.select { |field| next_list.any? { |f| field == f } }
    return nil if match.empty?
    id_list << match.first
  end

  last_key = files.last.all_fields.first
  # NOTE(review): id_list.last is a single field, so `.first` relies on field
  # objects responding to #first - confirm this is intended rather than a
  # comparison of the field itself with the last key.
  id_list << last_key if id_list.last.first != last_key

  id_list.zip(files)
end
150
+
151
# Builds an index that leads from the first field of the first file to the
# end of the path found by find_path.
#
# files        - list of files to traverse, in order.
# in_namespace - forwarded to find_path.
#
# An index is requested from the first file of the path (without
# persistence); for every further step an index is requested from that step's
# file (with persistence) and the values of the running index are replaced by
# the flattened, de-duplicated values they map to in it.
#
# Returns the resulting index, or nil when find_path finds no path.
def self.build_traverse_index(files, in_namespace = false)
  path = find_path(files, in_namespace)
  return nil if path.nil?

  traversal_ids = path.collect { |step| step.first }
  Log.medium "Found Traversal: #{traversal_ids * " => "}"

  current_key = files.first.all_fields.first
  current_id, current_file = path.shift
  index = current_file.index :target => current_id, :fields => current_key, :persistence => false

  until path.empty?
    current_id, current_file = path.shift
    current_index = current_file.index :target => current_id, :fields => index.fields.first, :persistence => true
    index.process 0 do |value|
      current_index.values_at(*value).flatten.uniq
    end
    index.fields = current_index.fields
  end

  index
end
177
+
178
# Searches for an index connecting +tsv1+ to +tsv2+, possibly going through
# their identifier files.
#
# Candidate chains are the table itself followed by its identifier files.
# Growing prefixes of the first chain are combined with growing prefixes of
# the second one (reversed, so that the path ends on +tsv2+) and the first
# combination for which build_traverse_index succeeds is returned.
#
# Returns the index, or nil when no combination can be traversed.
def self.find_traversal(tsv1, tsv2, in_namespace = false)
  # Build new lists: identifier_files may hand back the table's own
  # identifiers array, which must not be modified by the search.
  chain1 = [tsv1] + (tsv1.identifier_files || [])
  chain2 = [tsv2] + (tsv2.identifier_files || [])

  (1..chain1.length).each do |length1|
    files1 = chain1.first(length1)
    (1..chain2.length).each do |length2|
      files2 = chain2.first(length2)
      index = build_traverse_index(files1 + files2.reverse, in_namespace)
      return index unless index.nil?
    end
  end

  nil
end
198
+
199
# Attaches fields from +other+ to this table, choosing the strategy from how
# the two tables relate.
#
# other   - table to take the fields from.
# fields  - fields to attach; :all takes every field of +other+ not already
#           present here, nil (the default) only those in its namespace.
# options - :in_namespace (default true) restricts the search for a
#           connecting field to namespace fields.
#
# Strategy: same key field -> attach_same_key; the key of +other+ is one of
# this table's (namespace) fields -> attach_source_key; otherwise an index is
# looked for with TSV.find_traversal and used with attach_index.
#
# Raises RuntimeError when no traversal between the tables can be found.
def attach(other, fields = nil, options = {})
  options = Misc.add_defaults options, :in_namespace => true
  in_namespace = Misc.process_options options, :in_namespace

  if fields == :all
    fields = other.fields - [key_field].concat(self.fields)
  elsif fields.nil?
    fields = other.fields_in_namespace - [key_field].concat(self.fields)
  end

  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  if key_field == other.key_field
    attach_same_key other, fields
  elsif (in_namespace ? self.fields_in_namespace : self.fields).include?(other.key_field)
    attach_source_key other, other.key_field, fields
  else
    index = TSV.find_traversal(self, other, in_namespace)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
end
220
+
221
# Collects the positions of this table's fields whose full name also appears
# among the fields of +file+ and passes them to reorder together with :key.
def detach(file)
  shared_names = file.fields.collect { |field| field.fullname }

  own_fields = self.fields
  positions = (0...own_fields.length).select do |i|
    shared_names.include? own_fields[i].fullname
  end

  reorder :key, positions
end
227
+
228
+ end