rbbt-util 1.2.1 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/rbbt-util.rb +2 -1
  2. data/lib/rbbt/util/R.rb +18 -1
  3. data/lib/rbbt/util/cmd.rb +7 -6
  4. data/lib/rbbt/util/data_module.rb +31 -11
  5. data/lib/rbbt/util/fix_width_table.rb +209 -0
  6. data/lib/rbbt/util/log.rb +12 -2
  7. data/lib/rbbt/util/misc.rb +91 -12
  8. data/lib/rbbt/util/open.rb +18 -9
  9. data/lib/rbbt/util/path.rb +152 -0
  10. data/lib/rbbt/util/persistence.rb +282 -75
  11. data/lib/rbbt/util/pkg_data.rb +16 -59
  12. data/lib/rbbt/util/pkg_software.rb +15 -1
  13. data/lib/rbbt/util/rake.rb +5 -1
  14. data/lib/rbbt/util/tc_hash.rb +129 -59
  15. data/lib/rbbt/util/tsv.rb +109 -1284
  16. data/lib/rbbt/util/tsv/accessor.rb +273 -0
  17. data/lib/rbbt/util/tsv/attach.rb +228 -0
  18. data/lib/rbbt/util/tsv/index.rb +303 -0
  19. data/lib/rbbt/util/tsv/manipulate.rb +271 -0
  20. data/lib/rbbt/util/tsv/parse.rb +258 -0
  21. data/share/lib/R/util.R +5 -3
  22. data/test/rbbt/util/test_R.rb +9 -1
  23. data/test/rbbt/util/test_data_module.rb +5 -0
  24. data/test/rbbt/util/test_fix_width_table.rb +107 -0
  25. data/test/rbbt/util/test_misc.rb +43 -0
  26. data/test/rbbt/util/test_open.rb +0 -1
  27. data/test/rbbt/util/test_path.rb +10 -0
  28. data/test/rbbt/util/test_persistence.rb +63 -2
  29. data/test/rbbt/util/test_pkg_data.rb +29 -8
  30. data/test/rbbt/util/test_tc_hash.rb +52 -0
  31. data/test/rbbt/util/test_tsv.rb +55 -678
  32. data/test/rbbt/util/tsv/test_accessor.rb +109 -0
  33. data/test/rbbt/util/tsv/test_attach.rb +271 -0
  34. data/test/rbbt/util/tsv/test_index.rb +158 -0
  35. data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
  36. data/test/rbbt/util/tsv/test_parse.rb +72 -0
  37. data/test/test_helper.rb +1 -0
  38. metadata +25 -4
@@ -0,0 +1,273 @@
1
+ require 'rbbt/util/misc'
2
+
3
+ class TSV
4
# Attribute names exposed by a TSV. Methods with the same names that are
# defined explicitly further down in this class replace the generated ones.
MAIN_ACCESSORS  = [:data, :key_field, :fields, :cast]
EXTRA_ACCESSORS = [:filename, :identifiers, :namespace, :datadir, :type, :case_insensitive]
attr_accessor(*(MAIN_ACCESSORS + EXTRA_ACCESSORS))
8
+
9
# Transposes +list+ (an array of equally long arrays): the result holds one
# array per position, built from the elements found at that position.
#
# list   - Array of Arrays; nil or empty yields [].
# fields - optional names; defaults to +list.fields+ when +list+ responds
#          to it. When present, every resulting array is passed through
#          NamedArray.name (defined elsewhere).
#
# Returns the transposed Array.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields ||= list.fields if list.respond_to? :fields

  head = list[0]
  tail = list[1..-1]
  columns = head.zip(*tail)
  return columns unless fields

  columns.collect { |column| NamedArray.name(column, fields) }
end
16
+
17
# Mixin for field-name Strings. A field may carry a namespace, either
# embedded in the string itself ("namespace:name") or stored separately
# through +namespace=+.
module Field
  # Only the writer is generated; the reader is defined below because it
  # gives priority to a namespace embedded in the string.
  attr_writer :namespace

  # Extends +field+ with Field, stores +namespace+ on it and returns it.
  def self.field(field, namespace = nil)
    field.extend Field
    field.namespace = namespace
    field
  end

  # Returns the text before the last ':' of +string+, or nil when there is
  # no such non-empty prefix.
  def self.namespace(string)
    found = string.match(/(.+):/)
    return nil unless found

    prefix = found[1]
    return nil if prefix.nil? || prefix.empty?

    prefix
  end

  # The field qualified with its namespace. Returns self when the string
  # already contains a ':' or when no namespace is known.
  def fullname
    return self if self =~ /:/

    prefix = namespace
    return self if prefix.nil?

    (prefix + ":") << self
  end

  # Case-insensitive comparison against another String. Matches on the raw
  # text, on the full (namespaced) names, or on this field's name with
  # everything up to the last ':' removed. Non-String arguments never match.
  def ==(string)
    return false unless String === string
    return true if casecmp(string) == 0

    other_name = Field === string ? string.fullname : string
    return true if fullname.casecmp(other_name) == 0

    sub(/.*:/, '').casecmp(string) == 0
  end

  # Namespace embedded in the string if any, otherwise the stored one.
  def namespace
    Field.namespace(self) || @namespace
  end

  # True when this field has no namespace or shares the namespace of +other+.
  def matching_namespaces(other)
    own = namespace
    own.nil? || own == other.namespace
  end
end
59
+
60
+ #{{{{ Field END
61
+
62
# Lists the identifier files associated with this TSV.
#
# * +identifiers+ is a TSV: it is returned wrapped in an Array.
# * +identifiers+ is an Array: returned as is when empty or when it holds
#   TSVs; otherwise each entry is resolved with Path.path against
#   +datadir+ and +namespace+.
# * +identifiers+ is any other truthy value: resolved with Path.path
#   against +datadir+.
# * no +identifiers+ but a +filename+: delegates to the identifier_files of
#   the Path built for that file.
# * otherwise: [].
#
# Path.path is defined elsewhere; its return values are passed through
# untouched.
def identifier_files
  case identifiers
  when TSV
    [identifiers]
  when Array
    if identifiers.empty? || TSV === identifiers.first
      identifiers
    else
      # The original used a bare `when` here, which made the collect the
      # branch *condition* and left the body empty, so nil was returned.
      identifiers.collect { |file| Path.path(file, datadir, namespace) }
    end
  when nil, false
    filename ? Path.path(filename, datadir).identifier_files : []
  else
    [Path.path(identifiers, datadir)]
  end
end
81
+
82
# Selects the fields that either have no namespace or belong to +namespace+.
#
# namespace - namespace to match; nil or true mean "this TSV's namespace".
def fields_in_namespace(namespace = nil)
  namespace = self.namespace if namespace.nil? || true.equal?(namespace)

  fields.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? || field_namespace == namespace
  end
end
86
+
87
# Returns the field names, or nil when none are set.
#
# Side effects: the stored field objects are extended with Field (only when
# the fields are held in an Array) and, when this TSV has a namespace, that
# namespace is assigned to every one of them. The result comes from
# NamedArray.name (defined elsewhere).
def fields
  return nil if @fields.nil?

  names = @fields
  names.each { |name| name.extend Field } if Array === @fields
  names.each { |name| name.namespace = namespace } unless namespace.nil?
  NamedArray.name(names, @fields)
end
94
+
95
# Returns a new Array with the key field followed by the regular fields, or
# nil when no fields are set.
#
# Side effects: the entries (key field included) are extended with Field
# when the fields are held in an Array, and receive this TSV's namespace
# when it is not nil. The copy is also passed through NamedArray.name
# (defined elsewhere) before being returned.
def all_fields
  return nil if @fields.nil?

  combined = @fields.dup.unshift(key_field)
  combined.each { |name| name.extend Field } if Array === @fields
  combined.each { |name| name.namespace = namespace } unless namespace.nil?
  NamedArray.name(combined, [key_field] + @fields)
  combined
end
104
+
105
# Like all_fields, but restricted to entries without namespace or within
# +namespace+.
#
# namespace - namespace to match; nil or true mean "this TSV's namespace".
#
# Returns nil when there are no fields, and the unfiltered list when no
# namespace applies.
def all_namespace_fields(namespace = nil)
  namespace = self.namespace if namespace.nil? || true.equal?(namespace)

  candidates = self.all_fields
  return candidates if candidates.nil? || namespace.nil?

  candidates.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? || field_namespace == namespace
  end
end
112
+
113
# Resolves +field+ to a position.
#
# key    - name of the key field.
# fields - list of field names (may be nil).
# field  - Integer position, field name, :key, or nil.
#
# Returns +field+ itself when it is an Integer; :key when it is nil, equals
# 0, converts to the symbol :key, or matches +key+; otherwise the index of
# +field+ in +fields+ (compared by string form first, then directly), or
# nil when +fields+ is nil or nothing matches.
def self.identify_field(key, fields, field)
  return field if Integer === field
  return :key if field.nil? or field == 0 or field.to_sym == :key or key == field
  return nil if fields.nil?

  position_by_name = fields.collect { |f| f.to_s }.index(field)
  position_by_name || fields.index(field)
end
120
+
121
# Resolves +field+ against this TSV's key field and fields by delegating to
# TSV.identify_field.
def identify_field(field)
  TSV.identify_field key_field, fields, field
end
124
+
125
# Sets the field names.
#
# When +new_fields+ is an Array it is rewritten in place: a Field entry
# without ':' whose namespace is set and differs from this TSV's namespace
# is replaced by the String "namespace:name"; every other entry is kept.
# The list is stored and also forwarded to @data when @data responds to
# +fields=+.
def fields=(new_fields)
  if Array === new_fields
    new_fields.map! do |field|
      foreign = Field === field &&
                field !~ /:/ &&
                !field.namespace.nil? &&
                field.namespace != namespace
      foreign ? field.namespace + ":" + field.to_s : field
    end
  end

  @fields = new_fields
  @data.fields = new_fields if @data.respond_to? :fields=
end
140
+
141
# Stores +new_fields+ verbatim, without the namespace rewriting performed by
# +fields=+, and forwards the list to @data when @data responds to +fields=+.
def old_fields=(new_fields)
  @fields = new_fields
  return unless @data.respond_to?(:fields=)

  @data.fields = new_fields
end
145
+
146
# Keys of the underlying data store.
def keys
  @data.keys
end

# Raw values of the underlying data store (references are not followed).
def values
  @data.values
end

# Number of entries in the underlying data store.
def size
  @data.size
end
157
+
158
+ # Write
159
+
160
# Stores +value+ under +key+. The key is downcased first when the TSV is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
164
+
165
+
166
# Copies every key/value pair of +new_data+ into this TSV through +[]=+.
# Returns whatever +new_data.each+ returns.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
171
+
172
+ # Read
173
+
174
# Post-processes a stored value before it is handed to the caller.
#
# * nil stays nil.
# * a String containing "__Ref:<key>" is resolved by looking <key> up in
#   this TSV.
# * an Array is passed through NamedArray.name (defined elsewhere) together
#   with the fields, when fields are available.
# * anything else is returned unchanged.
def follow(value)
  return nil if value.nil?
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return value unless Array === value && fields

  NamedArray.name value, fields
end
183
+
184
# Looks +key+ up, following references (see +follow+).
#
# An Array key is first tried as a literal key of the data store; failing
# that, each element is looked up in turn and the first non-nil result is
# returned (nil when none is found). Other keys are downcased when the TSV
# is case insensitive, unless they start with "__Ref:".
def [](key)
  if Array === key
    direct = @data[key]
    return direct unless direct.nil?

    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  key = key.downcase if @case_insensitive and key !~ /^__Ref:/
  follow @data[key]
end
194
+
195
# Looks up each of +keys+ through +[]+ and returns the results in the same
# order.
def values_at(*keys)
  keys.map { |key| self[key] }
end
200
+
201
# Calls +block+ with every key and its value after the value has been
# processed by +follow+. Returns whatever @data.each returns.
def each(&block)
  @data.each { |key, value| block.call(key, follow(value)) }
end
206
+
207
# Maps over the entries with values processed by +follow+. With a block the
# block's results are collected; without one, [key, value] pairs are.
def collect
  @data.collect do |key, value|
    followed = follow(value)
    block_given? ? yield(key, followed) : [key, followed]
  end
end
219
+
220
# Returns the [key, value] pairs sorted (with +block+ as comparator when
# given). When fields are set, each value is passed through NamedArray.name
# (defined elsewhere) again after sorting.
def sort(&block)
  ordered = collect.sort(&block)
  ordered.collect do |key, value|
    value = NamedArray.name value, fields if fields
    [key, value]
  end
end
227
+
228
# Sorts the [key, value] pairs produced by +collect+ using +block+ to
# compute the sort key.
def sort_by(&block)
  pairs = collect
  pairs.sort_by(&block)
end
231
+
232
# Renders the value part of one row: a leading tab, the tab-separated
# values and a newline.
#
# * nil values: just "\n" without fields, otherwise one tab per field.
# * a non-Array value: its string form.
# * an Array whose first element is an Array: each element joined with "|"
#   (nil elements count as empty), elements separated by tabs.
# * any other Array: elements separated by tabs.
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?
    return "\t#{([""] * fields.length) * "\t"}\n"
  end

  return "\t#{values.to_s}\n" unless Array === values

  if Array === values.first
    cells = values.collect { |list| (list || []) * "|" }
    "\t#{cells * "\t"}\n"
  else
    "\t#{values * "\t"}\n"
  end
end
246
+
247
+ def include?(key)
248
+ data.include? key
249
+ end
250
+
251
# Serializes the TSV as text.
#
# The first line is "#: " followed by Misc.hash2string (defined elsewhere)
# applied to the EXTRA_ACCESSORS name/value pairs. When fields are set, a
# "#<key_field>\t<fields...>" header line follows. Then comes one line per
# row, formatted by +values_to_s+.
#
# keys - optional list of keys; when given, only those rows are written, in
#        that order. Otherwise every row is written via +each+.
#
# Returns the String.
def to_s(keys = nil)
  str = ""

  extras = EXTRA_ACCESSORS.collect { |name| [name, self.send(name)] }
  str << "#: " << Misc.hash2string(extras) << "\n"
  str << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  # key.to_s instead of appending the key directly: String#<< treats an
  # Integer argument as a code point, which would corrupt numeric keys.
  write_row = lambda { |key, values| str << key.to_s << values_to_s(values) }

  if keys.nil?
    each { |key, values| write_row.call(key, values) }
  else
    keys.zip(values_at(*keys)).each { |key, values| write_row.call(key, values) }
  end

  str
end
273
+ end
@@ -0,0 +1,228 @@
1
+ class TSV
2
+
3
+ #{{{ Attach Methods
4
+
5
# Appends fields from +other+ to every row of this TSV, matching rows by
# key (both TSVs are expected to share the key field).
#
# other  - TSV providing the new values.
# fields - fields of +other+ to attach; defaults to those of +other+ that
#          are neither this TSV's key field nor one of its fields.
#
# Values are wrapped in / unwrapped from single-element Arrays when only one
# of the two TSVs is of type :double. Rows missing from +other+ are padded
# with one empty placeholder per attached field. +through+ (defined
# elsewhere) is assumed to yield each key and its values.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  wrap   = (type == :double and not other.type == :double)
  unwrap = (not type == :double and other.type == :double)

  through do |key, values|
    addition =
      if other.include? key
        picked = other[key].values_at(*fields)
        picked.collect! { |v| [v] } if wrap
        picked.collect! { |v| v.first } if unwrap
        picked
      elsif type == :double
        # A fresh object per cell: `[[]] * n` would repeat one shared Array.
        Array.new(fields.length) { [] }
      else
        Array.new(fields.length) { "" }
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat other.fields.values_at(*fields)
end
25
+
26
# Appends fields from +other+ to every row, using the values found in this
# TSV's +source+ field as keys into +other+.
#
# other  - TSV (or an object responding to +tsv+) providing the values.
# source - field of this TSV whose values are keys of +other+.
# fields - fields of +other+ to attach; defaults to those of +other+ that
#          are neither this TSV's key field nor one of its fields.
#
# For :double TSVs the values found for all source keys are merged per
# field; otherwise only the values of the first matching source key are
# used. Rows without any match are padded with one empty placeholder per
# attached field. +through+ (defined elsewhere) is assumed to yield each key
# and its values.
#
# NOTE(review): the default +fields+ is computed before +other+ is
# converted with +tsv+ — confirm that non-TSV arguments respond to +fields+.
def attach_source_key(other, source, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv unless TSV === other
  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  wrap   = (type == :double and not other.type == :double)
  unwrap = (not type == :double and other.type == :double)

  through do |key, values|
    source_keys = values[source]

    all_new_values = []
    unless source_keys.nil? or source_keys.empty?
      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          pos == :key ? source_key : other[source_key][pos]
        end
        new_values.collect! { |v| [v] } if wrap
        new_values.collect! { |v| v.first } if unwrap
        all_new_values << new_values
      end
    end

    addition =
      if all_new_values.empty?
        # A fresh object per cell: `[[]] * n` would repeat one shared Array.
        type == :double ? Array.new(field_positions.length) { [] } : Array.new(field_positions.length) { "" }
      elsif type == :double
        TSV.zip_fields(all_new_values).collect { |l| l.flatten }
      else
        all_new_values.first
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
72
+
73
# Appends fields from +other+ to every row, translating this TSV's keys into
# keys of +other+ through +index+.
#
# other  - TSV (or an object responding to +tsv+) providing the values.
# index  - object answering +[]+ with the list of keys of +other+ that
#          correspond to a key of this TSV.
# fields - fields of +other+ to attach; defaults to those of +other+ that
#          are neither this TSV's key field nor one of its fields.
#
# For :double TSVs the values found for all translated keys are merged per
# field; otherwise only the values of the first matching key are used. Rows
# without any match are padded with one empty placeholder per attached
# field. +through+ (defined elsewhere) is assumed to yield each key and its
# values.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv unless TSV === other
  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  wrap   = (type == :double and not other.type == :double)
  unwrap = (not type == :double and other.type == :double)

  through do |key, values|
    source_keys = index[key]

    all_new_values = []
    unless source_keys.nil? or source_keys.empty?
      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          pos == :key ? source_key : other[source_key][pos]
        end
        new_values.collect! { |v| [v] } if wrap
        new_values.collect! { |v| v.first } if unwrap
        all_new_values << new_values
      end
    end

    addition =
      if all_new_values.empty?
        # A fresh object per cell: `[[]] * n` would repeat one shared Array.
        type == :double ? Array.new(field_positions.length) { [] } : Array.new(field_positions.length) { "" }
      elsif type == :double
        TSV.zip_fields(all_new_values).collect { |l| l.flatten }
      else
        all_new_values.first
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
118
+
119
+ #{{{ Attach Helper
120
+
121
+ # May make an extra index!
122
# Finds, for each pair of consecutive +files+, a field they have in common.
#
# files        - ordered list of objects answering +all_fields+ (the first
#                one also +all_namespace_fields+).
# in_namespace - when truthy, the first file only contributes the fields
#                returned by all_namespace_fields(in_namespace).
#
# Returns nil as soon as two consecutive files share no field. Otherwise
# returns the shared fields zipped with +files+; the first field of the last
# file is appended to the list beforehand when the closing comparison below
# says it differs.
def self.find_path(files, in_namespace = false)
  if in_namespace
    ids = [files.first.all_namespace_fields(in_namespace)]
    ids += files[1..-1].collect { |f| f.all_fields }
  else
    ids = files.collect { |f| f.all_fields }
  end

  id_list = []
  ids.each_cons(2) do |list, following|
    match = list.select { |field| following.select { |f| field == f }.any? }
    return nil if match.empty?

    id_list << match.first
  end

  # NOTE(review): id_list.last is a single field, so +first+ is sent to the
  # field itself; kept as in the original — confirm that this is intended.
  last_key = files.last.all_fields.first
  id_list << last_key if id_list.last.first != last_key
  id_list.zip(files)
end
150
+
151
# Builds an index that translates the first field of the first file into
# identifiers reachable by walking through +files+.
#
# files        - ordered list of files, as accepted by find_path.
# in_namespace - forwarded to find_path.
#
# Returns nil when find_path finds no path. Otherwise the index of the first
# step is built (:persistence => false) and then, for every further step,
# its values are replaced by the flattened, de-duplicated values looked up
# in that step's index (:persistence => true), and its fields by that
# index's fields. The +index+ and +process+ methods are defined elsewhere.
def self.build_traverse_index(files, in_namespace = false)
  path = find_path(files, in_namespace)
  return nil if path.nil?

  Log.medium "Found Traversal: #{path.collect { |step| step.first } * " => "}"

  start_key = files.first.all_fields.first
  first_id, first_file = path.shift
  index = first_file.index :target => first_id, :fields => start_key, :persistence => false

  until path.empty?
    step_id, step_file = path.shift
    step_index = step_file.index :target => step_id, :fields => index.fields.first, :persistence => true
    index.process(0) { |value| step_index.values_at(*value).flatten.uniq }
    index.fields = step_index.fields
  end

  index
end
177
+
178
# Searches for a chain of identifier files that connects +tsv1+ to +tsv2+
# and returns the first traversal index that can be built, or nil.
#
# Increasingly long prefixes of [tsv1, *its identifier files] are combined
# with increasingly long prefixes of [tsv2, *its identifier files] (the
# latter reversed), and each combination is handed to build_traverse_index.
#
# in_namespace - forwarded to build_traverse_index.
def self.find_traversal(tsv1, tsv2, in_namespace = false)
  # Build new arrays instead of unshifting/shifting on the results of
  # identifier_files: that method can return the TSV's own identifiers
  # array, which the original code modified and emptied as a side effect.
  chain1 = [tsv1] + (tsv1.identifier_files || [])
  chain2 = [tsv2] + (tsv2.identifier_files || [])

  chain1.each_index do |i|
    files1 = chain1[0..i]
    chain2.each_index do |j|
      files2 = chain2[0..j]
      index = build_traverse_index(files1 + files2.reverse, in_namespace)
      return index unless index.nil?
    end
  end

  nil
end
198
+
199
# Attaches fields from +other+ to this TSV, choosing the strategy from how
# the two TSVs relate:
#
# * same key field                      -> attach_same_key
# * other's key field is one of our
#   fields (restricted to the namespace
#   when :in_namespace is set)          -> attach_source_key
# * otherwise                           -> attach_index, through an index
#                                          found by TSV.find_traversal
#
# other   - TSV to take the fields from.
# fields  - fields to attach; :all selects every field of +other+ not
#           already present, nil selects those of them that are in
#           +other+'s namespace.
# options - :in_namespace (default true).
#
# Raises RuntimeError ("Cannot traverse identifiers") when no traversal
# index can be found.
def attach(other, fields = nil, options = {})
  options = Misc.add_defaults options, :in_namespace => true
  in_namespace = Misc.process_options options, :in_namespace

  fields = other.fields - [key_field].concat(self.fields) if fields == :all
  fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  if key_field == other.key_field
    attach_same_key other, fields
  elsif (in_namespace ? self.fields_in_namespace : self.fields).include?(other.key_field)
    attach_source_key other, other.key_field, fields
  else
    index = TSV.find_traversal(self, other, in_namespace)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
end
220
+
221
# Calls +reorder+ (defined elsewhere) with :key and the positions of those
# fields of this TSV whose full name matches the full name of a field of
# +file+, and returns its result.
def detach(file)
  names = file.fields.collect { |field| field.fullname }

  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if names.include? field.fullname
  end

  reorder :key, positions
end
227
+
228
+ end