rbbt-util 1.2.1 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rbbt-util.rb +2 -1
- data/lib/rbbt/util/R.rb +18 -1
- data/lib/rbbt/util/cmd.rb +7 -6
- data/lib/rbbt/util/data_module.rb +31 -11
- data/lib/rbbt/util/fix_width_table.rb +209 -0
- data/lib/rbbt/util/log.rb +12 -2
- data/lib/rbbt/util/misc.rb +91 -12
- data/lib/rbbt/util/open.rb +18 -9
- data/lib/rbbt/util/path.rb +152 -0
- data/lib/rbbt/util/persistence.rb +282 -75
- data/lib/rbbt/util/pkg_data.rb +16 -59
- data/lib/rbbt/util/pkg_software.rb +15 -1
- data/lib/rbbt/util/rake.rb +5 -1
- data/lib/rbbt/util/tc_hash.rb +129 -59
- data/lib/rbbt/util/tsv.rb +109 -1284
- data/lib/rbbt/util/tsv/accessor.rb +273 -0
- data/lib/rbbt/util/tsv/attach.rb +228 -0
- data/lib/rbbt/util/tsv/index.rb +303 -0
- data/lib/rbbt/util/tsv/manipulate.rb +271 -0
- data/lib/rbbt/util/tsv/parse.rb +258 -0
- data/share/lib/R/util.R +5 -3
- data/test/rbbt/util/test_R.rb +9 -1
- data/test/rbbt/util/test_data_module.rb +5 -0
- data/test/rbbt/util/test_fix_width_table.rb +107 -0
- data/test/rbbt/util/test_misc.rb +43 -0
- data/test/rbbt/util/test_open.rb +0 -1
- data/test/rbbt/util/test_path.rb +10 -0
- data/test/rbbt/util/test_persistence.rb +63 -2
- data/test/rbbt/util/test_pkg_data.rb +29 -8
- data/test/rbbt/util/test_tc_hash.rb +52 -0
- data/test/rbbt/util/test_tsv.rb +55 -678
- data/test/rbbt/util/tsv/test_accessor.rb +109 -0
- data/test/rbbt/util/tsv/test_attach.rb +271 -0
- data/test/rbbt/util/tsv/test_index.rb +158 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
- data/test/rbbt/util/tsv/test_parse.rb +72 -0
- data/test/test_helper.rb +1 -0
- metadata +25 -4
@@ -0,0 +1,273 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
|
3
|
+
class TSV
|
4
|
+
## Make sure we overwrite the methods declared by attr_accessor
|
5
|
+
MAIN_ACCESSORS = :data, :key_field, :fields, :cast
|
6
|
+
EXTRA_ACCESSORS = :filename, :identifiers, :namespace, :datadir, :type, :case_insensitive
|
7
|
+
attr_accessor *(MAIN_ACCESSORS + EXTRA_ACCESSORS)
|
8
|
+
|
9
|
+
# Transposes a list of parallel value lists into one tuple per position.
#
# list   - Array of Arrays (one inner Array per field).
# fields - optional field names; when not given and +list+ responds to
#          +fields+, those are used instead.
#
# Returns [] for a nil or empty list. Otherwise returns the zipped tuples,
# each passed through NamedArray.name when field names are available.
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields = list.fields if !fields && list.respond_to?(:fields)

  head = list[0]
  tail = list[1..-1]
  tuples = head.zip(*tail)
  return tuples unless fields

  tuples.collect { |tuple| NamedArray.name(tuple, fields) }
end
|
16
|
+
|
17
|
+
# Mixin for field-name strings. Adds an optional namespace (either an
# explicit "Namespace:" prefix in the string itself or an assigned
# attribute) and a case-insensitive, namespace-tolerant equality.
module Field
  # The reader is defined explicitly below so that a prefix in the string
  # takes precedence over the assigned value.
  attr_writer :namespace

  # Extends +field+ with Field, assigns +namespace+ and returns +field+.
  def self.field(field, namespace = nil)
    field.extend Field
    field.namespace = namespace
    field
  end

  # Returns the text before the last ':' of +string+, or nil when the
  # string carries no such prefix.
  def self.namespace(string)
    found = string.match(/(.+):/)
    return nil unless found

    prefix = found[1]
    return nil if prefix.nil? || prefix.empty?

    prefix
  end

  # Returns self when it already contains ':' or has no namespace;
  # otherwise a new "namespace:self" string.
  def fullname
    return self if self =~ /:/ or namespace.nil?

    prefixed = namespace + ":"
    prefixed << self
  end

  # Case-insensitive comparison against another String. Matches when the
  # raw names, the full names, or this name without its prefix are equal.
  def ==(string)
    return false unless String === string
    return true if casecmp(string) == 0

    counterpart = Field === string ? string.fullname : string
    return true if fullname.casecmp(counterpart) == 0

    sub(/.*:/, '').casecmp(string) == 0
  end

  # Namespace taken from the string prefix, falling back to the assigned one.
  def namespace
    Field.namespace(self) || @namespace
  end

  # True when this field has no namespace or shares +other+'s namespace.
  def matching_namespaces(other)
    own = namespace
    return true if own.nil?

    own == other.namespace
  end
end
|
59
|
+
|
60
|
+
#{{{{ Field END
|
61
|
+
|
62
|
+
# Resolves the identifier sources configured for this TSV.
#
# Returns an Array:
# * [identifiers] when +identifiers+ is a TSV;
# * +identifiers+ itself when it is an empty Array or an Array of TSVs;
# * one Path.path(f, datadir, namespace) per entry for any other Array;
# * [Path.path(identifiers, datadir)] for any other non-nil value;
# * whatever the file's own path reports when only +filename+ is set;
# * [] otherwise.
def identifier_files
  if identifiers
    case identifiers
    when TSV
      [identifiers]
    when Array
      if TSV === identifiers.first or identifiers.empty?
        identifiers
      else
        # The original used a bare `when` followed by a newline here, which
        # parsed the collect expression as the condition with an empty
        # body, so this branch always evaluated to nil.
        identifiers.collect { |f| Path.path(f, datadir, namespace) }
      end
    else
      [Path.path(identifiers, datadir)]
    end
  elsif filename
    Path.path(filename, datadir).identifier_files
  else
    []
  end
end
|
81
|
+
|
82
|
+
# Selects the fields that either have no namespace or belong to +namespace+.
#
# namespace - namespace to filter by; nil or true means this TSV's own.
def fields_in_namespace(namespace = nil)
  namespace = self.namespace if namespace.nil? || TrueClass === namespace

  fields.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? || field_namespace == namespace
  end
end
|
86
|
+
|
87
|
+
# Reader for the field names.
#
# Returns nil when no fields are set. Otherwise every name is extended
# with Field (when @fields is an Array), stamped with this TSV's namespace
# (when there is one), and the list is passed through NamedArray.name.
# Note that the elements of @fields themselves are modified.
def fields
  return nil if @fields.nil?

  names = @fields
  names.each { |name| name.extend Field } if Array === @fields

  unless namespace.nil?
    names.each { |name| name.namespace = namespace }
  end

  NamedArray.name(names, @fields)
end
|
94
|
+
|
95
|
+
# Key field followed by all value fields.
#
# Returns nil when no fields are set. Otherwise a new list (a copy of
# @fields with the key field prepended) whose elements are extended with
# Field and stamped with this TSV's namespace, registered with NamedArray.
def all_fields
  return nil if @fields.nil?

  combined = @fields.dup
  combined.unshift key_field

  combined.each { |name| name.extend Field } if Array === @fields

  unless namespace.nil?
    combined.each { |name| name.namespace = namespace }
  end

  NamedArray.name(combined, [key_field] + @fields)
  combined
end
|
104
|
+
|
105
|
+
# Like all_fields, restricted to fields without a namespace or within
# +namespace+.
#
# namespace - namespace to filter by; nil or true means this TSV's own.
#
# Returns nil when there are no fields, and the unfiltered list when the
# effective namespace is nil.
def all_namespace_fields(namespace = nil)
  namespace = self.namespace if namespace.nil? || TrueClass === namespace

  candidates = self.all_fields
  return candidates if candidates.nil? || namespace.nil?

  candidates.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? || field_namespace == namespace
  end
end
|
112
|
+
|
113
|
+
# Resolves a field reference to a position.
#
# key    - name of the key field.
# fields - list of value field names (may be nil).
# field  - Integer position, field name, :key, or nil.
#
# Returns +field+ unchanged when it is an Integer, :key when it denotes the
# key field, nil when +fields+ is nil, and otherwise the index of +field+
# among the names (compared first as strings, then as-is) or nil.
def self.identify_field(key, fields, field)
  return field if Integer === field
  return :key if field.nil? || field == 0 || field.to_sym == :key || key == field
  return nil if fields.nil?

  by_name = fields.collect { |f| f.to_s }.index(field)
  by_name || fields.index(field)
end
|
120
|
+
|
121
|
+
# Instance-level shortcut: resolves +field+ against this TSV's key field
# and field names via TSV.identify_field.
def identify_field(field)
  TSV.identify_field(key_field, fields, field)
end
|
124
|
+
|
125
|
+
# Writer for the field names.
#
# When +new_fields+ is an Array it is rewritten in place: a Field entry
# without a ':' whose namespace is set and differs from this TSV's
# namespace is replaced by "namespace:name"; every other entry is kept.
# The list is then stored and also pushed to @data when it accepts fields.
def fields=(new_fields)
  if Array === new_fields
    new_fields.collect! do |field|
      next field unless Field === field

      foreign = field !~ /:/ && field.namespace != nil && field.namespace != namespace
      foreign ? field.namespace + ":" + field.to_s : field
    end
  end

  @fields = new_fields
  @data.fields = new_fields if @data.respond_to? :fields=
end
|
140
|
+
|
141
|
+
# Plain writer for the field names: stores +new_fields+ as-is (no
# namespace rewriting) and forwards it to @data when @data accepts fields.
def old_fields=(new_fields)
  @fields = new_fields
  return unless @data.respond_to?(:fields=)

  @data.fields = new_fields
end
|
145
|
+
|
146
|
+
# Keys of the underlying data store.
def keys
  @data.keys
end
|
149
|
+
|
150
|
+
# Raw values of the underlying data store (not passed through #follow).
def values
  @data.values
end
|
153
|
+
|
154
|
+
# Number of entries in the underlying data store.
def size
  @data.size
end
|
157
|
+
|
158
|
+
# Write
|
159
|
+
|
160
|
+
# Stores +value+ under +key+; the key is downcased first when the TSV is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
|
164
|
+
|
165
|
+
|
166
|
+
# Copies every key/value pair of +new_data+ into this TSV through #[]=.
# Returns whatever +new_data.each+ returns.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
|
171
|
+
|
172
|
+
# Read
|
173
|
+
|
174
|
+
# Post-processes a raw stored value.
#
# Returns nil for nil; the entry referenced by a "__Ref:<key>" string;
# an Array passed through NamedArray.name when fields are known; and any
# other value unchanged.
def follow(value)
  return nil if value.nil?
  return self[$1] if String === value && value =~ /__Ref:(.*)/

  value = NamedArray.name(value, fields) if Array === value && fields
  value
end
|
183
|
+
|
184
|
+
# Looks up an entry.
#
# For an Array key: returns the raw entry stored under the Array itself
# when present, otherwise the first non-nil result of looking up each
# element in turn, otherwise nil.
# For any other key: downcases it when the TSV is case insensitive (unless
# it starts with "__Ref:") and returns the stored value through #follow.
def [](key)
  if Array === key
    return @data[key] if @data[key] != nil

    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  key = key.downcase if @case_insensitive && key !~ /^__Ref:/
  follow @data[key]
end
|
194
|
+
|
195
|
+
# Looks up each of +keys+ through #[] and returns the results in order.
def values_at(*keys)
  keys.collect { |key| self[key] }
end
|
200
|
+
|
201
|
+
# Calls +block+ with every key and its value after #follow.
def each(&block)
  @data.each { |key, raw| block.call(key, follow(raw)) }
end
|
206
|
+
|
207
|
+
# Maps over the entries, with each value passed through #follow first.
# Without a block, returns the list of [key, value] pairs.
def collect
  unless block_given?
    return @data.collect { |key, raw| [key, follow(raw)] }
  end

  @data.collect { |key, raw| yield key, follow(raw) }
end
|
219
|
+
|
220
|
+
# Sorts the [key, value] pairs (optionally with +block+ as comparator) and
# re-applies NamedArray.name to each value when fields are known.
def sort(&block)
  ordered = collect.sort(&block)

  ordered.collect do |key, value|
    value = NamedArray.name(value, fields) if fields
    [key, value]
  end
end
|
227
|
+
|
228
|
+
# Sorts the [key, value] pairs by the result of +block+.
def sort_by(&block)
  collect.sort_by(&block)
end
|
231
|
+
|
232
|
+
# Renders the value part of one output row: a leading tab, the
# tab-separated fields, and a newline.
#
# values - nil, a scalar, a flat Array, or an Array of value lists.
#
# nil renders as a bare newline when the TSV has no fields, otherwise as
# one empty column per field. Value lists are joined with "|".
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?

    return "\t#{([""] * fields.length) * "\t"}\n"
  end

  return "\t#{values}\n" unless Array === values

  # Inspect every slot, not just the first one: a row whose first slot is
  # nil but which holds value lists elsewhere must still use "|" inside
  # each field instead of flattening the lists into extra columns.
  if values.any? { |entry| Array === entry }
    "\t#{values.collect { |list| Array(list) * "|" } * "\t"}\n"
  else
    "\t#{values * "\t"}\n"
  end
end
|
246
|
+
|
247
|
+
# True when +key+ is present in the data store.
#
# Keys are written and read downcased when the TSV is case insensitive
# (see #[]= and #[]), so the membership test normalizes the key the same
# way #[] does; otherwise include?("ABC") could be false while self["ABC"]
# returns a value.
def include?(key)
  if @case_insensitive and key.respond_to?(:downcase) and key !~ /^__Ref:/
    key = key.downcase
  end
  data.include? key
end
|
250
|
+
|
251
|
+
# Serializes the TSV as text.
#
# keys - optional list of keys; when given only those rows are written,
#        in that order, otherwise every row.
#
# Output: a "#: ..." line with the EXTRA_ACCESSORS rendered by
# Misc.hash2string, a "#key<TAB>fields" header when fields are set, then
# one line per row (key followed by #values_to_s).
def to_s(keys = nil)
  str = ""

  str << "#: " << Misc.hash2string(EXTRA_ACCESSORS.collect{|key| [key, self.send(key)]}) << "\n"
  if fields
    # to_s keeps a Symbol or missing key field from raising on append.
    str << "#" << key_field.to_s << "\t" << fields * "\t" << "\n"
  end

  rows = keys.nil? ? collect : keys.zip(values_at(*keys))
  rows.each do |key, values|
    # Always append the textual form of the key: `str << 65` would append
    # the character "A" rather than "65".
    str << key.to_s << values_to_s(values)
  end

  str
end
|
273
|
+
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
class TSV
|
2
|
+
|
3
|
+
#{{{ Attach Methods
|
4
|
+
|
5
|
+
# Appends fields from +other+ to every row, matching rows by identical key.
#
# other  - TSV sharing this TSV's key field.
# fields - fields of +other+ to append; defaults to those of +other+ not
#          already present here.
#
# Values are wrapped in / unwrapped from single-element lists when only
# one of the two TSVs is of type :double. Rows missing in +other+ are
# padded with one empty value per appended field.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  through do |key, values|
    if other.include? key
      new_values = other[key].values_at(*fields)
      new_values.collect! { |v| [v] }     if type == :double and not other.type == :double
      new_values.collect! { |v| v.first } if not type == :double and other.type == :double
    else
      # Build a fresh placeholder per slot. `[[]] * n` repeats the *same*
      # object n times, so a later in-place change to one padded field
      # would show up in all of them.
      new_values = Array.new(fields.length) { type == :double ? [] : "".dup }
    end

    self[key] = self[key].concat new_values
  end

  self.fields = self.fields.concat other.fields.values_at(*fields)
end
|
25
|
+
|
26
|
+
# Appends fields from +other+ to every row, using the values found in this
# TSV's +source+ field as keys into +other+.
#
# other  - TSV (or an object answering +tsv+) to take values from.
# source - field of this TSV whose values are keys of +other+.
# fields - fields of +other+ to append; defaults to those not present here.
#
# For :double TSVs the values gathered from all source keys are merged per
# field; otherwise the values of the first matching source key are used.
# Rows without any match are padded with one empty value per field.
def attach_source_key(other, source, fields = nil)
  # Coerce first: the default field list below needs the TSV interface.
  other = other.tsv unless TSV === other
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = values[source]
    all_new_values = []

    unless source_keys.nil? or source_keys.empty?
      # A single (non-list) value is treated as a one-element list; String
      # stopped responding to #each in Ruby 1.9.
      source_keys = [source_keys] unless source_keys.respond_to?(:each)

      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          pos == :key ? source_key : other[source_key][pos]
        end

        new_values.collect! { |v| [v] }     if type == :double and not other.type == :double
        new_values.collect! { |v| v.first } if not type == :double and other.type == :double
        all_new_values << new_values
      end
    end

    addition =
      if all_new_values.empty?
        # One fresh placeholder per slot; `[[]] * n` would repeat the same
        # object in every slot.
        Array.new(field_positions.length) { type == :double ? [] : "".dup }
      elsif type == :double
        TSV.zip_fields(all_new_values).collect { |l| l.flatten }
      else
        all_new_values.first
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
|
72
|
+
|
73
|
+
# Appends fields from +other+ to every row, translating this TSV's keys
# into keys of +other+ through +index+.
#
# other  - TSV (or an object answering +tsv+) to take values from.
# index  - maps a key of this TSV to the matching key(s) of +other+.
# fields - fields of +other+ to append; defaults to those not present here.
#
# For :double TSVs the values gathered from all translated keys are merged
# per field; otherwise the values of the first matching key are used.
# Rows without any match are padded with one empty value per field.
def attach_index(other, index, fields = nil)
  # Coerce first: the default field list below needs the TSV interface.
  other = other.tsv unless TSV === other
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  field_positions = fields.collect { |field| other.identify_field field }
  field_names = field_positions.collect { |pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = index[key]
    all_new_values = []

    unless source_keys.nil? or source_keys.empty?
      # A single (non-list) value is treated as a one-element list; String
      # stopped responding to #each in Ruby 1.9.
      source_keys = [source_keys] unless source_keys.respond_to?(:each)

      source_keys.each do |source_key|
        next unless other.include? source_key

        new_values = field_positions.collect do |pos|
          pos == :key ? source_key : other[source_key][pos]
        end

        new_values.collect! { |v| [v] }     if type == :double and not other.type == :double
        new_values.collect! { |v| v.first } if not type == :double and other.type == :double
        all_new_values << new_values
      end
    end

    addition =
      if all_new_values.empty?
        # One fresh placeholder per slot; `[[]] * n` would repeat the same
        # object in every slot.
        Array.new(field_positions.length) { type == :double ? [] : "".dup }
      elsif type == :double
        TSV.zip_fields(all_new_values).collect { |l| l.flatten }
      else
        all_new_values.first
      end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
|
118
|
+
|
119
|
+
#{{{ Attach Helper
|
120
|
+
|
121
|
+
# May make an extra index!
|
122
|
+
# Finds a chain of shared fields linking consecutive +files+.
#
# files        - objects answering +all_fields+ (and, for the first one,
#                +all_namespace_fields+ when +in_namespace+ is set).
# in_namespace - when truthy, restricts the first file's candidate fields
#                via all_namespace_fields(in_namespace).
#
# Returns nil when two consecutive files share no field (or there is no
# hop at all); otherwise the list of linking fields zipped with +files+.
# When the last link is not the first field (the key) of the last file,
# that key is appended as a final step.
def self.find_path(files, in_namespace = false)
  if in_namespace
    ids = [files.first.all_namespace_fields(in_namespace)]
    ids += files[1..-1].collect { |f| f.all_fields }
  else
    ids = files.collect { |f| f.all_fields }
  end

  id_list = []
  ids.each_cons(2) do |list, next_list|
    position = list.index { |field| next_list.any? { |f| field == f } }
    return nil if position.nil?

    id_list << list[position]
  end

  # A single file yields no hops; previously this fell through to a call
  # on nil.
  return nil if id_list.empty?

  target_key = files.last.all_fields.first

  # The original compared `id_list.last.first`, which only worked while
  # String was Enumerable (Ruby 1.8), where it returned a plain copy of
  # the string. Comparing a plain copy keeps that exact-match behaviour.
  id_list << target_key if id_list.last.dup != target_key

  id_list.zip(files)
end
|
150
|
+
|
151
|
+
# Builds an index from the key of the first file to the last identifier of
# the path found by find_path, chaining one index per file on the path.
#
# Returns nil when find_path finds no path; otherwise the composed index.
def self.build_traverse_index(files, in_namespace = false)
  path = find_path(files, in_namespace)

  return nil if path.nil?

  traversal_ids = path.collect { |p| p.first }

  Log.medium "Found Traversal: #{traversal_ids * " => "}"

  # (The original also computed a `target` local here and immediately
  # reset it to nil; it was never read, so it has been dropped.)
  current_key = files.first.all_fields.first
  current_id, current_file = path.shift
  index = current_file.index :target => current_id, :fields => current_key, :persistence => false

  until path.empty?
    current_id, current_file = path.shift
    current_index = current_file.index :target => current_id, :fields => index.fields.first, :persistence => true

    # Replace each value of the running index by what the next index
    # maps it to.
    index.process 0 do |value|
      current_index.values_at(*value).flatten.uniq
    end
    index.fields = current_index.fields
  end

  index
end
|
177
|
+
|
178
|
+
# Searches for an index translating the keys of +tsv1+ into keys of +tsv2+,
# going through their identifier files when needed.
#
# Candidate chains are tried from shortest to longest: growing prefixes of
# [tsv1, *identifier files of tsv1], each followed by the reverse of a
# growing prefix of [tsv2, *identifier files of tsv2].
#
# Returns the first non-nil result of build_traverse_index, or nil.
def self.find_traversal(tsv1, tsv2, in_namespace = false)
  # Build new lists instead of unshift/shift on the arrays returned by
  # identifier_files: those may be the TSVs' own identifier arrays, which
  # the original emptied as a side effect of the search.
  identifiers1 = [tsv1] + (tsv1.identifier_files || [])
  identifiers2 = [tsv2] + (tsv2.identifier_files || [])

  identifiers1.each_index do |i|
    files1 = identifiers1[0..i]

    identifiers2.each_index do |j|
      files2 = identifiers2[0..j]
      index = build_traverse_index(files1 + files2.reverse, in_namespace)
      return index unless index.nil?
    end
  end

  nil
end
|
198
|
+
|
199
|
+
# Attaches fields of +other+ to this TSV, picking the strategy from how
# the two key fields relate.
#
# other   - TSV to take fields from.
# fields  - fields to attach; :all means every field of +other+ not
#           present here, nil means only those within the namespace.
# options - :in_namespace (default true) restricts field matching to the
#           namespace.
#
# Strategy: same key field -> attach_same_key; +other+'s key is one of
# this TSV's fields -> attach_source_key; otherwise an index found with
# TSV.find_traversal -> attach_index.
#
# Raises RuntimeError ("Cannot traverse identifiers") when no traversal
# exists.
def attach(other, fields = nil, options = {})
  options = Misc.add_defaults options, :in_namespace => true
  in_namespace = Misc.process_options options, :in_namespace

  fields = other.fields - [key_field].concat(self.fields) if fields == :all
  fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?

  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  if key_field == other.key_field
    attach_same_key other, fields
  else
    own_fields = in_namespace ? self.fields_in_namespace : self.fields

    if own_fields.include?(other.key_field)
      attach_source_key other, other.key_field, fields
    else
      index = TSV.find_traversal(self, other, in_namespace)
      raise "Cannot traverse identifiers" if index.nil?
      attach_index other, index, fields
    end
  end

  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
end
|
220
|
+
|
221
|
+
# Reorders this TSV around the fields it shares with +file+.
#
# Collects the positions of this TSV's fields whose full name also appears
# among +file+'s fields and returns `reorder :key, positions`.
def detach(file)
  file_fields = file.fields.collect { |field| field.fullname }

  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if file_fields.include? field.fullname
  end

  reorder :key, positions
end
|
227
|
+
|
228
|
+
end
|