rbbt-util 1.2.1 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rbbt-util.rb +2 -1
- data/lib/rbbt/util/R.rb +18 -1
- data/lib/rbbt/util/cmd.rb +7 -6
- data/lib/rbbt/util/data_module.rb +31 -11
- data/lib/rbbt/util/fix_width_table.rb +209 -0
- data/lib/rbbt/util/log.rb +12 -2
- data/lib/rbbt/util/misc.rb +91 -12
- data/lib/rbbt/util/open.rb +18 -9
- data/lib/rbbt/util/path.rb +152 -0
- data/lib/rbbt/util/persistence.rb +282 -75
- data/lib/rbbt/util/pkg_data.rb +16 -59
- data/lib/rbbt/util/pkg_software.rb +15 -1
- data/lib/rbbt/util/rake.rb +5 -1
- data/lib/rbbt/util/tc_hash.rb +129 -59
- data/lib/rbbt/util/tsv.rb +109 -1284
- data/lib/rbbt/util/tsv/accessor.rb +273 -0
- data/lib/rbbt/util/tsv/attach.rb +228 -0
- data/lib/rbbt/util/tsv/index.rb +303 -0
- data/lib/rbbt/util/tsv/manipulate.rb +271 -0
- data/lib/rbbt/util/tsv/parse.rb +258 -0
- data/share/lib/R/util.R +5 -3
- data/test/rbbt/util/test_R.rb +9 -1
- data/test/rbbt/util/test_data_module.rb +5 -0
- data/test/rbbt/util/test_fix_width_table.rb +107 -0
- data/test/rbbt/util/test_misc.rb +43 -0
- data/test/rbbt/util/test_open.rb +0 -1
- data/test/rbbt/util/test_path.rb +10 -0
- data/test/rbbt/util/test_persistence.rb +63 -2
- data/test/rbbt/util/test_pkg_data.rb +29 -8
- data/test/rbbt/util/test_tc_hash.rb +52 -0
- data/test/rbbt/util/test_tsv.rb +55 -678
- data/test/rbbt/util/tsv/test_accessor.rb +109 -0
- data/test/rbbt/util/tsv/test_attach.rb +271 -0
- data/test/rbbt/util/tsv/test_index.rb +158 -0
- data/test/rbbt/util/tsv/test_manipulate.rb +226 -0
- data/test/rbbt/util/tsv/test_parse.rb +72 -0
- data/test/test_helper.rb +1 -0
- metadata +25 -4
@@ -0,0 +1,273 @@
|
|
1
|
+
require 'rbbt/util/misc'
|
2
|
+
|
3
|
+
class TSV
|
4
|
+
# Accessor names, split in two groups. EXTRA_ACCESSORS is also the list that
# #to_s walks to build the "#: ..." header line.
# Keep this attr_accessor call above any hand-written method with the same
# name so the hand-written version is the one that wins.
MAIN_ACCESSORS  = [:data, :key_field, :fields, :cast]
EXTRA_ACCESSORS = [:filename, :identifiers, :namespace, :datadir, :type, :case_insensitive]
attr_accessor(*(MAIN_ACCESSORS + EXTRA_ACCESSORS))
|
8
|
+
|
9
|
+
# Transposes a list of per-field value lists into a list of per-entry rows:
# [[a1, a2], [b1, b2]] becomes [[a1, b1], [a2, b2]].
#
# list   - Array of Arrays; nil or empty yields [].
# fields - optional field names; when omitted they are taken from
#          +list.fields+ if +list+ answers to it.
#
# When field names are available every row is passed through
# NamedArray.name(row, fields).
def self.zip_fields(list, fields = nil)
  return [] if list.nil? || list.empty?

  fields ||= list.fields if list.respond_to?(:fields)

  rows = list[0].zip(*list[1..-1])
  return rows unless fields

  rows.collect { |row| NamedArray.name(row, fields) }
end
|
16
|
+
|
17
|
+
# Mixin for field-name strings. A field may carry a namespace, either inline
# as a "namespace:name" prefix or in the @namespace instance variable, and
# compares case-insensitively with or without that prefix.
module Field
  # Only the writer is generated; the reader is defined by hand below.
  attr_writer :namespace

  # Extends +field+ with Field, sets its namespace and returns it.
  def self.field(field, namespace = nil)
    field.extend Field
    field.namespace = namespace
    field
  end

  # Returns the text before the last ":" in +string+, or nil when there is
  # no such prefix.
  def self.namespace(string)
    found = string.match(/(.+):/)
    return nil unless found

    prefix = found[1]
    return nil if prefix.nil? or prefix.empty?
    prefix
  end

  # Returns "namespace:name". Returns self when the name already contains a
  # ":" or when no namespace is known.
  def fullname
    return self if self =~ /:/

    prefix = namespace
    return self if prefix.nil?

    prefix + ":" + self
  end

  # Case-insensitive comparison against another String. Matches on the raw
  # text, on the full names, or on this name with its prefix removed.
  def ==(string)
    return false unless String === string
    return true if casecmp(string) == 0

    other_name = Field === string ? string.fullname : string
    return true if fullname.casecmp(other_name) == 0

    sub(/.*:/, '').casecmp(string) == 0
  end

  # An inline "namespace:" prefix wins over the assigned @namespace.
  def namespace
    Field.namespace(self) || @namespace
  end

  # True when this field has no namespace or shares it with +other+.
  def matching_namespaces(other)
    own = namespace
    own.nil? || own == other.namespace
  end
end
|
59
|
+
|
60
|
+
#{{{{ Field END
|
61
|
+
|
62
|
+
# Returns the list of identifier sources for this TSV, always as an Array.
#
# * +identifiers+ is a TSV: that TSV wrapped in an Array.
# * +identifiers+ is an Array that is empty or starts with a TSV: the Array
#   itself (not a copy).
# * +identifiers+ is any other Array: each entry resolved through
#   Path.path(entry, datadir, namespace).
# * +identifiers+ is any other non-nil value: [Path.path(identifiers, datadir)].
# * otherwise, if +filename+ is set: whatever the Path built from the
#   filename reports as its identifier_files.
# * otherwise: [].
def identifier_files
  ids = identifiers

  case
  when TSV === ids
    [ids]
  when Array === ids
    if TSV === ids.first or ids.empty?
      ids
    else
      # This branch used to be written as a bare `when` followed by a
      # newline, which Ruby parses as `when <collect expression>` with an
      # empty body, so the method returned nil here instead of the paths.
      ids.collect { |file| Path.path(file, datadir, namespace) }
    end
  when ids
    [Path.path(ids, datadir)]
  when filename
    Path.path(filename, datadir).identifier_files
  else
    []
  end
end
|
81
|
+
|
82
|
+
# Selects the fields that have no namespace or whose namespace equals
# +namespace+. Passing nil or true means "use this TSV's own namespace".
def fields_in_namespace(namespace = nil)
  namespace = self.namespace if namespace.nil? or TrueClass === namespace

  fields.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? or field_namespace == namespace
  end
end
|
86
|
+
|
87
|
+
# Reader for the field names. Returns nil when no fields are set.
#
# Side effects on the stored names (they are modified in place, not copied):
# when @fields is an Array every name is extended with Field, and when this
# TSV has a namespace it is assigned to every name. The result is the stored
# object passed through NamedArray.name with itself as the name list.
def fields
  return nil if @fields.nil?

  names = @fields
  names.each { |name| name.extend Field } if Array === names

  current_namespace = namespace
  names.each { |name| name.namespace = current_namespace } unless current_namespace.nil?

  NamedArray.name(names, names)
end
|
94
|
+
|
95
|
+
# Returns the key field followed by all fields, or nil when no fields are
# set. The list is a shallow copy of @fields, so the name strings themselves
# (and the key field) are the stored objects: each is extended with Field
# when @fields is an Array, and receives this TSV's namespace when there is
# one.
def all_fields
  return nil if @fields.nil?

  names = @fields.dup.unshift(key_field)
  names.each { |name| name.extend Field } if Array === @fields

  current_namespace = namespace
  names.each { |name| name.namespace = current_namespace } unless current_namespace.nil?

  NamedArray.name(names, [key_field] + @fields)
  names
end
|
104
|
+
|
105
|
+
# Like #all_fields, restricted to entries without a namespace or whose
# namespace equals +namespace+. nil or true selects this TSV's own namespace.
# Returns nil when there are no fields, and the unfiltered list when no
# namespace applies.
def all_namespace_fields(namespace = nil)
  namespace = self.namespace if namespace.nil? or TrueClass === namespace

  candidates = self.all_fields
  return candidates if candidates.nil? or namespace.nil?

  candidates.select do |field|
    field_namespace = field.namespace
    field_namespace.nil? or field_namespace == namespace
  end
end
|
112
|
+
|
113
|
+
# Resolves a field reference to a position.
#
# key    - name of the key field.
# fields - list of field names (may be nil).
# field  - Integer position, name, :key, or nil.
#
# Returns +field+ untouched when it is an Integer, :key when it is nil,
# names the key (":key"/"key" or equal to +key+), nil when +fields+ is nil,
# and otherwise the index of +field+ in +fields+. The lookup first compares
# against the to_s form of every entry and falls back to Array#index.
def self.identify_field(key, fields, field)
  return field if Integer === field
  return :key if field.nil? or field == 0 or field.to_sym == :key or key == field
  return nil if fields.nil?

  position = fields.collect { |name| name.to_s }.index(field)
  position.nil? ? fields.index(field) : position
end
|
120
|
+
|
121
|
+
# Instance shortcut for TSV.identify_field using this TSV's key field and
# field list.
def identify_field(field)
  TSV.identify_field(key_field, fields, field)
end
|
124
|
+
|
125
|
+
# Sets the field names, also forwarding them to @data when it accepts
# +fields=+.
#
# When +new_fields+ is an Array it is rewritten in place: a Field entry
# that has no ":" of its own and whose namespace is set and differs from
# this TSV's namespace is replaced by "namespace:name". Everything else is
# kept as is.
def fields=(new_fields)
  if Array === new_fields
    new_fields.collect! do |field|
      next field unless Field === field

      field_namespace = field.namespace
      foreign = field !~ /:/ && field_namespace != nil && field_namespace != namespace
      foreign ? field_namespace + ":" + field.to_s : field
    end
  end

  @fields = new_fields
  @data.fields = new_fields if @data.respond_to? :fields=
end
|
140
|
+
|
141
|
+
# Plain setter for the field names: stores +new_fields+ untouched and
# forwards it to @data when @data accepts +fields=+. Unlike #fields= it does
# no namespace rewriting.
def old_fields=(new_fields)
  @fields = new_fields
  @data.fields = new_fields if @data.respond_to?(:fields=)
end
|
145
|
+
|
146
|
+
# Keys of the underlying data store, as returned by @data.keys.
def keys
  @data.keys
end
|
149
|
+
|
150
|
+
# Raw values of the underlying data store, as returned by @data.values
# (they are not passed through #follow).
def values
  @data.values
end
|
153
|
+
|
154
|
+
# Number of entries in the underlying data store.
def size
  @data.size
end
|
157
|
+
|
158
|
+
# Write
|
159
|
+
|
160
|
+
# Stores +value+ under +key+; the key is downcased first when the TSV is
# case insensitive.
def []=(key, value)
  stored_key = @case_insensitive ? key.downcase : key
  @data[stored_key] = value
end
|
164
|
+
|
165
|
+
|
166
|
+
# Copies every key/value pair of +new_data+ into this TSV through #[]=, so
# the usual key handling applies. Returns whatever +new_data.each+ returns.
def merge!(new_data)
  new_data.each { |key, value| self[key] = value }
end
|
171
|
+
|
172
|
+
# Read
|
173
|
+
|
174
|
+
# Post-processes a raw stored value.
#
# * nil stays nil.
# * a String containing "__Ref:<key>" is resolved by looking <key> up in
#   this TSV.
# * an Array is passed through NamedArray.name with the TSV fields, when
#   fields are set.
# * anything else is returned unchanged.
def follow(value)
  return nil if value.nil?
  return self[$1] if String === value && value =~ /__Ref:(.*)/
  return value unless Array === value and fields

  NamedArray.name(value, fields)
end
|
183
|
+
|
184
|
+
# Looks +key+ up in the data store.
#
# An Array key is first tried as a literal key (the raw stored value is
# returned, without #follow); failing that, each element is looked up in
# turn and the first non-nil result wins, or nil.
#
# Any other key is downcased when the TSV is case insensitive (unless it
# starts with "__Ref:") and the stored value is returned through #follow.
def [](key)
  if Array === key
    literal = @data[key]
    return literal if literal != nil

    key.each do |candidate|
      found = self[candidate]
      return found unless found.nil?
    end
    return nil
  end

  key = key.downcase if @case_insensitive and key !~ /^__Ref:/
  follow(@data[key])
end
|
194
|
+
|
195
|
+
# Looks every given key up through #[] and returns the results in order.
def values_at(*keys)
  keys.collect { |key| self[key] }
end
|
200
|
+
|
201
|
+
# Iterates over the stored entries, calling the block with each key and its
# value after #follow has been applied.
def each(&block)
  @data.each { |key, raw| block.call(key, follow(raw)) }
end
|
206
|
+
|
207
|
+
# Maps over the stored entries with values resolved through #follow.
# With a block, returns the block results for (key, value); without one,
# returns [key, value] pairs.
def collect
  @data.collect do |key, raw|
    resolved = follow(raw)
    block_given? ? yield(key, resolved) : [key, resolved]
  end
end
|
219
|
+
|
220
|
+
# Sorts the [key, value] pairs produced by #collect (optionally with the
# given comparison block) and, when fields are set, re-applies
# NamedArray.name to every value.
def sort(&block)
  ordered = collect.sort(&block)

  ordered.collect do |key, value|
    value = NamedArray.name(value, fields) if fields
    [key, value]
  end
end
|
227
|
+
|
228
|
+
# Sorts the [key, value] pairs produced by #collect by the block's result.
def sort_by(&block)
  collect.sort_by(&block)
end
|
231
|
+
|
232
|
+
# Renders the value part of one output line (everything after the key),
# newline included.
#
# * nil values: a bare newline without fields, otherwise one empty
#   tab-separated cell per field.
# * a non-Array value: its to_s in a single cell.
# * an Array whose first element is an Array: each element joined with "|",
#   cells joined with tabs (nil elements render as empty cells).
# * any other Array: its elements joined with tabs.
def values_to_s(values)
  if values.nil?
    return "\n" if fields.nil?
    return "\t" + ([""] * fields.length) * "\t" + "\n"
  end

  return "\t" + values.to_s + "\n" unless Array === values

  cells = if Array === values.first
            values.collect { |list| (list || []) * "|" }
          else
            values
          end

  "\t" + cells * "\t" + "\n"
end
|
246
|
+
|
247
|
+
# Tells whether +key+ is present in the data store.
#
# The key is normalized the same way #[] normalizes it: on a case
# insensitive TSV a String key is downcased unless it starts with "__Ref:".
# Without this, include?("ABC") could be false while self["ABC"] finds a
# value, because #[]= stores downcased keys. Non-String keys are passed
# through untouched.
def include?(key)
  key = key.downcase if @case_insensitive and String === key and key !~ /^__Ref:/
  data.include? key
end
|
250
|
+
|
251
|
+
# Serializes the TSV as text.
#
# Line 1 is "#: " followed by Misc.hash2string of every EXTRA_ACCESSORS
# name/value pair. When fields are set, line 2 is "#<key field>" and the
# field names, tab separated. Then one line per entry: the key followed by
# #values_to_s of its values.
#
# keys - optional list of keys; when given only those entries are written,
#        in that order (values fetched with #values_at). Symbol keys are
#        written as strings.
def to_s(keys = nil)
  accessor_values = EXTRA_ACCESSORS.collect { |name| [name, send(name)] }

  text = ""
  text << "#: " << Misc.hash2string(accessor_values) << "\n"
  text << "#" << key_field << "\t" << fields * "\t" << "\n" if fields

  rows = keys.nil? ? self : keys.zip(values_at(*keys))
  rows.each do |key, values|
    key = key.to_s if Symbol === key
    text << key.dup << values_to_s(values)
  end

  text
end
|
273
|
+
end
|
@@ -0,0 +1,228 @@
|
|
1
|
+
class TSV
|
2
|
+
|
3
|
+
#{{{ Attach Methods
|
4
|
+
|
5
|
+
# Appends fields from +other+ to every row of this TSV, matching rows by
# identical key.
#
# other  - object answering fields, type, include? and [] like a TSV.
# fields - fields of +other+ to attach; defaults to the fields of +other+
#          that are not already this TSV's key or fields.
#
# Values are wrapped in / unwrapped from Arrays when exactly one of the two
# TSVs is of type :double. Rows missing from +other+ are padded with one
# empty value per attached field. Finally self.fields is extended with the
# attached field names.
def attach_same_key(other, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  through do |key, values|
    if other.include? key
      new_values = other[key].values_at(*fields)
      new_values.collect!{|v| [v]}     if     type == :double and not other.type == :double
      new_values.collect!{|v| v.first} if not type == :double and     other.type == :double
    else
      # Build a separate empty value per column. `[[]] * n` would place the
      # same Array object in every column, so a later in-place change to
      # one cell would show up in all of them.
      new_values = Array.new(fields.length) { type == :double ? [] : "" }
    end

    self[key] = self[key].concat new_values
  end

  self.fields = self.fields.concat other.fields.values_at(*fields)
end
|
25
|
+
|
26
|
+
# Appends fields from +other+ to every row of this TSV. Rows of +other+ are
# found through the values this TSV holds in its +source+ field.
#
# other  - a TSV, or an object answering #tsv (converted below).
# source - reference used to read the lookup keys from each row
#          (values[source]).
# fields - fields of +other+ to attach; defaults to the fields of +other+
#          that are not already this TSV's key or fields.
#
# For a :double TSV the values found for all source keys are merged per
# field; otherwise only the first matching row is used. Rows without any
# match are padded with one empty value per attached field. Finally
# self.fields is extended with the attached field names.
def attach_source_key(other, source, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = values[source]
    # The source cell may hold a bare value instead of a list (presumably
    # the case for non-:double rows); Strings have no #each on Ruby 1.9+.
    source_keys = [source_keys] unless source_keys.nil? or Array === source_keys

    all_new_values = []
    (source_keys || []).each do |source_key|
      next unless other.include? source_key

      new_values = field_positions.collect do |pos|
        pos == :key ? source_key : other[source_key][pos]
      end
      new_values.collect!{|v| [v]}     if     type == :double and not other.type == :double
      new_values.collect!{|v| v.first} if not type == :double and     other.type == :double
      all_new_values << new_values
    end

    addition = if all_new_values.empty?
                 # One separate empty value per column; `[[]] * n` would
                 # reuse a single Array object for all of them.
                 Array.new(field_positions.length) { type == :double ? [] : "" }
               elsif type == :double
                 TSV.zip_fields(all_new_values).collect{|l| l.flatten}
               else
                 all_new_values.first
               end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
|
72
|
+
|
73
|
+
# Appends fields from +other+ to every row of this TSV. Rows of +other+ are
# found by translating this TSV's keys through +index+.
#
# other  - a TSV, or an object answering #tsv (converted below).
# index  - object answering [] with one of this TSV's keys and returning
#          the matching key(s) of +other+.
# fields - fields of +other+ to attach; defaults to the fields of +other+
#          that are not already this TSV's key or fields.
#
# For a :double TSV the values found for all translated keys are merged per
# field; otherwise only the first matching row is used. Rows without any
# match are padded with one empty value per attached field. Finally
# self.fields is extended with the attached field names.
def attach_index(other, index, fields = nil)
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?

  other = other.tsv unless TSV === other
  field_positions = fields.collect{|field| other.identify_field field}
  field_names     = field_positions.collect{|pos| pos == :key ? other.key_field : other.fields[pos] }

  through do |key, values|
    source_keys = index[key]
    # Accept an index that answers with a bare key instead of a list;
    # Strings have no #each on Ruby 1.9+.
    source_keys = [source_keys] unless source_keys.nil? or Array === source_keys

    all_new_values = []
    (source_keys || []).each do |source_key|
      next unless other.include? source_key

      new_values = field_positions.collect do |pos|
        pos == :key ? source_key : other[source_key][pos]
      end
      new_values.collect!{|v| [v]}     if     type == :double and not other.type == :double
      new_values.collect!{|v| v.first} if not type == :double and     other.type == :double
      all_new_values << new_values
    end

    addition = if all_new_values.empty?
                 # One separate empty value per column; `[[]] * n` would
                 # reuse a single Array object for all of them.
                 Array.new(field_positions.length) { type == :double ? [] : "" }
               elsif type == :double
                 TSV.zip_fields(all_new_values).collect{|l| l.flatten}
               else
                 all_new_values.first
               end

    self[key] = self[key].concat addition
  end

  self.fields = self.fields.concat field_names
end
|
118
|
+
|
119
|
+
#{{{ Attach Helper
|
120
|
+
|
121
|
+
# May make an extra index!
|
122
|
+
# Finds, for each consecutive pair of +files+, a field they have in common,
# so the files can be chained from the first to the last.
#
# files        - list of objects answering all_fields (and, for the first
#                one, all_namespace_fields when +in_namespace+ is set).
# in_namespace - when truthy, only the first file's fields in that
#                namespace are considered for the first link.
#
# Returns nil as soon as two consecutive files share no field. Otherwise
# returns the list of linking fields zipped with +files+ as [field, file]
# pairs; the key field of the last file is appended as a final step unless
# the last link already is that field.
def self.find_path(files, in_namespace = false)
  if in_namespace
    ids = [files.first.all_namespace_fields(in_namespace)]
    ids += files[1..-1].collect{|f| f.all_fields}
  else
    ids = files.collect{|f| f.all_fields}
  end
  id_list = []

  ids.each_with_index do |list, i|
    break if i == ids.length - 1
    match = list.select{|field|
      ids[i + 1].select{|f| field == f}.any?
    }
    return nil if match.empty?
    id_list << match.first
  end

  last_key = files.last.all_fields.first
  # The original compared `id_list.last.first`, which relied on Ruby 1.8
  # Strings being Enumerable; String#first does not exist on 1.9+. eql?
  # keeps the exact (case-sensitive) String comparison that expression
  # produced.
  id_list << last_key unless id_list.last.eql?(last_key)

  # zip stops at the length of id_list, so no slicing of files is needed.
  id_list.zip(files)
end
|
150
|
+
|
151
|
+
# Builds a single index that translates the key of the first file in
# +files+ through each intermediate file in turn.
#
# Returns nil when find_path cannot link the files. Otherwise the first
# step of the path is indexed (from the first file's key to the first
# linking field) and every further step rewrites the index values by
# looking them up in that step's index. The first index is built with
# :persistence => false, the intermediate ones with :persistence => true.
def self.build_traverse_index(files, in_namespace = false)
  path = find_path(files, in_namespace)

  return nil if path.nil?

  traversal_ids = path.collect{|p| p.first}

  Log.medium "Found Traversal: #{traversal_ids * " => "}"

  # (Two assignments to an unused local `target` were removed here: it was
  # set, immediately reset to nil, and never read.)
  current_key = files.first.all_fields.first
  current_id, current_file = path.shift
  index = current_file.index :target => current_id, :fields => current_key, :persistence => false

  until path.empty?
    current_id, current_file = path.shift
    current_index = current_file.index :target => current_id, :fields => index.fields.first, :persistence => true
    index.process 0 do |value|
      current_index.values_at(*value).flatten.uniq
    end
    index.fields = current_index.fields
  end

  index
end
|
177
|
+
|
178
|
+
# Searches for a chain of identifier files that links +tsv1+ to +tsv2+.
#
# Candidate chains start at tsv1 (extended one by one with its identifier
# files) and end at tsv2 (reached through a growing prefix of its identifier
# files, reversed). The first chain for which build_traverse_index returns
# an index wins; nil is returned when none does.
def self.find_traversal(tsv1, tsv2, in_namespace = false)
  # Build fresh lists. identifier_files may hand back the TSV's own
  # identifiers Array, and the original unshift/shift calls on it changed
  # that Array for every later caller.
  identifiers1 = [tsv1] + (tsv1.identifier_files || [])
  identifiers2 = [tsv2] + (tsv2.identifier_files || [])

  files1 = []
  while identifiers1.any?
    files1.push identifiers1.shift
    identifiers2.each_with_index do |e, i|
      files2 = identifiers2[(0..i)]
      index = build_traverse_index(files1 + files2.reverse, in_namespace)
      return index if not index.nil?
    end
  end

  nil
end
|
198
|
+
|
199
|
+
# Attaches fields from +other+ to this TSV, choosing the strategy from how
# the two relate:
#
# * same key field: rows are matched directly (attach_same_key);
# * other's key field is one of this TSV's fields (restricted to the
#   namespace when :in_namespace is on): attach_source_key;
# * otherwise an index is searched with TSV.find_traversal and used with
#   attach_index.
#
# fields  - :all for every field of +other+ not already present; nil for
#           the same but limited to other.fields_in_namespace; or an
#           explicit list.
# options - :in_namespace (default true).
#
# Raises RuntimeError ("Cannot traverse identifiers") when no index links
# the two TSVs.
def attach(other, fields = nil, options = {})
  options = Misc.add_defaults options, :in_namespace => true
  in_namespace = Misc.process_options options, :in_namespace

  fields = other.fields - [key_field].concat(self.fields) if fields == :all
  fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?

  Log.high("Attaching fields:#{fields.inspect} from #{other.filename.inspect}.")

  if key_field == other.key_field
    attach_same_key other, fields
  elsif (in_namespace ? self.fields_in_namespace : self.fields).include?(other.key_field)
    attach_source_key other, other.key_field, fields
  else
    index = TSV.find_traversal(self, other, in_namespace)
    raise "Cannot traverse identifiers" if index.nil?
    attach_index other, index, fields
  end

  Log.medium("Attachment of fields:#{fields.inspect} from #{other.filename.inspect} finished.")
end
|
220
|
+
|
221
|
+
# Collects the positions of this TSV's fields whose full name also appears
# among the fields of +file+, and returns the result of
# reorder(:key, positions).
def detach(file)
  shared_names = file.fields.collect { |field| field.fullname }

  positions = []
  self.fields.each_with_index do |field, position|
    positions << position if shared_names.include?(field.fullname)
  end

  reorder :key, positions
end
|
227
|
+
|
228
|
+
end
|