rbbt-util 5.27.7 → 5.27.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/attach.rb +34 -30
- data/lib/rbbt/tsv/attach/util.rb +17 -0
- data/lib/rbbt/util/misc/format.rb +1 -1
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/test/rbbt/tsv/test_attach.rb +3 -2
- data/test/rbbt/util/misc/test_format.rb +10 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d3d7b4d19322b271a94d3e2fe046158857bbedf9adf79d0408b4c50d2ba9254
|
4
|
+
data.tar.gz: 78d6fcffe3d5efd08ec42e0493f3f7c731d31001b726ade88802460e5958354b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc8ba1db9052fb92dc87bd037b8745f232ff20110b7e3d0da4d2e3c9f0ed90b4bdf18f68887c8a1ad1eb44f1828e9fbd614770259e73deda196047cf4f783d06
|
7
|
+
data.tar.gz: 3623a36d9282152a71710a71767fb829ce1258f29161451ffc1b0045c0eee152267dd62c8ca8af0e2e3de7f4ef86d9242279c3d34bbd7a3fdfa18841d857ac8a
|
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -32,7 +32,8 @@ module TSV
|
|
32
32
|
done = false
|
33
33
|
Open.write(output) do |os|
|
34
34
|
options.delete :sep if options[:sep] == "\t"
|
35
|
-
|
35
|
+
header_lines = TSV.header_lines(key_field, fields, options)
|
36
|
+
os.puts header_lines unless header_lines.empty?
|
36
37
|
|
37
38
|
while line
|
38
39
|
key, *parts = line.sub("\n",'').split(sep, -1)
|
@@ -212,6 +213,35 @@ module TSV
|
|
212
213
|
other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
|
213
214
|
Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
|
214
215
|
|
216
|
+
same_key = true
|
217
|
+
begin
|
218
|
+
case
|
219
|
+
when (Misc.match_fields(key_field, other.key_field) and same_key)
|
220
|
+
Log.debug "Attachment with same key: #{other.key_field}"
|
221
|
+
attach_same_key other, fields
|
222
|
+
when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
223
|
+
Log.debug "Found other key field: #{other.key_field}"
|
224
|
+
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
225
|
+
when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
226
|
+
Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
227
|
+
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
228
|
+
else
|
229
|
+
index = TSV.find_traversal(self, other, options)
|
230
|
+
raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
231
|
+
Log.debug "Attachment with index: #{other.key_field}"
|
232
|
+
attach_index other, index, fields
|
233
|
+
end
|
234
|
+
rescue Exception
|
235
|
+
if same_key
|
236
|
+
Log.warn "Could not translate identifiers with same_key"
|
237
|
+
same_key = false
|
238
|
+
retry
|
239
|
+
else
|
240
|
+
raise $!
|
241
|
+
end
|
242
|
+
end
|
243
|
+
Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
244
|
+
|
215
245
|
if complete
|
216
246
|
fill = TrueClass === complete ? nil : complete
|
217
247
|
field_length = self.fields.length
|
@@ -219,6 +249,9 @@ module TSV
|
|
219
249
|
other_common_pos = common_fields.collect{|f| other.fields.index f}
|
220
250
|
this_common_pos = common_fields.collect{|f| self.fields.index f}
|
221
251
|
missing = other.keys - self.keys
|
252
|
+
|
253
|
+
other = other.to_list if other.type == :single
|
254
|
+
|
222
255
|
case type
|
223
256
|
when :single
|
224
257
|
missing.each do |k|
|
@@ -249,35 +282,6 @@ module TSV
|
|
249
282
|
end
|
250
283
|
end
|
251
284
|
|
252
|
-
same_key = true
|
253
|
-
begin
|
254
|
-
case
|
255
|
-
when (Misc.match_fields(key_field, other.key_field) and same_key)
|
256
|
-
Log.debug "Attachment with same key: #{other.key_field}"
|
257
|
-
attach_same_key other, fields
|
258
|
-
when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
259
|
-
Log.debug "Found other key field: #{other.key_field}"
|
260
|
-
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
261
|
-
when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
262
|
-
Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
263
|
-
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
264
|
-
else
|
265
|
-
index = TSV.find_traversal(self, other, options)
|
266
|
-
raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
267
|
-
Log.debug "Attachment with index: #{other.key_field}"
|
268
|
-
attach_index other, index, fields
|
269
|
-
end
|
270
|
-
rescue Exception
|
271
|
-
if same_key
|
272
|
-
Log.warn "Could not translate identifiers with same_key"
|
273
|
-
same_key = false
|
274
|
-
retry
|
275
|
-
else
|
276
|
-
raise $!
|
277
|
-
end
|
278
|
-
end
|
279
|
-
Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
280
|
-
|
281
285
|
self
|
282
286
|
end
|
283
287
|
|
data/lib/rbbt/tsv/attach/util.rb
CHANGED
@@ -4,9 +4,20 @@ module TSV
|
|
4
4
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
5
5
|
|
6
6
|
fields = [fields].compact unless Array === fields
|
7
|
+
|
8
|
+
common_fields = self.fields & fields
|
9
|
+
|
10
|
+
fields = fields - common_fields
|
11
|
+
|
7
12
|
num_fields = fields.length
|
8
13
|
|
9
14
|
field_positions = fields.collect{|field| other.identify_field field}
|
15
|
+
|
16
|
+
if common_fields.any?
|
17
|
+
common_field_positions = common_fields.collect{|field| self.identify_field field}
|
18
|
+
common_field_positions_other = common_fields.collect{|field| other.identify_field field}
|
19
|
+
end
|
20
|
+
|
10
21
|
other.with_unnamed do
|
11
22
|
with_unnamed do
|
12
23
|
through do |key, values|
|
@@ -28,6 +39,12 @@ module TSV
|
|
28
39
|
new_values = field_positions.collect do |pos|
|
29
40
|
pos == :key ? key : other_values[pos]
|
30
41
|
end
|
42
|
+
|
43
|
+
if common_fields.any?
|
44
|
+
common_field_positions.zip(common_field_positions_other).each do |p1,p2|
|
45
|
+
current[p1] += other_values[p2]
|
46
|
+
end
|
47
|
+
end
|
31
48
|
end
|
32
49
|
|
33
50
|
new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
|
data/lib/rbbt/util/misc/format.rb
CHANGED
@@ -163,7 +163,7 @@ module Misc
|
|
163
163
|
|
164
164
|
def self.fixutf8(string)
|
165
165
|
return nil if string.nil?
|
166
|
-
return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
|
166
|
+
return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
|
167
167
|
(string.respond_to?(:valid_encoding) && string.valid_encoding)
|
168
168
|
|
169
169
|
if string.respond_to?(:encode)
|
data/lib/rbbt/workflow/usage.rb
CHANGED
(hunk not shown)
data/test/rbbt/tsv/test_attach.rb
CHANGED
@@ -213,8 +213,9 @@ row2 E
|
|
213
213
|
|
214
214
|
tsv1 = tsv2 = nil
|
215
215
|
|
216
|
-
tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
|
217
|
-
tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
|
216
|
+
tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
|
217
|
+
tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
|
218
|
+
Log.tsv tsv2
|
218
219
|
|
219
220
|
tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
|
220
221
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.27.7
|
4
|
+
version: 5.27.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -487,6 +487,7 @@ files:
|
|
487
487
|
- test/rbbt/util/concurrency/test_threads.rb
|
488
488
|
- test/rbbt/util/log/test_progress.rb
|
489
489
|
- test/rbbt/util/misc/test_bgzf.rb
|
490
|
+
- test/rbbt/util/misc/test_format.rb
|
490
491
|
- test/rbbt/util/misc/test_lock.rb
|
491
492
|
- test/rbbt/util/misc/test_multipart_payload.rb
|
492
493
|
- test/rbbt/util/misc/test_omics.rb
|
@@ -552,6 +553,7 @@ test_files:
|
|
552
553
|
- test/rbbt/util/test_colorize.rb
|
553
554
|
- test/rbbt/util/misc/test_omics.rb
|
554
555
|
- test/rbbt/util/misc/test_pipes.rb
|
556
|
+
- test/rbbt/util/misc/test_format.rb
|
555
557
|
- test/rbbt/util/misc/test_lock.rb
|
556
558
|
- test/rbbt/util/misc/test_multipart_payload.rb
|
557
559
|
- test/rbbt/util/misc/test_bgzf.rb
|