rbbt-util 5.27.7 → 5.27.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/tsv/attach.rb +34 -30
- data/lib/rbbt/tsv/attach/util.rb +17 -0
- data/lib/rbbt/util/misc/format.rb +1 -1
- data/lib/rbbt/workflow/usage.rb +1 -1
- data/test/rbbt/tsv/test_attach.rb +3 -2
- data/test/rbbt/util/misc/test_format.rb +10 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d3d7b4d19322b271a94d3e2fe046158857bbedf9adf79d0408b4c50d2ba9254
|
4
|
+
data.tar.gz: 78d6fcffe3d5efd08ec42e0493f3f7c731d31001b726ade88802460e5958354b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc8ba1db9052fb92dc87bd037b8745f232ff20110b7e3d0da4d2e3c9f0ed90b4bdf18f68887c8a1ad1eb44f1828e9fbd614770259e73deda196047cf4f783d06
|
7
|
+
data.tar.gz: 3623a36d9282152a71710a71767fb829ce1258f29161451ffc1b0045c0eee152267dd62c8ca8af0e2e3de7f4ef86d9242279c3d34bbd7a3fdfa18841d857ac8a
|
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -32,7 +32,8 @@ module TSV
|
|
32
32
|
done = false
|
33
33
|
Open.write(output) do |os|
|
34
34
|
options.delete :sep if options[:sep] == "\t"
|
35
|
-
|
35
|
+
header_lines = TSV.header_lines(key_field, fields, options)
|
36
|
+
os.puts header_lines unless header_lines.empty?
|
36
37
|
|
37
38
|
while line
|
38
39
|
key, *parts = line.sub("\n",'').split(sep, -1)
|
@@ -212,6 +213,35 @@ module TSV
|
|
212
213
|
other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
|
213
214
|
Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
|
214
215
|
|
216
|
+
same_key = true
|
217
|
+
begin
|
218
|
+
case
|
219
|
+
when (Misc.match_fields(key_field, other.key_field) and same_key)
|
220
|
+
Log.debug "Attachment with same key: #{other.key_field}"
|
221
|
+
attach_same_key other, fields
|
222
|
+
when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
223
|
+
Log.debug "Found other key field: #{other.key_field}"
|
224
|
+
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
225
|
+
when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
226
|
+
Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
227
|
+
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
228
|
+
else
|
229
|
+
index = TSV.find_traversal(self, other, options)
|
230
|
+
raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
231
|
+
Log.debug "Attachment with index: #{other.key_field}"
|
232
|
+
attach_index other, index, fields
|
233
|
+
end
|
234
|
+
rescue Exception
|
235
|
+
if same_key
|
236
|
+
Log.warn "Could not translate identifiers with same_key"
|
237
|
+
same_key = false
|
238
|
+
retry
|
239
|
+
else
|
240
|
+
raise $!
|
241
|
+
end
|
242
|
+
end
|
243
|
+
Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
244
|
+
|
215
245
|
if complete
|
216
246
|
fill = TrueClass === complete ? nil : complete
|
217
247
|
field_length = self.fields.length
|
@@ -219,6 +249,9 @@ module TSV
|
|
219
249
|
other_common_pos = common_fields.collect{|f| other.fields.index f}
|
220
250
|
this_common_pos = common_fields.collect{|f| self.fields.index f}
|
221
251
|
missing = other.keys - self.keys
|
252
|
+
|
253
|
+
other = other.to_list if other.type == :single
|
254
|
+
|
222
255
|
case type
|
223
256
|
when :single
|
224
257
|
missing.each do |k|
|
@@ -249,35 +282,6 @@ module TSV
|
|
249
282
|
end
|
250
283
|
end
|
251
284
|
|
252
|
-
same_key = true
|
253
|
-
begin
|
254
|
-
case
|
255
|
-
when (Misc.match_fields(key_field, other.key_field) and same_key)
|
256
|
-
Log.debug "Attachment with same key: #{other.key_field}"
|
257
|
-
attach_same_key other, fields
|
258
|
-
when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
259
|
-
Log.debug "Found other key field: #{other.key_field}"
|
260
|
-
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
261
|
-
when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
262
|
-
Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
263
|
-
attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
264
|
-
else
|
265
|
-
index = TSV.find_traversal(self, other, options)
|
266
|
-
raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
267
|
-
Log.debug "Attachment with index: #{other.key_field}"
|
268
|
-
attach_index other, index, fields
|
269
|
-
end
|
270
|
-
rescue Exception
|
271
|
-
if same_key
|
272
|
-
Log.warn "Could not translate identifiers with same_key"
|
273
|
-
same_key = false
|
274
|
-
retry
|
275
|
-
else
|
276
|
-
raise $!
|
277
|
-
end
|
278
|
-
end
|
279
|
-
Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
280
|
-
|
281
285
|
self
|
282
286
|
end
|
283
287
|
|
data/lib/rbbt/tsv/attach/util.rb
CHANGED
@@ -4,9 +4,20 @@ module TSV
|
|
4
4
|
fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
5
5
|
|
6
6
|
fields = [fields].compact unless Array === fields
|
7
|
+
|
8
|
+
common_fields = self.fields & fields
|
9
|
+
|
10
|
+
fields = fields - common_fields
|
11
|
+
|
7
12
|
num_fields = fields.length
|
8
13
|
|
9
14
|
field_positions = fields.collect{|field| other.identify_field field}
|
15
|
+
|
16
|
+
if common_fields.any?
|
17
|
+
common_field_positions = common_fields.collect{|field| self.identify_field field}
|
18
|
+
common_field_positions_other = common_fields.collect{|field| other.identify_field field}
|
19
|
+
end
|
20
|
+
|
10
21
|
other.with_unnamed do
|
11
22
|
with_unnamed do
|
12
23
|
through do |key, values|
|
@@ -28,6 +39,12 @@ module TSV
|
|
28
39
|
new_values = field_positions.collect do |pos|
|
29
40
|
pos == :key ? key : other_values[pos]
|
30
41
|
end
|
42
|
+
|
43
|
+
if common_fields.any?
|
44
|
+
common_field_positions.zip(common_field_positions_other).each do |p1,p2|
|
45
|
+
current[p1] += other_values[p2]
|
46
|
+
end
|
47
|
+
end
|
31
48
|
end
|
32
49
|
|
33
50
|
new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
|
data/lib/rbbt/util/misc/format.rb
CHANGED
@@ -163,7 +163,7 @@ module Misc
|
|
163
163
|
|
164
164
|
def self.fixutf8(string)
|
165
165
|
return nil if string.nil?
|
166
|
-
return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
|
166
|
+
return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
|
167
167
|
(string.respond_to?(:valid_encoding) && string.valid_encoding)
|
168
168
|
|
169
169
|
if string.respond_to?(:encode)
|
data/lib/rbbt/workflow/usage.rb
CHANGED
data/test/rbbt/tsv/test_attach.rb
CHANGED
@@ -213,8 +213,9 @@ row2 E
|
|
213
213
|
|
214
214
|
tsv1 = tsv2 = nil
|
215
215
|
|
216
|
-
tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
|
217
|
-
tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
|
216
|
+
tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
|
217
|
+
tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
|
218
|
+
Log.tsv tsv2
|
218
219
|
|
219
220
|
tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
|
220
221
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.27.7
|
4
|
+
version: 5.27.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -487,6 +487,7 @@ files:
|
|
487
487
|
- test/rbbt/util/concurrency/test_threads.rb
|
488
488
|
- test/rbbt/util/log/test_progress.rb
|
489
489
|
- test/rbbt/util/misc/test_bgzf.rb
|
490
|
+
- test/rbbt/util/misc/test_format.rb
|
490
491
|
- test/rbbt/util/misc/test_lock.rb
|
491
492
|
- test/rbbt/util/misc/test_multipart_payload.rb
|
492
493
|
- test/rbbt/util/misc/test_omics.rb
|
@@ -552,6 +553,7 @@ test_files:
|
|
552
553
|
- test/rbbt/util/test_colorize.rb
|
553
554
|
- test/rbbt/util/misc/test_omics.rb
|
554
555
|
- test/rbbt/util/misc/test_pipes.rb
|
556
|
+
- test/rbbt/util/misc/test_format.rb
|
555
557
|
- test/rbbt/util/misc/test_lock.rb
|
556
558
|
- test/rbbt/util/misc/test_multipart_payload.rb
|
557
559
|
- test/rbbt/util/misc/test_bgzf.rb
|