rbbt-util 5.44.1 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +67 -90
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +0 -15
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
- data/lib/rbbt/workflow/refactor.rb +153 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +19 -1
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/server +11 -1
- data/share/rbbt_commands/workflow/task +76 -71
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +40 -2
data/lib/rbbt/tsv/attach.rb
CHANGED
@@ -1,299 +1,303 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
when (String === input
|
15
|
-
CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :
|
16
|
-
|
17
|
-
input
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
line =
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
current_parts[i] =
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
current_parts
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
when (String === file2
|
87
|
-
file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :
|
88
|
-
when
|
89
|
-
file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
when
|
156
|
-
key =
|
157
|
-
|
158
|
-
key = key1
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
file1.
|
169
|
-
file2.
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
tsv
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
when :
|
263
|
-
missing.each do |k|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
1
|
+
require_relative '../refactor'
|
2
|
+
Rbbt.require_instead 'scout/tsv'
|
3
|
+
#require 'rbbt/tsv'
|
4
|
+
#require 'rbbt/tsv/attach/util'
|
5
|
+
#module TSV
|
6
|
+
#
|
7
|
+
# # Merge columns from different rows of a file
|
8
|
+
# def self.merge_row_fields(input, output, options = {})
|
9
|
+
# options = Misc.add_defaults options, :sep => "\t"
|
10
|
+
# key_field, fields = Misc.process_options options, :key_field, :fields
|
11
|
+
# sep = options[:sep]
|
12
|
+
#
|
13
|
+
# is = case
|
14
|
+
# when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
|
15
|
+
# CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
|
16
|
+
# when (String === input or StringIO === input)
|
17
|
+
# CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
|
18
|
+
# else
|
19
|
+
# input
|
20
|
+
# end
|
21
|
+
#
|
22
|
+
# if key_field.nil? or fields.nil?
|
23
|
+
# parser = TSV::Parser.new(is, options.dup)
|
24
|
+
# fields ||= parser.fields
|
25
|
+
# key_field ||= parser.key_field
|
26
|
+
# line = parser.first_line
|
27
|
+
# else
|
28
|
+
# line = is.gets
|
29
|
+
# end
|
30
|
+
#
|
31
|
+
# current_key = nil
|
32
|
+
# current_parts = []
|
33
|
+
#
|
34
|
+
# done = false
|
35
|
+
# Open.write(output) do |os|
|
36
|
+
# options.delete :sep if options[:sep] == "\t"
|
37
|
+
# header_lines = TSV.header_lines(key_field, fields, options)
|
38
|
+
# os.puts header_lines unless header_lines.empty?
|
39
|
+
#
|
40
|
+
# while line
|
41
|
+
# key, *parts = line.sub("\n",'').split(sep, -1)
|
42
|
+
# current_key ||= key
|
43
|
+
# case
|
44
|
+
# when key.nil?
|
45
|
+
# when current_key == key
|
46
|
+
# parts.each_with_index do |part,i|
|
47
|
+
# if current_parts[i].nil?
|
48
|
+
# current_parts[i] = part
|
49
|
+
# else
|
50
|
+
# current_parts[i] = current_parts[i] << "|" << part
|
51
|
+
# end
|
52
|
+
# end
|
53
|
+
# when current_key != key
|
54
|
+
# os.puts [current_key, current_parts].flatten * sep
|
55
|
+
# current_key = key
|
56
|
+
# current_parts = parts
|
57
|
+
# end
|
58
|
+
#
|
59
|
+
# line = is.gets
|
60
|
+
# end
|
61
|
+
#
|
62
|
+
# os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
|
63
|
+
#
|
64
|
+
# end
|
65
|
+
# end
|
66
|
+
#
|
67
|
+
# # Merge two files with the same keys and different fields
|
68
|
+
# def self.merge_different_fields(file1, file2, output, options = {})
|
69
|
+
# options = IndiferentHash.add_defaults options, :sep => "\t"
|
70
|
+
# monitor, key_field, fields = IndiferentHash.process_options options, :monitor, :key_field, :fields
|
71
|
+
# sep = options[:sep] || "\t"
|
72
|
+
#
|
73
|
+
# case
|
74
|
+
# when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
|
75
|
+
# size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
|
76
|
+
# file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
|
77
|
+
# when (String === file1 or StringIO === file1)
|
78
|
+
# size = file1.length if monitor
|
79
|
+
# file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
|
80
|
+
# when TSV === file1
|
81
|
+
# size = file1.size if monitor
|
82
|
+
# file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
|
83
|
+
# end
|
84
|
+
#
|
85
|
+
# case
|
86
|
+
# when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
|
87
|
+
# file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
|
88
|
+
# when (String === file2 or StringIO === file2)
|
89
|
+
# file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
|
90
|
+
# when TSV === file2
|
91
|
+
# file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
|
92
|
+
# end
|
93
|
+
#
|
94
|
+
# begin
|
95
|
+
# output = File.open(output, 'w') if String === output
|
96
|
+
#
|
97
|
+
# cols1 = nil
|
98
|
+
# cols2 = nil
|
99
|
+
#
|
100
|
+
# done1 = false
|
101
|
+
# done2 = false
|
102
|
+
#
|
103
|
+
# key1 = key2 = nil
|
104
|
+
# while key1.nil?
|
105
|
+
# while (line1 = file1.gets) =~ /^#/
|
106
|
+
# key_field1, *fields1 = line1.chomp.sub('#','').split(sep)
|
107
|
+
# end
|
108
|
+
# key1, *parts1 = line1.sub("\n",'').split(sep, -1)
|
109
|
+
# cols1 = parts1.length
|
110
|
+
# end
|
111
|
+
#
|
112
|
+
# while key2.nil?
|
113
|
+
# while (line2 = file2.gets) =~ /^#/
|
114
|
+
# key_field2, *fields2 = line2.chomp.sub('#','').split(sep)
|
115
|
+
# end
|
116
|
+
# key2, *parts2 = line2.sub("\n",'').split(sep, -1)
|
117
|
+
# cols2 = parts2.length
|
118
|
+
# end
|
119
|
+
#
|
120
|
+
# #progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
|
121
|
+
# progress_monitor = Log::ProgressBar.new(size, :desc => "Merging fields") if monitor
|
122
|
+
#
|
123
|
+
# entry_hash = options
|
124
|
+
# entry_hash.delete :sep if entry_hash[:sep] == "\t"
|
125
|
+
# output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
|
126
|
+
#
|
127
|
+
# key = key1 < key2 ? key1 : key2
|
128
|
+
# parts = [""] * (cols1 + cols2)
|
129
|
+
# while not (done1 and done2)
|
130
|
+
# while (not done1 and key1 == key)
|
131
|
+
# parts1.each_with_index do |part, i|
|
132
|
+
# parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
|
133
|
+
# end
|
134
|
+
# key1 = nil
|
135
|
+
# while key1.nil? and not done1
|
136
|
+
# if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
|
137
|
+
# end
|
138
|
+
# progress_monitor.tick if monitor
|
139
|
+
# end
|
140
|
+
# while (not done2 and key2 == key)
|
141
|
+
# parts2.each_with_index do |part, i|
|
142
|
+
# i += cols1
|
143
|
+
# parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
|
144
|
+
# end
|
145
|
+
# key2 = nil
|
146
|
+
# while key2.nil? and not done2
|
147
|
+
# if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
#
|
151
|
+
# output.puts [key, parts].flatten * sep
|
152
|
+
# parts = [""] * (cols1 + cols2)
|
153
|
+
#
|
154
|
+
# case
|
155
|
+
# when done1
|
156
|
+
# key = key2
|
157
|
+
# when done2
|
158
|
+
# key = key1
|
159
|
+
# else
|
160
|
+
# key = key1 < key2 ? key1 : key2
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
#
|
164
|
+
# output.close
|
165
|
+
# file1.join if file1.respond_to? :join
|
166
|
+
# file2.join if file2.respond_to? :join
|
167
|
+
# rescue
|
168
|
+
# file1.abort if file1.respond_to? :abort
|
169
|
+
# file2.abort if file2.respond_to? :abort
|
170
|
+
# file1.join if file1.respond_to? :join
|
171
|
+
# file2.join if file2.respond_to? :join
|
172
|
+
# end
|
173
|
+
# end
|
174
|
+
#
|
175
|
+
# def merge_different_fields(other, options = {})
|
176
|
+
# TmpFile.with_file do |output|
|
177
|
+
# TSV.merge_different_fields(self, other, output, options)
|
178
|
+
# tsv = TSV.open output, options
|
179
|
+
# tsv.key_field = self.key_field unless self.key_field.nil?
|
180
|
+
# tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
|
181
|
+
# tsv
|
182
|
+
# end
|
183
|
+
# end
|
184
|
+
#
|
185
|
+
# # Merge columns from different files
|
186
|
+
# def self.merge_paste(files, delim = "$")
|
187
|
+
# CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
|
188
|
+
# end
|
189
|
+
#
|
190
|
+
# end
|
191
|
+
#
|
192
|
+
# def merge_zip(other)
|
193
|
+
# other.each do |k,v|
|
194
|
+
# self.zip_new k, v
|
195
|
+
# end
|
196
|
+
# self
|
197
|
+
# end
|
198
|
+
#
|
199
|
+
#
|
200
|
+
# def attach(other, options = {})
|
201
|
+
# options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
|
202
|
+
# fields, one2one, complete = Misc.process_options options, :fields, :one2one, :complete
|
203
|
+
# in_namespace = options[:in_namespace]
|
204
|
+
#
|
205
|
+
# unless TSV === other
|
206
|
+
# other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
|
207
|
+
# other = TSV.open(other, :persist => options[:persist_input].to_s == "true")
|
208
|
+
# other.identifiers ||= other_identifier_file
|
209
|
+
# end
|
210
|
+
#
|
211
|
+
# fields = other.fields - [key_field].concat(self.fields) if other.fields and (fields.nil? or fields == :all)
|
212
|
+
# if in_namespace
|
213
|
+
# fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
|
214
|
+
# else
|
215
|
+
# fields = other.fields - [key_field].concat(self.fields) if fields.nil?
|
216
|
+
# end
|
217
|
+
#
|
218
|
+
# other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
|
219
|
+
# Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
|
220
|
+
#
|
221
|
+
# same_key = true
|
222
|
+
# begin
|
223
|
+
# case
|
224
|
+
# when (Misc.match_fields(key_field, other.key_field) and same_key)
|
225
|
+
# Log.debug "Attachment with same key: #{other.key_field}"
|
226
|
+
# attach_same_key other, fields
|
227
|
+
# when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
228
|
+
# Log.debug "Found other key field: #{other.key_field}"
|
229
|
+
# attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
230
|
+
# when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
|
231
|
+
# Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
|
232
|
+
# attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
|
233
|
+
# else
|
234
|
+
# index = TSV.find_traversal(self, other, options)
|
235
|
+
# raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
|
236
|
+
# Log.debug "Attachment with index: #{other.key_field}"
|
237
|
+
# attach_index other, index, fields
|
238
|
+
# end
|
239
|
+
# rescue Exception
|
240
|
+
# if same_key
|
241
|
+
# Log.warn "Could not translate identifiers with same_key"
|
242
|
+
# same_key = false
|
243
|
+
# retry
|
244
|
+
# else
|
245
|
+
# raise $!
|
246
|
+
# end
|
247
|
+
# end
|
248
|
+
# Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
|
249
|
+
#
|
250
|
+
# if complete
|
251
|
+
# Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
|
252
|
+
# fill = TrueClass === complete ? nil : complete
|
253
|
+
# field_length = self.fields.length
|
254
|
+
# common_fields = (other.fields & self.fields)
|
255
|
+
# other_common_pos = common_fields.collect{|f| other.fields.index f}
|
256
|
+
# this_common_pos = common_fields.collect{|f| self.fields.index f}
|
257
|
+
# missing = other.keys - self.keys
|
258
|
+
#
|
259
|
+
# other = other.to_list if other.type == :single
|
260
|
+
#
|
261
|
+
# case type
|
262
|
+
# when :single
|
263
|
+
# missing.each do |k|
|
264
|
+
# self[k] = fill
|
265
|
+
# end
|
266
|
+
# when :list
|
267
|
+
# missing.each do |k|
|
268
|
+
# values = [fill] * field_length
|
269
|
+
# other_values = other[k]
|
270
|
+
# other_common_pos.zip(this_common_pos).each do |o,t|
|
271
|
+
# values[t] = other_values[o]
|
272
|
+
# end
|
273
|
+
# self[k] = values
|
274
|
+
# end
|
275
|
+
# when :double
|
276
|
+
# fill = [] if fill.nil?
|
277
|
+
# missing.each do |k|
|
278
|
+
# values = [fill] * field_length
|
279
|
+
# other_values = other[k]
|
280
|
+
# other_common_pos.zip(this_common_pos).each do |o,t|
|
281
|
+
# values[t] = other_values[o]
|
282
|
+
# end
|
283
|
+
# self[k] = values
|
284
|
+
# end
|
285
|
+
# when :flat
|
286
|
+
# fill = [] if fill.nil?
|
287
|
+
# missing.each do |k|
|
288
|
+
# self[k] = fill
|
289
|
+
# end
|
290
|
+
# end
|
291
|
+
# end
|
292
|
+
#
|
293
|
+
# self
|
294
|
+
# end
|
295
|
+
#
|
296
|
+
# def detach(file)
|
297
|
+
# file_fields = file.fields.collect{|field| field.fullname}
|
298
|
+
# detached_fields = []
|
299
|
+
# self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
|
300
|
+
# reorder :key, detached_fields
|
301
|
+
# end
|
302
|
+
#
|
303
|
+
#end
|