rbbt-util 5.44.1 → 6.0.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,299 +1,303 @@
1
- require 'rbbt/tsv'
2
- require 'rbbt/tsv/attach/util'
3
- module TSV
4
-
5
- # Merge columns from different rows of a file
6
- def self.merge_row_fields(input, output, options = {})
7
- options = Misc.add_defaults options, :sep => "\t"
8
- key_field, fields = Misc.process_options options, :key_field, :fields
9
- sep = options[:sep]
10
-
11
- is = case
12
- when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
13
- CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
14
- when (String === input or StringIO === input)
15
- CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
16
- else
17
- input
18
- end
19
-
20
- if key_field.nil? or fields.nil?
21
- parser = TSV::Parser.new(is, options.dup)
22
- fields ||= parser.fields
23
- key_field ||= parser.key_field
24
- line = parser.first_line
25
- else
26
- line = is.gets
27
- end
28
-
29
- current_key = nil
30
- current_parts = []
31
-
32
- done = false
33
- Open.write(output) do |os|
34
- options.delete :sep if options[:sep] == "\t"
35
- header_lines = TSV.header_lines(key_field, fields, options)
36
- os.puts header_lines unless header_lines.empty?
37
-
38
- while line
39
- key, *parts = line.sub("\n",'').split(sep, -1)
40
- current_key ||= key
41
- case
42
- when key.nil?
43
- when current_key == key
44
- parts.each_with_index do |part,i|
45
- if current_parts[i].nil?
46
- current_parts[i] = part
47
- else
48
- current_parts[i] = current_parts[i] << "|" << part
49
- end
50
- end
51
- when current_key != key
52
- os.puts [current_key, current_parts].flatten * sep
53
- current_key = key
54
- current_parts = parts
55
- end
56
-
57
- line = is.gets
58
- end
59
-
60
- os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
61
-
62
- end
63
- end
64
-
65
- # Merge two files with the same keys and different fields
66
- def self.merge_different_fields(file1, file2, output, options = {})
67
- options = Misc.add_defaults options, :sep => "\t"
68
- monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
69
- sep = options[:sep] || "\t"
70
-
71
- case
72
- when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
73
- size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
74
- file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
75
- when (String === file1 or StringIO === file1)
76
- size = file1.length if monitor
77
- file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
78
- when TSV === file1
79
- size = file1.size if monitor
80
- file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
81
- end
82
-
83
- case
84
- when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
85
- file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
86
- when (String === file2 or StringIO === file2)
87
- file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
88
- when TSV === file2
89
- file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
90
- end
91
-
92
- begin
93
- output = File.open(output, 'w') if String === output
94
-
95
- cols1 = nil
96
- cols2 = nil
97
-
98
- done1 = false
99
- done2 = false
100
-
101
- key1 = key2 = nil
102
- while key1.nil?
103
- while (line1 = file1.gets) =~ /^#/
104
- key_field1, *fields1 = line1.chomp.sub('#','').split(sep)
105
- end
106
- key1, *parts1 = line1.sub("\n",'').split(sep, -1)
107
- cols1 = parts1.length
108
- end
109
-
110
- while key2.nil?
111
- while (line2 = file2.gets) =~ /^#/
112
- key_field2, *fields2 = line2.chomp.sub('#','').split(sep)
113
- end
114
- key2, *parts2 = line2.sub("\n",'').split(sep, -1)
115
- cols2 = parts2.length
116
- end
117
-
118
- #progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
119
- progress_monitor = Log::ProgressBar.new(size, :desc => "Merging fields") if monitor
120
-
121
- entry_hash = options
122
- entry_hash.delete :sep if entry_hash[:sep] == "\t"
123
- output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
124
-
125
- key = key1 < key2 ? key1 : key2
126
- parts = [""] * (cols1 + cols2)
127
- while not (done1 and done2)
128
- while (not done1 and key1 == key)
129
- parts1.each_with_index do |part, i|
130
- parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
131
- end
132
- key1 = nil
133
- while key1.nil? and not done1
134
- if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
135
- end
136
- progress_monitor.tick if monitor
137
- end
138
- while (not done2 and key2 == key)
139
- parts2.each_with_index do |part, i|
140
- i += cols1
141
- parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
142
- end
143
- key2 = nil
144
- while key2.nil? and not done2
145
- if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
146
- end
147
- end
148
-
149
- output.puts [key, parts].flatten * sep
150
- parts = [""] * (cols1 + cols2)
151
-
152
- case
153
- when done1
154
- key = key2
155
- when done2
156
- key = key1
157
- else
158
- key = key1 < key2 ? key1 : key2
159
- end
160
- end
161
-
162
- output.close
163
- file1.join if file1.respond_to? :join
164
- file2.join if file2.respond_to? :join
165
- rescue
166
- file1.abort if file1.respond_to? :abort
167
- file2.abort if file2.respond_to? :abort
168
- file1.join if file1.respond_to? :join
169
- file2.join if file2.respond_to? :join
170
- end
171
- end
172
-
173
- # Merge columns from different files
174
- def self.merge_paste(files, delim = "$")
175
- CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
176
- end
177
-
178
- def merge_different_fields(other, options = {})
179
- TmpFile.with_file do |output|
180
- TSV.merge_different_fields(self, other, output, options)
181
- tsv = TSV.open output, options
182
- tsv.key_field = self.key_field unless self.key_field.nil?
183
- tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
184
- tsv
185
- end
186
- end
187
-
188
- def merge_zip(other)
189
- other.each do |k,v|
190
- self.zip_new k, v
191
- end
192
- self
193
- end
194
-
195
-
196
- def attach(other, options = {})
197
- options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
198
- fields, one2one, complete = Misc.process_options options, :fields, :one2one, :complete
199
- in_namespace = options[:in_namespace]
200
-
201
- unless TSV === other
202
- other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
203
- other = TSV.open(other, :persist => options[:persist_input].to_s == "true")
204
- other.identifiers ||= other_identifier_file
205
- end
206
-
207
- fields = other.fields - [key_field].concat(self.fields) if other.fields and (fields.nil? or fields == :all)
208
- if in_namespace
209
- fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
210
- else
211
- fields = other.fields - [key_field].concat(self.fields) if fields.nil?
212
- end
213
-
214
- other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
215
- Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
216
-
217
- same_key = true
218
- begin
219
- case
220
- when (Misc.match_fields(key_field, other.key_field) and same_key)
221
- Log.debug "Attachment with same key: #{other.key_field}"
222
- attach_same_key other, fields
223
- when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
224
- Log.debug "Found other key field: #{other.key_field}"
225
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
226
- when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
227
- Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
228
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
229
- else
230
- index = TSV.find_traversal(self, other, options)
231
- raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
232
- Log.debug "Attachment with index: #{other.key_field}"
233
- attach_index other, index, fields
234
- end
235
- rescue Exception
236
- if same_key
237
- Log.warn "Could not translate identifiers with same_key"
238
- same_key = false
239
- retry
240
- else
241
- raise $!
242
- end
243
- end
244
- Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
245
-
246
- if complete
247
- Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
248
- fill = TrueClass === complete ? nil : complete
249
- field_length = self.fields.length
250
- common_fields = (other.fields & self.fields)
251
- other_common_pos = common_fields.collect{|f| other.fields.index f}
252
- this_common_pos = common_fields.collect{|f| self.fields.index f}
253
- missing = other.keys - self.keys
254
-
255
- other = other.to_list if other.type == :single
256
-
257
- case type
258
- when :single
259
- missing.each do |k|
260
- self[k] = fill
261
- end
262
- when :list
263
- missing.each do |k|
264
- values = [fill] * field_length
265
- other_values = other[k]
266
- other_common_pos.zip(this_common_pos).each do |o,t|
267
- values[t] = other_values[o]
268
- end
269
- self[k] = values
270
- end
271
- when :double
272
- fill = [] if fill.nil?
273
- missing.each do |k|
274
- values = [fill] * field_length
275
- other_values = other[k]
276
- other_common_pos.zip(this_common_pos).each do |o,t|
277
- values[t] = other_values[o]
278
- end
279
- self[k] = values
280
- end
281
- when :flat
282
- fill = [] if fill.nil?
283
- missing.each do |k|
284
- self[k] = fill
285
- end
286
- end
287
- end
288
-
289
- self
290
- end
291
-
292
- def detach(file)
293
- file_fields = file.fields.collect{|field| field.fullname}
294
- detached_fields = []
295
- self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
296
- reorder :key, detached_fields
297
- end
298
-
299
- end
1
+ require_relative '../refactor'
2
+ Rbbt.require_instead 'scout/tsv'
3
+ #require 'rbbt/tsv'
4
+ #require 'rbbt/tsv/attach/util'
5
+ #module TSV
6
+ #
7
+ # # Merge columns from different rows of a file
8
+ # def self.merge_row_fields(input, output, options = {})
9
+ # options = Misc.add_defaults options, :sep => "\t"
10
+ # key_field, fields = Misc.process_options options, :key_field, :fields
11
+ # sep = options[:sep]
12
+ #
13
+ # is = case
14
+ # when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
15
+ # CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
16
+ # when (String === input or StringIO === input)
17
+ # CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
18
+ # else
19
+ # input
20
+ # end
21
+ #
22
+ # if key_field.nil? or fields.nil?
23
+ # parser = TSV::Parser.new(is, options.dup)
24
+ # fields ||= parser.fields
25
+ # key_field ||= parser.key_field
26
+ # line = parser.first_line
27
+ # else
28
+ # line = is.gets
29
+ # end
30
+ #
31
+ # current_key = nil
32
+ # current_parts = []
33
+ #
34
+ # done = false
35
+ # Open.write(output) do |os|
36
+ # options.delete :sep if options[:sep] == "\t"
37
+ # header_lines = TSV.header_lines(key_field, fields, options)
38
+ # os.puts header_lines unless header_lines.empty?
39
+ #
40
+ # while line
41
+ # key, *parts = line.sub("\n",'').split(sep, -1)
42
+ # current_key ||= key
43
+ # case
44
+ # when key.nil?
45
+ # when current_key == key
46
+ # parts.each_with_index do |part,i|
47
+ # if current_parts[i].nil?
48
+ # current_parts[i] = part
49
+ # else
50
+ # current_parts[i] = current_parts[i] << "|" << part
51
+ # end
52
+ # end
53
+ # when current_key != key
54
+ # os.puts [current_key, current_parts].flatten * sep
55
+ # current_key = key
56
+ # current_parts = parts
57
+ # end
58
+ #
59
+ # line = is.gets
60
+ # end
61
+ #
62
+ # os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
63
+ #
64
+ # end
65
+ # end
66
+ #
67
+ # # Merge two files with the same keys and different fields
68
+ # def self.merge_different_fields(file1, file2, output, options = {})
69
+ # options = IndiferentHash.add_defaults options, :sep => "\t"
70
+ # monitor, key_field, fields = IndiferentHash.process_options options, :monitor, :key_field, :fields
71
+ # sep = options[:sep] || "\t"
72
+ #
73
+ # case
74
+ # when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
75
+ # size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
76
+ # file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
77
+ # when (String === file1 or StringIO === file1)
78
+ # size = file1.length if monitor
79
+ # file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
80
+ # when TSV === file1
81
+ # size = file1.size if monitor
82
+ # file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
83
+ # end
84
+ #
85
+ # case
86
+ # when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
87
+ # file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
88
+ # when (String === file2 or StringIO === file2)
89
+ # file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
90
+ # when TSV === file2
91
+ # file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
92
+ # end
93
+ #
94
+ # begin
95
+ # output = File.open(output, 'w') if String === output
96
+ #
97
+ # cols1 = nil
98
+ # cols2 = nil
99
+ #
100
+ # done1 = false
101
+ # done2 = false
102
+ #
103
+ # key1 = key2 = nil
104
+ # while key1.nil?
105
+ # while (line1 = file1.gets) =~ /^#/
106
+ # key_field1, *fields1 = line1.chomp.sub('#','').split(sep)
107
+ # end
108
+ # key1, *parts1 = line1.sub("\n",'').split(sep, -1)
109
+ # cols1 = parts1.length
110
+ # end
111
+ #
112
+ # while key2.nil?
113
+ # while (line2 = file2.gets) =~ /^#/
114
+ # key_field2, *fields2 = line2.chomp.sub('#','').split(sep)
115
+ # end
116
+ # key2, *parts2 = line2.sub("\n",'').split(sep, -1)
117
+ # cols2 = parts2.length
118
+ # end
119
+ #
120
+ # #progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
121
+ # progress_monitor = Log::ProgressBar.new(size, :desc => "Merging fields") if monitor
122
+ #
123
+ # entry_hash = options
124
+ # entry_hash.delete :sep if entry_hash[:sep] == "\t"
125
+ # output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
126
+ #
127
+ # key = key1 < key2 ? key1 : key2
128
+ # parts = [""] * (cols1 + cols2)
129
+ # while not (done1 and done2)
130
+ # while (not done1 and key1 == key)
131
+ # parts1.each_with_index do |part, i|
132
+ # parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
133
+ # end
134
+ # key1 = nil
135
+ # while key1.nil? and not done1
136
+ # if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
137
+ # end
138
+ # progress_monitor.tick if monitor
139
+ # end
140
+ # while (not done2 and key2 == key)
141
+ # parts2.each_with_index do |part, i|
142
+ # i += cols1
143
+ # parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
144
+ # end
145
+ # key2 = nil
146
+ # while key2.nil? and not done2
147
+ # if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
148
+ # end
149
+ # end
150
+ #
151
+ # output.puts [key, parts].flatten * sep
152
+ # parts = [""] * (cols1 + cols2)
153
+ #
154
+ # case
155
+ # when done1
156
+ # key = key2
157
+ # when done2
158
+ # key = key1
159
+ # else
160
+ # key = key1 < key2 ? key1 : key2
161
+ # end
162
+ # end
163
+ #
164
+ # output.close
165
+ # file1.join if file1.respond_to? :join
166
+ # file2.join if file2.respond_to? :join
167
+ # rescue
168
+ # file1.abort if file1.respond_to? :abort
169
+ # file2.abort if file2.respond_to? :abort
170
+ # file1.join if file1.respond_to? :join
171
+ # file2.join if file2.respond_to? :join
172
+ # end
173
+ # end
174
+ #
175
+ # def merge_different_fields(other, options = {})
176
+ # TmpFile.with_file do |output|
177
+ # TSV.merge_different_fields(self, other, output, options)
178
+ # tsv = TSV.open output, options
179
+ # tsv.key_field = self.key_field unless self.key_field.nil?
180
+ # tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
181
+ # tsv
182
+ # end
183
+ # end
184
+ #
185
+ # # Merge columns from different files
186
+ # def self.merge_paste(files, delim = "$")
187
+ # CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
188
+ # end
189
+ #
190
+ # end
191
+ #
192
+ # def merge_zip(other)
193
+ # other.each do |k,v|
194
+ # self.zip_new k, v
195
+ # end
196
+ # self
197
+ # end
198
+ #
199
+ #
200
+ # def attach(other, options = {})
201
+ # options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
202
+ # fields, one2one, complete = Misc.process_options options, :fields, :one2one, :complete
203
+ # in_namespace = options[:in_namespace]
204
+ #
205
+ # unless TSV === other
206
+ # other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
207
+ # other = TSV.open(other, :persist => options[:persist_input].to_s == "true")
208
+ # other.identifiers ||= other_identifier_file
209
+ # end
210
+ #
211
+ # fields = other.fields - [key_field].concat(self.fields) if other.fields and (fields.nil? or fields == :all)
212
+ # if in_namespace
213
+ # fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
214
+ # else
215
+ # fields = other.fields - [key_field].concat(self.fields) if fields.nil?
216
+ # end
217
+ #
218
+ # other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
219
+ # Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
220
+ #
221
+ # same_key = true
222
+ # begin
223
+ # case
224
+ # when (Misc.match_fields(key_field, other.key_field) and same_key)
225
+ # Log.debug "Attachment with same key: #{other.key_field}"
226
+ # attach_same_key other, fields
227
+ # when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
228
+ # Log.debug "Found other key field: #{other.key_field}"
229
+ # attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
230
+ # when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
231
+ # Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
232
+ # attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
233
+ # else
234
+ # index = TSV.find_traversal(self, other, options)
235
+ # raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
236
+ # Log.debug "Attachment with index: #{other.key_field}"
237
+ # attach_index other, index, fields
238
+ # end
239
+ # rescue Exception
240
+ # if same_key
241
+ # Log.warn "Could not translate identifiers with same_key"
242
+ # same_key = false
243
+ # retry
244
+ # else
245
+ # raise $!
246
+ # end
247
+ # end
248
+ # Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
249
+ #
250
+ # if complete
251
+ # Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
252
+ # fill = TrueClass === complete ? nil : complete
253
+ # field_length = self.fields.length
254
+ # common_fields = (other.fields & self.fields)
255
+ # other_common_pos = common_fields.collect{|f| other.fields.index f}
256
+ # this_common_pos = common_fields.collect{|f| self.fields.index f}
257
+ # missing = other.keys - self.keys
258
+ #
259
+ # other = other.to_list if other.type == :single
260
+ #
261
+ # case type
262
+ # when :single
263
+ # missing.each do |k|
264
+ # self[k] = fill
265
+ # end
266
+ # when :list
267
+ # missing.each do |k|
268
+ # values = [fill] * field_length
269
+ # other_values = other[k]
270
+ # other_common_pos.zip(this_common_pos).each do |o,t|
271
+ # values[t] = other_values[o]
272
+ # end
273
+ # self[k] = values
274
+ # end
275
+ # when :double
276
+ # fill = [] if fill.nil?
277
+ # missing.each do |k|
278
+ # values = [fill] * field_length
279
+ # other_values = other[k]
280
+ # other_common_pos.zip(this_common_pos).each do |o,t|
281
+ # values[t] = other_values[o]
282
+ # end
283
+ # self[k] = values
284
+ # end
285
+ # when :flat
286
+ # fill = [] if fill.nil?
287
+ # missing.each do |k|
288
+ # self[k] = fill
289
+ # end
290
+ # end
291
+ # end
292
+ #
293
+ # self
294
+ # end
295
+ #
296
+ # def detach(file)
297
+ # file_fields = file.fields.collect{|field| field.fullname}
298
+ # detached_fields = []
299
+ # self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
300
+ # reorder :key, detached_fields
301
+ # end
302
+ #
303
+ #end