rbbt-util 5.44.1 → 6.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +67 -90
  3. data/etc/app.d/base.rb +2 -2
  4. data/etc/app.d/semaphores.rb +3 -3
  5. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  6. data/lib/rbbt/annotations/refactor.rb +27 -0
  7. data/lib/rbbt/annotations/util.rb +282 -282
  8. data/lib/rbbt/annotations.rb +343 -320
  9. data/lib/rbbt/association/database.rb +200 -225
  10. data/lib/rbbt/association/index.rb +294 -291
  11. data/lib/rbbt/association/item.rb +227 -227
  12. data/lib/rbbt/association/open.rb +35 -34
  13. data/lib/rbbt/association/util.rb +0 -169
  14. data/lib/rbbt/association.rb +2 -4
  15. data/lib/rbbt/entity/identifiers.rb +119 -118
  16. data/lib/rbbt/entity/refactor.rb +12 -0
  17. data/lib/rbbt/entity.rb +319 -315
  18. data/lib/rbbt/hpc/batch.rb +72 -53
  19. data/lib/rbbt/hpc/lsf.rb +2 -2
  20. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  21. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  22. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  24. data/lib/rbbt/hpc/slurm.rb +18 -18
  25. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  26. data/lib/rbbt/knowledge_base/query.rb +2 -2
  27. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  28. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  29. data/lib/rbbt/knowledge_base.rb +1 -1
  30. data/lib/rbbt/monitor.rb +36 -25
  31. data/lib/rbbt/persist/refactor.rb +166 -0
  32. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  33. data/lib/rbbt/persist/tsv.rb +187 -185
  34. data/lib/rbbt/persist.rb +556 -551
  35. data/lib/rbbt/refactor.rb +20 -0
  36. data/lib/rbbt/resource/path/refactor.rb +178 -0
  37. data/lib/rbbt/resource/path.rb +317 -497
  38. data/lib/rbbt/resource/util.rb +0 -48
  39. data/lib/rbbt/resource.rb +3 -390
  40. data/lib/rbbt/tsv/accessor.rb +2 -838
  41. data/lib/rbbt/tsv/attach.rb +303 -299
  42. data/lib/rbbt/tsv/change_id.rb +244 -245
  43. data/lib/rbbt/tsv/csv.rb +87 -85
  44. data/lib/rbbt/tsv/dumper.rb +2 -100
  45. data/lib/rbbt/tsv/excel.rb +26 -24
  46. data/lib/rbbt/tsv/field_index.rb +4 -1
  47. data/lib/rbbt/tsv/filter.rb +3 -2
  48. data/lib/rbbt/tsv/index.rb +2 -284
  49. data/lib/rbbt/tsv/manipulate.rb +750 -747
  50. data/lib/rbbt/tsv/marshal.rb +3 -3
  51. data/lib/rbbt/tsv/matrix.rb +2 -2
  52. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  53. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  54. data/lib/rbbt/tsv/parser.rb +678 -678
  55. data/lib/rbbt/tsv/refactor.rb +195 -0
  56. data/lib/rbbt/tsv/stream.rb +253 -251
  57. data/lib/rbbt/tsv/util.rb +420 -420
  58. data/lib/rbbt/tsv.rb +210 -208
  59. data/lib/rbbt/util/R/eval.rb +4 -4
  60. data/lib/rbbt/util/R/plot.rb +62 -166
  61. data/lib/rbbt/util/R.rb +21 -18
  62. data/lib/rbbt/util/cmd.rb +2 -318
  63. data/lib/rbbt/util/color.rb +269 -269
  64. data/lib/rbbt/util/colorize.rb +89 -89
  65. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  66. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  67. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  68. data/lib/rbbt/util/config.rb +169 -167
  69. data/lib/rbbt/util/iruby.rb +20 -0
  70. data/lib/rbbt/util/log/progress/report.rb +241 -241
  71. data/lib/rbbt/util/log/progress/util.rb +99 -99
  72. data/lib/rbbt/util/log/progress.rb +102 -102
  73. data/lib/rbbt/util/log/refactor.rb +49 -0
  74. data/lib/rbbt/util/log.rb +486 -532
  75. data/lib/rbbt/util/migrate.rb +1 -1
  76. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  77. data/lib/rbbt/util/misc/development.rb +12 -11
  78. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  79. data/lib/rbbt/util/misc/format.rb +2 -230
  80. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  81. data/lib/rbbt/util/misc/inspect.rb +2 -476
  82. data/lib/rbbt/util/misc/lock.rb +109 -106
  83. data/lib/rbbt/util/misc/omics.rb +9 -1
  84. data/lib/rbbt/util/misc/pipes.rb +765 -793
  85. data/lib/rbbt/util/misc/refactor.rb +20 -0
  86. data/lib/rbbt/util/misc/ssw.rb +27 -17
  87. data/lib/rbbt/util/misc/system.rb +0 -15
  88. data/lib/rbbt/util/misc.rb +39 -20
  89. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  90. data/lib/rbbt/util/named_array.rb +3 -220
  91. data/lib/rbbt/util/open/refactor.rb +7 -0
  92. data/lib/rbbt/util/open.rb +3 -857
  93. data/lib/rbbt/util/procpath.rb +6 -6
  94. data/lib/rbbt/util/python/paths.rb +27 -0
  95. data/lib/rbbt/util/python/run.rb +115 -0
  96. data/lib/rbbt/util/python/script.rb +110 -0
  97. data/lib/rbbt/util/python/util.rb +3 -3
  98. data/lib/rbbt/util/python.rb +22 -81
  99. data/lib/rbbt/util/semaphore.rb +152 -148
  100. data/lib/rbbt/util/simpleopt.rb +9 -8
  101. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  102. data/lib/rbbt/util/ssh.rb +122 -118
  103. data/lib/rbbt/util/tar.rb +117 -115
  104. data/lib/rbbt/util/tmpfile.rb +69 -67
  105. data/lib/rbbt/util/version.rb +2 -0
  106. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  107. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  108. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  109. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  110. data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
  111. data/lib/rbbt/workflow/refactor.rb +153 -0
  112. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  113. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  114. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  115. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  116. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  117. data/lib/rbbt/workflow/step/run.rb +766 -766
  118. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  119. data/lib/rbbt/workflow/step.rb +2 -362
  120. data/lib/rbbt/workflow/task.rb +118 -118
  121. data/lib/rbbt/workflow/usage.rb +289 -287
  122. data/lib/rbbt/workflow/util/archive.rb +6 -5
  123. data/lib/rbbt/workflow/util/data.rb +1 -1
  124. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  125. data/lib/rbbt/workflow/util/trace.rb +79 -44
  126. data/lib/rbbt/workflow.rb +4 -882
  127. data/lib/rbbt-util.rb +21 -13
  128. data/lib/rbbt.rb +16 -3
  129. data/python/rbbt/__init__.py +19 -1
  130. data/share/Rlib/plot.R +37 -37
  131. data/share/Rlib/svg.R +22 -5
  132. data/share/install/software/lib/install_helpers +1 -1
  133. data/share/rbbt_commands/hpc/list +2 -3
  134. data/share/rbbt_commands/hpc/orchestrate +4 -4
  135. data/share/rbbt_commands/hpc/tail +2 -0
  136. data/share/rbbt_commands/hpc/task +10 -7
  137. data/share/rbbt_commands/lsf/list +2 -3
  138. data/share/rbbt_commands/lsf/orchestrate +4 -4
  139. data/share/rbbt_commands/lsf/tail +2 -0
  140. data/share/rbbt_commands/lsf/task +10 -7
  141. data/share/rbbt_commands/migrate +1 -1
  142. data/share/rbbt_commands/pbs/list +2 -3
  143. data/share/rbbt_commands/pbs/orchestrate +4 -4
  144. data/share/rbbt_commands/pbs/tail +2 -0
  145. data/share/rbbt_commands/pbs/task +10 -7
  146. data/share/rbbt_commands/resource/produce +8 -1
  147. data/share/rbbt_commands/slurm/list +2 -3
  148. data/share/rbbt_commands/slurm/orchestrate +4 -4
  149. data/share/rbbt_commands/slurm/tail +2 -0
  150. data/share/rbbt_commands/slurm/task +10 -7
  151. data/share/rbbt_commands/system/clean +5 -5
  152. data/share/rbbt_commands/system/status +5 -5
  153. data/share/rbbt_commands/tsv/get +2 -3
  154. data/share/rbbt_commands/tsv/info +10 -13
  155. data/share/rbbt_commands/tsv/keys +18 -14
  156. data/share/rbbt_commands/tsv/slice +2 -2
  157. data/share/rbbt_commands/tsv/transpose +6 -2
  158. data/share/rbbt_commands/workflow/info +20 -24
  159. data/share/rbbt_commands/workflow/list +1 -1
  160. data/share/rbbt_commands/workflow/prov +20 -13
  161. data/share/rbbt_commands/workflow/server +11 -1
  162. data/share/rbbt_commands/workflow/task +76 -71
  163. data/share/rbbt_commands/workflow/write_info +26 -9
  164. data/share/software/opt/ssw/ssw.c +861 -0
  165. data/share/software/opt/ssw/ssw.h +130 -0
  166. data/share/workflow_config.ru +3 -3
  167. metadata +40 -2
@@ -1,299 +1,303 @@
1
- require 'rbbt/tsv'
2
- require 'rbbt/tsv/attach/util'
3
- module TSV
4
-
5
- # Merge columns from different rows of a file
6
- def self.merge_row_fields(input, output, options = {})
7
- options = Misc.add_defaults options, :sep => "\t"
8
- key_field, fields = Misc.process_options options, :key_field, :fields
9
- sep = options[:sep]
10
-
11
- is = case
12
- when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
13
- CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
14
- when (String === input or StringIO === input)
15
- CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
16
- else
17
- input
18
- end
19
-
20
- if key_field.nil? or fields.nil?
21
- parser = TSV::Parser.new(is, options.dup)
22
- fields ||= parser.fields
23
- key_field ||= parser.key_field
24
- line = parser.first_line
25
- else
26
- line = is.gets
27
- end
28
-
29
- current_key = nil
30
- current_parts = []
31
-
32
- done = false
33
- Open.write(output) do |os|
34
- options.delete :sep if options[:sep] == "\t"
35
- header_lines = TSV.header_lines(key_field, fields, options)
36
- os.puts header_lines unless header_lines.empty?
37
-
38
- while line
39
- key, *parts = line.sub("\n",'').split(sep, -1)
40
- current_key ||= key
41
- case
42
- when key.nil?
43
- when current_key == key
44
- parts.each_with_index do |part,i|
45
- if current_parts[i].nil?
46
- current_parts[i] = part
47
- else
48
- current_parts[i] = current_parts[i] << "|" << part
49
- end
50
- end
51
- when current_key != key
52
- os.puts [current_key, current_parts].flatten * sep
53
- current_key = key
54
- current_parts = parts
55
- end
56
-
57
- line = is.gets
58
- end
59
-
60
- os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
61
-
62
- end
63
- end
64
-
65
- # Merge two files with the same keys and different fields
66
- def self.merge_different_fields(file1, file2, output, options = {})
67
- options = Misc.add_defaults options, :sep => "\t"
68
- monitor, key_field, fields = Misc.process_options options, :monitor, :key_field, :fields
69
- sep = options[:sep] || "\t"
70
-
71
- case
72
- when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
73
- size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
74
- file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
75
- when (String === file1 or StringIO === file1)
76
- size = file1.length if monitor
77
- file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
78
- when TSV === file1
79
- size = file1.size if monitor
80
- file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
81
- end
82
-
83
- case
84
- when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
85
- file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
86
- when (String === file2 or StringIO === file2)
87
- file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
88
- when TSV === file2
89
- file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
90
- end
91
-
92
- begin
93
- output = File.open(output, 'w') if String === output
94
-
95
- cols1 = nil
96
- cols2 = nil
97
-
98
- done1 = false
99
- done2 = false
100
-
101
- key1 = key2 = nil
102
- while key1.nil?
103
- while (line1 = file1.gets) =~ /^#/
104
- key_field1, *fields1 = line1.chomp.sub('#','').split(sep)
105
- end
106
- key1, *parts1 = line1.sub("\n",'').split(sep, -1)
107
- cols1 = parts1.length
108
- end
109
-
110
- while key2.nil?
111
- while (line2 = file2.gets) =~ /^#/
112
- key_field2, *fields2 = line2.chomp.sub('#','').split(sep)
113
- end
114
- key2, *parts2 = line2.sub("\n",'').split(sep, -1)
115
- cols2 = parts2.length
116
- end
117
-
118
- #progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
119
- progress_monitor = Log::ProgressBar.new(size, :desc => "Merging fields") if monitor
120
-
121
- entry_hash = options
122
- entry_hash.delete :sep if entry_hash[:sep] == "\t"
123
- output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
124
-
125
- key = key1 < key2 ? key1 : key2
126
- parts = [""] * (cols1 + cols2)
127
- while not (done1 and done2)
128
- while (not done1 and key1 == key)
129
- parts1.each_with_index do |part, i|
130
- parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
131
- end
132
- key1 = nil
133
- while key1.nil? and not done1
134
- if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
135
- end
136
- progress_monitor.tick if monitor
137
- end
138
- while (not done2 and key2 == key)
139
- parts2.each_with_index do |part, i|
140
- i += cols1
141
- parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
142
- end
143
- key2 = nil
144
- while key2.nil? and not done2
145
- if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
146
- end
147
- end
148
-
149
- output.puts [key, parts].flatten * sep
150
- parts = [""] * (cols1 + cols2)
151
-
152
- case
153
- when done1
154
- key = key2
155
- when done2
156
- key = key1
157
- else
158
- key = key1 < key2 ? key1 : key2
159
- end
160
- end
161
-
162
- output.close
163
- file1.join if file1.respond_to? :join
164
- file2.join if file2.respond_to? :join
165
- rescue
166
- file1.abort if file1.respond_to? :abort
167
- file2.abort if file2.respond_to? :abort
168
- file1.join if file1.respond_to? :join
169
- file2.join if file2.respond_to? :join
170
- end
171
- end
172
-
173
- # Merge columns from different files
174
- def self.merge_paste(files, delim = "$")
175
- CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
176
- end
177
-
178
- def merge_different_fields(other, options = {})
179
- TmpFile.with_file do |output|
180
- TSV.merge_different_fields(self, other, output, options)
181
- tsv = TSV.open output, options
182
- tsv.key_field = self.key_field unless self.key_field.nil?
183
- tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
184
- tsv
185
- end
186
- end
187
-
188
- def merge_zip(other)
189
- other.each do |k,v|
190
- self.zip_new k, v
191
- end
192
- self
193
- end
194
-
195
-
196
- def attach(other, options = {})
197
- options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
198
- fields, one2one, complete = Misc.process_options options, :fields, :one2one, :complete
199
- in_namespace = options[:in_namespace]
200
-
201
- unless TSV === other
202
- other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
203
- other = TSV.open(other, :persist => options[:persist_input].to_s == "true")
204
- other.identifiers ||= other_identifier_file
205
- end
206
-
207
- fields = other.fields - [key_field].concat(self.fields) if other.fields and (fields.nil? or fields == :all)
208
- if in_namespace
209
- fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
210
- else
211
- fields = other.fields - [key_field].concat(self.fields) if fields.nil?
212
- end
213
-
214
- other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
215
- Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
216
-
217
- same_key = true
218
- begin
219
- case
220
- when (Misc.match_fields(key_field, other.key_field) and same_key)
221
- Log.debug "Attachment with same key: #{other.key_field}"
222
- attach_same_key other, fields
223
- when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
224
- Log.debug "Found other key field: #{other.key_field}"
225
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
226
- when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
227
- Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
228
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
229
- else
230
- index = TSV.find_traversal(self, other, options)
231
- raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
232
- Log.debug "Attachment with index: #{other.key_field}"
233
- attach_index other, index, fields
234
- end
235
- rescue Exception
236
- if same_key
237
- Log.warn "Could not translate identifiers with same_key"
238
- same_key = false
239
- retry
240
- else
241
- raise $!
242
- end
243
- end
244
- Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
245
-
246
- if complete
247
- Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
248
- fill = TrueClass === complete ? nil : complete
249
- field_length = self.fields.length
250
- common_fields = (other.fields & self.fields)
251
- other_common_pos = common_fields.collect{|f| other.fields.index f}
252
- this_common_pos = common_fields.collect{|f| self.fields.index f}
253
- missing = other.keys - self.keys
254
-
255
- other = other.to_list if other.type == :single
256
-
257
- case type
258
- when :single
259
- missing.each do |k|
260
- self[k] = fill
261
- end
262
- when :list
263
- missing.each do |k|
264
- values = [fill] * field_length
265
- other_values = other[k]
266
- other_common_pos.zip(this_common_pos).each do |o,t|
267
- values[t] = other_values[o]
268
- end
269
- self[k] = values
270
- end
271
- when :double
272
- fill = [] if fill.nil?
273
- missing.each do |k|
274
- values = [fill] * field_length
275
- other_values = other[k]
276
- other_common_pos.zip(this_common_pos).each do |o,t|
277
- values[t] = other_values[o]
278
- end
279
- self[k] = values
280
- end
281
- when :flat
282
- fill = [] if fill.nil?
283
- missing.each do |k|
284
- self[k] = fill
285
- end
286
- end
287
- end
288
-
289
- self
290
- end
291
-
292
- def detach(file)
293
- file_fields = file.fields.collect{|field| field.fullname}
294
- detached_fields = []
295
- self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
296
- reorder :key, detached_fields
297
- end
298
-
299
- end
1
+ require_relative '../refactor'
2
+ Rbbt.require_instead 'scout/tsv'
3
+ #require 'rbbt/tsv'
4
+ #require 'rbbt/tsv/attach/util'
5
+ #module TSV
6
+ #
7
+ # # Merge columns from different rows of a file
8
+ # def self.merge_row_fields(input, output, options = {})
9
+ # options = Misc.add_defaults options, :sep => "\t"
10
+ # key_field, fields = Misc.process_options options, :key_field, :fields
11
+ # sep = options[:sep]
12
+ #
13
+ # is = case
14
+ # when (String === input and not input.index("\n") and input.length < 250 and File.exist?(input))
15
+ # CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ input } | grep -v '^#{sep}' ", :pipe => true)
16
+ # when (String === input or StringIO === input)
17
+ # CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => input, :pipe => true)
18
+ # else
19
+ # input
20
+ # end
21
+ #
22
+ # if key_field.nil? or fields.nil?
23
+ # parser = TSV::Parser.new(is, options.dup)
24
+ # fields ||= parser.fields
25
+ # key_field ||= parser.key_field
26
+ # line = parser.first_line
27
+ # else
28
+ # line = is.gets
29
+ # end
30
+ #
31
+ # current_key = nil
32
+ # current_parts = []
33
+ #
34
+ # done = false
35
+ # Open.write(output) do |os|
36
+ # options.delete :sep if options[:sep] == "\t"
37
+ # header_lines = TSV.header_lines(key_field, fields, options)
38
+ # os.puts header_lines unless header_lines.empty?
39
+ #
40
+ # while line
41
+ # key, *parts = line.sub("\n",'').split(sep, -1)
42
+ # current_key ||= key
43
+ # case
44
+ # when key.nil?
45
+ # when current_key == key
46
+ # parts.each_with_index do |part,i|
47
+ # if current_parts[i].nil?
48
+ # current_parts[i] = part
49
+ # else
50
+ # current_parts[i] = current_parts[i] << "|" << part
51
+ # end
52
+ # end
53
+ # when current_key != key
54
+ # os.puts [current_key, current_parts].flatten * sep
55
+ # current_key = key
56
+ # current_parts = parts
57
+ # end
58
+ #
59
+ # line = is.gets
60
+ # end
61
+ #
62
+ # os.puts [current_key, current_parts].flatten * sep unless current_key.nil?
63
+ #
64
+ # end
65
+ # end
66
+ #
67
+ # # Merge two files with the same keys and different fields
68
+ # def self.merge_different_fields(file1, file2, output, options = {})
69
+ # options = IndiferentHash.add_defaults options, :sep => "\t"
70
+ # monitor, key_field, fields = IndiferentHash.process_options options, :monitor, :key_field, :fields
71
+ # sep = options[:sep] || "\t"
72
+ #
73
+ # case
74
+ # when (String === file1 and not file1 =~ /\n/ and file1.length < 250 and File.exist?(file1))
75
+ # size = CMD.cmd("wc -c '#{file1}'").read.to_f if monitor
76
+ # file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file1 } | grep -v '^#{sep}' ", :pipe => true)
77
+ # when (String === file1 or StringIO === file1)
78
+ # size = file1.length if monitor
79
+ # file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1, :pipe => true)
80
+ # when TSV === file1
81
+ # size = file1.size if monitor
82
+ # file1 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file1.to_s(:sort, true), :pipe => true)
83
+ # end
84
+ #
85
+ # case
86
+ # when (String === file2 and not file2 =~ /\n/ and file2.length < 250 and File.exist?(file2))
87
+ # file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' #{ file2 } | grep -v '^#{sep}' ", :pipe => true)
88
+ # when (String === file2 or StringIO === file2)
89
+ # file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2, :pipe => true)
90
+ # when TSV === file2
91
+ # file2 = CMD.cmd("env LC_ALL=C sort -k1,1 -t'#{sep}' | grep -v '^#{sep}'", :in => file2.to_s(:sort, true), :pipe => true)
92
+ # end
93
+ #
94
+ # begin
95
+ # output = File.open(output, 'w') if String === output
96
+ #
97
+ # cols1 = nil
98
+ # cols2 = nil
99
+ #
100
+ # done1 = false
101
+ # done2 = false
102
+ #
103
+ # key1 = key2 = nil
104
+ # while key1.nil?
105
+ # while (line1 = file1.gets) =~ /^#/
106
+ # key_field1, *fields1 = line1.chomp.sub('#','').split(sep)
107
+ # end
108
+ # key1, *parts1 = line1.sub("\n",'').split(sep, -1)
109
+ # cols1 = parts1.length
110
+ # end
111
+ #
112
+ # while key2.nil?
113
+ # while (line2 = file2.gets) =~ /^#/
114
+ # key_field2, *fields2 = line2.chomp.sub('#','').split(sep)
115
+ # end
116
+ # key2, *parts2 = line2.sub("\n",'').split(sep, -1)
117
+ # cols2 = parts2.length
118
+ # end
119
+ #
120
+ # #progress_monitor = Progress::Bar.new(size, 0, 100, "Merging fields") if monitor
121
+ # progress_monitor = Log::ProgressBar.new(size, :desc => "Merging fields") if monitor
122
+ #
123
+ # entry_hash = options
124
+ # entry_hash.delete :sep if entry_hash[:sep] == "\t"
125
+ # output.puts TSV.header_lines key_field1, fields1 + fields2, entry_hash if key_field1 and fields1 and fields2
126
+ #
127
+ # key = key1 < key2 ? key1 : key2
128
+ # parts = [""] * (cols1 + cols2)
129
+ # while not (done1 and done2)
130
+ # while (not done1 and key1 == key)
131
+ # parts1.each_with_index do |part, i|
132
+ # parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
133
+ # end
134
+ # key1 = nil
135
+ # while key1.nil? and not done1
136
+ # if file1.eof?; done1 = true; else key1, *parts1 = file1.gets.sub("\n",'').split(sep, -1) end
137
+ # end
138
+ # progress_monitor.tick if monitor
139
+ # end
140
+ # while (not done2 and key2 == key)
141
+ # parts2.each_with_index do |part, i|
142
+ # i += cols1
143
+ # parts[i] = (parts[i].nil? or parts[i].empty?) ? part : parts[i] << "|" << part
144
+ # end
145
+ # key2 = nil
146
+ # while key2.nil? and not done2
147
+ # if file2.eof?; done2 = true; else key2, *parts2 = file2.gets.sub("\n",'').split(sep, -1) end
148
+ # end
149
+ # end
150
+ #
151
+ # output.puts [key, parts].flatten * sep
152
+ # parts = [""] * (cols1 + cols2)
153
+ #
154
+ # case
155
+ # when done1
156
+ # key = key2
157
+ # when done2
158
+ # key = key1
159
+ # else
160
+ # key = key1 < key2 ? key1 : key2
161
+ # end
162
+ # end
163
+ #
164
+ # output.close
165
+ # file1.join if file1.respond_to? :join
166
+ # file2.join if file2.respond_to? :join
167
+ # rescue
168
+ # file1.abort if file1.respond_to? :abort
169
+ # file2.abort if file2.respond_to? :abort
170
+ # file1.join if file1.respond_to? :join
171
+ # file2.join if file2.respond_to? :join
172
+ # end
173
+ # end
174
+ #
175
+ # def merge_different_fields(other, options = {})
176
+ # TmpFile.with_file do |output|
177
+ # TSV.merge_different_fields(self, other, output, options)
178
+ # tsv = TSV.open output, options
179
+ # tsv.key_field = self.key_field unless self.key_field.nil?
180
+ # tsv.fields = self.fields + other.fields unless self.fields.nil? or other.fields.nil?
181
+ # tsv
182
+ # end
183
+ # end
184
+ #
185
+ # # Merge columns from different files
186
+ # def self.merge_paste(files, delim = "$")
187
+ # CMD.cmd("paste #{ files.collect{|f| "'#{f}'"} * " "} -d'#{delim}' |sed 's/#{delim}[^\\t]*//g'", :pipe => true)
188
+ # end
189
+ #
190
+ # end
191
+ #
192
+ # def merge_zip(other)
193
+ # other.each do |k,v|
194
+ # self.zip_new k, v
195
+ # end
196
+ # self
197
+ # end
198
+ #
199
+ #
200
+ # def attach(other, options = {})
201
+ # options = Misc.add_defaults options, :in_namespace => false, :persist_input => false
202
+ # fields, one2one, complete = Misc.process_options options, :fields, :one2one, :complete
203
+ # in_namespace = options[:in_namespace]
204
+ #
205
+ # unless TSV === other
206
+ # other_identifier_file = other.identifier_files.first if other.respond_to? :identifier_files
207
+ # other = TSV.open(other, :persist => options[:persist_input].to_s == "true")
208
+ # other.identifiers ||= other_identifier_file
209
+ # end
210
+ #
211
+ # fields = other.fields - [key_field].concat(self.fields) if other.fields and (fields.nil? or fields == :all)
212
+ # if in_namespace
213
+ # fields = other.fields_in_namespace - [key_field].concat(self.fields) if fields.nil?
214
+ # else
215
+ # fields = other.fields - [key_field].concat(self.fields) if fields.nil?
216
+ # end
217
+ #
218
+ # other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
219
+ # Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
220
+ #
221
+ # same_key = true
222
+ # begin
223
+ # case
224
+ # when (Misc.match_fields(key_field, other.key_field) and same_key)
225
+ # Log.debug "Attachment with same key: #{other.key_field}"
226
+ # attach_same_key other, fields
227
+ # when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
228
+ # Log.debug "Found other key field: #{other.key_field}"
229
+ # attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
230
+ # when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
231
+ # Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
232
+ # attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
233
+ # else
234
+ # index = TSV.find_traversal(self, other, options)
235
+ # raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
236
+ # Log.debug "Attachment with index: #{other.key_field}"
237
+ # attach_index other, index, fields
238
+ # end
239
+ # rescue Exception
240
+ # if same_key
241
+ # Log.warn "Could not translate identifiers with same_key"
242
+ # same_key = false
243
+ # retry
244
+ # else
245
+ # raise $!
246
+ # end
247
+ # end
248
+ # Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
249
+ #
250
+ # if complete
251
+ # Log.warn "Attaching through index and completing empty rows; keys with wrong format may appear (#{other.key_field} insted of #{self.key_field})" if index
252
+ # fill = TrueClass === complete ? nil : complete
253
+ # field_length = self.fields.length
254
+ # common_fields = (other.fields & self.fields)
255
+ # other_common_pos = common_fields.collect{|f| other.fields.index f}
256
+ # this_common_pos = common_fields.collect{|f| self.fields.index f}
257
+ # missing = other.keys - self.keys
258
+ #
259
+ # other = other.to_list if other.type == :single
260
+ #
261
+ # case type
262
+ # when :single
263
+ # missing.each do |k|
264
+ # self[k] = fill
265
+ # end
266
+ # when :list
267
+ # missing.each do |k|
268
+ # values = [fill] * field_length
269
+ # other_values = other[k]
270
+ # other_common_pos.zip(this_common_pos).each do |o,t|
271
+ # values[t] = other_values[o]
272
+ # end
273
+ # self[k] = values
274
+ # end
275
+ # when :double
276
+ # fill = [] if fill.nil?
277
+ # missing.each do |k|
278
+ # values = [fill] * field_length
279
+ # other_values = other[k]
280
+ # other_common_pos.zip(this_common_pos).each do |o,t|
281
+ # values[t] = other_values[o]
282
+ # end
283
+ # self[k] = values
284
+ # end
285
+ # when :flat
286
+ # fill = [] if fill.nil?
287
+ # missing.each do |k|
288
+ # self[k] = fill
289
+ # end
290
+ # end
291
+ # end
292
+ #
293
+ # self
294
+ # end
295
+ #
296
+ # def detach(file)
297
+ # file_fields = file.fields.collect{|field| field.fullname}
298
+ # detached_fields = []
299
+ # self.fields.each_with_index{|field,i| detached_fields << i if file_fields.include? field.fullname}
300
+ # reorder :key, detached_fields
301
+ # end
302
+ #
303
+ #end