rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,245 +1,244 @@
1
- require 'rbbt/tsv'
2
- require 'rbbt/persist'
3
-
4
-
5
- module TSV
6
- def self.change_key(tsv, format, options = {}, &block)
7
- options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers
8
-
9
- identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
10
-
11
- identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? && tsv.namespace &&
12
- defined?(Organism) && Organism.identifiers(tsv.namespace).exists?
13
-
14
- if ! tsv.fields.include?(format)
15
- new = {}
16
- tsv.each do |k,v|
17
- if v === String or v === Array
18
- new[k] = v.dup
19
- else
20
- new[k] = v
21
- end
22
- end
23
- orig_fields = tsv.fields
24
- tsv = tsv.annotate new
25
- new.fields = new.fields.collect{|f| "TMP-" << f }
26
-
27
- orig_type = tsv.type
28
- tsv = tsv.to_double if orig_type != :double
29
-
30
- if Array === identifiers
31
- tsv = tsv.attach identifiers.first, :fields => [format], :persist_input => true, :identifiers => identifiers.last
32
- else
33
- tsv = tsv.attach identifiers, :fields => [format], :persist_input => true
34
- end
35
-
36
-
37
- tsv = tsv.reorder(format, tsv.fields[0..-2])
38
-
39
- tsv = tsv.to_flat if orig_type == :flat
40
-
41
- tsv = tsv.to_list(&block) if orig_type == :list
42
-
43
- tsv.fields = orig_fields
44
-
45
- tsv
46
- else
47
- tsv.reorder(format)
48
- end
49
- end
50
-
51
- def change_key(format, options = {}, &block)
52
- options = Misc.add_defaults options, :identifiers => self.identifiers
53
- TSV.change_key(self, format, options, &block)
54
- end
55
-
56
- def self.swap_id(tsv, field, format, options = {}, &block)
57
- options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers, :compact => true
58
-
59
- identifiers, persist_input, compact = Misc.process_options options, :identifiers, :persist, :compact
60
- identifiers = tsv.identifier_files.first if identifiers.nil?
61
-
62
- identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? && tsv.namespace &&
63
- defined?(Organism) && Organism.identifiers(tsv.namespace).exists?
64
-
65
- identifiers.namespace ||= tsv.namespace
66
-
67
- fields = (identifiers and identifiers.all_fields.include?(field))? [field] : nil
68
- #index = identifiers.index :target => format, :fields => fields, :persist => persist_input, :order => true
69
-
70
- grep = Organism.blacklist_genes(tsv.namespace).list if defined?(Organism) && identifiers.namespace && Organism.blacklist_genes(tsv.namespace).exists?
71
- if fields.nil?
72
- index = identifiers.index(:data_tsv_grep => grep, :data_invert_grep => true, :target => format, :persist => true, :order => true, :unnamed => true, :data_persist => true)
73
- else
74
- index = identifiers.index(:data_tsv_grep => grep, :data_invert_grep => true, :target => format, :fields => fields, :order => true, :unnamed => true, :persist => true, :data_persist => true)
75
- end
76
-
77
- orig_type = tsv.type
78
- tsv = tsv.to_double if orig_type != :double
79
-
80
- pos = tsv.fields.index field
81
- tsv.with_unnamed do
82
- if tsv.type == :list or tsv.type == :single
83
- tsv.through do |k,v|
84
- v[pos] = index[v[pos]]
85
- tsv[k] = v
86
- end
87
- else
88
- tsv.through do |k,v|
89
- _values = index.values_at(*v[pos])
90
- _values.compact! if compact
91
- v[pos] = _values
92
- tsv[k] = v
93
- end
94
- end
95
-
96
- tsv.fields = tsv.fields.collect{|f| f == field ? format : f}
97
- end
98
-
99
- tsv = tsv.to_flat if orig_type == :flat
100
-
101
- tsv = tsv.to_list(&block) if orig_type == :list
102
-
103
- tsv
104
- end
105
-
106
- def swap_id(*args)
107
- TSV.swap_id(self, *args)
108
- end
109
-
110
- def self.translation_index(files, target = nil, source = nil, options = {})
111
- return nil if source == target
112
- options = Misc.add_defaults options.dup, :persist => true
113
-
114
- target = Entity.formats.find(target) if Entity.formats.find(target)
115
- source = Entity.formats.find(source) if Entity.formats.find(source)
116
- fields = (source and not source.empty?) ? [source] : nil
117
-
118
- files.each do |file|
119
- if TSV === file
120
- all_fields = file.all_fields
121
- target = file.fields.first if target.nil?
122
- if (source.nil? or all_fields.include? source) and all_fields.include? target
123
- return file.index(options.merge(:target => target, :fields => fields, :order => true))
124
- end
125
- else
126
- next unless file.exists?
127
- begin
128
- all_fields = TSV.parse_header(file).all_fields
129
- target = all_fields[1] if target.nil?
130
- if (source.nil? or all_fields.include? source) and all_fields.include? target
131
- index = TSV.index(file, options.merge(:target => target, :fields => fields, :order => true))
132
- return index
133
- end
134
- rescue Exception
135
- Log.exception $!
136
- Log.error "Exception reading identifier file: #{file.find}"
137
- end
138
- end
139
- end
140
-
141
- files.each do |file|
142
- all_fields = TSV === file ? file.all_fields : TSV.parse_header(file).all_fields
143
-
144
- files.each do |other_file|
145
- next if file == other_file
146
-
147
- other_all_fields = TSV === other_file ? other_file.all_fields : TSV.parse_header(other_file).all_fields
148
-
149
- common_field = (all_fields & other_all_fields).first
150
-
151
- if common_field and (source.nil? or source.empty? or all_fields.include? source) and other_all_fields.include? target
152
-
153
- index = Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
154
-
155
- index = TSV === file ?
156
- file.index(options.merge(:target => common_field, :fields => fields)) :
157
- TSV.index(file, options.merge(:target => common_field, :fields => fields))
158
-
159
- other_index = TSV === other_file ?
160
- other_file.index(options.merge(:target => target, :fields => [common_field])) :
161
- TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))
162
-
163
- data.serializer = :clean
164
-
165
- # ToDo: remove the need to to the `to_list` transformation
166
- data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single
167
- end
168
- return index
169
- end
170
- end
171
- end
172
- return nil
173
- end
174
-
175
- def self.translate(tsv, field, format, options = {})
176
- persist_options = Misc.pull_keys options, :persist
177
- new = TSV.open translate_stream(tsv, field, format, options), :persist => persist_options[:persist], :persist_data => persist_options[:data], :persist_file => persist_options[:file]
178
- new.identifiers = tsv.identifiers
179
- new
180
- end
181
-
182
- def self.translate_stream(tsv, field, format, options = {}, &block)
183
- options = Misc.add_defaults options, :persist => false, :identifier_files => tsv.identifier_files, :compact => true
184
-
185
- identifier_files, identifiers, persist_input, compact = Misc.process_options options, :identifier_files, :identifiers, :persist, :compact
186
- identifier_files = [tsv, identifiers].compact if identifier_files.nil? or identifier_files.empty?
187
-
188
- identifier_files.uniq!
189
-
190
- index = translation_index identifier_files, format, field, options.dup
191
- raise "No index: #{Misc.fingerprint([identifier_files, field, format])}" if index.nil?
192
-
193
- orig_type = tsv.type
194
- tsv = tsv.to_double if orig_type != :double
195
-
196
- pos = tsv.identify_field field
197
-
198
- new_options = tsv.options
199
- new_options[:identifiers] = tsv.identifiers.find if tsv.identifiers
200
-
201
- case pos
202
- when :key
203
- new_options[:key_field] = format if tsv.key_field == field
204
- dumper = TSV::Dumper.new new_options
205
- dumper.init
206
- TSV.traverse tsv, :into => dumper do |key,values|
207
- new_key = index[key]
208
- [new_key, values]
209
- end
210
- else
211
- new_options[:fields] = tsv.fields.collect{|f| f == field ? format : f }
212
- dumper = TSV::Dumper.new new_options
213
- dumper.init
214
-
215
- case tsv.type
216
- when :double
217
- TSV.traverse tsv, :into => dumper do |key,values|
218
- original = values[pos]
219
- new = index.values_at *original
220
- values[pos] = new
221
- [key, values]
222
- end
223
- when :list
224
- TSV.traverse tsv, :into => dumper do |key,values|
225
- original = values[pos]
226
- new = index[original]
227
- values[pos] = new
228
- [key, values]
229
- end
230
- when :flat
231
- TSV.traverse tsv, :into => dumper do |key,values|
232
- new = index.values_at *values
233
- [key, new]
234
- end
235
- when :single
236
- TSV.traverse tsv, :into => dumper do |key,original|
237
- new = index[original]
238
- [key, new]
239
- end
240
- end
241
- end
242
-
243
- dumper.stream
244
- end
245
- end
1
+ require_relative '../refactor'
2
+ Rbbt.require_instead 'scout/tsv'
3
+ #require 'rbbt/tsv'
4
+ #require 'rbbt/persist'
5
+ #
6
+ #
7
+ #module TSV
8
+ # def self.change_key(tsv, format, options = {}, &block)
9
+ # options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers
10
+ #
11
+ # identifiers, persist_input = Misc.process_options options, :identifiers, :persist_input
12
+ #
13
+ # identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
14
+ #
15
+ #
16
+ # if ! tsv.fields.include?(format)
17
+ # new = {}
18
+ # tsv.each do |k,v|
19
+ # if v === String or v === Array
20
+ # new[k] = v.dup
21
+ # else
22
+ # new[k] = v
23
+ # end
24
+ # end
25
+ # orig_fields = tsv.fields
26
+ # tsv = tsv.annotate new
27
+ # new.fields = new.fields.collect{|f| "TMP-" << f }
28
+ #
29
+ # orig_type = tsv.type
30
+ # tsv = tsv.to_double if orig_type != :double
31
+ #
32
+ # if Array === identifiers
33
+ # tsv = tsv.attach identifiers.first, :fields => [format], :persist_input => true, :identifiers => identifiers.last
34
+ # else
35
+ # tsv = tsv.attach identifiers, :fields => [format], :persist_input => true
36
+ # end
37
+ #
38
+ #
39
+ # tsv = tsv.reorder(format, tsv.fields[0..-2])
40
+ #
41
+ # tsv = tsv.to_flat if orig_type == :flat
42
+ #
43
+ # tsv = tsv.to_list(&block) if orig_type == :list
44
+ #
45
+ # tsv.fields = orig_fields
46
+ #
47
+ # tsv
48
+ # else
49
+ # tsv.reorder(format)
50
+ # end
51
+ # end
52
+ #
53
+ # def change_key(format, options = {}, &block)
54
+ # options = Misc.add_defaults options, :identifiers => self.identifiers
55
+ # TSV.change_key(self, format, options, &block)
56
+ # end
57
+ #
58
+ # def self.swap_id(tsv, field, format, options = {}, &block)
59
+ # options = Misc.add_defaults options, :persist => false, :identifiers => tsv.identifiers, :compact => true
60
+ #
61
+ # identifiers, persist_input, compact = Misc.process_options options, :identifiers, :persist, :compact
62
+ # identifiers = tsv.identifier_files.first if identifiers.nil?
63
+ # identifiers = Organism.identifiers(tsv.namespace) if defined?(Organism) && identifiers.nil? && tsv.namespace && Organism.identifiers(tsv.namespace).exists?
64
+ # identifiers.namespace ||= tsv.namespace
65
+ #
66
+ # fields = (identifiers and identifiers.all_fields.include?(field))? [field] : nil
67
+ # #index = identifiers.index :target => format, :fields => fields, :persist => persist_input, :order => true
68
+ #
69
+ # grep = Organism.blacklist_genes(tsv.namespace).list if defined?(Organism) && identifiers.namespace && Organism.blacklist_genes(tsv.namespace).exists?
70
+ # if fields.nil?
71
+ # index = identifiers.index(:data_tsv_grep => grep, :data_invert_grep => true, :target => format, :persist => true, :order => true, :unnamed => true, :data_persist => true)
72
+ # else
73
+ # index = identifiers.index(:data_tsv_grep => grep, :data_invert_grep => true, :target => format, :fields => fields, :order => true, :unnamed => true, :persist => true, :data_persist => true)
74
+ # end
75
+ #
76
+ # orig_type = tsv.type
77
+ # tsv = tsv.to_double if orig_type != :double
78
+ #
79
+ # pos = tsv.fields.index field
80
+ # tsv.with_unnamed do
81
+ # if tsv.type == :list or tsv.type == :single
82
+ # tsv.through do |k,v|
83
+ # v[pos] = index[v[pos]]
84
+ # tsv[k] = v
85
+ # end
86
+ # else
87
+ # tsv.through do |k,v|
88
+ # _values = index.values_at(*v[pos])
89
+ # _values.compact! if compact
90
+ # v[pos] = _values
91
+ # tsv[k] = v
92
+ # end
93
+ # end
94
+ #
95
+ # tsv.fields = tsv.fields.collect{|f| f == field ? format : f}
96
+ # end
97
+ #
98
+ # tsv = tsv.to_flat if orig_type == :flat
99
+ #
100
+ # tsv = tsv.to_list(&block) if orig_type == :list
101
+ #
102
+ # tsv
103
+ # end
104
+ #
105
+ # def swap_id(*args)
106
+ # TSV.swap_id(self, *args)
107
+ # end
108
+ #
109
+ # def self.translation_index(files, target = nil, source = nil, options = {})
110
+ # return nil if source == target
111
+ # options = Misc.add_defaults options.dup, :persist => true
112
+ #
113
+ # target = Entity.formats.find(target) if Entity.formats.find(target)
114
+ # source = Entity.formats.find(source) if Entity.formats.find(source)
115
+ # fields = (source and not source.empty?) ? [source] : nil
116
+ #
117
+ # files.each do |file|
118
+ # if TSV === file
119
+ # all_fields = file.all_fields
120
+ # target = file.fields.first if target.nil?
121
+ # if (source.nil? or all_fields.include? source) and all_fields.include? target
122
+ # return file.index(options.merge(:target => target, :fields => fields, :order => true))
123
+ # end
124
+ # else
125
+ # next unless file.exists?
126
+ # begin
127
+ # all_fields = TSV.parse_header(file).all_fields
128
+ # target = all_fields[1] if target.nil?
129
+ # if (source.nil? or all_fields.include? source) and all_fields.include? target
130
+ # index = TSV.index(file, options.merge(:target => target, :fields => fields, :order => true))
131
+ # return index
132
+ # end
133
+ # rescue Exception
134
+ # Log.exception $!
135
+ # Log.error "Exception reading identifier file: #{file.find}"
136
+ # end
137
+ # end
138
+ # end
139
+ #
140
+ # files.each do |file|
141
+ # all_fields = TSV === file ? file.all_fields : TSV.parse_header(file).all_fields
142
+ #
143
+ # files.each do |other_file|
144
+ # next if file == other_file
145
+ #
146
+ # other_all_fields = TSV === other_file ? other_file.all_fields : TSV.parse_header(other_file).all_fields
147
+ #
148
+ # common_field = (all_fields & other_all_fields).first
149
+ #
150
+ # if common_field and (source.nil? or source.empty? or all_fields.include? source) and other_all_fields.include? target
151
+ #
152
+ # index = Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
153
+ #
154
+ # index = TSV === file ?
155
+ # file.index(options.merge(:target => common_field, :fields => fields)) :
156
+ # TSV.index(file, options.merge(:target => common_field, :fields => fields))
157
+ #
158
+ # other_index = TSV === other_file ?
159
+ # other_file.index(options.merge(:target => target, :fields => [common_field])) :
160
+ # TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))
161
+ #
162
+ # data.serializer = :clean
163
+ #
164
+ # # ToDo: remove the need to to the `to_list` transformation
165
+ # data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single
166
+ # end
167
+ # return index
168
+ # end
169
+ # end
170
+ # end
171
+ # return nil
172
+ # end
173
+ #
174
+ # def self.translate(tsv, field, format, options = {})
175
+ # persist_options = Misc.pull_keys options, :persist
176
+ # new = TSV.open translate_stream(tsv, field, format, options), :persist => persist_options[:persist], :persist_data => persist_options[:data], :persist_file => persist_options[:file]
177
+ # new.identifiers = tsv.identifiers
178
+ # new
179
+ # end
180
+ #
181
+ # def self.translate_stream(tsv, field, format, options = {}, &block)
182
+ # options = Misc.add_defaults options, :persist => false, :identifier_files => tsv.identifier_files, :compact => true
183
+ #
184
+ # identifier_files, identifiers, persist_input, compact = Misc.process_options options, :identifier_files, :identifiers, :persist, :compact
185
+ # identifier_files = [tsv, identifiers].compact if identifier_files.nil? or identifier_files.empty?
186
+ #
187
+ # identifier_files.uniq!
188
+ #
189
+ # index = translation_index identifier_files, format, field, options.dup
190
+ # raise "No index: #{Misc.fingerprint([identifier_files, field, format])}" if index.nil?
191
+ #
192
+ # orig_type = tsv.type
193
+ # tsv = tsv.to_double if orig_type != :double
194
+ #
195
+ # pos = tsv.identify_field field
196
+ #
197
+ # new_options = tsv.options
198
+ # new_options[:identifiers] = tsv.identifiers.find if tsv.identifiers
199
+ #
200
+ # case pos
201
+ # when :key
202
+ # new_options[:key_field] = format if tsv.key_field == field
203
+ # dumper = TSV::Dumper.new new_options
204
+ # dumper.init
205
+ # TSV.traverse tsv, :into => dumper do |key,values|
206
+ # new_key = index[key]
207
+ # [new_key, values]
208
+ # end
209
+ # else
210
+ # new_options[:fields] = tsv.fields.collect{|f| f == field ? format : f }
211
+ # dumper = TSV::Dumper.new new_options
212
+ # dumper.init
213
+ #
214
+ # case tsv.type
215
+ # when :double
216
+ # TSV.traverse tsv, :into => dumper do |key,values|
217
+ # original = values[pos]
218
+ # new = index.values_at *original
219
+ # values[pos] = new
220
+ # [key, values]
221
+ # end
222
+ # when :list
223
+ # TSV.traverse tsv, :into => dumper do |key,values|
224
+ # original = values[pos]
225
+ # new = index[original]
226
+ # values[pos] = new
227
+ # [key, values]
228
+ # end
229
+ # when :flat
230
+ # TSV.traverse tsv, :into => dumper do |key,values|
231
+ # new = index.values_at *values
232
+ # [key, new]
233
+ # end
234
+ # when :single
235
+ # TSV.traverse tsv, :into => dumper do |key,original|
236
+ # new = index[original]
237
+ # [key, new]
238
+ # end
239
+ # end
240
+ # end
241
+ #
242
+ # dumper.stream
243
+ # end
244
+ #end