scout-gear 10.4.0 → 10.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +100 -656
  3. data/Rakefile +1 -0
  4. data/VERSION +1 -1
  5. data/bin/scout +1 -3
  6. data/lib/scout/association/fields.rb +170 -0
  7. data/lib/scout/association/index.rb +229 -0
  8. data/lib/scout/association/item.rb +227 -0
  9. data/lib/scout/association/util.rb +7 -0
  10. data/lib/scout/association.rb +100 -0
  11. data/lib/scout/entity/format.rb +62 -0
  12. data/lib/scout/entity/identifiers.rb +111 -0
  13. data/lib/scout/entity/object.rb +20 -0
  14. data/lib/scout/entity/property.rb +165 -0
  15. data/lib/scout/entity.rb +41 -0
  16. data/lib/scout/offsite/step.rb +2 -2
  17. data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
  18. data/lib/scout/persist/engine/packed_index.rb +100 -0
  19. data/lib/scout/persist/engine/sharder.rb +219 -0
  20. data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
  21. data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
  22. data/lib/scout/persist/engine.rb +4 -0
  23. data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
  24. data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
  25. data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
  26. data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
  27. data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
  28. data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
  29. data/lib/scout/persist/tsv/adapter.rb +6 -0
  30. data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
  31. data/lib/scout/persist/tsv.rb +107 -0
  32. data/lib/scout/tsv/annotation/repo.rb +87 -0
  33. data/lib/scout/tsv/annotation.rb +169 -0
  34. data/lib/scout/tsv/attach.rb +97 -21
  35. data/lib/scout/tsv/change_id/translate.rb +148 -0
  36. data/lib/scout/tsv/change_id.rb +3 -0
  37. data/lib/scout/tsv/csv.rb +85 -0
  38. data/lib/scout/tsv/dumper.rb +113 -25
  39. data/lib/scout/tsv/index.rb +88 -36
  40. data/lib/scout/tsv/open.rb +21 -8
  41. data/lib/scout/tsv/parser.rb +153 -90
  42. data/lib/scout/tsv/path.rb +7 -2
  43. data/lib/scout/tsv/stream.rb +48 -6
  44. data/lib/scout/tsv/transformer.rb +5 -3
  45. data/lib/scout/tsv/traverse.rb +28 -19
  46. data/lib/scout/tsv/util/process.rb +7 -0
  47. data/lib/scout/tsv/util/reorder.rb +25 -15
  48. data/lib/scout/tsv/util/select.rb +9 -1
  49. data/lib/scout/tsv/util/sort.rb +90 -2
  50. data/lib/scout/tsv/util/unzip.rb +56 -0
  51. data/lib/scout/tsv/util.rb +52 -5
  52. data/lib/scout/tsv.rb +42 -27
  53. data/lib/scout/work_queue/socket.rb +8 -0
  54. data/lib/scout/work_queue/worker.rb +22 -5
  55. data/lib/scout/work_queue.rb +41 -24
  56. data/lib/scout/workflow/definition.rb +15 -12
  57. data/lib/scout/workflow/deployment/orchestrator.rb +21 -3
  58. data/lib/scout/workflow/deployment/trace.rb +205 -0
  59. data/lib/scout/workflow/deployment.rb +1 -0
  60. data/lib/scout/workflow/documentation.rb +1 -1
  61. data/lib/scout/workflow/step/archive.rb +42 -0
  62. data/lib/scout/workflow/step/children.rb +51 -0
  63. data/lib/scout/workflow/step/config.rb +1 -1
  64. data/lib/scout/workflow/step/dependencies.rb +25 -8
  65. data/lib/scout/workflow/step/file.rb +19 -0
  66. data/lib/scout/workflow/step/info.rb +37 -9
  67. data/lib/scout/workflow/step/progress.rb +11 -2
  68. data/lib/scout/workflow/step/status.rb +9 -1
  69. data/lib/scout/workflow/step.rb +80 -25
  70. data/lib/scout/workflow/task/dependencies.rb +5 -2
  71. data/lib/scout/workflow/task/inputs.rb +91 -41
  72. data/lib/scout/workflow/task.rb +54 -57
  73. data/lib/scout/workflow/usage.rb +1 -1
  74. data/lib/scout/workflow/util.rb +4 -0
  75. data/lib/scout/workflow.rb +110 -13
  76. data/lib/scout-gear.rb +2 -0
  77. data/lib/scout.rb +0 -1
  78. data/scout-gear.gemspec +78 -23
  79. data/scout_commands/rbbt +2 -0
  80. data/test/data/person/brothers +4 -0
  81. data/test/data/person/identifiers +10 -0
  82. data/test/data/person/marriages +3 -0
  83. data/test/data/person/parents +6 -0
  84. data/test/scout/association/test_fields.rb +105 -0
  85. data/test/scout/association/test_index.rb +70 -0
  86. data/test/scout/association/test_item.rb +21 -0
  87. data/test/scout/entity/test_format.rb +19 -0
  88. data/test/scout/entity/test_identifiers.rb +58 -0
  89. data/test/scout/entity/test_object.rb +0 -0
  90. data/test/scout/entity/test_property.rb +345 -0
  91. data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
  92. data/test/scout/persist/engine/test_packed_index.rb +99 -0
  93. data/test/scout/persist/engine/test_sharder.rb +31 -0
  94. data/test/scout/persist/engine/test_tkrzw.rb +0 -0
  95. data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
  96. data/test/scout/persist/test_tsv.rb +146 -0
  97. data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
  98. data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
  99. data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
  100. data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
  101. data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
  102. data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
  103. data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
  104. data/test/scout/persist/tsv/test_serialize.rb +12 -0
  105. data/test/scout/test_association.rb +51 -0
  106. data/test/scout/test_entity.rb +40 -0
  107. data/test/scout/test_tsv.rb +33 -4
  108. data/test/scout/test_work_queue.rb +5 -2
  109. data/test/scout/test_workflow.rb +31 -14
  110. data/test/scout/tsv/annotation/test_repo.rb +150 -0
  111. data/test/scout/tsv/change_id/test_translate.rb +178 -0
  112. data/test/scout/tsv/test_annotation.rb +52 -0
  113. data/test/scout/tsv/test_attach.rb +255 -1
  114. data/test/scout/tsv/test_change_id.rb +25 -0
  115. data/test/scout/tsv/test_csv.rb +50 -0
  116. data/test/scout/tsv/test_dumper.rb +38 -0
  117. data/test/scout/tsv/test_index.rb +82 -0
  118. data/test/scout/tsv/test_open.rb +44 -0
  119. data/test/scout/tsv/test_parser.rb +70 -0
  120. data/test/scout/tsv/test_stream.rb +22 -0
  121. data/test/scout/tsv/test_transformer.rb +27 -3
  122. data/test/scout/tsv/test_traverse.rb +78 -0
  123. data/test/scout/tsv/util/test_process.rb +16 -0
  124. data/test/scout/tsv/util/test_reorder.rb +67 -0
  125. data/test/scout/tsv/util/test_sort.rb +28 -1
  126. data/test/scout/tsv/util/test_unzip.rb +32 -0
  127. data/test/scout/work_queue/test_socket.rb +4 -1
  128. data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
  129. data/test/scout/workflow/deployment/test_trace.rb +25 -0
  130. data/test/scout/workflow/step/test_archive.rb +28 -0
  131. data/test/scout/workflow/step/test_children.rb +25 -0
  132. data/test/scout/workflow/step/test_info.rb +16 -0
  133. data/test/scout/workflow/task/test_dependencies.rb +16 -16
  134. data/test/scout/workflow/task/test_inputs.rb +45 -1
  135. data/test/scout/workflow/test_definition.rb +52 -0
  136. data/test/scout/workflow/test_step.rb +57 -0
  137. data/test/scout/workflow/test_task.rb +26 -1
  138. data/test/scout/workflow/test_usage.rb +4 -4
  139. data/test/test_helper.rb +23 -1
  140. metadata +69 -14
  141. data/lib/scout/tsv/persist.rb +0 -27
  142. data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
  143. data/test/scout/tsv/test_persist.rb +0 -45
@@ -0,0 +1,169 @@
1
+ require_relative 'annotation/repo'
2
+ module Annotation
3
+
4
# Computes the TSV row values describing +obj+, one value per requested field.
#
# obj    - an annotated object; it must respond to +annotation_info+.
# fields - list of field specifiers (Strings, Symbols or Procs):
#          * a Proc is called with +obj+ and its result is the value
#          * "JSON" dumps the whole annotation info Hash as JSON
#          * "annotation_types" joins the annotation types with "|"
#          * "annotated_array" tells whether +obj+ is an AnnotatedArray
#          * "literal" is +obj+ itself with tabs/newlines turned into spaces
#          * anything else is looked up in the annotation info and, failing
#            that, read from the method of that name on +obj+
#
# Array values are serialized as "Array:" followed by the elements joined
# by "|".
#
# Returns an Array with one value per field.
# Raises RuntimeError when a field cannot be resolved.
def self.obj_tsv_values(obj, fields)
  annotation_info = obj.annotation_info
  annotation_info[:annotated_array] = true if AnnotatedArray === obj

  # Arrays are packed into a single cell; + is used instead of << so the
  # "Array:" literal is never mutated
  encode = lambda { |value| Array === value ? "Array:" + value * "|" : value }

  fields.collect do |field|
    field = field.to_s if Symbol === field
    case field
    when Proc
      field.call(obj)
    when "JSON"
      annotation_info.to_json
    when "annotation_types"
      annotation_info[:annotation_types].collect { |t| t.to_s } * "|"
    when "annotated_array"
      AnnotatedArray === obj
    when "literal"
      encode.call(obj).gsub(/\n|\t/, ' ')
    else
      if annotation_info.include?(field.to_sym)
        encode.call(annotation_info[field.to_sym])
      elsif obj.respond_to?(field)
        # The fallback must query the object being dumped, not this module
        encode.call(obj.send(field))
      else
        raise "Unknown annotation field #{field.inspect}"
      end
    end
  end
end
40
+
41
# Row values for an annotated list taken as a whole: delegates to
# obj_tsv_values, passing the list itself as the object.
def self.list_tsv_values(objs, fields)
  self.obj_tsv_values(objs, fields)
end
44
+
45
+
46
# Builds a TSV describing the annotations of +objs+.
#
# objs   - a single annotated object (stored in one row, key field "List")
#          or an Array of annotated objects (one row each, key field "ID",
#          row keys made of the annotation id, "#" and the position).
# fields - annotation fields to dump. With no fields every annotation plus
#          "annotation_types" is used; "all" (or :all) also adds "literal".
#
# Returns the TSV, or nil when +objs+ is nil.
# Raises RuntimeError when the first element of a non-empty Array is not
# annotated, or when +objs+ is neither annotated nor an Array.
def self.tsv(objs, *fields)
  return nil if objs.nil?

  fields = fields.flatten.compact.uniq

  annotations =
    if Annotation.is_annotated?(objs)
      objs.annotations
    elsif Array === objs && objs.any?
      first = objs.compact.first
      raise "Objects didn't have annotations" unless Annotation.is_annotated?(first)
      objs.compact.first.annotations
    else
      []
    end

  all_requested = fields == ["all"] || fields == [:all]
  if fields.empty?
    fields = annotations + [:annotation_types]
  elsif all_requested
    fields = annotations + [:annotation_types, :literal]
  end

  fields = fields.collect { |f| Symbol === f ? f.to_s : f }

  tsv = TSV.setup({}, :key_field => nil, :fields => fields, :type => :list, :unnamed => true)

  if Annotation.is_annotated?(objs)
    tsv.key_field = "List"
    tsv[objs.annotation_id] = self.list_tsv_values(objs, fields).dup
  elsif Array === objs
    tsv.key_field = "ID"

    # Either a flat list of annotated objects or a list of lists of them
    entries =
      if Annotation.is_annotated?(objs.compact.first)
        objs.compact
      elsif objs.any? && Annotation.is_annotated?(objs.compact.first.compact.first)
        objs.flatten.compact
      else
        []
      end

    entries.each_with_index do |obj, i|
      tsv[obj.annotation_id + "#" + i.to_s] = self.obj_tsv_values(obj, fields).dup
    end
  else
    raise "Annotations need to be an Array to create TSV"
  end

  tsv
end
97
+
98
+ # Load TSV
99
+
100
# Decodes a TSV cell that may hold a serialized Array.
#
# Strings starting with the "Array:" marker are turned into the Array of
# their "|"-separated elements; any other entry (including nil and
# non-String values) is returned untouched.
def self.resolve_tsv_array(entry)
  # start_with? anchors on the very beginning of the string; the previous
  # /^Array:/ pattern also matched at the beginning of any later line
  return entry unless String === entry && entry.start_with?("Array:")

  entry["Array:".length..-1].split("|")
end
107
+
108
# Rebuilds an annotation info Hash from a TSV row.
#
# fields - field names of the row; nil entries and "literal" are skipped
# values - row values, indexed by the position of each field
#
# A "JSON" field is parsed and each of its entries stored under the
# symbolized key; every other field is stored under its symbolized name
# after passing the value through resolve_tsv_array.
#
# Returns the info Hash.
def self.load_info(fields, values)
  fields.each_with_index.each_with_object({}) do |(field, pos), info|
    next if field.nil? || field == "literal"

    if "JSON" == field
      JSON.parse(values[pos]).each { |key, value| info[key.to_sym] = value }
    else
      info[field.to_sym] = resolve_tsv_array(values[pos])
    end
  end
end
126
+
127
# Recreates an annotated object from a TSV row.
#
# id     - row key; a copy of it is used as the object unless the row has a
#          "literal" field
# values - row values; when the first value is an Array the values are
#          zipped with NamedArray.zip_fields and processed row by row
# fields - field names for +values+ (nested lists are flattened)
#
# The object (after decoding a possible "Array:" cell) is set up with the
# annotation types and info loaded from the row, and extended with
# AnnotatedArray when it is an Array.
#
# Returns the annotated object.
def self.load_tsv_values(id, values, *fields)
  fields = fields.flatten
  literal_pos = fields.index "literal"

  object = if literal_pos
             values[literal_pos].tap { |o| o.force_encoding(Encoding.default_external) }
           else
             id.dup
           end

  object = resolve_tsv_array(object)

  # Declared before the block so the block assigns THIS variable; without it
  # the assignment was block-local and +info+ stayed nil for multi-valued rows
  info = {}
  if Array === values.first
    # NOTE(review): the info of the last zipped entry is the one kept --
    # confirm this is the intended behavior for multi-valued rows
    NamedArray.zip_fields(values).each do |v|
      info = load_info(fields, v)
    end
  else
    info = load_info(fields, values)
  end

  self.setup(object, info[:annotation_types], info)

  object.extend AnnotatedArray if Array === object

  object
end
154
+
155
# Loads the annotated objects stored in +tsv+.
#
# Every row is turned back into an annotated object with
# Annotation.load_tsv_values, within the TSV's with_unnamed block. When the
# key field is "List" only the first loaded object is returned; otherwise
# the Array with all of them.
def self.load_tsv(tsv)
  tsv.with_unnamed do
    loaded = tsv.collect { |id, values| Annotation.load_tsv_values(id, values, tsv.fields) }
    "List" == tsv.key_field ? loaded.first : loaded
  end
end
169
+ end
@@ -1,7 +1,11 @@
1
1
  module TSV
2
2
 
3
3
  def self.match_keys(source, other, match_key: nil, other_key: nil)
4
- match_key = (source.all_fields & other.all_fields).first if match_key.nil?
4
+ #match_key = (source.all_fields & other.all_fields).first if match_key.nil?
5
+ if match_key.nil?
6
+ match_key_pos = NamedArray.identify_name(source.all_fields, other.all_fields).first
7
+ match_key = source.all_fields[match_key_pos] if match_key_pos
8
+ end
5
9
 
6
10
  if match_key.nil?
7
11
  source.all_fields.collect do |f|
@@ -32,13 +36,13 @@ module TSV
32
36
 
33
37
  other_key = other.key_field if other_key.nil?
34
38
 
35
- match_key = :key if match_key == source.key_field
36
- other_key = :key if other_key == other.key_field
39
+ match_key = :key if NamedArray.field_match(match_key, source.key_field)
40
+ other_key = :key if NamedArray.field_match(other_key, other.key_field)
37
41
 
38
42
  [match_key, other_key]
39
43
  end
40
44
 
41
- def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
45
+ def self.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
42
46
  source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
43
47
  other = TSV::Parser.new other unless TSV === other || TSV::Parser === other
44
48
 
@@ -70,11 +74,24 @@ module TSV
70
74
  other_key_name = other.fields[other_key_name] if Integer === other_key
71
75
  fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?
72
76
 
77
+ match_key_name = match_key == :key ? source.key_field : match_key_name
78
+
79
+ if index.nil? && ! source.identify_field(other_key_name)
80
+ identifier_files = []
81
+ identifier_files << identifiers if identifiers
82
+ identifier_files << source
83
+ identifier_files << TSV.identifier_files(source)
84
+ identifier_files << TSV.identifier_files(other)
85
+ identifier_files << other
86
+
87
+ index = TSV.translation_index(identifier_files.flatten, match_key_name, other_key_name)
88
+ end
89
+
73
90
  if other_key != :key
74
- other = other.reorder other_key, fields, one2one: one2one
91
+ other = other.reorder other_key, fields, one2one: one2one, merge: true, type: :double
75
92
  end
76
93
 
77
- other_field_positions = other.identify_field(fields)
94
+ other_field_positions = other.identify_field(fields.dup)
78
95
 
79
96
  log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
80
97
  Log.debug log_message
@@ -85,8 +102,11 @@ module TSV
85
102
  source.fields = (source.fields + fields).uniq
86
103
 
87
104
  overlaps = source.identify_field(fields)
105
+ orig_type = source.type
88
106
 
89
- empty_other_values = case source.type
107
+ type = source.type == :single ? :list : source.type
108
+
109
+ empty_other_values = case type
90
110
  when :list
91
111
  [nil] * other.fields.length
92
112
  when :flat
@@ -95,20 +115,28 @@ module TSV
95
115
  [[]] * other.fields.length
96
116
  end
97
117
 
118
+ empty_other_values = nil if other.type == :single
119
+
98
120
  insitu = TSV === source ? true : false if insitu.nil?
121
+ insitu = false if source.type == :single
99
122
 
100
123
  match_key_pos = source.identify_field(match_key)
101
124
  source.traverse bar: bar, unnamed: true do |orig_key,current_values|
125
+ current_values = [current_values] if source.type == :single
126
+
102
127
  keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
103
- keys = [keys] unless Array === keys
128
+ keys = [keys].compact unless Array === keys
129
+
130
+ keys = index.chunked_values_at(keys).flatten if index
104
131
 
105
132
  current_values = current_values.dup unless insitu
133
+ keys = [nil] if keys.empty?
106
134
  keys.each do |current_key|
107
- other_values = other[current_key]
135
+ other_values = current_key.nil? ? empty_other_values : other[current_key]
108
136
 
109
137
  if other_values.nil?
110
138
  other_values = empty_other_values
111
- elsif other.type == :flat
139
+ elsif other.type == :flat
112
140
  other_values = [other_values]
113
141
  elsif other.type == :list && source.type == :double
114
142
  other_values = other_values.collect{|v| [v] }
@@ -116,14 +144,25 @@ module TSV
116
144
  other_values = other_values.collect{|v| v.first }
117
145
  end
118
146
 
119
- other_values = other_values.values_at *other_field_positions
147
+ other_values = other_field_positions.collect do |pos|
148
+ if pos == :key
149
+ current_key
150
+ else
151
+ other.type == :single ? other_values : other_values[pos]
152
+ end
153
+ end
120
154
 
121
155
  other_values.zip(overlaps).each do |v,overlap|
122
- if source.type == :list
123
- current_values[overlap] = v if current_values[overlap].nil? || String === current_values[overlap] && current_values[overlap].empty?
156
+ if type == :list
157
+ current_values[overlap] = v if current_values[overlap].nil? || (String === current_values[overlap] && current_values[overlap].empty?)
158
+ elsif type == :flat
159
+ next if v.nil?
160
+ v = [v] unless Array === v
161
+ current_values.concat v
124
162
  else
125
163
  current_values[overlap] ||= []
126
164
  next if v.nil?
165
+ v = [v] unless Array === v
127
166
  current_values[overlap].concat (v - current_values[overlap])
128
167
  end
129
168
  end
@@ -133,7 +172,7 @@ module TSV
133
172
  end
134
173
 
135
174
  if complete && match_key == :key
136
- empty_self_values = case source.type
175
+ empty_self_values = case type
137
176
  when :list
138
177
  [nil] * source.fields.length
139
178
  when :flat
@@ -143,15 +182,17 @@ module TSV
143
182
  end
144
183
  other.each do |other_key,other_values|
145
184
  next if source.include?(other_key)
146
- if other.type == :flat
185
+ if other.type == :flat
147
186
  other_values = [other_values]
148
- elsif other.type == :list && source.type == :double
187
+ elsif other.type == :single
188
+ other_values = [other_values]
189
+ elsif other.type == :list && type == :double
149
190
  other_values = other_values.collect{|v| [v] }
150
- elsif other.type == :double && source.type == :list
191
+ elsif other.type == :double && type == :list
151
192
  other_values = other_values.collect{|v| v.first }
152
193
  end
153
194
 
154
- new_values = case source.type
195
+ new_values = case type
155
196
  when :list
156
197
  [nil] * source.fields.length
157
198
  when :flat
@@ -162,17 +203,19 @@ module TSV
162
203
 
163
204
  other_values.zip(overlaps).each do |v,overlap|
164
205
  next if v.nil?
165
- if false && overlap == :key
206
+ if overlap == :key
166
207
  other_key = Array === v ? v : v.first
167
- elsif source.type == :list
168
- new_values[overlap] = v if v[overlap].nil? || String === v[overlap] && v[overlap].empty?
208
+ elsif type == :list
209
+ new_values[overlap] = v if new_values[overlap].nil? || (String === new_values[overlap] && new_values[overlap].empty?)
169
210
  else
211
+ v = [v] unless Array === v
170
212
  new_values[overlap].concat v
171
213
  end
172
214
  end
173
215
  source[other_key] = new_values
174
216
  end
175
217
  end
218
+ source.type = type
176
219
  end
177
220
  end
178
221
 
@@ -182,4 +225,37 @@ module TSV
182
225
  def attach(*args, **kwargs)
183
226
  TSV.attach(self, *args, **kwargs)
184
227
  end
228
+
229
# Lists the identifier files associated with this object.
#
# An explicit +identifiers+ value takes precedence:
# * a TSV is returned wrapped in an Array
# * an Array is returned as is when it is empty or starts with a TSV;
#   otherwise every element that is not a Path is passed through Path.setup
# * any other value is returned as a one-element Array, as a Path
#
# Without +identifiers+ the result comes from the +identifiers+ of the
# directory of +filename+: flattened and restricted to existing files when
# +filename+ is already a Path, or wrapped as is when +filename+ has to be
# turned into a Path first. Returns [] when neither is available.
def identifier_files
  ids = identifiers
  as_path = lambda { |f| Path === f ? f : Path.setup(f) }

  if ids
    return [ids] if TSV === ids
    return [as_path.call(ids)] unless Array === ids
    return ids if TSV === ids.first || ids.empty?

    return ids.collect { |f| as_path.call(f) }
  end

  if Path === filename
    [filename.dirname.identifiers].flatten.compact.select { |f| f.exists? }
  elsif filename
    [Path.setup(filename.dup).dirname.identifiers]
  else
    []
  end
end
251
+
252
# Identifier files for +obj+: a TSV is asked for its own identifier_files,
# a Path yields the +identifiers+ of its directory, and anything else
# gives nil.
def self.identifier_files(obj)
  case obj
  when TSV  then obj.identifier_files
  when Path then obj.dirname.identifiers
  end
end
185
261
  end
@@ -0,0 +1,148 @@
1
+ module TSV
2
+
3
# Identifies +field+ within the fields of +obj+, whatever form it takes:
# * a TSV is asked directly through its identify_field
# * a TSV::Parser or TSV::Dumper contributes its key_field and fields
# * a Path or String has its header parsed and the "all_fields" entry used
# * an Array is read as the key field followed by the remaining fields
#
# Returns whatever the underlying identify_field call returns, or nil for
# any other kind of object.
def self.identify_field_in_obj(obj, field)
  if TSV === obj
    obj.identify_field(field)
  elsif TSV::Parser === obj || TSV::Dumper === obj
    TSV.identify_field(obj.key_field, obj.fields, field)
  elsif Path === obj || String === obj
    header_fields = TSV.parse_header(obj)["all_fields"]
    identify_field_in_obj(header_fields, field)
  elsif Array === obj
    key_field, *fields = obj
    TSV.identify_field(key_field, fields, field)
  end
end
17
+
18
# Finds the chain of files needed to translate +source+ into +target+.
#
# file_fields - Hash from file to the list of its fields
# source      - field to translate from; nil makes every file a candidate
# target      - field to translate to
#
# Returns an Array of one file (it holds both fields), two files (they
# share a field), or three files (a middle file links a source file and a
# target file).
# Raises RuntimeError when no such chain of up to three files exists.
def self.translation_path(file_fields, source, target)
  files_with = lambda do |field|
    file_fields.select { |_file, fields| identify_field_in_obj(fields, field) }.keys
  end

  target_files = files_with.call(target)
  source_files = source.nil? ? file_fields.keys : files_with.call(source)

  if source && (one_step = target_files & source_files).any?
    return [one_step.first]
  end

  source_fields = file_fields.values_at(*source_files).flatten
  target_fields = file_fields.values_at(*target_files).flatten

  # First of +files+ having at least one field in common with +shared+
  first_sharing = lambda do |files, shared|
    files.find { |file| (file_fields[file] & shared).any? }
  end

  if (common_fields = source_fields & target_fields).any?
    return [first_sharing.call(source_files, common_fields),
            first_sharing.call(target_files, common_fields)]
  end

  # No direct link: look for a file bridging source and target fields
  middle_file, middle_fields = file_fields.find do |_file, fields|
    (fields & source_fields).any? && (fields & target_fields).any?
  end

  unless middle_file
    raise "Could not traverse identifier path from #{Log.fingerprint source} to #{Log.fingerprint target} in #{Log.fingerprint file_fields}"
  end

  [first_sharing.call(source_files, middle_fields),
   middle_file,
   first_sharing.call(target_files, middle_fields)]
end
48
+
49
# Builds (and persists) an index translating +source+ values into +target+
# values using the given identifier files.
#
# files           - one file or an Array of them; Path entries that do not
#                   exist are ignored and the rest are located with find
# source          - field to translate from; nil indexes over all fields
# target          - field to translate to
# persist_options - persistence options; :persist defaults to true and
#                   :prefix to "Translation index". The Hash is copied
#                   before defaults are added.
#
# Returns nil when +source+ equals +target+; otherwise the result of the
# Persist.persist call that builds the index.
# Raises whatever translation_path raises when no path links both fields.
def self.translation_index(files, source, target, persist_options = {})
  return nil if source == target

  persist_options = IndiferentHash.add_defaults persist_options.dup, :persist => true, :prefix => "Translation index"

  files = [files] unless Array === files

  file_fields = {}
  files.each do |file|
    next if Path === file && ! Open.exist?(file)
    file = file.find if Path === file
    file_fields[file] = all_fields(file)
  end

  # The previous code rescued a failure here only to repeat the very same
  # call and re-raise the first error; a single call behaves identically
  path = translation_path(file_fields, source, target)

  name = [source || "all", target] * "->" + " (#{files.length} files - #{Misc.digest(files)})"

  # With several files the first one is read up to the field it shares with
  # the second one rather than up to the final target
  second_target = if path.length == 1
                    target
                  else
                    file1, file2 = path.values_at 0, 1
                    pos = NamedArray.identify_name(TSV.all_fields(file1), TSV.all_fields(file2))
                    TSV.all_fields(file1)[pos.compact.first]
                  end

  Persist.persist(name, "HDB", persist_options) do
    index = path.inject(nil) do |acc, file|
      if ! acc.nil?
        # Later files are attached to what has been accumulated so far
        acc.attach file, insitu: false
      elsif source.nil?
        TSV === file ? file.index(target: second_target) : TSV.index(file, target: second_target)
      elsif TSV === file
        file.key_field == source ? file.annotate(file.dup) : file.reorder(source)
      else
        TSV.open(file, key_field: source)
      end
    end
    index.slice([target]).to_single
  end
end
107
+
108
# Translates the values of +field+ in +tsv+ into the +format+ field.
#
# tsv           - TSV (or TSV source) to translate
# field         - name of the field to translate; may be the key field
# format        - name of the field the values are translated into
# identifiers   - identifier files; defaults to tsv.identifier_files
# one2one/merge - passed on to the transformer traversal / final tsv call
# stream        - when true the transformer itself is returned
# keep          - accepted but not used by this method
# persist_index - passed as the :persist option of the translation index
#
# Returns the transformer when +stream+ is set, else the TSV built from it.
def self.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true)
  identifiers ||= tsv.identifier_files
  sources = [tsv, identifiers].flatten.compact
  index = translation_index(sources, field, format, persist: persist_index)

  key_field, *fields = TSV.all_fields(tsv)
  translating_key = field == key_field

  new_key_field = translating_key ? format : key_field
  new_fields = translating_key ? fields : fields.collect { |f| f == field ? format : f }

  # :key when the key itself is translated, else the position of the
  # translated field among the new fields
  field_pos = new_key_field == key_field ? new_fields.index(format) : :key

  transformer = TSV::Transformer.new tsv
  transformer.key_field = new_key_field
  transformer.fields = new_fields
  transformer.traverse one2one: one2one, unnamed: true do |k, v|
    next [index[k], v] if field_pos == :key

    v = v.dup
    current = v[field_pos]
    v[field_pos] = Array === current ? index.values_at(*current).compact : index[current]
    [k, v]
  end

  return transformer if stream

  transformer.tsv(merge: merge, one2one: one2one)
end
143
+
144
# Instance-level shortcut for TSV.translate that uses this object as the
# TSV; all positional and keyword arguments are forwarded unchanged.
def translate(*positional, **options)
  TSV.translate(self, *positional, **options)
end
147
+
148
+ end
@@ -1,6 +1,9 @@
1
+ require_relative 'change_id/translate'
2
+
1
3
  module TSV
2
4
  def self.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil)
3
5
  source = TSV::Parser.new source if String === source
6
+ identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
4
7
  if identifiers && source.identify_field(new_key_field, strict: true).nil?
5
8
  identifiers = identifiers.nil? ? source.identifiers : identifiers
6
9
  new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
@@ -0,0 +1,85 @@
1
+ require 'csv'
2
+
3
+ module TSV
4
# Loads CSV content into a TSV.
#
# obj     - a Path, a String (remote location, filename or the CSV text
#           itself) or anything else CSV.new accepts
# options - options for the CSV parser plus:
#           :headers   - first row holds key field and field names (default true)
#           :type      - TSV type (default :list)
#           :cast      - method name sent to every value of a row
#           :key_field - key field to reorder the result by
#           :fields    - fields to keep when reordering
#           :merge     - accepted, but removed before parsing and not used
#
# When :key_field or :fields are given the content is loaded as :double,
# reordered, and then converted back to the requested type.
#
# Returns the TSV.
def self.csv(obj, options = {})
  # Work on a copy: keys are deleted and overwritten below and that should
  # not leak into the Hash of the caller
  options = IndiferentHash.add_defaults options.dup, :headers => true, :type => :list

  noheaders = ! options[:headers]

  type = options.delete :type
  cast = options.delete :cast
  options.delete :merge
  key_field = options.delete :key_field
  fields = options.delete :fields

  reordering = key_field || fields
  if reordering
    orig_type = type
    type = :double
  end

  # Headers are handled here rather than by the CSV library
  options[:headers] = false

  rows = case obj
         when Path
           CSV.read obj.find.open, **options
         when String
           if Open.remote?(obj)
             CSV.read Open.open(obj), **options
           elsif Path.is_filename?(obj)
             CSV.read obj, **options
           else
             CSV.new obj, **options
           end
         else
           CSV.new obj, **options
         end

  tsv = if noheaders
          TSV.setup({}, :key_field => nil, :fields => nil, :type => type)
        else
          key, *csv_fields = rows.shift
          TSV.setup({}, :key_field => key, :fields => csv_fields, :type => type)
        end

  rows.each_with_index do |row, i|
    if noheaders
      # Without headers every column is a value and keys are synthetic
      key, values = ["row-#{i}", row]
    else
      key, *values = row
    end

    values = values.collect { |v| v.send cast } if cast

    case type
    when :double, :flat
      tsv.zip_new(key, values)
    when :single
      tsv[key] = values.first
    when :list
      tsv[key] = values
    end
  end

  if reordering
    tsv = tsv.reorder(key_field, fields, :one2one => true, :merge => true)
    if tsv.type != orig_type
      # The fallback keeps the reordered TSV for types with no conversion
      # listed here; previously the result became nil in that case
      tsv = case orig_type
            when :list   then tsv.to_list
            when :single then tsv.to_single
            when :flat   then tsv.to_flat
            else tsv
            end
    end
  end

  tsv
end
85
+ end