scout-gear 10.3.0 → 10.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +100 -657
  3. data/Rakefile +1 -0
  4. data/VERSION +1 -1
  5. data/bin/scout +1 -3
  6. data/lib/scout/association/fields.rb +170 -0
  7. data/lib/scout/association/index.rb +229 -0
  8. data/lib/scout/association/item.rb +227 -0
  9. data/lib/scout/association/util.rb +7 -0
  10. data/lib/scout/association.rb +100 -0
  11. data/lib/scout/entity/format.rb +62 -0
  12. data/lib/scout/entity/identifiers.rb +111 -0
  13. data/lib/scout/entity/object.rb +20 -0
  14. data/lib/scout/entity/property.rb +165 -0
  15. data/lib/scout/entity.rb +40 -0
  16. data/lib/scout/offsite/step.rb +2 -2
  17. data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
  18. data/lib/scout/persist/engine/packed_index.rb +100 -0
  19. data/lib/scout/persist/engine/sharder.rb +219 -0
  20. data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
  21. data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
  22. data/lib/scout/persist/engine.rb +4 -0
  23. data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
  24. data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
  25. data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
  26. data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
  27. data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
  28. data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
  29. data/lib/scout/persist/tsv/adapter.rb +6 -0
  30. data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
  31. data/lib/scout/persist/tsv.rb +107 -0
  32. data/lib/scout/tsv/annotation/repo.rb +83 -0
  33. data/lib/scout/tsv/annotation.rb +169 -0
  34. data/lib/scout/tsv/attach.rb +104 -20
  35. data/lib/scout/tsv/change_id/translate.rb +148 -0
  36. data/lib/scout/tsv/change_id.rb +6 -3
  37. data/lib/scout/tsv/csv.rb +85 -0
  38. data/lib/scout/tsv/dumper.rb +113 -25
  39. data/lib/scout/tsv/entity.rb +5 -0
  40. data/lib/scout/tsv/index.rb +89 -37
  41. data/lib/scout/tsv/open.rb +21 -8
  42. data/lib/scout/tsv/parser.rb +156 -91
  43. data/lib/scout/tsv/path.rb +7 -2
  44. data/lib/scout/tsv/stream.rb +48 -6
  45. data/lib/scout/tsv/transformer.rb +25 -3
  46. data/lib/scout/tsv/traverse.rb +26 -18
  47. data/lib/scout/tsv/util/process.rb +8 -1
  48. data/lib/scout/tsv/util/reorder.rb +25 -15
  49. data/lib/scout/tsv/util/select.rb +9 -1
  50. data/lib/scout/tsv/util/sort.rb +90 -2
  51. data/lib/scout/tsv/util/unzip.rb +56 -0
  52. data/lib/scout/tsv/util.rb +52 -5
  53. data/lib/scout/tsv.rb +85 -19
  54. data/lib/scout/work_queue/socket.rb +8 -0
  55. data/lib/scout/work_queue/worker.rb +22 -5
  56. data/lib/scout/work_queue.rb +38 -24
  57. data/lib/scout/workflow/definition.rb +19 -11
  58. data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
  59. data/lib/scout/workflow/deployment/trace.rb +205 -0
  60. data/lib/scout/workflow/deployment.rb +1 -0
  61. data/lib/scout/workflow/documentation.rb +1 -1
  62. data/lib/scout/workflow/step/archive.rb +42 -0
  63. data/lib/scout/workflow/step/children.rb +51 -0
  64. data/lib/scout/workflow/step/config.rb +1 -1
  65. data/lib/scout/workflow/step/dependencies.rb +24 -7
  66. data/lib/scout/workflow/step/file.rb +19 -0
  67. data/lib/scout/workflow/step/info.rb +37 -9
  68. data/lib/scout/workflow/step/progress.rb +11 -2
  69. data/lib/scout/workflow/step/status.rb +8 -1
  70. data/lib/scout/workflow/step.rb +80 -25
  71. data/lib/scout/workflow/task/dependencies.rb +4 -1
  72. data/lib/scout/workflow/task/inputs.rb +91 -41
  73. data/lib/scout/workflow/task.rb +54 -57
  74. data/lib/scout/workflow/usage.rb +1 -1
  75. data/lib/scout/workflow/util.rb +4 -0
  76. data/lib/scout/workflow.rb +110 -13
  77. data/lib/scout-gear.rb +2 -0
  78. data/lib/scout.rb +0 -1
  79. data/scout-gear.gemspec +80 -23
  80. data/scout_commands/rbbt +2 -0
  81. data/test/data/person/brothers +4 -0
  82. data/test/data/person/identifiers +10 -0
  83. data/test/data/person/marriages +3 -0
  84. data/test/data/person/parents +6 -0
  85. data/test/scout/association/test_fields.rb +105 -0
  86. data/test/scout/association/test_index.rb +70 -0
  87. data/test/scout/association/test_item.rb +21 -0
  88. data/test/scout/entity/test_format.rb +19 -0
  89. data/test/scout/entity/test_identifiers.rb +58 -0
  90. data/test/scout/entity/test_object.rb +0 -0
  91. data/test/scout/entity/test_property.rb +345 -0
  92. data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
  93. data/test/scout/persist/engine/test_packed_index.rb +99 -0
  94. data/test/scout/persist/engine/test_sharder.rb +31 -0
  95. data/test/scout/persist/engine/test_tkrzw.rb +0 -0
  96. data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
  97. data/test/scout/persist/test_tsv.rb +146 -0
  98. data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
  99. data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
  100. data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
  101. data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
  102. data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
  103. data/test/scout/persist/tsv/adapter/test_tkrzw.rb +126 -0
  104. data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
  105. data/test/scout/persist/tsv/test_serialize.rb +12 -0
  106. data/test/scout/test_association.rb +51 -0
  107. data/test/scout/test_entity.rb +40 -0
  108. data/test/scout/test_tsv.rb +63 -4
  109. data/test/scout/test_work_queue.rb +3 -2
  110. data/test/scout/test_workflow.rb +16 -15
  111. data/test/scout/tsv/annotation/test_repo.rb +150 -0
  112. data/test/scout/tsv/change_id/test_translate.rb +178 -0
  113. data/test/scout/tsv/test_annotation.rb +52 -0
  114. data/test/scout/tsv/test_attach.rb +226 -1
  115. data/test/scout/tsv/test_change_id.rb +25 -0
  116. data/test/scout/tsv/test_csv.rb +50 -0
  117. data/test/scout/tsv/test_dumper.rb +38 -0
  118. data/test/scout/tsv/test_entity.rb +0 -0
  119. data/test/scout/tsv/test_index.rb +82 -0
  120. data/test/scout/tsv/test_open.rb +44 -0
  121. data/test/scout/tsv/test_parser.rb +70 -0
  122. data/test/scout/tsv/test_stream.rb +22 -0
  123. data/test/scout/tsv/test_transformer.rb +39 -3
  124. data/test/scout/tsv/test_traverse.rb +78 -0
  125. data/test/scout/tsv/util/test_process.rb +36 -0
  126. data/test/scout/tsv/util/test_reorder.rb +67 -0
  127. data/test/scout/tsv/util/test_sort.rb +28 -1
  128. data/test/scout/tsv/util/test_unzip.rb +32 -0
  129. data/test/scout/work_queue/test_socket.rb +4 -1
  130. data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
  131. data/test/scout/workflow/deployment/test_trace.rb +25 -0
  132. data/test/scout/workflow/step/test_archive.rb +28 -0
  133. data/test/scout/workflow/step/test_children.rb +25 -0
  134. data/test/scout/workflow/step/test_info.rb +16 -0
  135. data/test/scout/workflow/task/test_dependencies.rb +16 -16
  136. data/test/scout/workflow/task/test_inputs.rb +45 -1
  137. data/test/scout/workflow/test_definition.rb +52 -0
  138. data/test/scout/workflow/test_step.rb +57 -0
  139. data/test/scout/workflow/test_task.rb +26 -1
  140. data/test/scout/workflow/test_usage.rb +4 -4
  141. data/test/test_helper.rb +23 -1
  142. metadata +71 -14
  143. data/lib/scout/tsv/persist.rb +0 -27
  144. data/test/scout/tsv/persist/test_tkrzw.rb +0 -123
  145. data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
  146. data/test/scout/tsv/test_persist.rb +0 -45
@@ -0,0 +1,169 @@
1
+ require_relative 'annotation/repo'
2
+ module Annotation
3
+
4
# Builds the list of TSV cell values describing +obj+, one per entry of +fields+.
#
# Each field is resolved as follows (Symbols are treated like their String form):
# * a Proc is called with +obj+ and its result is used as-is;
# * "JSON" gives the whole annotation info hash serialized with +to_json+;
# * "annotation_types" gives the annotation types joined with "|";
# * "annotated_array" gives true/false depending on whether +obj+ is an AnnotatedArray;
# * "literal" gives the object itself, with newlines and tabs replaced by spaces;
# * any other name is looked up in the annotation info and, failing that,
#   sent to +obj+ as a method.
#
# Array values are encoded as "Array:" followed by the elements joined with "|".
#
# Raises RuntimeError when a field is neither in the annotation info nor a
# method of +obj+.
def self.obj_tsv_values(obj, fields)
  annotation_info = obj.annotation_info
  annotation_info[:annotated_array] = true if AnnotatedArray === obj

  # Non-mutating concatenation: safe even when string literals are frozen.
  encode = ->(value) { Array === value ? "Array:" + value * "|" : value }

  fields.collect do |field|
    field = field.to_s if Symbol === field
    case field
    when Proc
      field.call(obj)
    when "JSON"
      annotation_info.to_json
    when "annotation_types"
      annotation_info[:annotation_types].collect { |t| t.to_s } * "|"
    when "annotated_array"
      AnnotatedArray === obj
    when "literal"
      encode.call(obj).gsub(/\n|\t/, ' ')
    else
      if annotation_info.include?(field.to_sym)
        encode.call(annotation_info[field.to_sym])
      elsif obj.respond_to?(field)
        # The fallback must query the object being serialized; +self+ here is
        # the Annotation module, which never has the object's methods.
        encode.call(obj.send(field))
      else
        raise "Unknown annotation field #{field.inspect} for #{obj.class}"
      end
    end
  end
end
40
+
41
# Values row for an annotated list taken as a single object; simply hands the
# list and the fields over to obj_tsv_values.
def self.list_tsv_values(objs, fields)
  self.obj_tsv_values(objs, fields)
end
44
+
45
+
46
# Serializes annotated objects into a :list TSV.
#
# +objs+ may be an annotated object (one row, key field "List") or an Array
# of annotated objects (one row per object, key field "ID", keys built as
# "<annotation_id>#<position>"). Returns nil when +objs+ is nil.
#
# +fields+ selects the columns; when empty the object's annotations plus
# "annotation_types" are used, and "all"/:all additionally adds "literal".
#
# Raises RuntimeError when a non-empty Array does not start with an annotated
# object, or when +objs+ is neither annotated nor an Array.
def self.tsv(objs, *fields)
  return nil if objs.nil?

  requested = fields.flatten.compact.uniq
  whole_list = Annotation.is_annotated?(objs)

  annotations =
    if whole_list
      objs.annotations
    elsif Array === objs && objs.any?
      sample = objs.compact.first
      raise "Objects didn't have annotations" unless Annotation.is_annotated?(sample)
      sample.annotations
    else
      []
    end

  requested =
    if requested.empty?
      annotations + [:annotation_types]
    elsif requested == ["all"] || requested == [:all]
      annotations + [:annotation_types, :literal]
    else
      requested
    end

  column_names = requested.collect { |name| Symbol === name ? name.to_s : name }

  table = TSV.setup({}, :key_field => nil, :fields => column_names, :type => :list, :unnamed => true)

  if whole_list
    table.key_field = "List"
    table[objs.annotation_id] = list_tsv_values(objs, column_names).dup
  elsif Array === objs
    table.key_field = "ID"

    present = objs.compact
    rows =
      if Annotation.is_annotated?(present.first)
        present
      elsif objs.any? && Annotation.is_annotated?(present.first.compact.first)
        # NOTE(review): for a non-empty Array the annotations lookup above
        # already raises when the first element is not annotated, so this
        # nested-array branch looks unreachable — confirm.
        objs.flatten.compact
      else
        []
      end

    rows.each_with_index do |obj, i|
      table[obj.annotation_id + "#" << i.to_s] = obj_tsv_values(obj, column_names).dup
    end
  else
    raise "Annotations need to be an Array to create TSV"
  end

  table
end
97
+
98
+ # Load TSV
99
+
100
# Decodes a TSV cell that may hold an encoded array.
#
# Strings starting with "Array:" are turned back into an Array by splitting
# the remainder on "|"; any other value (including non-Strings and nil) is
# returned untouched.
#
# The prefix is checked against the start of the whole string, not the start
# of any line, so a multi-line value that merely contains a line beginning
# with "Array:" is left alone.
def self.resolve_tsv_array(entry)
  prefix = "Array:"
  return entry unless String === entry && entry.start_with?(prefix)

  entry[prefix.length..-1].split("|")
end
107
+
108
# Rebuilds an annotation info hash from a TSV row.
#
# +fields+ and +values+ are read position by position. The "literal" column
# and nil field names are skipped; a "JSON" column is parsed and every pair
# is merged in under a symbolized key; every other column is stored under
# its symbolized name after decoding "Array:" cells with resolve_tsv_array.
def self.load_info(fields, values)
  fields.each_with_index.each_with_object({}) do |(name, position), info|
    next if name == "literal"
    next if name.nil?

    cell = values[position]
    if name == "JSON"
      JSON.parse(cell).each { |key, value| info[key.to_sym] = value }
    else
      info[name.to_sym] = resolve_tsv_array(cell)
    end
  end
end
126
+
127
# Rebuilds one annotated object from a TSV row.
#
# The object itself is the "literal" column when +fields+ has one (its
# encoding is forced to Encoding.default_external), otherwise a copy of the
# row +id+; "Array:" encoded values are decoded into Arrays. The remaining
# columns are turned into the annotation info with load_info and applied
# with +setup+. Array objects are additionally extended with AnnotatedArray.
def self.load_tsv_values(id, values, *fields)
  fields = fields.flatten
  literal_pos = fields.index "literal"

  object = if literal_pos
             values[literal_pos].tap { |o| o.force_encoding(Encoding.default_external) }
           else
             id.dup
           end

  object = resolve_tsv_array(object)

  # The result of each branch is assigned here, outside any block. An
  # assignment made inside the collect block would be block-local, leaving
  # +info+ nil for rows whose cells are Arrays.
  info = if Array === values.first
           # NOTE(review): when several records are zipped only the last one is
           # applied, which is what an in-block assignment to a shared +info+
           # would have produced — confirm this is the intended record.
           NamedArray.zip_fields(values).collect { |v| load_info(fields, v) }.last
         else
           load_info(fields, values)
         end

  self.setup(object, info[:annotation_types], info)

  object.extend AnnotatedArray if Array === object

  object
end
154
+
155
# Loads every row of +tsv+ back into annotated objects through
# load_tsv_values, iterating inside +with_unnamed+.
#
# Returns the single loaded object when the key field is "List", otherwise
# the Array of loaded objects.
def self.load_tsv(tsv)
  tsv.with_unnamed do
    loaded = tsv.collect { |id, values| Annotation.load_tsv_values(id, values, tsv.fields) }

    "List" === tsv.key_field ? loaded.first : loaded
  end
end
169
+ end
@@ -1,7 +1,11 @@
1
1
  module TSV
2
2
 
3
3
  def self.match_keys(source, other, match_key: nil, other_key: nil)
4
- match_key = (source.all_fields & other.all_fields).first if match_key.nil?
4
+ #match_key = (source.all_fields & other.all_fields).first if match_key.nil?
5
+ if match_key.nil?
6
+ match_key_pos = NamedArray.identify_name(source.all_fields, other.all_fields).first
7
+ match_key = source.all_fields[match_key_pos] if match_key_pos
8
+ end
5
9
 
6
10
  if match_key.nil?
7
11
  source.all_fields.collect do |f|
@@ -32,20 +36,26 @@ module TSV
32
36
 
33
37
  other_key = other.key_field if other_key.nil?
34
38
 
35
- match_key = :key if match_key == source.key_field
36
- other_key = :key if other_key == other.key_field
39
+ match_key = :key if NamedArray.field_match(match_key, source.key_field)
40
+ other_key = :key if NamedArray.field_match(other_key, other.key_field)
37
41
 
38
42
  [match_key, other_key]
39
43
  end
40
44
 
41
- def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
45
+ def self.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
42
46
  source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
43
- other = TSV.open other, persist: persist_input unless TSV === other
47
+ other = TSV::Parser.new other unless TSV === other || TSV::Parser === other
44
48
 
45
49
  fields = [fields] if String === fields
46
50
 
47
51
  match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)
48
52
 
53
+ if ! (TSV === other)
54
+ other_key_name = other_key == :key ? other.key_field : other.fields[other_key]
55
+ other = TSV.open other, key_field: other_key_name, fields: fields, one2one: true, persist: persist_input
56
+ other_key = :key if other.key_field == source.key_field
57
+ end
58
+
49
59
  if TSV::Transformer === source
50
60
  source.dumper = case target
51
61
  when :stream
@@ -64,11 +74,24 @@ module TSV
64
74
  other_key_name = other.fields[other_key_name] if Integer === other_key
65
75
  fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?
66
76
 
77
+ match_key_name = match_key == :key ? source.key_field : match_key_name
78
+
79
+ if index.nil? && ! source.identify_field(other_key_name)
80
+ identifier_files = []
81
+ identifier_files << identifiers if identifiers
82
+ identifier_files << source
83
+ identifier_files << TSV.identifier_files(source)
84
+ identifier_files << TSV.identifier_files(other)
85
+ identifier_files << other
86
+
87
+ index = TSV.translation_index(identifier_files.flatten, match_key_name, other_key_name)
88
+ end
89
+
67
90
  if other_key != :key
68
- other = other.reorder other_key, fields, one2one: one2one
91
+ other = other.reorder other_key, fields, one2one: one2one, merge: true, type: :double
69
92
  end
70
93
 
71
- other_field_positions = other.identify_field(fields)
94
+ other_field_positions = other.identify_field(fields.dup)
72
95
 
73
96
  log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
74
97
  Log.debug log_message
@@ -79,8 +102,11 @@ module TSV
79
102
  source.fields = (source.fields + fields).uniq
80
103
 
81
104
  overlaps = source.identify_field(fields)
105
+ orig_type = source.type
106
+
107
+ type = source.type == :single ? :list : source.type
82
108
 
83
- empty_other_values = case source.type
109
+ empty_other_values = case type
84
110
  when :list
85
111
  [nil] * other.fields.length
86
112
  when :flat
@@ -89,20 +115,28 @@ module TSV
89
115
  [[]] * other.fields.length
90
116
  end
91
117
 
118
+ empty_other_values = nil if other.type == :single
119
+
92
120
  insitu = TSV === source ? true : false if insitu.nil?
121
+ insitu = false if source.type == :single
93
122
 
94
123
  match_key_pos = source.identify_field(match_key)
95
124
  source.traverse bar: bar, unnamed: true do |orig_key,current_values|
125
+ current_values = [current_values] if source.type == :single
126
+
96
127
  keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
97
- keys = [keys] unless Array === keys
128
+ keys = [keys].compact unless Array === keys
129
+
130
+ keys = index.chunked_values_at(keys).flatten if index
98
131
 
99
132
  current_values = current_values.dup unless insitu
133
+ keys = [nil] if keys.empty?
100
134
  keys.each do |current_key|
101
- other_values = other[current_key]
135
+ other_values = current_key.nil? ? empty_other_values : other[current_key]
102
136
 
103
137
  if other_values.nil?
104
138
  other_values = empty_other_values
105
- elsif other.type == :flat
139
+ elsif other.type == :flat
106
140
  other_values = [other_values]
107
141
  elsif other.type == :list && source.type == :double
108
142
  other_values = other_values.collect{|v| [v] }
@@ -110,13 +144,25 @@ module TSV
110
144
  other_values = other_values.collect{|v| v.first }
111
145
  end
112
146
 
113
- other_values = other_values.values_at *other_field_positions
147
+ other_values = other_field_positions.collect do |pos|
148
+ if pos == :key
149
+ current_key
150
+ else
151
+ other.type == :single ? other_values : other_values[pos]
152
+ end
153
+ end
114
154
 
115
155
  other_values.zip(overlaps).each do |v,overlap|
116
- if source.type == :list
156
+ if type == :list
117
157
  current_values[overlap] = v if current_values[overlap].nil? || String === current_values[overlap] && current_values[overlap].empty?
158
+ elsif type == :flat
159
+ next if v.nil?
160
+ v = [v] unless Array === v
161
+ current_values.concat v
118
162
  else
119
163
  current_values[overlap] ||= []
164
+ next if v.nil?
165
+ v = [v] unless Array === v
120
166
  current_values[overlap].concat (v - current_values[overlap])
121
167
  end
122
168
  end
@@ -126,7 +172,7 @@ module TSV
126
172
  end
127
173
 
128
174
  if complete && match_key == :key
129
- empty_self_values = case source.type
175
+ empty_self_values = case type
130
176
  when :list
131
177
  [nil] * source.fields.length
132
178
  when :flat
@@ -136,15 +182,17 @@ module TSV
136
182
  end
137
183
  other.each do |other_key,other_values|
138
184
  next if source.include?(other_key)
139
- if other.type == :flat
185
+ if other.type == :flat
140
186
  other_values = [other_values]
141
- elsif other.type == :list && source.type == :double
187
+ elsif other.type == :single
188
+ other_values = [other_values]
189
+ elsif other.type == :list && type == :double
142
190
  other_values = other_values.collect{|v| [v] }
143
- elsif other.type == :double && source.type == :list
191
+ elsif other.type == :double && type == :list
144
192
  other_values = other_values.collect{|v| v.first }
145
193
  end
146
194
 
147
- new_values = case source.type
195
+ new_values = case type
148
196
  when :list
149
197
  [nil] * source.fields.length
150
198
  when :flat
@@ -154,17 +202,20 @@ module TSV
154
202
  end
155
203
 
156
204
  other_values.zip(overlaps).each do |v,overlap|
157
- if false && overlap == :key
205
+ next if v.nil?
206
+ if overlap == :key
158
207
  other_key = Array === v ? v : v.first
159
- elsif source.type == :list
208
+ elsif type == :list
160
209
  new_values[overlap] = v if v[overlap].nil? || String === v[overlap] && v[overlap].empty?
161
210
  else
211
+ v = [v] unless Array === v
162
212
  new_values[overlap].concat v
163
213
  end
164
214
  end
165
215
  source[other_key] = new_values
166
216
  end
167
217
  end
218
+ source.type = type
168
219
  end
169
220
  end
170
221
 
@@ -174,4 +225,37 @@ module TSV
174
225
  def attach(*args, **kwargs)
175
226
  TSV.attach(self, *args, **kwargs)
176
227
  end
228
+
229
# Lists the identifier sources associated with this object as an Array.
#
# Resolution order, based on the +identifiers+ and +filename+ values of the
# receiver:
# * a TSV in +identifiers+ is wrapped in an Array;
# * an Array in +identifiers+ is returned as-is when empty or when it starts
#   with a TSV, otherwise each entry that is not a Path is run through
#   Path.setup;
# * any other +identifiers+ value is wrapped in an Array (through Path.setup
#   unless it is a Path);
# * with a Path +filename+, the identifiers of its directory that exist;
# * with any other +filename+, the identifiers of its directory;
# * otherwise an empty Array.
def identifier_files
  return [identifiers] if identifiers && TSV === identifiers

  if identifiers && Array === identifiers
    return identifiers if TSV === identifiers.first || identifiers.empty?

    return identifiers.collect { |entry| Path === entry ? entry : Path.setup(entry) }
  end

  return [Path === identifiers ? identifiers : Path.setup(identifiers)] if identifiers

  if Path === filename
    path_files = filename.dirname.identifiers
    return [path_files].flatten.compact.select { |candidate| candidate.exists? }
  end

  return [Path.setup(filename.dup).dirname.identifiers] if filename

  []
end
251
+
252
# Identifier sources for +obj+: a TSV answers with its own identifier_files,
# a Path with the identifiers of its directory, anything else with nil.
def self.identifier_files(obj)
  case obj
  when TSV then obj.identifier_files
  when Path then obj.dirname.identifiers
  end
end
177
261
  end
@@ -0,0 +1,148 @@
1
+ module TSV
2
+
3
# Identifies +field+ among the fields of +obj+, whatever form +obj+ takes.
#
# * TSV: asks the object itself through identify_field.
# * TSV::Parser / TSV::Dumper: uses its key_field and fields.
# * Path / String: reads "all_fields" from the parsed header and recurses.
# * Array: the first element is taken as the key field, the rest as fields.
#
# Returns nil for any other kind of object.
def self.identify_field_in_obj(obj, field)
  if TSV === obj
    obj.identify_field(field)
  elsif TSV::Parser === obj || TSV::Dumper === obj
    TSV.identify_field(obj.key_field, obj.fields, field)
  elsif Path === obj || String === obj
    header_fields = TSV.parse_header(obj)["all_fields"]
    identify_field_in_obj(header_fields, field)
  elsif Array === obj
    first_name, *other_names = obj
    TSV.identify_field(first_name, other_names, field)
  end
end
17
+
18
# Finds the chain of files that leads from the +source+ field to the
# +target+ field.
#
# +file_fields+ maps each file to the list of its fields. A file "has" a
# field when identify_field_in_obj finds it in that list. When +source+ is
# nil every file is considered a possible starting point.
#
# Returns an Array with:
# * one file, when a single file has both fields (only if +source+ is given);
# * two files, when a source file and a target file share a field;
# * three files, when a middle file shares fields with both sides.
#
# Raises RuntimeError when no such chain exists.
def self.translation_path(file_fields, source, target)
  target_files = file_fields.select { |_file, fields| identify_field_in_obj(fields, target) }.keys
  source_files = if source.nil?
                   file_fields.keys
                 else
                   file_fields.select { |_file, fields| identify_field_in_obj(fields, source) }.keys
                 end

  one_step = target_files & source_files
  return [one_step.first] if source && one_step.any?

  source_fields = file_fields.values_at(*source_files).flatten
  target_fields = file_fields.values_at(*target_files).flatten

  common_fields = source_fields & target_fields
  if common_fields.any?
    source_file = source_files.find { |file| (file_fields[file] & common_fields).any? }
    target_file = target_files.find { |file| (file_fields[file] & common_fields).any? }
    return [source_file, target_file]
  end

  # Look for a bridge: a file sharing at least one field with each side.
  middle_file, middle_fields = file_fields.find do |_file, fields|
    (fields & source_fields).any? && (fields & target_fields).any?
  end

  unless middle_file
    raise "Could not traverse identifier path from #{Log.fingerprint source} to #{Log.fingerprint target} in #{Log.fingerprint file_fields}"
  end

  source_file = source_files.find { |file| (file_fields[file] & middle_fields).any? }
  target_file = target_files.find { |file| (file_fields[file] & middle_fields).any? }
  [source_file, middle_file, target_file]
end
48
+
49
# Builds an index translating values of the +source+ field into values of
# the +target+ field, chaining through +files+ as needed.
#
# +files+ may be a single file or an Array; Path entries that do not exist
# are ignored and the rest are resolved with +find+. The chain of files is
# chosen by translation_path, the first file is opened or indexed on
# +source+, and each following file is attached to the accumulated table.
# The result is the +target+ column as a single-value table, produced inside
# Persist.persist with +persist_options+ (defaults: persist => true,
# prefix => "Translation index").
#
# Returns nil when +source+ and +target+ are the same. Errors raised by
# translation_path propagate unchanged.
def self.translation_index(files, source, target, persist_options = {})
  return nil if source == target
  persist_options = IndiferentHash.add_defaults persist_options.dup, :persist => true, :prefix => "Translation index"

  files = [files] unless Array === files

  file_fields = {}
  files.each do |file|
    next if Path === file && ! Open.exist?(file)
    file = file.find if Path === file
    file_fields[file] = all_fields(file)
  end

  # Called once: retrying the very same call with the same arguments could
  # only fail again and end up raising the first error anyway.
  path = translation_path(file_fields, source, target)

  name = [source || "all", target] * "->" + " (#{files.length} files - #{Misc.digest(files)})"

  # With more than one file, the first step targets the field that the first
  # two files of the path have in common.
  second_target = if path.length == 1
                    target
                  else
                    file1, file2 = path.values_at 0, 1
                    first_fields = TSV.all_fields(file1)
                    pos = NamedArray.identify_name(first_fields, TSV.all_fields(file2))
                    first_fields[pos.compact.first]
                  end

  Persist.persist(name, "HDB", persist_options) do
    index = path.inject(nil) do |acc, file|
      if ! acc.nil?
        acc.attach file, insitu: false
      elsif source.nil?
        TSV === file ? file.index(target: second_target) : TSV.index(file, target: second_target)
      elsif TSV === file
        # +source+ is known to be non-nil in this branch.
        file.key_field == source ? file.annotate(file.dup) : file.reorder(source)
      else
        TSV.open(file, key_field: source)
      end
    end

    index.slice([target]).to_single
  end
end
107
+
108
# Translates the values of +field+ in +tsv+ into +format+ using a
# translation index.
#
# The index is built by translation_index from +tsv+ plus +identifiers+
# (defaulting to tsv.identifier_files), persisted according to
# +persist_index+. When +field+ is the key field the keys are translated and
# the key field renamed to +format+; otherwise the matching column is
# renamed and its values translated (Array cells element by element,
# dropping entries without translation).
#
# Returns the TSV::Transformer itself when +stream+ is true, otherwise the
# table produced from it with the given +merge+ and +one2one+ settings.
# NOTE(review): +keep+ is accepted but not used in this method.
def self.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true)
  identifiers ||= tsv.identifier_files
  index = translation_index([tsv, identifiers].flatten.compact, field, format, persist: persist_index)

  key_field, *fields = TSV.all_fields(tsv)

  translating_key = field == key_field
  new_key_field = translating_key ? format : key_field
  new_fields = translating_key ? fields : fields.collect { |name| name == field ? format : name }

  field_pos = new_key_field == key_field ? new_fields.index(format) : :key

  transformer = TSV::Transformer.new tsv
  transformer.key_field = new_key_field
  transformer.fields = new_fields
  transformer.traverse one2one: one2one, unnamed: true do |key, values|
    next [index[key], values] if field_pos == :key

    translated = values.dup
    current = translated[field_pos]
    translated[field_pos] = Array === current ? index.values_at(*current).compact : index[current]
    [key, translated]
  end

  return transformer if stream

  transformer.tsv(merge: merge, one2one: one2one)
end
143
+
144
# Instance-level shortcut for TSV.translate: this object is passed as the
# table to translate and every other argument is forwarded unchanged.
def translate(*positional, **options)
  TSV.translate(self, *positional, **options)
end
147
+
148
+ end
@@ -1,10 +1,13 @@
1
+ require_relative 'change_id/translate'
2
+
1
3
  module TSV
2
- def self.change_key(source, new_key_field, identifiers: nil, one2one: false, stream: false, keep: false, persist_identifiers: nil)
4
+ def self.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil)
3
5
  source = TSV::Parser.new source if String === source
6
+ identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
4
7
  if identifiers && source.identify_field(new_key_field, strict: true).nil?
5
8
  identifiers = identifiers.nil? ? source.identifiers : identifiers
6
9
  new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
7
- new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one)
10
+ new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
8
11
  return new
9
12
  end
10
13
 
@@ -17,7 +20,7 @@ module TSV
17
20
  [k, v]
18
21
  end
19
22
 
20
- stream ? transformer : transformer.tsv
23
+ stream ? transformer : transformer.tsv(merge: merge, one2one: one2one)
21
24
  end
22
25
 
23
26
  def change_key(*args, **kwargs)
@@ -0,0 +1,85 @@
1
+ require 'csv'
2
+
3
+ module TSV
4
# Loads CSV content into a TSV.
#
# +obj+ may be a Path, a String (remote location, file name or raw CSV
# text) or anything else CSV.new accepts.
#
# Options consumed here (everything else is passed on to the CSV library):
# * :headers   - when true (default) the first row provides the key field
#                and field names; otherwise rows are keyed "row-<index>".
# * :type      - type of the resulting TSV (default :list).
# * :cast      - method name sent to every value of a row.
# * :merge     - accepted and removed from the options; not used otherwise.
# * :key_field, :fields - when either is given the table is loaded as
#                :double, reordered, and converted back to the requested
#                type if it differs.
def self.csv(obj, options = {})
  options = IndiferentHash.add_defaults options, :headers => true, :type => :list
  headers = options[:headers]

  noheaders = ! headers

  type = options.delete :type
  cast = options.delete :cast
  options.delete :merge # must not reach the CSV library
  key_field = options.delete :key_field
  fields = options.delete :fields

  reordering = key_field || fields
  if reordering
    orig_type = type
    type = :double
  end

  # Headers are handled below, so the CSV library always returns plain rows.
  options[:headers] = false

  csv = case obj
        when Path
          CSV.read obj.find.open, **options
        when String
          if Open.remote?(obj)
            CSV.read Open.open(obj), **options
          elsif Path.is_filename?(obj)
            CSV.read obj, **options
          else
            CSV.new obj, **options
          end
        else
          CSV.new obj, **options
        end

  tsv = if noheaders
          TSV.setup({}, :key_field => nil, :fields => nil, :type => type)
        else
          key, *csv_fields = csv.shift
          TSV.setup({}, :key_field => key, :fields => csv_fields, :type => type)
        end

  csv.each_with_index do |row, i|
    if noheaders
      key, values = ["row-#{i}", row]
    else
      key, *values = row
    end

    values = values.collect { |v| v.send cast } if cast

    case type
    when :double, :flat
      tsv.zip_new(key, values)
    when :single
      tsv[key] = values.first
    when :list
      tsv[key] = values
    end
  end

  if reordering
    tsv = tsv.reorder(key_field, fields, :one2one => true, :merge => true)
    if tsv.type != orig_type
      tsv = case orig_type
            when :list
              tsv.to_list
            when :single
              tsv.to_single
            when :flat
              tsv.to_flat
            else
              # Unlisted type: keep the reordered table instead of returning nil.
              tsv
            end
    end
  end

  tsv
end
85
+ end