scout-gear 10.4.0 → 10.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +100 -656
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/scout +1 -3
- data/lib/scout/association/fields.rb +170 -0
- data/lib/scout/association/index.rb +229 -0
- data/lib/scout/association/item.rb +227 -0
- data/lib/scout/association/util.rb +7 -0
- data/lib/scout/association.rb +100 -0
- data/lib/scout/entity/format.rb +62 -0
- data/lib/scout/entity/identifiers.rb +111 -0
- data/lib/scout/entity/object.rb +20 -0
- data/lib/scout/entity/property.rb +165 -0
- data/lib/scout/entity.rb +40 -0
- data/lib/scout/offsite/step.rb +2 -2
- data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
- data/lib/scout/persist/engine/packed_index.rb +100 -0
- data/lib/scout/persist/engine/sharder.rb +219 -0
- data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
- data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
- data/lib/scout/persist/engine.rb +4 -0
- data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
- data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
- data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
- data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
- data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
- data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
- data/lib/scout/persist/tsv/adapter.rb +6 -0
- data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
- data/lib/scout/persist/tsv.rb +107 -0
- data/lib/scout/tsv/annotation/repo.rb +83 -0
- data/lib/scout/tsv/annotation.rb +169 -0
- data/lib/scout/tsv/attach.rb +95 -19
- data/lib/scout/tsv/change_id/translate.rb +148 -0
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/csv.rb +85 -0
- data/lib/scout/tsv/dumper.rb +113 -25
- data/lib/scout/tsv/entity.rb +5 -0
- data/lib/scout/tsv/index.rb +88 -36
- data/lib/scout/tsv/open.rb +21 -8
- data/lib/scout/tsv/parser.rb +153 -90
- data/lib/scout/tsv/path.rb +7 -2
- data/lib/scout/tsv/stream.rb +48 -6
- data/lib/scout/tsv/transformer.rb +4 -3
- data/lib/scout/tsv/traverse.rb +26 -18
- data/lib/scout/tsv/util/process.rb +7 -0
- data/lib/scout/tsv/util/reorder.rb +25 -15
- data/lib/scout/tsv/util/select.rb +9 -1
- data/lib/scout/tsv/util/sort.rb +90 -2
- data/lib/scout/tsv/util/unzip.rb +56 -0
- data/lib/scout/tsv/util.rb +52 -5
- data/lib/scout/tsv.rb +45 -27
- data/lib/scout/work_queue/socket.rb +8 -0
- data/lib/scout/work_queue/worker.rb +22 -5
- data/lib/scout/work_queue.rb +38 -24
- data/lib/scout/workflow/definition.rb +11 -10
- data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
- data/lib/scout/workflow/deployment/trace.rb +205 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/documentation.rb +1 -1
- data/lib/scout/workflow/step/archive.rb +42 -0
- data/lib/scout/workflow/step/children.rb +51 -0
- data/lib/scout/workflow/step/config.rb +1 -1
- data/lib/scout/workflow/step/dependencies.rb +24 -7
- data/lib/scout/workflow/step/file.rb +19 -0
- data/lib/scout/workflow/step/info.rb +37 -9
- data/lib/scout/workflow/step/progress.rb +11 -2
- data/lib/scout/workflow/step/status.rb +8 -1
- data/lib/scout/workflow/step.rb +80 -25
- data/lib/scout/workflow/task/dependencies.rb +4 -1
- data/lib/scout/workflow/task/inputs.rb +91 -41
- data/lib/scout/workflow/task.rb +54 -57
- data/lib/scout/workflow/usage.rb +1 -1
- data/lib/scout/workflow/util.rb +4 -0
- data/lib/scout/workflow.rb +110 -13
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +0 -1
- data/scout-gear.gemspec +80 -23
- data/scout_commands/rbbt +2 -0
- data/test/data/person/brothers +4 -0
- data/test/data/person/identifiers +10 -0
- data/test/data/person/marriages +3 -0
- data/test/data/person/parents +6 -0
- data/test/scout/association/test_fields.rb +105 -0
- data/test/scout/association/test_index.rb +70 -0
- data/test/scout/association/test_item.rb +21 -0
- data/test/scout/entity/test_format.rb +19 -0
- data/test/scout/entity/test_identifiers.rb +58 -0
- data/test/scout/entity/test_object.rb +0 -0
- data/test/scout/entity/test_property.rb +345 -0
- data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
- data/test/scout/persist/engine/test_packed_index.rb +99 -0
- data/test/scout/persist/engine/test_sharder.rb +31 -0
- data/test/scout/persist/engine/test_tkrzw.rb +0 -0
- data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
- data/test/scout/persist/test_tsv.rb +146 -0
- data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
- data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
- data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
- data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
- data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
- data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
- data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
- data/test/scout/persist/tsv/test_serialize.rb +12 -0
- data/test/scout/test_association.rb +51 -0
- data/test/scout/test_entity.rb +40 -0
- data/test/scout/test_tsv.rb +33 -4
- data/test/scout/test_work_queue.rb +3 -2
- data/test/scout/test_workflow.rb +16 -15
- data/test/scout/tsv/annotation/test_repo.rb +150 -0
- data/test/scout/tsv/change_id/test_translate.rb +178 -0
- data/test/scout/tsv/test_annotation.rb +52 -0
- data/test/scout/tsv/test_attach.rb +226 -1
- data/test/scout/tsv/test_change_id.rb +25 -0
- data/test/scout/tsv/test_csv.rb +50 -0
- data/test/scout/tsv/test_dumper.rb +38 -0
- data/test/scout/tsv/test_entity.rb +0 -0
- data/test/scout/tsv/test_index.rb +82 -0
- data/test/scout/tsv/test_open.rb +44 -0
- data/test/scout/tsv/test_parser.rb +70 -0
- data/test/scout/tsv/test_stream.rb +22 -0
- data/test/scout/tsv/test_transformer.rb +27 -3
- data/test/scout/tsv/test_traverse.rb +78 -0
- data/test/scout/tsv/util/test_process.rb +16 -0
- data/test/scout/tsv/util/test_reorder.rb +67 -0
- data/test/scout/tsv/util/test_sort.rb +28 -1
- data/test/scout/tsv/util/test_unzip.rb +32 -0
- data/test/scout/work_queue/test_socket.rb +4 -1
- data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
- data/test/scout/workflow/deployment/test_trace.rb +25 -0
- data/test/scout/workflow/step/test_archive.rb +28 -0
- data/test/scout/workflow/step/test_children.rb +25 -0
- data/test/scout/workflow/step/test_info.rb +16 -0
- data/test/scout/workflow/task/test_dependencies.rb +16 -16
- data/test/scout/workflow/task/test_inputs.rb +45 -1
- data/test/scout/workflow/test_definition.rb +52 -0
- data/test/scout/workflow/test_step.rb +57 -0
- data/test/scout/workflow/test_task.rb +26 -1
- data/test/scout/workflow/test_usage.rb +4 -4
- data/test/test_helper.rb +23 -1
- metadata +71 -14
- data/lib/scout/tsv/persist.rb +0 -27
- data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
- data/test/scout/tsv/test_persist.rb +0 -45
@@ -0,0 +1,169 @@
|
|
1
|
+
require_relative 'annotation/repo'
|
2
|
+
# Helpers to serialize annotated objects into TSV rows and to rebuild the
# objects from those rows.
module Annotation

  # Builds the list of TSV cell values for +obj+, one per entry in +fields+.
  #
  # Each field (Symbols are converted to Strings first) is resolved as:
  # * a Proc: called with +obj+;
  # * "JSON": the whole annotation info hash as JSON;
  # * "annotation_types": the annotation types joined with "|";
  # * "annotated_array": whether +obj+ is an AnnotatedArray;
  # * "literal": the object itself (Arrays as "Array:a|b"), with newlines and
  #   tabs replaced by spaces;
  # * anything else: the matching key in the annotation info, or else the
  #   result of calling that method on +obj+.
  #
  # Raises RuntimeError when a field cannot be resolved.
  def self.obj_tsv_values(obj, fields)
    annotation_info = obj.annotation_info
    annotation_info[:annotated_array] = true if AnnotatedArray === obj

    fields.collect do |field|
      field = field.to_s if Symbol === field

      case field
      when Proc
        field.call(obj)

      when "JSON"
        annotation_info.to_json

      when "annotation_types"
        annotation_info[:annotation_types].collect { |t| t.to_s } * "|"

      when "annotated_array"
        AnnotatedArray === obj

      when "literal"
        # `+` instead of `<<` so the "Array:" literal is never mutated
        literal = Array === obj ? "Array:" + obj * "|" : obj
        literal.gsub(/\n|\t/, ' ')

      else
        res = if annotation_info.include?(field.to_sym)
                annotation_info[field.to_sym]
              elsif obj.respond_to?(field)
                # Query the object itself; `self` here is the Annotation module
                obj.send(field)
              else
                raise "Field #{field.inspect} is neither an annotation nor a method of the object"
              end
        Array === res ? "Array:" + res * "|" : res
      end
    end
  end

  # Values for an annotated list taken as a whole; same as obj_tsv_values.
  def self.list_tsv_values(objs, fields)
    obj_tsv_values(objs, fields)
  end

  # Builds a :list TSV describing +objs+.
  #
  # When +objs+ is itself annotated the TSV has key field "List" and a single
  # row keyed by its annotation_id. When it is an Array of annotated objects
  # the key field is "ID" and each row is keyed by "<annotation_id>#<index>".
  # With no +fields+ the annotations plus "annotation_types" are used; the
  # single field "all" (or :all) additionally includes "literal".
  #
  # Returns nil when +objs+ is nil. Raises RuntimeError when a non-empty Array
  # does not start with an annotated object, or when +objs+ is neither
  # annotated nor an Array.
  def self.tsv(objs, *fields)
    return nil if objs.nil?

    fields = fields.flatten.compact.uniq

    annotations = if Annotation.is_annotated?(objs)
                    objs.annotations
                  elsif Array === objs && objs.any?
                    first = objs.compact.first
                    raise "Objects didn't have annotations" unless Annotation.is_annotated?(first)
                    first.annotations
                  else
                    []
                  end

    if fields.empty?
      fields = annotations + [:annotation_types]
    elsif fields == ["all"] || fields == [:all]
      fields = annotations + [:annotation_types, :literal]
    end

    fields = fields.collect { |f| Symbol === f ? f.to_s : f }

    tsv = TSV.setup({}, :key_field => nil, :fields => fields, :type => :list, :unnamed => true)

    case
    when Annotation.is_annotated?(objs)
      tsv.key_field = "List"

      tsv[objs.annotation_id] = self.list_tsv_values(objs, fields).dup
    when Array === objs
      tsv.key_field = "ID"

      if Annotation.is_annotated?(objs.compact.first)
        objs.compact.each_with_index do |obj, i|
          tsv[obj.annotation_id + "#" + i.to_s] = self.obj_tsv_values(obj, fields).dup
        end
      elsif objs.any? && Annotation.is_annotated?(objs.compact.first.compact.first)
        # NOTE(review): this nested-array branch looks unreachable, since a
        # non-empty Array whose first element is not annotated already raised
        # above -- confirm before relying on it.
        objs.flatten.compact.each_with_index do |obj, i|
          tsv[obj.annotation_id + "#" + i.to_s] = self.obj_tsv_values(obj, fields).dup
        end
      end
    else
      raise "Annotations need to be an Array to create TSV"
    end

    tsv
  end

  # Load TSV

  # Decodes a cell written as "Array:a|b" back into ["a", "b"]; any other
  # value is returned untouched. The prefix must be at the very start of the
  # string (not merely at the start of some line).
  def self.resolve_tsv_array(entry)
    if String === entry && entry.start_with?("Array:")
      entry["Array:".length..-1].split("|")
    else
      entry
    end
  end

  # Rebuilds the annotation info hash from parallel +fields+ and +values+.
  # "literal" and nil fields are skipped, a "JSON" field is parsed and merged
  # key by key, and every other field is stored under its symbolized name
  # after decoding "Array:" cells.
  def self.load_info(fields, values)
    info = {}
    fields.each_with_index do |field, i|
      next if field.nil? || field == "literal"

      if field == "JSON"
        JSON.parse(values[i]).each do |key, value|
          info[key.to_sym] = value
        end
      else
        info[field.to_sym] = resolve_tsv_array(values[i])
      end
    end
    info
  end

  # Rebuilds one annotated object from a TSV row.
  #
  # The object is the "literal" cell when that field is present, otherwise a
  # copy of +id+. When the row holds Arrays (one per field) the rows are
  # zipped and the info of the last zipped row is used.
  def self.load_tsv_values(id, values, *fields)
    fields = fields.flatten
    literal_pos = fields.index "literal"

    object = if literal_pos
               values[literal_pos].tap { |o| o.force_encoding(Encoding.default_external) }
             else
               id.dup
             end

    object = resolve_tsv_array(object)

    # Compute `info` as the value of the conditional: assigning it only inside
    # the block would create a block-local variable and leave it nil here.
    info = if Array === values.first
             NamedArray.zip_fields(values).collect { |row| load_info(fields, row) }.last
           else
             load_info(fields, values)
           end
    info ||= {}

    self.setup(object, info[:annotation_types], info)

    object.extend AnnotatedArray if Array === object

    object
  end

  # Rebuilds the annotated objects stored in +tsv+. Returns the single object
  # when the key field is "List", otherwise the Array of objects.
  def self.load_tsv(tsv)
    tsv.with_unnamed do
      annotated_objects = tsv.collect do |id, values|
        Annotation.load_tsv_values(id, values, tsv.fields)
      end

      case tsv.key_field
      when "List"
        annotated_objects.first
      else
        annotated_objects
      end
    end
  end
end
|
data/lib/scout/tsv/attach.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
module TSV
|
2
2
|
|
3
3
|
def self.match_keys(source, other, match_key: nil, other_key: nil)
|
4
|
-
match_key = (source.all_fields & other.all_fields).first if match_key.nil?
|
4
|
+
#match_key = (source.all_fields & other.all_fields).first if match_key.nil?
|
5
|
+
if match_key.nil?
|
6
|
+
match_key_pos = NamedArray.identify_name(source.all_fields, other.all_fields).first
|
7
|
+
match_key = source.all_fields[match_key_pos] if match_key_pos
|
8
|
+
end
|
5
9
|
|
6
10
|
if match_key.nil?
|
7
11
|
source.all_fields.collect do |f|
|
@@ -32,13 +36,13 @@ module TSV
|
|
32
36
|
|
33
37
|
other_key = other.key_field if other_key.nil?
|
34
38
|
|
35
|
-
match_key = :key if match_key
|
36
|
-
other_key = :key if other_key
|
39
|
+
match_key = :key if NamedArray.field_match(match_key, source.key_field)
|
40
|
+
other_key = :key if NamedArray.field_match(other_key, other.key_field)
|
37
41
|
|
38
42
|
[match_key, other_key]
|
39
43
|
end
|
40
44
|
|
41
|
-
def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
|
45
|
+
def self.attach(source, other, target: nil, fields: nil, index: nil, identifiers: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
|
42
46
|
source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
|
43
47
|
other = TSV::Parser.new other unless TSV === other || TSV::Parser === other
|
44
48
|
|
@@ -70,11 +74,24 @@ module TSV
|
|
70
74
|
other_key_name = other.fields[other_key_name] if Integer === other_key
|
71
75
|
fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?
|
72
76
|
|
77
|
+
match_key_name = match_key == :key ? source.key_field : match_key_name
|
78
|
+
|
79
|
+
if index.nil? && ! source.identify_field(other_key_name)
|
80
|
+
identifier_files = []
|
81
|
+
identifier_files << identifiers if identifiers
|
82
|
+
identifier_files << source
|
83
|
+
identifier_files << TSV.identifier_files(source)
|
84
|
+
identifier_files << TSV.identifier_files(other)
|
85
|
+
identifier_files << other
|
86
|
+
|
87
|
+
index = TSV.translation_index(identifier_files.flatten, match_key_name, other_key_name)
|
88
|
+
end
|
89
|
+
|
73
90
|
if other_key != :key
|
74
|
-
other = other.reorder other_key, fields, one2one: one2one
|
91
|
+
other = other.reorder other_key, fields, one2one: one2one, merge: true, type: :double
|
75
92
|
end
|
76
93
|
|
77
|
-
other_field_positions = other.identify_field(fields)
|
94
|
+
other_field_positions = other.identify_field(fields.dup)
|
78
95
|
|
79
96
|
log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
|
80
97
|
Log.debug log_message
|
@@ -85,8 +102,11 @@ module TSV
|
|
85
102
|
source.fields = (source.fields + fields).uniq
|
86
103
|
|
87
104
|
overlaps = source.identify_field(fields)
|
105
|
+
orig_type = source.type
|
88
106
|
|
89
|
-
|
107
|
+
type = source.type == :single ? :list : source.type
|
108
|
+
|
109
|
+
empty_other_values = case type
|
90
110
|
when :list
|
91
111
|
[nil] * other.fields.length
|
92
112
|
when :flat
|
@@ -95,20 +115,28 @@ module TSV
|
|
95
115
|
[[]] * other.fields.length
|
96
116
|
end
|
97
117
|
|
118
|
+
empty_other_values = nil if other.type == :single
|
119
|
+
|
98
120
|
insitu = TSV === source ? true : false if insitu.nil?
|
121
|
+
insitu = false if source.type == :single
|
99
122
|
|
100
123
|
match_key_pos = source.identify_field(match_key)
|
101
124
|
source.traverse bar: bar, unnamed: true do |orig_key,current_values|
|
125
|
+
current_values = [current_values] if source.type == :single
|
126
|
+
|
102
127
|
keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
|
103
|
-
keys = [keys] unless Array === keys
|
128
|
+
keys = [keys].compact unless Array === keys
|
129
|
+
|
130
|
+
keys = index.chunked_values_at(keys).flatten if index
|
104
131
|
|
105
132
|
current_values = current_values.dup unless insitu
|
133
|
+
keys = [nil] if keys.empty?
|
106
134
|
keys.each do |current_key|
|
107
|
-
other_values = other[current_key]
|
135
|
+
other_values = current_key.nil? ? empty_other_values : other[current_key]
|
108
136
|
|
109
137
|
if other_values.nil?
|
110
138
|
other_values = empty_other_values
|
111
|
-
elsif other.type == :flat
|
139
|
+
elsif other.type == :flat
|
112
140
|
other_values = [other_values]
|
113
141
|
elsif other.type == :list && source.type == :double
|
114
142
|
other_values = other_values.collect{|v| [v] }
|
@@ -116,14 +144,25 @@ module TSV
|
|
116
144
|
other_values = other_values.collect{|v| v.first }
|
117
145
|
end
|
118
146
|
|
119
|
-
other_values =
|
147
|
+
other_values = other_field_positions.collect do |pos|
|
148
|
+
if pos == :key
|
149
|
+
current_key
|
150
|
+
else
|
151
|
+
other.type == :single ? other_values : other_values[pos]
|
152
|
+
end
|
153
|
+
end
|
120
154
|
|
121
155
|
other_values.zip(overlaps).each do |v,overlap|
|
122
|
-
if
|
156
|
+
if type == :list
|
123
157
|
current_values[overlap] = v if current_values[overlap].nil? || String === current_values[overlap] && current_values[overlap].empty?
|
158
|
+
elsif type == :flat
|
159
|
+
next if v.nil?
|
160
|
+
v = [v] unless Array === v
|
161
|
+
current_values.concat v
|
124
162
|
else
|
125
163
|
current_values[overlap] ||= []
|
126
164
|
next if v.nil?
|
165
|
+
v = [v] unless Array === v
|
127
166
|
current_values[overlap].concat (v - current_values[overlap])
|
128
167
|
end
|
129
168
|
end
|
@@ -133,7 +172,7 @@ module TSV
|
|
133
172
|
end
|
134
173
|
|
135
174
|
if complete && match_key == :key
|
136
|
-
empty_self_values = case
|
175
|
+
empty_self_values = case type
|
137
176
|
when :list
|
138
177
|
[nil] * source.fields.length
|
139
178
|
when :flat
|
@@ -143,15 +182,17 @@ module TSV
|
|
143
182
|
end
|
144
183
|
other.each do |other_key,other_values|
|
145
184
|
next if source.include?(other_key)
|
146
|
-
if other.type == :flat
|
185
|
+
if other.type == :flat
|
147
186
|
other_values = [other_values]
|
148
|
-
elsif other.type == :
|
187
|
+
elsif other.type == :single
|
188
|
+
other_values = [other_values]
|
189
|
+
elsif other.type == :list && type == :double
|
149
190
|
other_values = other_values.collect{|v| [v] }
|
150
|
-
elsif other.type == :double &&
|
191
|
+
elsif other.type == :double && type == :list
|
151
192
|
other_values = other_values.collect{|v| v.first }
|
152
193
|
end
|
153
194
|
|
154
|
-
new_values = case
|
195
|
+
new_values = case type
|
155
196
|
when :list
|
156
197
|
[nil] * source.fields.length
|
157
198
|
when :flat
|
@@ -162,17 +203,19 @@ module TSV
|
|
162
203
|
|
163
204
|
other_values.zip(overlaps).each do |v,overlap|
|
164
205
|
next if v.nil?
|
165
|
-
if
|
206
|
+
if overlap == :key
|
166
207
|
other_key = Array === v ? v : v.first
|
167
|
-
elsif
|
208
|
+
elsif type == :list
|
168
209
|
new_values[overlap] = v if v[overlap].nil? || String === v[overlap] && v[overlap].empty?
|
169
210
|
else
|
211
|
+
v = [v] unless Array === v
|
170
212
|
new_values[overlap].concat v
|
171
213
|
end
|
172
214
|
end
|
173
215
|
source[other_key] = new_values
|
174
216
|
end
|
175
217
|
end
|
218
|
+
source.type = type
|
176
219
|
end
|
177
220
|
end
|
178
221
|
|
@@ -182,4 +225,37 @@ module TSV
|
|
182
225
|
def attach(*args, **kwargs)
|
183
226
|
TSV.attach(self, *args, **kwargs)
|
184
227
|
end
|
228
|
+
|
229
|
+
# Lists the identifier sources associated with this TSV.
#
# When `identifiers` is set it is normalized to an Array: a TSV is wrapped,
# an Array of TSVs (or an empty Array) is returned as is, and any other
# entries are turned into Paths. Otherwise the sources are looked up through
# `identifiers` on the directory of `filename`; with neither available the
# result is an empty Array.
def identifier_files
  ids = identifiers

  if ids
    return [ids] if TSV === ids
    return [Path === ids ? ids : Path.setup(ids)] unless Array === ids
    return ids if TSV === ids.first || ids.empty?
    return ids.collect { |f| Path === f ? f : Path.setup(f) }
  end

  file = filename
  if Path === file
    # Only this branch filters out entries that do not exist
    [file.dirname.identifiers].flatten.compact.select { |f| f.exists? }
  elsif file
    [Path.setup(file.dup).dirname.identifiers]
  else
    []
  end
end
|
251
|
+
|
252
|
+
# Identifier sources for +obj+: a TSV reports its own identifier_files, a
# Path is resolved through `identifiers` on its directory, and anything else
# yields nil.
def self.identifier_files(obj)
  case obj
  when TSV
    obj.identifier_files
  when Path
    obj.dirname.identifiers
  end
end
|
185
261
|
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# Identifier translation: find a chain of files linking two identifier
# formats, build an index along it, and use it to translate a TSV field.
module TSV

  # Locates +field+ in +obj+, which may be a TSV, a Parser/Dumper, a
  # Path/String (its header is parsed), or an Array laid out as
  # [key_field, *fields]. Returns whatever the underlying identify_field call
  # returns, or nil for any other kind of object.
  def self.identify_field_in_obj(obj, field)
    case obj
    when TSV
      obj.identify_field(field)
    when TSV::Parser, TSV::Dumper
      TSV.identify_field(obj.key_field, obj.fields, field)
    when Path, String
      all_fields = TSV.parse_header(obj)["all_fields"]
      identify_field_in_obj(all_fields, field)
    when Array
      key_field, *fields = obj
      TSV.identify_field(key_field, fields, field)
    end
  end

  # Finds the files needed to go from +source+ to +target+.
  #
  # +file_fields+ maps each file to its list of all fields. The result is an
  # Array with one file (it holds both fields), two files (they share a
  # field), or three files (a middle file shares a field with each side).
  # A nil +source+ means any file can be the starting point; in that case the
  # one-file shortcut is never taken.
  #
  # Raises RuntimeError when no such chain exists.
  def self.translation_path(file_fields, source, target)
    target_files = file_fields.select { |_file, fields| identify_field_in_obj(fields, target) }.keys
    source_files = if source.nil?
                     file_fields.keys
                   else
                     file_fields.select { |_file, fields| identify_field_in_obj(fields, source) }.keys
                   end

    one_step = source ? target_files & source_files : []
    return [one_step.first] if one_step.any?

    source_fields = file_fields.values_at(*source_files).flatten
    target_fields = file_fields.values_at(*target_files).flatten

    common_fields = source_fields & target_fields
    if common_fields.any?
      source_file = source_files.find { |file| (file_fields[file] & common_fields).any? }
      target_file = target_files.find { |file| (file_fields[file] & common_fields).any? }
      return [source_file, target_file]
    end

    middle_file, middle_fields = file_fields.find do |_file, fields|
      (fields & source_fields).any? && (fields & target_fields).any?
    end

    unless middle_file
      raise "Could not traverse identifier path from #{Log.fingerprint source} to #{Log.fingerprint target} in #{Log.fingerprint file_fields}"
    end

    source_file = source_files.find { |file| (file_fields[file] & middle_fields).any? }
    target_file = target_files.find { |file| (file_fields[file] & middle_fields).any? }
    [source_file, middle_file, target_file]
  end

  # Builds (through Persist.persist) an index from +source+ to +target+ using
  # +files+. Returns nil when +source+ equals +target+.
  #
  # Paths that do not exist are ignored. +persist_options+ defaults to
  # :persist => true and :prefix => "Translation index"; the caller's hash is
  # not modified.
  def self.translation_index(files, source, target, persist_options = {})
    return nil if source == target
    persist_options = IndiferentHash.add_defaults persist_options.dup, :persist => true, :prefix => "Translation index"

    files = [files] unless Array === files

    file_fields = {}
    files.each do |file|
      next if Path === file && ! Open.exist?(file)
      file = file.find if Path === file
      file_fields[file] = all_fields(file)
    end

    path = translation_path(file_fields, source, target)

    name = [source || "all", target] * "->" + " (#{files.length} files - #{Misc.digest(files)})"

    # With several files, the first hop targets the field shared by the first
    # two files rather than the final target.
    second_target = if path.length == 1
                      target
                    else
                      file1, file2 = path.values_at 0, 1
                      pos = NamedArray.identify_name(TSV.all_fields(file1), TSV.all_fields(file2))
                      TSV.all_fields(file1)[pos.compact.first]
                    end

    Persist.persist(name, "HDB", persist_options) do
      index = path.inject(nil) do |acc, file|
        if acc
          # Later files are attached to the table built so far
          acc.attach file, insitu: false
        elsif source.nil?
          TSV === file ? file.index(target: second_target) : TSV.index(file, target: second_target)
        elsif TSV === file
          file.key_field == source ? file.annotate(file.dup) : file.reorder(source)
        else
          TSV.open(file, key_field: source)
        end
      end
      index.slice([target]).to_single
    end
  end

  # Translates +field+ of +tsv+ into +format+.
  #
  # The index is built from +tsv+ plus +identifiers+ (defaulting to
  # tsv.identifier_files). If +field+ is the key field the keys are
  # translated; otherwise the values in the column holding +field+ are, with
  # Array cells translated element-wise and untranslatable elements dropped.
  # Returns the transformer when +stream+ is true, otherwise the resulting TSV.
  #
  # NOTE(review): when +field+ equals +format+ translation_index returns nil
  # and the lookups below fail -- confirm whether callers can hit that case.
  def self.translate(tsv, field, format, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_index: true)

    identifiers ||= tsv.identifier_files
    index = translation_index([tsv, identifiers].flatten.compact, field, format, persist: persist_index)

    key_field, *fields = TSV.all_fields(tsv)
    if field == key_field
      new_key_field = format
      new_fields = fields
      field_pos = :key
    else
      new_key_field = key_field
      new_fields = fields.collect { |f| f == field ? format : f }
      # Locate the column by the original field name: looking up +format+ in
      # the renamed fields would pick a pre-existing column with that name.
      field_pos = fields.index(field)
    end

    transformer = TSV::Transformer.new tsv
    transformer.key_field = new_key_field
    transformer.fields = new_fields
    transformer.traverse one2one: one2one, unnamed: true do |k, v|
      if field_pos == :key
        [index[k], v]
      else
        v = v.dup
        v[field_pos] = if Array === v[field_pos]
                         index.values_at(*v[field_pos]).compact
                       else
                         index[v[field_pos]]
                       end
        [k, v]
      end
    end

    stream ? transformer : transformer.tsv(merge: merge, one2one: one2one)
  end

  # Instance shortcut for TSV.translate with this TSV as the subject.
  def translate(*args, **kwargs)
    TSV.translate(self, *args, **kwargs)
  end

end
|
data/lib/scout/tsv/change_id.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
+
require_relative 'change_id/translate'
|
2
|
+
|
1
3
|
module TSV
|
2
4
|
def self.change_key(source, new_key_field, identifiers: nil, one2one: false, merge: true, stream: false, keep: false, persist_identifiers: nil)
|
3
5
|
source = TSV::Parser.new source if String === source
|
6
|
+
identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
|
4
7
|
if identifiers && source.identify_field(new_key_field, strict: true).nil?
|
5
8
|
identifiers = identifiers.nil? ? source.identifiers : identifiers
|
6
9
|
new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module TSV
  # Loads CSV content into a TSV.
  #
  # +obj+ may be a Path, a String (remote location, file name, or the CSV text
  # itself) or anything else CSV.new accepts. Recognized +options+:
  # * :headers (default true): treat the first row as key field + field names;
  #   without headers rows are keyed "row-0", "row-1", ...
  # * :type (default :list): type of the resulting TSV
  # * :cast: method name sent to every value
  # * :key_field / :fields: reorder the result; the table is then loaded as
  #   :double and converted back to the requested type afterwards
  # Remaining options are forwarded to the CSV library. The +options+ hash
  # passed by the caller is not modified.
  def self.csv(obj, options = {})
    # Work on a copy: the entries consumed below must not vanish from the
    # caller's hash.
    options = IndiferentHash.add_defaults options.dup, :headers => true, :type => :list
    noheaders = ! options[:headers]

    type = options.delete :type
    cast = options.delete :cast
    key_field = options.delete :key_field
    fields = options.delete :fields
    # :merge is not a CSV option; drop it so it is not forwarded
    options.delete :merge

    reorder = key_field || fields
    if reorder
      orig_type = type
      type = :double
    end

    # The header row is consumed manually below
    options[:headers] = false

    csv = case obj
          when Path
            CSV.read obj.find.open, **options
          when String
            if Open.remote?(obj)
              CSV.read Open.open(obj), **options
            elsif Path.is_filename?(obj)
              CSV.read obj, **options
            else
              CSV.new obj, **options
            end
          else
            CSV.new obj, **options
          end

    tsv = if noheaders
            TSV.setup({}, :key_field => nil, :fields => nil, :type => type)
          else
            key, *csv_fields = csv.shift
            TSV.setup({}, :key_field => key, :fields => csv_fields, :type => type)
          end

    csv.each_with_index do |row, i|
      if noheaders
        key = "row-#{i}"
        values = row
      else
        key, *values = row
      end

      values = values.collect { |v| v.send cast } if cast

      case type
      when :double, :flat
        tsv.zip_new(key, values)
      when :single
        tsv[key] = values.first
      when :list
        tsv[key] = values
      end
    end

    if reorder
      tsv = tsv.reorder(key_field, fields, :one2one => true, :merge => true)
      if tsv.type != orig_type
        tsv = case orig_type
              when :list
                tsv.to_list
              when :single
                tsv.to_single
              when :flat
                tsv.to_flat
              else
                # Keep the table rather than replacing it with nil
                tsv
              end
      end
    end

    tsv
  end
end
|