etna 0.1.27 → 0.1.32

Sign up to get free protection for your applications and to get access to all the features.
@@ -226,7 +226,7 @@ module Etna
226
226
  if renames && (attribute_renames = renames[model_name]) && (new_name = attribute_renames[attribute_name])
227
227
  new_name = target_attribute_of_source(model_name, new_name)
228
228
 
229
- unless target_model.template.attributes.include?(new_name)
229
+ unless target_model.template.attributes.attribute_keys.include?(new_name)
230
230
  if target_original_attribute
231
231
  rename = RenameAttributeAction.new(model_name: target_model_name, attribute_name: target_attribute_name, new_attribute_name: new_name)
232
232
  queue_update(rename)
@@ -37,8 +37,7 @@ module Etna
37
37
  end
38
38
 
39
39
  def update_attributes
40
- method = json_values ? :update_json : :update
41
- magma_crud.update_records(method: method) do |update_request|
40
+ magma_crud.update_records(method: :update_json) do |update_request|
42
41
  each_revision do |model_name, record_name, revision|
43
42
  update_request.update_revision(model_name, record_name, revision)
44
43
  end
@@ -53,10 +52,18 @@ module Etna
53
52
  end
54
53
 
55
54
  class RowBase
56
- def stripped_value(attribute_value)
55
+ def attribute_is_json?(attribute)
56
+ [Etna::Clients::Magma::AttributeType::FILE,
57
+ Etna::Clients::Magma::AttributeType::FILE_COLLECTION,
58
+ Etna::Clients::Magma::AttributeType::IMAGE].include?(attribute.attribute_type)
59
+ end
60
+
61
+ def stripped_value(attribute, attribute_value)
57
62
  attribute_value = attribute_value&.strip
58
63
 
59
- if attribute_value && @workflow.json_values && attribute_value != @workflow.hole_value
64
+ if attribute_value &&
65
+ ( @workflow.json_values || attribute_is_json?(attribute) ) &&
66
+ attribute_value != @workflow.hole_value
60
67
  attribute_value = JSON.parse(attribute_value)
61
68
  end
62
69
  attribute_value
@@ -123,7 +130,7 @@ module Etna
123
130
  raise "Invalid attribute #{attribute_name} for model #{model_name}."
124
131
  end
125
132
 
126
- stripped = stripped_value(@raw[index + 1])
133
+ stripped = stripped_value(attribute, @raw[index + 1])
127
134
  unless @workflow.hole_value.nil?
128
135
  next if stripped == @workflow.hole_value
129
136
  end
@@ -234,7 +241,13 @@ module Etna
234
241
  attribute_name_clean = attribute_name.strip
235
242
  raise "Invalid attribute \"#{attribute_name_clean}\" for model #{@model_name}." unless attribute = @workflow.find_attribute(@model_name, attribute_name_clean)
236
243
 
237
- attributes[attribute_name_clean] = stripped_value(@raw[attribute_name])
244
+ stripped = stripped_value(attribute, @raw[attribute_name])
245
+
246
+ unless @workflow.hole_value.nil?
247
+ next if stripped == @workflow.hole_value
248
+ end
249
+
250
+ attributes[attribute_name_clean] = stripped
238
251
  end
239
252
  end
240
253
  end
@@ -7,6 +7,31 @@ module Etna
7
7
  module Clients
8
8
  class Magma
9
9
  class WalkModelTreeWorkflow < Struct.new(:magma_crud, :logger, keyword_init: true)
10
+ def initialize(**args)
11
+ super(**({}.update(args)))
12
+ @template_for = {}
13
+ end
14
+
15
+ def masked_attributes(template:, model_attributes_mask:, model_name:)
16
+ attributes_mask = model_attributes_mask[model_name]
17
+ return ["all", "all"] if attributes_mask.nil?
18
+ [(attributes_mask + [template.identifier, 'parent']).uniq, attributes_mask]
19
+ end
20
+
21
+ def attribute_included?(mask, attribute_name)
22
+ return true if mask == "all"
23
+ mask.include?(attribute_name)
24
+ end
25
+
26
+ def template_for(model_name)
27
+ @template_for[model_name] ||= magma_crud.magma_client.retrieve(RetrievalRequest.new(
28
+ project_name: magma_crud.project_name,
29
+ model_name: model_name,
30
+ record_names: [],
31
+ attribute_names: [],
32
+ )).models.model(model_name).template
33
+ end
34
+
10
35
  def walk_from(
11
36
  model_name,
12
37
  record_names = 'all',
@@ -22,28 +47,30 @@ module Etna
22
47
  next if seen.include?([path[:from], model_name])
23
48
  seen.add([path[:from], model_name])
24
49
 
50
+ template = template_for(model_name)
51
+ query_attributes, walk_attributes = masked_attributes(template: template, model_attributes_mask: model_attributes_mask, model_name: model_name)
52
+
25
53
  request = RetrievalRequest.new(
26
54
  project_name: magma_crud.project_name,
27
55
  model_name: model_name,
28
56
  record_names: path[:record_names],
29
57
  filter: model_filters[model_name],
58
+ attribute_names: query_attributes,
30
59
  page_size: page_size, page: 1
31
60
  )
32
61
 
33
62
  related_models = {}
34
63
 
35
64
  magma_crud.page_records(model_name, request) do |response|
36
- model = response.models.model(model_name)
37
- template = model.template
38
-
39
65
  tables = []
40
66
  collections = []
41
67
  links = []
42
68
  attributes = []
43
69
 
70
+ model = response.models.model(model_name)
71
+
44
72
  template.attributes.attribute_keys.each do |attr_name|
45
- attributes_mask = model_attributes_mask[model_name]
46
- next if !attributes_mask.nil? && !attributes_mask.include?(attr_name) && attr_name != template.identifier
73
+ next unless attribute_included?(query_attributes, attr_name)
47
74
  attributes << attr_name
48
75
 
49
76
  attr = template.attributes.attribute(attr_name)
@@ -58,7 +85,7 @@ module Etna
58
85
  elsif attr.attribute_type == AttributeType::CHILD
59
86
  related_models[attr.link_model_name] ||= Set.new
60
87
  links << attr_name
61
- elsif attr.attribute_type == AttributeType::PARENT
88
+ elsif attr.attribute_type == AttributeType::PARENT && attribute_included?(walk_attributes, attr_name)
62
89
  related_models[attr.link_model_name] ||= Set.new
63
90
  links << attr_name
64
91
  end
@@ -69,6 +69,11 @@ module Etna
69
69
  @etna_client.folder_remove(delete_folder_request.to_h))
70
70
  end
71
71
 
72
+ def delete_file(delete_file_request)
73
+ FilesResponse.new(
74
+ @etna_client.file_remove(delete_file_request.to_h))
75
+ end
76
+
72
77
  def find(find_request)
73
78
  FoldersAndFilesResponse.new(
74
79
  @etna_client.bucket_find(find_request.to_h))
@@ -98,7 +103,7 @@ module Etna
98
103
  @etna_client.get(download_path) do |response|
99
104
  return {
100
105
  etag: response['ETag'].gsub(/"/, ''),
101
- size: response['Content-Length'],
106
+ size: response['Content-Length'].to_i,
102
107
  }
103
108
  end
104
109
  end
@@ -95,6 +95,21 @@ module Etna
95
95
  end
96
96
  end
97
97
 
98
+ class DeleteFileRequest < Struct.new(:project_name, :bucket_name, :file_path, keyword_init: true)
99
+ include JsonSerializableStruct
100
+
101
+ def initialize(**params)
102
+ super({}.update(params))
103
+ end
104
+
105
+ def to_h
106
+ # The :project_name comes in from Polyphemus as a symbol value,
107
+ # we need to make sure it's a string because it's going
108
+ # in the URL.
109
+ super().compact.transform_values(&:to_s)
110
+ end
111
+ end
112
+
98
113
  class FindRequest < Struct.new(:project_name, :bucket_name, :limit, :offset, :params, keyword_init: true)
99
114
  include JsonSerializableStruct
100
115
 
@@ -13,23 +13,27 @@ module Etna
13
13
  end
14
14
 
15
15
  # TODO: Might be possible to use range headers to select and resume downloads on failure in the future.
16
- def do_download(dest_file, metis_file, &block)
16
+ def do_download(dest_file_or_io, metis_file, &block)
17
17
  size = metis_file.size
18
18
  completed = 0.0
19
19
  start = Time.now
20
20
 
21
- ::File.open(dest_file, "w") do |io|
22
- metis_client.download_file(metis_file) do |chunk|
23
- io.write chunk
24
- completed += chunk.size
25
-
26
- block.call([
27
- :progress,
28
- size == 0 ? 1 : completed / size,
29
- (completed / (Time.now - start)).round(2),
30
- ]) unless block.nil?
21
+ unless dest_file_or_io.is_a?(IO)
22
+ ::File.open(dest_file_or_io, 'w') do |io|
23
+ return do_download(dest_file_or_io, metis_file, &block)
31
24
  end
32
25
  end
26
+
27
+ metis_client.download_file(metis_file) do |chunk|
28
+ dest_file_or_io.write chunk
29
+ completed += chunk.size
30
+
31
+ block.call([
32
+ :progress,
33
+ size == 0 ? 1 : completed / size,
34
+ (completed / (Time.now - start)).round(2),
35
+ ]) unless block.nil?
36
+ end
33
37
  end
34
38
  end
35
39
  end
@@ -4,17 +4,26 @@ require 'fileutils'
4
4
  require 'tempfile'
5
5
  require 'securerandom'
6
6
 
7
+ $digest_mutx = Mutex.new
8
+
7
9
  module Etna
8
10
  module Clients
9
11
  class Metis
10
12
  class MetisUploadWorkflow < Struct.new(:metis_client, :metis_uid, :project_name, :bucket_name, :max_attempts, keyword_init: true)
13
+ class StreamingUploadError < StandardError
14
+ end
15
+
11
16
 
12
17
  def initialize(args)
13
18
  super({max_attempts: 3, metis_uid: SecureRandom.hex}.update(args))
14
19
  end
15
20
 
16
- def do_upload(source_file, dest_path, &block)
17
- upload = Upload.new(source_file: source_file)
21
+ def do_upload(source_file_or_upload, dest_path, &block)
22
+ unless source_file_or_upload.is_a?(Upload)
23
+ upload = Upload.new(source_file: source_file_or_upload)
24
+ else
25
+ upload = source_file_or_upload
26
+ end
18
27
 
19
28
  dir = ::File.dirname(dest_path)
20
29
  metis_client.create_folder(CreateFolderRequest.new(
@@ -68,9 +77,11 @@ module Etna
68
77
 
69
78
  unsent_zero_byte_file = false
70
79
  rescue Etna::Error => e
71
- m = yield [:error, e] unless block.nil?
72
- if m == false
73
- raise e
80
+ unless block.nil?
81
+ m = yield [:error, e]
82
+ if m == false
83
+ raise e
84
+ end
74
85
  end
75
86
 
76
87
  if e.status == 422
@@ -90,13 +101,17 @@ module Etna
90
101
  end
91
102
  end
92
103
 
93
- class Upload < Struct.new(:source_file, :next_blob_size, :current_byte_position, keyword_init: true)
104
+ class Upload
94
105
  INITIAL_BLOB_SIZE = 2 ** 10
95
106
  MAX_BLOB_SIZE = 2 ** 22
96
107
  ZERO_HASH = 'd41d8cd98f00b204e9800998ecf8427e'
97
108
 
98
- def initialize(**args)
99
- super
109
+ attr_accessor :source_file, :next_blob_size, :current_byte_position
110
+
111
+ def initialize(source_file: nil, next_blob_size: nil, current_byte_position: nil)
112
+ self.source_file = source_file
113
+ self.next_blob_size = next_blob_size
114
+ self.current_byte_position = current_byte_position
100
115
  self.next_blob_size = [file_size, INITIAL_BLOB_SIZE].min
101
116
  self.current_byte_position = 0
102
117
  end
@@ -108,10 +123,10 @@ module Etna
108
123
  def advance_position!
109
124
  self.current_byte_position = self.current_byte_position + self.next_blob_size
110
125
  self.next_blob_size = [
111
- MAX_BLOB_SIZE,
112
- # in fact we should stop when we hit the end of the file
113
- file_size - current_byte_position
114
- ].min
126
+ MAX_BLOB_SIZE,
127
+ # in fact we should stop when we hit the end of the file
128
+ file_size - current_byte_position
129
+ ].min
115
130
  end
116
131
 
117
132
  def complete?
@@ -119,7 +134,14 @@ module Etna
119
134
  end
120
135
 
121
136
  def next_blob_hash
122
- Digest::MD5.hexdigest(next_blob_bytes)
137
+ bytes = next_blob_bytes
138
+ if bytes.empty?
139
+ return ZERO_HASH
140
+ end
141
+
142
+ $digest_mutx.synchronize do
143
+ return Digest::MD5.hexdigest(bytes)
144
+ end
123
145
  end
124
146
 
125
147
  def next_blob_bytes
@@ -131,6 +153,54 @@ module Etna
131
153
  self.next_blob_size = upload_response.next_blob_size
132
154
  end
133
155
  end
156
+
157
+ class StreamingIOUpload < Upload
158
+ def initialize(readable_io:, size_hint: 0, **args)
159
+ @readable_io = readable_io
160
+ @size_hint = size_hint
161
+ @read_position = 0
162
+ @last_bytes = ""
163
+ super(**args)
164
+ end
165
+
166
+ def file_size
167
+ @size_hint
168
+ end
169
+
170
+ def next_blob_bytes
171
+ next_left = current_byte_position
172
+ next_right = current_byte_position + next_blob_size
173
+
174
+ if next_right < @read_position
175
+ raise StreamingUploadError.new("Upload needs restart, but source is streaming and ephemeral. #{next_right} #{@read_position} You need to restart the source stream and create a new upload.")
176
+ elsif @read_position < next_left
177
+ # read from the stream and discard until we are positioned for the next read.
178
+ data = @readable_io.read(next_left - @read_position)
179
+ raise StreamingUploadError.new("Unexpected EOF in read stream") if data.nil?
180
+
181
+ @read_position += data.bytes.length
182
+ end
183
+
184
+ # If we have consumed all requested data, return what we have consumed.
185
+ # If we have requested no data, make sure to provide "" as the result.
186
+ if next_right == @read_position
187
+ return @last_bytes
188
+ end
189
+
190
+ if @read_position != next_left
191
+ raise StreamingUploadError.new("Alignment error, source data does not match expected upload resume. #{@read_position} #{next_left} Restart the upload to address.")
192
+ end
193
+
194
+ @last_bytes = "".tap do |bytes|
195
+ while @read_position < next_right
196
+ bytes << @readable_io.read(next_right - @read_position).tap do |data|
197
+ raise StreamingUploadError.new("Unexpected EOF in read stream") if data.nil?
198
+ @read_position += data.bytes.length
199
+ end
200
+ end
201
+ end
202
+ end
203
+ end
134
204
  end
135
205
  end
136
206
  end
@@ -1,107 +1,71 @@
1
1
  require 'ostruct'
2
- require 'digest'
3
2
  require 'fileutils'
4
3
  require 'tempfile'
5
4
 
6
5
  module Etna
7
6
  module Clients
8
7
  class Metis
9
- class SyncMetisDataWorkflow < Struct.new(:metis_client, :filesystem, :project_name, :bucket_name,
10
- :logger, :skip_tmpdir, keyword_init: true)
11
- def copy_directory(src, dest, root = dest, tmpdir = nil)
12
- own_tmpdir = tmpdir.nil? && !skip_tmpdir
13
- if own_tmpdir
14
- tmpdir = filesystem.tmpdir
15
- end
16
-
17
- begin
18
- response = metis_client.list_folder(ListFolderRequest.new(project_name: project_name, bucket_name: bucket_name, folder_path: src))
8
+ class SyncMetisDataWorkflow < Struct.new(:metis_client, :filesystem, :project_name, :bucket_name, :logger, keyword_init: true)
9
+ def copy_directory(src, dest, root = dest)
10
+ response = metis_client.list_folder(ListFolderRequest.new(project_name: project_name, bucket_name: bucket_name, folder_path: src))
19
11
 
20
- response.files.all.each do |file|
21
- logger&.info("Copying file #{file.file_path} (#{Etna::Formatting.as_size(file.size)})")
22
- copy_file(bin_root_dir: root, tmpdir: tmpdir, dest: ::File.join(dest, file.file_name), url: file.download_url)
23
- end
24
-
25
- response.folders.all.each do |folder|
26
- copy_directory(::File.join(src, folder.folder_name), ::File.join(dest, folder.folder_name), root, tmpdir)
27
- end
28
- ensure
29
- filesystem.rm_rf(tmpdir) if own_tmpdir
12
+ response.files.all.each do |file|
13
+ logger&.info("Copying file #{file.file_path} (#{Etna::Formatting.as_size(file.size)})")
14
+ copy_file(dest: ::File.join(dest, file.file_name), url: file.download_url)
30
15
  end
31
- end
32
-
33
- def bin_file_name(etag:)
34
- "bin/#{etag}"
35
- end
36
16
 
37
- def with_maybe_intermediate_tmp_dest(bin_file_name:, tmpdir:, dest_file_name:, &block)
38
- filesystem.mkdir_p(::File.dirname(dest_file_name))
39
- if tmpdir.nil?
40
- yield dest_file_name
41
- else
42
- tmp_file = ::File.join(tmpdir, ::File.basename(bin_file_name))
43
- yield tmp_file
44
- filesystem.mv(tmp_file, dest_file_name)
17
+ response.folders.all.each do |folder|
18
+ copy_directory(::File.join(src, folder.folder_name), ::File.join(dest, folder.folder_name), root)
45
19
  end
46
20
  end
47
21
 
48
- def copy_file(bin_root_dir:, tmpdir:, dest:, url:, stub: false)
22
+ def copy_file(dest:, url:, stub: false)
49
23
  metadata = metis_client.file_metadata(url)
50
- etag = metadata[:etag]
51
24
  size = metadata[:size]
52
25
 
53
- dest_bin_file = ::File.join(bin_root_dir, bin_file_name(etag: etag))
54
- # Already materialized, continue
55
- if filesystem.exist?(dest_bin_file)
56
- return
57
- end
58
-
59
- with_maybe_intermediate_tmp_dest(bin_file_name: dest_bin_file, tmpdir: tmpdir, dest_file_name: dest) do |tmp_file|
60
- upload_timings = []
61
- upload_amount = 0
62
- last_rate = 0.00001
63
-
64
- filesystem.with_writeable(tmp_file, "w", size_hint: size) do |io|
65
- if stub
66
- io.write("(stub) #{size} bytes")
67
- else
68
- metis_client.download_file(url) do |chunk|
69
- io.write(chunk)
70
-
71
- upload_timings << [chunk.length, Time.now.to_f]
72
- upload_amount += chunk.length
73
-
74
- if upload_timings.length > 150
75
- s, _ = upload_timings.shift
76
- upload_amount -= s
77
- end
26
+ tmp_file = dest
27
+ upload_timings = []
28
+ upload_amount = 0
29
+ last_rate = 0.00001
30
+ remaining = size
31
+
32
+ filesystem.with_writeable(tmp_file, "w", size_hint: size) do |io|
33
+ if stub
34
+ io.write("(stub) #{size} bytes")
35
+ else
36
+ metis_client.download_file(url) do |chunk|
37
+ io.write(chunk)
38
+
39
+ upload_timings << [chunk.length, Time.now.to_f]
40
+ upload_amount += chunk.length
41
+ remaining -= chunk.length
42
+
43
+ if upload_timings.length > 150
44
+ s, _ = upload_timings.shift
45
+ upload_amount -= s
46
+ end
78
47
 
79
- _, start_time = upload_timings.first
80
- _, end_time = upload_timings.last
48
+ _, start_time = upload_timings.first
49
+ _, end_time = upload_timings.last
81
50
 
82
- if start_time == end_time
83
- next
84
- end
51
+ if start_time == end_time
52
+ next
53
+ end
85
54
 
86
- rate = upload_amount / (end_time - start_time)
55
+ rate = upload_amount / (end_time - start_time)
87
56
 
88
- if rate / last_rate > 1.3 || rate / last_rate < 0.7
89
- logger&.info("Uploading #{Etna::Formatting.as_size(rate)} per second")
57
+ if rate / last_rate > 1.3 || rate / last_rate < 0.7
58
+ logger&.info("Uploading #{Etna::Formatting.as_size(rate)} per second, #{Etna::Formatting.as_size(remaining)} remaining")
90
59
 
91
- if rate == 0
92
- last_rate = 0.0001
93
- else
94
- last_rate = rate
95
- end
60
+ if rate == 0
61
+ last_rate = 0.0001
62
+ else
63
+ last_rate = rate
96
64
  end
97
65
  end
98
66
  end
99
- end
100
- end
101
67
 
102
- filesystem.mkdir_p(::File.dirname(dest_bin_file))
103
- filesystem.with_writeable(dest_bin_file, 'w', size_hint: 0) do |io|
104
- # empty file marking that this etag has been moved, to save a future write.
68
+ end
105
69
  end
106
70
  end
107
71
  end