etna 0.1.27 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -226,7 +226,7 @@ module Etna
  if renames && (attribute_renames = renames[model_name]) && (new_name = attribute_renames[attribute_name])
  new_name = target_attribute_of_source(model_name, new_name)

- unless target_model.template.attributes.include?(new_name)
+ unless target_model.template.attributes.attribute_keys.include?(new_name)
  if target_original_attribute
  rename = RenameAttributeAction.new(model_name: target_model_name, attribute_name: target_attribute_name, new_attribute_name: new_name)
  queue_update(rename)
@@ -37,8 +37,7 @@ module Etna
  end

  def update_attributes
- method = json_values ? :update_json : :update
- magma_crud.update_records(method: method) do |update_request|
+ magma_crud.update_records(method: :update_json) do |update_request|
  each_revision do |model_name, record_name, revision|
  update_request.update_revision(model_name, record_name, revision)
  end
@@ -53,10 +52,18 @@ module Etna
  end

  class RowBase
- def stripped_value(attribute_value)
+ def attribute_is_json?(attribute)
+ [Etna::Clients::Magma::AttributeType::FILE,
+ Etna::Clients::Magma::AttributeType::FILE_COLLECTION,
+ Etna::Clients::Magma::AttributeType::IMAGE].include?(attribute.attribute_type)
+ end
+
+ def stripped_value(attribute, attribute_value)
  attribute_value = attribute_value&.strip

- if attribute_value && @workflow.json_values && attribute_value != @workflow.hole_value
+ if attribute_value &&
+ ( @workflow.json_values || attribute_is_json?(attribute) ) &&
+ attribute_value != @workflow.hole_value
  attribute_value = JSON.parse(attribute_value)
  end
  attribute_value
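With the attribute_is_json? check above, cells for FILE, FILE_COLLECTION and IMAGE attributes are parsed as JSON even when the workflow was not created with json_values. A minimal sketch of that parsing step, using a hypothetical cell payload (not from the gem's fixtures):

    require 'json'

    # Hypothetical CSV cell for a FILE-typed attribute.
    cell = ' {"path": "metis://example/bucket/reading.txt", "original_filename": "reading.txt"} '

    value = cell&.strip
    value = JSON.parse(value) unless value.nil?
    # => {"path"=>"metis://example/bucket/reading.txt", "original_filename"=>"reading.txt"}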
@@ -123,7 +130,7 @@ module Etna
  raise "Invalid attribute #{attribute_name} for model #{model_name}."
  end

- stripped = stripped_value(@raw[index + 1])
+ stripped = stripped_value(attribute, @raw[index + 1])
  unless @workflow.hole_value.nil?
  next if stripped == @workflow.hole_value
  end
@@ -234,7 +241,13 @@ module Etna
  attribute_name_clean = attribute_name.strip
  raise "Invalid attribute \"#{attribute_name_clean}\" for model #{@model_name}." unless attribute = @workflow.find_attribute(@model_name, attribute_name_clean)

- attributes[attribute_name_clean] = stripped_value(@raw[attribute_name])
+ stripped = stripped_value(attribute, @raw[attribute_name])
+
+ unless @workflow.hole_value.nil?
+ next if stripped == @workflow.hole_value
+ end
+
+ attributes[attribute_name_clean] = stripped
  end
  end
  end
@@ -7,6 +7,31 @@ module Etna
  module Clients
  class Magma
  class WalkModelTreeWorkflow < Struct.new(:magma_crud, :logger, keyword_init: true)
+ def initialize(**args)
+ super(**({}.update(args)))
+ @template_for = {}
+ end
+
+ def masked_attributes(template:, model_attributes_mask:, model_name:)
+ attributes_mask = model_attributes_mask[model_name]
+ return ["all", "all"] if attributes_mask.nil?
+ [(attributes_mask + [template.identifier, 'parent']).uniq, attributes_mask]
+ end
+
+ def attribute_included?(mask, attribute_name)
+ return true if mask == "all"
+ mask.include?(attribute_name)
+ end
+
+ def template_for(model_name)
+ @template_for[model_name] ||= magma_crud.magma_client.retrieve(RetrievalRequest.new(
+ project_name: magma_crud.project_name,
+ model_name: model_name,
+ record_names: [],
+ attribute_names: [],
+ )).models.model(model_name).template
+ end
+
  def walk_from(
  model_name,
  record_names = 'all',
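For reference, masked_attributes returns a pair of masks: the attribute names to request from Magma (the caller's mask plus the identifier and 'parent') and the names to follow when walking links. A worked example with a hypothetical mask; "monster", "species" and "name" are illustrative only:

    # model_attributes_mask = { "monster" => ["species"] }
    # template.identifier   #=> "name"
    #
    # masked_attributes(template: template,
    #                   model_attributes_mask: model_attributes_mask,
    #                   model_name: "monster")
    # #=> [["species", "name", "parent"], ["species"]]
    #
    # With no entry for the model it returns ["all", "all"], so every attribute
    # is both queried and walked.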
@@ -22,28 +47,30 @@ module Etna
  next if seen.include?([path[:from], model_name])
  seen.add([path[:from], model_name])

+ template = template_for(model_name)
+ query_attributes, walk_attributes = masked_attributes(template: template, model_attributes_mask: model_attributes_mask, model_name: model_name)
+
  request = RetrievalRequest.new(
  project_name: magma_crud.project_name,
  model_name: model_name,
  record_names: path[:record_names],
  filter: model_filters[model_name],
+ attribute_names: query_attributes,
  page_size: page_size, page: 1
  )

  related_models = {}

  magma_crud.page_records(model_name, request) do |response|
- model = response.models.model(model_name)
- template = model.template
-
  tables = []
  collections = []
  links = []
  attributes = []

+ model = response.models.model(model_name)
+
  template.attributes.attribute_keys.each do |attr_name|
- attributes_mask = model_attributes_mask[model_name]
- next if !attributes_mask.nil? && !attributes_mask.include?(attr_name) && attr_name != template.identifier
+ next unless attribute_included?(query_attributes, attr_name)
  attributes << attr_name

  attr = template.attributes.attribute(attr_name)
@@ -58,7 +85,7 @@ module Etna
  elsif attr.attribute_type == AttributeType::CHILD
  related_models[attr.link_model_name] ||= Set.new
  links << attr_name
- elsif attr.attribute_type == AttributeType::PARENT
+ elsif attr.attribute_type == AttributeType::PARENT && attribute_included?(walk_attributes, attr_name)
  related_models[attr.link_model_name] ||= Set.new
  links << attr_name
  end
@@ -69,6 +69,11 @@ module Etna
  @etna_client.folder_remove(delete_folder_request.to_h))
  end

+ def delete_file(delete_file_request)
+ FilesResponse.new(
+ @etna_client.file_remove(delete_file_request.to_h))
+ end
+
  def find(find_request)
  FoldersAndFilesResponse.new(
  @etna_client.bucket_find(find_request.to_h))
@@ -98,7 +103,7 @@ module Etna
  @etna_client.get(download_path) do |response|
  return {
  etag: response['ETag'].gsub(/"/, ''),
- size: response['Content-Length'],
+ size: response['Content-Length'].to_i,
  }
  end
  end
@@ -95,6 +95,21 @@ module Etna
  end
  end

+ class DeleteFileRequest < Struct.new(:project_name, :bucket_name, :file_path, keyword_init: true)
+ include JsonSerializableStruct
+
+ def initialize(**params)
+ super({}.update(params))
+ end
+
+ def to_h
+ # The :project_name comes in from Polyphemus as a symbol value,
+ # we need to make sure it's a string because it's going
+ # in the URL.
+ super().compact.transform_values(&:to_s)
+ end
+ end
+
  class FindRequest < Struct.new(:project_name, :bucket_name, :limit, :offset, :params, keyword_init: true)
  include JsonSerializableStruct

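Taken together with the new delete_file method on the Metis client above, a hedged usage sketch; only DeleteFileRequest, delete_file and the to_h stringification come from this diff, while the client instance and names below are assumptions:

    request = Etna::Clients::Metis::DeleteFileRequest.new(
      project_name: :example_project,   # symbol is fine; to_h stringifies it for the URL
      bucket_name: 'data',
      file_path: 'results/summary.txt'
    )

    metis_client.delete_file(request)   # wraps @etna_client.file_remove, returns a FilesResponse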
@@ -13,23 +13,27 @@ module Etna
  end

  # TODO: Might be possible to use range headers to select and resume downloads on failure in the future.
- def do_download(dest_file, metis_file, &block)
+ def do_download(dest_file_or_io, metis_file, &block)
  size = metis_file.size
  completed = 0.0
  start = Time.now

- ::File.open(dest_file, "w") do |io|
- metis_client.download_file(metis_file) do |chunk|
- io.write chunk
- completed += chunk.size
-
- block.call([
- :progress,
- size == 0 ? 1 : completed / size,
- (completed / (Time.now - start)).round(2),
- ]) unless block.nil?
+ unless dest_file_or_io.is_a?(IO)
+ ::File.open(dest_file_or_io, 'w') do |io|
+ return do_download(dest_file_or_io, metis_file, &block)
  end
  end
+
+ metis_client.download_file(metis_file) do |chunk|
+ dest_file_or_io.write chunk
+ completed += chunk.size
+
+ block.call([
+ :progress,
+ size == 0 ? 1 : completed / size,
+ (completed / (Time.now - start)).round(2),
+ ]) unless block.nil?
+ end
  end
  end
  end
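do_download now accepts an already-open IO as well as a file path. A minimal sketch of the IO path, assuming a download workflow instance and a metis_file already exist:

    ::File.open('local_copy.bin', 'w') do |io|
      workflow.do_download(io, metis_file) do |event, fraction, rate|
        # each yield is [:progress, fraction_complete, bytes_per_second]
      end
    end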
@@ -4,17 +4,26 @@ require 'fileutils'
  require 'tempfile'
  require 'securerandom'

+ $digest_mutx = Mutex.new
+
  module Etna
  module Clients
  class Metis
  class MetisUploadWorkflow < Struct.new(:metis_client, :metis_uid, :project_name, :bucket_name, :max_attempts, keyword_init: true)
+ class StreamingUploadError < StandardError
+ end
+

  def initialize(args)
  super({max_attempts: 3, metis_uid: SecureRandom.hex}.update(args))
  end

- def do_upload(source_file, dest_path, &block)
- upload = Upload.new(source_file: source_file)
+ def do_upload(source_file_or_upload, dest_path, &block)
+ unless source_file_or_upload.is_a?(Upload)
+ upload = Upload.new(source_file: source_file_or_upload)
+ else
+ upload = source_file_or_upload
+ end

  dir = ::File.dirname(dest_path)
  metis_client.create_folder(CreateFolderRequest.new(
@@ -68,9 +77,11 @@ module Etna

  unsent_zero_byte_file = false
  rescue Etna::Error => e
- m = yield [:error, e] unless block.nil?
- if m == false
- raise e
+ unless block.nil?
+ m = yield [:error, e]
+ if m == false
+ raise e
+ end
  end

  if e.status == 422
@@ -90,13 +101,17 @@ module Etna
  end
  end

- class Upload < Struct.new(:source_file, :next_blob_size, :current_byte_position, keyword_init: true)
+ class Upload
  INITIAL_BLOB_SIZE = 2 ** 10
  MAX_BLOB_SIZE = 2 ** 22
  ZERO_HASH = 'd41d8cd98f00b204e9800998ecf8427e'

- def initialize(**args)
- super
+ attr_accessor :source_file, :next_blob_size, :current_byte_position
+
+ def initialize(source_file: nil, next_blob_size: nil, current_byte_position: nil)
+ self.source_file = source_file
+ self.next_blob_size = next_blob_size
+ self.current_byte_position = current_byte_position
  self.next_blob_size = [file_size, INITIAL_BLOB_SIZE].min
  self.current_byte_position = 0
  end
@@ -108,10 +123,10 @@ module Etna
  def advance_position!
  self.current_byte_position = self.current_byte_position + self.next_blob_size
  self.next_blob_size = [
- MAX_BLOB_SIZE,
- # in fact we should stop when we hit the end of the file
- file_size - current_byte_position
- ].min
+ MAX_BLOB_SIZE,
+ # in fact we should stop when we hit the end of the file
+ file_size - current_byte_position
+ ].min
  end
  end
  def complete?
@@ -119,7 +134,14 @@ module Etna
  end
  end
  def next_blob_hash
- Digest::MD5.hexdigest(next_blob_bytes)
+ bytes = next_blob_bytes
+ if bytes.empty?
+ return ZERO_HASH
+ end
+
+ $digest_mutx.synchronize do
+ return Digest::MD5.hexdigest(bytes)
+ end
  end

  def next_blob_bytes
@@ -131,6 +153,54 @@ module Etna
  self.next_blob_size = upload_response.next_blob_size
  end
  end
+
+ class StreamingIOUpload < Upload
+ def initialize(readable_io:, size_hint: 0, **args)
+ @readable_io = readable_io
+ @size_hint = size_hint
+ @read_position = 0
+ @last_bytes = ""
+ super(**args)
+ end
+
+ def file_size
+ @size_hint
+ end
+
+ def next_blob_bytes
+ next_left = current_byte_position
+ next_right = current_byte_position + next_blob_size
+
+ if next_right < @read_position
+ raise StreamingUploadError.new("Upload needs restart, but source is streaming and ephemeral. #{next_right} #{@read_position} You need to restart the source stream and create a new upload.")
+ elsif @read_position < next_left
+ # read from the stream and discard until we are positioned for the next read.
+ data = @readable_io.read(next_left - @read_position)
+ raise StreamingUploadError.new("Unexpected EOF in read stream") if data.nil?
+
+ @read_position += data.bytes.length
+ end
+
+ # If we have consumed all requested data, return what we have consumed.
+ # If we have requested no data, make sure to provide "" as the result.
+ if next_right == @read_position
+ return @last_bytes
+ end
+
+ if @read_position != next_left
+ raise StreamingUploadError.new("Alignment error, source data does not match expected upload resume. #{@read_position} #{next_left} Restart the upload to address.")
+ end
+
+ @last_bytes = "".tap do |bytes|
+ while @read_position < next_right
+ bytes << @readable_io.read(next_right - @read_position).tap do |data|
+ raise StreamingUploadError.new("Unexpected EOF in read stream") if data.nil?
+ @read_position += data.bytes.length
+ end
+ end
+ end
+ end
+ end
+ end
  end
  end
  end
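The StreamingIOUpload added above pairs with the earlier do_upload change, which now accepts an Upload instance in place of a source file path. A hedged sketch, assuming StreamingIOUpload is nested under MetisUploadWorkflow as the surrounding hunks suggest; metis_client, source_io and source_byte_size are assumptions:

    workflow = MetisUploadWorkflow.new(
      metis_client: metis_client,
      project_name: 'example',
      bucket_name: 'data'
    )

    upload = MetisUploadWorkflow::StreamingIOUpload.new(
      readable_io: source_io,
      size_hint: source_byte_size   # used as file_size, since a stream cannot be stat'd
    )

    workflow.do_upload(upload, 'uploads/streamed_file.bin') do |event, *rest|
      # [:error, e] is yielded on retryable failures; returning false re-raises
    end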
@@ -1,107 +1,71 @@
  require 'ostruct'
- require 'digest'
  require 'fileutils'
  require 'tempfile'

  module Etna
  module Clients
  class Metis
- class SyncMetisDataWorkflow < Struct.new(:metis_client, :filesystem, :project_name, :bucket_name,
- :logger, :skip_tmpdir, keyword_init: true)
- def copy_directory(src, dest, root = dest, tmpdir = nil)
- own_tmpdir = tmpdir.nil? && !skip_tmpdir
- if own_tmpdir
- tmpdir = filesystem.tmpdir
- end
-
- begin
- response = metis_client.list_folder(ListFolderRequest.new(project_name: project_name, bucket_name: bucket_name, folder_path: src))
+ class SyncMetisDataWorkflow < Struct.new(:metis_client, :filesystem, :project_name, :bucket_name, :logger, keyword_init: true)
+ def copy_directory(src, dest, root = dest)
+ response = metis_client.list_folder(ListFolderRequest.new(project_name: project_name, bucket_name: bucket_name, folder_path: src))

- response.files.all.each do |file|
- logger&.info("Copying file #{file.file_path} (#{Etna::Formatting.as_size(file.size)})")
- copy_file(bin_root_dir: root, tmpdir: tmpdir, dest: ::File.join(dest, file.file_name), url: file.download_url)
- end
-
- response.folders.all.each do |folder|
- copy_directory(::File.join(src, folder.folder_name), ::File.join(dest, folder.folder_name), root, tmpdir)
- end
- ensure
- filesystem.rm_rf(tmpdir) if own_tmpdir
+ response.files.all.each do |file|
+ logger&.info("Copying file #{file.file_path} (#{Etna::Formatting.as_size(file.size)})")
+ copy_file(dest: ::File.join(dest, file.file_name), url: file.download_url)
  end
- end
-
- def bin_file_name(etag:)
- "bin/#{etag}"
- end

- def with_maybe_intermediate_tmp_dest(bin_file_name:, tmpdir:, dest_file_name:, &block)
- filesystem.mkdir_p(::File.dirname(dest_file_name))
- if tmpdir.nil?
- yield dest_file_name
- else
- tmp_file = ::File.join(tmpdir, ::File.basename(bin_file_name))
- yield tmp_file
- filesystem.mv(tmp_file, dest_file_name)
+ response.folders.all.each do |folder|
+ copy_directory(::File.join(src, folder.folder_name), ::File.join(dest, folder.folder_name), root)
  end
  end

- def copy_file(bin_root_dir:, tmpdir:, dest:, url:, stub: false)
+ def copy_file(dest:, url:, stub: false)
  metadata = metis_client.file_metadata(url)
- etag = metadata[:etag]
  size = metadata[:size]

- dest_bin_file = ::File.join(bin_root_dir, bin_file_name(etag: etag))
- # Already materialized, continue
- if filesystem.exist?(dest_bin_file)
- return
- end
-
- with_maybe_intermediate_tmp_dest(bin_file_name: dest_bin_file, tmpdir: tmpdir, dest_file_name: dest) do |tmp_file|
- upload_timings = []
- upload_amount = 0
- last_rate = 0.00001
-
- filesystem.with_writeable(tmp_file, "w", size_hint: size) do |io|
- if stub
- io.write("(stub) #{size} bytes")
- else
- metis_client.download_file(url) do |chunk|
- io.write(chunk)
-
- upload_timings << [chunk.length, Time.now.to_f]
- upload_amount += chunk.length
-
- if upload_timings.length > 150
- s, _ = upload_timings.shift
- upload_amount -= s
- end
+ tmp_file = dest
+ upload_timings = []
+ upload_amount = 0
+ last_rate = 0.00001
+ remaining = size
+
+ filesystem.with_writeable(tmp_file, "w", size_hint: size) do |io|
+ if stub
+ io.write("(stub) #{size} bytes")
+ else
+ metis_client.download_file(url) do |chunk|
+ io.write(chunk)
+
+ upload_timings << [chunk.length, Time.now.to_f]
+ upload_amount += chunk.length
+ remaining -= chunk.length
+
+ if upload_timings.length > 150
+ s, _ = upload_timings.shift
+ upload_amount -= s
+ end

- _, start_time = upload_timings.first
- _, end_time = upload_timings.last
+ _, start_time = upload_timings.first
+ _, end_time = upload_timings.last

- if start_time == end_time
- next
- end
+ if start_time == end_time
+ next
+ end

- rate = upload_amount / (end_time - start_time)
+ rate = upload_amount / (end_time - start_time)

- if rate / last_rate > 1.3 || rate / last_rate < 0.7
- logger&.info("Uploading #{Etna::Formatting.as_size(rate)} per second")
+ if rate / last_rate > 1.3 || rate / last_rate < 0.7
+ logger&.info("Uploading #{Etna::Formatting.as_size(rate)} per second, #{Etna::Formatting.as_size(remaining)} remaining")

- if rate == 0
- last_rate = 0.0001
- else
- last_rate = rate
- end
+ if rate == 0
+ last_rate = 0.0001
+ else
+ last_rate = rate
  end
  end
  end
- end
- end

- filesystem.mkdir_p(::File.dirname(dest_bin_file))
- filesystem.with_writeable(dest_bin_file, 'w', size_hint: 0) do |io|
- # empty file marking that this etag has been moved, to save a future write.
+ end
  end
  end
  end
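After this rewrite, SyncMetisDataWorkflow writes each file straight to its destination through the filesystem abstraction, with no bin/<etag> cache or tmpdir staging. A minimal usage sketch; the client, filesystem, logger and paths below are assumptions:

    workflow = Etna::Clients::Metis::SyncMetisDataWorkflow.new(
      metis_client: metis_client,
      filesystem: filesystem,
      project_name: 'example',
      bucket_name: 'data',
      logger: logger
    )

    workflow.copy_directory('source/folder', '/local/dest')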