etna 0.1.25 → 0.1.26

Sign up to get free protection for your applications and to get access to all the features.
@@ -351,14 +351,16 @@ class EtnaApp
351
351
 
352
352
  boolean_flags << '--commit'
353
353
  string_flags << '--models'
354
+ string_flags << '--record_names'
354
355
 
355
- def execute(project_name, redcap_tokens, models: "all", commit: false)
356
+ def execute(project_name, redcap_tokens, models: "all", record_names: nil, commit: false)
356
357
  raise "Must provide at least one REDCap token (comma-separated)." unless redcap_tokens.split(',').length > 0
357
358
 
358
359
  puts "NOTE: This is a **preview** of what the data loading will look like. Use the --commit flag to load records into Magma." unless commit
359
360
 
360
361
  polyphemus_client.job(Etna::Clients::Polyphemus::RedcapJobRequest.new(
361
362
  model_names: "all" == models ? "all" : models.split(','),
363
+ record_names: nil == record_names || "existing" == record_names ? record_names : record_names.split(','),
362
364
  redcap_tokens: redcap_tokens.split(','),
363
365
  project_name: project_name,
364
366
  commit: commit
@@ -19,6 +19,7 @@ require_relative './etna/clients'
19
19
  require_relative './etna/csvs'
20
20
  require_relative './etna/environment_scoped'
21
21
  require_relative './etna/filesystem'
22
+ require_relative './etna/formatting'
22
23
 
23
24
  class EtnaApp
24
25
  include Etna::Application
@@ -24,8 +24,8 @@ module Etna
24
24
  @model_walker ||= WalkModelTreeWorkflow.new(magma_crud: magma_crud, logger: logger)
25
25
  end
26
26
 
27
- def with_materialized_dir(&block)
28
- tmp_dir = filesystem.tmpdir
27
+ def materialize_all(dest = filesystem.tmpdir)
28
+ tmpdir = filesystem.tmpdir
29
29
 
30
30
  begin
31
31
  model_walker.walk_from(
@@ -34,12 +34,10 @@ module Etna
34
34
  model_filters: model_filters,
35
35
  ) do |template, document|
36
36
  logger&.info("Materializing #{template.name}##{document[template.identifier]}")
37
- materialize_record(tmp_dir, template, document)
37
+ materialize_record(dest, tmpdir, template, document)
38
38
  end
39
-
40
- yield tmp_dir
41
39
  ensure
42
- filesystem.rm_rf(tmp_dir)
40
+ filesystem.rm_rf(tmpdir)
43
41
  end
44
42
  end
45
43
 
@@ -76,58 +74,35 @@ module Etna
76
74
  end
77
75
  end
78
76
 
79
- def materialize_record(dest_dir, template, record)
77
+ def sync_metis_data_workflow
78
+ @sync_metis_data_workflow ||= Etna::Clients::Metis::SyncMetisDataWorkflow.new(
79
+ metis_client: metis_client,
80
+ logger: logger,
81
+ filesystem: filesystem)
82
+ end
83
+
84
+ def materialize_record(dest_dir, tmpdir, template, record)
80
85
  record_to_serialize = record.dup
81
- metadata_path = metadata_file_name(record_name: record[template.identifier], record_model_name: template.name)
82
86
 
83
87
  each_file(template, record) do |attr_name, url, filename, idx|
84
- metadata = metis_client.file_metadata(url)
85
- etag = metadata[:etag]
86
- size = metadata[:size]
87
-
88
88
  if idx == 0
89
89
  record_to_serialize[attr_name] = []
90
90
  end
91
91
 
92
- dest_file = bin_file_name(etag: etag)
92
+ dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: "_#{attr_name}_#{idx}#{File.extname(filename)}"))
93
+ sync_metis_data_workflow.copy_file(bin_root_dir: dest_dir, tmpdir: tmpdir, dest: dest_file, url: url, stub: stub_files)
93
94
  record_to_serialize[attr_name] << { file: dest_file, original_filename: filename }
94
-
95
- # Already materialized, continue
96
- if filesystem.exist?(dest_file)
97
- next
98
- end
99
-
100
- logger&.info("materializing file #{filename} (#{size} bytes)")
101
- filesystem.mkdir_p(File.dirname(File.join(dest_dir, dest_file)))
102
-
103
- filesystem.with_writeable(File.join(dest_dir, dest_file), "w") do |io|
104
- if stub_files
105
- io.write("(stub) #{filename}: #{size} bytes")
106
- else
107
- metis_client.download_file(url) do |chunk|
108
- if Random.rand < 0.1
109
- logger&.info("Writing #{chunk.length} bytes into #{dest_file}")
110
- end
111
-
112
- io.write(chunk)
113
- end
114
- end
115
- end
116
95
  end
117
96
 
118
- dest_file = File.join(dest_dir, metadata_path)
97
+ dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: '.json'))
119
98
  filesystem.mkdir_p(File.dirname(dest_file))
120
99
  filesystem.with_writeable(dest_file, "w") do |io|
121
100
  io.write(record_to_serialize.to_json)
122
101
  end
123
102
  end
124
103
 
125
- def metadata_file_name(record_name:, record_model_name:)
126
- "#{record_model_name}/#{record_name.gsub(/\s/, '_')}.json"
127
- end
128
-
129
- def bin_file_name(etag:)
130
- "bin/#{etag}"
104
+ def metadata_file_name(record_name:, record_model_name:, ext:)
105
+ "#{record_model_name}/#{record_name.gsub(/\s/, '_')}#{ext}"
131
106
  end
132
107
  end
133
108
  end
@@ -171,7 +171,11 @@ module Etna
171
171
 
172
172
  class FoldersAndFilesResponse < FoldersResponse
173
173
  def files
174
- Files.new(raw[:files])
174
+ Files.new(raw[:files] || [])
175
+ end
176
+
177
+ def folders
178
+ Folders.new(raw[:folders] || [])
175
179
  end
176
180
  end
177
181
 
@@ -253,6 +257,10 @@ module Etna
253
257
  raw[:folder_path]
254
258
  end
255
259
 
260
+ def folder_name
261
+ raw[:folder_name]
262
+ end
263
+
256
264
  def bucket_name
257
265
  raw[:bucket_name]
258
266
  end
@@ -1,2 +1,3 @@
1
1
  require_relative './workflows/metis_download_workflow'
2
2
  require_relative './workflows/metis_upload_workflow'
3
+ require_relative './workflows/sync_metis_data_workflow'
@@ -0,0 +1,102 @@
1
+ require 'ostruct'
2
+ require 'digest'
3
+ require 'fileutils'
4
+ require 'tempfile'
5
+
6
+ module Etna
7
+ module Clients
8
+ class Metis
9
+ class SyncMetisDataWorkflow < Struct.new(:metis_client, :filesystem, :project_name, :bucket_name, :logger, keyword_init: true)
10
+ def copy_directory(src, dest, root = dest, tmpdir = nil)
11
+ own_tmpdir = tmpdir.nil?
12
+ if own_tmpdir
13
+ tmpdir = filesystem.tmpdir
14
+ end
15
+
16
+ begin
17
+ response = metis_client.list_folder(ListFolderRequest.new(project_name: project_name, bucket_name: bucket_name, folder_path: src))
18
+
19
+ response.files.all.each do |file|
20
+ logger&.info("Copying file #{file.file_path} (#{Etna::Formatting.as_size(file.size)})")
21
+ copy_file(bin_root_dir: root, tmpdir: tmpdir, dest: ::File.join(dest, file.file_name), url: file.download_url)
22
+ end
23
+
24
+ response.folders.all.each do |folder|
25
+ copy_directory(::File.join(src, folder.folder_name), ::File.join(dest, folder.folder_name), root, tmpdir)
26
+ end
27
+ ensure
28
+ filesystem.rm_rf(tmpdir) if own_tmpdir
29
+ end
30
+ end
31
+
32
+ def bin_file_name(etag:)
33
+ "bin/#{etag}"
34
+ end
35
+
36
+ def copy_file(bin_root_dir:, tmpdir:, dest:, url:, stub: false)
37
+ metadata = metis_client.file_metadata(url)
38
+ etag = metadata[:etag]
39
+ size = metadata[:size]
40
+
41
+ dest_bin_file = ::File.join(bin_root_dir, bin_file_name(etag: etag))
42
+ # Already materialized, continue
43
+ if filesystem.exist?(dest_bin_file)
44
+ return
45
+ end
46
+
47
+ tmp_file = ::File.join(tmpdir, etag)
48
+
49
+
50
+ upload_timings = []
51
+ upload_amount = 0
52
+ last_rate = 0.00001
53
+
54
+ filesystem.with_writeable(tmp_file, "w", size_hint: size) do |io|
55
+ if stub
56
+ io.write("(stub) #{size} bytes")
57
+ else
58
+ metis_client.download_file(url) do |chunk|
59
+ io.write(chunk)
60
+
61
+ upload_timings << [chunk.length, Time.now.to_f]
62
+ upload_amount += chunk.length
63
+
64
+ if upload_timings.length > 150
65
+ s, _ = upload_timings.shift
66
+ upload_amount -= s
67
+ end
68
+
69
+ _, start_time = upload_timings.first
70
+ _, end_time = upload_timings.last
71
+
72
+ if start_time == end_time
73
+ next
74
+ end
75
+
76
+ rate = upload_amount / (end_time - start_time)
77
+
78
+ if rate / last_rate > 1.3 || rate / last_rate < 0.7
79
+ logger&.info("Uploading #{Etna::Formatting.as_size(rate)} per second")
80
+
81
+ if rate == 0
82
+ last_rate = 0.0001
83
+ else
84
+ last_rate = rate
85
+ end
86
+ end
87
+ end
88
+ end
89
+ end
90
+
91
+ filesystem.mkdir_p(::File.dirname(dest))
92
+ filesystem.mv(tmp_file, dest)
93
+
94
+ filesystem.mkdir_p(::File.dirname(dest_bin_file))
95
+ filesystem.with_writeable(dest_bin_file, 'w', size_hint: 0) do |io|
96
+ # empty file
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
102
+ end
@@ -13,11 +13,11 @@ module Etna
13
13
  end
14
14
  end
15
15
 
16
- class RedcapJobRequest < Struct.new(:model_names, :redcap_tokens, :commit, :project_name, keyword_init: true)
16
+ class RedcapJobRequest < Struct.new(:model_names, :redcap_tokens, :commit, :project_name, :record_names, keyword_init: true)
17
17
  include JsonSerializableStruct
18
18
 
19
19
  def initialize(**params)
20
- super({model_names: 'all', commit: false}.update(params))
20
+ super({model_names: 'all', record_names: nil, commit: false}.update(params))
21
21
  end
22
22
 
23
23
  def to_json
@@ -26,7 +26,8 @@ module Etna
26
26
  job_params: {
27
27
  commit: commit,
28
28
  model_names: model_names,
29
- redcap_tokens: redcap_tokens
29
+ redcap_tokens: redcap_tokens,
30
+ record_names: record_names
30
31
  }
31
32
  }.to_json
32
33
  end
@@ -1,8 +1,12 @@
1
+ require 'yaml'
2
+ require 'fileutils'
3
+ require 'open3'
4
+
1
5
  module Etna
2
6
  # A class that encapsulates opening / reading file system entries that abstracts normal file access in order
3
7
  # to make stubbing, substituting, and testing easier.
4
8
  class Filesystem
5
- def with_writeable(dest, opts = 'w', &block)
9
+ def with_writeable(dest, opts = 'w', size_hint: nil, &block)
6
10
  ::File.open(dest, opts, &block)
7
11
  end
8
12
 
@@ -11,29 +15,185 @@ module Etna
11
15
  end
12
16
 
13
17
  def mkdir_p(dir)
14
- require 'fileutils'
15
18
  ::FileUtils.mkdir_p(dir)
16
19
  end
17
20
 
18
21
  def rm_rf(dir)
19
- require 'fileutils'
20
- FileUtils.rm_rf(dir)
22
+ ::FileUtils.rm_rf(dir)
21
23
  end
22
24
 
23
25
  def tmpdir
24
- ::Dir.tmpdir
26
+ ::Dir.mktmpdir
25
27
  end
26
28
 
27
29
  def exist?(src)
28
30
  ::File.exist?(src)
29
31
  end
30
32
 
33
+ def mv(src, dest)
34
+ ::FileUtils.mv(src, dest)
35
+ end
36
+
31
37
  class EmptyIO < StringIO
32
38
  def write(*args)
33
39
  # Do nothing -- always leave empty
34
40
  end
35
41
  end
36
42
 
43
+ module WithPipeConsumer
44
+ def mkio(file, opts, size_hint: nil, &block)
45
+ rd, wd = IO.pipe
46
+
47
+ pid = spawn(*mkcommand(rd, wd, file, opts, size_hint: size_hint))
48
+ q = Queue.new
49
+
50
+ closer = Thread.new do
51
+ _, status = Process.wait2 pid
52
+ q << status
53
+ end
54
+
55
+ begin
56
+ if opts.include?('w')
57
+ rd.close
58
+ yield wd
59
+ wd.close
60
+ else
61
+ wd.close
62
+ yield rd
63
+ rd.close
64
+ end
65
+
66
+ closer.join
67
+ rescue => e
68
+ wd.close
69
+ rd.close
70
+ Process.kill("HUP", pid)
71
+ raise e
72
+ end
73
+
74
+ status = q.pop
75
+ raise IOError.new("Failed to run external process, got status code #{status}") unless status.success?
76
+ end
77
+ end
78
+
79
+ class AsperaCliFilesystem < Filesystem
80
+ include WithPipeConsumer
81
+
82
+ def initialize(ascli_bin:, ascp_bin:, host:, username:, password: nil, key_file: nil, port: 33001)
83
+ @ascli_bin = ascli_bin
84
+ @ascp_bin = ascp_bin
85
+ @username = username
86
+ @password = password
87
+ @key_file = key_file
88
+ @host = host
89
+ @port = port
90
+
91
+ @config_file = File.join(Dir.mktmpdir, "config.yml")
92
+ config = {}
93
+ config["config"] = {"version" => `#{ascli_bin} --version`.chomp}
94
+ config["default"] = {"server" => "clifilesystem"}
95
+ server_config = config["clifilesystem"] = {
96
+ "url" => "ssh://#{host}:#{port}",
97
+ "username" => username,
98
+ "ssh_options" => {append_all_supported_algorithms: true},
99
+ }
100
+
101
+ if password
102
+ server_config["password"] = password
103
+ elsif key_file
104
+ server_config["ssh_keys"] = key_file
105
+ else
106
+ raise "One of password or key_file must be provided"
107
+ end
108
+
109
+ ::File.open(@config_file, "w") do |file|
110
+ file.write(config.to_yaml)
111
+ end
112
+ end
113
+
114
+ def run_ascli_cmd(cmd, *opts)
115
+ output, status = Open3.capture2(@ascli_bin, "server", cmd, *opts, "--format=json", "--config=#{@config_file}")
116
+
117
+ if status.success?
118
+ return JSON.parse(output)
119
+ end
120
+
121
+ nil
122
+ end
123
+
124
+ def with_writeable(dest, opts = 'w', size_hint: nil, &block)
125
+ mkio(dest, opts, size_hint: size_hint, &block)
126
+ end
127
+
128
+ def with_readable(src, opts = 'r', &block)
129
+ mkio(src, opts, &block)
130
+ end
131
+
132
+ def mkdir_p(dir)
133
+ raise "Failed to mkdir #{dir}" unless run_ascli_cmd("mkdir", dir)
134
+ end
135
+
136
+ def rm_rf(dir)
137
+ raise "Failed to rm_rf #{dir}" unless run_ascli_cmd("rm", dir)
138
+ end
139
+
140
+ def tmpdir
141
+ tmpdir = "/Upload/Temp/#{SecureRandom.hex}"
142
+ mkdir_p(tmpdir)
143
+ tmpdir
144
+ end
145
+
146
+ def exist?(src)
147
+ !run_ascli_cmd("ls", src).nil?
148
+ end
149
+
150
+ def mv(src, dest)
151
+ raise "Failed to mv #{src} to #{dest}" unless run_ascli_cmd("mv", src, dest)
152
+ end
153
+
154
+ def mkcommand(rd, wd, file, opts, size_hint: nil)
155
+ env = {}
156
+ cmd = [env, @ascp_bin]
157
+
158
+ if @password
159
+ env['ASPERA_SCP_PASS'] = @password
160
+ else
161
+ cmd << "-i"
162
+ cmd << @key_file
163
+ end
164
+
165
+ cmd << "-P"
166
+ cmd << @port.to_s
167
+
168
+ remote_path = file
169
+ # https://download.asperasoft.com/download/docs/entsrv/3.9.1/es_admin_linux/webhelp/index.html#dita/stdio_2.html
170
+ local_path = "stdio://"
171
+ if size_hint
172
+ local_path += "/?#{size_hint}"
173
+ end
174
+
175
+ if opts.include?('r')
176
+ cmd << '--mode=recv'
177
+ cmd << "--host=#{@host}"
178
+ cmd << "--user=#{@username}"
179
+ cmd << remote_path
180
+ cmd << local_path
181
+
182
+ cmd << { out: wd }
183
+ elsif opts.include?('w')
184
+ cmd << '--mode=send'
185
+ cmd << "--host=#{@host}"
186
+ cmd << "--user=#{@username}"
187
+ cmd << local_path
188
+ cmd << remote_path
189
+
190
+ cmd << { in: rd }
191
+ end
192
+
193
+ cmd
194
+ end
195
+ end
196
+
37
197
  class Mock < Filesystem
38
198
  def initialize(&new_io)
39
199
  @files = {}
@@ -49,7 +209,7 @@ module Etna
49
209
  end
50
210
  end
51
211
 
52
- def with_writeable(dest, opts = 'w', &block)
212
+ def with_writeable(dest, opts = 'w', size_hint: nil, &block)
53
213
  if @dirs.include?(dest)
54
214
  raise IOError.new("Path #{dest} is a directory")
55
215
  end
@@ -68,21 +228,36 @@ module Etna
68
228
  end
69
229
  end
70
230
 
231
+ def mv(src, dest)
232
+ if exist?(dest)
233
+ raise "#{dest} already exists, cannot move"
234
+ end
235
+
236
+ if @dirs.include?(src)
237
+ @dirs[dest] = @dirs.delete(src)
238
+ elsif @files.include?(src)
239
+ @files[dest] = @files.delete(src)
240
+ else
241
+ raise "#{src} does not exist, cannot move"
242
+ end
243
+ end
244
+
71
245
  def tmpdir
72
246
  require 'securerandom'
73
247
  "/tmp-#{SecureRandom::uuid}"
74
248
  end
75
249
 
76
250
  def with_readable(src, opts = 'r', &block)
77
- if @dirs.include?(dest)
78
- raise IOError.new("Path #{dest} is a directory")
251
+ if @dirs.include?(src)
252
+ raise IOError.new("Path #{src} is a directory")
79
253
  end
80
254
 
81
- if !@files.include?(dest)
82
- raise IOError.new("Path #{dest} does not exist")
255
+ if !@files.include?(src)
256
+ raise IOError.new("Path #{src} does not exist")
83
257
  end
84
258
 
85
- yield (@files[dest] ||= mkio(src, opts))
259
+ @files[src].rewind
260
+ yield @files[src]
86
261
  end
87
262
 
88
263
  def exist?(src)