etna 0.1.25 → 0.1.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etna.completion +3 -26810
- data/lib/commands.rb +3 -1
- data/lib/etna.rb +1 -0
- data/lib/etna/clients/magma/workflows/materialize_magma_record_files_workflow.rb +17 -42
- data/lib/etna/clients/metis/models.rb +9 -1
- data/lib/etna/clients/metis/workflows.rb +1 -0
- data/lib/etna/clients/metis/workflows/sync_metis_data_workflow.rb +102 -0
- data/lib/etna/clients/polyphemus/models.rb +4 -3
- data/lib/etna/filesystem.rb +186 -11
- data/lib/etna/formatting.rb +16 -0
- metadata +4 -2
data/lib/commands.rb
CHANGED
@@ -351,14 +351,16 @@ class EtnaApp
|
|
351
351
|
|
352
352
|
boolean_flags << '--commit'
|
353
353
|
string_flags << '--models'
|
354
|
+
string_flags << '--record_names'
|
354
355
|
|
355
|
-
def execute(project_name, redcap_tokens, models: "all", commit: false)
|
356
|
+
def execute(project_name, redcap_tokens, models: "all", record_names: nil, commit: false)
|
356
357
|
raise "Must provide at least one REDCap token (comma-separated)." unless redcap_tokens.split(',').length > 0
|
357
358
|
|
358
359
|
puts "NOTE: This is a **preview** of what the data loading will look like. Use the --commit flag to load records into Magma." unless commit
|
359
360
|
|
360
361
|
polyphemus_client.job(Etna::Clients::Polyphemus::RedcapJobRequest.new(
|
361
362
|
model_names: "all" == models ? "all" : models.split(','),
|
363
|
+
record_names: nil == record_names || "existing" == record_names ? record_names : record_names.split(','),
|
362
364
|
redcap_tokens: redcap_tokens.split(','),
|
363
365
|
project_name: project_name,
|
364
366
|
commit: commit
|
data/lib/etna.rb
CHANGED
@@ -24,8 +24,8 @@ module Etna
|
|
24
24
|
@model_walker ||= WalkModelTreeWorkflow.new(magma_crud: magma_crud, logger: logger)
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
|
27
|
+
def materialize_all(dest = filesystem.tmpdir)
|
28
|
+
tmpdir = filesystem.tmpdir
|
29
29
|
|
30
30
|
begin
|
31
31
|
model_walker.walk_from(
|
@@ -34,12 +34,10 @@ module Etna
|
|
34
34
|
model_filters: model_filters,
|
35
35
|
) do |template, document|
|
36
36
|
logger&.info("Materializing #{template.name}##{document[template.identifier]}")
|
37
|
-
materialize_record(
|
37
|
+
materialize_record(dest, tmpdir, template, document)
|
38
38
|
end
|
39
|
-
|
40
|
-
yield tmp_dir
|
41
39
|
ensure
|
42
|
-
filesystem.rm_rf(
|
40
|
+
filesystem.rm_rf(tmpdir)
|
43
41
|
end
|
44
42
|
end
|
45
43
|
|
@@ -76,58 +74,35 @@ module Etna
|
|
76
74
|
end
|
77
75
|
end
|
78
76
|
|
79
|
-
def
|
77
|
+
def sync_metis_data_workflow
|
78
|
+
@sync_metis_data_workflow ||= Etna::Clients::Metis::SyncMetisDataWorkflow.new(
|
79
|
+
metis_client: metis_client,
|
80
|
+
logger: logger,
|
81
|
+
filesystem: filesystem)
|
82
|
+
end
|
83
|
+
|
84
|
+
def materialize_record(dest_dir, tmpdir, template, record)
|
80
85
|
record_to_serialize = record.dup
|
81
|
-
metadata_path = metadata_file_name(record_name: record[template.identifier], record_model_name: template.name)
|
82
86
|
|
83
87
|
each_file(template, record) do |attr_name, url, filename, idx|
|
84
|
-
metadata = metis_client.file_metadata(url)
|
85
|
-
etag = metadata[:etag]
|
86
|
-
size = metadata[:size]
|
87
|
-
|
88
88
|
if idx == 0
|
89
89
|
record_to_serialize[attr_name] = []
|
90
90
|
end
|
91
91
|
|
92
|
-
dest_file =
|
92
|
+
dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: "_#{attr_name}_#{idx}#{File.extname(filename)}"))
|
93
|
+
sync_metis_data_workflow.copy_file(bin_root_dir: dest_dir, tmpdir: tmpdir, dest: dest_file, url: url, stub: stub_files)
|
93
94
|
record_to_serialize[attr_name] << { file: dest_file, original_filename: filename }
|
94
|
-
|
95
|
-
# Already materialized, continue
|
96
|
-
if filesystem.exist?(dest_file)
|
97
|
-
next
|
98
|
-
end
|
99
|
-
|
100
|
-
logger&.info("materializing file #{filename} (#{size} bytes)")
|
101
|
-
filesystem.mkdir_p(File.dirname(File.join(dest_dir, dest_file)))
|
102
|
-
|
103
|
-
filesystem.with_writeable(File.join(dest_dir, dest_file), "w") do |io|
|
104
|
-
if stub_files
|
105
|
-
io.write("(stub) #{filename}: #{size} bytes")
|
106
|
-
else
|
107
|
-
metis_client.download_file(url) do |chunk|
|
108
|
-
if Random.rand < 0.1
|
109
|
-
logger&.info("Writing #{chunk.length} bytes into #{dest_file}")
|
110
|
-
end
|
111
|
-
|
112
|
-
io.write(chunk)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
116
95
|
end
|
117
96
|
|
118
|
-
dest_file = File.join(dest_dir,
|
97
|
+
dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: '.json'))
|
119
98
|
filesystem.mkdir_p(File.dirname(dest_file))
|
120
99
|
filesystem.with_writeable(dest_file, "w") do |io|
|
121
100
|
io.write(record_to_serialize.to_json)
|
122
101
|
end
|
123
102
|
end
|
124
103
|
|
125
|
-
def metadata_file_name(record_name:, record_model_name:)
|
126
|
-
"#{record_model_name}/#{record_name.gsub(/\s/, '_')}
|
127
|
-
end
|
128
|
-
|
129
|
-
def bin_file_name(etag:)
|
130
|
-
"bin/#{etag}"
|
104
|
+
def metadata_file_name(record_name:, record_model_name:, ext:)
|
105
|
+
"#{record_model_name}/#{record_name.gsub(/\s/, '_')}#{ext}"
|
131
106
|
end
|
132
107
|
end
|
133
108
|
end
|
@@ -171,7 +171,11 @@ module Etna
|
|
171
171
|
|
172
172
|
class FoldersAndFilesResponse < FoldersResponse
|
173
173
|
def files
|
174
|
-
Files.new(raw[:files])
|
174
|
+
Files.new(raw[:files] || [])
|
175
|
+
end
|
176
|
+
|
177
|
+
def folders
|
178
|
+
Folders.new(raw[:folders] || [])
|
175
179
|
end
|
176
180
|
end
|
177
181
|
|
@@ -253,6 +257,10 @@ module Etna
|
|
253
257
|
raw[:folder_path]
|
254
258
|
end
|
255
259
|
|
260
|
+
def folder_name
|
261
|
+
raw[:folder_name]
|
262
|
+
end
|
263
|
+
|
256
264
|
def bucket_name
|
257
265
|
raw[:bucket_name]
|
258
266
|
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
require 'digest'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'tempfile'
|
5
|
+
|
6
|
+
module Etna
|
7
|
+
module Clients
|
8
|
+
class Metis
|
9
|
+
class SyncMetisDataWorkflow < Struct.new(:metis_client, :filesystem, :project_name, :bucket_name, :logger, keyword_init: true)
|
10
|
+
def copy_directory(src, dest, root = dest, tmpdir = nil)
|
11
|
+
own_tmpdir = tmpdir.nil?
|
12
|
+
if own_tmpdir
|
13
|
+
tmpdir = filesystem.tmpdir
|
14
|
+
end
|
15
|
+
|
16
|
+
begin
|
17
|
+
response = metis_client.list_folder(ListFolderRequest.new(project_name: project_name, bucket_name: bucket_name, folder_path: src))
|
18
|
+
|
19
|
+
response.files.all.each do |file|
|
20
|
+
logger&.info("Copying file #{file.file_path} (#{Etna::Formatting.as_size(file.size)})")
|
21
|
+
copy_file(bin_root_dir: root, tmpdir: tmpdir, dest: ::File.join(dest, file.file_name), url: file.download_url)
|
22
|
+
end
|
23
|
+
|
24
|
+
response.folders.all.each do |folder|
|
25
|
+
copy_directory(::File.join(src, folder.folder_name), ::File.join(dest, folder.folder_name), root, tmpdir)
|
26
|
+
end
|
27
|
+
ensure
|
28
|
+
filesystem.rm_rf(tmpdir) if own_tmpdir
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
def bin_file_name(etag:)
|
33
|
+
"bin/#{etag}"
|
34
|
+
end
|
35
|
+
|
36
|
+
def copy_file(bin_root_dir:, tmpdir:, dest:, url:, stub: false)
|
37
|
+
metadata = metis_client.file_metadata(url)
|
38
|
+
etag = metadata[:etag]
|
39
|
+
size = metadata[:size]
|
40
|
+
|
41
|
+
dest_bin_file = ::File.join(bin_root_dir, bin_file_name(etag: etag))
|
42
|
+
# Already materialized, continue
|
43
|
+
if filesystem.exist?(dest_bin_file)
|
44
|
+
return
|
45
|
+
end
|
46
|
+
|
47
|
+
tmp_file = ::File.join(tmpdir, etag)
|
48
|
+
|
49
|
+
|
50
|
+
upload_timings = []
|
51
|
+
upload_amount = 0
|
52
|
+
last_rate = 0.00001
|
53
|
+
|
54
|
+
filesystem.with_writeable(tmp_file, "w", size_hint: size) do |io|
|
55
|
+
if stub
|
56
|
+
io.write("(stub) #{size} bytes")
|
57
|
+
else
|
58
|
+
metis_client.download_file(url) do |chunk|
|
59
|
+
io.write(chunk)
|
60
|
+
|
61
|
+
upload_timings << [chunk.length, Time.now.to_f]
|
62
|
+
upload_amount += chunk.length
|
63
|
+
|
64
|
+
if upload_timings.length > 150
|
65
|
+
s, _ = upload_timings.shift
|
66
|
+
upload_amount -= s
|
67
|
+
end
|
68
|
+
|
69
|
+
_, start_time = upload_timings.first
|
70
|
+
_, end_time = upload_timings.last
|
71
|
+
|
72
|
+
if start_time == end_time
|
73
|
+
next
|
74
|
+
end
|
75
|
+
|
76
|
+
rate = upload_amount / (end_time - start_time)
|
77
|
+
|
78
|
+
if rate / last_rate > 1.3 || rate / last_rate < 0.7
|
79
|
+
logger&.info("Uploading #{Etna::Formatting.as_size(rate)} per second")
|
80
|
+
|
81
|
+
if rate == 0
|
82
|
+
last_rate = 0.0001
|
83
|
+
else
|
84
|
+
last_rate = rate
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
filesystem.mkdir_p(::File.dirname(dest))
|
92
|
+
filesystem.mv(tmp_file, dest)
|
93
|
+
|
94
|
+
filesystem.mkdir_p(::File.dirname(dest_bin_file))
|
95
|
+
filesystem.with_writeable(dest_bin_file, 'w', size_hint: 0) do |io|
|
96
|
+
# empty file
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
@@ -13,11 +13,11 @@ module Etna
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
class RedcapJobRequest < Struct.new(:model_names, :redcap_tokens, :commit, :project_name, keyword_init: true)
|
16
|
+
class RedcapJobRequest < Struct.new(:model_names, :redcap_tokens, :commit, :project_name, :record_names, keyword_init: true)
|
17
17
|
include JsonSerializableStruct
|
18
18
|
|
19
19
|
def initialize(**params)
|
20
|
-
super({model_names: 'all', commit: false}.update(params))
|
20
|
+
super({model_names: 'all', record_names: nil, commit: false}.update(params))
|
21
21
|
end
|
22
22
|
|
23
23
|
def to_json
|
@@ -26,7 +26,8 @@ module Etna
|
|
26
26
|
job_params: {
|
27
27
|
commit: commit,
|
28
28
|
model_names: model_names,
|
29
|
-
redcap_tokens: redcap_tokens
|
29
|
+
redcap_tokens: redcap_tokens,
|
30
|
+
record_names: record_names
|
30
31
|
}
|
31
32
|
}.to_json
|
32
33
|
end
|
data/lib/etna/filesystem.rb
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'open3'
|
4
|
+
|
1
5
|
module Etna
|
2
6
|
# A class that encapsulates opening / reading file system entries that abstracts normal file access in order
|
3
7
|
# to make stubbing, substituting, and testing easier.
|
4
8
|
class Filesystem
|
5
|
-
def with_writeable(dest, opts = 'w', &block)
|
9
|
+
def with_writeable(dest, opts = 'w', size_hint: nil, &block)
|
6
10
|
::File.open(dest, opts, &block)
|
7
11
|
end
|
8
12
|
|
@@ -11,29 +15,185 @@ module Etna
|
|
11
15
|
end
|
12
16
|
|
13
17
|
def mkdir_p(dir)
|
14
|
-
require 'fileutils'
|
15
18
|
::FileUtils.mkdir_p(dir)
|
16
19
|
end
|
17
20
|
|
18
21
|
def rm_rf(dir)
|
19
|
-
|
20
|
-
FileUtils.rm_rf(dir)
|
22
|
+
::FileUtils.rm_rf(dir)
|
21
23
|
end
|
22
24
|
|
23
25
|
def tmpdir
|
24
|
-
::Dir.
|
26
|
+
::Dir.mktmpdir
|
25
27
|
end
|
26
28
|
|
27
29
|
def exist?(src)
|
28
30
|
::File.exist?(src)
|
29
31
|
end
|
30
32
|
|
33
|
+
def mv(src, dest)
|
34
|
+
::FileUtils.mv(src, dest)
|
35
|
+
end
|
36
|
+
|
31
37
|
class EmptyIO < StringIO
|
32
38
|
def write(*args)
|
33
39
|
# Do nothing -- always leave empty
|
34
40
|
end
|
35
41
|
end
|
36
42
|
|
43
|
+
module WithPipeConsumer
|
44
|
+
def mkio(file, opts, size_hint: nil, &block)
|
45
|
+
rd, wd = IO.pipe
|
46
|
+
|
47
|
+
pid = spawn(*mkcommand(rd, wd, file, opts, size_hint: size_hint))
|
48
|
+
q = Queue.new
|
49
|
+
|
50
|
+
closer = Thread.new do
|
51
|
+
_, status = Process.wait2 pid
|
52
|
+
q << status
|
53
|
+
end
|
54
|
+
|
55
|
+
begin
|
56
|
+
if opts.include?('w')
|
57
|
+
rd.close
|
58
|
+
yield wd
|
59
|
+
wd.close
|
60
|
+
else
|
61
|
+
wd.close
|
62
|
+
yield rd
|
63
|
+
rd.close
|
64
|
+
end
|
65
|
+
|
66
|
+
closer.join
|
67
|
+
rescue => e
|
68
|
+
wd.close
|
69
|
+
rd.close
|
70
|
+
Process.kill("HUP", pid)
|
71
|
+
raise e
|
72
|
+
end
|
73
|
+
|
74
|
+
status = q.pop
|
75
|
+
raise IOError.new("Failed to run external process, got status code #{status}") unless status.success?
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
class AsperaCliFilesystem < Filesystem
|
80
|
+
include WithPipeConsumer
|
81
|
+
|
82
|
+
def initialize(ascli_bin:, ascp_bin:, host:, username:, password: nil, key_file: nil, port: 33001)
|
83
|
+
@ascli_bin = ascli_bin
|
84
|
+
@ascp_bin = ascp_bin
|
85
|
+
@username = username
|
86
|
+
@password = password
|
87
|
+
@key_file = key_file
|
88
|
+
@host = host
|
89
|
+
@port = port
|
90
|
+
|
91
|
+
@config_file = File.join(Dir.mktmpdir, "config.yml")
|
92
|
+
config = {}
|
93
|
+
config["config"] = {"version" => `#{ascli_bin} --version`.chomp}
|
94
|
+
config["default"] = {"server" => "clifilesystem"}
|
95
|
+
server_config = config["clifilesystem"] = {
|
96
|
+
"url" => "ssh://#{host}:#{port}",
|
97
|
+
"username" => username,
|
98
|
+
"ssh_options" => {append_all_supported_algorithms: true},
|
99
|
+
}
|
100
|
+
|
101
|
+
if password
|
102
|
+
server_config["password"] = password
|
103
|
+
elsif key_file
|
104
|
+
server_config["ssh_keys"] = key_file
|
105
|
+
else
|
106
|
+
raise "One of password or key_file must be provided"
|
107
|
+
end
|
108
|
+
|
109
|
+
::File.open(@config_file, "w") do |file|
|
110
|
+
file.write(config.to_yaml)
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
def run_ascli_cmd(cmd, *opts)
|
115
|
+
output, status = Open3.capture2(@ascli_bin, "server", cmd, *opts, "--format=json", "--config=#{@config_file}")
|
116
|
+
|
117
|
+
if status.success?
|
118
|
+
return JSON.parse(output)
|
119
|
+
end
|
120
|
+
|
121
|
+
nil
|
122
|
+
end
|
123
|
+
|
124
|
+
def with_writeable(dest, opts = 'w', size_hint: nil, &block)
|
125
|
+
mkio(dest, opts, size_hint: size_hint, &block)
|
126
|
+
end
|
127
|
+
|
128
|
+
def with_readable(src, opts = 'r', &block)
|
129
|
+
mkio(src, opts, &block)
|
130
|
+
end
|
131
|
+
|
132
|
+
def mkdir_p(dir)
|
133
|
+
raise "Failed to mkdir #{dir}" unless run_ascli_cmd("mkdir", dir)
|
134
|
+
end
|
135
|
+
|
136
|
+
def rm_rf(dir)
|
137
|
+
raise "Failed to rm_rf #{dir}" unless run_ascli_cmd("rm", dir)
|
138
|
+
end
|
139
|
+
|
140
|
+
def tmpdir
|
141
|
+
tmpdir = "/Upload/Temp/#{SecureRandom.hex}"
|
142
|
+
mkdir_p(tmpdir)
|
143
|
+
tmpdir
|
144
|
+
end
|
145
|
+
|
146
|
+
def exist?(src)
|
147
|
+
!run_ascli_cmd("ls", src).nil?
|
148
|
+
end
|
149
|
+
|
150
|
+
def mv(src, dest)
|
151
|
+
raise "Failed to mv #{src} to #{dest}" unless run_ascli_cmd("mv", src, dest)
|
152
|
+
end
|
153
|
+
|
154
|
+
def mkcommand(rd, wd, file, opts, size_hint: nil)
|
155
|
+
env = {}
|
156
|
+
cmd = [env, @ascp_bin]
|
157
|
+
|
158
|
+
if @password
|
159
|
+
env['ASPERA_SCP_PASS'] = @password
|
160
|
+
else
|
161
|
+
cmd << "-i"
|
162
|
+
cmd << @key_file
|
163
|
+
end
|
164
|
+
|
165
|
+
cmd << "-P"
|
166
|
+
cmd << @port.to_s
|
167
|
+
|
168
|
+
remote_path = file
|
169
|
+
# https://download.asperasoft.com/download/docs/entsrv/3.9.1/es_admin_linux/webhelp/index.html#dita/stdio_2.html
|
170
|
+
local_path = "stdio://"
|
171
|
+
if size_hint
|
172
|
+
local_path += "/?#{size_hint}"
|
173
|
+
end
|
174
|
+
|
175
|
+
if opts.include?('r')
|
176
|
+
cmd << '--mode=recv'
|
177
|
+
cmd << "--host=#{@host}"
|
178
|
+
cmd << "--user=#{@username}"
|
179
|
+
cmd << remote_path
|
180
|
+
cmd << local_path
|
181
|
+
|
182
|
+
cmd << { out: wd }
|
183
|
+
elsif opts.include?('w')
|
184
|
+
cmd << '--mode=send'
|
185
|
+
cmd << "--host=#{@host}"
|
186
|
+
cmd << "--user=#{@username}"
|
187
|
+
cmd << local_path
|
188
|
+
cmd << remote_path
|
189
|
+
|
190
|
+
cmd << { in: rd }
|
191
|
+
end
|
192
|
+
|
193
|
+
cmd
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
37
197
|
class Mock < Filesystem
|
38
198
|
def initialize(&new_io)
|
39
199
|
@files = {}
|
@@ -49,7 +209,7 @@ module Etna
|
|
49
209
|
end
|
50
210
|
end
|
51
211
|
|
52
|
-
def with_writeable(dest, opts = 'w', &block)
|
212
|
+
def with_writeable(dest, opts = 'w', size_hint: nil, &block)
|
53
213
|
if @dirs.include?(dest)
|
54
214
|
raise IOError.new("Path #{dest} is a directory")
|
55
215
|
end
|
@@ -68,21 +228,36 @@ module Etna
|
|
68
228
|
end
|
69
229
|
end
|
70
230
|
|
231
|
+
def mv(src, dest)
|
232
|
+
if exist?(dest)
|
233
|
+
raise "#{dest} already exists, cannot move"
|
234
|
+
end
|
235
|
+
|
236
|
+
if @dirs.include?(src)
|
237
|
+
@dirs[dest] = @dirs.delete(src)
|
238
|
+
elsif @files.include?(src)
|
239
|
+
@files[dest] = @files.delete(src)
|
240
|
+
else
|
241
|
+
raise "#{src} does not exist, cannot move"
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
71
245
|
def tmpdir
|
72
246
|
require 'securerandom'
|
73
247
|
"/tmp-#{SecureRandom::uuid}"
|
74
248
|
end
|
75
249
|
|
76
250
|
def with_readable(src, opts = 'r', &block)
|
77
|
-
if @dirs.include?(
|
78
|
-
raise IOError.new("Path #{
|
251
|
+
if @dirs.include?(src)
|
252
|
+
raise IOError.new("Path #{src} is a directory")
|
79
253
|
end
|
80
254
|
|
81
|
-
if !@files.include?(
|
82
|
-
raise IOError.new("Path #{
|
255
|
+
if !@files.include?(src)
|
256
|
+
raise IOError.new("Path #{src} does not exist")
|
83
257
|
end
|
84
258
|
|
85
|
-
|
259
|
+
@files[src].rewind
|
260
|
+
yield @files[src]
|
86
261
|
end
|
87
262
|
|
88
263
|
def exist?(src)
|