cnvrg 1.9.9.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/cnvrg +9 -0
- data/cnvrg.gemspec +47 -0
- data/lib/cnvrg.rb +7 -0
- data/lib/cnvrg/Images.rb +351 -0
- data/lib/cnvrg/api.rb +247 -0
- data/lib/cnvrg/api_v2.rb +14 -0
- data/lib/cnvrg/auth.rb +79 -0
- data/lib/cnvrg/cli.rb +5715 -0
- data/lib/cnvrg/cli/flow.rb +166 -0
- data/lib/cnvrg/cli/library_cli.rb +33 -0
- data/lib/cnvrg/cli/subcommand.rb +28 -0
- data/lib/cnvrg/cli/task.rb +116 -0
- data/lib/cnvrg/colors.rb +8 -0
- data/lib/cnvrg/connect_job_ssh.rb +31 -0
- data/lib/cnvrg/data.rb +335 -0
- data/lib/cnvrg/datafiles.rb +1325 -0
- data/lib/cnvrg/dataset.rb +892 -0
- data/lib/cnvrg/downloader/client.rb +101 -0
- data/lib/cnvrg/downloader/clients/azure_client.rb +45 -0
- data/lib/cnvrg/downloader/clients/gcp_client.rb +50 -0
- data/lib/cnvrg/downloader/clients/s3_client.rb +78 -0
- data/lib/cnvrg/experiment.rb +209 -0
- data/lib/cnvrg/files.rb +1047 -0
- data/lib/cnvrg/flow.rb +137 -0
- data/lib/cnvrg/helpers.rb +422 -0
- data/lib/cnvrg/helpers/agent.rb +188 -0
- data/lib/cnvrg/helpers/executer.rb +213 -0
- data/lib/cnvrg/hyper.rb +21 -0
- data/lib/cnvrg/image.rb +113 -0
- data/lib/cnvrg/image_cli.rb +25 -0
- data/lib/cnvrg/job_cli.rb +73 -0
- data/lib/cnvrg/job_ssh.rb +48 -0
- data/lib/cnvrg/logger.rb +111 -0
- data/lib/cnvrg/org_helpers.rb +5 -0
- data/lib/cnvrg/project.rb +822 -0
- data/lib/cnvrg/result.rb +29 -0
- data/lib/cnvrg/runner.rb +49 -0
- data/lib/cnvrg/ssh.rb +94 -0
- data/lib/cnvrg/storage.rb +128 -0
- data/lib/cnvrg/task.rb +165 -0
- data/lib/cnvrg/version.rb +3 -0
- metadata +460 -0
@@ -0,0 +1,892 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
module Cnvrg
|
3
|
+
class Dataset
|
4
|
+
attr_reader :slug, :owner, :title, :local_path, :working_dir
|
5
|
+
|
6
|
+
RemoteURL ||= "https://cnvrg.io"
|
7
|
+
IDXParallelThreads ||= Cnvrg::Helpers.parallel_threads
|
8
|
+
IDXParallelProcesses ||= Parallel.processor_count
|
9
|
+
|
10
|
+
# Builds a Dataset handle from one of three sources, tried in this order:
# a local dataset directory, a dataset_info hash, or a dataset URL.
#
# project_home - directory that holds .cnvrg/config.yml (optional).
# dataset_url  - URL handed to Helpers.extract_owner_slug_from_url.
# dataset_info - object read through its :slug and :owner keys.
#
# Any StandardError raised while resolving the source is swallowed, so the
# instance can end up with some attributes unset.
def initialize(project_home = '', dataset_url: '', dataset_info: '')
  @info = {}
  if project_home.present?
    @local_path = @working_dir = project_home
    settings = YAML.load_file(project_home + "/.cnvrg/config.yml")
    @title, @slug, @owner = settings[:dataset_name], settings[:dataset_slug], settings[:owner]
  elsif dataset_info.present?
    @title = @slug = dataset_info[:slug]
    @owner = dataset_info[:owner]
    @local_path = Dir.pwd
  else
    parsed_owner, parsed_slug = Cnvrg::Helpers.extract_owner_slug_from_url(dataset_url, 'datasets')
    @title = @slug = parsed_slug
    @owner = parsed_owner
    @local_path = Dir.pwd
  end
rescue StandardError
  # Intentionally ignored: construction never raises.
end
|
36
|
+
|
37
|
+
# Tells whether the server describes this dataset as a soft-link dataset.
#
# Reads the description through get_dataset (which memoises it in
# @dataset_call) instead of touching @dataset_call directly, so calling this
# before get_dataset no longer fails with NoMethodError on nil. This makes it
# agree with softlinked?.
#
# Returns true or false.
def soft_linked?
  get_dataset["dataset_type"] == "soft_link_dataset"
end
|
40
|
+
|
41
|
+
# Prepares "<cwd>/<slug>" as the local home of this dataset.
#
# remote - when false and the directory already exists, the user is asked
#          before the directory is wiped; answering "no" exits the process
#          with status 1. When true the directory is wiped without asking.
#
# Uses Dir.exist? (Dir.exists? was removed in Ruby 3.2).
#
# Returns true. The result of Dataset.clone is not checked.
def init_home(remote: false)
  dataset_home = File.join(Dir.pwd, @slug)
  if Dir.exist? dataset_home
    if !remote
      Cnvrg::CLI.log_message("Error: Conflict with dir #{@slug}", Thor::Shell::Color::RED)
      if Thor::Shell::Basic.new.no? "Sync to repository anyway? (current data might lost)", Thor::Shell::Color::YELLOW
        Cnvrg::CLI.log_message("Remove dir in order to clone #{@slug}", Thor::Shell::Color::RED)
        exit(1)
      end
    end
    FileUtils.rm_rf(dataset_home)
  end

  # The slug doubles as the directory name of the clone.
  Dataset.clone(@owner, @slug, @slug, remote)
  @local_path = dataset_home
  Cnvrg::CLI.log_message('')
  true
end
|
60
|
+
|
61
|
+
# Fetches the dataset description from the "clone" endpoint and memoises it.
#
# commit, query - forwarded to the server on the first call only; once
#                 @dataset_call is set it is returned regardless of arguments.
#
# Returns the "result" entry of the response.
def get_dataset(commit: nil, query: nil)
  return @dataset_call if @dataset_call

  payload = { commit: commit, query:query}
  response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}/clone", 'POST', payload)
  Cnvrg::CLI.is_response_success(response,true)
  @dataset_call = response["result"]
end
|
70
|
+
|
71
|
+
# True when the description returned by get_dataset has the
# "soft_link_dataset" dataset type.
def softlinked?
  dataset_type = get_dataset["dataset_type"]
  dataset_type == "soft_link_dataset"
end
|
74
|
+
|
75
|
+
|
76
|
+
# Builds a downloader client for this dataset.
#
# Client parameters come from the "client" endpoint; when that request is not
# successful they are taken from get_storage_client_fallback instead.
def get_storage_client
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/client", 'GET')
  client_params =
    if Cnvrg::CLI.is_response_success(response, false)
      response['client']
    else
      get_storage_client_fallback
    end
  Cnvrg::Downloader::Client.factory(client_params)
end
|
85
|
+
|
86
|
+
# Obtains storage parameters from the "download_multi" endpoint (called with
# an empty file list).
#
# Returns the response's 'files' entry with a 'storage' key added:
# 's3' when 'is_s3' is truthy, 'minio' otherwise.
# Raises StandardError when the request is not successful.
def get_storage_client_fallback
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/download_multi", "POST", {files: []})
  unless Cnvrg::CLI.is_response_success(response, false)
    raise StandardError.new("Can't find dataset credentials")
  end

  response['files'].tap do |params|
    params['storage'] = params['is_s3'] ? 's3' : 'minio'
  end
end
|
94
|
+
|
95
|
+
# Requests the "clone" endpoint for the given commit/query and returns the
# 'result' entry of the response (not memoised, unlike get_dataset).
def get_stats(commit: nil, query: nil)
  payload = {commit: commit, query: query}
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone", 'POST', payload)
  Cnvrg::CLI.is_response_success(response, true)
  response['result']
end
|
100
|
+
|
101
|
+
# Requests one chunk of file keys from the "clone_chunk" endpoint.
#
# Returns response['result']['files']['keys'], or nil when the request is not
# successful.
def get_clone_chunk(latest_id: nil, chunk_size: 1000, offset: 0, commit: 'latest')
  payload = {commit: commit, chunk_size: chunk_size, latest_id: latest_id, offset: offset}
  response = Cnvrg::API.request("users/#{@owner}/datasets/#{@slug}/clone_chunk", 'POST', payload)
  return nil unless Cnvrg::CLI.is_response_success(response, false)

  files = response['result']['files']
  files['keys']
end
|
106
|
+
|
107
|
+
# Saves a backup of .cnvrg/idx.yml as .cnvrg/idx.yml.backup.
#
# When idx.yml does not exist, an empty index ({commit: nil, tree: {}}) is
# written as the backup instead.
#
# Uses File.exist? (File.exists? was removed in Ruby 3.2).
def backup_idx
  Cnvrg::Logger.log_info("Backup idx")
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  backup_path = "#{self.local_path}/.cnvrg/idx.yml.backup"
  if File.exist? idx_path
    FileUtils.cp idx_path, backup_path
  else
    idx = {commit: nil, tree: {}}
    File.open(backup_path, 'w') {|f| f.write idx.to_yaml}
  end
end
|
116
|
+
|
117
|
+
# Restores the index from .cnvrg/idx.yml.backup by loading it and passing it
# to set_idx. The backup file is expected to exist.
def restore_idx
  Cnvrg::Logger.log_info("Restore idx because an error.")
  Cnvrg::Logger.log_method(bind: binding)
  idx = YAML.load_file(self.local_path.to_s + "/.cnvrg/idx.yml.backup")
  set_idx(idx)
end
|
123
|
+
|
124
|
+
# Rewrites .cnvrg/config.yml (relative to the current directory) so that it
# points at another dataset.
#
# owner, slug, title - the new identity to store.
#
# The title is stored under :dataset_name, the key that initialize,
# validate_config and every other config writer in this class use. The
# previous :dataset_home key is still written as well.
# NOTE(review): keeping :dataset_home assumes something may read it — confirm.
def change_url(owner: '', slug: '', title: '')
  config = {dataset_name: title, dataset_home: title, dataset_slug: slug, owner: owner}
  File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
end
|
128
|
+
|
129
|
+
# Sends a DELETE request for the given dataset and returns the raw response.
def self.delete(dataset_slug, owner)
  Cnvrg::API.request("users/#{owner}/datasets/#{dataset_slug}/delete", 'DELETE')
end
|
133
|
+
|
134
|
+
# Returns the :commit value stored in .cnvrg/idx.yml, or nil when the index
# file does not exist.
def last_local_commit
  return nil unless File.exist? "#{self.local_path}/.cnvrg/idx.yml"

  YAML.load_file(@local_path + "/.cnvrg/idx.yml")[:commit]
end
|
141
|
+
|
142
|
+
# Asks the server to create a volume from the last local commit and returns
# the raw response.
def snapshot
  payload = {data_commit: last_local_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST', payload)
  CLI.is_response_success(response)
  response
end
|
148
|
+
|
149
|
+
# Requests the dataset list of the given owner and returns the raw response.
def list(owner)
  Cnvrg::API.request("users/#{owner}/datasets/list", 'GET').tap do |response|
    CLI.is_response_success(response)
  end
end
|
154
|
+
|
155
|
+
# Lists the saved queries of this dataset as table rows.
#
# Returns an array whose first row is the header, followed by one row per
# query: name, slug, created_at (converted with in_time_zone) and username.
def search_queries
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/queries/list", 'GET')
  CLI.is_response_success(response)
  header = [["name", "id", "created_at", "username"]]
  body = response["results"]["queries"].map do |query|
    [query["name"], query["slug"], query["created_at"].in_time_zone.to_s, query["username"]]
  end
  header + body
end
|
164
|
+
|
165
|
+
# Lists the files matched by a saved query as table rows.
#
# Returns an array whose first row is the header, followed by one row per
# file: name, full path and "s3_url".
def get_query_file(query_slug)
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/search/#{query_slug}", 'GET')
  CLI.is_response_success(response)
  header = [["Name", "Full path", "URL"]]
  body = response["results"]["query_files"].map do |file|
    [file["name"], file["fullpath"], file["s3_url"]]
  end
  header + body
end
|
174
|
+
|
175
|
+
# Downloads the tags YAML of this dataset into working_dir, using the file
# name and content supplied by the server.
#
# Returns true when the file was written, false when writing raised.
def download_tags_yaml
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/tags_yml", 'GET')
  CLI.is_response_success(response)
  begin
    target = "#{self.working_dir}/#{response["results"]["filename"]}"
    File.open(target, "w+") {|f| f.write response["results"]["file_content"]}
    true
  rescue
    false
  end
end
|
186
|
+
|
187
|
+
# Requests the commit list of this dataset; commit_sha1 is passed as the
# "commit" query parameter. Returns the raw response.
def list_commits(commit_sha1: nil)
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/list_commits?commit=#{commit_sha1}"
  response = Cnvrg::API.request(endpoint, 'GET')
  CLI.is_response_success(response)
  response
end
|
195
|
+
|
196
|
+
# Uploads data tags described by a YAML file.
#
# tag_file - a path or an open file to parse with YAML.load_file. When it
#            responds to #close it is closed after parsing, even if parsing
#            raises. nil (the default) makes the method return false instead
#            of raising.
#
# Returns true when the server answers with status 200, false otherwise.
def upload_tags_via_yml(tag_file = nil)
  return false if tag_file.nil?

  begin
    records_yml = YAML.load_file(tag_file)
  ensure
    # A plain String path has nothing to close.
    tag_file.close if tag_file.respond_to?(:close)
  end
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/data_tags_create", 'POST', {records_yml: records_yml})
  response["status"] == 200
end
|
206
|
+
|
207
|
+
# Web URL of this dataset.
#
# This definition is shadowed by a second `url` method defined further down
# in this class. It used to build a ".../projects/..." link; it now builds
# the same ".../datasets/..." link as the later definition.
def url
  url = Cnvrg::Helpers.remote_url
  "#{url}/#{self.owner}/datasets/#{self.slug}"
end
|
211
|
+
|
212
|
+
# Makes sure a .cnvrgignore file exists, looking first in the current
# directory and then in "<dataset_name>/".
#
# remote - unused.
#
# Returns nil when the file already exists, false when creating it raised,
# and otherwise the result of writing the default content.
def self.verify_cnvrgignore_exist(dataset_name, remote)
  ignore_path = File.exist?(".cnvrgignore") ? ".cnvrgignore" : "#{dataset_name}/.cnvrgignore"
  return nil if File.exist?(ignore_path)

  begin
    FileUtils.touch [ignore_path]
    default_content = Helpers.cnvrgignore_content
    File.open(ignore_path, "w+") {|f| f.write default_content}
  rescue => e
    return false
  end
end
|
232
|
+
|
233
|
+
# Appends entries to <local_path>/.cnvrgignore, one per line, after a
# separating newline.
#
# Returns true when nothing had to be added or the append succeeded, false
# when writing raised.
def update_ignore_list(new_ignore)
  return true if new_ignore.nil? || new_ignore.empty?

  begin
    File.open(self.local_path + "/.cnvrgignore", "a+") do |f|
      f.puts("\n")
      new_ignore.each { |entry| f.puts("#{entry}\n") }
    end
    true
  rescue
    false
  end
end
|
251
|
+
|
252
|
+
# Reads <local_path>/.cnvrgignore and expands it into a flat list of paths.
#
# - Blank lines, comment lines (starting with "#") and repeated lines are
#   skipped.
# - A line ending in "/" or naming a directory contributes itself plus
#   everything below it.
# - A line containing "*" contributes the matches of "**/*<line>".
# - Any other line is taken literally.
#
# Globs and directory checks are evaluated relative to the current working
# directory. A missing .cnvrgignore (e.g. right after a clone) is generated
# first with the default content.
#
# Uses File.exist? (File.exists? was removed in Ruby 3.2) and File.foreach so
# the file handle is closed after reading.
#
# Returns an Array of Strings.
def get_ignore_list
  ignore_path = self.local_path + "/.cnvrgignore"
  self.generate_cnvrg_ignore unless File.exist?(ignore_path)

  ignore_list = []
  return ignore_list unless File.exist?(ignore_path)

  File.foreach(ignore_path) do |line|
    line = line.strip
    next if line.start_with?("#") || ignore_list.include?(line) || line.empty?

    if line.end_with?("/") || File.directory?(line)
      ignore_list << line
      # Nested arrays are flattened once, at the end.
      ignore_list << Dir.glob("#{line}/**/*", File::FNM_DOTMATCH).flatten
    elsif line.include? "*"
      ignore_list << Dir.glob("**/*#{line}", File::FNM_DOTMATCH).flatten
    else
      ignore_list << line
    end
  end
  ignore_list.flatten
end
|
281
|
+
|
282
|
+
|
283
|
+
# Creates a dataset on the server and initialises the current directory for
# it: .cnvrg/config.yml is written and, when no .cnvrgignore exists yet, a
# default one is created.
#
# Returns true on success, false when any step raised a StandardError.
def self.init(owner, dataset_name, is_public = false, bucket: nil)
  create_ignore = !File.exist?(".cnvrgignore")
  list_files = [".cnvrg/config.yml"]
  list_files << ".cnvrgignore" if create_ignore

  cnvrgignore = Helpers.cnvrgignore_content
  begin
    response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public, bucket: bucket})
    Cnvrg::CLI.is_response_success(response)
    created = JSON.parse response["result"]

    config = {dataset_name: dataset_name, dataset_slug: created["slug"], owner: owner}

    FileUtils.mkdir_p [".cnvrg"]
    FileUtils.touch list_files
    File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
    File.open(".cnvrgignore", "w+") {|f| f.write cnvrgignore} if create_ignore
  rescue => e
    return false
  end
  true
end
|
315
|
+
|
316
|
+
# Links the current directory to an existing dataset: writes
# .cnvrg/config.yml and an initial index carrying the dataset's initial
# commit sha1.
#
# Returns false when owner or slug is blank, nil when the lookup request is
# not successful, true on success.
# Raises Exception when the server does not report an initial commit sha1
# (older servers) and wraps any StandardError raised on the way in Exception.
def self.link_dataset(owner: nil, slug: nil)
  return false if owner.blank? || slug.blank?

  response = Cnvrg::API.request("users/#{owner}/datasets/#{slug}", 'GET')
  return unless Cnvrg::CLI.is_response_success(response, false)

  result = response["result"]
  sha1 = result["init_commit_sha1"]

  # The initial index must carry the init commit sha1 so that later actions
  # on the dataset work; a blank value means an old server version.
  if sha1.blank?
    raise Exception.new("This feature is not available for your cnvrg version. Please contact support for more information")
  end

  config = {dataset_name: result["title"], dataset_slug: result["slug"], owner: owner}

  FileUtils.mkdir_p [".cnvrg"]
  FileUtils.touch [".cnvrg/config.yml"]
  File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}

  Dataset.new(Dir.pwd).write_idx({}, sha1)
  true
rescue => e
  raise Exception.new(e)
end
|
350
|
+
|
351
|
+
# Creates an empty local clone skeleton under "<dataset_slug>/":
# .cnvrg/config.yml and, when the clone has none yet, a default .cnvrgignore.
#
# The existence check now looks at "<dataset_slug>/.cnvrgignore", the file
# that is actually written. It used to look at ".cnvrgignore" in the current
# directory, which could skip creating the file or overwrite an existing one
# inside the clone.
#
# Returns true on success, false when creating the files raised.
def self.blank_clone(owner, dataset_name, dataset_slug)
  config_path = "#{dataset_slug}/.cnvrg/config.yml"
  ignore_path = "#{dataset_slug}/.cnvrgignore"

  create_ignore = !File.exist?(ignore_path)
  list_files = [config_path]
  list_files << ignore_path if create_ignore

  cnvrgignore = Helpers.cnvrgignore_content
  begin
    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}

    FileUtils.mkdir_p ["#{dataset_slug}/.cnvrg"]
    FileUtils.touch list_files
    File.open(config_path, "w+") {|f| f.write config.to_yaml}
    File.open(ignore_path, "w+") {|f| f.write cnvrgignore} if create_ignore
  rescue => e
    return false
  end
  true
end
|
380
|
+
|
381
|
+
# Writes the default ignore content (Helpers.cnvrgignore_content) to
# <local_path>/.cnvrgignore, replacing any existing file.
def generate_cnvrg_ignore
  default_content = Helpers.cnvrgignore_content
  File.open(self.local_path + "/.cnvrgignore", "w+") do |f|
    f.write default_content
  end
end
|
385
|
+
|
386
|
+
# Tells whether the dataset directory "<base_dir>/<dataset_slug>" is marked
# as successfully downloaded, i.e. its .cnvrg/config.yml has
# `:success: true`.
#
# dataset_slug - directory name of the dataset below base_dir.
# base_dir     - parent directory of the dataset; defaults to "/data", the
#                location that used to be hard-coded.
#
# The body used to reference an undefined `dataset_title` variable; the
# resulting NameError was swallowed by the inline rescue, so the method
# always returned false.
#
# Returns true or false. An unreadable or non-hash config counts as false.
def self.verify_dataset(dataset_slug, base_dir = "/data")
  config =
    begin
      YAML.load_file("#{base_dir}/#{dataset_slug}/.cnvrg/config.yml")
    rescue StandardError
      {}
    end
  config.is_a?(Hash) && config[:success] == true
end
|
390
|
+
|
391
|
+
# Polls until every listed dataset directory has `:success: true` in its
# .cnvrg/config.yml.
#
# dataset_titles - directory names, resolved relative to the current directory.
# timeout        - optional number of seconds after which polling gives up.
#
# Sleeps 10 seconds between attempts; errors while reading a config are
# logged and treated as "not ready yet".
#
# Returns true when all datasets are ready, false on timeout.
def self.verify_datasets(dataset_titles, timeout = nil)
  start_time = Time.now.to_i
  Cnvrg::Logger.log_info("Verifying datasets #{dataset_titles}")
  Cnvrg::Logger.log_info("Timeout is #{timeout}")
  loop do
    begin
      elapsed = Time.now.to_i - start_time
      return false if timeout.present? && timeout < elapsed

      ready = dataset_titles.all? do |dataset_title|
        YAML.load_file("#{dataset_title}/.cnvrg/config.yml")[:success] == true
      end
      return true if ready

      Cnvrg::Logger.log_info("Sleeping..")
      sleep 10
    rescue => e
      Cnvrg::Logger.log_info("Got error")
      Cnvrg::Logger.log_error(e)
      sleep 10
    end
  end
end
|
413
|
+
|
414
|
+
# Scans the sub-directories of the current directory for fully downloaded
# datasets.
#
# A directory qualifies when its .cnvrg/config.yml has `:success: true`, a
# dataset name and slug, and its .cnvrg/idx.yml has a commit.
#
# Returns a de-duplicated Array of hashes with the keys :dataset_slug,
# :dataset_name and :local_commit.
def self.scan_datasets()
  Cnvrg::Logger.log_info("Looking up datasets")
  found = Dir.entries(Dir.pwd).map do |entry|
    next nil unless File.directory?(File.join(Dir.pwd, entry)) && !(entry == '.' || entry == '..')

    begin
      config = begin
        YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/config.yml")
      rescue StandardError
        nil
      end
      local_commit = begin
        YAML.load_file("#{Dir.pwd}/#{entry}/.cnvrg/idx.yml")[:commit]
      rescue StandardError
        nil
      end
      complete = config.present? && config[:success] == true && config[:dataset_name].present? &&
                 config[:dataset_slug].present? && local_commit.present?
      next nil unless complete

      {
        "dataset_slug": config[:dataset_slug],
        "dataset_name": config[:dataset_name],
        "local_commit": local_commit,
      }
    rescue
      nil
    end
  end
  found.compact.uniq
end
|
437
|
+
|
438
|
+
# Instance-level clone placeholder: does nothing and returns nil.
def clone(commit)
  nil
end
|
441
|
+
|
442
|
+
# Creates the local skeleton of a cloned dataset: "<dataset_name>/.cnvrg/"
# with a config.yml describing the dataset.
#
# remote - when false the dataset directory itself is listed explicitly for
#          creation as well.
#
# Returns true on success; on a StandardError the message is printed and
# false is returned.
def self.clone(owner, dataset_name, dataset_slug, remote = false)
  meta_dir = "#{dataset_name}/.cnvrg"
  config_path = "#{dataset_name}/.cnvrg/config.yml"
  list_dirs = remote ? [meta_dir] : [dataset_name, meta_dir]

  config = {dataset_name: dataset_name, dataset_slug: dataset_slug, owner: owner}

  FileUtils.mkdir_p list_dirs
  FileUtils.touch [config_path]
  File.open(config_path, "w+") {|f| f.write config.to_yaml}
  true
rescue => e
  puts "Exception in clone request:#{e.message}"
  false
end
|
467
|
+
|
468
|
+
# Requests a page of the dataset's file listing.
#
# Returns the response serialised with to_json, or nil when the response is
# blank.
def list_files(commit_sha1: "latest", limit: 1000, offset: 0, expires: 3600)
  params = {commit_sha1: commit_sha1, limit: limit, offset: offset, expires: expires}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/list", 'GET', params)
  response.blank? ? nil : response.to_json
end
|
473
|
+
|
474
|
+
# Downloads the directory trees of a commit into dataset_home, showing a
# progress bar, and marks the dataset as successfully downloaded.
#
# Returns false when no trees could be fetched, true otherwise.
def self.clone_tree(commit: 'latest', dataset_home: nil)
  @dataset = Cnvrg::Dataset.new(dataset_home)
  @files = Cnvrg::Datafiles.new(@dataset.owner, @dataset.slug, dataset: @dataset)
  trees = @files.get_trees(commit: commit)
  return false if trees.nil?

  progress_options = {
    title: "Download Progress",
    progress_mark: '=',
    format: "%b>>%i| %p%% %t",
    starting_at: 0,
    total: trees.size,
    autofinish: true
  }
  pb = ProgressBar.create(progress_options)
  trees.each do |tree|
    pb.progress += 1
    @files.download_dir(dataset_home, tree)
  end
  pb.finish
  @dataset.write_success
  true
end
|
493
|
+
|
494
|
+
# Appends ":success: true" to .cnvrg/config.yml under @local_path (or
# @working_dir when @local_path is unset). Does nothing when the config file
# does not exist.
#
# in_folder - unused.
def write_success(in_folder = false)
  config_path = File.join(@local_path || @working_dir, ".cnvrg/config.yml")
  return unless File.exist?(config_path)

  File.open(config_path, "a") {|f| f.puts(":success: true")}
end
|
501
|
+
|
502
|
+
# Initialises the current directory as a dataset inside a container: writes
# .cnvrg/config.yml and, when no .cnvrgignore exists yet, a default one.
#
# Whether the ignore file has to be created is decided *before* the files
# are touched. Previously `touch` created an empty .cnvrgignore first, so the
# later `unless File.exist?` guard never let the default content be written.
#
# Returns true on success, false when creating the files raised.
def self.init_container(owner, dataset_slug, dataset_name)
  cnvrgignore = Helpers.cnvrgignore_content
  begin
    create_ignore = !File.exist?(".cnvrgignore")

    FileUtils.mkdir_p [".cnvrg"]
    FileUtils.touch [".cnvrgignore", ".cnvrg/config.yml"]

    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}
    File.open(".cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}

    File.open(".cnvrgignore", "w+") {|f| f.write cnvrgignore} if create_ignore
  rescue => e
    return false
  end
  true
end
|
526
|
+
|
527
|
+
|
528
|
+
# Loads the local index from <local_path>/.cnvrg/idx.yml.
#
# Uses File.exist? (File.exists? was removed in Ruby 3.2).
#
# Returns the parsed YAML, or an empty index ({commit: nil, tree: {}}) when
# the file does not exist.
def get_idx
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return YAML.load_file(idx_path) if File.exist? idx_path

  {commit: nil, tree: {}}
end
|
534
|
+
|
535
|
+
# Serialises idx as YAML into <local_path>/.cnvrg/idx.yml, replacing the
# previous content.
def set_idx(idx)
  serialized_target = "#{self.local_path}/.cnvrg/idx.yml"
  File.open(serialized_target, 'w+') do |f|
    f.write idx.to_yaml
  end
end
|
538
|
+
|
539
|
+
# Web URL of this dataset: "<remote url>/<owner>/datasets/<slug>".
def url
  base = Cnvrg::Helpers.remote_url
  "#{base}/#{self.owner}/datasets/#{self.slug}"
end
|
543
|
+
|
544
|
+
# Builds an index tree for a chunk of local files.
#
# list_files - paths to index; when a prefix is given, the cumulative prefix
#              directories are appended to this array afterwards.
# threads    - number of threads handed to Parallel.map.
# prefix     - optional path prepended to every label and relative path.
#
# Directories are stored as "<label>/" => nil; files map to a hash with
# sha1, file name, size, content type, absolute and relative path.
#
# Raises StandardError when a label is not a valid file name.
# Returns the tree hash.
def generate_chunked_idx(list_files = [], threads: 15, prefix: '')
  tree = {}
  Parallel.map(list_files, in_threads: threads) do |file|
    # Label is the path below local_path, optionally under the prefix.
    label = file.gsub(self.local_path + "/", "")
    label = "#{prefix}/#{label}" if prefix.present?
    unless Cnvrg::Files.valid_file_name?(label)
      raise StandardError.new("#{label} is not a valid file name.")
    end

    if File.directory? file
      tree[label + "/"] = nil
    else
      sha1 = OpenSSL::Digest::SHA1.file(file).hexdigest
      file_name = File.basename file
      file_size = File.size(file).to_f
      mime_type = MimeMagic.by_path(file)
      # Unknown and textual types are both reported as plain text.
      content_type = (mime_type.nil? || mime_type.text?) ? "text/plain" : mime_type.type
      relative_path = file.gsub(/^#{@local_path + "/"}/, "")
      relative_path = "#{prefix}/#{relative_path}" if prefix.present?
      tree[label] = {sha1: sha1, file_name: file_name, file_size: file_size, content_type: content_type, absolute_path: file, relative_path: relative_path}
    end
  end

  if prefix.present?
    # For prefix a/b/c, append a, a/b and a/b/c to the file list.
    segments = prefix.split('/')
    segments.each_index { |i| list_files << segments[0..i].join('/') }
  end
  tree
end
|
577
|
+
|
578
|
+
# Points the local index back at a known commit and drops the pending
# next commit.
#
# commit - commit sha1 to revert to; when blank, the server's last valid
#          commit is used if that request succeeds.
#
# Calls update_idx_with_commit! — the only such method this class defines.
# The previous call to `update_idx_with_commit` (no bang) had no matching
# definition in this class.
def revert_to_last_commit(commit: nil)
  if commit.blank?
    resp = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/last_valid_commit", 'GET')
    if CLI.is_response_success(resp, false)
      commit = resp['result']['commit_sha1']
    end
  end
  self.update_idx_with_commit!(commit) if commit.present?
  self.revert_next_commit
end
|
588
|
+
|
589
|
+
# Lists the paths below local_path, smallest files first.
#
# Entries are dropped when they are "." / ".." entries or match the .cnvrg
# or .cnvrgignore.conflict patterns, unless they also match the .cnvrgignore
# pattern.
#
# with_ignore - when true the full list is returned; otherwise the paths
#               from get_ignore_list (prefixed with local_path) are removed.
def list_all_files(with_ignore = false)
  candidates = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH)
  list = candidates.reject do |x|
    excluded = (x =~ /\/\.{1,2}$/) || (x =~ /^#{self.local_path}\/\.cnvrg\/*/) || (x =~ /^#{self.local_path}\/\.cnvrgignore.conflict*/)
    excluded && !(x =~ /^#{self.local_path}\/\.cnvrgignore/)
  end

  list = list.sort_by {|fn| File.size(fn)}
  return list if with_ignore

  ignored = self.get_ignore_list.map {|ignore_file| "#{self.local_path}/#{ignore_file}"}
  list - ignored
end
|
598
|
+
|
599
|
+
# Writes <local_path>/.cnvrg/idx.yml from a tree and a commit.
#
# tree   - index tree; when nil it is regenerated with generate_idx and every
#          present entry is reduced to its sha1 plus the current time as
#          commit_time.
# commit - commit sha1 stored alongside the tree.
def write_idx(tree = nil, commit = nil)
  if tree.nil?
    generated = self.generate_idx[:tree]
    tree = generated.each_with_object({}) do |(k, v), stamped|
      stamped[k] = v.present? ? {sha1: v[:sha1], commit_time: Time.now} : v
    end
  end
  idx = {tree: tree, commit: commit}
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') {|f| f.write idx.to_yaml}
end
|
607
|
+
|
608
|
+
# Replaces the :tree entry of the stored index and writes it back.
def write_tree(tree)
  updated = self.get_idx.tap { |idx| idx[:tree] = tree }
  self.set_idx(updated)
end
|
613
|
+
|
614
|
+
# Rebuilds the local index by hashing every file below local_path and writes
# it to <local_path>/.cnvrg/idx.yml.
#
# show_progress - when true, Parallel.map is given progress-bar options.
#
# Directories are stored as "<label>/" => nil, files as
# {sha1: ..., commit_time: nil}. Paths returned by get_ignore_list are
# skipped. The :commit (and a non-empty :next_commit) of the previous index
# are carried over.
#
# Changes from the previous version:
# - File.exist? replaces File.exists?, which was removed in Ruby 3.2.
# - A previous idx.yml that does not parse to a Hash (e.g. an empty file) is
#   treated as absent instead of raising NoMethodError.
#
# Returns the new index hash.
def generate_idx(show_progress = false)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  old_idx = File.exist?(idx_path) ? YAML.load_file(idx_path) : nil
  old_idx = nil unless old_idx.is_a?(Hash)

  tree_idx = Hash.new(0)
  list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject {|x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~ /^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~ /^#{self.local_path}\/\.cnvrgignore/)}
  list_ignore = self.get_ignore_list()

  parallel_options = {
      in_threads: IDXParallelThreads,
      isolation: true
  }
  if show_progress
    parallel_options[:progress] = {
        :title => "Checking Dataset",
        :progress_mark => '=',
        :format => "%b>>%i| %p%% %t",
        :starting_at => 0,
        :total => (list).size,
        :autofinish => true
    }
  end

  # NOTE(review): the previous tree is looked up under the String key "tree"
  # (and entries under "sha1"), while the index is written with Symbol keys;
  # with a Symbol-keyed idx.yml this lookup is always nil — confirm intent.
  previous_tree = old_idx && old_idx["tree"]

  Parallel.map(list, parallel_options) do |e|
    label = e.gsub(self.local_path + "/", "")
    next if list_ignore.include? label

    if File.directory? e
      tree_idx[label + "/"] = nil
    else
      sha1 = OpenSSL::Digest::SHA1.file(e).hexdigest
      previous = previous_tree && previous_tree[label]
      if previous.nil? || previous["sha1"] != sha1
        tree_idx[label] = {sha1: sha1, commit_time: nil}
      else
        # Unchanged file: keep the previous entry.
        tree_idx[label] = previous
      end
    end
  end

  idx = {commit: (old_idx ? old_idx[:commit] : nil), tree: tree_idx}
  next_commit = old_idx && old_idx[:next_commit]
  idx[:next_commit] = next_commit unless next_commit.nil? || next_commit.empty?

  idx_yaml = idx.to_yaml
  File.open(idx_path, 'w') {|f| f.write idx_yaml}
  idx
end
|
673
|
+
|
674
|
+
# Asks the server to create a volume for this dataset and returns the raw
# response.
def create_volume
  Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/volumes/create", 'POST').tap do |response|
    CLI.is_response_success(response)
  end
end
|
679
|
+
|
680
|
+
# Asks the server which data changed since current_commit and returns the
# raw response.
def download_updated_data(current_commit)
  payload = {current_commit: current_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_updated_data", 'POST', payload)
  CLI.is_response_success(response, false)
  response
end
|
685
|
+
|
686
|
+
# Compares the local index with the server ("status" endpoint).
#
# new_branch  - forwarded to the server.
# commit      - current commit; defaults to last_local_commit.
# local_idx   - index to compare; generated with generate_idx when nil.
# force       - when true no request is made: every path of the local tree
#               is reported as "added".
# next_commit - forwarded to the server / echoed in the forced result.
#
# Returns the server response, or the synthetic response when forced.
def compare_idx(new_branch, commit = last_local_commit, local_idx = nil, force = false, next_commit = nil)
  local_idx = self.generate_idx if local_idx.nil?
  ignore_list = self.get_ignore_list()

  if force
    added = local_idx[:tree] ? [local_idx[:tree].keys].flatten : []
    tree_changes = {
      "added" => added,
      "updated_on_server" => [],
      "updated_on_local" => [],
      "deleted" => [],
      "conflicts" => []
    }
    return {"result" => {"commit" => next_commit, "tree" => tree_changes}}
  end

  payload = {idx: local_idx, new_branch: new_branch, current_commit: commit, ignore: ignore_list, next_commit: next_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status", 'POST', payload)
  CLI.is_response_success(response, false)
  response
end
|
710
|
+
|
711
|
+
# Asks the "download_status" endpoint what has to be downloaded, given the
# local commit, the pending next commit and the ignore patterns.
# Returns the raw response.
def compare_idx_download(all_files: false, desired_commit: nil)
  current_commit = self.last_local_commit
  next_commit = self.get_next_commit
  ignore_list = self.send_ignore_list()
  payload = {
    current_commit: current_commit,
    next_commit: next_commit,
    ignore: ignore_list,
    all_files: all_files,
    desired_commit: desired_commit.presence
  }
  Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/download_status", 'POST', payload)
end
|
717
|
+
|
718
|
+
# Stores commit_sha1 under :partial_commit in the local index.
def set_partial_commit(commit_sha1)
  updated = self.get_idx.tap { |idx| idx[:partial_commit] = commit_sha1 }
  self.set_idx(updated)
end
|
723
|
+
|
724
|
+
# Returns the :partial_commit stored in the local index, or nil when there
# is none.
#
# The previous `idx.try(:fetch, :partial_commit)` raised KeyError whenever
# the index had no :partial_commit key, which includes the default index
# get_idx returns for a missing idx.yml. A non-hash index also yields nil.
def get_partial_commit
  idx = self.get_idx
  idx.is_a?(Hash) ? idx[:partial_commit] : nil
end
|
728
|
+
|
729
|
+
# Requests the "status_current" endpoint for the last local commit and
# returns the raw response.
def current_status(new_branch)
  payload = {current_commit: last_local_commit, new_branch: new_branch}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status_current", 'POST', payload)
  CLI.is_response_success(response, true)
  response
end
|
735
|
+
|
736
|
+
# Converts <local_path>/.cnvrgignore into the pattern list sent to the
# server.
#
# - Blank lines, comment lines and repeated lines are skipped.
# - "dir/" contributes the line with its slashes removed and "dir/.".
# - A line containing "*" has every "*" replaced with ".*".
# - Any other line is sent as is.
#
# The file is read with File.foreach so the handle is closed afterwards
# (File.open(...).each_line left it open).
# NOTE(review): for nested directories ("a/b/") the slash removal yields
# "ab" — confirm whether only the trailing slash was meant to be dropped.
#
# Returns an Array of Strings; [] when the file cannot be read.
def send_ignore_list()
  ignore_list = []
  File.foreach(self.local_path + "/.cnvrgignore") do |line|
    line = line.strip
    next if line.start_with?("#") || ignore_list.include?(line) || line.empty?

    if line.end_with? "/"
      ignore_list << line.gsub("/", "")
      ignore_list << line + "."
    elsif line.include? "*"
      ignore_list << line.gsub("*", ".*")
    else
      ignore_list << line
    end
  end
  ignore_list.flatten
rescue
  []
end
|
759
|
+
|
760
|
+
|
761
|
+
# Asks the server to compare the given commit with the last local commit
# and returns the raw response.
def compare_commits(commit)
  payload = {compare_commit: commit, current_commit: last_local_commit}
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/compare_commits", 'POST', payload)
  CLI.is_response_success(response, false)
  response
end
|
766
|
+
|
767
|
+
# Records commit_sha1 as :next_commit in <local_path>/.cnvrg/idx.yml.
# When the index file is missing, a new one is started with empty-string
# :commit and :tree values.
#
# Returns true.
def set_next_commit(commit_sha1)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = File.exist?(idx_path) ? YAML.load_file(idx_path) : {commit: "", tree: ""}
  idx_hash[:next_commit] = commit_sha1
  File.open(idx_path, 'w') {|f| f.write idx_hash.to_yaml}
  true
end
|
780
|
+
|
781
|
+
# Returns the :next_commit stored in <local_path>/.cnvrg/idx.yml, or nil
# when the index file does not exist.
def get_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist? idx_path

  YAML.load_file(idx_path)[:next_commit]
end
|
788
|
+
|
789
|
+
# Promotes the pending :next_commit to :commit: the index is rewritten with
# only :commit (the former next commit) and :tree.
# Returns nil when the index file does not exist.
def remove_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist? idx_path

  current = YAML.load_file(idx_path)
  promoted = {commit: current[:next_commit], tree: current[:tree]}
  File.open(idx_path, 'w') {|f| f.write promoted.to_yaml}
end
|
799
|
+
|
800
|
+
# Drops the pending :next_commit from <local_path>/.cnvrg/idx.yml and
# rewrites the file. Returns nil when the index file does not exist.
def revert_next_commit()
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  return nil unless File.exist? idx_path

  remaining = YAML.load_file(idx_path).except(:next_commit)
  File.open(idx_path, 'w') {|f| f.write remaining.to_yaml}
end
|
808
|
+
|
809
|
+
# Asks the server whether the given commit (or the last local commit when
# none is given) is on a new branch, and returns that flag.
#
# NOTE(review): this posts to a ".../projects/..." endpoint and calls
# update_is_new_branch, which is not defined in this class — confirm whether
# this method is still in use.
def compare_commit(commit)
  commit = last_local_commit if commit.nil? || commit.empty?
  response = Cnvrg::API.request("users/#{self.owner}/projects/#{self.slug}/commit/compare", 'POST', {current_commit: commit})
  CLI.is_response_success(response, false)
  update_is_new_branch(response["result"]["new_branch"])
  response["result"]["new_branch"]
end
|
818
|
+
|
819
|
+
# Stamps commit_time on the index entries of the given paths and rewrites
# <local_path>/.cnvrg/idx.yml.
#
# Paths without a hash entry in the tree are left unchanged. The index's
# :next_commit is re-assigned to itself before writing.
#
# Returns true.
def update_idx_with_files_commits!(files, commit_time)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = YAML.load_file(idx_path)

  files.each { |path| idx_hash[:tree].to_h[path].to_h[:commit_time] = commit_time }
  idx_hash[:next_commit] = idx_hash[:next_commit]

  File.open(idx_path, 'w') {|f| f.write idx_hash.to_yaml}
  true
end
|
832
|
+
|
833
|
+
# Overwrites <local_path>/.cnvrg/idx.yml with the YAML form of idx.
# Returns true.
def update_idx(idx)
  File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') do |f|
    f.write idx.to_yaml
  end
  true
end
|
837
|
+
|
838
|
+
# Sets :commit in <local_path>/.cnvrg/idx.yml to the given commit and
# rewrites the file. Returns true.
def update_idx_with_commit!(commit)
  idx_path = "#{self.local_path}/.cnvrg/idx.yml"
  idx_hash = YAML.load_file(idx_path)
  idx_hash[:commit] = commit
  File.open(idx_path, 'w') {|f| f.write idx_hash.to_yaml}
  true
end
|
845
|
+
|
846
|
+
# Recursively deletes working_dir. No server request is made.
def revert(working_dir)
  FileUtils.rm_rf(working_dir)
end
|
851
|
+
|
852
|
+
# Checks whether the current directory is linked to a dataset.
#
# Verifies, in order, that .cnvrg exists, that .cnvrg/config.yml exists,
# that it is not empty and that it has a dataset name, slug and owner.
#
# The message for a missing config.yml used to read "config.yml exists",
# the opposite of the failure it reports.
#
# Returns a hash with :validation (Data::ConfigValidation::FAILED or
# SUCCESS) and a human-readable :message.
def self.validate_config
  failed = Data::ConfigValidation::FAILED

  dot_cnvrg_exists = Dir[".cnvrg"].present?
  return {validation: failed, message: ".cnvrg folder does not exists"} if not dot_cnvrg_exists

  config_file_exists = Dir[".cnvrg/*"].include? ".cnvrg/config.yml"
  return {validation: failed, message: "config.yml does not exist"} if not config_file_exists

  config = YAML.load_file("#{Dir.getwd}/.cnvrg/config.yml")
  return {validation: failed, message: "config.yml is empty"} if not config

  title = config[:dataset_name]
  slug = config[:dataset_slug]
  owner = config[:owner]
  if title.blank? or slug.blank? or owner.blank?
    return {validation: failed, message: "config.yml is not valid or some keys are missing"}
  end

  {validation: Data::ConfigValidation::SUCCESS, message: "Directory is already linked to #{slug}"}
end
|
874
|
+
|
875
|
+
# Exits the process (status 0) when dataset_home already holds the dataset
# named dataset_name.
#
# commit - when given, the process exits only if the local index commit
#          equals it; a different or blank local commit returns early.
#
# Any StandardError (e.g. a missing config.yml) is swallowed and nil is
# returned.
def self.stop_if_dataset_present(dataset_home, dataset_name, commit: nil)
  cli = Cnvrg::CLI.new()
  config = YAML.load_file(dataset_home + "/.cnvrg/config.yml")
  if commit.present?
    local_commit = begin
      YAML.load_file(dataset_home + "/.cnvrg/idx.yml")[:commit]
    rescue StandardError
      nil
    end
    return if commit != local_commit || local_commit.blank?
  end
  return unless config[:dataset_name] == dataset_name

  cli.log_message("Dataset already present, clone aborted")
  exit(0)
rescue => e
  nil
end
|
890
|
+
|
891
|
+
end
|
892
|
+
end
|