cnvrg 0.0.148 → 0.0.149

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  require 'thor'
2
+ require 'cnvrg/cli'
2
3
 
3
4
  class SubCommandBase < Thor
4
5
  def self.banner(command, namespace = nil, subcommand = false)
@@ -11,15 +12,58 @@ class SubCommandBase < Thor
11
12
  end
12
13
  module Cnvrg
13
14
  class Data < SubCommandBase
14
- desc "init", "init data folder"
15
-
15
+ desc "data init", "init data folder"
16
+ method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
16
17
  def init
17
- puts "im in init"
18
+ cli = Cnvrg::CLI.new()
19
+ public = options["public"]
20
+ cli.init_data(public)
18
21
  end
19
- desc "upload", "upload data folder"
20
-
22
+ desc "data upload", "upload data folder"
23
+ method_option :ignore, :type => :array, :aliases => ["-i", "--i"], :desc => "ignore following files"
24
+ method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
25
+ method_option :sync, :type => :boolean, :aliases => ["-s"], :default => false
21
26
  def upload
22
- puts "im in upload"
27
+ cli = Cnvrg::CLI.new()
28
+ ignore = options["ignore"]
29
+ verbose = options["verbose"]
30
+ sync = options["sync"]
31
+
32
+ cli.upload_data_tar(ignore, verbose,sync)
33
+ end
34
+ desc 'data download', 'pull data'
35
+ method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
36
+ method_option :sync, :type => :boolean, :aliases => ["-s"], :default => false
37
+
38
# Thor command body for `data download`.
# Reads the "verbose" and "sync" option values and hands them, together with
# the current working directory (Dir.pwd), to Cnvrg::CLI#download_data.
# The return value is whatever download_data returns.
+ def download()
39
+ cli = Cnvrg::CLI.new()
40
+ verbose = options["verbose"]
41
+ sync = options["sync"]
42
+
43
# The dataset location is always the directory the command was run from.
+ cli.download_data(verbose,sync,Dir.pwd)
44
+
45
+ end
46
+ desc 'data clone', 'clone datset'
47
# Thor command body for `data clone`.
# Takes one positional argument, dataset_url, and forwards it unchanged to
# Cnvrg::CLI#clone_data on a freshly built CLI object.
# NOTE(review): the desc string registered for this command reads
# 'clone datset' -- looks like a typo for "dataset"; confirm and fix there.
+ def clone(dataset_url)
48
+ cli = Cnvrg::CLI.new()
49
+ cli.clone_data(dataset_url)
50
+
51
+ end
52
+ desc 'data list', 'list of datasets'
53
# Thor command body for `data list`.
# Builds a Cnvrg::CLI and calls list_dataset with no arguments.
# NOTE(review): "verbose" and "sync" are read from options but never used
# below, and no method_option for them is declared between this command's
# desc and def in the visible diff -- presumably leftovers copied from the
# download command; confirm before removing.
+ def list()
54
+ cli = Cnvrg::CLI.new()
55
+ verbose = options["verbose"]
56
+ sync = options["sync"]
57
+
58
+ cli.list_dataset()
59
+
60
+ end
61
+ desc 'data commits', 'pull data'
62
+
63
# Thor command body for `data commits`.
# Builds a Cnvrg::CLI and calls list_dataset_commits with no arguments.
# NOTE(review): the desc registered for this command says 'pull data', the
# same text used for `data download` -- likely a copy/paste slip; confirm.
+ def commits()
64
+ cli = Cnvrg::CLI.new()
65
+ cli.list_dataset_commits()
66
+
23
67
  end
24
68
 
25
69
  end
@@ -6,6 +6,8 @@ module Cnvrg
6
6
  class Datafiles
7
7
 
8
8
  LARGE_FILE=1024*1024*5
9
+ MULTIPART_SPLIT=10000000
10
+
9
11
  attr_reader :base_resource
10
12
 
11
13
  def initialize(owner, dataset_slug)
@@ -19,9 +21,11 @@ module Cnvrg
19
21
  file_size = File.size(absolute_path).to_f
20
22
  mime_type = MimeMagic.by_path(absolute_path)
21
23
  content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
24
+ sha1 = Digest::SHA1.file(absolute_path).hexdigest
25
+
22
26
  upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
23
27
  commit_sha1: commit_sha1, file_name: file_name,
24
- file_size: file_size, file_content_type: content_type})
28
+ file_size: file_size, file_content_type: content_type,sha1:sha1})
25
29
  if Cnvrg::CLI.is_response_success(upload_resp, false)
26
30
  path = upload_resp["result"]["path"]
27
31
  if file_size.to_f>= Cnvrg::Files::LARGE_FILE.to_f
@@ -37,6 +41,27 @@ module Cnvrg
37
41
  end
38
42
  return false
39
43
  end
44
# Registers a file with the "upload_tar_file" endpoint and then pushes its
# bytes through upload_large_files_s3.
#
# absolute_path - path of the local file; used for size, MIME type and SHA1.
# relative_path - path sent to the API; its basename becomes file_name.
# commit_sha1   - commit identifier sent along with the upload request.
#
# Returns true only when the API response passes
# Cnvrg::CLI.is_response_success AND upload_large_files_s3 returns a truthy
# value; returns false in every other case.
+ def upload_tar_file(absolute_path, relative_path, commit_sha1)
45
+ file_name = File.basename relative_path
46
# File.size is converted to a Float before being sent.
+ file_size = File.size(absolute_path).to_f
47
+ mime_type = MimeMagic.by_path(absolute_path)
48
# Unknown (nil) or text MIME types are reported as "text/plain".
+ content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
49
+ sha1 = Digest::SHA1.file(absolute_path).hexdigest
50
+
51
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_tar_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
52
+ commit_sha1: commit_sha1, file_name: file_name,
53
+ file_size: file_size, file_content_type: content_type,sha1:sha1})
54
+ if Cnvrg::CLI.is_response_success(upload_resp, false)
55
+ path = upload_resp["result"]["path"]
56
# Unlike a size-based choice, this method always takes the
# upload_large_files_s3 route.
+ s3_res = upload_large_files_s3(upload_resp, absolute_path)
57
+ if s3_res
58
# NOTE(review): the response of the "update_s3" call is not inspected;
# true is returned regardless of its outcome.
+ Cnvrg::API.request(@base_resource + "update_s3", 'POST', {path: path, commit_id: upload_resp["result"]["commit_id"],
59
+ blob_id: upload_resp["result"]["id"]})
60
+ return true
61
+ end
62
+ end
63
+ return false
64
+ end
40
65
  def upload_log_file(absolute_path, relative_path,log_date)
41
66
  file_name = File.basename relative_path
42
67
  file_size = File.size(absolute_path).to_f
@@ -199,9 +224,11 @@ module Cnvrg
199
224
 
200
225
  end
201
226
 
202
- def upload_large_files_s3(upload_resp, file_path)
227
+ def upload_large_files_s3(upload_resp, file_path)
203
228
  begin
204
229
  sts_path = upload_resp["result"]["path_sts"]
230
+ s4cmd_path = upload_resp["result"]["path_s4cmd"]
231
+
205
232
  uri = URI.parse(sts_path)
206
233
  http_object = Net::HTTP.new(uri.host, uri.port)
207
234
  http_object.use_ssl = true if uri.scheme == 'https'
@@ -212,18 +239,72 @@ module Cnvrg
212
239
  response = http.request request
213
240
  body = response.read_body
214
241
  end
242
+
215
243
  URLcrypt::key = [body].pack('H*')
216
- s3 = Aws::S3::Resource.new(
217
- :access_key_id => URLcrypt.decrypt(upload_resp["result"]["sts_a"]),
218
- :secret_access_key => URLcrypt.decrypt(upload_resp["result"]["sts_s"]),
219
- :session_token => URLcrypt.decrypt(upload_resp["result"]["sts_st"]),
220
- :region => URLcrypt.decrypt(upload_resp["result"]["region"]))
221
- resp = s3.bucket(URLcrypt.decrypt(upload_resp["result"]["bucket"])).
222
- object(upload_resp["result"]["path"]+"/"+File.basename(file_path)).
223
- upload_file(file_path,{:use_accelerate_endpoint=>true})
244
+
245
+ python_version=`python --version > /dev/null 2>&1` ; is_python=$?.success?
246
+ if is_python
247
+
248
+ s4cmd=`pip freeze |grep s4cmd > /dev/null 2>&1` ; s4cmd_suc=$?.success?
249
+ if !s4cmd_suc
250
+ `pip install s4cmd > /dev/null 2>&1`
251
+ end
252
+
253
+ end
254
+
255
+ if !is_python
256
+ s3 = Aws::S3::Resource.new(
257
+ :access_key_id => URLcrypt.decrypt(upload_resp["result"]["sts_a"]),
258
+ :secret_access_key => URLcrypt.decrypt(upload_resp["result"]["sts_s"]),
259
+ :session_token => URLcrypt.decrypt(upload_resp["result"]["sts_st"]),
260
+ :region => URLcrypt.decrypt(upload_resp["result"]["region"]))
261
+ resp = s3.bucket(URLcrypt.decrypt(upload_resp["result"]["bucket"])).
262
+ object(upload_resp["result"]["path"]+"/"+File.basename(file_path)).
263
+ upload_file(file_path,{:use_accelerate_endpoint=>true})
264
+
265
+ else
266
+
267
+ s4cmd_uri = URI.parse(s4cmd_path)
268
+ s4cmd_http_object = Net::HTTP.new(s4cmd_uri.host, s4cmd_uri.port)
269
+ s4cmd_http_object.use_ssl = true if s4cmd_uri.scheme == 'https'
270
+ s4cmd_request = Net::HTTP::Get.new(s4cmd_path)
271
+
272
+ s4cmd_body = ""
273
+ s4cmd_http_object.start do |http|
274
+ response = http.request s4cmd_request
275
+ s4cmd_body = response.read_body
276
+ end
277
+
278
+ s4cmd_new_body = s4cmd_body.gsub(" self.client = self.boto3.client('s3',
279
+ aws_access_key_id=aws_access_key_id,
280
+ aws_secret_access_key=aws_secret_access_key)"," self.client = self.boto3.client('s3',
281
+ aws_access_key_id='#{ URLcrypt.decrypt(upload_resp["result"]["sts_a"])}',
282
+ aws_secret_access_key='#{URLcrypt.decrypt(upload_resp["result"]["sts_s"])}',
283
+ aws_session_token='#{URLcrypt.decrypt(upload_resp["result"]["sts_st"])}')")
284
+
285
+ tmp = Tempfile.new('s4cmd.py')
286
+ tmp << s4cmd_new_body
287
+ tmp.flush
288
+ tmp.close
289
+
290
+ is_success = false
291
+ count = 0
292
+ while !is_success and count <3
293
+ resp = `python #{tmp.path} --num-threads=128 --max-singlepart-upload-size=#{MULTIPART_SPLIT} put -f #{file_path} s3://#{URLcrypt.decrypt(upload_resp["result"]["bucket"])}/#{upload_resp["result"]["path"]+"/"+File.basename(file_path)} > /dev/null 2>&1`
294
+ is_success =$?.success?
295
+ count +=1
296
+
297
+ end
298
+ resp= is_success
299
+
300
+ end
301
+
224
302
  return resp
303
+
225
304
  rescue =>e
226
- puts e
305
+ if File.exist? tmp
306
+ FileUtils.rm_rf [tmp]
307
+ end
227
308
  return false
228
309
 
229
310
  end
@@ -231,6 +312,7 @@ module Cnvrg
231
312
 
232
313
  end
233
314
 
315
+
234
316
  def upload_small_files_s3(url_path, file_path, content_type)
235
317
  url = URI.parse(url_path)
236
318
  file = File.open(file_path, "rb")
@@ -273,9 +355,9 @@ module Cnvrg
273
355
  response = Cnvrg::API.request(@base_resource + "create_dir", 'POST', {absolute_path: absolute_path, relative_path: relative_path, commit_sha1: commit_sha1})
274
356
  return Cnvrg::CLI.is_response_success(response, false)
275
357
  end
276
- def download_file_s3(absolute_path, relative_path, project_home, conflict=false)
358
+ def download_file_s3(absolute_path, relative_path, project_home, conflict=false,commit_sha1=nil)
277
359
  begin
278
- res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path})
360
+ res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path,commit_sha1:commit_sha1})
279
361
  Cnvrg::CLI.is_response_success(res, false)
280
362
  if res["result"]
281
363
  download_resp = res
@@ -311,6 +393,46 @@ module Cnvrg
311
393
 
312
394
  end
313
395
  end
396
+
397
# Asks the "download_data_file" endpoint for the file belonging to
# commit_sha1 and streams it from S3 into dataset_home.
#
# commit_sha1  - commit identifier sent to the API.
# dataset_home - local directory; the file is written to
#                dataset_home + "/" + filename.
#
# Returns the filename reported by the API on success, false when any
# exception is raised, and nil when the response has no "result" entry
# (the `if` has no else branch).
+ def download_data_file(commit_sha1,dataset_home)
398
+ begin
399
+ res = Cnvrg::API.request(@base_resource + "download_data_file", 'POST', {commit_sha1:commit_sha1})
400
# The return value of is_response_success is not used here; the code
# below only checks for the presence of res["result"].
+ Cnvrg::CLI.is_response_success(res, false)
401
+ if res["result"]
402
+ download_resp = res
403
+ filename = download_resp["result"]["filename"]
404
+
405
# Fetch the body served at path_sts with a plain GET.
+ sts_path = download_resp["result"]["path_sts"]
406
+ uri = URI.parse(sts_path)
407
+ http_object = Net::HTTP.new(uri.host, uri.port)
408
+ http_object.use_ssl = true if uri.scheme == 'https'
409
+ request = Net::HTTP::Get.new(sts_path)
410
+
411
+ body = ""
412
+ http_object.start do |http|
413
+ response = http.request request
414
+ body = response.read_body
415
+ end
416
# The fetched body is hex-decoded and installed as the URLcrypt key; the
# sts_a / sts_s / sts_st / region / bucket / key fields are then decrypted
# with it.
+ URLcrypt::key = [body].pack('H*')
417
+ s3 = Aws::S3::Client.new(
418
+ :access_key_id => URLcrypt.decrypt(download_resp["result"]["sts_a"]),
419
+ :secret_access_key => URLcrypt.decrypt(download_resp["result"]["sts_s"]),
420
+ :session_token => URLcrypt.decrypt(download_resp["result"]["sts_st"]),
421
+ :region => URLcrypt.decrypt(download_resp["result"]["region"]))
422
+
423
# The local file is opened in binary write mode and passed as the
# get_object target.
+ File.open(dataset_home+"/"+filename, 'wb') do |file|
424
+ resp = s3.get_object({ bucket:URLcrypt.decrypt(download_resp["result"]["bucket"]),
425
+ key:URLcrypt.decrypt(download_resp["result"]["key"])}, target: file)
426
+ end
427
+ return filename
428
+ end
429
+
430
# Any StandardError is swallowed and reported to the caller only as false.
+ rescue =>e
431
+ return false
432
+
433
+ end
434
+ end
435
+
314
436
  def download_file(absolute_path, relative_path, project_home, conflict=false)
315
437
  res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path})
316
438
  Cnvrg::CLI.is_response_success(res, false)
@@ -332,14 +454,14 @@ module Cnvrg
332
454
  return true
333
455
  end
334
456
 
335
- def download_dir(absolute_path, relative_path, project_home)
336
- FileUtils.mkdir_p("#{project_home}/#{absolute_path}")
457
+ def download_dir(dataset_home, absolute_path)
458
+ FileUtils.mkdir_p("#{dataset_home}/#{absolute_path}")
337
459
  end
338
- def revoke_download_dir(absolute_path, relative_path, project_home)
460
+ def revoke_download_dir(absolute_path)
339
461
  puts FileUtils.rmtree("#{absolute_path}")
340
462
  end
341
463
 
342
- def revoke_download_file(project_home,absolute_path,filename,conflict=false)
464
+ def revoke_download_file(absolute_path,filename,conflict=false)
343
465
  begin
344
466
  file_location = absolute_path.gsub(/#{filename}\/?$/, "")
345
467
 
@@ -363,6 +485,10 @@ module Cnvrg
363
485
  response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1})
364
486
  return response
365
487
  end
488
# POSTs commit_sha1 and cur_idx (sent under the key "idx") to
# "#{base_resource}/commit/end_tar" and returns the raw API response.
# The response is not validated here.
+ def end_commit_tar(commit_sha1,cur_idx)
489
+ response = Cnvrg::API.request("#{base_resource}/commit/end_tar", 'POST', {commit_sha1: commit_sha1,idx: cur_idx})
490
+ return response
491
+ end
366
492
 
367
493
  def rollback_commit(commit_sha1)
368
494
  response = Cnvrg::API.request("#{base_resource}/commit/rollback", 'POST', {commit_sha1: commit_sha1})
@@ -14,11 +14,11 @@ module Cnvrg
14
14
  @owner = config[:owner]
15
15
  @working_dir = project_home
16
16
  rescue => e
17
+
17
18
  end
18
19
 
19
20
  end
20
21
 
21
-
22
22
  def last_local_commit
23
23
  idx = YAML.load_file(@local_path + "/.cnvrg/idx.yml")
24
24
  return idx[:commit]
@@ -36,6 +36,12 @@ module Cnvrg
36
36
  CLI.is_response_success(response)
37
37
  return response
38
38
 
39
+ end
40
# GETs "users/<owner>/datasets/<slug>/list_commits" for this dataset, passes
# the response through CLI.is_response_success, and returns the response.
# The return value of is_response_success is not used by this method.
+ def list_commits
41
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/list_commits", 'GET')
42
+ CLI.is_response_success(response)
43
+ return response
44
+
39
45
  end
40
46
  def url
41
47
  url = Cnvrg::Helpers.remote_url
@@ -43,11 +49,14 @@ module Cnvrg
43
49
  end
44
50
 
45
51
  def update_ignore_list(new_ignore)
46
- if new_ignore.empty?
52
+
53
+ if new_ignore.nil? or new_ignore.empty?
47
54
  return true
48
55
  end
49
56
  begin
50
57
  File.open(self.local_path+"/.cnvrgignore", "a+") do |f|
58
+ f.puts("\n")
59
+
51
60
  new_ignore.each do |i|
52
61
  f.puts("#{i}\n")
53
62
  end
@@ -62,13 +71,19 @@ module Cnvrg
62
71
  ignore_list = []
63
72
  File.open(self.local_path+"/.cnvrgignore", "r").each_line do |line|
64
73
  line = line.strip
65
- if line.start_with? "#"
74
+
75
+ if line.start_with? "#" or ignore_list.include? line
66
76
  next
67
77
  end
68
- if line.end_with? "/"
69
- ignore_list << line.chop
70
- sub_dirs = Dir.glob("#{line}/**/*").each { |x| x.gsub!("//", "/") }
71
- ignore_list << sub_dirs.flatten
78
+ if line.end_with? "/" or File.directory?(line)
79
+ if line.end_with? "/"
80
+ ignore_list << line.chop
81
+ else
82
+ ignore_list << line
83
+ end
84
+ all_sub = Dir.glob("#{line}/**/*", File::FNM_DOTMATCH).flatten
85
+ ignore_list << all_sub.flatten
86
+ ignore_list << line
72
87
  else
73
88
  ignore_list << line
74
89
  end
@@ -88,8 +103,9 @@ module Cnvrg
88
103
 
89
104
  cnvrgignore = Helpers.cnvrgignore_content
90
105
  begin
91
- response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public})
92
- Cnvrg::CLI.is_response_success(response)
106
+ response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public})
107
+
108
+ Cnvrg::CLI.is_response_success(response)
93
109
  response = JSON.parse response["result"]
94
110
  dataset_slug = response["slug"]
95
111
 
@@ -105,7 +121,53 @@ module Cnvrg
105
121
  return false
106
122
  end
107
123
  return true
124
+ end
125
# Creates the local skeleton for a cloned dataset, relative to the current
# working directory: the <dataset_name>/ and <dataset_name>/.cnvrg
# directories, a .cnvrg/config.yml holding dataset_name, dataset_slug and
# owner as YAML, and a .cnvrgignore filled from Helpers.cnvrgignore_content.
#
# Returns true on success, false when any StandardError is raised.
# No network call is made in this method.
# NOTE(review): defining self.clone replaces the class object's inherited
# `clone` method -- confirm nothing relies on the default behaviour.
+ def self.clone(owner, dataset_name,dataset_slug)
126
+
127
+ begin
128
+ list_dirs = [ dataset_name, "#{dataset_name}/.cnvrg"
129
+ ]
130
+ list_files = [
131
+ "#{dataset_name}/.cnvrgignore",
132
+ "#{dataset_name}/.cnvrg/config.yml"
133
+ ]
134
+
135
+ config = {dataset_name: dataset_name,
136
+ dataset_slug: dataset_slug,
137
+ owner: owner}
138
+
139
+
140
+ cnvrgignore = Helpers.cnvrgignore_content
141
+ FileUtils.mkdir_p list_dirs
142
+ FileUtils.touch list_files
143
+ File.open("#{dataset_name}/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
144
# NOTE(review): the guard tests ".cnvrgignore" in the current directory, not
# "#{dataset_name}/.cnvrgignore" which is the file being written -- confirm
# which one was intended.
+ File.open("#{dataset_name}/.cnvrgignore", "w+") { |f| f.write cnvrgignore } unless File.exist? ".cnvrgignore"
145
# The exception is discarded; callers only see false.
+ rescue => e
146
+ return false
108
147
  end
148
+ return true
149
+ end
150
+
151
# Writes dataset metadata into the fixed directory /home/ds/notebooks/data:
# .cnvrg/config.yml (dataset_name, dataset_slug, owner as YAML) and
# .cnvrgignore (from Helpers.cnvrgignore_content).
#
# Returns true on success; on any StandardError prints the error and its
# backtrace and returns false.
# Note the parameter order (owner, dataset_slug, dataset_name) differs from
# self.clone (owner, dataset_name, dataset_slug).
# NOTE(review): no directory is created here, so the .cnvrg directory under
# the hard-coded path presumably has to exist already -- confirm.
+ def self.init_container(owner, dataset_slug,dataset_name)
152
+
153
+
154
+ cnvrgignore = Helpers.cnvrgignore_content
155
+ begin
156
+
157
+
158
+ config = {dataset_name: dataset_name,
159
+ dataset_slug: dataset_slug,
160
+ owner: owner}
161
+ File.open("/home/ds/notebooks/data/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
162
+
163
# NOTE(review): the guard tests ".cnvrgignore" relative to the current
# directory, while the file written uses the absolute path -- confirm intent.
+ File.open("/home/ds/notebooks/data/.cnvrgignore", "w+") { |f| f.write cnvrgignore } unless File.exist? ".cnvrgignore"
164
+ rescue => e
165
+ puts e
166
+ puts e.backtrace
167
+ return false
168
+ end
169
+ return true
170
+ end
109
171
 
110
172
 
111
173
  def get_idx
@@ -159,15 +221,29 @@ module Cnvrg
159
221
  return response
160
222
  end
161
223
 
162
- def compare_idx(new_branch, commit=last_local_commit)
224
+ def downlowd_updated_data(current_commit)
163
225
 
164
- local_idx = self.generate_idx
165
- response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status", 'POST', {idx: local_idx, new_branch: new_branch, current_commit: commit})
166
- CLI.is_response_success(response)
167
- return response
168
- end
226
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/downlowd_updated_data", 'POST', {current_commit: current_commit})
227
+ CLI.is_response_success(response)
228
+ return response
229
+ end
230
+ def compare_idx(new_branch, commit=last_local_commit)
169
231
 
170
- def compare_commit(commit)
232
+ local_idx = self.generate_idx
233
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status", 'POST', {idx: local_idx, new_branch: new_branch, current_commit: commit})
234
+ CLI.is_response_success(response)
235
+ return response
236
+ end
237
+
238
+
239
+
240
# POSTs to "users/<owner>/datasets/<slug>/compare_commits" with the given
# commit as compare_commit and last_local_commit as current_commit, passes
# the response through CLI.is_response_success, and returns the response.
+ def compare_commits(commit)
241
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/compare_commits", 'POST', {compare_commit: commit,current_commit:last_local_commit})
242
+ CLI.is_response_success(response)
243
+ return response
244
+ end
245
+
246
+ def compare_commit(commit)
171
247
  if commit.nil? or commit.empty?
172
248
  commit = last_local_commit
173
249
  end
@@ -187,6 +263,11 @@ module Cnvrg
187
263
 
188
264
  return true
189
265
  end
266
# Overwrites "<local_path>/.cnvrg/idx.yml" with idx serialised via to_yaml.
# Always returns true when the write does not raise; exceptions from
# File.open are not rescued here.
+ def update_idx(idx)
267
+ File.open("#{self.local_path}/.cnvrg/idx.yml", 'w') { |f| f.write idx.to_yaml }
268
+
269
+ return true
270
+ end
190
271
 
191
272
 
192
273
  def update_idx_with_commit!(commit)