cnvrg 0.0.148 → 0.0.149

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,5 @@
1
1
  require 'thor'
2
+ require 'cnvrg/cli'
2
3
 
3
4
  class SubCommandBase < Thor
4
5
  def self.banner(command, namespace = nil, subcommand = false)
@@ -11,15 +12,58 @@ class SubCommandBase < Thor
11
12
  end
12
13
  module Cnvrg
13
14
# Thor sub-command group for the dataset commands. Every command builds a
# fresh Cnvrg::CLI and forwards its parsed options/arguments to it.
class Data < SubCommandBase
  desc "data init", "init data folder"
  method_option :public, :type => :boolean, :aliases => ["-p", "--public"], :default => false
  # Forwards the --public flag to Cnvrg::CLI#init_data.
  def init
    cli = Cnvrg::CLI.new()
    is_public = options["public"]
    cli.init_data(is_public)
  end

  desc "data upload", "upload data folder"
  method_option :ignore, :type => :array, :aliases => ["-i", "--i"], :desc => "ignore following files"
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
  method_option :sync, :type => :boolean, :aliases => ["-s"], :default => false
  # Forwards the ignore list and the verbose/sync flags to Cnvrg::CLI#upload_data_tar.
  def upload
    cli = Cnvrg::CLI.new()
    ignore = options["ignore"]
    verbose = options["verbose"]
    sync = options["sync"]

    cli.upload_data_tar(ignore, verbose, sync)
  end

  desc 'data download', 'pull data'
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
  method_option :sync, :type => :boolean, :aliases => ["-s"], :default => false
  # Forwards the verbose/sync flags and the current working directory to
  # Cnvrg::CLI#download_data.
  def download()
    cli = Cnvrg::CLI.new()
    verbose = options["verbose"]
    sync = options["sync"]

    cli.download_data(verbose, sync, Dir.pwd)
  end

  # Description previously read 'clone datset' (typo).
  desc 'data clone', 'clone dataset'
  # Forwards dataset_url to Cnvrg::CLI#clone_data.
  def clone(dataset_url)
    cli = Cnvrg::CLI.new()
    cli.clone_data(dataset_url)
  end

  desc 'data list', 'list of datasets'
  # Calls Cnvrg::CLI#list_dataset. This command declares no options, so the
  # former reads of options["verbose"] / options["sync"] were dead code.
  def list()
    cli = Cnvrg::CLI.new()
    cli.list_dataset()
  end

  # Description previously read 'pull data', copied from the download command.
  desc 'data commits', 'list dataset commits'
  # Calls Cnvrg::CLI#list_dataset_commits.
  def commits()
    cli = Cnvrg::CLI.new()
    cli.list_dataset_commits()
  end

end
@@ -6,6 +6,8 @@ module Cnvrg
6
6
  class Datafiles
7
7
 
8
8
  LARGE_FILE=1024*1024*5
9
+ MULTIPART_SPLIT=10000000
10
+
9
11
  attr_reader :base_resource
10
12
 
11
13
  def initialize(owner, dataset_slug)
@@ -19,9 +21,11 @@ module Cnvrg
19
21
  file_size = File.size(absolute_path).to_f
20
22
  mime_type = MimeMagic.by_path(absolute_path)
21
23
  content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
24
+ sha1 = Digest::SHA1.file(absolute_path).hexdigest
25
+
22
26
  upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
23
27
  commit_sha1: commit_sha1, file_name: file_name,
24
- file_size: file_size, file_content_type: content_type})
28
+ file_size: file_size, file_content_type: content_type,sha1:sha1})
25
29
  if Cnvrg::CLI.is_response_success(upload_resp, false)
26
30
  path = upload_resp["result"]["path"]
27
31
  if file_size.to_f>= Cnvrg::Files::LARGE_FILE.to_f
@@ -37,6 +41,27 @@ module Cnvrg
37
41
  end
38
42
  return false
39
43
  end
44
# Registers a file with the "upload_tar_file" endpoint, pushes its content
# through upload_large_files_s3 and then notifies the "update_s3" endpoint.
#
# absolute_path - path of the local file to send.
# relative_path - path whose basename is reported as the file name.
# commit_sha1   - commit identifier forwarded to the server.
#
# Returns true when the registration and the S3 transfer both succeed,
# false otherwise.
def upload_tar_file(absolute_path, relative_path, commit_sha1)
  base_name = File.basename(relative_path)
  byte_count = File.size(absolute_path).to_f
  detected = MimeMagic.by_path(absolute_path)
  # Unknown or textual types are reported as plain text.
  content_type = (detected.nil? || detected.text?) ? "text/plain" : detected.type
  digest = Digest::SHA1.file(absolute_path).hexdigest

  payload = {absolute_path: absolute_path, relative_path: relative_path,
             commit_sha1: commit_sha1, file_name: base_name,
             file_size: byte_count, file_content_type: content_type, sha1: digest}
  upload_resp = Cnvrg::API.request(@base_resource + "upload_tar_file", 'POST_FILE', payload)
  return false unless Cnvrg::CLI.is_response_success(upload_resp, false)

  path = upload_resp["result"]["path"]
  return false unless upload_large_files_s3(upload_resp, absolute_path)

  Cnvrg::API.request(@base_resource + "update_s3", 'POST',
                     {path: path, commit_id: upload_resp["result"]["commit_id"],
                      blob_id: upload_resp["result"]["id"]})
  true
end
40
65
  def upload_log_file(absolute_path, relative_path,log_date)
41
66
  file_name = File.basename relative_path
42
67
  file_size = File.size(absolute_path).to_f
@@ -199,9 +224,11 @@ module Cnvrg
199
224
 
200
225
  end
201
226
 
202
- def upload_large_files_s3(upload_resp, file_path)
227
+ def upload_large_files_s3(upload_resp, file_path)
203
228
  begin
204
229
  sts_path = upload_resp["result"]["path_sts"]
230
+ s4cmd_path = upload_resp["result"]["path_s4cmd"]
231
+
205
232
  uri = URI.parse(sts_path)
206
233
  http_object = Net::HTTP.new(uri.host, uri.port)
207
234
  http_object.use_ssl = true if uri.scheme == 'https'
@@ -212,18 +239,72 @@ module Cnvrg
212
239
  response = http.request request
213
240
  body = response.read_body
214
241
  end
242
+
215
243
  URLcrypt::key = [body].pack('H*')
216
- s3 = Aws::S3::Resource.new(
217
- :access_key_id => URLcrypt.decrypt(upload_resp["result"]["sts_a"]),
218
- :secret_access_key => URLcrypt.decrypt(upload_resp["result"]["sts_s"]),
219
- :session_token => URLcrypt.decrypt(upload_resp["result"]["sts_st"]),
220
- :region => URLcrypt.decrypt(upload_resp["result"]["region"]))
221
- resp = s3.bucket(URLcrypt.decrypt(upload_resp["result"]["bucket"])).
222
- object(upload_resp["result"]["path"]+"/"+File.basename(file_path)).
223
- upload_file(file_path,{:use_accelerate_endpoint=>true})
244
+
245
+ python_version=`python --version > /dev/null 2>&1` ; is_python=$?.success?
246
+ if is_python
247
+
248
+ s4cmd=`pip freeze |grep s4cmd > /dev/null 2>&1` ; s4cmd_suc=$?.success?
249
+ if !s4cmd_suc
250
+ `pip install s4cmd > /dev/null 2>&1`
251
+ end
252
+
253
+ end
254
+
255
+ if !is_python
256
+ s3 = Aws::S3::Resource.new(
257
+ :access_key_id => URLcrypt.decrypt(upload_resp["result"]["sts_a"]),
258
+ :secret_access_key => URLcrypt.decrypt(upload_resp["result"]["sts_s"]),
259
+ :session_token => URLcrypt.decrypt(upload_resp["result"]["sts_st"]),
260
+ :region => URLcrypt.decrypt(upload_resp["result"]["region"]))
261
+ resp = s3.bucket(URLcrypt.decrypt(upload_resp["result"]["bucket"])).
262
+ object(upload_resp["result"]["path"]+"/"+File.basename(file_path)).
263
+ upload_file(file_path,{:use_accelerate_endpoint=>true})
264
+
265
+ else
266
+
267
+ s4cmd_uri = URI.parse(s4cmd_path)
268
+ s4cmd_http_object = Net::HTTP.new(s4cmd_uri.host, s4cmd_uri.port)
269
+ s4cmd_http_object.use_ssl = true if s4cmd_uri.scheme == 'https'
270
+ s4cmd_request = Net::HTTP::Get.new(s4cmd_path)
271
+
272
+ s4cmd_body = ""
273
+ s4cmd_http_object.start do |http|
274
+ response = http.request s4cmd_request
275
+ s4cmd_body = response.read_body
276
+ end
277
+
278
+ s4cmd_new_body = s4cmd_body.gsub(" self.client = self.boto3.client('s3',
279
+ aws_access_key_id=aws_access_key_id,
280
+ aws_secret_access_key=aws_secret_access_key)"," self.client = self.boto3.client('s3',
281
+ aws_access_key_id='#{ URLcrypt.decrypt(upload_resp["result"]["sts_a"])}',
282
+ aws_secret_access_key='#{URLcrypt.decrypt(upload_resp["result"]["sts_s"])}',
283
+ aws_session_token='#{URLcrypt.decrypt(upload_resp["result"]["sts_st"])}')")
284
+
285
+ tmp = Tempfile.new('s4cmd.py')
286
+ tmp << s4cmd_new_body
287
+ tmp.flush
288
+ tmp.close
289
+
290
+ is_success = false
291
+ count = 0
292
+ while !is_success and count <3
293
+ resp = `python #{tmp.path} --num-threads=128 --max-singlepart-upload-size=#{MULTIPART_SPLIT} put -f #{file_path} s3://#{URLcrypt.decrypt(upload_resp["result"]["bucket"])}/#{upload_resp["result"]["path"]+"/"+File.basename(file_path)} > /dev/null 2>&1`
294
+ is_success =$?.success?
295
+ count +=1
296
+
297
+ end
298
+ resp= is_success
299
+
300
+ end
301
+
224
302
  return resp
303
+
225
304
  rescue =>e
226
- puts e
305
+ if File.exist? tmp
306
+ FileUtils.rm_rf [tmp]
307
+ end
227
308
  return false
228
309
 
229
310
  end
@@ -231,6 +312,7 @@ module Cnvrg
231
312
 
232
313
  end
233
314
 
315
+
234
316
  def upload_small_files_s3(url_path, file_path, content_type)
235
317
  url = URI.parse(url_path)
236
318
  file = File.open(file_path, "rb")
@@ -273,9 +355,9 @@ module Cnvrg
273
355
  response = Cnvrg::API.request(@base_resource + "create_dir", 'POST', {absolute_path: absolute_path, relative_path: relative_path, commit_sha1: commit_sha1})
274
356
  return Cnvrg::CLI.is_response_success(response, false)
275
357
  end
276
- def download_file_s3(absolute_path, relative_path, project_home, conflict=false)
358
+ def download_file_s3(absolute_path, relative_path, project_home, conflict=false,commit_sha1=nil)
277
359
  begin
278
- res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path})
360
+ res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path,commit_sha1:commit_sha1})
279
361
  Cnvrg::CLI.is_response_success(res, false)
280
362
  if res["result"]
281
363
  download_resp = res
@@ -311,6 +393,46 @@ module Cnvrg
311
393
 
312
394
  end
313
395
  end
396
+
397
# Downloads the data file the server associates with +commit_sha1+ into
# +dataset_home+.
#
# The "download_data_file" endpoint answers with the object's location and
# encrypted temporary credentials. The decryption key is fetched from the
# "path_sts" URL of that answer, then the S3 object is streamed to
# <dataset_home>/<filename>.
#
# commit_sha1  - commit identifier sent to the endpoint.
# dataset_home - local directory that receives the file.
#
# Returns the downloaded file's name on success and false on every failure
# (including a response without "result", which used to yield nil).
def download_data_file(commit_sha1, dataset_home)
  partial_path = nil
  begin
    res = Cnvrg::API.request(@base_resource + "download_data_file", 'POST', {commit_sha1: commit_sha1})
    Cnvrg::CLI.is_response_success(res, false)
    result = res["result"]
    return false unless result

    filename = result["filename"]

    # Fetch the hex-encoded key used to decrypt the credentials below.
    sts_path = result["path_sts"]
    uri = URI.parse(sts_path)
    http_object = Net::HTTP.new(uri.host, uri.port)
    http_object.use_ssl = true if uri.scheme == 'https'
    request = Net::HTTP::Get.new(sts_path)

    body = ""
    http_object.start do |http|
      response = http.request request
      body = response.read_body
    end
    URLcrypt::key = [body].pack('H*')

    s3 = Aws::S3::Client.new(
        :access_key_id => URLcrypt.decrypt(result["sts_a"]),
        :secret_access_key => URLcrypt.decrypt(result["sts_s"]),
        :session_token => URLcrypt.decrypt(result["sts_st"]),
        :region => URLcrypt.decrypt(result["region"]))

    target_path = File.join(dataset_home, filename)
    File.open(target_path, 'wb') do |file|
      # From here on the local file has been created/truncated by this call.
      partial_path = target_path
      s3.get_object({ bucket: URLcrypt.decrypt(result["bucket"]),
                      key: URLcrypt.decrypt(result["key"])}, target: file)
    end
    return filename
  rescue
    # Do not leave a truncated download behind for later steps to pick up.
    FileUtils.rm_f(partial_path) if partial_path
    return false
  end
end
435
+
314
436
  def download_file(absolute_path, relative_path, project_home, conflict=false)
315
437
  res = Cnvrg::API.request(@base_resource + "download_file", 'POST', {absolute_path: absolute_path, relative_path: relative_path})
316
438
  Cnvrg::CLI.is_response_success(res, false)
@@ -332,14 +454,14 @@ module Cnvrg
332
454
  return true
333
455
  end
334
456
 
335
- def download_dir(absolute_path, relative_path, project_home)
336
- FileUtils.mkdir_p("#{project_home}/#{absolute_path}")
457
# Creates the directory <dataset_home>/<absolute_path>, including any
# missing parent directories.
def download_dir(dataset_home, absolute_path)
  target_dir = "#{dataset_home}/#{absolute_path}"
  FileUtils.mkdir_p(target_dir)
end
338
- def revoke_download_dir(absolute_path, relative_path, project_home)
460
# Recursively removes +absolute_path+ and prints whatever FileUtils.rmtree
# returns.
def revoke_download_dir(absolute_path)
  removed = FileUtils.rmtree(absolute_path.to_s)
  puts removed
end
341
463
 
342
- def revoke_download_file(project_home,absolute_path,filename,conflict=false)
464
+ def revoke_download_file(absolute_path,filename,conflict=false)
343
465
  begin
344
466
  file_location = absolute_path.gsub(/#{filename}\/?$/, "")
345
467
 
@@ -363,6 +485,10 @@ module Cnvrg
363
485
  response = Cnvrg::API.request("#{base_resource}/commit/end", 'POST', {commit_sha1: commit_sha1})
364
486
  return response
365
487
  end
488
# Posts +commit_sha1+ and +cur_idx+ to the "commit/end_tar" endpoint.
#
# Returns the API response unchanged.
def end_commit_tar(commit_sha1, cur_idx)
  endpoint = "#{base_resource}/commit/end_tar"
  payload = {commit_sha1: commit_sha1, idx: cur_idx}
  Cnvrg::API.request(endpoint, 'POST', payload)
end
366
492
 
367
493
  def rollback_commit(commit_sha1)
368
494
  response = Cnvrg::API.request("#{base_resource}/commit/rollback", 'POST', {commit_sha1: commit_sha1})
@@ -14,11 +14,11 @@ module Cnvrg
14
14
  @owner = config[:owner]
15
15
  @working_dir = project_home
16
16
  rescue => e
17
+
17
18
  end
18
19
 
19
20
  end
20
21
 
21
-
22
22
  def last_local_commit
23
23
  idx = YAML.load_file(@local_path + "/.cnvrg/idx.yml")
24
24
  return idx[:commit]
@@ -36,6 +36,12 @@ module Cnvrg
36
36
  CLI.is_response_success(response)
37
37
  return response
38
38
 
39
+ end
40
# Requests this dataset's "list_commits" endpoint, passes the response to
# CLI.is_response_success, and returns the response.
def list_commits
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/list_commits"
  Cnvrg::API.request(endpoint, 'GET').tap do |commits_response|
    CLI.is_response_success(commits_response)
  end
end
40
46
  def url
41
47
  url = Cnvrg::Helpers.remote_url
@@ -43,11 +49,14 @@ module Cnvrg
43
49
  end
44
50
 
45
51
  def update_ignore_list(new_ignore)
46
- if new_ignore.empty?
52
+
53
+ if new_ignore.nil? or new_ignore.empty?
47
54
  return true
48
55
  end
49
56
  begin
50
57
  File.open(self.local_path+"/.cnvrgignore", "a+") do |f|
58
+ f.puts("\n")
59
+
51
60
  new_ignore.each do |i|
52
61
  f.puts("#{i}\n")
53
62
  end
@@ -62,13 +71,19 @@ module Cnvrg
62
71
  ignore_list = []
63
72
  File.open(self.local_path+"/.cnvrgignore", "r").each_line do |line|
64
73
  line = line.strip
65
- if line.start_with? "#"
74
+
75
+ if line.start_with? "#" or ignore_list.include? line
66
76
  next
67
77
  end
68
- if line.end_with? "/"
69
- ignore_list << line.chop
70
- sub_dirs = Dir.glob("#{line}/**/*").each { |x| x.gsub!("//", "/") }
71
- ignore_list << sub_dirs.flatten
78
+ if line.end_with? "/" or File.directory?(line)
79
+ if line.end_with? "/"
80
+ ignore_list << line.chop
81
+ else
82
+ ignore_list << line
83
+ end
84
+ all_sub = Dir.glob("#{line}/**/*", File::FNM_DOTMATCH).flatten
85
+ ignore_list << all_sub.flatten
86
+ ignore_list << line
72
87
  else
73
88
  ignore_list << line
74
89
  end
@@ -88,8 +103,9 @@ module Cnvrg
88
103
 
89
104
  cnvrgignore = Helpers.cnvrgignore_content
90
105
  begin
91
- response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public})
92
- Cnvrg::CLI.is_response_success(response)
106
+ response = Cnvrg::API.request("cli/create_dataset", 'POST', {title: dataset_name, owner: owner, is_public: is_public})
107
+
108
+ Cnvrg::CLI.is_response_success(response)
93
109
  response = JSON.parse response["result"]
94
110
  dataset_slug = response["slug"]
95
111
 
@@ -105,7 +121,53 @@ module Cnvrg
105
121
  return false
106
122
  end
107
123
  return true
124
+ end
125
# Creates the local skeleton of a cloned dataset under ./<dataset_name>:
# the folder itself, .cnvrg/config.yml and a .cnvrgignore file.
#
# owner        - dataset owner stored in config.yml.
# dataset_name - name of the folder to create; also stored in config.yml.
# dataset_slug - dataset slug stored in config.yml.
#
# Returns true on success and false when any step raises.
def self.clone(owner, dataset_name, dataset_slug)
  begin
    config_path = "#{dataset_name}/.cnvrg/config.yml"
    ignore_path = "#{dataset_name}/.cnvrgignore"

    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}

    FileUtils.mkdir_p [dataset_name, "#{dataset_name}/.cnvrg"]
    File.open(config_path, "w+") { |f| f.write config.to_yaml }

    # Seed the default ignore rules when the dataset folder has no (or an
    # empty) .cnvrgignore. The previous check looked at ".cnvrgignore" in the
    # caller's working directory, so an unrelated file there left the cloned
    # dataset with an empty ignore file.
    unless File.size?(ignore_path)
      File.open(ignore_path, "w+") { |f| f.write Helpers.cnvrgignore_content }
    end
  rescue
    return false
  end
  true
end
150
+
151
# Writes the dataset configuration and a default ignore file into a data
# directory.
#
# owner        - dataset owner stored in config.yml.
# dataset_slug - dataset slug stored in config.yml.
# dataset_name - dataset name stored in config.yml.
# data_dir     - directory that receives .cnvrg/config.yml and .cnvrgignore;
#                defaults to the previously hard-coded "/home/ds/notebooks/data".
#                Its .cnvrg sub-directory must already exist.
#
# Returns true on success. On any error while writing it prints the exception
# and its backtrace and returns false.
def self.init_container(owner, dataset_slug, dataset_name, data_dir = "/home/ds/notebooks/data")
  cnvrgignore = Helpers.cnvrgignore_content
  begin
    config = {dataset_name: dataset_name,
              dataset_slug: dataset_slug,
              owner: owner}
    File.open("#{data_dir}/.cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }

    # Check the file that is actually written; the previous check looked at
    # ".cnvrgignore" in the process's working directory instead.
    ignore_path = "#{data_dir}/.cnvrgignore"
    File.open(ignore_path, "w+") { |f| f.write cnvrgignore } unless File.exist?(ignore_path)
  rescue => e
    puts e
    puts e.backtrace
    return false
  end
  true
end
109
171
 
110
172
 
111
173
  def get_idx
@@ -159,15 +221,29 @@ module Cnvrg
159
221
  return response
160
222
  end
161
223
 
162
- def compare_idx(new_branch, commit=last_local_commit)
224
# Posts +current_commit+ to this dataset's "downlowd_updated_data" endpoint
# (the spelling is part of the route), passes the response to
# CLI.is_response_success, and returns the response.
def downlowd_updated_data(current_commit)
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/downlowd_updated_data"
  Cnvrg::API.request(endpoint, 'POST', {current_commit: current_commit}).tap do |updated|
    CLI.is_response_success(updated)
  end
end
230
# Sends a freshly generated local index to this dataset's "status" endpoint.
#
# new_branch - forwarded as-is in the request.
# commit     - commit sent as current_commit; defaults to last_local_commit.
#
# Returns the API response after passing it to CLI.is_response_success.
def compare_idx(new_branch, commit=last_local_commit)
  payload = {idx: self.generate_idx, new_branch: new_branch, current_commit: commit}
  status_response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/status", 'POST', payload)
  CLI.is_response_success(status_response)
  status_response
end
237
+
238
+
239
+
240
# Posts +commit+ (as compare_commit) together with last_local_commit (as
# current_commit) to this dataset's "compare_commits" endpoint.
#
# Returns the API response after passing it to CLI.is_response_success.
def compare_commits(commit)
  endpoint = "users/#{self.owner}/datasets/#{self.slug}/compare_commits"
  payload = {compare_commit: commit, current_commit: last_local_commit}
  Cnvrg::API.request(endpoint, 'POST', payload).tap do |comparison|
    CLI.is_response_success(comparison)
  end
end
245
+
246
+ def compare_commit(commit)
171
247
  if commit.nil? or commit.empty?
172
248
  commit = last_local_commit
173
249
  end
@@ -187,6 +263,11 @@ module Cnvrg
187
263
 
188
264
  return true
189
265
  end
266
# Overwrites <local_path>/.cnvrg/idx.yml with the YAML form of +idx+.
#
# Returns true.
def update_idx(idx)
  idx_file = "#{self.local_path}/.cnvrg/idx.yml"
  File.write(idx_file, idx.to_yaml)
  true
end
190
271
 
191
272
 
192
273
  def update_idx_with_commit!(commit)