cnvrg 0.5.0 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,7 @@ module Cnvrg
26
26
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
27
27
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
28
28
  method_option :sync, :type => :boolean, :aliases => ["-s","--sync"], :default => false
29
+ method_option :tags, :type => :boolean, :aliases => ["--tags"], :desc => "upload file tags", :default => false
29
30
 
30
31
  def upload
31
32
  cli = Cnvrg::CLI.new()
@@ -33,14 +34,17 @@ module Cnvrg
33
34
  sync = options["sync"]
34
35
  force = options["force"]
35
36
  new_branch = options["new_branch"]
36
- cli.upload_data_new(new_branch, verbose,sync,force)
37
+ tags = options["tags"]
38
+ cli.upload_data_new(new_branch, verbose,sync,force, tags)
37
39
  end
38
40
  desc 'sync', 'sync_data_new'
39
41
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits"
40
42
  method_option :force, :type => :boolean, :aliases => ["-f","--force"], :default => false
41
43
  method_option :verbose, :type => :boolean, :aliases => ["-v"], :default => false
42
44
  method_option :commit, :type => :string, :aliases => ["-c"], :desc => "download specified commit", :default => nil
43
- method_option :all_files, :type => :boolean, :aliases => ["--all"], :desc => "download specified commit", :default => true
45
+ method_option :all_files, :type => :boolean, :aliases => ["--all"], :desc => "download specified commit", :default => false
46
+ method_option :tags, :type => :boolean, :aliases => ["--tags"], :desc => "upload file tags", :default => false
47
+ method_option :parallel, :type => :numeric, :aliases => ["-p", "--parallel"], :desc => "uparallel upload at the same time", :default => 15
44
48
 
45
49
  def sync_data_new()
46
50
  cli = Cnvrg::CLI.new()
@@ -49,8 +53,11 @@ module Cnvrg
49
53
  verbose = options["verbose"]
50
54
  commit = options["commit"]
51
55
  all_files = options["all_files"]
56
+ tags = options["tags"]
57
+ parallel=options["parallel"]
58
+
52
59
 
53
- cli.sync_data_new(new_branch, force, verbose,commit,all_files)
60
+ cli.sync_data_new(new_branch, force, verbose,commit,all_files, tags,parallel)
54
61
  end
55
62
  desc 'data download', 'pull data'
56
63
  method_option :new_branch, :type => :boolean, :aliases => ["-nb"], :desc => "create new branch of commits", :default => false
@@ -73,14 +80,26 @@ module Cnvrg
73
80
  desc 'data clone', 'clone datset'
74
81
  method_option :only_tree, :type => :boolean, :aliases => ["-t", "--tree"], :default => false
75
82
  method_option :commit, :type => :string, :aliases => ["-c", "--commit"], :default => nil
83
+ method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => nil
84
+
76
85
  def clone(dataset_url)
77
86
  cli = Cnvrg::CLI.new()
78
- only_tree =options[:only_tree]
87
+ only_tree =options[:only_tree]
79
88
  commit =options[:commit]
89
+ query =options[:query]
90
+
91
+ cli.clone_data(dataset_url, only_tree=only_tree,commit=commit, query=query)
80
92
 
81
- cli.clone_data(dataset_url, only_tree=only_tree,commit=commit)
93
+ end
82
94
 
95
+ desc 'data clone_query', 'clone query datset'
96
+ method_option :query, :type => :string, :aliases => ["-q", "--query"], :default => nil
97
+ def clone_query(dataset_url)
98
+ cli = Cnvrg::CLI.new()
99
+ query =options[:query]
100
+ cli.clone_data_query(dataset_url,query=query)
83
101
  end
102
+
84
103
  desc 'data delete', 'delete datset'
85
104
  def delete(dataset_slug)
86
105
  cli = Cnvrg::CLI.new()
@@ -102,5 +121,23 @@ module Cnvrg
102
121
 
103
122
  end
104
123
 
124
+ desc 'data queries', 'list of all queries'
125
+ def queries()
126
+ cli = Cnvrg::CLI.new()
127
+ cli.queries()
128
+ end
129
+
130
+ desc 'data query_files', 'list of all queries'
131
+ def query_files(query)
132
+ cli = Cnvrg::CLI.new()
133
+ cli.query_files(query)
134
+ end
135
+
136
+ desc 'data download_tags_yaml', 'Download dataset tags yml file in current directory'
137
+ def download_tags_yaml
138
+ cli = Cnvrg::CLI.new()
139
+ cli.download_tags_yaml()
140
+ end
141
+
105
142
  end
106
143
  end
@@ -1,10 +1,13 @@
1
1
  require 'mimemagic'
2
2
  require 'aws-sdk'
3
3
  require 'URLcrypt'
4
- require "down"
4
+ require 'parallel'
5
+ require 'fileutils'
6
+
5
7
 
6
8
  module Cnvrg
7
9
  class Datafiles
10
+ ParallelThreads ||= 15
8
11
 
9
12
  LARGE_FILE=1024*1024*5
10
13
  MULTIPART_SPLIT=10000000
@@ -23,9 +26,44 @@ module Cnvrg
23
26
  mime_type = MimeMagic.by_path(absolute_path)
24
27
  content_type = !(mime_type.nil? or mime_type.text?) ? mime_type.type : "text/plain"
25
28
  sha1 = OpenSSL::Digest::SHA1.file(absolute_path).hexdigest
26
- upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
27
- commit_sha1: commit_sha1, file_name: file_name,
28
- file_size: file_size, file_content_type: content_type, sha1: sha1})
29
+ if (absolute_path.include? "_tags.yml" or absolute_path.include? "_tags.yaml")
30
+ is_valid = false
31
+ begin
32
+ content = open(absolute_path).read()
33
+ hash = YAML.load(open(absolute_path).read())
34
+ # if level 1 keys count is 1
35
+ if hash.keys.count == 1
36
+ if hash["tags"].present?
37
+ is_valid = true
38
+ elsif hash[hash.keys.first].class != Hash
39
+ is_valid = true
40
+ end
41
+ # if level 1 keys count is greater than 1
42
+ elsif hash.keys.count > 1
43
+ if hash["tags"].present? and hash["tags"].class == Hash
44
+ is_valid = false
45
+ else
46
+ is_valid = true
47
+ end
48
+ end
49
+ rescue
50
+ is_valid = false
51
+ end
52
+
53
+ if is_valid
54
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
55
+ commit_sha1: commit_sha1, file_name: file_name,
56
+ file_size: file_size, file_content_type: content_type, sha1: sha1, content: content})
57
+ else
58
+ puts("#{absolute_path} is invalid")
59
+ puts("Please check yaml structure.")
60
+ end
61
+ else
62
+ upload_resp = Cnvrg::API.request(@base_resource + "upload_file", 'POST_FILE', {absolute_path: absolute_path, relative_path: relative_path,
63
+ commit_sha1: commit_sha1, file_name: file_name,
64
+ file_size: file_size, file_content_type: content_type, sha1: sha1})
65
+ end
66
+
29
67
  if Cnvrg::CLI.is_response_success(upload_resp, false)
30
68
  s3_res = upload_large_files_s3(upload_resp, absolute_path)
31
69
 
@@ -359,6 +397,82 @@ module Cnvrg
359
397
  def create_dir(absolute_path, relative_path, commit_sha1)
360
398
  response = Cnvrg::API.request(@base_resource + "create_dir", 'POST', {absolute_path: absolute_path, relative_path: relative_path, commit_sha1: commit_sha1})
361
399
  return Cnvrg::CLI.is_response_success(response, false)
400
+ end
401
+ def download_list_files_in_query(response, dataset_home)
402
+ sts_path = response["path_sts"]
403
+ if !Helpers.is_verify_ssl
404
+ body = open(sts_path, {ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE}).read
405
+ else
406
+ body = open(sts_path).read
407
+ end
408
+ split = body.split("\n")
409
+ key = split[0]
410
+ iv = split[1]
411
+
412
+ access = Cnvrg::Helpers.decrypt(key, iv, response["sts_a"])
413
+
414
+ secret = Cnvrg::Helpers.decrypt(key,iv, response["sts_s"])
415
+
416
+ session = Cnvrg::Helpers.decrypt(key,iv, response["sts_st"])
417
+ region = Cnvrg::Helpers.decrypt(key,iv, response["region"])
418
+
419
+ bucket = Cnvrg::Helpers.decrypt(key,iv, response["bucket"])
420
+ is_s3 = response["is_s3"]
421
+ if is_s3 or is_s3.nil?
422
+ client = Aws::S3::Client.new(
423
+ :access_key_id =>access,
424
+ :secret_access_key => secret,
425
+ :session_token => session,
426
+ :region => region,
427
+ :http_open_timeout => 60, :retry_limit => 20)
428
+ else
429
+ endpoint = Cnvrg::Helpers.decrypt(key,iv, response["endpoint_url"])
430
+ client = Aws::S3::Client.new(
431
+ :access_key_id =>access,
432
+ :secret_access_key => secret,
433
+ :region => region,
434
+ :endpoint=> endpoint,:force_path_style=> true,:ssl_verify_peer=>false,
435
+ :http_open_timeout => 60, :retry_limit => 20)
436
+ end
437
+ list_files = response["files"]
438
+ parallel_options = {
439
+ :progress => {
440
+ :title => "Download Progress",
441
+ :progress_mark => '=',
442
+ :format => "%b>>%i| %p%% %t",
443
+ :starting_at => 0,
444
+ :total => list_files.size,
445
+ :autofinish => true
446
+ },
447
+ in_threads: ParallelThreads
448
+ }
449
+ download_count = 0
450
+ Parallel.map((list_files), parallel_options) do |f|
451
+ file_key = Cnvrg::Helpers.decrypt(key,iv, f["path"])
452
+ begin
453
+ begin
454
+ dir = File.dirname f["fullpath"]
455
+ FileUtils.mkdir_p(dataset_home+"/"+ dir) unless File.exist? (dataset_home+"/"+ dir)
456
+ end
457
+
458
+ File.open(dataset_home+"/"+f["fullpath"], 'w+') do |file|
459
+ resp = client.get_object({bucket:bucket,
460
+ key:file_key}, target: file)
461
+ end
462
+ download_count += 1
463
+ rescue
464
+ end
465
+
466
+ end
467
+ if download_count == list_files.size
468
+ return true
469
+ else
470
+ return false
471
+ end
472
+
473
+
474
+
475
+
362
476
  end
363
477
 
364
478
 
@@ -44,12 +44,57 @@ module Cnvrg
44
44
  return response
45
45
 
46
46
  end
47
+ def search_queries
48
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/queries/list", 'GET')
49
+ CLI.is_response_success(response)
50
+ row = [["name","id", "created_at", "username"]]
51
+ response["results"]["queries"].each do |query|
52
+ row << [query["name"],query["slug"], query["created_at"].in_time_zone.to_s, query["username"]]
53
+ end
54
+ return row
55
+
56
+ end
57
+ def get_query_file(query_slug)
58
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/search/#{query_slug}", 'GET')
59
+ CLI.is_response_success(response)
60
+ row = [["Name","Full path","URL"]]
61
+ response["results"]["query_files"].each do |file|
62
+ row << [file["name"],file["fullpath"],file["s3_url"]]
63
+ end
64
+ return row
65
+
66
+ end
67
+
68
+ def download_tags_yaml
69
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/tags_yml", 'GET')
70
+ CLI.is_response_success(response)
71
+ begin
72
+ path = self.working_dir
73
+ File.open("#{path}/#{response["results"]["filename"]}", "w+") { |f| f.write response["results"]["file_content"] }
74
+ return true
75
+ rescue
76
+ return false
77
+ end
78
+ end
79
+
47
80
  def list_commits
48
81
  response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/list_commits", 'GET')
49
82
  CLI.is_response_success(response)
50
83
  return response
51
84
 
52
85
  end
86
+
87
+ def upload_tags_via_yml(tag_file=nil)
88
+ records_yml = YAML.load_file(tag_file)
89
+ tag_file.close
90
+ response = Cnvrg::API.request("users/#{self.owner}/datasets/#{self.slug}/data_tags_create", 'POST', {records_yml: records_yml})
91
+ if response["status"] == 200
92
+ return true
93
+ else
94
+ return false
95
+ end
96
+ end
97
+
53
98
  def url
54
99
  url = Cnvrg::Helpers.remote_url
55
100
  "#{url}/#{self.owner}/projects/#{self.slug}"
@@ -153,13 +198,44 @@ module Cnvrg
153
198
  end
154
199
 
155
200
 
201
+ def self.blank_clone(owner, dataset_name, dataset_slug)
202
+ list_dirs = [".cnvrg"
203
+ ]
204
+ list_files = [
205
+ ".cnvrg/config.yml"
206
+ ]
207
+ create_ignore = false
208
+ if !File.exist? ".cnvrgignore"
209
+ list_files << ".cnvrgignore"
210
+ create_ignore = true
211
+ end
212
+
213
+
214
+ cnvrgignore = Helpers.cnvrgignore_content
215
+ begin
216
+
217
+ config = {dataset_name: dataset_name,
218
+ dataset_slug: dataset_slug,
219
+ owner: owner}
220
+
221
+ FileUtils.mkdir_p list_dirs
222
+ FileUtils.touch list_files
223
+ File.open(".cnvrg/config.yml", "w+") { |f| f.write config.to_yaml }
224
+ File.open(".cnvrgignore", "w+") { |f| f.write cnvrgignore } unless !create_ignore
225
+ rescue => e
226
+ return false
227
+ end
228
+ return true
229
+ end
230
+
231
+
156
232
  # Instance-level clone taking a +commit+ argument; the argument is not used
  # in the visible body.
  # NOTE(review): nothing in the visible body assigns `response`. Unless it
  # resolves to a method defined elsewhere, calling this raises NameError.
  # Confirm against the full file before relying on it.
  def clone( commit)
157
233
 
158
234
 
159
235
  return response
160
236
  end
161
237
 
162
- def self.clone(owner, dataset_name,dataset_slug)
238
+ def self.clone(owner, dataset_name, dataset_slug)
163
239
 
164
240
  begin
165
241
  list_dirs = [ dataset_name, "#{dataset_name}/.cnvrg"
@@ -216,7 +292,7 @@ module Cnvrg
216
292
  url = Cnvrg::Helpers.remote_url
217
293
  "#{url}/#{self.owner}/datasets/#{self.slug}"
218
294
  end
219
- def generate_idx(list_ignore=nil)
295
+ def generate_idx(show_progress=false)
220
296
  if File.exists? "#{self.local_path}/.cnvrg/idx.yml"
221
297
  old_idx = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
222
298
  else
@@ -227,7 +303,27 @@ module Cnvrg
227
303
 
228
304
  list = Dir.glob("#{self.local_path}/**/*", File::FNM_DOTMATCH).reject { |x| (x =~ /\/\.{1,2}$/) or (x =~ /^#{self.local_path}\/\.cnvrg\/*/) or (x =~/^#{self.local_path}\/\.cnvrgignore.conflict*/) and not (x =~/^#{self.local_path}\/\.cnvrgignore/) }
229
305
  list_ignore = self.get_ignore_list()
230
- Parallel.map(list, in_threads: IDXParallelThreads) do |e|
306
+ if show_progress
307
+ parallel_options = {
308
+ :progress => {
309
+ :title => "Checking Dataset",
310
+ :progress_mark => '=',
311
+ :format => "%b>>%i| %p%% %t",
312
+ :starting_at => 0,
313
+ :total => (list).size,
314
+ :autofinish => true
315
+ },
316
+ in_threads: IDXParallelThreads,
317
+ isolation: true
318
+ }
319
+ else
320
+ parallel_options = {
321
+ in_threads: IDXParallelThreads,
322
+ isolation: true
323
+ }
324
+ end
325
+
326
+ Parallel.map(list, parallel_options ) do |e|
231
327
  label = e.gsub(self.local_path + "/", "")
232
328
  if File.directory? e
233
329
  if list_ignore.include? label
@@ -393,7 +489,7 @@ module Cnvrg
393
489
  end
394
490
 
395
491
  def update_idx_with_files_commits!(files, commit_time)
396
- files.flatten!
492
+ # files.flatten!
397
493
  idx_hash = YAML.load_file("#{self.local_path}/.cnvrg/idx.yml")
398
494
  # idx_hash[:commit] = commit
399
495
 
@@ -4,6 +4,7 @@ module Cnvrg
4
4
  attr_reader :slug
5
5
  attr_reader :sync_before_terminate
6
6
  attr_reader :sync_delay_time
7
+ attr_reader :output_dir
7
8
 
8
9
  def initialize(owner, project_slug)
9
10
  @project_slug = project_slug
@@ -12,17 +13,21 @@ module Cnvrg
12
13
  @slug = nil
13
14
  @sync_before_terminate = nil
14
15
  @sync_delay_time = nil
16
+ @output_dir = nil
15
17
  end
16
18
 
17
- def start(input, platform, machine_name, start_commit, name, email_notification, machine_activity,script_path,sync_before_terminate, periodic_sync)
19
+ def start(input, platform, machine_name, start_commit, name, email_notification, machine_activity,script_path,
20
+ sync_before_terminate, periodic_sync)
18
21
 
19
22
  res = Cnvrg::API.request(@base_resource + "experiment/start", 'POST',
20
- {input: input, platform: platform, machine_name: machine_name, start_commit: start_commit,sync_before_terminate:sync_before_terminate, periodic_sync:periodic_sync,
23
+ {input: input, platform: platform, machine_name: machine_name, start_commit: start_commit,
21
24
  title: name, email_notification: email_notification, machine_activity: machine_activity,script_path:script_path})
22
25
  Cnvrg::CLI.is_response_success(res,false)
26
+
23
27
  @slug = res.to_h["result"].to_h["slug"]
24
28
  @sync_before_terminate = res.to_h["result"].to_h["sync_before_terminate"]
25
29
  @sync_delay_time = res.to_h["result"].to_h["sync_delay_time"]
30
+ @output_dir = res.to_h["result"].to_h["output_dir"]
26
31
  return res
27
32
 
28
33
  end
@@ -83,7 +88,8 @@ module Cnvrg
83
88
 
84
89
  end
85
90
 
86
- def exec_remote(command, commit_to_run, instance_type, image_slug,scheduling_query,local_timestamp, grid,path_to_cmd,data, data_commit,periodic_sync, sync_before_terminate, max_time, ds_sync_options=0)
91
+ def exec_remote(command, commit_to_run, instance_type, image_slug,scheduling_query,local_timestamp, grid,path_to_cmd,data, data_commit,periodic_sync,
92
+ sync_before_terminate, max_time, ds_sync_options=0,output_dir=nil,data_query=nil, git_commit=nil, git_branch=nil)
87
93
  response = Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/experiment/remote", 'POST', {command: command, image_slug: image_slug,
88
94
  commit_sha1: commit_to_run,
89
95
  instance_type: instance_type,
@@ -93,13 +99,15 @@ module Cnvrg
93
99
  path_to_cmd:path_to_cmd,dataset_slug:data,
94
100
  dataset_commit: data_commit,max_time:max_time,
95
101
  periodic_sync:periodic_sync, sync_before_terminate:sync_before_terminate,
96
- dataset_sync_options:ds_sync_options})
102
+ dataset_sync_options:ds_sync_options,output_dir:output_dir,
103
+ dataset_query:data_query,git_commit:git_commit,git_branch:git_branch })
97
104
  return response
98
105
  end
99
- def remote_notebook(instance_type, commit, data, data_commit, notebook_type,ds_sync_options=0)
106
+ def remote_notebook(instance_type, commit, data, data_commit, notebook_type,ds_sync_options=0,data_query=nil)
100
107
  response = Cnvrg::API.request("users/#{@owner}/projects/#{@project_slug}/notebook/remote", 'POST', {instance_type: instance_type,dataset_slug:data,
101
108
  dataset_commit: data_commit,
102
- commit:commit,notebook_type:notebook_type,dataset_sync_options:ds_sync_options})
109
+ commit:commit,notebook_type:notebook_type,dataset_sync_options:ds_sync_options,
110
+ dataset_query:data_query})
103
111
  return response
104
112
  end
105
113
 
@@ -130,6 +138,14 @@ module Cnvrg
130
138
 
131
139
  def restart_spot_instance
132
140
  restart = false
141
+ #TODO: remove this later
142
+ fall_number = rand(1..8)
143
+ if fall_number == 5
144
+ return true
145
+ else
146
+ return false
147
+ end
148
+
133
149
  begin
134
150
  url = URI.parse('http://169.254.169.254/latest/meta-data/spot/termination-time')
135
151
  req = Net::HTTP::Get.new(url.to_s)
@@ -143,14 +159,16 @@ module Cnvrg
143
159
  restart = false
144
160
  end
145
161
  rescue
162
+ restart = false
163
+
146
164
  end
147
165
 
148
166
  return restart
149
167
 
150
168
  end
151
169
 
152
- def send_restart_request()
153
- response = Cnvrg::API.request(@base_resource + "experiment/check_spot_instance", 'POST', {exp_slug: @slug})
170
+ def send_restart_request(sha1=nil)
171
+ Cnvrg::API.request(@base_resource + "experiment/check_spot_instance", 'POST', {exp_slug: @slug, end_commit: sha1})
154
172
  end
155
173
  end
156
174
  end