cnvrg 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6533e6de1028236f163d504243a2afef14da2f2b
4
- data.tar.gz: 5327a071845d376bff81c76197a0b6844377d36e
3
+ metadata.gz: e44751ed52140fdaee81469a183a0f06f86968e1
4
+ data.tar.gz: 7b95fb6b3720a17d2edfe7a12da9c21ef0d3034f
5
5
  SHA512:
6
- metadata.gz: d959e3931999dfbf64b45105b9f5e1e048219ee0b6e4f695d87193540229e998093347d3aa81896faa6f6e4d72e6cdfb629aca448558c3e19b7b88d8c2b397ce
7
- data.tar.gz: b0ba98342dd6675c19e34376f6bd4f3157431e562c9d3850f988ec3b74a5d60008592b9a21cd3498c3a53de389e1f82460bcd930b09d736edd070ca8c3dba16d
6
+ metadata.gz: 2766aba6a508f5b073b4b12e987ee1d4568f84c47c632e6fefa6bf6fc0416a3737727cae88dfa7b97873c3f6461c5821d15a4ca9c7c54e32e9ca763b3e8b79d2
7
+ data.tar.gz: 8d2985f8c0ff5ea5d70131e993d896ce3ddd20a1f855dcf7ba7cad4a862a09fa13424bcdb3c5ebccb2a4e2ec9746f00eff2d7be49b7ee810826dc94d41a05a35
@@ -3095,9 +3095,10 @@ module Cnvrg
3095
3095
  method_option :small, :type => :boolean, :aliases => ["-sm", "--small"], :default => false
3096
3096
  method_option :medium, :type => :boolean, :aliases => ["-md", "--medium"], :default => false
3097
3097
  method_option :large, :type => :boolean, :aliases => ["-lg", "--large"], :default => false
3098
- method_option :gpu_1, :type => :boolean, :aliases => ["--gpu"], :default => false
3099
- method_option :gpu_2, :type => :boolean, :aliases => ["--gpuxl"], :default => false
3100
- method_option :gpu_3, :type => :boolean, :aliases => ["--gpuxxl"], :default => false
3098
+ method_option :gpu, :type => :boolean, :aliases => ["--gpu"], :default => false
3099
+ method_option :gpuxl, :type => :boolean, :aliases => ["--gpuxl"], :default => false
3100
+ method_option :gpuxxl, :type => :boolean, :aliases => ["--gpuxxl"], :default => false
3101
+ method_option :machine, :type => :string, :aliases => ["-m", "--machine"], :default => nil
3101
3102
  method_option :sync_before, :type => :boolean, :aliases => ["-sb", "--sync_before"], :default => true
3102
3103
  method_option :sync_after, :type => :boolean, :aliases => ["-sa", "--sync_after"], :default => true
3103
3104
  method_option :title, :type => :string, :aliases => ["-t", "--title"], :default => ""
@@ -3139,12 +3140,9 @@ module Cnvrg
3139
3140
  force = options["force"]
3140
3141
  max_time = options["max_time"]
3141
3142
  dataset_only_tree = options["dataset_only_tree"]
3143
+ custom_machine = options["machine"]
3142
3144
 
3143
3145
  options_hash = Hash[options]
3144
- real_options = []
3145
- options_hash.each do |o|
3146
- real_options << o if (!o[1].eql? "" and !["small", "medium", "large", "gpu_1", "gpu_2", "gpu_3"].include? o[0])
3147
- end
3148
3146
  if local
3149
3147
  invoke :exec, [cmd], :sync_before => sync_before, :sync_after => sync_after, :title => title,
3150
3148
  :log => log, :email_notification => email_notification, :upload_output => upload_output,
@@ -3163,15 +3161,10 @@ module Cnvrg
3163
3161
 
3164
3162
  end
3165
3163
  end
3166
- real_options.delete(["local", false])
3167
3164
  instances = {"small" => options["small"], "medium" => options["medium"], "large" => options["large"],
3168
- "gpu_1" => options["gpu_2"], "gpu_3" => options["gpuxxl"]}
3165
+ "gpu" => options["gpu"], "gpuxl" => options["gpuxl"], "gpuxxl" => options["gpuxxl"],
3166
+ options["machine"] => !options["machine"].blank?}
3169
3167
  instance_type = get_instance_type(instances)
3170
- if !instance_type.nil? and !instance_type.empty?
3171
- real_options << ["machine_type", instance_type]
3172
- end
3173
- exec_options = real_options.map {|x| "--#{x[0]}=#{x[1]}"}.flatten.join(" ")
3174
- cmd_to_exec = "#{exec_options} #{cmd.join(" ")}"
3175
3168
  invoke :exec_remote, [cmd], :sync_before => sync_before, :sync_after => sync_after, :title => title, :machine_type => instance_type,
3176
3169
  :schedule => schedule, :log => log, :email_notification => email_notification, :upload_output => upload_output, :commit => commit,
3177
3170
  :image => image, :grid => grid, :data => data, :data_commit => data_commit, :ignore => ignore, :force => force, :sync_before_terminate => sync_before_terminate,
@@ -3227,7 +3220,6 @@ module Cnvrg
3227
3220
  time_to_upload = calc_output_time(upload_output)
3228
3221
  project_home = get_project_home
3229
3222
  @project = Project.new(project_home)
3230
-
3231
3223
  is_new_branch = @project.compare_commit(commit)
3232
3224
  begin
3233
3225
  if !commit.nil? and !commit.empty?
@@ -3244,8 +3236,6 @@ module Cnvrg
3244
3236
  end
3245
3237
  if !indocker
3246
3238
  image_proj = is_project_with_docker(working_dir)
3247
-
3248
-
3249
3239
  if image_proj and image_proj.is_docker
3250
3240
  container = image_proj.get_container
3251
3241
  if !container
@@ -3292,7 +3282,7 @@ module Cnvrg
3292
3282
  stdout, stderr = '', ''
3293
3283
  begin
3294
3284
 
3295
- # if remote
3285
+ if remote
3296
3286
  if @exp.sync_before_terminate
3297
3287
  spot_status_thread = Thread.new do
3298
3288
  begin
@@ -3312,24 +3302,11 @@ module Cnvrg
3312
3302
  end
3313
3303
  end
3314
3304
  end
3315
- if @exp.sync_delay_time
3316
- regular_sync = Thread.new do
3317
- begin
3318
- loop do
3319
- # sync
3320
- sleep(@exp.sync_delay_time)
3321
-
3322
- download(sync = true, ignore_list = [])
3323
- upload(link = false, sync = true, direct = false, ignore_list = [])
3324
- end
3325
- rescue => e
3326
- log_error(e)
3327
- end
3328
- end
3329
- end
3330
- # end
3305
+ end
3306
+ process_running = true
3331
3307
  stats_thread = Thread.new do
3332
- loop do
3308
+ while process_running do
3309
+ sleep 30
3333
3310
  begin
3334
3311
  stats = remote ? usage_metrics_in_docker(docker_id) : Helpers.linux? ? {memory: memory_usage, cpu: cpu_usage} : {}
3335
3312
  if is_on_gpu
@@ -3337,85 +3314,90 @@ module Cnvrg
3337
3314
  stats['gpu'] = gu[0]
3338
3315
  stats['gpu_util'] = gu[1]
3339
3316
  end
3340
- @exp.send_machine_stats [stats]
3317
+ @exp.send_machine_stats [stats] unless stats.empty?
3341
3318
  rescue => e
3342
3319
  log_error(e)
3343
3320
  log_message("Failed to upload ongoing stats, continuing with experiment", Thor::Shell::Color::YELLOW)
3344
- ensure
3345
- sleep(30)
3346
3321
  end
3347
3322
  end
3348
3323
  end
3349
-
3350
-
3351
3324
  log_thread = Thread.new do
3352
- loop do
3353
- log_count = log.count
3354
- time_to_upload = calc_output_log_time(log_count)
3325
+ while process_running or !log.empty? do
3355
3326
  begin
3356
- if log_count != 0
3357
- if time_to_upload <= Time.now - start_loop
3358
- @exp.upload_temp_log(log)
3359
- log = []
3360
- start_loop = Time.now
3361
- end
3327
+ temp_log = log
3328
+ if !temp_log.empty?
3329
+ @exp.upload_temp_log(temp_log) unless temp_log.empty?
3330
+
3362
3331
  end
3332
+ log -= temp_log
3363
3333
  rescue => e
3364
3334
  log_message("Failed to upload ongoing results, continuing with experiment", Thor::Shell::Color::YELLOW)
3365
3335
  log_error(e)
3336
+ ensure
3337
+ sleep 10
3366
3338
  end
3367
- sleep(1)
3368
3339
  end
3369
3340
  end
3370
-
3371
3341
  PTY.spawn(cmd) do |stdout, stdin, pid, stderr|
3372
3342
  begin
3373
3343
  stdout.each do |line|
3374
3344
  cur_time = Time.now
3375
3345
  real_time = Time.now - real
3376
-
3377
3346
  cur_log = {time: cur_time,
3378
3347
  message: line,
3379
3348
  type: "stdout",
3380
3349
  real: real_time
3381
-
3382
3350
  }
3383
-
3384
-
3351
+ $LOG.info(cur_log)
3385
3352
  if print_log
3386
3353
  puts cur_log
3387
3354
  end
3388
3355
  log << cur_log
3356
+ # if log.size >= 10
3357
+ # @exp.upload_temp_log(log)
3358
+ # log = []
3359
+ # end
3389
3360
  end
3390
-
3391
-
3392
3361
  if stderr
3393
-
3394
3362
  stderr.each do |err|
3395
-
3396
3363
  log << {time: Time.now, message: err, type: "stderr"}
3397
3364
  end
3398
3365
  end
3399
-
3400
3366
  rescue Errno::EIO => e
3401
3367
  log_error(e)
3402
- rescue Errno::ENOENT => e
3368
+ if !log.empty?
3403
3369
 
3370
+ temp_log = log
3371
+ @exp.upload_temp_log(temp_log) unless temp_log.empty?
3372
+
3373
+ log -= temp_log
3374
+ end
3375
+ rescue Errno::ENOENT => e
3404
3376
  exp_success = false
3405
3377
  log_message("command \"#{cmd}\" couldn't be executed, verify command is valid", Thor::Shell::Color::RED)
3406
3378
  log_error(e)
3407
- rescue Open4::ChildExited
3408
- exp_success = false
3409
- log_message("The process exited!", Thor::Shell::Color::RED)
3379
+ # rescue Open4::ChildExited
3380
+ # exp_success = false
3381
+ # log_message("The process exited!", Thor::Shell::Color::RED)
3410
3382
  rescue => e
3411
3383
  sleep(20) # end cycle
3412
- res = @exp.end(log, 1, start_commit, cpu_average, memory_average)
3384
+ res = @exp.end(log, 1, start_commit, 0, 0)
3413
3385
  log_message("Error occurred,aborting", Thor::Shell::Color::RED)
3414
3386
  log_error(e)
3415
3387
  exit(0)
3416
3388
  end
3417
3389
  ::Process.wait pid
3418
3390
  end
3391
+ process_running = false
3392
+
3393
+ if !log.empty?
3394
+
3395
+ temp_log = log
3396
+ @exp.upload_temp_log(temp_log) unless temp_log.empty?
3397
+
3398
+ log -= temp_log
3399
+ end
3400
+
3419
3401
  cpu_average = cpu_total.inject(0) {|sum, el| sum + el}.to_f / cpu_total.size
3420
3402
  memory_average = memory_total.inject(0) {|sum, el| sum + el}.to_f / memory_total.size
3421
3403
  exit_status = $?.exitstatus
@@ -3438,8 +3420,10 @@ module Cnvrg
3438
3420
  end
3439
3421
  end_commit = @project.last_local_commit
3440
3422
 
3441
- sleep(30) # end cycle
3442
- res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average)
3423
+ log_thread.join
3424
+ stats_thread.join
3425
+
3426
+ res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average)
3443
3427
 
3444
3428
 
3445
3429
  if !exp_success
@@ -3465,13 +3449,15 @@ module Cnvrg
3465
3449
 
3466
3450
  }
3467
3451
  log << cur_log
3468
- sleep(20) # end cycle
3452
+ process_running = false
3453
+ log_thread.join
3454
+ stats_thread.join
3469
3455
  res = @exp.end(log, "-1", end_commit, cpu_average, memory_average)
3470
3456
 
3471
3457
  end
3472
3458
  log_error(e)
3473
- Thread.kill(log_thread)
3474
- Thread.kill(stats_thread)
3459
+ # Thread.kill(log_thread)
3460
+ # Thread.kill(stats_thread)
3475
3461
 
3476
3462
  exit(1)
3477
3463
  end
@@ -3483,7 +3469,9 @@ module Cnvrg
3483
3469
  rescue SignalException
3484
3470
  exit_status = -1
3485
3471
  end_commit = @project.last_local_commit
3486
- sleep(20) # end cycle
3472
+ process_running = false
3473
+ log_thread.join
3474
+ stats_thread.join
3487
3475
 
3488
3476
  res = @exp.end(log, exit_status, end_commit, cpu_average, memory_average)
3489
3477
  if container
@@ -5519,7 +5507,12 @@ module Cnvrg
5519
5507
  if config and !config.nil? and !config.empty? and !config.to_h[:compression_path].nil?
5520
5508
  compression_path = config.to_h[:compression_path]
5521
5509
  end
5522
- config = {owner: owner, username: username, version_last_check: get_start_day(), api: url, compression_path: compression_path}
5510
+ verify_ssl = false
5511
+
5512
+ if config and !config.nil? and !config.empty? and !config.to_h[:verfiy_ssl].nil?
5513
+ verify_ssl = config.to_h[:verify_ssl]
5514
+ end
5515
+ config = {owner: owner, username: username, version_last_check: get_start_day(), api: url, compression_path: compression_path, verfiy_ssl:verify_ssl}
5523
5516
 
5524
5517
  File.open(home_dir + "/.cnvrg/config.yml", "w+") {|f| f.write config.to_yaml}
5525
5518
  return true
@@ -6145,27 +6138,30 @@ module Cnvrg
6145
6138
  end
6146
6139
 
6147
6140
  def gpu_util
6148
- gpu = [0.0, 0.0]
6141
+ stats = [[],[]]
6149
6142
  begin
6150
- gpu_stats = `nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv |tail -1`
6143
+ gpu_stats = `nvidia-smi --query-gpu=utilization.gpu,utilization.memory --format=csv`
6151
6144
 
6152
6145
  if !gpu_stats.nil?
6153
- gpu = gpu_stats.strip
6154
- gpu = gpu_stats.gsub!("%", "").split(",")
6155
- gpu[0] = gpu[0].to_f
6156
- gpu[1] = gpu[1].to_f
6157
- return gpu
6146
+ gpu_stats = gpu_stats.split("\n")[1..-1]
6147
+ stats = [[],[]]
6148
+ gpu_stats.each do |stat|
6149
+ gpu = stat.strip.gsub!("%", "").split(",")
6150
+ stats[0] << gpu[0].to_f
6151
+ stats[1] << gpu[1].to_f
6152
+ end
6153
+ return stats
6158
6154
  end
6159
6155
 
6160
6156
  rescue
6161
- return gpu
6157
+ return stats
6162
6158
  end
6163
6159
 
6164
6160
 
6165
6161
  end
6166
6162
 
6167
6163
  def usage_metrics_in_docker(docker_id)
6168
- res = {cpu: 0.0, memory: 0.0}
6164
+ res = {cpu: 0.0, memory: 0.0, block_io: {input: 0, output: 0.0}}
6169
6165
  begin
6170
6166
  if docker_id.nil?
6171
6167
  docker_id = `cat /etc/hostname`
@@ -6174,15 +6170,46 @@ module Cnvrg
6174
6170
  if !stats.nil?
6175
6171
  conv = stats.split(",")
6176
6172
  cpu = conv[0].gsub!("%", "").to_f
6173
+ res[:cpu] = cpu
6177
6174
  memory = conv[1].gsub!("%", "").to_f
6178
- res = {cpu: cpu, memory: memory}
6175
+ res[:memory] = memory
6176
+ block_io = parse_io conv[2]
6177
+ res = {cpu: cpu, memory: memory, block_io: block_io}
6179
6178
  return res
6180
6179
  end
6181
6180
  rescue
6182
6181
  return res
6183
6182
  end
6183
+ end
6184
+
6184
6185
 
6185
6186
 
6187
+ def parse_io(block_io_str)
6188
+ block_io = block_io_str.gsub!(" ", "").split('/')
6189
+ input = block_io[0]
6190
+ output = block_io[1]
6191
+ r = Regexp.new('(\d+(\.\d+)?)([A-Za-z]+)')
6192
+ input_match = r.match(input)
6193
+ input = input_match[1].to_f * size_to_bytes(input_match[3])
6194
+ output_match = r.match(output)
6195
+ output = output_match[1].to_f * size_to_bytes(output_match[3])
6196
+ {input: input, output: output}
6197
+ end
6198
+
6199
+
6200
+ def size_to_bytes size
6201
+ case size.try(:downcase)
6202
+ when 'b'
6203
+ 1
6204
+ when 'kb'
6205
+ 2**10
6206
+ when 'mb'
6207
+ 2**20
6208
+ when 'gb'
6209
+ 2**30
6210
+ else
6211
+ 1
6212
+ end
6186
6213
  end
6187
6214
 
6188
6215
  end
@@ -381,7 +381,7 @@ module Cnvrg
381
381
  end
382
382
  response = Cnvrg::API.request("users/#{self.owner}/projects/#{self.slug}/status", 'POST', {idx: local_idx, new_branch: new_branch,
383
383
  current_commit: commit,ignore:ignore_list, force:force,in_exp:in_exp})
384
- CLI.is_response_success(response,false)
384
+ CLI.is_response_success(response,true)
385
385
  return response
386
386
  end
387
387
  def jump_idx(new_branch, commit=last_local_commit)
@@ -1,4 +1,4 @@
1
1
  module Cnvrg
2
- VERSION = '0.4.4'
2
+ VERSION = '0.5.0'
3
3
  end
4
4
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cnvrg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yochay Ettun
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-05-17 00:00:00.000000000 Z
12
+ date: 2018-06-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler