omniopt2 8424__tar.gz → 8455__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omniopt2 might be problematic. Click here for more details.

Files changed (40)
  1. {omniopt2-8424 → omniopt2-8455}/.omniopt.py +143 -78
  2. {omniopt2-8424 → omniopt2-8455}/.tpe.py +1 -1
  3. {omniopt2-8424 → omniopt2-8455}/PKG-INFO +1 -1
  4. {omniopt2-8424 → omniopt2-8455}/omniopt +37 -15
  5. {omniopt2-8424 → omniopt2-8455}/omniopt2.egg-info/PKG-INFO +1 -1
  6. {omniopt2-8424 → omniopt2-8455}/pyproject.toml +1 -1
  7. {omniopt2-8424 → omniopt2-8455}/.colorfunctions.sh +0 -0
  8. {omniopt2-8424 → omniopt2-8455}/.dockerignore +0 -0
  9. {omniopt2-8424 → omniopt2-8455}/.general.sh +0 -0
  10. {omniopt2-8424 → omniopt2-8455}/.gitignore +0 -0
  11. {omniopt2-8424 → omniopt2-8455}/.helpers.py +0 -0
  12. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_cpu_ram_usage.py +0 -0
  13. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_general.py +0 -0
  14. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_gpu_usage.py +0 -0
  15. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_kde.py +0 -0
  16. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_scatter.py +0 -0
  17. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_scatter_generation_method.py +0 -0
  18. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_scatter_hex.py +0 -0
  19. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_time_and_exit_code.py +0 -0
  20. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_trial_index_result.py +0 -0
  21. {omniopt2-8424 → omniopt2-8455}/.omniopt_plot_worker.py +0 -0
  22. {omniopt2-8424 → omniopt2-8455}/.random_generator.py +0 -0
  23. {omniopt2-8424 → omniopt2-8455}/.shellscript_functions +0 -0
  24. {omniopt2-8424 → omniopt2-8455}/.tests/pylint.rc +0 -0
  25. {omniopt2-8424 → omniopt2-8455}/LICENSE +0 -0
  26. {omniopt2-8424 → omniopt2-8455}/MANIFEST.in +0 -0
  27. {omniopt2-8424 → omniopt2-8455}/README.md +0 -0
  28. {omniopt2-8424 → omniopt2-8455}/apt-dependencies.txt +0 -0
  29. {omniopt2-8424 → omniopt2-8455}/omniopt2.egg-info/SOURCES.txt +0 -0
  30. {omniopt2-8424 → omniopt2-8455}/omniopt2.egg-info/dependency_links.txt +0 -0
  31. {omniopt2-8424 → omniopt2-8455}/omniopt2.egg-info/requires.txt +0 -0
  32. {omniopt2-8424 → omniopt2-8455}/omniopt2.egg-info/top_level.txt +0 -0
  33. {omniopt2-8424 → omniopt2-8455}/omniopt_docker +0 -0
  34. {omniopt2-8424 → omniopt2-8455}/omniopt_evaluate +0 -0
  35. {omniopt2-8424 → omniopt2-8455}/omniopt_plot +0 -0
  36. {omniopt2-8424 → omniopt2-8455}/omniopt_share +0 -0
  37. {omniopt2-8424 → omniopt2-8455}/requirements.txt +0 -0
  38. {omniopt2-8424 → omniopt2-8455}/setup.cfg +0 -0
  39. {omniopt2-8424 → omniopt2-8455}/setup.py +0 -0
  40. {omniopt2-8424 → omniopt2-8455}/test_requirements.txt +0 -0
@@ -478,7 +478,7 @@ def get_current_run_folder(name: Optional[str] = None) -> str:
478
478
 
479
479
  return CURRENT_RUN_FOLDER
480
480
 
481
- def get_state_file_name(name) -> str:
481
+ def get_state_file_name(name: str) -> str:
482
482
  state_files_folder = f"{get_current_run_folder()}/state_files/"
483
483
  makedirs(state_files_folder)
484
484
 
@@ -577,20 +577,22 @@ def _debug(msg: str, _lvl: int = 0, eee: Union[None, str, Exception] = None) ->
577
577
  def _get_debug_json(time_str: str, msg: str) -> str:
578
578
  function_stack = []
579
579
  try:
580
- frame = inspect.currentframe().f_back # skip _get_debug_json
581
- while frame:
582
- func_name = _function_name_cache.get(frame.f_code)
583
- if func_name is None:
584
- func_name = frame.f_code.co_name
585
- _function_name_cache[frame.f_code] = func_name
586
-
587
- if func_name not in ("<module>", "print_debug", "wrapper"):
588
- function_stack.append({
589
- "function": func_name,
590
- "line_number": frame.f_lineno
591
- })
592
-
593
- frame = frame.f_back
580
+ cf = inspect.currentframe()
581
+ if cf:
582
+ frame = cf.f_back # skip _get_debug_json
583
+ while frame:
584
+ func_name = _function_name_cache.get(frame.f_code)
585
+ if func_name is None:
586
+ func_name = frame.f_code.co_name
587
+ _function_name_cache[frame.f_code] = func_name
588
+
589
+ if func_name not in ("<module>", "print_debug", "wrapper"):
590
+ function_stack.append({
591
+ "function": func_name,
592
+ "line_number": frame.f_lineno
593
+ })
594
+
595
+ frame = frame.f_back
594
596
  except (SignalUSR, SignalINT, SignalCONT):
595
597
  print_red("\n⚠ You pressed CTRL-C. This is ignored in _get_debug_json.")
596
598
 
@@ -692,11 +694,14 @@ def my_exit(_code: int = 0) -> None:
692
694
  if is_skip_search() and os.getenv("SKIP_SEARCH_EXIT_CODE"):
693
695
  skip_search_exit_code = os.getenv("SKIP_SEARCH_EXIT_CODE")
694
696
 
697
+ skip_search_exit_code_found = None
698
+
695
699
  try:
696
- sys.exit(int(skip_search_exit_code))
700
+ skip_search_exit_code_found = int(skip_search_exit_code)
701
+
702
+ sys.exit(skip_search_exit_code_found)
697
703
  except ValueError:
698
- print(f"Trying to look for SKIP_SEARCH_EXIT_CODE failed. Exiting with original exit code {_code}")
699
- sys.exit(_code)
704
+ print_debug(f"Trying to look for SKIP_SEARCH_EXIT_CODE failed. Exiting with original exit code {_code}")
700
705
 
701
706
  sys.exit(_code)
702
707
 
@@ -2087,12 +2092,6 @@ def init_live_share() -> bool:
2087
2092
 
2088
2093
  return ret
2089
2094
 
2090
- async def start_periodic_live_share() -> None:
2091
- if args.live_share and not os.environ.get("CI"):
2092
- while True:
2093
- live_share(force=False)
2094
- time.sleep(30)
2095
-
2096
2095
  def init_storage(db_url: str) -> None:
2097
2096
  init_engine_and_session_factory(url=db_url, force_init=True)
2098
2097
  engine = get_engine()
@@ -2162,6 +2161,9 @@ def save_results_csv() -> Optional[str]:
2162
2161
 
2163
2162
  try:
2164
2163
  df = fetch_and_prepare_trials()
2164
+ if df is None:
2165
+ print_red(f"save_results_csv: fetch_and_prepare_trials returned an empty element: {df}")
2166
+ return None
2165
2167
  write_csv(df, pd_csv)
2166
2168
  write_json_snapshot(pd_json)
2167
2169
  save_experiment_to_file()
@@ -2174,14 +2176,17 @@ def save_results_csv() -> Optional[str]:
2174
2176
  except (SignalUSR, SignalCONT, SignalINT) as e:
2175
2177
  raise type(e)(str(e)) from e
2176
2178
  except Exception as e:
2177
- print_red(f"While saving all trials as a pandas-dataframe-csv, an error occurred: {e}")
2179
+ print_red(f"\nWhile saving all trials as a pandas-dataframe-csv, an error occurred: {e}")
2178
2180
 
2179
2181
  return pd_csv
2180
2182
 
2181
2183
  def get_results_paths() -> tuple[str, str]:
2182
2184
  return (get_current_run_folder(RESULTS_CSV_FILENAME), get_state_file_name('pd.json'))
2183
2185
 
2184
- def fetch_and_prepare_trials() -> pd.DataFrame:
2186
+ def fetch_and_prepare_trials() -> Optional[pd.DataFrame]:
2187
+ if not ax_client:
2188
+ return None
2189
+
2185
2190
  ax_client.experiment.fetch_data()
2186
2191
  df = ax_client.get_trials_data_frame()
2187
2192
 
@@ -2202,15 +2207,21 @@ def write_csv(df: pd.DataFrame, path: str) -> None:
2202
2207
  df.to_csv(path, index=False, float_format="%.30f")
2203
2208
 
2204
2209
  def write_json_snapshot(path: str) -> None:
2205
- json_snapshot = ax_client.to_json_snapshot()
2206
- with open(path, "w", encoding="utf-8") as f:
2207
- json.dump(json_snapshot, f, indent=4)
2210
+ if ax_client is not None:
2211
+ json_snapshot = ax_client.to_json_snapshot()
2212
+ with open(path, "w", encoding="utf-8") as f:
2213
+ json.dump(json_snapshot, f, indent=4)
2214
+ else:
2215
+ print_red("write_json_snapshot: ax_client was None")
2208
2216
 
2209
2217
  def save_experiment_to_file() -> None:
2210
- save_experiment(
2211
- ax_client.experiment,
2212
- get_state_file_name("ax_client.experiment.json")
2213
- )
2218
+ if ax_client is not None:
2219
+ save_experiment(
2220
+ ax_client.experiment,
2221
+ get_state_file_name("ax_client.experiment.json")
2222
+ )
2223
+ else:
2224
+ print_red("save_experiment: ax_client is None")
2214
2225
 
2215
2226
  def should_save_to_database() -> bool:
2216
2227
  return args.model not in uncontinuable_models and args.save_to_database
@@ -5431,9 +5442,14 @@ def set_objectives() -> dict:
5431
5442
 
5432
5443
  return objectives
5433
5444
 
5434
- def set_experiment_constraints(experiment_constraints: Optional[list], experiment_args: dict, _experiment_parameters: Union[dict, list]) -> dict:
5435
- if experiment_constraints and len(experiment_constraints):
5445
+ def set_experiment_constraints(experiment_constraints: Optional[list], experiment_args: dict, _experiment_parameters: Optional[Union[dict, list]]) -> dict:
5446
+ if _experiment_parameters is None:
5447
+ print_red("set_experiment_constraints: _experiment_parameters was None")
5448
+ my_exit(95)
5436
5449
 
5450
+ return {}
5451
+
5452
+ if experiment_constraints and len(experiment_constraints):
5437
5453
  experiment_args["parameter_constraints"] = []
5438
5454
 
5439
5455
  if experiment_constraints:
@@ -5463,6 +5479,10 @@ def set_experiment_constraints(experiment_constraints: Optional[list], experimen
5463
5479
  return experiment_args
5464
5480
 
5465
5481
  def replace_parameters_for_continued_jobs(parameter: Optional[list], cli_params_experiment_parameters: Optional[list]) -> None:
5482
+ if not experiment_parameters:
5483
+ print_red("replace_parameters_for_continued_jobs: experiment_parameters was False")
5484
+ return None
5485
+
5466
5486
  if args.worker_generator_path:
5467
5487
  return None
5468
5488
 
@@ -5548,13 +5568,13 @@ def copy_continue_uuid() -> None:
5548
5568
  print_debug(f"copy_continue_uuid: Source file does not exist: {source_file}")
5549
5569
 
5550
5570
  def load_ax_client_from_experiment_parameters() -> None:
5551
- #pprint(experiment_parameters)
5552
- global ax_client
5571
+ if experiment_parameters:
5572
+ global ax_client
5553
5573
 
5554
- tmp_file_path = get_tmp_file_from_json(experiment_parameters)
5555
- ax_client = AxClient.load_from_json_file(tmp_file_path)
5556
- ax_client = cast(AxClient, ax_client)
5557
- os.unlink(tmp_file_path)
5574
+ tmp_file_path = get_tmp_file_from_json(experiment_parameters)
5575
+ ax_client = AxClient.load_from_json_file(tmp_file_path)
5576
+ ax_client = cast(AxClient, ax_client)
5577
+ os.unlink(tmp_file_path)
5558
5578
 
5559
5579
  def save_checkpoint_for_continued() -> None:
5560
5580
  checkpoint_filepath = get_state_file_name('checkpoint.json')
@@ -5566,12 +5586,15 @@ def save_checkpoint_for_continued() -> None:
5566
5586
  _fatal_error(f"{checkpoint_filepath} not found. Cannot continue_previous_job without.", 47)
5567
5587
 
5568
5588
  def load_original_generation_strategy(original_ax_client_file: str) -> None:
5569
- with open(original_ax_client_file, encoding="utf-8") as f:
5570
- loaded_original_ax_client_json = json.load(f)
5571
- original_generation_strategy = loaded_original_ax_client_json["generation_strategy"]
5589
+ if experiment_parameters:
5590
+ with open(original_ax_client_file, encoding="utf-8") as f:
5591
+ loaded_original_ax_client_json = json.load(f)
5592
+ original_generation_strategy = loaded_original_ax_client_json["generation_strategy"]
5572
5593
 
5573
- if original_generation_strategy:
5574
- experiment_parameters["generation_strategy"] = original_generation_strategy
5594
+ if original_generation_strategy:
5595
+ experiment_parameters["generation_strategy"] = original_generation_strategy
5596
+ else:
5597
+ print_red("load_original_generation_strategy: experiment_parameters was empty!")
5575
5598
 
5576
5599
  def wait_for_checkpoint_file(checkpoint_file: str) -> None:
5577
5600
  start_time = time.time()
@@ -5611,6 +5634,11 @@ def validate_experiment_parameters() -> None:
5611
5634
  my_exit(95)
5612
5635
 
5613
5636
  def __get_experiment_parameters__load_from_checkpoint(continue_previous_job: str, cli_params_experiment_parameters: Optional[list]) -> Tuple[Any, str, str]:
5637
+ if not ax_client:
5638
+ print_red("__get_experiment_parameters__load_from_checkpoint: ax_client was None")
5639
+ my_exit(101)
5640
+ return {}, "", ""
5641
+
5614
5642
  print_debug(f"Load from checkpoint: {continue_previous_job}")
5615
5643
 
5616
5644
  checkpoint_file = f"{continue_previous_job}/state_files/checkpoint.json"
@@ -5652,6 +5680,12 @@ def __get_experiment_parameters__load_from_checkpoint(continue_previous_job: str
5652
5680
 
5653
5681
  experiment_constraints = get_constraints()
5654
5682
  if experiment_constraints:
5683
+
5684
+ if not experiment_parameters:
5685
+ print_red("__get_experiment_parameters__load_from_checkpoint: experiment_parameters was None")
5686
+
5687
+ return {}, "", ""
5688
+
5655
5689
  experiment_args = set_experiment_constraints(
5656
5690
  experiment_constraints,
5657
5691
  experiment_args,
@@ -5661,6 +5695,12 @@ def __get_experiment_parameters__load_from_checkpoint(continue_previous_job: str
5661
5695
  return experiment_args, gpu_string, gpu_color
5662
5696
 
5663
5697
  def __get_experiment_parameters__create_new_experiment() -> Tuple[dict, str, str]:
5698
+ if ax_client is None:
5699
+ print_red("__get_experiment_parameters__create_new_experiment: ax_client is None")
5700
+ my_exit(101)
5701
+
5702
+ return {}, "", ""
5703
+
5664
5704
  objectives = set_objectives()
5665
5705
 
5666
5706
  experiment_args = {
@@ -5950,10 +5990,13 @@ def print_overview_tables(classic_params: Optional[Union[list, dict]], experimen
5950
5990
  print_result_names_overview_table()
5951
5991
 
5952
5992
  def update_progress_bar(nr: int) -> None:
5953
- try:
5954
- progress_bar.update(nr)
5955
- except Exception as e:
5956
- print(f"Error updating progress bar: {e}")
5993
+ if progress_bar is not None:
5994
+ try:
5995
+ progress_bar.update(nr)
5996
+ except Exception as e:
5997
+ print(f"Error updating progress bar: {e}")
5998
+ else:
5999
+ print_red("update_progress_bar: progress_bar was None")
5957
6000
 
5958
6001
  def get_current_model_name() -> str:
5959
6002
  if overwritten_to_random:
@@ -6077,7 +6120,7 @@ def submitted_jobs(nr: int = 0) -> int:
6077
6120
  def count_jobs_in_squeue() -> tuple[int, str]:
6078
6121
  global _last_count_time, _last_count_result
6079
6122
 
6080
- now = time.time()
6123
+ now = int(time.time())
6081
6124
  if _last_count_result != (0, "") and now - _last_count_time < 15:
6082
6125
  return _last_count_result
6083
6126
 
@@ -6299,7 +6342,7 @@ def load_existing_job_data_into_ax_client() -> None:
6299
6342
  nr_of_imported_jobs = get_nr_of_imported_jobs()
6300
6343
  set_nr_inserted_jobs(NR_INSERTED_JOBS + nr_of_imported_jobs)
6301
6344
 
6302
- def parse_parameter_type_error(_error_message: Union[str, None]) -> Optional[dict]:
6345
+ def parse_parameter_type_error(_error_message: Union[Exception, str, None]) -> Optional[dict]:
6303
6346
  if not _error_message:
6304
6347
  return None
6305
6348
 
@@ -6491,7 +6534,7 @@ def normalize_path(file_path: str) -> str:
6491
6534
 
6492
6535
  def insert_jobs_from_lists(csv_path: str, arm_params_list: Any, results_list: Any, __status: Any) -> None:
6493
6536
  cnt = 0
6494
- err_msgs = []
6537
+ err_msgs: list = []
6495
6538
 
6496
6539
  for i, (arm_params, result) in enumerate(zip(arm_params_list, results_list)):
6497
6540
  base_str = f"[bold green]Loading job {i}/{len(results_list)} from {csv_path} into ax_client, result: {result}"
@@ -6525,9 +6568,13 @@ def try_insert_job(csv_path: str, arm_params: Dict, result: Any, i: int, arm_par
6525
6568
  f"This can happen when the csv file has different parameters or results as the main job one's "
6526
6569
  f"or other imported jobs. Error: {e}"
6527
6570
  )
6528
- if err_msg not in err_msgs:
6529
- print_red(err_msg)
6530
- err_msgs.append(err_msg)
6571
+
6572
+ if err_msgs is None:
6573
+ print_red("try_insert_job: err_msgs was None")
6574
+ else:
6575
+ if err_msg not in err_msgs:
6576
+ print_red(err_msg)
6577
+ err_msgs.append(err_msg)
6531
6578
 
6532
6579
  return cnt
6533
6580
 
@@ -6553,12 +6600,18 @@ def __insert_job_into_ax_client__check_ax_client() -> None:
6553
6600
  _fatal_error("insert_job_into_ax_client: ax_client was not defined where it should have been", 101)
6554
6601
 
6555
6602
  def __insert_job_into_ax_client__attach_trial(arm_params: dict) -> Tuple[Any, int]:
6603
+ if ax_client is None:
6604
+ raise RuntimeError("__insert_job_into_ax_client__attach_trial: ax_client was empty")
6605
+
6556
6606
  new_trial = ax_client.attach_trial(arm_params)
6557
6607
  if not isinstance(new_trial, tuple) or len(new_trial) < 2:
6558
6608
  raise RuntimeError("attach_trial didn't return the expected tuple")
6559
6609
  return new_trial
6560
6610
 
6561
6611
  def __insert_job_into_ax_client__get_trial(trial_idx: int) -> Any:
6612
+ if ax_client is None:
6613
+ raise RuntimeError("__insert_job_into_ax_client__get_trial: ax_client was empty")
6614
+
6562
6615
  trial = ax_client.experiment.trials.get(trial_idx)
6563
6616
  if trial is None:
6564
6617
  raise RuntimeError(f"Trial with index {trial_idx} not found")
@@ -6569,6 +6622,9 @@ def __insert_job_into_ax_client__create_generator_run(arm_params: dict, trial_id
6569
6622
  return GeneratorRun(arms=[arm], generation_node_name=new_job_type)
6570
6623
 
6571
6624
  def __insert_job_into_ax_client__complete_trial_if_result(trial_idx: int, result: dict, __status: Optional[Any], base_str: Optional[str]) -> None:
6625
+ if ax_client is None:
6626
+ raise RuntimeError("__insert_job_into_ax_client__complete_trial_if_result: ax_client was empty")
6627
+
6572
6628
  if f"{result}" != "":
6573
6629
  __insert_job_into_ax_client__update_status(__status, base_str, "Completing trial")
6574
6630
  is_ok = True
@@ -7376,11 +7432,15 @@ def is_already_in_defective_nodes(hostname: str) -> bool:
7376
7432
  return True
7377
7433
  except Exception as e:
7378
7434
  print_red(f"is_already_in_defective_nodes: Error reading the file {file_path}: {e}")
7379
- return False
7380
7435
 
7381
7436
  return False
7382
7437
 
7383
7438
  def submit_new_job(parameters: Union[dict, str], trial_index: int) -> Any:
7439
+ if submitit_executor is None:
7440
+ print_red("submit_new_job: submitit_executor was None")
7441
+
7442
+ return None
7443
+
7384
7444
  print_debug(f"Submitting new job for trial_index {trial_index}, parameters {parameters}")
7385
7445
 
7386
7446
  start = time.time()
@@ -7396,18 +7456,21 @@ def submit_new_job(parameters: Union[dict, str], trial_index: int) -> Any:
7396
7456
  def orchestrator_start_trial(parameters: Union[dict, str], trial_index: int) -> None:
7397
7457
  if submitit_executor and ax_client:
7398
7458
  new_job = submit_new_job(parameters, trial_index)
7399
- submitted_jobs(1)
7459
+ if new_job:
7460
+ submitted_jobs(1)
7400
7461
 
7401
- _trial = ax_client.get_trial(trial_index)
7462
+ _trial = ax_client.get_trial(trial_index)
7402
7463
 
7403
- try:
7404
- _trial.mark_staged(unsafe=True)
7405
- except Exception as e:
7406
- print_debug(f"orchestrator_start_trial: error {e}")
7407
- _trial.mark_running(unsafe=True, no_runner_required=True)
7464
+ try:
7465
+ _trial.mark_staged(unsafe=True)
7466
+ except Exception as e:
7467
+ print_debug(f"orchestrator_start_trial: error {e}")
7468
+ _trial.mark_running(unsafe=True, no_runner_required=True)
7408
7469
 
7409
- print_debug(f"orchestrator_start_trial: appending job {new_job} to global_vars['jobs'], trial_index: {trial_index}")
7410
- global_vars["jobs"].append((new_job, trial_index))
7470
+ print_debug(f"orchestrator_start_trial: appending job {new_job} to global_vars['jobs'], trial_index: {trial_index}")
7471
+ global_vars["jobs"].append((new_job, trial_index))
7472
+ else:
7473
+ print_red("orchestrator_start_trial: Failed to start new job")
7411
7474
  else:
7412
7475
  _fatal_error("submitit_executor or ax_client could not be found properly", 9)
7413
7476
 
@@ -7539,15 +7602,18 @@ def execute_evaluation(_params: list) -> Optional[int]:
7539
7602
  try:
7540
7603
  initialize_job_environment()
7541
7604
  new_job = submit_new_job(parameters, trial_index)
7542
- submitted_jobs(1)
7605
+ if new_job:
7606
+ submitted_jobs(1)
7543
7607
 
7544
- print_debug(f"execute_evaluation: appending job {new_job} to global_vars['jobs'], trial_index: {trial_index}")
7545
- global_vars["jobs"].append((new_job, trial_index))
7608
+ print_debug(f"execute_evaluation: appending job {new_job} to global_vars['jobs'], trial_index: {trial_index}")
7609
+ global_vars["jobs"].append((new_job, trial_index))
7546
7610
 
7547
- mark_trial_stage("mark_running", "Marking the trial as running failed")
7548
- trial_counter += 1
7611
+ mark_trial_stage("mark_running", "Marking the trial as running failed")
7612
+ trial_counter += 1
7549
7613
 
7550
- progressbar_description("started new job")
7614
+ progressbar_description("started new job")
7615
+ else:
7616
+ progressbar_description("Failed to start new job")
7551
7617
  except submitit.core.utils.FailedJobError as error:
7552
7618
  handle_failed_job(error, trial_index, new_job)
7553
7619
  trial_counter += 1
@@ -7645,10 +7711,12 @@ def show_debug_table_for_break_run_search(_name: str, _max_eval: Optional[int])
7645
7711
  ("failed_jobs()", failed_jobs()),
7646
7712
  ("count_done_jobs()", count_done_jobs()),
7647
7713
  ("_max_eval", _max_eval),
7648
- ("progress_bar.total", progress_bar.total),
7649
7714
  ("NR_INSERTED_JOBS", NR_INSERTED_JOBS)
7650
7715
  ]
7651
7716
 
7717
+ if progress_bar is not None:
7718
+ rows.append(("progress_bar.total", progress_bar.total))
7719
+
7652
7720
  for row in rows:
7653
7721
  table.add_row(str(row[0]), str(row[1]))
7654
7722
 
@@ -8400,7 +8468,7 @@ def get_model_from_name(name: str) -> Any:
8400
8468
  return gen
8401
8469
  raise ValueError(f"Unknown or unsupported model: {name}")
8402
8470
 
8403
- def get_name_from_model(model) -> Optional[str]:
8471
+ def get_name_from_model(model: Any) -> Optional[str]:
8404
8472
  if not isinstance(SUPPORTED_MODELS, (list, set, tuple)):
8405
8473
  return None
8406
8474
 
@@ -10403,8 +10471,6 @@ def main() -> None:
10403
10471
 
10404
10472
  init_live_share()
10405
10473
 
10406
- start_periodic_live_share()
10407
-
10408
10474
  show_available_hardware_and_generation_strategy_string(gpu_string, gpu_color)
10409
10475
 
10410
10476
  original_print(f"Run-Program: {global_vars['joined_run_program']}")
@@ -11101,7 +11167,7 @@ def main_outside() -> None:
11101
11167
 
11102
11168
  print_logo()
11103
11169
 
11104
- start_logging_daemon()
11170
+ start_logging_daemon() # type: ignore[unused-coroutine]
11105
11171
 
11106
11172
  fool_linter(args.num_cpus_main_job)
11107
11173
  fool_linter(args.flame_graph)
@@ -11169,7 +11235,6 @@ def auto_wrap_namespace(namespace: Any) -> Any:
11169
11235
  "_record_stats",
11170
11236
  "_open",
11171
11237
  "_check_memory_leak",
11172
- "start_periodic_live_share",
11173
11238
  "start_logging_daemon",
11174
11239
  "get_current_run_folder",
11175
11240
  "show_func_name_wrapper"
@@ -53,7 +53,7 @@ def tpe_suggest_point(trial: optuna.Trial, parameters: dict) -> dict:
53
53
  if pvaltype == 'INT':
54
54
  point[param_name] = trial.suggest_int(param_name, rmin, rmax)
55
55
  elif pvaltype == 'FLOAT':
56
- point[param_name] = trial.suggest_float(param_name, rmin, rmax)
56
+ point[param_name] = trial.suggest_float(param_name, rmin, rmax) # type: ignore[assignment]
57
57
  else:
58
58
  raise ValueError(f"Unsupported type {pvaltype} for RANGE")
59
59
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omniopt2
3
- Version: 8424
3
+ Version: 8455
4
4
  Summary: Automatic highly parallelized hyperparameter optimizer based on Ax/Botorch
5
5
  Home-page: https://scads.ai/transfer-2/verfuegbare-software-dienste-en/omniopt/
6
6
  Author: Norman Koch
@@ -346,6 +346,31 @@
346
346
  send_anonymized_usage_stats=0
347
347
  already_shown_oo_base_url_msg=0
348
348
 
349
+ function run_live_share {
350
+ if [[ $RUN_UUID != "" ]]; then
351
+ full_log_file="$ORIGINAL_PWD/logs/$RUN_UUID"
352
+ if [[ -e "$full_log_file" ]]; then
353
+ set +e
354
+ run_folder=$(cat "$full_log_file" | grep "Run-folder:" | sed -e 's#Run-folder: ##')
355
+ if [[ -z $run_folder ]]; then
356
+ true
357
+ else
358
+ bash "$SCRIPT_DIR/omniopt_share" --username="$USER" "$run_folder" 2>/dev/null >/dev/null
359
+ fi
360
+ set -e
361
+ else
362
+ red_text "--live_share enabled, but $full_log_file could not be found. Cannot share once again in finalization.\n"
363
+ fi
364
+ fi
365
+ }
366
+
367
+ function start_periodidic_live_share {
368
+ while true; do
369
+ run_live_share 2>/dev/null >/dev/null
370
+ sleep 30
371
+ done
372
+ }
373
+
349
374
  function myexit {
350
375
  CODE=$1
351
376
 
@@ -382,21 +407,7 @@
382
407
 
383
408
  if [[ $follow -eq 1 ]] || ! command -v sbatch 2>/dev/null >/dev/null || [[ $force_local_execution -eq 1 ]]; then
384
409
  if [[ $live_share -eq 1 ]]; then
385
- if [[ $RUN_UUID != "" ]]; then
386
- full_log_file="$ORIGINAL_PWD/logs/$RUN_UUID"
387
- if [[ -e "$full_log_file" ]]; then
388
- set +e
389
- run_folder=$(cat "$full_log_file" | grep "Run-folder:" | sed -e 's#Run-folder: ##')
390
- if [[ -z $run_folder ]]; then
391
- true
392
- else
393
- bash "$SCRIPT_DIR/omniopt_share" --username="$USER" "$run_folder" 2>/dev/null >/dev/null
394
- fi
395
- set -e
396
- else
397
- red_text "--live_share enabled, but $full_log_file could not be found. Cannot share once again in finalization.\n"
398
- fi
399
- fi
410
+ run_live_share
400
411
  fi
401
412
  fi
402
413
 
@@ -1620,6 +1631,13 @@ EOF
1620
1631
  set +e
1621
1632
  trap - ERR
1622
1633
 
1634
+ live_share_pid=""
1635
+
1636
+ if [[ $live_share -eq 1 ]]; then
1637
+ start_periodidic_live_share &
1638
+ live_share_pid=$!
1639
+ fi
1640
+
1623
1641
  if [[ -z $RUN_WITH_COVERAGE ]]; then
1624
1642
  if [[ -z $RUN_WITH_PYSPY ]]; then
1625
1643
  stdbuf -e 0 -o 0 python3 "$SCRIPT_DIR/.omniopt.py" $args_string
@@ -1636,6 +1654,10 @@ EOF
1636
1654
  EXIT_CODE=$?
1637
1655
  fi
1638
1656
 
1657
+ if [[ $live_share -eq 1 ]] && [[ -n $live_share_pid ]]; then
1658
+ kill -9 $live_share_pid
1659
+ fi
1660
+
1639
1661
  set -e
1640
1662
  trap 'calltracer' ERR
1641
1663
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omniopt2
3
- Version: 8424
3
+ Version: 8455
4
4
  Summary: Automatic highly parallelized hyperparameter optimizer based on Ax/Botorch
5
5
  Home-page: https://scads.ai/transfer-2/verfuegbare-software-dienste-en/omniopt/
6
6
  Author: Norman Koch
@@ -5,7 +5,7 @@ authors = [
5
5
  {email = "norman.koch@tu-dresden.de"},
6
6
  {name = "Norman Koch"}
7
7
  ]
8
- version = "8424"
8
+ version = "8455"
9
9
 
10
10
  readme = "README.md"
11
11
  dynamic = ["dependencies"]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes