omniopt2 7094__py3-none-any.whl → 7099__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. .omniopt.py +238 -247
  2. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt.py +238 -247
  3. {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/METADATA +1 -1
  4. {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/RECORD +35 -35
  5. omniopt2.egg-info/PKG-INFO +1 -1
  6. pyproject.toml +1 -1
  7. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.colorfunctions.sh +0 -0
  8. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.general.sh +0 -0
  9. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.helpers.py +0 -0
  10. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_cpu_ram_usage.py +0 -0
  11. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_general.py +0 -0
  12. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_gpu_usage.py +0 -0
  13. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_kde.py +0 -0
  14. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_scatter.py +0 -0
  15. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_scatter_generation_method.py +0 -0
  16. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_scatter_hex.py +0 -0
  17. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_time_and_exit_code.py +0 -0
  18. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_trial_index_result.py +0 -0
  19. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_worker.py +0 -0
  20. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.random_generator.py +0 -0
  21. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.shellscript_functions +0 -0
  22. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.tpe.py +0 -0
  23. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/LICENSE +0 -0
  24. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/apt-dependencies.txt +0 -0
  25. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt +0 -0
  26. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_docker +0 -0
  27. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_evaluate +0 -0
  28. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_plot +0 -0
  29. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_share +0 -0
  30. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/requirements.txt +0 -0
  31. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/setup.py +0 -0
  32. {omniopt2-7094.data → omniopt2-7099.data}/data/bin/test_requirements.txt +0 -0
  33. {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/WHEEL +0 -0
  34. {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/licenses/LICENSE +0 -0
  35. {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/top_level.txt +0 -0
@@ -565,6 +565,7 @@ class ConfigLoader:
565
565
  verbose_break_run_search_table: bool
566
566
  send_anonymized_usage_stats: bool
567
567
  max_failed_jobs: Optional[int]
568
+ max_abandoned_retrial: int
568
569
  show_ram_every_n_seconds: int
569
570
  config_toml: Optional[str]
570
571
  config_json: Optional[str]
@@ -653,6 +654,7 @@ class ConfigLoader:
653
654
  optional.add_argument('--calculate_pareto_front_of_job', help='This can be used to calculate a pareto-front for a multi-objective job that previously has results, but has been cancelled, and has no pareto-front (yet)', type=str, nargs='+', default=[])
654
655
  optional.add_argument('--show_generate_time_table', help='Generate a table at the end, showing how much time was spent trying to generate new points', action='store_true', default=False)
655
656
  optional.add_argument('--force_choice_for_ranges', help='Force float ranges to be converted to choice', action='store_true', default=False)
657
+ optional.add_argument('--max_abandoned_retrial', help='Maximum number retrials to get when a job is abandoned post-generation', default=20, type=int)
656
658
 
657
659
  speed.add_argument('--dont_warm_start_refitting', help='Do not keep Model weights, thus, refit for every generator (may be more accurate, but slower)', action='store_true', default=False)
658
660
  speed.add_argument('--refit_on_cv', help='Refit on Cross-Validation (helps in accuracy, but makes generating new points slower)', action='store_true', default=False)
@@ -794,9 +796,13 @@ if args.seed is not None:
794
796
 
795
797
  set_rng_seed(args.seed)
796
798
 
799
+ @beartype
800
+ def _fatal_error(message: str, code: int) -> None:
801
+ print_red(message)
802
+ my_exit(code)
803
+
797
804
  if args.max_eval is None and args.generation_strategy is None and args.continue_previous_job is None and (not args.calculate_pareto_front_of_job or len(args.calculate_pareto_front_of_job) == 0):
798
- print_red("Either --max_eval or --generation_strategy must be set.")
799
- my_exit(104)
805
+ _fatal_error("Either --max_eval or --generation_strategy must be set.", 104)
800
806
 
801
807
  arg_result_names = []
802
808
  arg_result_min_or_max = []
@@ -1273,8 +1279,7 @@ class ExternalProgramGenerationNode(ExternalGenerationNode):
1273
1279
  if param_type == ParameterType.STRING:
1274
1280
  return "STRING"
1275
1281
 
1276
- print_red(f"Unknown data type {param_type}")
1277
- my_exit(33)
1282
+ _fatal_error(f"Unknown data type {param_type}", 33)
1278
1283
 
1279
1284
  return ""
1280
1285
 
@@ -1303,8 +1308,7 @@ class ExternalProgramGenerationNode(ExternalGenerationNode):
1303
1308
  "values": param.values
1304
1309
  }
1305
1310
  else:
1306
- print_red(f"Unknown parameter type: {param}")
1307
- my_exit(15)
1311
+ _fatal_error(f"Unknown parameter type: {param}", 15)
1308
1312
 
1309
1313
  return serialized
1310
1314
 
@@ -1583,8 +1587,7 @@ if isinstance(args.num_parallel_jobs, int) or helpers.looks_like_int(args.num_pa
1583
1587
  num_parallel_jobs = int(args.num_parallel_jobs)
1584
1588
 
1585
1589
  if num_parallel_jobs <= 0:
1586
- print_red(f"--num_parallel_jobs must be 1 or larger, is {num_parallel_jobs}")
1587
- my_exit(106)
1590
+ _fatal_error(f"--num_parallel_jobs must be 1 or larger, is {num_parallel_jobs}", 106)
1588
1591
 
1589
1592
  class SearchSpaceExhausted (Exception):
1590
1593
  pass
@@ -1836,11 +1839,9 @@ def log_nr_of_workers() -> None:
1836
1839
  with open(logfile_nr_workers, mode='a+', encoding="utf-8") as f:
1837
1840
  f.write(str(nr_current_workers) + "\n")
1838
1841
  except FileNotFoundError:
1839
- print_red(f"It seems like the folder for writing {logfile_nr_workers} was deleted during the run. Cannot continue.")
1840
- my_exit(99)
1842
+ _fatal_error(f"It seems like the folder for writing {logfile_nr_workers} was deleted during the run. Cannot continue.", 99)
1841
1843
  except OSError as e:
1842
- print_red(f"Tried writing log_nr_of_workers to file {logfile_nr_workers}, but failed with error: {e}. This may mean that the file system you are running on is instable. OmniOpt2 probably cannot do anything about it.")
1843
- my_exit(199)
1844
+ _fatal_error(f"Tried writing log_nr_of_workers to file {logfile_nr_workers}, but failed with error: {e}. This may mean that the file system you are running on is instable. OmniOpt2 probably cannot do anything about it.", 199)
1844
1845
 
1845
1846
  return None
1846
1847
 
@@ -2015,12 +2016,10 @@ else:
2015
2016
  if os.path.exists(prev_job_file):
2016
2017
  global_vars["joined_run_program"] = get_file_as_string(prev_job_file)
2017
2018
  else:
2018
- print_red(f"The previous job file {prev_job_file} could not be found. You may forgot to add the run number at the end.")
2019
- my_exit(44)
2019
+ _fatal_error(f"The previous job file {prev_job_file} could not be found. You may forgot to add the run number at the end.", 44)
2020
2020
 
2021
2021
  if not args.tests and len(global_vars["joined_run_program"]) == 0 and not args.calculate_pareto_front_of_job:
2022
- print_red("--run_program was empty")
2023
- my_exit(19)
2022
+ _fatal_error("--run_program was empty", 19)
2024
2023
 
2025
2024
  global_vars["experiment_name"] = args.experiment_name
2026
2025
 
@@ -2029,20 +2028,17 @@ def load_global_vars(_file: str) -> None:
2029
2028
  global global_vars
2030
2029
 
2031
2030
  if not os.path.exists(_file):
2032
- print_red(f"You've tried to continue a non-existing job: {_file}")
2033
- my_exit(44)
2031
+ _fatal_error(f"You've tried to continue a non-existing job: {_file}", 44)
2034
2032
  try:
2035
2033
  with open(_file, encoding="utf-8") as f:
2036
2034
  global_vars = json.load(f)
2037
2035
  except Exception as e:
2038
- print_red(f"Error while loading old global_vars: {e}, trying to load {_file}")
2039
- my_exit(44)
2036
+ _fatal_error(f"Error while loading old global_vars: {e}, trying to load {_file}", 44)
2040
2037
 
2041
2038
  @beartype
2042
2039
  def load_or_exit(filepath: str, error_msg: str, exit_code: int) -> None:
2043
2040
  if not os.path.exists(filepath):
2044
- print_red(error_msg)
2045
- my_exit(exit_code)
2041
+ _fatal_error(error_msg, exit_code)
2046
2042
 
2047
2043
  @beartype
2048
2044
  def get_file_content_or_exit(filepath: str, error_msg: str, exit_code: int) -> str:
@@ -2052,8 +2048,7 @@ def get_file_content_or_exit(filepath: str, error_msg: str, exit_code: int) -> s
2052
2048
  @beartype
2053
2049
  def check_param_or_exit(param: Any, error_msg: str, exit_code: int) -> None:
2054
2050
  if param is None:
2055
- print_red(error_msg)
2056
- my_exit(exit_code)
2051
+ _fatal_error(error_msg, exit_code)
2057
2052
 
2058
2053
  @beartype
2059
2054
  def check_continue_previous_job(continue_previous_job: Optional[str]) -> dict:
@@ -2103,8 +2098,7 @@ def load_time_or_exit(_args: Any) -> None:
2103
2098
  print_yellow(f"Time-setting: The contents of {time_file} do not contain a single number")
2104
2099
  else:
2105
2100
  if len(args.calculate_pareto_front_of_job) == 0:
2106
- print_red("Missing --time parameter. Cannot continue.")
2107
- my_exit(19)
2101
+ _fatal_error("Missing --time parameter. Cannot continue.", 19)
2108
2102
 
2109
2103
  @beartype
2110
2104
  def load_mem_gb_or_exit(_args: Any) -> Optional[int]:
@@ -2122,8 +2116,7 @@ def load_mem_gb_or_exit(_args: Any) -> Optional[int]:
2122
2116
  print_yellow(f"mem_gb-setting: The contents of {mem_gb_file} do not contain a single number")
2123
2117
  return None
2124
2118
 
2125
- print_red("--mem_gb needs to be set")
2126
- my_exit(19)
2119
+ _fatal_error("--mem_gb needs to be set", 19)
2127
2120
 
2128
2121
  return None
2129
2122
 
@@ -2145,8 +2138,7 @@ def load_max_eval_or_exit(_args: Any) -> None:
2145
2138
  if _args.max_eval:
2146
2139
  set_max_eval(_args.max_eval)
2147
2140
  if _args.max_eval <= 0:
2148
- print_red("--max_eval must be larger than 0")
2149
- my_exit(19)
2141
+ _fatal_error("--max_eval must be larger than 0", 19)
2150
2142
  elif _args.continue_previous_job:
2151
2143
  max_eval_file = f"{_args.continue_previous_job}/state_files/max_eval"
2152
2144
  max_eval_content = get_file_content_or_exit(max_eval_file, f"neither --max_eval nor file {max_eval_file} found", 19)
@@ -2285,8 +2277,7 @@ if not SYSTEM_HAS_SBATCH:
2285
2277
  num_parallel_jobs = 1
2286
2278
 
2287
2279
  if SYSTEM_HAS_SBATCH and not args.force_local_execution and args.raw_samples < args.num_parallel_jobs:
2288
- print_red(f"Has --raw_samples={args.raw_samples}, but --num_parallel_jobs={args.num_parallel_jobs}. Cannot continue, since --raw_samples must be larger or equal to --num_parallel_jobs.")
2289
- my_exit(48)
2280
+ _fatal_error(f"Has --raw_samples={args.raw_samples}, but --num_parallel_jobs={args.num_parallel_jobs}. Cannot continue, since --raw_samples must be larger or equal to --num_parallel_jobs.", 48)
2290
2281
 
2291
2282
  @beartype
2292
2283
  def save_global_vars() -> None:
@@ -2426,14 +2417,12 @@ def get_bounds(this_args: Union[str, list], j: int) -> Tuple[float, float]:
2426
2417
  try:
2427
2418
  lower_bound = float(this_args[j + 2])
2428
2419
  except Exception:
2429
- print_red(f"\n{this_args[j + 2]} is not a number")
2430
- my_exit(181)
2420
+ _fatal_error(f"\n{this_args[j + 2]} is not a number", 181)
2431
2421
 
2432
2422
  try:
2433
2423
  upper_bound = float(this_args[j + 3])
2434
2424
  except Exception:
2435
- print_red(f"\n{this_args[j + 3]} is not a number")
2436
- my_exit(181)
2425
+ _fatal_error(f"\n{this_args[j + 3]} is not a number", 181)
2437
2426
 
2438
2427
  return lower_bound, upper_bound
2439
2428
 
@@ -2482,8 +2471,7 @@ def create_range_param(name: str, lower_bound: Union[float, int], upper_bound: U
2482
2471
  @beartype
2483
2472
  def handle_grid_search(name: Union[list, str], lower_bound: Union[float, int], upper_bound: Union[float, int], value_type: str) -> dict:
2484
2473
  if lower_bound is None or upper_bound is None:
2485
- print_red("handle_grid_search: lower_bound or upper_bound is None")
2486
- my_exit(91)
2474
+ _fatal_error("handle_grid_search: lower_bound or upper_bound is None", 91)
2487
2475
 
2488
2476
  return {}
2489
2477
 
@@ -2587,8 +2575,7 @@ def validate_value_type(value_type: str) -> None:
2587
2575
  @beartype
2588
2576
  def parse_fixed_param(classic_params: list, params: list, j: int, this_args: Union[str, list], name: Union[list, str], search_space_reduction_warning: bool) -> Tuple[int, list, list, bool]:
2589
2577
  if len(this_args) != 3:
2590
- print_red("⚠ --parameter for type fixed must have 3 parameters: <NAME> fixed <VALUE>")
2591
- my_exit(181)
2578
+ _fatal_error("⚠ --parameter for type fixed must have 3 parameters: <NAME> fixed <VALUE>", 181)
2592
2579
 
2593
2580
  value = this_args[j + 2]
2594
2581
 
@@ -2611,8 +2598,7 @@ def parse_fixed_param(classic_params: list, params: list, j: int, this_args: Uni
2611
2598
  @beartype
2612
2599
  def parse_choice_param(classic_params: list, params: list, j: int, this_args: Union[str, list], name: Union[list, str], search_space_reduction_warning: bool) -> Tuple[int, list, list, bool]:
2613
2600
  if len(this_args) != 3:
2614
- print_red("⚠ --parameter for type choice must have 3 parameters: <NAME> choice <VALUE,VALUE,VALUE,...>")
2615
- my_exit(181)
2601
+ _fatal_error("⚠ --parameter for type choice must have 3 parameters: <NAME> choice <VALUE,VALUE,VALUE,...>", 181)
2616
2602
 
2617
2603
  values = re.split(r'\s*,\s*', str(this_args[j + 2]))
2618
2604
 
@@ -2635,67 +2621,84 @@ def parse_choice_param(classic_params: list, params: list, j: int, this_args: Un
2635
2621
  return j, params, classic_params, search_space_reduction_warning
2636
2622
 
2637
2623
  @beartype
2638
- def parse_experiment_parameters() -> Tuple[list, list]:
2639
- params: list = []
2640
- classic_params: list = []
2641
- param_names: List[str] = []
2624
+ def _parse_experiment_parameters_validate_name(name: str, invalid_names: List[str], param_names: List[str]) -> None:
2625
+ if name in invalid_names:
2626
+ _fatal_error(f"\n⚠ Name for argument is invalid: {name}. Invalid names are: {', '.join(invalid_names)}", 181)
2627
+ if name in param_names:
2628
+ _fatal_error(f"\n⚠ Parameter name '{name}' is not unique. Names for parameters must be unique!", 181)
2642
2629
 
2643
- i = 0
2630
+ @beartype
2631
+ def _parse_experiment_parameters_get_param_type(this_args: List[Any], j: int) -> str:
2632
+ try:
2633
+ return this_args[j + 1]
2634
+ except Exception:
2635
+ _fatal_error("Not enough arguments for --parameter", 181)
2644
2636
 
2645
- search_space_reduction_warning = False
2637
+ return ""
2646
2638
 
2647
- valid_types = ["range", "fixed", "choice"]
2648
- invalid_names = ["start_time", "end_time", "run_time", "program_string", *arg_result_names, "exit_code", "signal"]
2639
+ @beartype
2640
+ def _parse_experiment_parameters_parse_this_args(
2641
+ this_args: List[Any],
2642
+ invalid_names: List[str],
2643
+ param_names: List[str],
2644
+ classic_params: List[Dict[str, Any]],
2645
+ params: List[Dict[str, Any]],
2646
+ search_space_reduction_warning: bool
2647
+ ) -> Tuple[int, List[Dict[str, Any]], List[Dict[str, Any]], bool]:
2648
+ j = 0
2649
+ param_parsers = {
2650
+ "range": parse_range_param,
2651
+ "fixed": parse_fixed_param,
2652
+ "choice": parse_choice_param
2653
+ }
2654
+ valid_types = list(param_parsers.keys())
2649
2655
 
2650
- while args.parameter and i < len(args.parameter):
2651
- this_args = args.parameter[i]
2652
- j = 0
2656
+ while j < len(this_args) - 1:
2657
+ name = this_args[j]
2658
+ _parse_experiment_parameters_validate_name(name, invalid_names, param_names)
2653
2659
 
2654
- if this_args is not None and isinstance(this_args, dict) and "param" in this_args:
2655
- this_args = this_args["param"]
2660
+ param_names.append(name)
2661
+ global_param_names.append(name)
2656
2662
 
2657
- while j < len(this_args) - 1:
2658
- name = this_args[j]
2663
+ param_type = _parse_experiment_parameters_get_param_type(this_args, j)
2659
2664
 
2660
- if name in invalid_names:
2661
- print_red(f"\nName for argument no. {j} is invalid: {name}. Invalid names are: {', '.join(invalid_names)}")
2662
- my_exit(181)
2665
+ if param_type not in param_parsers:
2666
+ _fatal_error(f"⚠ Parameter type '{param_type}' not yet implemented.", 181)
2663
2667
 
2664
- if name in param_names:
2665
- print_red(f"\n⚠ Parameter name '{name}' is not unique. Names for parameters must be unique!")
2666
- my_exit(181)
2668
+ if param_type not in valid_types:
2669
+ valid_types_string = ', '.join(valid_types)
2670
+ _fatal_error(f"\n⚠ Invalid type {param_type}, valid types are: {valid_types_string}", 181)
2667
2671
 
2668
- param_names.append(name)
2669
- global_param_names.append(name)
2672
+ j, params, classic_params, search_space_reduction_warning = param_parsers[param_type](
2673
+ classic_params, params, j, this_args, name, search_space_reduction_warning)
2670
2674
 
2671
- try:
2672
- param_type = this_args[j + 1]
2673
- except Exception:
2674
- print_red("Not enough arguments for --parameter")
2675
- my_exit(181)
2676
-
2677
- param_parsers = {
2678
- "range": parse_range_param,
2679
- "fixed": parse_fixed_param,
2680
- "choice": parse_choice_param
2681
- }
2675
+ return j, params, classic_params, search_space_reduction_warning
2682
2676
 
2683
- if param_type not in param_parsers:
2684
- print_red(f"⚠ Parameter type '{param_type}' not yet implemented.")
2685
- my_exit(181)
2677
+ @beartype
2678
+ def parse_experiment_parameters() -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
2679
+ params: List[Dict[str, Any]] = []
2680
+ classic_params: List[Dict[str, Any]] = []
2681
+ param_names: List[str] = []
2686
2682
 
2687
- if param_type not in valid_types:
2688
- valid_types_string = ', '.join(valid_types)
2689
- print_red(f"\n⚠ Invalid type {param_type}, valid types are: {valid_types_string}")
2690
- my_exit(181)
2683
+ search_space_reduction_warning = False
2691
2684
 
2692
- j, params, classic_params, search_space_reduction_warning = param_parsers[param_type](classic_params, params, j, this_args, name, search_space_reduction_warning)
2685
+ invalid_names = ["start_time", "end_time", "run_time", "program_string", *arg_result_names, "exit_code", "signal"]
2686
+
2687
+ i = 0
2688
+ while args.parameter and i < len(args.parameter):
2689
+ this_args = args.parameter[i]
2690
+ if this_args is not None and isinstance(this_args, dict) and "param" in this_args:
2691
+ this_args = this_args["param"]
2692
+
2693
+ _, params, classic_params, search_space_reduction_warning = _parse_experiment_parameters_parse_this_args(
2694
+ this_args, invalid_names, param_names, classic_params, params, search_space_reduction_warning)
2693
2695
 
2694
2696
  i += 1
2695
2697
 
2696
2698
  if search_space_reduction_warning:
2697
2699
  print_red("⚠ Search space reduction is not currently supported on continued runs or runs that have previous data.")
2698
2700
 
2701
+ # Remove duplicates by 'name' key preserving order
2699
2702
  params = list({p['name']: p for p in params}.values())
2700
2703
  classic_params = list({p['name']: p for p in classic_params}.values())
2701
2704
 
@@ -2704,31 +2707,26 @@ def parse_experiment_parameters() -> Tuple[list, list]:
2704
2707
  @beartype
2705
2708
  def check_factorial_range() -> None:
2706
2709
  if args.model and args.model == "FACTORIAL":
2707
- print_red("\n⚠ --model FACTORIAL cannot be used with range parameter")
2708
- my_exit(181)
2710
+ _fatal_error("\n⚠ --model FACTORIAL cannot be used with range parameter", 181)
2709
2711
 
2710
2712
  @beartype
2711
2713
  def check_if_range_types_are_invalid(value_type: str, valid_value_types: list) -> None:
2712
2714
  if value_type not in valid_value_types:
2713
2715
  valid_value_types_string = ", ".join(valid_value_types)
2714
- print_red(f"⚠ {value_type} is not a valid value type. Valid types for range are: {valid_value_types_string}")
2715
- my_exit(181)
2716
+ _fatal_error(f"⚠ {value_type} is not a valid value type. Valid types for range are: {valid_value_types_string}", 181)
2716
2717
 
2717
2718
  @beartype
2718
2719
  def check_range_params_length(this_args: Union[str, list]) -> None:
2719
2720
  if len(this_args) != 5 and len(this_args) != 4 and len(this_args) != 6:
2720
- print_red("\n⚠ --parameter for type range must have 4 (or 5, the last one being optional and float by default, or 6, while the last one is true or false) parameters: <NAME> range <START> <END> (<TYPE (int or float)>, <log_scale: bool>)")
2721
- my_exit(181)
2721
+ _fatal_error("\n⚠ --parameter for type range must have 4 (or 5, the last one being optional and float by default, or 6, while the last one is true or false) parameters: <NAME> range <START> <END> (<TYPE (int or float)>, <log_scale: bool>)", 181)
2722
2722
 
2723
2723
  @beartype
2724
2724
  def die_181_or_91_if_lower_and_upper_bound_equal_zero(lower_bound: Union[int, float], upper_bound: Union[int, float]) -> None:
2725
2725
  if upper_bound is None or lower_bound is None:
2726
- print_red("die_181_or_91_if_lower_and_upper_bound_equal_zero: upper_bound or lower_bound is None. Cannot continue.")
2727
- my_exit(91)
2726
+ _fatal_error("die_181_or_91_if_lower_and_upper_bound_equal_zero: upper_bound or lower_bound is None. Cannot continue.", 91)
2728
2727
  if upper_bound == lower_bound:
2729
2728
  if lower_bound == 0:
2730
- print_red(f"⚠ Lower bound and upper bound are equal: {lower_bound}, cannot automatically fix this, because they -0 = +0 (usually a quickfix would be to set lower_bound = -upper_bound)")
2731
- my_exit(181)
2729
+ _fatal_error(f"⚠ Lower bound and upper bound are equal: {lower_bound}, cannot automatically fix this, because they -0 = +0 (usually a quickfix would be to set lower_bound = -upper_bound)", 181)
2732
2730
  print_red(f"⚠ Lower bound and upper bound are equal: {lower_bound}, setting lower_bound = -upper_bound")
2733
2731
  if upper_bound is not None:
2734
2732
  lower_bound = -upper_bound
@@ -3170,8 +3168,7 @@ def calculate_signed_weighted_euclidean_distance(_args: Union[dict, List[float]]
3170
3168
  pattern = r'^\s*-?\d+(\.\d+)?\s*(,\s*-?\d+(\.\d+)?\s*)*$'
3171
3169
 
3172
3170
  if not re.fullmatch(pattern, weights_string):
3173
- print_red(f"String '{weights_string}' does not match pattern {pattern}")
3174
- my_exit(32)
3171
+ _fatal_error(f"String '{weights_string}' does not match pattern {pattern}", 32)
3175
3172
 
3176
3173
  weights = [float(w.strip()) for w in weights_string.split(",") if w.strip()]
3177
3174
 
@@ -4214,8 +4211,7 @@ def abandon_job(job: Job, trial_index: int) -> bool:
4214
4211
  print_debug(f"abandon_job: removing job {job}, trial_index: {trial_index}")
4215
4212
  global_vars["jobs"].remove((job, trial_index))
4216
4213
  else:
4217
- print_red("ax_client could not be found")
4218
- my_exit(9)
4214
+ _fatal_error("ax_client could not be found", 9)
4219
4215
  except Exception as e:
4220
4216
  print(f"ERROR in line {get_line_info()}: {e}")
4221
4217
  print_debug(f"ERROR in line {get_line_info()}: {e}")
@@ -4320,8 +4316,7 @@ def save_checkpoint(trial_nr: int = 0, eee: Union[None, str, Exception] = None)
4320
4316
  if ax_client:
4321
4317
  ax_client.save_to_json_file(filepath=checkpoint_filepath)
4322
4318
  else:
4323
- print_red("Something went wrong using the ax_client")
4324
- my_exit(9)
4319
+ _fatal_error("Something went wrong using the ax_client", 9)
4325
4320
  except Exception as e:
4326
4321
  save_checkpoint(trial_nr + 1, e)
4327
4322
 
@@ -4417,8 +4412,7 @@ def get_ax_param_representation(data: dict) -> dict:
4417
4412
 
4418
4413
  print("data:")
4419
4414
  pprint(data)
4420
- print_red(f"Unknown data range {data['type']}")
4421
- my_exit(19)
4415
+ _fatal_error(f"Unknown data range {data['type']}", 19)
4422
4416
 
4423
4417
  return {}
4424
4418
 
@@ -4448,8 +4442,7 @@ def set_torch_device_to_experiment_args(experiment_args: Union[None, dict]) -> T
4448
4442
  if experiment_args:
4449
4443
  experiment_args["choose_generation_strategy_kwargs"]["torch_device"] = torch_device
4450
4444
  else:
4451
- print_red("experiment_args could not be created.")
4452
- my_exit(90)
4445
+ _fatal_error("experiment_args could not be created.", 90)
4453
4446
 
4454
4447
  if experiment_args:
4455
4448
  return experiment_args, gpu_string, gpu_color
@@ -4459,8 +4452,7 @@ def set_torch_device_to_experiment_args(experiment_args: Union[None, dict]) -> T
4459
4452
  @beartype
4460
4453
  def die_with_47_if_file_doesnt_exists(_file: str) -> None:
4461
4454
  if not os.path.exists(_file):
4462
- print_red(f"Cannot find {_file}")
4463
- my_exit(47)
4455
+ _fatal_error(f"Cannot find {_file}", 47)
4464
4456
 
4465
4457
  @beartype
4466
4458
  def copy_state_files_from_previous_job(continue_previous_job: str) -> None:
@@ -4740,8 +4732,7 @@ def set_experiment_constraints(experiment_constraints: Optional[list], experimen
4740
4732
  if equation:
4741
4733
  experiment_args["parameter_constraints"].append(constraints_string)
4742
4734
  else:
4743
- print_red(f"Experiment constraint '{constraints_string}' is invalid. Cannot continue.")
4744
- my_exit(19)
4735
+ _fatal_error(f"Experiment constraint '{constraints_string}' is invalid. Cannot continue.", 19)
4745
4736
 
4746
4737
  file_path = os.path.join(get_current_run_folder(), "state_files", "constraints")
4747
4738
 
@@ -4836,8 +4827,7 @@ def get_experiment_parameters(_params: list) -> Tuple[AxClient, Union[list, dict
4836
4827
  global ax_client
4837
4828
 
4838
4829
  if not ax_client:
4839
- print_red("Something went wrong with the ax_client")
4840
- my_exit(9)
4830
+ _fatal_error("Something went wrong with the ax_client", 9)
4841
4831
 
4842
4832
  gpu_string = ""
4843
4833
  gpu_color = "green"
@@ -4884,8 +4874,7 @@ def get_experiment_parameters(_params: list) -> Tuple[AxClient, Union[list, dict
4884
4874
  json.dump(experiment_parameters, outfile)
4885
4875
 
4886
4876
  if not os.path.exists(checkpoint_filepath):
4887
- print_red(f"{checkpoint_filepath} not found. Cannot continue_previous_job without.")
4888
- my_exit(47)
4877
+ _fatal_error(f"{checkpoint_filepath} not found. Cannot continue_previous_job without.", 47)
4889
4878
 
4890
4879
  with open(f'{get_current_run_folder()}/checkpoint_load_source', mode='w', encoding="utf-8") as f:
4891
4880
  print(f"Continuation from checkpoint {continue_previous_job}", file=f)
@@ -4923,17 +4912,13 @@ def get_experiment_parameters(_params: list) -> Tuple[AxClient, Union[list, dict
4923
4912
  new_metrics = [Metric(k) for k in arg_result_names if k not in ax_client.metric_names]
4924
4913
  ax_client.experiment.add_tracking_metrics(new_metrics)
4925
4914
  except AssertionError as error:
4926
- print_red(f"An error has occurred while creating the experiment (0): {error}. This can happen when you have invalid parameter constraints.")
4927
- my_exit(102)
4915
+ _fatal_error(f"An error has occurred while creating the experiment (0): {error}. This can happen when you have invalid parameter constraints.", 102)
4928
4916
  except ValueError as error:
4929
- print_red(f"An error has occurred while creating the experiment (1): {error}")
4930
- my_exit(49)
4917
+ _fatal_error(f"An error has occurred while creating the experiment (1): {error}", 49)
4931
4918
  except TypeError as error:
4932
- print_red(f"An error has occurred while creating the experiment (2): {error}. This is probably a bug in OmniOpt2.")
4933
- my_exit(49)
4919
+ _fatal_error(f"An error has occurred while creating the experiment (2): {error}. This is probably a bug in OmniOpt2.", 49)
4934
4920
  except ax.exceptions.core.UserInputError as error:
4935
- print_red(f"An error occurred while creating the experiment (3): {error}")
4936
- my_exit(49)
4921
+ _fatal_error(f"An error occurred while creating the experiment (3): {error}", 49)
4937
4922
 
4938
4923
  return ax_client, experiment_parameters, experiment_args, gpu_string, gpu_color
4939
4924
 
@@ -5001,8 +4986,7 @@ def parse_single_experiment_parameter_table(classic_params: Optional[Union[list,
5001
4986
 
5002
4987
  rows.append([str(param["name"]), get_type_short(_type), "", "", ", ".join(values), "", ""])
5003
4988
  else:
5004
- print_red(f"Type {_type} is not yet implemented in the overview table.")
5005
- my_exit(15)
4989
+ _fatal_error(f"Type {_type} is not yet implemented in the overview table.", 15)
5006
4990
 
5007
4991
  k = k + 1
5008
4992
 
@@ -5072,8 +5056,7 @@ def print_ax_parameter_constraints_table(experiment_args: dict) -> None:
5072
5056
  @beartype
5073
5057
  def print_result_names_overview_table() -> None:
5074
5058
  if not ax_client:
5075
- print_red("Tried to access ax_client in print_result_names_overview_table, but it failed, because the ax_client was not defined.")
5076
- my_exit(101)
5059
+ _fatal_error("Tried to access ax_client in print_result_names_overview_table, but it failed, because the ax_client was not defined.", 101)
5077
5060
 
5078
5061
  return None
5079
5062
 
@@ -5703,8 +5686,7 @@ def insert_job_into_ax_client(arm_params: dict, result: dict, new_job_type: str
5703
5686
  done_converting = False
5704
5687
 
5705
5688
  if ax_client is None or not ax_client:
5706
- print_red("insert_job_into_ax_client: ax_client was not defined where it should have been")
5707
- my_exit(101)
5689
+ _fatal_error("insert_job_into_ax_client: ax_client was not defined where it should have been", 101)
5708
5690
 
5709
5691
  while not done_converting:
5710
5692
  try:
@@ -6100,8 +6082,7 @@ def mark_trial_as_failed(trial_index: int, _trial: Any) -> None:
6100
6082
  print_debug(f"Marking trial {_trial} as failed")
6101
6083
  try:
6102
6084
  if not ax_client:
6103
- print_red("mark_trial_as_failed: ax_client is not defined")
6104
- my_exit(101)
6085
+ _fatal_error("mark_trial_as_failed: ax_client is not defined", 101)
6105
6086
 
6106
6087
  return None
6107
6088
 
@@ -6135,8 +6116,7 @@ def _finish_job_core_helper_complete_trial(trial_index: int, raw_result: dict) -
6135
6116
  ax_client.update_trial_data(trial_index=trial_index, raw_data=raw_result)
6136
6117
  print_debug(f"Completing trial: {trial_index} with result: {raw_result} after failure... Done!")
6137
6118
  else:
6138
- print_red(f"Error completing trial: {e}")
6139
- my_exit(234)
6119
+ _fatal_error(f"Error completing trial: {e}", 234)
6140
6120
 
6141
6121
  @beartype
6142
6122
  def _finish_job_core_helper_mark_success(_trial: ax.core.trial.Trial, result: Union[float, int, tuple]) -> None:
@@ -6195,8 +6175,7 @@ def finish_job_core(job: Any, trial_index: int, this_jobs_finished: int) -> int:
6195
6175
  else:
6196
6176
  _finish_job_core_helper_mark_failure(job, trial_index, _trial)
6197
6177
  else:
6198
- print_red("ax_client could not be found or used")
6199
- my_exit(9)
6178
+ _fatal_error("ax_client could not be found or used", 9)
6200
6179
 
6201
6180
  print_debug(f"finish_job_core: removing job {job}, trial_index: {trial_index}")
6202
6181
  global_vars["jobs"].remove((job, trial_index))
@@ -6269,8 +6248,8 @@ def finish_previous_jobs(new_msgs: List[str]) -> None:
6269
6248
  global JOBS_FINISHED
6270
6249
 
6271
6250
  if not ax_client:
6272
- print_red("ax_client failed")
6273
- my_exit(101)
6251
+ _fatal_error("ax_client failed", 101)
6252
+
6274
6253
  return None
6275
6254
 
6276
6255
  this_jobs_finished = 0
@@ -6425,8 +6404,7 @@ def orchestrator_start_trial(params_from_out_file: Union[dict, str], trial_index
6425
6404
  print_debug(f"orchestrator_start_trial: appending job {new_job} to global_vars['jobs'], trial_index: {trial_index}")
6426
6405
  global_vars["jobs"].append((new_job, trial_index))
6427
6406
  else:
6428
- print_red("executor or ax_client could not be found properly")
6429
- my_exit(9)
6407
+ _fatal_error("executor or ax_client could not be found properly", 9)
6430
6408
 
6431
6409
  @beartype
6432
6410
  def handle_exclude_node(stdout_path: str, hostname_from_out_file: Union[None, str]) -> None:
@@ -6496,8 +6474,7 @@ def _orchestrate(stdout_path: str, trial_index: int) -> None:
6496
6474
  if handler:
6497
6475
  handler()
6498
6476
  else:
6499
- print_red(f"Orchestrator: {behav} not yet implemented!")
6500
- my_exit(210)
6477
+ _fatal_error(f"Orchestrator: {behav} not yet implemented!", 210)
6501
6478
 
6502
6479
  @beartype
6503
6480
  def write_continue_run_uuid_to_file() -> None:
@@ -6537,14 +6514,12 @@ def execute_evaluation(_params: list) -> Optional[int]:
6537
6514
  print_debug(f"execute_evaluation({_params})")
6538
6515
  trial_index, parameters, trial_counter, next_nr_steps, phase = _params
6539
6516
  if not ax_client:
6540
- print_red("Failed to get ax_client")
6541
- my_exit(9)
6517
+ _fatal_error("Failed to get ax_client", 9)
6542
6518
 
6543
6519
  return None
6544
6520
 
6545
6521
  if not executor:
6546
- print_red("executor could not be found")
6547
- my_exit(9)
6522
+ _fatal_error("executor could not be found", 9)
6548
6523
 
6549
6524
  return None
6550
6525
 
@@ -6608,8 +6583,7 @@ def exclude_defective_nodes() -> None:
6608
6583
  if executor:
6609
6584
  executor.update_parameters(exclude=excluded_string)
6610
6585
  else:
6611
- print_red("executor could not be found")
6612
- my_exit(9)
6586
+ _fatal_error("executor could not be found", 9)
6613
6587
 
6614
6588
  @beartype
6615
6589
  def handle_failed_job(error: Union[None, Exception, str], trial_index: int, new_job: Optional[Job]) -> None:
@@ -6638,8 +6612,7 @@ def cancel_failed_job(trial_index: int, new_job: Job) -> None:
6638
6612
  if ax_client:
6639
6613
  ax_client.log_trial_failure(trial_index=trial_index)
6640
6614
  else:
6641
- print_red("ax_client not defined")
6642
- my_exit(101)
6615
+ _fatal_error("ax_client not defined", 101)
6643
6616
  except Exception as e:
6644
6617
  print(f"ERROR in line {get_line_info()}: {e}")
6645
6618
  new_job.cancel()
@@ -6807,14 +6780,11 @@ def has_no_post_generation_constraints_or_matches_constraints(_post_generation_c
6807
6780
  @beartype
6808
6781
  def die_101_if_no_ax_client_or_experiment_or_gs() -> None:
6809
6782
  if ax_client is None:
6810
- print_red("Error: ax_client is not defined")
6811
- my_exit(101)
6783
+ _fatal_error("Error: ax_client is not defined", 101)
6812
6784
  elif ax_client.experiment is None:
6813
- print_red("Error: ax_client.experiment is not defined")
6814
- my_exit(101)
6785
+ _fatal_error("Error: ax_client.experiment is not defined", 101)
6815
6786
  elif global_gs is None:
6816
- print_red("Error: global_gs is not defined")
6817
- my_exit(101)
6787
+ _fatal_error("Error: global_gs is not defined", 101)
6818
6788
 
6819
6789
  @beartype
6820
6790
  def get_batched_arms(nr_of_jobs_to_get: int) -> list:
@@ -6822,8 +6792,7 @@ def get_batched_arms(nr_of_jobs_to_get: int) -> list:
6822
6792
  attempts = 0
6823
6793
 
6824
6794
  if global_gs is None:
6825
- print_red("Global generation strategy is not set. This is a bug in OmniOpt2.")
6826
- my_exit(107)
6795
+ _fatal_error("Global generation strategy is not set. This is a bug in OmniOpt2.", 107)
6827
6796
 
6828
6797
  return []
6829
6798
 
@@ -6855,101 +6824,140 @@ def get_batched_arms(nr_of_jobs_to_get: int) -> list:
6855
6824
 
6856
6825
  return batched_arms
6857
6826
 
6858
- @disable_logs
6859
6827
  @beartype
6860
6828
  def _fetch_next_trials(nr_of_jobs_to_get: int, recursion: bool = False) -> Optional[Tuple[Dict[int, Any], bool]]:
6861
- global gotten_jobs
6829
+ die_101_if_no_ax_client_or_experiment_or_gs()
6862
6830
 
6863
6831
  if not ax_client:
6864
- print_red("ax_client was not defined")
6865
- my_exit(9)
6832
+ _fatal_error("ax_client was not defined", 9)
6866
6833
 
6867
6834
  if global_gs is None:
6868
- print_red("Global generation strategy is not set. This is a bug in OmniOpt2.")
6869
- my_exit(107)
6835
+ _fatal_error("Global generation strategy is not set. This is a bug in OmniOpt2.", 107)
6870
6836
 
6871
- return None
6837
+ return _generate_trials(nr_of_jobs_to_get, recursion)
6872
6838
 
6873
- trials_dict: dict = {}
6839
+ @beartype
6840
+ def _generate_trials(n: int, recursion: bool) -> Tuple[Dict[int, Any], bool]:
6841
+ global gotten_jobs
6874
6842
 
6843
+ trials_dict: Dict[int, Any] = {}
6875
6844
  trial_durations: List[float] = []
6876
6845
 
6877
- die_101_if_no_ax_client_or_experiment_or_gs()
6846
+ start_time = time.time()
6847
+ cnt = 0
6848
+ retries = 0
6849
+ max_retries = args.max_abandoned_retrial
6878
6850
 
6879
6851
  try:
6880
- all_start_time = time.time()
6852
+ while cnt < n and retries < max_retries:
6853
+ for arm in get_batched_arms(n - cnt):
6854
+ if cnt >= n:
6855
+ break
6881
6856
 
6882
- batched_arms = get_batched_arms(nr_of_jobs_to_get)
6857
+ print_debug(f"Fetching trial {cnt + 1}/{n}...")
6858
+ progressbar_description([_get_trials_message(cnt + 1, n, trial_durations)])
6883
6859
 
6884
- cnt = 0
6860
+ try:
6861
+ result = _create_and_handle_trial(arm)
6862
+ if result is not None:
6863
+ trial_index, trial_duration, trial_successful = result
6885
6864
 
6886
- for k in range(len(batched_arms)):
6887
- print_debug(f"_fetch_next_trials: fetching trial {k + 1}/{nr_of_jobs_to_get}...")
6888
- progressbar_description([_get_trials_message(k + 1, nr_of_jobs_to_get, trial_durations)])
6865
+ except TrialRejected as e:
6866
+ print_debug(f"Trial rejected: {e}")
6867
+ retries += 1
6868
+ continue
6889
6869
 
6890
- start_time = time.time()
6870
+ trial_durations.append(trial_duration)
6891
6871
 
6892
- trial_index = ax_client.experiment.num_trials
6872
+ if trial_successful:
6873
+ cnt += 1
6874
+ trials_dict[trial_index] = arm.parameters
6875
+ gotten_jobs += 1
6893
6876
 
6894
- arm = batched_arms[k]
6895
- generator_run = GeneratorRun(
6896
- arms=[arm],
6897
- generation_node_name=global_gs.current_node_name
6898
- )
6877
+ return _finalize_generation(trials_dict, cnt, n, start_time)
6899
6878
 
6900
- trial = ax_client.experiment.new_trial(generator_run)
6901
- params = arm.parameters
6879
+ except Exception as e:
6880
+ return _handle_generation_failure(e, n, recursion)
6902
6881
 
6903
- trials_dict[trial_index] = params
6904
- gotten_jobs = gotten_jobs + 1
6882
+ class TrialRejected(Exception):
6883
+ pass
6905
6884
 
6906
- print_debug(f"_fetch_next_trials: got trial {k + 1}/{nr_of_jobs_to_get} (trial_index: {trial_index} [gotten_jobs: {gotten_jobs}, k: {k}])")
6907
- end_time = time.time()
6885
+ @beartype
6886
+ def _create_and_handle_trial(arm: Any) -> Optional[Tuple[int, float, bool]]:
6887
+ start = time.time()
6908
6888
 
6909
- trial_durations.append(float(end_time - start_time))
6889
+ if global_gs is None:
6890
+ _fatal_error("global_gs is not set", 107)
6910
6891
 
6911
- if not has_no_post_generation_constraints_or_matches_constraints(post_generation_constraints, params):
6912
- print_debug(f"Marking trial as abandoned since it doesn't fit a Post-Generation-constraint: {params}")
6913
- trial.mark_abandoned()
6914
- abandoned_trial_indices.append(trial_index)
6915
- else:
6916
- trial.mark_running(no_runner_required=True)
6892
+ return None
6917
6893
 
6918
- cnt = cnt + 1
6894
+ _current_node_name = global_gs.current_node_name
6919
6895
 
6920
- all_end_time = time.time()
6921
- all_time = float(all_end_time - all_start_time)
6896
+ trial_index = ax_client.experiment.num_trials
6897
+ generator_run = GeneratorRun(
6898
+ arms=[arm],
6899
+ generation_node_name=_current_node_name
6900
+ )
6922
6901
 
6923
- log_gen_times.append(all_time)
6924
- log_nr_gen_jobs.append(cnt)
6902
+ trial = ax_client.experiment.new_trial(generator_run)
6903
+ params = arm.parameters
6925
6904
 
6926
- if cnt:
6927
- progressbar_description([f"requested {nr_of_jobs_to_get} jobs, got {cnt}, {all_time / cnt} s/job"])
6928
- else:
6929
- progressbar_description([f"requested {nr_of_jobs_to_get} jobs, got {cnt}"])
6905
+ if not has_no_post_generation_constraints_or_matches_constraints(post_generation_constraints, params):
6906
+ print_debug(f"Trial {trial_index} does not meet post-generation constraints. Marking abandoned.")
6907
+ trial.mark_abandoned()
6908
+ abandoned_trial_indices.append(trial_index)
6909
+ raise TrialRejected("Post-generation constraints not met.")
6930
6910
 
6931
- return trials_dict, False
6932
- except np.linalg.LinAlgError as e:
6933
- _handle_linalg_error(e)
6934
- my_exit(242)
6935
- except (ax.exceptions.core.SearchSpaceExhausted, ax.exceptions.generation_strategy.GenerationStrategyRepeatedPoints, ax.exceptions.generation_strategy.MaxParallelismReachedException) as e:
6936
- if str(e) not in error_8_saved:
6937
- if recursion is False and args.revert_to_random_when_seemingly_exhausted:
6938
- print_yellow(f"\n⚠Error 8: {e} From now (done jobs: {count_done_jobs()}) on, random points will be generated.")
6939
- else:
6940
- print_red(f"\n⚠Error 8: {e}")
6911
+ trial.mark_running(no_runner_required=True)
6912
+ end = time.time()
6913
+ return trial_index, float(end - start), True
6941
6914
 
6942
- error_8_saved.append(str(e))
6915
+ @beartype
6916
+ def _finalize_generation(trials_dict: Dict[int, Any], cnt: int, requested: int, start_time: float) -> Tuple[Dict[int, Any], bool]:
6917
+ total_time = time.time() - start_time
6943
6918
 
6944
- if recursion is False and args.revert_to_random_when_seemingly_exhausted:
6945
- print_debug("The search space seems exhausted. Generating random points from here on.")
6919
+ log_gen_times.append(total_time)
6920
+ log_nr_gen_jobs.append(cnt)
6946
6921
 
6947
- set_global_gs_to_random()
6922
+ avg_time_str = f"{total_time / cnt:.2f} s/job" if cnt else "n/a"
6923
+ progressbar_description([f"requested {requested} jobs, got {cnt}, {avg_time_str}"])
6924
+
6925
+ return trials_dict, False
6948
6926
 
6949
- return _fetch_next_trials(nr_of_jobs_to_get, True)
6927
+ @beartype
6928
+ def _handle_generation_failure(
6929
+ e: Exception,
6930
+ requested: int,
6931
+ recursion: bool
6932
+ ) -> Tuple[Dict[int, Any], bool]:
6933
+ if isinstance(e, np.linalg.LinAlgError):
6934
+ _handle_linalg_error(e)
6935
+ my_exit(242)
6936
+
6937
+ elif isinstance(e, (
6938
+ ax.exceptions.core.SearchSpaceExhausted,
6939
+ ax.exceptions.generation_strategy.GenerationStrategyRepeatedPoints,
6940
+ ax.exceptions.generation_strategy.MaxParallelismReachedException
6941
+ )):
6942
+ msg = str(e)
6943
+ if msg not in error_8_saved:
6944
+ _print_exhaustion_warning(e, recursion)
6945
+ error_8_saved.append(msg)
6946
+
6947
+ if not recursion and args.revert_to_random_when_seemingly_exhausted:
6948
+ print_debug("Switching to random search strategy.")
6949
+ set_global_gs_to_random()
6950
+ return _fetch_next_trials(requested, True)
6950
6951
 
6951
6952
  return {}, True
6952
6953
 
6954
+ @beartype
6955
+ def _print_exhaustion_warning(e: Exception, recursion: bool) -> None:
6956
+ if not recursion and args.revert_to_random_when_seemingly_exhausted:
6957
+ print_yellow(f"\n⚠Error 8: {e} From now (done jobs: {count_done_jobs()}) on, random points will be generated.")
6958
+ else:
6959
+ print_red(f"\n⚠Error 8: {e}")
6960
+
6953
6961
  @beartype
6954
6962
  def get_model_kwargs() -> dict:
6955
6963
  if 'Cont_X_trans_Y_trans' in args.transforms:
@@ -7376,8 +7384,7 @@ def parse_generation_strategy_string(gen_strat_str: str) -> Tuple[list, int]:
7376
7384
  matching_model = get_matching_model_name(model_name)
7377
7385
 
7378
7386
  if matching_model in ["RANDOMFOREST", "EXTERNAL_GENERATOR"]:
7379
- print_red(f"Model {matching_model} is not valid for custom generation strategy.")
7380
- my_exit(56)
7387
+ _fatal_error(f"Model {matching_model} is not valid for custom generation strategy.", 56)
7381
7388
 
7382
7389
  if matching_model:
7383
7390
  gen_strat_list.append({matching_model: nr})
@@ -7412,8 +7419,7 @@ def write_state_file(name: str, var: str) -> None:
7412
7419
  file_path = f"{get_current_run_folder()}/state_files/{name}"
7413
7420
 
7414
7421
  if os.path.isdir(file_path):
7415
- print_red(f"{file_path} is a dir. Must be a file.")
7416
- my_exit(246)
7422
+ _fatal_error(f"{file_path} is a dir. Must be a file.", 246)
7417
7423
 
7418
7424
  makedirs(os.path.dirname(file_path))
7419
7425
 
@@ -7460,8 +7466,7 @@ def continue_not_supported_on_custom_generation_strategy() -> None:
7460
7466
  generation_strategy_file = f"{args.continue_previous_job}/state_files/custom_generation_strategy"
7461
7467
 
7462
7468
  if os.path.exists(generation_strategy_file):
7463
- print_red("Trying to continue a job which was started with --generation_strategy. This is currently not possible.")
7464
- my_exit(247)
7469
+ _fatal_error("Trying to continue a job which was started with --generation_strategy. This is currently not possible.", 247)
7465
7470
 
7466
7471
  @beartype
7467
7472
  def get_step_name(model_name: str, nr: int) -> str:
@@ -7503,8 +7508,7 @@ def get_torch_device_str() -> str:
7503
7508
  def create_node(model_name: str, threshold: int, next_model_name: Optional[str]) -> Union[RandomForestGenerationNode, GenerationNode]:
7504
7509
  if model_name == "RANDOMFOREST":
7505
7510
  if len(arg_result_names) != 1:
7506
- print_red("Currently, RANDOMFOREST does not support Multi-Objective-Optimization")
7507
- my_exit(251)
7511
+ _fatal_error("Currently, RANDOMFOREST does not support Multi-Objective-Optimization", 251)
7508
7512
 
7509
7513
  node = RandomForestGenerationNode(
7510
7514
  num_samples=threshold,
@@ -7518,8 +7522,7 @@ def create_node(model_name: str, threshold: int, next_model_name: Optional[str])
7518
7522
 
7519
7523
  if model_name == "TPE":
7520
7524
  if len(arg_result_names) != 1:
7521
- print_red(f"Has {len(arg_result_names)} results. TPE currently only supports single-objective-optimization.")
7522
- my_exit(108)
7525
+ _fatal_error(f"Has {len(arg_result_names)} results. TPE currently only supports single-objective-optimization.", 108)
7523
7526
 
7524
7527
  node = ExternalProgramGenerationNode(f"python3 {script_dir}/.tpe.py", "TPE")
7525
7528
 
@@ -7532,8 +7535,7 @@ def create_node(model_name: str, threshold: int, next_model_name: Optional[str])
7532
7535
 
7533
7536
  if model_name == "EXTERNAL_GENERATOR":
7534
7537
  if args.external_generator is None or args.external_generator == "":
7535
- print_red("--external_generator is missing. Cannot create points for EXTERNAL_GENERATOR without it.")
7536
- my_exit(204)
7538
+ _fatal_error("--external_generator is missing. Cannot create points for EXTERNAL_GENERATOR without it.", 204)
7537
7539
 
7538
7540
  node = ExternalProgramGenerationNode(args.external_generator)
7539
7541
 
@@ -7770,8 +7772,7 @@ def handle_exceptions_create_and_execute_next_runs(e: Exception) -> int:
7770
7772
  print_red(f"Error 2: {e}")
7771
7773
  elif isinstance(e, ax.exceptions.core.DataRequiredError):
7772
7774
  if "transform requires non-empty data" in str(e) and args.num_random_steps == 0:
7773
- print_red(f"Error 3: {e} Increase --num_random_steps to at least 1 to continue.")
7774
- my_exit(233)
7775
+ _fatal_error(f"Error 3: {e} Increase --num_random_steps to at least 1 to continue.", 233)
7775
7776
  else:
7776
7777
  print_debug(f"Error 4: {e}")
7777
7778
  elif isinstance(e, RuntimeError):
@@ -7922,8 +7923,7 @@ executor.update_parameters(
7922
7923
  if args.exclude:
7923
7924
  print_yellow(f"Excluding the following nodes: {args.exclude}")
7924
7925
  else:
7925
- print_red("executor could not be found")
7926
- my_exit(9)
7926
+ _fatal_error("executor could not be found", 9)
7927
7927
 
7928
7928
  @beartype
7929
7929
  def set_global_executor() -> None:
@@ -8178,8 +8178,7 @@ def set_orchestrator() -> None:
8178
8178
  def check_if_has_random_steps() -> None:
8179
8179
  with console.status("[bold green]Checking if has random steps..."):
8180
8180
  if (not args.continue_previous_job and "--continue" not in sys.argv) and (args.num_random_steps == 0 or not args.num_random_steps) and args.model not in ["EXTERNAL_GENERATOR", "SOBOL", "PSEUDORANDOM"]:
8181
- print_red("You have no random steps set. This is only allowed in continued jobs. To start, you need either some random steps, or a continued run.")
8182
- my_exit(233)
8181
+ _fatal_error("You have no random steps set. This is only allowed in continued jobs. To start, you need either some random steps, or a continued run.", 233)
8183
8182
 
8184
8183
  @beartype
8185
8184
  def add_exclude_to_defective_nodes() -> None:
@@ -8192,8 +8191,7 @@ def add_exclude_to_defective_nodes() -> None:
8192
8191
  @beartype
8193
8192
  def check_max_eval(_max_eval: int) -> None:
8194
8193
  if not _max_eval:
8195
- print_red("--max_eval needs to be set!")
8196
- my_exit(19)
8194
+ _fatal_error("--max_eval needs to be set!", 19)
8197
8195
 
8198
8196
  @beartype
8199
8197
  def parse_parameters() -> Any:
@@ -8648,23 +8646,19 @@ def get_result_minimize_flag(path_to_calculate: str, resname: str) -> bool:
8648
8646
  result_min_max_path = os.path.join(path_to_calculate, "result_min_max.txt")
8649
8647
 
8650
8648
  if not os.path.isdir(path_to_calculate):
8651
- print_red(f"Error: Directory '{path_to_calculate}' does not exist.")
8652
- my_exit(24)
8649
+ _fatal_error(f"Error: Directory '{path_to_calculate}' does not exist.", 24)
8653
8650
 
8654
8651
  if not os.path.isfile(result_names_path) or not os.path.isfile(result_min_max_path):
8655
- print_red(f"Error: Missing 'result_names.txt' or 'result_min_max.txt' in '{path_to_calculate}'.")
8656
- my_exit(24)
8652
+ _fatal_error(f"Error: Missing 'result_names.txt' or 'result_min_max.txt' in '{path_to_calculate}'.", 24)
8657
8653
 
8658
8654
  try:
8659
8655
  with open(result_names_path, "r", encoding="utf-8") as f:
8660
8656
  names = [line.strip() for line in f]
8661
8657
  except Exception as e:
8662
- print_red(f"Error: Failed to read 'result_names.txt': {e}")
8663
- my_exit(24)
8658
+ _fatal_error(f"Error: Failed to read 'result_names.txt': {e}", 24)
8664
8659
 
8665
8660
  if resname not in names:
8666
- print_red(f"Error: Result name '{resname}' not found in 'result_names.txt'.")
8667
- my_exit(24)
8661
+ _fatal_error(f"Error: Result name '{resname}' not found in 'result_names.txt'.", 24)
8668
8662
 
8669
8663
  index = names.index(resname)
8670
8664
 
@@ -8672,12 +8666,10 @@ def get_result_minimize_flag(path_to_calculate: str, resname: str) -> bool:
8672
8666
  with open(result_min_max_path, "r", encoding="utf-8") as f:
8673
8667
  minmax = [line.strip().lower() for line in f]
8674
8668
  except Exception as e:
8675
- print_red(f"Error: Failed to read 'result_min_max.txt': {e}")
8676
- my_exit(24)
8669
+ _fatal_error(f"Error: Failed to read 'result_min_max.txt': {e}", 24)
8677
8670
 
8678
8671
  if index >= len(minmax):
8679
- print_red(f"Error: Not enough entries in 'result_min_max.txt' for index {index}.")
8680
- my_exit(24)
8672
+ _fatal_error(f"Error: Not enough entries in 'result_min_max.txt' for index {index}.", 24)
8681
8673
 
8682
8674
  return minmax[index] == "min"
8683
8675
 
@@ -9406,8 +9398,7 @@ def complex_tests(_program_name: str, wanted_stderr: str, wanted_exit_code: int,
9406
9398
  program_path: str = f"./.tests/test_wronggoing_stuff.bin/bin/{_program_name}"
9407
9399
 
9408
9400
  if not os.path.exists(program_path):
9409
- print_red(f"Program path {program_path} not found!")
9410
- my_exit(18)
9401
+ _fatal_error(f"Program path {program_path} not found!", 18)
9411
9402
 
9412
9403
  program_path_with_program: str = f"{program_path}"
9413
9404