omniopt2 7094__py3-none-any.whl → 7099__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- .omniopt.py +238 -247
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt.py +238 -247
- {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/METADATA +1 -1
- {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/RECORD +35 -35
- omniopt2.egg-info/PKG-INFO +1 -1
- pyproject.toml +1 -1
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.colorfunctions.sh +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.general.sh +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.helpers.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_cpu_ram_usage.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_general.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_gpu_usage.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_kde.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_scatter.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_scatter_generation_method.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_scatter_hex.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_time_and_exit_code.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_trial_index_result.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.omniopt_plot_worker.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.random_generator.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.shellscript_functions +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/.tpe.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/LICENSE +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/apt-dependencies.txt +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_docker +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_evaluate +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_plot +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/omniopt_share +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/requirements.txt +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/setup.py +0 -0
- {omniopt2-7094.data → omniopt2-7099.data}/data/bin/test_requirements.txt +0 -0
- {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/WHEEL +0 -0
- {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/licenses/LICENSE +0 -0
- {omniopt2-7094.dist-info → omniopt2-7099.dist-info}/top_level.txt +0 -0
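The bulk of this release replaces many call sites in .omniopt.py that previously called my_exit(<code>) directly with a new _fatal_error(message, code) helper (added right after set_rng_seed in the diff below), adds a --max_abandoned_retrial option (default 20), and splits parse_experiment_parameters and _fetch_next_trials into smaller helper functions. Below is a minimal, self-contained sketch of the new error helper; print_red and my_exit are simplified stand-ins for OmniOpt2's own helpers, included only so the sketch runs on its own.

import sys

# Simplified stand-ins for OmniOpt2's print_red / my_exit helpers
# (the real ones live in .omniopt.py).
def print_red(message: str) -> None:
    print(f"\033[31m{message}\033[0m", file=sys.stderr)

def my_exit(code: int) -> None:
    sys.exit(code)

def _fatal_error(message: str, code: int) -> None:
    # Print the message in red, then terminate with the given exit code.
    print_red(message)
    my_exit(code)

# Example call site from this diff:
# _fatal_error("Either --max_eval or --generation_strategy must be set.", 104)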
@@ -565,6 +565,7 @@ class ConfigLoader:
 verbose_break_run_search_table: bool
 send_anonymized_usage_stats: bool
 max_failed_jobs: Optional[int]
+max_abandoned_retrial: int
 show_ram_every_n_seconds: int
 config_toml: Optional[str]
 config_json: Optional[str]
@@ -653,6 +654,7 @@ class ConfigLoader:
 optional.add_argument('--calculate_pareto_front_of_job', help='This can be used to calculate a pareto-front for a multi-objective job that previously has results, but has been cancelled, and has no pareto-front (yet)', type=str, nargs='+', default=[])
 optional.add_argument('--show_generate_time_table', help='Generate a table at the end, showing how much time was spent trying to generate new points', action='store_true', default=False)
 optional.add_argument('--force_choice_for_ranges', help='Force float ranges to be converted to choice', action='store_true', default=False)
+optional.add_argument('--max_abandoned_retrial', help='Maximum number retrials to get when a job is abandoned post-generation', default=20, type=int)

 speed.add_argument('--dont_warm_start_refitting', help='Do not keep Model weights, thus, refit for every generator (may be more accurate, but slower)', action='store_true', default=False)
 speed.add_argument('--refit_on_cv', help='Refit on Cross-Validation (helps in accuracy, but makes generating new points slower)', action='store_true', default=False)
@@ -794,9 +796,13 @@ if args.seed is not None:

 set_rng_seed(args.seed)

+@beartype
+def _fatal_error(message: str, code: int) -> None:
+print_red(message)
+my_exit(code)
+
 if args.max_eval is None and args.generation_strategy is None and args.continue_previous_job is None and (not args.calculate_pareto_front_of_job or len(args.calculate_pareto_front_of_job) == 0):
-
-my_exit(104)
+_fatal_error("Either --max_eval or --generation_strategy must be set.", 104)

 arg_result_names = []
 arg_result_min_or_max = []
@@ -1273,8 +1279,7 @@ class ExternalProgramGenerationNode(ExternalGenerationNode):
 if param_type == ParameterType.STRING:
 return "STRING"

-
-my_exit(33)
+_fatal_error(f"Unknown data type {param_type}", 33)

 return ""

@@ -1303,8 +1308,7 @@ class ExternalProgramGenerationNode(ExternalGenerationNode):
 "values": param.values
 }
 else:
-
-my_exit(15)
+_fatal_error(f"Unknown parameter type: {param}", 15)

 return serialized

@@ -1583,8 +1587,7 @@ if isinstance(args.num_parallel_jobs, int) or helpers.looks_like_int(args.num_pa
 num_parallel_jobs = int(args.num_parallel_jobs)

 if num_parallel_jobs <= 0:
-
-my_exit(106)
+_fatal_error(f"--num_parallel_jobs must be 1 or larger, is {num_parallel_jobs}", 106)

 class SearchSpaceExhausted (Exception):
 pass
@@ -1836,11 +1839,9 @@ def log_nr_of_workers() -> None:
 with open(logfile_nr_workers, mode='a+', encoding="utf-8") as f:
 f.write(str(nr_current_workers) + "\n")
 except FileNotFoundError:
-
-my_exit(99)
+_fatal_error(f"It seems like the folder for writing {logfile_nr_workers} was deleted during the run. Cannot continue.", 99)
 except OSError as e:
-
-my_exit(199)
+_fatal_error(f"Tried writing log_nr_of_workers to file {logfile_nr_workers}, but failed with error: {e}. This may mean that the file system you are running on is instable. OmniOpt2 probably cannot do anything about it.", 199)

 return None

@@ -2015,12 +2016,10 @@ else:
 if os.path.exists(prev_job_file):
 global_vars["joined_run_program"] = get_file_as_string(prev_job_file)
 else:
-
-my_exit(44)
+_fatal_error(f"The previous job file {prev_job_file} could not be found. You may forgot to add the run number at the end.", 44)

 if not args.tests and len(global_vars["joined_run_program"]) == 0 and not args.calculate_pareto_front_of_job:
-
-my_exit(19)
+_fatal_error("--run_program was empty", 19)

 global_vars["experiment_name"] = args.experiment_name

@@ -2029,20 +2028,17 @@ def load_global_vars(_file: str) -> None:
 global global_vars

 if not os.path.exists(_file):
-
-my_exit(44)
+_fatal_error(f"You've tried to continue a non-existing job: {_file}", 44)
 try:
 with open(_file, encoding="utf-8") as f:
 global_vars = json.load(f)
 except Exception as e:
-
-my_exit(44)
+_fatal_error(f"Error while loading old global_vars: {e}, trying to load {_file}", 44)

 @beartype
 def load_or_exit(filepath: str, error_msg: str, exit_code: int) -> None:
 if not os.path.exists(filepath):
-
-my_exit(exit_code)
+_fatal_error(error_msg, exit_code)

 @beartype
 def get_file_content_or_exit(filepath: str, error_msg: str, exit_code: int) -> str:
@@ -2052,8 +2048,7 @@ def get_file_content_or_exit(filepath: str, error_msg: str, exit_code: int) -> s
 @beartype
 def check_param_or_exit(param: Any, error_msg: str, exit_code: int) -> None:
 if param is None:
-
-my_exit(exit_code)
+_fatal_error(error_msg, exit_code)

 @beartype
 def check_continue_previous_job(continue_previous_job: Optional[str]) -> dict:
@@ -2103,8 +2098,7 @@ def load_time_or_exit(_args: Any) -> None:
 print_yellow(f"Time-setting: The contents of {time_file} do not contain a single number")
 else:
 if len(args.calculate_pareto_front_of_job) == 0:
-
-my_exit(19)
+_fatal_error("Missing --time parameter. Cannot continue.", 19)

 @beartype
 def load_mem_gb_or_exit(_args: Any) -> Optional[int]:
@@ -2122,8 +2116,7 @@ def load_mem_gb_or_exit(_args: Any) -> Optional[int]:
 print_yellow(f"mem_gb-setting: The contents of {mem_gb_file} do not contain a single number")
 return None

-
-my_exit(19)
+_fatal_error("--mem_gb needs to be set", 19)

 return None

@@ -2145,8 +2138,7 @@ def load_max_eval_or_exit(_args: Any) -> None:
 if _args.max_eval:
 set_max_eval(_args.max_eval)
 if _args.max_eval <= 0:
-
-my_exit(19)
+_fatal_error("--max_eval must be larger than 0", 19)
 elif _args.continue_previous_job:
 max_eval_file = f"{_args.continue_previous_job}/state_files/max_eval"
 max_eval_content = get_file_content_or_exit(max_eval_file, f"neither --max_eval nor file {max_eval_file} found", 19)
@@ -2285,8 +2277,7 @@ if not SYSTEM_HAS_SBATCH:
 num_parallel_jobs = 1

 if SYSTEM_HAS_SBATCH and not args.force_local_execution and args.raw_samples < args.num_parallel_jobs:
-
-my_exit(48)
+_fatal_error(f"Has --raw_samples={args.raw_samples}, but --num_parallel_jobs={args.num_parallel_jobs}. Cannot continue, since --raw_samples must be larger or equal to --num_parallel_jobs.", 48)

 @beartype
 def save_global_vars() -> None:
@@ -2426,14 +2417,12 @@ def get_bounds(this_args: Union[str, list], j: int) -> Tuple[float, float]:
 try:
 lower_bound = float(this_args[j + 2])
 except Exception:
-
-my_exit(181)
+_fatal_error(f"\n{this_args[j + 2]} is not a number", 181)

 try:
 upper_bound = float(this_args[j + 3])
 except Exception:
-
-my_exit(181)
+_fatal_error(f"\n{this_args[j + 3]} is not a number", 181)

 return lower_bound, upper_bound

@@ -2482,8 +2471,7 @@ def create_range_param(name: str, lower_bound: Union[float, int], upper_bound: U
 @beartype
 def handle_grid_search(name: Union[list, str], lower_bound: Union[float, int], upper_bound: Union[float, int], value_type: str) -> dict:
 if lower_bound is None or upper_bound is None:
-
-my_exit(91)
+_fatal_error("handle_grid_search: lower_bound or upper_bound is None", 91)

 return {}

@@ -2587,8 +2575,7 @@ def validate_value_type(value_type: str) -> None:
 @beartype
 def parse_fixed_param(classic_params: list, params: list, j: int, this_args: Union[str, list], name: Union[list, str], search_space_reduction_warning: bool) -> Tuple[int, list, list, bool]:
 if len(this_args) != 3:
-
-my_exit(181)
+_fatal_error("⚠ --parameter for type fixed must have 3 parameters: <NAME> fixed <VALUE>", 181)

 value = this_args[j + 2]

@@ -2611,8 +2598,7 @@ def parse_fixed_param(classic_params: list, params: list, j: int, this_args: Uni
 @beartype
 def parse_choice_param(classic_params: list, params: list, j: int, this_args: Union[str, list], name: Union[list, str], search_space_reduction_warning: bool) -> Tuple[int, list, list, bool]:
 if len(this_args) != 3:
-
-my_exit(181)
+_fatal_error("⚠ --parameter for type choice must have 3 parameters: <NAME> choice <VALUE,VALUE,VALUE,...>", 181)

 values = re.split(r'\s*,\s*', str(this_args[j + 2]))

@@ -2635,67 +2621,84 @@ def parse_choice_param(classic_params: list, params: list, j: int, this_args: Un
 return j, params, classic_params, search_space_reduction_warning

 @beartype
-def
-
-
-
+def _parse_experiment_parameters_validate_name(name: str, invalid_names: List[str], param_names: List[str]) -> None:
+if name in invalid_names:
+_fatal_error(f"\n⚠ Name for argument is invalid: {name}. Invalid names are: {', '.join(invalid_names)}", 181)
+if name in param_names:
+_fatal_error(f"\n⚠ Parameter name '{name}' is not unique. Names for parameters must be unique!", 181)

-
+@beartype
+def _parse_experiment_parameters_get_param_type(this_args: List[Any], j: int) -> str:
+try:
+return this_args[j + 1]
+except Exception:
+_fatal_error("Not enough arguments for --parameter", 181)

-
+return ""

-
-
+@beartype
+def _parse_experiment_parameters_parse_this_args(
+this_args: List[Any],
+invalid_names: List[str],
+param_names: List[str],
+classic_params: List[Dict[str, Any]],
+params: List[Dict[str, Any]],
+search_space_reduction_warning: bool
+) -> Tuple[int, List[Dict[str, Any]], List[Dict[str, Any]], bool]:
+j = 0
+param_parsers = {
+"range": parse_range_param,
+"fixed": parse_fixed_param,
+"choice": parse_choice_param
+}
+valid_types = list(param_parsers.keys())

-while
-
-
+while j < len(this_args) - 1:
+name = this_args[j]
+_parse_experiment_parameters_validate_name(name, invalid_names, param_names)

-
-
+param_names.append(name)
+global_param_names.append(name)

-
-name = this_args[j]
+param_type = _parse_experiment_parameters_get_param_type(this_args, j)

-
-
-my_exit(181)
+if param_type not in param_parsers:
+_fatal_error(f"⚠ Parameter type '{param_type}' not yet implemented.", 181)

-
-
-
+if param_type not in valid_types:
+valid_types_string = ', '.join(valid_types)
+_fatal_error(f"\n⚠ Invalid type {param_type}, valid types are: {valid_types_string}", 181)

-
-
+j, params, classic_params, search_space_reduction_warning = param_parsers[param_type](
+classic_params, params, j, this_args, name, search_space_reduction_warning)

-
-param_type = this_args[j + 1]
-except Exception:
-print_red("Not enough arguments for --parameter")
-my_exit(181)
-
-param_parsers = {
-"range": parse_range_param,
-"fixed": parse_fixed_param,
-"choice": parse_choice_param
-}
+return j, params, classic_params, search_space_reduction_warning

-
-
-
+@beartype
+def parse_experiment_parameters() -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
+params: List[Dict[str, Any]] = []
+classic_params: List[Dict[str, Any]] = []
+param_names: List[str] = []

-
-valid_types_string = ', '.join(valid_types)
-print_red(f"\n⚠ Invalid type {param_type}, valid types are: {valid_types_string}")
-my_exit(181)
+search_space_reduction_warning = False

-
+invalid_names = ["start_time", "end_time", "run_time", "program_string", *arg_result_names, "exit_code", "signal"]
+
+i = 0
+while args.parameter and i < len(args.parameter):
+this_args = args.parameter[i]
+if this_args is not None and isinstance(this_args, dict) and "param" in this_args:
+this_args = this_args["param"]
+
+_, params, classic_params, search_space_reduction_warning = _parse_experiment_parameters_parse_this_args(
+this_args, invalid_names, param_names, classic_params, params, search_space_reduction_warning)

 i += 1

 if search_space_reduction_warning:
 print_red("⚠ Search space reduction is not currently supported on continued runs or runs that have previous data.")

+# Remove duplicates by 'name' key preserving order
 params = list({p['name']: p for p in params}.values())
 classic_params = list({p['name']: p for p in classic_params}.values())

@@ -2704,31 +2707,26 @@ def parse_experiment_parameters() -> Tuple[list, list]:
 @beartype
 def check_factorial_range() -> None:
 if args.model and args.model == "FACTORIAL":
-
-my_exit(181)
+_fatal_error("\n⚠ --model FACTORIAL cannot be used with range parameter", 181)

 @beartype
 def check_if_range_types_are_invalid(value_type: str, valid_value_types: list) -> None:
 if value_type not in valid_value_types:
 valid_value_types_string = ", ".join(valid_value_types)
-
-my_exit(181)
+_fatal_error(f"⚠ {value_type} is not a valid value type. Valid types for range are: {valid_value_types_string}", 181)

 @beartype
 def check_range_params_length(this_args: Union[str, list]) -> None:
 if len(this_args) != 5 and len(this_args) != 4 and len(this_args) != 6:
-
-my_exit(181)
+_fatal_error("\n⚠ --parameter for type range must have 4 (or 5, the last one being optional and float by default, or 6, while the last one is true or false) parameters: <NAME> range <START> <END> (<TYPE (int or float)>, <log_scale: bool>)", 181)

 @beartype
 def die_181_or_91_if_lower_and_upper_bound_equal_zero(lower_bound: Union[int, float], upper_bound: Union[int, float]) -> None:
 if upper_bound is None or lower_bound is None:
-
-my_exit(91)
+_fatal_error("die_181_or_91_if_lower_and_upper_bound_equal_zero: upper_bound or lower_bound is None. Cannot continue.", 91)
 if upper_bound == lower_bound:
 if lower_bound == 0:
-
-my_exit(181)
+_fatal_error(f"⚠ Lower bound and upper bound are equal: {lower_bound}, cannot automatically fix this, because they -0 = +0 (usually a quickfix would be to set lower_bound = -upper_bound)", 181)
 print_red(f"⚠ Lower bound and upper bound are equal: {lower_bound}, setting lower_bound = -upper_bound")
 if upper_bound is not None:
 lower_bound = -upper_bound
@@ -3170,8 +3168,7 @@ def calculate_signed_weighted_euclidean_distance(_args: Union[dict, List[float]]
 pattern = r'^\s*-?\d+(\.\d+)?\s*(,\s*-?\d+(\.\d+)?\s*)*$'

 if not re.fullmatch(pattern, weights_string):
-
-my_exit(32)
+_fatal_error(f"String '{weights_string}' does not match pattern {pattern}", 32)

 weights = [float(w.strip()) for w in weights_string.split(",") if w.strip()]

@@ -4214,8 +4211,7 @@ def abandon_job(job: Job, trial_index: int) -> bool:
 print_debug(f"abandon_job: removing job {job}, trial_index: {trial_index}")
 global_vars["jobs"].remove((job, trial_index))
 else:
-
-my_exit(9)
+_fatal_error("ax_client could not be found", 9)
 except Exception as e:
 print(f"ERROR in line {get_line_info()}: {e}")
 print_debug(f"ERROR in line {get_line_info()}: {e}")
@@ -4320,8 +4316,7 @@ def save_checkpoint(trial_nr: int = 0, eee: Union[None, str, Exception] = None)
 if ax_client:
 ax_client.save_to_json_file(filepath=checkpoint_filepath)
 else:
-
-my_exit(9)
+_fatal_error("Something went wrong using the ax_client", 9)
 except Exception as e:
 save_checkpoint(trial_nr + 1, e)

@@ -4417,8 +4412,7 @@ def get_ax_param_representation(data: dict) -> dict:

 print("data:")
 pprint(data)
-
-my_exit(19)
+_fatal_error(f"Unknown data range {data['type']}", 19)

 return {}

@@ -4448,8 +4442,7 @@ def set_torch_device_to_experiment_args(experiment_args: Union[None, dict]) -> T
 if experiment_args:
 experiment_args["choose_generation_strategy_kwargs"]["torch_device"] = torch_device
 else:
-
-my_exit(90)
+_fatal_error("experiment_args could not be created.", 90)

 if experiment_args:
 return experiment_args, gpu_string, gpu_color
@@ -4459,8 +4452,7 @@ def set_torch_device_to_experiment_args(experiment_args: Union[None, dict]) -> T
 @beartype
 def die_with_47_if_file_doesnt_exists(_file: str) -> None:
 if not os.path.exists(_file):
-
-my_exit(47)
+_fatal_error(f"Cannot find {_file}", 47)

 @beartype
 def copy_state_files_from_previous_job(continue_previous_job: str) -> None:
@@ -4740,8 +4732,7 @@ def set_experiment_constraints(experiment_constraints: Optional[list], experimen
 if equation:
 experiment_args["parameter_constraints"].append(constraints_string)
 else:
-
-my_exit(19)
+_fatal_error(f"Experiment constraint '{constraints_string}' is invalid. Cannot continue.", 19)

 file_path = os.path.join(get_current_run_folder(), "state_files", "constraints")

@@ -4836,8 +4827,7 @@ def get_experiment_parameters(_params: list) -> Tuple[AxClient, Union[list, dict
 global ax_client

 if not ax_client:
-
-my_exit(9)
+_fatal_error("Something went wrong with the ax_client", 9)

 gpu_string = ""
 gpu_color = "green"
@@ -4884,8 +4874,7 @@ def get_experiment_parameters(_params: list) -> Tuple[AxClient, Union[list, dict
 json.dump(experiment_parameters, outfile)

 if not os.path.exists(checkpoint_filepath):
-
-my_exit(47)
+_fatal_error(f"{checkpoint_filepath} not found. Cannot continue_previous_job without.", 47)

 with open(f'{get_current_run_folder()}/checkpoint_load_source', mode='w', encoding="utf-8") as f:
 print(f"Continuation from checkpoint {continue_previous_job}", file=f)
@@ -4923,17 +4912,13 @@ def get_experiment_parameters(_params: list) -> Tuple[AxClient, Union[list, dict
 new_metrics = [Metric(k) for k in arg_result_names if k not in ax_client.metric_names]
 ax_client.experiment.add_tracking_metrics(new_metrics)
 except AssertionError as error:
-
-my_exit(102)
+_fatal_error(f"An error has occurred while creating the experiment (0): {error}. This can happen when you have invalid parameter constraints.", 102)
 except ValueError as error:
-
-my_exit(49)
+_fatal_error(f"An error has occurred while creating the experiment (1): {error}", 49)
 except TypeError as error:
-
-my_exit(49)
+_fatal_error(f"An error has occurred while creating the experiment (2): {error}. This is probably a bug in OmniOpt2.", 49)
 except ax.exceptions.core.UserInputError as error:
-
-my_exit(49)
+_fatal_error(f"An error occurred while creating the experiment (3): {error}", 49)

 return ax_client, experiment_parameters, experiment_args, gpu_string, gpu_color

@@ -5001,8 +4986,7 @@ def parse_single_experiment_parameter_table(classic_params: Optional[Union[list,

 rows.append([str(param["name"]), get_type_short(_type), "", "", ", ".join(values), "", ""])
 else:
-
-my_exit(15)
+_fatal_error(f"Type {_type} is not yet implemented in the overview table.", 15)

 k = k + 1

@@ -5072,8 +5056,7 @@ def print_ax_parameter_constraints_table(experiment_args: dict) -> None:
 @beartype
 def print_result_names_overview_table() -> None:
 if not ax_client:
-
-my_exit(101)
+_fatal_error("Tried to access ax_client in print_result_names_overview_table, but it failed, because the ax_client was not defined.", 101)

 return None

@@ -5703,8 +5686,7 @@ def insert_job_into_ax_client(arm_params: dict, result: dict, new_job_type: str
 done_converting = False

 if ax_client is None or not ax_client:
-
-my_exit(101)
+_fatal_error("insert_job_into_ax_client: ax_client was not defined where it should have been", 101)

 while not done_converting:
 try:
@@ -6100,8 +6082,7 @@ def mark_trial_as_failed(trial_index: int, _trial: Any) -> None:
 print_debug(f"Marking trial {_trial} as failed")
 try:
 if not ax_client:
-
-my_exit(101)
+_fatal_error("mark_trial_as_failed: ax_client is not defined", 101)

 return None

@@ -6135,8 +6116,7 @@ def _finish_job_core_helper_complete_trial(trial_index: int, raw_result: dict) -
 ax_client.update_trial_data(trial_index=trial_index, raw_data=raw_result)
 print_debug(f"Completing trial: {trial_index} with result: {raw_result} after failure... Done!")
 else:
-
-my_exit(234)
+_fatal_error(f"Error completing trial: {e}", 234)

 @beartype
 def _finish_job_core_helper_mark_success(_trial: ax.core.trial.Trial, result: Union[float, int, tuple]) -> None:
@@ -6195,8 +6175,7 @@ def finish_job_core(job: Any, trial_index: int, this_jobs_finished: int) -> int:
 else:
 _finish_job_core_helper_mark_failure(job, trial_index, _trial)
 else:
-
-my_exit(9)
+_fatal_error("ax_client could not be found or used", 9)

 print_debug(f"finish_job_core: removing job {job}, trial_index: {trial_index}")
 global_vars["jobs"].remove((job, trial_index))
@@ -6269,8 +6248,8 @@ def finish_previous_jobs(new_msgs: List[str]) -> None:
 global JOBS_FINISHED

 if not ax_client:
-
-
+_fatal_error("ax_client failed", 101)
+
 return None

 this_jobs_finished = 0
@@ -6425,8 +6404,7 @@ def orchestrator_start_trial(params_from_out_file: Union[dict, str], trial_index
 print_debug(f"orchestrator_start_trial: appending job {new_job} to global_vars['jobs'], trial_index: {trial_index}")
 global_vars["jobs"].append((new_job, trial_index))
 else:
-
-my_exit(9)
+_fatal_error("executor or ax_client could not be found properly", 9)

 @beartype
 def handle_exclude_node(stdout_path: str, hostname_from_out_file: Union[None, str]) -> None:
@@ -6496,8 +6474,7 @@ def _orchestrate(stdout_path: str, trial_index: int) -> None:
 if handler:
 handler()
 else:
-
-my_exit(210)
+_fatal_error(f"Orchestrator: {behav} not yet implemented!", 210)

 @beartype
 def write_continue_run_uuid_to_file() -> None:
@@ -6537,14 +6514,12 @@ def execute_evaluation(_params: list) -> Optional[int]:
 print_debug(f"execute_evaluation({_params})")
 trial_index, parameters, trial_counter, next_nr_steps, phase = _params
 if not ax_client:
-
-my_exit(9)
+_fatal_error("Failed to get ax_client", 9)

 return None

 if not executor:
-
-my_exit(9)
+_fatal_error("executor could not be found", 9)

 return None

@@ -6608,8 +6583,7 @@ def exclude_defective_nodes() -> None:
 if executor:
 executor.update_parameters(exclude=excluded_string)
 else:
-
-my_exit(9)
+_fatal_error("executor could not be found", 9)

 @beartype
 def handle_failed_job(error: Union[None, Exception, str], trial_index: int, new_job: Optional[Job]) -> None:
@@ -6638,8 +6612,7 @@ def cancel_failed_job(trial_index: int, new_job: Job) -> None:
 if ax_client:
 ax_client.log_trial_failure(trial_index=trial_index)
 else:
-
-my_exit(101)
+_fatal_error("ax_client not defined", 101)
 except Exception as e:
 print(f"ERROR in line {get_line_info()}: {e}")
 new_job.cancel()
@@ -6807,14 +6780,11 @@ def has_no_post_generation_constraints_or_matches_constraints(_post_generation_c
 @beartype
 def die_101_if_no_ax_client_or_experiment_or_gs() -> None:
 if ax_client is None:
-
-my_exit(101)
+_fatal_error("Error: ax_client is not defined", 101)
 elif ax_client.experiment is None:
-
-my_exit(101)
+_fatal_error("Error: ax_client.experiment is not defined", 101)
 elif global_gs is None:
-
-my_exit(101)
+_fatal_error("Error: global_gs is not defined", 101)

 @beartype
 def get_batched_arms(nr_of_jobs_to_get: int) -> list:
@@ -6822,8 +6792,7 @@ def get_batched_arms(nr_of_jobs_to_get: int) -> list:
 attempts = 0

 if global_gs is None:
-
-my_exit(107)
+_fatal_error("Global generation strategy is not set. This is a bug in OmniOpt2.", 107)

 return []

@@ -6855,101 +6824,140 @@ def get_batched_arms(nr_of_jobs_to_get: int) -> list:

 return batched_arms

-@disable_logs
 @beartype
 def _fetch_next_trials(nr_of_jobs_to_get: int, recursion: bool = False) -> Optional[Tuple[Dict[int, Any], bool]]:
-
+die_101_if_no_ax_client_or_experiment_or_gs()

 if not ax_client:
-
-my_exit(9)
+_fatal_error("ax_client was not defined", 9)

 if global_gs is None:
-
-my_exit(107)
+_fatal_error("Global generation strategy is not set. This is a bug in OmniOpt2.", 107)

-
+return _generate_trials(nr_of_jobs_to_get, recursion)

-
+@beartype
+def _generate_trials(n: int, recursion: bool) -> Tuple[Dict[int, Any], bool]:
+global gotten_jobs

+trials_dict: Dict[int, Any] = {}
 trial_durations: List[float] = []

-
+start_time = time.time()
+cnt = 0
+retries = 0
+max_retries = args.max_abandoned_retrial

 try:
-
+while cnt < n and retries < max_retries:
+for arm in get_batched_arms(n - cnt):
+if cnt >= n:
+break

-
+print_debug(f"Fetching trial {cnt + 1}/{n}...")
+progressbar_description([_get_trials_message(cnt + 1, n, trial_durations)])

-
+try:
+result = _create_and_handle_trial(arm)
+if result is not None:
+trial_index, trial_duration, trial_successful = result

-
-
-
+except TrialRejected as e:
+print_debug(f"Trial rejected: {e}")
+retries += 1
+continue

-
+trial_durations.append(trial_duration)

-
+if trial_successful:
+cnt += 1
+trials_dict[trial_index] = arm.parameters
+gotten_jobs += 1

-
-generator_run = GeneratorRun(
-arms=[arm],
-generation_node_name=global_gs.current_node_name
-)
+return _finalize_generation(trials_dict, cnt, n, start_time)

-
-
+except Exception as e:
+return _handle_generation_failure(e, n, recursion)

-
-
+class TrialRejected(Exception):
+pass

-
-
+@beartype
+def _create_and_handle_trial(arm: Any) -> Optional[Tuple[int, float, bool]]:
+start = time.time()

-
+if global_gs is None:
+_fatal_error("global_gs is not set", 107)

-
-print_debug(f"Marking trial as abandoned since it doesn't fit a Post-Generation-constraint: {params}")
-trial.mark_abandoned()
-abandoned_trial_indices.append(trial_index)
-else:
-trial.mark_running(no_runner_required=True)
+return None

-
+_current_node_name = global_gs.current_node_name

-
-
+trial_index = ax_client.experiment.num_trials
+generator_run = GeneratorRun(
+arms=[arm],
+generation_node_name=_current_node_name
+)

-
-
+trial = ax_client.experiment.new_trial(generator_run)
+params = arm.parameters

-
-
-
-
+if not has_no_post_generation_constraints_or_matches_constraints(post_generation_constraints, params):
+print_debug(f"Trial {trial_index} does not meet post-generation constraints. Marking abandoned.")
+trial.mark_abandoned()
+abandoned_trial_indices.append(trial_index)
+raise TrialRejected("Post-generation constraints not met.")

-
-
-
-my_exit(242)
-except (ax.exceptions.core.SearchSpaceExhausted, ax.exceptions.generation_strategy.GenerationStrategyRepeatedPoints, ax.exceptions.generation_strategy.MaxParallelismReachedException) as e:
-if str(e) not in error_8_saved:
-if recursion is False and args.revert_to_random_when_seemingly_exhausted:
-print_yellow(f"\n⚠Error 8: {e} From now (done jobs: {count_done_jobs()}) on, random points will be generated.")
-else:
-print_red(f"\n⚠Error 8: {e}")
+trial.mark_running(no_runner_required=True)
+end = time.time()
+return trial_index, float(end - start), True

-
+@beartype
+def _finalize_generation(trials_dict: Dict[int, Any], cnt: int, requested: int, start_time: float) -> Tuple[Dict[int, Any], bool]:
+total_time = time.time() - start_time

-
-
+log_gen_times.append(total_time)
+log_nr_gen_jobs.append(cnt)

-
+avg_time_str = f"{total_time / cnt:.2f} s/job" if cnt else "n/a"
+progressbar_description([f"requested {requested} jobs, got {cnt}, {avg_time_str}"])
+
+return trials_dict, False

-
+@beartype
+def _handle_generation_failure(
+e: Exception,
+requested: int,
+recursion: bool
+) -> Tuple[Dict[int, Any], bool]:
+if isinstance(e, np.linalg.LinAlgError):
+_handle_linalg_error(e)
+my_exit(242)
+
+elif isinstance(e, (
+ax.exceptions.core.SearchSpaceExhausted,
+ax.exceptions.generation_strategy.GenerationStrategyRepeatedPoints,
+ax.exceptions.generation_strategy.MaxParallelismReachedException
+)):
+msg = str(e)
+if msg not in error_8_saved:
+_print_exhaustion_warning(e, recursion)
+error_8_saved.append(msg)
+
+if not recursion and args.revert_to_random_when_seemingly_exhausted:
+print_debug("Switching to random search strategy.")
+set_global_gs_to_random()
+return _fetch_next_trials(requested, True)

 return {}, True

+@beartype
+def _print_exhaustion_warning(e: Exception, recursion: bool) -> None:
+if not recursion and args.revert_to_random_when_seemingly_exhausted:
+print_yellow(f"\n⚠Error 8: {e} From now (done jobs: {count_done_jobs()}) on, random points will be generated.")
+else:
+print_red(f"\n⚠Error 8: {e}")
+
 @beartype
 def get_model_kwargs() -> dict:
 if 'Cont_X_trans_Y_trans' in args.transforms:
@@ -7376,8 +7384,7 @@ def parse_generation_strategy_string(gen_strat_str: str) -> Tuple[list, int]:
 matching_model = get_matching_model_name(model_name)

 if matching_model in ["RANDOMFOREST", "EXTERNAL_GENERATOR"]:
-
-my_exit(56)
+_fatal_error(f"Model {matching_model} is not valid for custom generation strategy.", 56)

 if matching_model:
 gen_strat_list.append({matching_model: nr})
@@ -7412,8 +7419,7 @@ def write_state_file(name: str, var: str) -> None:
 file_path = f"{get_current_run_folder()}/state_files/{name}"

 if os.path.isdir(file_path):
-
-my_exit(246)
+_fatal_error(f"{file_path} is a dir. Must be a file.", 246)

 makedirs(os.path.dirname(file_path))

@@ -7460,8 +7466,7 @@ def continue_not_supported_on_custom_generation_strategy() -> None:
 generation_strategy_file = f"{args.continue_previous_job}/state_files/custom_generation_strategy"

 if os.path.exists(generation_strategy_file):
-
-my_exit(247)
+_fatal_error("Trying to continue a job which was started with --generation_strategy. This is currently not possible.", 247)

 @beartype
 def get_step_name(model_name: str, nr: int) -> str:
@@ -7503,8 +7508,7 @@ def get_torch_device_str() -> str:
 def create_node(model_name: str, threshold: int, next_model_name: Optional[str]) -> Union[RandomForestGenerationNode, GenerationNode]:
 if model_name == "RANDOMFOREST":
 if len(arg_result_names) != 1:
-
-my_exit(251)
+_fatal_error("Currently, RANDOMFOREST does not support Multi-Objective-Optimization", 251)

 node = RandomForestGenerationNode(
 num_samples=threshold,
@@ -7518,8 +7522,7 @@ def create_node(model_name: str, threshold: int, next_model_name: Optional[str])

 if model_name == "TPE":
 if len(arg_result_names) != 1:
-
-my_exit(108)
+_fatal_error(f"Has {len(arg_result_names)} results. TPE currently only supports single-objective-optimization.", 108)

 node = ExternalProgramGenerationNode(f"python3 {script_dir}/.tpe.py", "TPE")

@@ -7532,8 +7535,7 @@ def create_node(model_name: str, threshold: int, next_model_name: Optional[str])

 if model_name == "EXTERNAL_GENERATOR":
 if args.external_generator is None or args.external_generator == "":
-
-my_exit(204)
+_fatal_error("--external_generator is missing. Cannot create points for EXTERNAL_GENERATOR without it.", 204)

 node = ExternalProgramGenerationNode(args.external_generator)

@@ -7770,8 +7772,7 @@ def handle_exceptions_create_and_execute_next_runs(e: Exception) -> int:
 print_red(f"Error 2: {e}")
 elif isinstance(e, ax.exceptions.core.DataRequiredError):
 if "transform requires non-empty data" in str(e) and args.num_random_steps == 0:
-
-my_exit(233)
+_fatal_error(f"Error 3: {e} Increase --num_random_steps to at least 1 to continue.", 233)
 else:
 print_debug(f"Error 4: {e}")
 elif isinstance(e, RuntimeError):
@@ -7922,8 +7923,7 @@ executor.update_parameters(
 if args.exclude:
 print_yellow(f"Excluding the following nodes: {args.exclude}")
 else:
-
-my_exit(9)
+_fatal_error("executor could not be found", 9)

 @beartype
 def set_global_executor() -> None:
@@ -8178,8 +8178,7 @@ def set_orchestrator() -> None:
 def check_if_has_random_steps() -> None:
 with console.status("[bold green]Checking if has random steps..."):
 if (not args.continue_previous_job and "--continue" not in sys.argv) and (args.num_random_steps == 0 or not args.num_random_steps) and args.model not in ["EXTERNAL_GENERATOR", "SOBOL", "PSEUDORANDOM"]:
-
-my_exit(233)
+_fatal_error("You have no random steps set. This is only allowed in continued jobs. To start, you need either some random steps, or a continued run.", 233)

 @beartype
 def add_exclude_to_defective_nodes() -> None:
@@ -8192,8 +8191,7 @@ def add_exclude_to_defective_nodes() -> None:
 @beartype
 def check_max_eval(_max_eval: int) -> None:
 if not _max_eval:
-
-my_exit(19)
+_fatal_error("--max_eval needs to be set!", 19)

 @beartype
 def parse_parameters() -> Any:
@@ -8648,23 +8646,19 @@ def get_result_minimize_flag(path_to_calculate: str, resname: str) -> bool:
 result_min_max_path = os.path.join(path_to_calculate, "result_min_max.txt")

 if not os.path.isdir(path_to_calculate):
-
-my_exit(24)
+_fatal_error(f"Error: Directory '{path_to_calculate}' does not exist.", 24)

 if not os.path.isfile(result_names_path) or not os.path.isfile(result_min_max_path):
-
-my_exit(24)
+_fatal_error(f"Error: Missing 'result_names.txt' or 'result_min_max.txt' in '{path_to_calculate}'.", 24)

 try:
 with open(result_names_path, "r", encoding="utf-8") as f:
 names = [line.strip() for line in f]
 except Exception as e:
-
-my_exit(24)
+_fatal_error(f"Error: Failed to read 'result_names.txt': {e}", 24)

 if resname not in names:
-
-my_exit(24)
+_fatal_error(f"Error: Result name '{resname}' not found in 'result_names.txt'.", 24)

 index = names.index(resname)

@@ -8672,12 +8666,10 @@ def get_result_minimize_flag(path_to_calculate: str, resname: str) -> bool:
 with open(result_min_max_path, "r", encoding="utf-8") as f:
 minmax = [line.strip().lower() for line in f]
 except Exception as e:
-
-my_exit(24)
+_fatal_error(f"Error: Failed to read 'result_min_max.txt': {e}", 24)

 if index >= len(minmax):
-
-my_exit(24)
+_fatal_error(f"Error: Not enough entries in 'result_min_max.txt' for index {index}.", 24)

 return minmax[index] == "min"

@@ -9406,8 +9398,7 @@ def complex_tests(_program_name: str, wanted_stderr: str, wanted_exit_code: int,
 program_path: str = f"./.tests/test_wronggoing_stuff.bin/bin/{_program_name}"

 if not os.path.exists(program_path):
-
-my_exit(18)
+_fatal_error(f"Program path {program_path} not found!", 18)

 program_path_with_program: str = f"{program_path}"
