omniopt2 8912__tar.gz → 8933__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omniopt2 might be problematic. Click here for more details.
- {omniopt2-8912 → omniopt2-8933}/.omniopt.py +597 -680
- omniopt2-8933/.pareto.py +134 -0
- {omniopt2-8912 → omniopt2-8933}/PKG-INFO +1 -1
- {omniopt2-8912 → omniopt2-8933}/omniopt2.egg-info/PKG-INFO +1 -1
- {omniopt2-8912 → omniopt2-8933}/omniopt2.egg-info/SOURCES.txt +1 -0
- {omniopt2-8912 → omniopt2-8933}/pyproject.toml +1 -1
- {omniopt2-8912 → omniopt2-8933}/.colorfunctions.sh +0 -0
- {omniopt2-8912 → omniopt2-8933}/.dockerignore +0 -0
- {omniopt2-8912 → omniopt2-8933}/.general.sh +0 -0
- {omniopt2-8912 → omniopt2-8933}/.gitignore +0 -0
- {omniopt2-8912 → omniopt2-8933}/.helpers.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_cpu_ram_usage.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_general.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_gpu_usage.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_kde.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_scatter.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_scatter_generation_method.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_scatter_hex.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_time_and_exit_code.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_trial_index_result.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.omniopt_plot_worker.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.random_generator.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/.shellscript_functions +0 -0
- {omniopt2-8912 → omniopt2-8933}/.tests/pylint.rc +0 -0
- {omniopt2-8912 → omniopt2-8933}/.tpe.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/LICENSE +0 -0
- {omniopt2-8912 → omniopt2-8933}/MANIFEST.in +0 -0
- {omniopt2-8912 → omniopt2-8933}/README.md +0 -0
- {omniopt2-8912 → omniopt2-8933}/apt-dependencies.txt +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt2.egg-info/dependency_links.txt +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt2.egg-info/requires.txt +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt2.egg-info/top_level.txt +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt_docker +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt_evaluate +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt_plot +0 -0
- {omniopt2-8912 → omniopt2-8933}/omniopt_share +0 -0
- {omniopt2-8912 → omniopt2-8933}/requirements.txt +0 -0
- {omniopt2-8912 → omniopt2-8933}/setup.cfg +0 -0
- {omniopt2-8912 → omniopt2-8933}/setup.py +0 -0
- {omniopt2-8912 → omniopt2-8933}/test_requirements.txt +0 -0
|
@@ -493,6 +493,24 @@ try:
|
|
|
493
493
|
dier: FunctionType = helpers.dier
|
|
494
494
|
is_equal: FunctionType = helpers.is_equal
|
|
495
495
|
is_not_equal: FunctionType = helpers.is_not_equal
|
|
496
|
+
with spinner("Importing pareto..."):
|
|
497
|
+
pareto_file: str = f"{script_dir}/.pareto.py"
|
|
498
|
+
spec = importlib.util.spec_from_file_location(
|
|
499
|
+
name="pareto",
|
|
500
|
+
location=pareto_file,
|
|
501
|
+
)
|
|
502
|
+
if spec is not None and spec.loader is not None:
|
|
503
|
+
pareto = importlib.util.module_from_spec(spec)
|
|
504
|
+
spec.loader.exec_module(pareto)
|
|
505
|
+
else:
|
|
506
|
+
raise ImportError(f"Could not load module from {pareto_file}")
|
|
507
|
+
|
|
508
|
+
pareto_front_table_filter_rows: FunctionType = pareto.pareto_front_table_filter_rows
|
|
509
|
+
pareto_front_table_add_headers: FunctionType = pareto.pareto_front_table_add_headers
|
|
510
|
+
pareto_front_table_add_rows: FunctionType = pareto.pareto_front_table_add_rows
|
|
511
|
+
pareto_front_filter_complete_points: FunctionType = pareto.pareto_front_filter_complete_points
|
|
512
|
+
pareto_front_select_pareto_points: FunctionType = pareto.pareto_front_select_pareto_points
|
|
513
|
+
|
|
496
514
|
except KeyboardInterrupt:
|
|
497
515
|
print("You pressed CTRL-c while importing the helpers file")
|
|
498
516
|
sys.exit(0)
|
|
@@ -546,7 +564,6 @@ def error_without_print(text: str) -> None:
|
|
|
546
564
|
helpers.print_color("red", f"Error: {e}. This may mean that the {get_current_run_folder()} was deleted during the run. Could not write '{text} to {get_current_run_folder()}/oo_errors.txt'")
|
|
547
565
|
sys.exit(99)
|
|
548
566
|
|
|
549
|
-
|
|
550
567
|
def print_red(text: str) -> None:
|
|
551
568
|
helpers.print_color("red", text)
|
|
552
569
|
|
|
@@ -3353,190 +3370,512 @@ def parse_experiment_parameters() -> None:
|
|
|
3353
3370
|
|
|
3354
3371
|
experiment_parameters = params # type: ignore[assignment]
|
|
3355
3372
|
|
|
3356
|
-
def
|
|
3357
|
-
|
|
3358
|
-
_fatal_error("\n⚠ --model FACTORIAL cannot be used with range parameter", 181)
|
|
3373
|
+
def job_calculate_pareto_front(path_to_calculate: str, disable_sixel_and_table: bool = False) -> bool:
    """Calculate and show the Pareto front for an existing run folder.

    The folder must contain ``state_files/ax_client.experiment.json``,
    ``state_files/checkpoint.json``, the results CSV and ``result_names.txt``
    with at least two non-empty result names.

    Side effects: rebinds the module globals ``RESULT_CSV_FILE``,
    ``CURRENT_RUN_FOLDER`` and ``arg_result_names`` to values derived from
    ``path_to_calculate``.

    Args:
        path_to_calculate: Run folder to evaluate. An empty value makes the
            function return ``False`` silently.
        disable_sixel_and_table: Passed through unchanged to
            ``show_pareto_or_error_msg``.

    Returns:
        ``True`` when the Pareto front step was reached, ``False`` when any
        precondition failed.
    """
    global CURRENT_RUN_FOLDER
    global RESULT_CSV_FILE
    global arg_result_names

    pf_start_time = time.time()

    # Nothing requested: leave quietly. (A second, identical guard that printed
    # an error used to follow this one, but it could never be reached.)
    if not path_to_calculate:
        return False

    if not os.path.exists(path_to_calculate):
        print_red(f"Path '{path_to_calculate}' does not exist")
        return False

    ax_client_json = f"{path_to_calculate}/state_files/ax_client.experiment.json"

    if not os.path.exists(ax_client_json):
        print_red(f"Path '{ax_client_json}' not found")
        return False

    checkpoint_file: str = f"{path_to_calculate}/state_files/checkpoint.json"
    if not os.path.exists(checkpoint_file):
        print_red(f"The checkpoint file '{checkpoint_file}' does not exist")
        return False

    # NOTE: the global is rebound before the existence check, so it keeps the
    # new value even when the file turns out to be missing.
    RESULT_CSV_FILE = f"{path_to_calculate}/{RESULTS_CSV_FILENAME}"
    if not os.path.exists(RESULT_CSV_FILE):
        print_red(f"{RESULT_CSV_FILE} not found")
        return False

    res_names_file = f"{path_to_calculate}/result_names.txt"
    if not os.path.exists(res_names_file):
        print_red(f"File '{res_names_file}' does not exist")
        return False

    try:
        with open(res_names_file, "r", encoding="utf-8") as file:
            lines = file.readlines()
    except Exception as e:
        print_red(f"Error reading file '{res_names_file}': {e}")
        return False

    # One result name per line; blank lines are ignored.
    res_names = [line.strip() for line in lines if line.strip() != ""]

    if len(res_names) < 2:
        print_red(f"Error: There are less than 2 result names (is: {len(res_names)}, {', '.join(res_names)}) in {path_to_calculate}. Cannot continue calculating the pareto front.")
        return False

    load_username_to_args(path_to_calculate)

    CURRENT_RUN_FOLDER = path_to_calculate

    arg_result_names = res_names

    load_experiment_parameters_from_checkpoint_file(checkpoint_file, False)

    if experiment_parameters is None:
        return False

    show_pareto_or_error_msg(path_to_calculate, res_names, disable_sixel_and_table)

    pf_end_time = time.time()

    print_debug(f"Calculating the Pareto-front took {pf_end_time - pf_start_time} seconds")

    return True
|
|
3460
3448
|
|
|
3461
|
-
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3449
|
+
def show_pareto_or_error_msg(path_to_calculate: str, res_names: Optional[list] = None, disable_sixel_and_table: bool = False) -> None:
    """Show the Pareto-frontier data, or log why it is not shown.

    Args:
        path_to_calculate: Run folder, forwarded to ``show_pareto_frontier_data``.
        res_names: Result names to use. When omitted, the current value of the
            module global ``arg_result_names`` is used. It is looked up at call
            time, so a later rebinding of the global is honoured.
        disable_sixel_and_table: Forwarded to ``show_pareto_frontier_data``.

    Any exception raised by ``show_pareto_frontier_data`` is caught and printed
    together with its traceback; it is not re-raised.
    """
    if res_names is None:
        # Resolve the default lazily; a definition-time default would keep
        # pointing at the list that existed when the module was loaded.
        res_names = arg_result_names

    if args.dryrun:
        print_debug("Not showing Pareto-frontier data with --dryrun")
        return None

    if len(res_names) > 1:
        try:
            show_pareto_frontier_data(path_to_calculate, res_names, disable_sixel_and_table)
        except Exception as e:
            inner_tb = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
            print_red(f"show_pareto_frontier_data() failed with exception '{e}':\n{inner_tb}")
    else:
        # Report the length of the list that was actually tested above.
        print_debug(f"show_pareto_frontier_data will NOT be executed because len(res_names) is {len(res_names)}")
    return None
|
|
3469
3463
|
|
|
3470
|
-
|
|
3471
|
-
|
|
3472
|
-
self.thread.join()
|
|
3464
|
+
def get_pareto_front_data(path_to_calculate: str, res_names: list) -> dict:
    """Compute the Pareto frontier for every pair of result names.

    Args:
        path_to_calculate: Run folder, forwarded to the helper functions.
        res_names: Result (metric) names; every 2-combination is evaluated.

    Returns:
        Nested dict ``{metric_x: {metric_y: frontier}}`` where ``frontier`` is
        whatever ``get_calculated_frontier`` returns. Pairs that raised
        ``DataRequiredError`` are missing from the inner dict.
    """
    pareto_front_data: dict = {}

    # Pair up the names that were passed in, not the module global, so that
    # the pairs match the ``res_names`` handed to get_calculated_frontier.
    for i, j in combinations(range(len(res_names)), 2):
        metric_x = res_names[i]
        metric_y = res_names[j]

        x_minimize = get_result_minimize_flag(path_to_calculate, metric_x)
        y_minimize = get_result_minimize_flag(path_to_calculate, metric_y)

        try:
            if metric_x not in pareto_front_data:
                pareto_front_data[metric_x] = {}

            pareto_front_data[metric_x][metric_y] = get_calculated_frontier(path_to_calculate, metric_x, metric_y, x_minimize, y_minimize, res_names)
        except ax.exceptions.core.DataRequiredError as e:
            print_red(f"Error computing Pareto frontier for {metric_x} and {metric_y}: {e}")
        except SignalINT:
            print_red("Calculating Pareto-fronts was cancelled by pressing CTRL-c")
            # Stop immediately; the remaining pairs are not visited.
            break

    return pareto_front_data
|
|
3513
3491
|
|
|
3514
|
-
|
|
3515
|
-
|
|
3492
|
+
def pareto_front_transform_objectives(
    points: List[Tuple[Any, float, float]],
    primary_name: str,
    secondary_name: str
) -> Tuple[np.ndarray, np.ndarray]:
    """Return the two objective columns of ``points``, sign-flipped for "max".

    The mode of each objective is read from the module-level
    ``arg_result_min_or_max`` at the position the name has in
    ``arg_result_names``. A "max" column is negated; a "min" column is left
    as is.

    Raises:
        ValueError: if a name is not in ``arg_result_names`` (raised by
            ``list.index``) or its mode is neither "min" nor "max".
    """
    objectives = ((primary_name, 1), (secondary_name, 2))

    # Look up both positions before touching the data.
    positions = [arg_result_names.index(name) for name, _ in objectives]

    # Element 1 of each point is the primary value, element 2 the secondary.
    columns = [np.array([point[slot] for point in points]) for _, slot in objectives]

    for which, (name, _) in enumerate(objectives):
        mode = arg_result_min_or_max[positions[which]]
        if mode == "max":
            columns[which] = -columns[which]
        elif mode != "min":
            raise ValueError(f"Unknown mode for {name}: {mode}")

    return columns[0], columns[1]
|
|
3535
3514
|
|
|
3536
|
-
def
|
|
3537
|
-
|
|
3538
|
-
|
|
3539
|
-
|
|
3515
|
+
def get_pareto_frontier_points(
    path_to_calculate: str,
    primary_objective: str,
    secondary_objective: str,
    x_minimize: bool,
    y_minimize: bool,
    absolute_metrics: List[str],
    num_points: int
) -> Optional[dict]:
    """Run the Pareto pipeline for one pair of objectives.

    Steps: aggregate the run's data, keep the complete points, transform the
    objectives, select the Pareto points and build the return structure.

    Returns:
        The dict built by ``pareto_front_build_return_structure``, or ``None``
        when ``pareto_front_aggregate_data`` returns ``None``.
    """
    aggregated = pareto_front_aggregate_data(path_to_calculate)
    if aggregated is None:
        return None

    complete_points = pareto_front_filter_complete_points(path_to_calculate, aggregated, primary_objective, secondary_objective)
    xs, ys = pareto_front_transform_objectives(complete_points, primary_objective, secondary_objective)
    chosen = pareto_front_select_pareto_points(xs, ys, x_minimize, y_minimize, complete_points, num_points)

    return pareto_front_build_return_structure(path_to_calculate, chosen, aggregated, absolute_metrics, primary_objective, secondary_objective)
|
|
3535
|
+
|
|
3536
|
+
def pareto_front_table_read_csv() -> List[Dict[str, str]]:
    """Read the file named by the global ``RESULT_CSV_FILE`` into a list of row dicts."""
    with open(RESULT_CSV_FILE, mode="r", encoding="utf-8", newline="") as handle:
        reader = csv.DictReader(handle)
        # Materialise while the file is still open.
        all_rows = list(reader)
    return all_rows
|
|
3539
|
+
|
|
3540
|
+
def create_pareto_front_table(idxs: List[int], metric_x: str, metric_y: str) -> Table:
    """Build the table that lists the Pareto-front rows of the results CSV.

    Args:
        idxs: Passed to ``pareto_front_table_filter_rows`` to pick the rows.
        metric_x: Used in the table title.
        metric_y: Used in the table title.

    Returns:
        A ``Table``. When the CSV has no rows, or none match, it contains a
        single placeholder column that says so.
    """
    table = Table(title=f"Pareto-Front for {metric_y}/{metric_x}:", show_lines=True)

    all_rows = pareto_front_table_read_csv()
    matching = pareto_front_table_filter_rows(all_rows, idxs) if all_rows else None

    # Pick the placeholder text, if one applies.
    placeholder = None
    if not all_rows:
        placeholder = "No data found"
    elif not matching:
        placeholder = "No matching entries"

    if placeholder is not None:
        table.add_column(placeholder)
        return table

    # The first matching row determines which columns exist.
    param_cols, result_cols = pareto_front_table_get_columns(matching[0])

    pareto_front_table_add_headers(table, param_cols, result_cols)
    pareto_front_table_add_rows(table, matching, param_cols, result_cols)

    return table
|
|
3559
|
+
|
|
3560
|
+
def pareto_front_build_return_structure(
    path_to_calculate: str,
    selected_points: List[Tuple[Any, float, float]],
    records: Dict[Tuple[int, str], Dict[str, Dict[str, float]]],
    absolute_metrics: List[str],
    primary_name: str,
    secondary_name: str
) -> dict:
    """Assemble the result dict for the selected Pareto points.

    For every selected ``(trial_index, arm_name)`` whose CSV row exists and
    carries the same arm name, this collects the trial index, the parameter
    values (every CSV column that is neither a bookkeeping column nor a result
    name) and the recorded means of ``absolute_metrics``.

    Returns:
        ``{primary_name: {secondary_name: {...}, "absolute_metrics": [...]}}``
        where the inner dict has the keys ``absolute_metrics``,
        ``param_dicts``, ``means`` and ``idxs``.
    """
    def _coerce(raw: Any) -> Any:
        # Prefer int, then float; keep the raw text when neither parses.
        for caster in (int, float):
            try:
                return caster(raw)
            except ValueError:
                continue
        return raw

    results_csv_file = f"{path_to_calculate}/{RESULTS_CSV_FILENAME}"
    result_names_file = f"{path_to_calculate}/result_names.txt"

    with open(result_names_file, mode="r", encoding="utf-8") as f:
        result_names = [line.strip() for line in f if line.strip()]

    # Index the CSV by trial index; a later duplicate overwrites an earlier one.
    with open(results_csv_file, mode="r", encoding="utf-8", newline='') as csvfile:
        csv_rows = {int(entry['trial_index']): entry for entry in csv.DictReader(csvfile)}

    ignored_columns = {'trial_index', 'arm_name', 'trial_status', 'generation_node', *result_names}

    param_dicts = []
    idxs = []
    means_dict = defaultdict(list)

    for (trial_index, arm_name), _, _ in selected_points:
        row = csv_rows.get(trial_index)
        if not row or row['arm_name'] != arm_name:
            continue

        idxs.append(int(row["trial_index"]))

        param_dicts.append({
            column: _coerce(cell)
            for column, cell in row.items()
            if column not in ignored_columns
        })

        point_means = records[(trial_index, arm_name)]['means']
        for metric in absolute_metrics:
            # Missing metrics are recorded as NaN so the lists stay aligned.
            means_dict[metric].append(point_means.get(metric, float("nan")))

    inner = {
        "absolute_metrics": absolute_metrics,
        "param_dicts": param_dicts,
        "means": dict(means_dict),
        "idxs": idxs
    }

    return {
        primary_name: {
            secondary_name: inner,
            "absolute_metrics": absolute_metrics
        }
    }
|
|
3624
|
+
|
|
3625
|
+
def pareto_front_aggregate_data(path_to_calculate: str) -> Optional[Dict[Tuple[int, str], Dict[str, Dict[str, float]]]]:
    """Collect the numeric result values of every trial in a run folder.

    Returns:
        A mapping ``(trial_index, arm_name) -> {"means": {metric: value}}``
        built from the results CSV, restricted to the names listed in
        ``result_names.txt``. Values that do not parse as float are left out.
        ``None`` when either file is missing.
    """
    results_csv_file = f"{path_to_calculate}/{RESULTS_CSV_FILENAME}"
    result_names_file = f"{path_to_calculate}/result_names.txt"

    if not all(os.path.exists(needed) for needed in (results_csv_file, result_names_file)):
        return None

    with open(result_names_file, mode="r", encoding="utf-8") as f:
        result_names = [line.strip() for line in f if line.strip()]

    records: dict = defaultdict(lambda: {'means': {}})

    with open(results_csv_file, encoding="utf-8", mode="r", newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            key = (int(row['trial_index']), row['arm_name'])

            for metric in result_names:
                if metric not in row:
                    continue
                try:
                    parsed = float(row[metric])
                except ValueError:
                    continue
                # Only touch ``records`` after a successful parse, so trials
                # without any usable value never get an entry.
                records[key]['means'][metric] = parsed

    return records
|
|
3652
|
+
|
|
3653
|
+
def plot_pareto_frontier_sixel(data: Any, x_metric: str, y_metric: str) -> None:
    """Render the Pareto front of two metrics as a sixel image on the terminal.

    Args:
        data: Nested structure; ``data[x_metric][y_metric]["means"]`` must map
            both metric names to their value sequences. ``None`` prints a
            notice and returns.
        x_metric: Metric plotted on the x axis.
        y_metric: Metric plotted on the y axis.

    Nothing is plotted when ``supports_sixel()`` is false.
    """
    if data is None:
        print("[italic yellow]The data seems to be empty. Cannot plot pareto frontier.[/]")
        return

    if not supports_sixel():
        print(f"[italic yellow]Your console does not support sixel-images. Will not print Pareto-frontier as a matplotlib-sixel-plot for {x_metric}/{y_metric}.[/]")
        return

    # Imported here so matplotlib is only loaded when a plot is really drawn.
    import matplotlib.pyplot as plt

    means = data[x_metric][y_metric]["means"]

    x_values = means[x_metric]
    y_values = means[y_metric]

    fig, _ax = plt.subplots()

    try:
        _ax.scatter(x_values, y_values, s=50, marker='x', c='blue', label='Data Points')

        _ax.set_xlabel(x_metric)
        _ax.set_ylabel(y_metric)

        _ax.set_title(f'Pareto-Front {x_metric}/{y_metric}')

        _ax.ticklabel_format(style='plain', axis='both', useOffset=False)

        with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp_file:
            # Save this figure explicitly rather than whatever pyplot
            # currently considers the active one.
            fig.savefig(tmp_file.name, dpi=300)

            # Must happen inside the ``with``: the file is deleted on exit.
            print_image_to_cli(tmp_file.name, 1000)
    finally:
        # Release the figure even when plotting or printing failed.
        plt.close(fig)
|
|
3686
|
+
|
|
3687
|
+
def pareto_front_table_get_columns(first_row: Dict[str, str]) -> Tuple[List[str], List[str]]:
    """Split the columns of a CSV row into parameter and result columns.

    Parameter columns are all keys of ``first_row`` except the special
    columns (``trial_index`` is kept), the result names and anything starting
    with ``OO_Info_``. Result columns are the entries of the global
    ``arg_result_names`` that occur in the row, in that global's order.
    """
    present = list(first_row.keys())
    hidden = set(special_col_names) - {"trial_index"}

    param_cols = []
    for col in present:
        if col in hidden or col in arg_result_names or col.startswith("OO_Info_"):
            continue
        param_cols.append(col)

    result_cols = [name for name in arg_result_names if name in present]
    return param_cols, result_cols
|
|
3694
|
+
|
|
3695
|
+
def check_factorial_range() -> None:
    """Abort via ``_fatal_error`` (code 181) when ``args.model`` is "FACTORIAL"."""
    model = args.model
    if not model:
        return
    if model == "FACTORIAL":
        _fatal_error("\n⚠ --model FACTORIAL cannot be used with range parameter", 181)
|
|
3698
|
+
|
|
3699
|
+
def check_if_range_types_are_invalid(value_type: str, valid_value_types: list) -> None:
    """Call ``_fatal_error`` (code 181) unless ``value_type`` is one of ``valid_value_types``."""
    if value_type in valid_value_types:
        return

    valid_value_types_string = ", ".join(valid_value_types)
    _fatal_error(f"⚠ {value_type} is not a valid value type. Valid types for range are: {valid_value_types_string}", 181)
|
|
3703
|
+
|
|
3704
|
+
def check_range_params_length(this_args: Union[str, list]) -> None:
    """Call ``_fatal_error`` (code 181) unless ``this_args`` has 4, 5 or 6 entries."""
    if len(this_args) in (4, 5, 6):
        return

    _fatal_error("\n⚠ --parameter for type range must have 4 (or 5, the last one being optional and float by default, or 6, while the last one is true or false) parameters: <NAME> range <START> <END> (<TYPE (int or float)>, <log_scale: bool>)", 181)
|
|
3707
|
+
|
|
3708
|
+
def die_if_lower_and_upper_bound_equal_zero(lower_bound: Union[int, float], upper_bound: Union[int, float]) -> None:
    """Validate that a range's bounds are usable.

    * Either bound ``None``: ``_fatal_error`` with code 91.
    * Both bounds equal and zero: ``_fatal_error`` with code 181.
    * Both bounds equal and non-zero: a warning is printed.

    NOTE(review): the warning says the lower bound is set to ``-upper_bound``,
    but this function returns ``None`` and cannot change the caller's values.
    The assignment to the local ``lower_bound`` that used to follow the
    warning had no effect and was removed. If the correction is wanted, the
    caller has to apply it.
    """
    if upper_bound is None or lower_bound is None:
        _fatal_error("die_if_lower_and_upper_bound_equal_zero: upper_bound or lower_bound is None. Cannot continue.", 91)
    if upper_bound == lower_bound:
        if lower_bound == 0:
            _fatal_error(f"⚠ Lower bound and upper bound are equal: {lower_bound}, cannot automatically fix this, because they -0 = +0 (usually a quickfix would be to set lower_bound = -upper_bound)", 181)
        print_red(f"⚠ Lower bound and upper bound are equal: {lower_bound}, setting lower_bound = -upper_bound")
|
|
3717
|
+
|
|
3718
|
+
def format_value(value: Any, float_format: str = '.80f') -> str:
    """Convert ``value`` to a string; floats are rendered without trailing zeros.

    A float is formatted with ``float_format``; if the result contains a
    decimal point, trailing zeros and then a trailing point are removed.
    Every other value goes through ``str``. If formatting raises, the error
    is printed and ``str(value)`` is returned.
    """
    try:
        if not isinstance(value, float):
            return str(value)

        rendered = format(value, float_format)
        if '.' in rendered:
            rendered = rendered.rstrip('0').rstrip('.')
        return rendered
    except Exception as e:
        print_red(f"⚠ Error formatting the number {value}: {e}")
        return str(value)
|
|
3728
|
+
|
|
3729
|
+
def replace_parameters_in_string(
    parameters: dict,
    input_string: str,
    float_format: str = '.20f',
    additional_prefixes: Optional[list[str]] = None,
    additional_patterns: Optional[list[str]] = None,
) -> str:
    """Substitute parameter placeholders in ``input_string`` with their values.

    For every parameter ``name`` the tokens ``$name``, ``$(name)``, ``%name``
    and ``%(name)`` are replaced (plus any combination built from the
    additional prefixes and patterns). Carriage returns and newlines in the
    result are turned into spaces.

    Args:
        parameters: Mapping of parameter name to value.
        input_string: Text containing the placeholders.
        float_format: Format spec handed to ``format_value`` for floats.
        additional_prefixes: Extra prefixes besides ``$`` and ``%``.
        additional_patterns: Extra ``str.format`` patterns using ``{key}``.

    Returns:
        The substituted string, or ``""`` if any error occurred (the error is
        printed).
    """
    try:
        prefixes = ['$', '%'] + (additional_prefixes or [])
        # The pattern strings are assembled from pieces exactly as before.
        patterns = ['{' + 'key' + '}', '(' + '{' + 'key' + '}' + ')'] + (additional_patterns or [])

        # Substitute longer names first. Otherwise "$x" would be replaced
        # inside "$xy" whenever parameter "x" happens to come before "xy",
        # corrupting the longer placeholder. The sort is stable, so names of
        # equal length keep their original order.
        ordered = sorted(parameters.items(), key=lambda item: len(str(item[0])), reverse=True)

        for key, value in ordered:
            replacement = format_value(value, float_format=float_format)
            for prefix in prefixes:
                for pattern in patterns:
                    token = prefix + pattern.format(key=key)
                    input_string = input_string.replace(token, replacement)

        input_string = input_string.replace('\r', ' ').replace('\n', ' ')
        return input_string

    except Exception as e:
        print_red(f"\n⚠ Error: {e}")
        return ""
|
|
3753
|
+
|
|
3754
|
+
def get_memory_usage() -> float:
    """Return the summed resident set size, in MiB, of the current user's processes.

    Uses the values psutil prefetched into ``proc.info``, so a process that
    exits during the iteration is not queried again. Entries that psutil
    could not read (stored as ``None``) are skipped.
    """
    user_uid = os.getuid()

    total_rss = 0
    for proc in psutil.process_iter(attrs=['memory_info', 'uids']):
        uids = proc.info.get('uids')
        mem = proc.info.get('memory_info')

        # psutil stores None for attributes it was not allowed to read.
        if uids is None or mem is None:
            continue

        if uids.real == user_uid:
            total_rss += mem.rss

    memory_usage = float(total_rss / (1024 * 1024))

    return memory_usage
|
|
3763
|
+
|
|
3764
|
+
class MonitorProcess:
    """Context manager that logs CPU and RAM usage while a process is alive.

    On entering, a daemon thread is started. While the watched process is
    running and a current run folder is known, the thread appends one line per
    cycle to ``eval_nodes_cpu_ram_logs.txt`` in that folder. On exit the
    thread is asked to stop and is joined.
    """

    def __init__(self: Any, pid: int, interval: float = 1.0) -> None:
        """Prepare (but do not start) the monitoring thread.

        Args:
            pid: Process id to watch.
            interval: Seconds to sleep between two samples.
        """
        self.pid = pid
        self.interval = interval
        self.running = True
        self.thread = threading.Thread(target=self._monitor)
        self.thread.daemon = True

        fool_linter(f"self.thread.daemon was set to {self.thread.daemon}")

    def _monitor(self: Any) -> None:
        """Thread body: sample and log until stopped or the process ends."""
        try:
            _internal_process = psutil.Process(self.pid)
            while self.running and _internal_process.is_running():
                crf = get_current_run_folder()

                if crf and crf != "":
                    log_file_path = os.path.join(crf, "eval_nodes_cpu_ram_logs.txt")

                    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

                    with open(log_file_path, mode="a", encoding="utf-8") as log_file:
                        hostname = socket.gethostname()

                        slurm_job_id = os.getenv("SLURM_JOB_ID")

                        if slurm_job_id:
                            hostname += f"-SLURM-ID-{slurm_job_id}"

                        total_memory = psutil.virtual_memory().total / (1024 * 1024)
                        # Blocks for 5 seconds while measuring.
                        cpu_usage = psutil.cpu_percent(interval=5)

                        memory_usage = get_memory_usage()

                        unix_timestamp = int(time.time())

                        log_file.write(f"\nUnix-Timestamp: {unix_timestamp}, Hostname: {hostname}, CPU: {cpu_usage:.2f}%, RAM: {memory_usage:.2f} MB / {total_memory:.2f} MB\n")
                # NOTE(review): sleeping on every iteration, including the
                # ones without a run folder, so the loop never spins — confirm
                # this matches the intended nesting.
                time.sleep(self.interval)
        except psutil.NoSuchProcess:
            # The watched process is already gone; nothing left to monitor.
            pass

    def __enter__(self: Any) -> Any:
        """Start the monitoring thread and return this instance."""
        self.thread.start()
        return self

    def __exit__(self: Any, exc_type: Any, exc_value: Any, _traceback: Any) -> None:
        """Signal the thread to stop and wait for it to finish."""
        self.running = False
        self.thread.join()
|
|
3812
|
+
|
|
3813
|
+
def execute_bash_code_log_time(code: str) -> list:
    """Run ``code`` in a shell while ``MonitorProcess`` watches the child.

    Returns:
        ``[stdout, stderr, returncode, None]``. The return code is passed on
        as reported by the child; the last element is always ``None``.
    """
    process_item = subprocess.Popen(code, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    with MonitorProcess(process_item.pid):
        # communicate() waits for the child and never raises
        # CalledProcessError, so no handler for that exception is needed.
        stdout, stderr = process_item.communicate()

    return [stdout, stderr, process_item.returncode, None]
|
|
3830
|
+
|
|
3831
|
+
def execute_bash_code(code: str) -> list:
    """Run ``code`` in a shell and capture its output.

    Returns:
        ``[stdout, stderr, exit_code, signal_code]``. On success the exit code
        is ``0`` and the signal code ``None``. When the command fails with a
        negative return code, the exit code is reported as ``1`` and the
        signal code is the absolute value of that return code.

    Unless ``args.tests`` is set, a failure is also printed together with the
    captured stdout and stderr.
    """
    try:
        result = subprocess.run(
            code,
            shell=True,
            check=True,
            text=True,
            capture_output=True
        )
    except subprocess.CalledProcessError as e:
        real_exit_code = e.returncode

        signal_code = None
        if real_exit_code < 0:
            signal_code = abs(e.returncode)
            real_exit_code = 1

        if not args.tests:
            print(f"Error at execution of your program: {code}. Exit-Code: {real_exit_code}, Signal-Code: {signal_code}")
            if e.stdout:
                print(f"stdout: {e.stdout}")
            else:
                print("No stdout")

            if e.stderr:
                print(f"stderr: {e.stderr}")
            else:
                print("No stderr")

        return [e.stdout, e.stderr, real_exit_code, signal_code]

    # check=True raises for every non-zero return code, so reaching this
    # line means the command exited with 0 and was not killed by a signal.
    return [result.stdout, result.stderr, result.returncode, None]
|
|
3874
|
+
|
|
3875
|
+
def get_results(input_string: Optional[Union[int, str]]) -> Optional[Union[Dict[str, Optional[float]], List[float]]]:
|
|
3876
|
+
if input_string is None:
|
|
3877
|
+
if not args.tests:
|
|
3878
|
+
print_red("get_results: Input-String is None")
|
|
3540
3879
|
return None
|
|
3541
3880
|
|
|
3542
3881
|
if not isinstance(input_string, str):
|
|
@@ -3604,7 +3943,7 @@ def _add_to_csv_acquire_lock(lockfile: str, dir_path: str) -> bool:
|
|
|
3604
3943
|
time.sleep(wait_time)
|
|
3605
3944
|
max_wait -= wait_time
|
|
3606
3945
|
except Exception as e:
|
|
3607
|
-
|
|
3946
|
+
print_red(f"Lock error: {e}")
|
|
3608
3947
|
return False
|
|
3609
3948
|
return False
|
|
3610
3949
|
|
|
@@ -3677,12 +4016,12 @@ def find_file_paths(_text: str) -> List[str]:
|
|
|
3677
4016
|
def check_file_info(file_path: str) -> str:
|
|
3678
4017
|
if not os.path.exists(file_path):
|
|
3679
4018
|
if not args.tests:
|
|
3680
|
-
|
|
4019
|
+
print_red(f"check_file_info: The file {file_path} does not exist.")
|
|
3681
4020
|
return ""
|
|
3682
4021
|
|
|
3683
4022
|
if not os.access(file_path, os.R_OK):
|
|
3684
4023
|
if not args.tests:
|
|
3685
|
-
|
|
4024
|
+
print_red(f"check_file_info: The file {file_path} is not readable.")
|
|
3686
4025
|
return ""
|
|
3687
4026
|
|
|
3688
4027
|
file_stat = os.stat(file_path)
|
|
@@ -3796,7 +4135,7 @@ def count_defective_nodes(file_path: Union[str, None] = None, entry: Any = None)
|
|
|
3796
4135
|
return sorted(set(entries))
|
|
3797
4136
|
|
|
3798
4137
|
except Exception as e:
|
|
3799
|
-
|
|
4138
|
+
print_red(f"An error has occurred: {e}")
|
|
3800
4139
|
return []
|
|
3801
4140
|
|
|
3802
4141
|
def test_gpu_before_evaluate(return_in_case_of_error: dict) -> Union[None, dict]:
|
|
@@ -3807,7 +4146,7 @@ def test_gpu_before_evaluate(return_in_case_of_error: dict) -> Union[None, dict]
|
|
|
3807
4146
|
|
|
3808
4147
|
fool_linter(tmp)
|
|
3809
4148
|
except RuntimeError:
|
|
3810
|
-
|
|
4149
|
+
print_red(f"Node {socket.gethostname()} was detected as faulty. It should have had a GPU, but there is an error initializing the CUDA driver. Adding this node to the --exclude list.")
|
|
3811
4150
|
count_defective_nodes(None, socket.gethostname())
|
|
3812
4151
|
return return_in_case_of_error
|
|
3813
4152
|
except Exception:
|
|
@@ -4517,7 +4856,7 @@ def replace_string_with_params(input_string: str, params: list) -> str:
|
|
|
4517
4856
|
return replaced_string
|
|
4518
4857
|
except AssertionError as e:
|
|
4519
4858
|
error_text = f"Error in replace_string_with_params: {e}"
|
|
4520
|
-
|
|
4859
|
+
print_red(error_text)
|
|
4521
4860
|
raise
|
|
4522
4861
|
|
|
4523
4862
|
return ""
|
|
@@ -4794,9 +5133,7 @@ def get_sixel_graphics_data(_pd_csv: str, _force: bool = False) -> list:
|
|
|
4794
5133
|
_params = [_command, plot, _tmp, plot_type, tmp_file, _width]
|
|
4795
5134
|
data.append(_params)
|
|
4796
5135
|
except Exception as e:
|
|
4797
|
-
|
|
4798
|
-
print_red(f"Error trying to print {plot_type} to CLI: {e}, {tb}")
|
|
4799
|
-
print_debug(f"Error trying to print {plot_type} to CLI: {e}")
|
|
5136
|
+
print_red(f"Error trying to print {plot_type} to CLI: {e}")
|
|
4800
5137
|
|
|
4801
5138
|
return data
|
|
4802
5139
|
|
|
@@ -4997,8 +5334,7 @@ def abandon_job(job: Job, trial_index: int, reason: str) -> bool:
|
|
|
4997
5334
|
else:
|
|
4998
5335
|
_fatal_error("ax_client could not be found", 101)
|
|
4999
5336
|
except Exception as e:
|
|
5000
|
-
|
|
5001
|
-
print_debug(f"ERROR in line {get_line_info()}: {e}")
|
|
5337
|
+
print_red(f"ERROR in line {get_line_info()}: {e}")
|
|
5002
5338
|
return False
|
|
5003
5339
|
job.cancel()
|
|
5004
5340
|
return True
|
|
@@ -5011,21 +5347,6 @@ def abandon_all_jobs() -> None:
|
|
|
5011
5347
|
if not abandoned:
|
|
5012
5348
|
print_debug(f"Job {job} could not be abandoned.")
|
|
5013
5349
|
|
|
5014
|
-
def show_pareto_or_error_msg(path_to_calculate: str, res_names: list = arg_result_names, disable_sixel_and_table: bool = False) -> None:
|
|
5015
|
-
if args.dryrun:
|
|
5016
|
-
print_debug("Not showing Pareto-frontier data with --dryrun")
|
|
5017
|
-
return None
|
|
5018
|
-
|
|
5019
|
-
if len(res_names) > 1:
|
|
5020
|
-
try:
|
|
5021
|
-
show_pareto_frontier_data(path_to_calculate, res_names, disable_sixel_and_table)
|
|
5022
|
-
except Exception as e:
|
|
5023
|
-
inner_tb = ''.join(traceback.format_exception(type(e), e, e.__traceback__))
|
|
5024
|
-
print_red(f"show_pareto_frontier_data() failed with exception '{e}':\n{inner_tb}")
|
|
5025
|
-
else:
|
|
5026
|
-
print_debug(f"show_pareto_frontier_data will NOT be executed because len(arg_result_names) is {len(arg_result_names)}")
|
|
5027
|
-
return None
|
|
5028
|
-
|
|
5029
5350
|
def end_program(_force: Optional[bool] = False, exit_code: Optional[int] = None) -> None:
|
|
5030
5351
|
global END_PROGRAM_RAN
|
|
5031
5352
|
|
|
@@ -5062,7 +5383,7 @@ def end_program(_force: Optional[bool] = False, exit_code: Optional[int] = None)
|
|
|
5062
5383
|
_exit = new_exit
|
|
5063
5384
|
except (SignalUSR, SignalINT, SignalCONT, KeyboardInterrupt):
|
|
5064
5385
|
print_red("\n⚠ You pressed CTRL+C or a signal was sent. Program execution halted while ending program.")
|
|
5065
|
-
|
|
5386
|
+
print_red("\n⚠ KeyboardInterrupt signal was sent. Ending program will still run.")
|
|
5066
5387
|
new_exit = show_end_table_and_save_end_files()
|
|
5067
5388
|
if new_exit > 0:
|
|
5068
5389
|
_exit = new_exit
|
|
@@ -5096,9 +5417,9 @@ def save_ax_client_to_json_file(checkpoint_filepath: str) -> None:
|
|
|
5096
5417
|
def save_checkpoint(trial_nr: int = 0, eee: Union[None, str, Exception] = None) -> None:
|
|
5097
5418
|
if trial_nr > 3:
|
|
5098
5419
|
if eee:
|
|
5099
|
-
|
|
5420
|
+
print_red(f"Error during saving checkpoint: {eee}")
|
|
5100
5421
|
else:
|
|
5101
|
-
|
|
5422
|
+
print_red("Error during saving checkpoint")
|
|
5102
5423
|
return
|
|
5103
5424
|
|
|
5104
5425
|
try:
|
|
@@ -5268,7 +5589,7 @@ def parse_equation_item(comparer_found: bool, item: str, parsed: list, parsed_or
|
|
|
5268
5589
|
})
|
|
5269
5590
|
elif item in [">=", "<="]:
|
|
5270
5591
|
if comparer_found:
|
|
5271
|
-
|
|
5592
|
+
print_red("There is already one comparison operator! Cannot have more than one in an equation!")
|
|
5272
5593
|
return_totally = True
|
|
5273
5594
|
comparer_found = True
|
|
5274
5595
|
|
|
@@ -5956,7 +6277,6 @@ def parse_single_experiment_parameter_table(classic_params: Optional[Union[list,
|
|
|
5956
6277
|
_upper = param["bounds"][1]
|
|
5957
6278
|
|
|
5958
6279
|
_possible_int_lower = str(helpers.to_int_when_possible(_lower))
|
|
5959
|
-
#print(f"name: {_name}, _possible_int_lower: {_possible_int_lower}, lower: {_lower}")
|
|
5960
6280
|
_possible_int_upper = str(helpers.to_int_when_possible(_upper))
|
|
5961
6281
|
|
|
5962
6282
|
rows.append([_name, _short_type, _possible_int_lower, _possible_int_upper, "", value_type, log_scale])
|
|
@@ -6189,7 +6509,7 @@ def update_progress_bar(nr: int) -> None:
|
|
|
6189
6509
|
try:
|
|
6190
6510
|
progress_bar.update(nr)
|
|
6191
6511
|
except Exception as e:
|
|
6192
|
-
|
|
6512
|
+
print_red(f"Error updating progress bar: {e}")
|
|
6193
6513
|
else:
|
|
6194
6514
|
print_red("update_progress_bar: progress_bar was None")
|
|
6195
6515
|
|
|
@@ -6629,7 +6949,7 @@ def get_generation_node_for_index(
|
|
|
6629
6949
|
|
|
6630
6950
|
return generation_node
|
|
6631
6951
|
except Exception as e:
|
|
6632
|
-
|
|
6952
|
+
print_red(f"Error while get_generation_node_for_index: {e}")
|
|
6633
6953
|
return "MANUAL"
|
|
6634
6954
|
|
|
6635
6955
|
def _get_generation_node_for_index_index_valid(
|
|
@@ -7244,7 +7564,7 @@ def get_parameters_from_outfile(stdout_path: str) -> Union[None, dict, str]:
|
|
|
7244
7564
|
if not args.tests:
|
|
7245
7565
|
original_print(f"get_parameters_from_outfile: The file '{stdout_path}' was not found.")
|
|
7246
7566
|
except Exception as e:
|
|
7247
|
-
|
|
7567
|
+
print_red(f"get_parameters_from_outfile: There was an error: {e}")
|
|
7248
7568
|
|
|
7249
7569
|
return None
|
|
7250
7570
|
|
|
@@ -7262,7 +7582,7 @@ def get_hostname_from_outfile(stdout_path: Optional[str]) -> Optional[str]:
|
|
|
7262
7582
|
original_print(f"The file '{stdout_path}' was not found.")
|
|
7263
7583
|
return None
|
|
7264
7584
|
except Exception as e:
|
|
7265
|
-
|
|
7585
|
+
print_red(f"There was an error: {e}")
|
|
7266
7586
|
return None
|
|
7267
7587
|
|
|
7268
7588
|
def add_to_global_error_list(msg: str) -> None:
|
|
@@ -7325,7 +7645,7 @@ def check_valid_result(result: Union[None, dict]) -> bool:
|
|
|
7325
7645
|
else:
|
|
7326
7646
|
values.append(obj)
|
|
7327
7647
|
except Exception as e:
|
|
7328
|
-
|
|
7648
|
+
print_red(f"Error while flattening values: {e}")
|
|
7329
7649
|
return values
|
|
7330
7650
|
|
|
7331
7651
|
if result is None:
|
|
@@ -7338,7 +7658,7 @@ def check_valid_result(result: Union[None, dict]) -> bool:
|
|
|
7338
7658
|
return False
|
|
7339
7659
|
return True
|
|
7340
7660
|
except Exception as e:
|
|
7341
|
-
|
|
7661
|
+
print_red(f"Error while checking result validity: {e}")
|
|
7342
7662
|
return False
|
|
7343
7663
|
|
|
7344
7664
|
def update_ax_client_trial(trial_idx: int, result: Union[list, dict]) -> None:
|
|
@@ -7474,7 +7794,7 @@ def finish_job_core(job: Any, trial_index: int, this_jobs_finished: int) -> int:
|
|
|
7474
7794
|
if len(arg_result_names) > 1 and count_done_jobs() > 1 and not job_calculate_pareto_front(get_current_run_folder(), True):
|
|
7475
7795
|
print_red("job_calculate_pareto_front post job failed")
|
|
7476
7796
|
except Exception as e:
|
|
7477
|
-
|
|
7797
|
+
print_red(f"ERROR in line {get_line_info()}: {e}")
|
|
7478
7798
|
else:
|
|
7479
7799
|
_finish_job_core_helper_mark_failure(job, trial_index, _trial)
|
|
7480
7800
|
else:
|
|
@@ -7813,7 +8133,7 @@ def handle_restart(stdout_path: str, trial_index: int) -> None:
|
|
|
7813
8133
|
if parameters:
|
|
7814
8134
|
orchestrator_start_trial(parameters, trial_index)
|
|
7815
8135
|
else:
|
|
7816
|
-
|
|
8136
|
+
print_red(f"Could not determine parameters from outfile {stdout_path} for restarting job")
|
|
7817
8137
|
|
|
7818
8138
|
def check_alternate_path(path: str) -> str:
|
|
7819
8139
|
if os.path.exists(path):
|
|
@@ -8009,7 +8329,7 @@ def cancel_failed_job(trial_index: int, new_job: Job) -> None:
|
|
|
8009
8329
|
else:
|
|
8010
8330
|
_fatal_error("ax_client not defined", 101)
|
|
8011
8331
|
except Exception as e:
|
|
8012
|
-
|
|
8332
|
+
print_red(f"ERROR in line {get_line_info()}: {e}")
|
|
8013
8333
|
new_job.cancel()
|
|
8014
8334
|
|
|
8015
8335
|
print_debug(f"cancel_failed_job: removing job {new_job}, trial_index: {trial_index}")
|
|
@@ -8309,7 +8629,7 @@ def mark_abandoned(trial: Any, reason: str, trial_index: int) -> None:
|
|
|
8309
8629
|
print_debug(f"[INFO] Marking trial {trial.index} ({trial.arm.name}) as abandoned, trial-index: {trial_index}. Reason: {reason}")
|
|
8310
8630
|
trial.mark_abandoned(reason)
|
|
8311
8631
|
except Exception as e:
|
|
8312
|
-
|
|
8632
|
+
print_red(f"[ERROR] Could not mark trial as abandoned: {e}")
|
|
8313
8633
|
|
|
8314
8634
|
def create_and_handle_trial(arm: Any) -> Optional[Tuple[int, float, bool]]:
|
|
8315
8635
|
if ax_client is None:
|
|
@@ -8836,10 +9156,10 @@ def parse_generation_strategy_string(gen_strat_str: str) -> tuple[list[dict[str,
|
|
|
8836
9156
|
|
|
8837
9157
|
for s in splitted_by_comma:
|
|
8838
9158
|
if "=" not in s:
|
|
8839
|
-
|
|
9159
|
+
print_red(f"'{s}' does not contain '='")
|
|
8840
9160
|
my_exit(123)
|
|
8841
9161
|
if s.count("=") != 1:
|
|
8842
|
-
|
|
9162
|
+
print_red(f"There can only be one '=' in the gen_strat_str's element '{s}'")
|
|
8843
9163
|
my_exit(123)
|
|
8844
9164
|
|
|
8845
9165
|
model_name, nr_str = s.split("=")
|
|
@@ -8849,13 +9169,13 @@ def parse_generation_strategy_string(gen_strat_str: str) -> tuple[list[dict[str,
|
|
|
8849
9169
|
_fatal_error(f"Model {matching_model} is not valid for custom generation strategy.", 56)
|
|
8850
9170
|
|
|
8851
9171
|
if not matching_model:
|
|
8852
|
-
|
|
9172
|
+
print_red(f"'{model_name}' not found in SUPPORTED_MODELS")
|
|
8853
9173
|
my_exit(123)
|
|
8854
9174
|
|
|
8855
9175
|
try:
|
|
8856
9176
|
nr = int(nr_str)
|
|
8857
9177
|
except ValueError:
|
|
8858
|
-
|
|
9178
|
+
print_red(f"Invalid number of generations '{nr_str}' for model '{model_name}'")
|
|
8859
9179
|
my_exit(123)
|
|
8860
9180
|
|
|
8861
9181
|
gen_strat_list.append({matching_model: nr})
|
|
@@ -9412,7 +9732,7 @@ def execute_nvidia_smi() -> None:
|
|
|
9412
9732
|
if not host:
|
|
9413
9733
|
print_debug("host not defined")
|
|
9414
9734
|
except Exception as e:
|
|
9415
|
-
|
|
9735
|
+
print_red(f"execute_nvidia_smi: An error occurred: {e}")
|
|
9416
9736
|
if is_slurm_job() and not args.force_local_execution:
|
|
9417
9737
|
_sleep(30)
|
|
9418
9738
|
|
|
@@ -9576,379 +9896,67 @@ def parse_orchestrator_file(_f: str, _test: bool = False) -> Union[dict, None]:
|
|
|
9576
9896
|
print_red(f"{key}-entry is not {expected_type.__name__} but {type(x[key])}")
|
|
9577
9897
|
die_orchestrator_exit_code_206(_test)
|
|
9578
9898
|
|
|
9579
|
-
for y in x["match_strings"]:
|
|
9580
|
-
if not isinstance(y, str):
|
|
9581
|
-
print_red("x['match_strings'] is not a string but {type(x['match_strings'])}")
|
|
9582
|
-
die_orchestrator_exit_code_206(_test)
|
|
9583
|
-
|
|
9584
|
-
return data
|
|
9585
|
-
except Exception as e:
|
|
9586
|
-
print(f"Error while parse_experiment_parameters({_f}): {e}")
|
|
9587
|
-
else:
|
|
9588
|
-
print_red(f"{_f} could not be found")
|
|
9589
|
-
|
|
9590
|
-
return None
|
|
9591
|
-
|
|
9592
|
-
def set_orchestrator() -> None:
|
|
9593
|
-
with spinner("Setting orchestrator..."):
|
|
9594
|
-
global orchestrator
|
|
9595
|
-
|
|
9596
|
-
if args.orchestrator_file:
|
|
9597
|
-
if SYSTEM_HAS_SBATCH:
|
|
9598
|
-
orchestrator = parse_orchestrator_file(args.orchestrator_file, False)
|
|
9599
|
-
else:
|
|
9600
|
-
print_yellow("--orchestrator_file will be ignored on non-sbatch-systems.")
|
|
9601
|
-
|
|
9602
|
-
def check_if_has_random_steps() -> None:
|
|
9603
|
-
if (not args.continue_previous_job and "--continue" not in sys.argv) and (args.num_random_steps == 0 or not args.num_random_steps) and args.model not in ["EXTERNAL_GENERATOR", "SOBOL", "PSEUDORANDOM"]:
|
|
9604
|
-
_fatal_error("You have no random steps set. This is only allowed in continued jobs. To start, you need either some random steps, or a continued run.", 233)
|
|
9605
|
-
|
|
9606
|
-
def add_exclude_to_defective_nodes() -> None:
|
|
9607
|
-
with spinner("Adding excluded nodes..."):
|
|
9608
|
-
if args.exclude:
|
|
9609
|
-
entries = [entry.strip() for entry in args.exclude.split(',')]
|
|
9610
|
-
|
|
9611
|
-
for entry in entries:
|
|
9612
|
-
count_defective_nodes(None, entry)
|
|
9613
|
-
|
|
9614
|
-
def check_max_eval(_max_eval: int) -> None:
|
|
9615
|
-
with spinner("Checking max_eval..."):
|
|
9616
|
-
if not _max_eval:
|
|
9617
|
-
_fatal_error("--max_eval needs to be set!", 19)
|
|
9618
|
-
|
|
9619
|
-
def parse_parameters() -> Any:
|
|
9620
|
-
cli_params_experiment_parameters = None
|
|
9621
|
-
if args.parameter:
|
|
9622
|
-
parse_experiment_parameters()
|
|
9623
|
-
cli_params_experiment_parameters = experiment_parameters
|
|
9624
|
-
|
|
9625
|
-
return cli_params_experiment_parameters
|
|
9626
|
-
|
|
9627
|
-
def create_pareto_front_table(idxs: List[int], metric_x: str, metric_y: str) -> Table:
|
|
9628
|
-
table = Table(title=f"Pareto-Front for {metric_y}/{metric_x}:", show_lines=True)
|
|
9629
|
-
|
|
9630
|
-
rows = pareto_front_table_read_csv()
|
|
9631
|
-
if not rows:
|
|
9632
|
-
table.add_column("No data found")
|
|
9633
|
-
return table
|
|
9634
|
-
|
|
9635
|
-
filtered_rows = pareto_front_table_filter_rows(rows, idxs)
|
|
9636
|
-
if not filtered_rows:
|
|
9637
|
-
table.add_column("No matching entries")
|
|
9638
|
-
return table
|
|
9639
|
-
|
|
9640
|
-
param_cols, result_cols = pareto_front_table_get_columns(filtered_rows[0])
|
|
9641
|
-
|
|
9642
|
-
pareto_front_table_add_headers(table, param_cols, result_cols)
|
|
9643
|
-
pareto_front_table_add_rows(table, filtered_rows, param_cols, result_cols)
|
|
9644
|
-
|
|
9645
|
-
return table
|
|
9646
|
-
|
|
9647
|
-
def pareto_front_table_read_csv() -> List[Dict[str, str]]:
|
|
9648
|
-
with open(RESULT_CSV_FILE, mode="r", encoding="utf-8", newline="") as f:
|
|
9649
|
-
return list(csv.DictReader(f))
|
|
9650
|
-
|
|
9651
|
-
def pareto_front_table_filter_rows(rows: List[Dict[str, str]], idxs: List[int]) -> List[Dict[str, str]]:
|
|
9652
|
-
result = []
|
|
9653
|
-
for row in rows:
|
|
9654
|
-
try:
|
|
9655
|
-
trial_index = int(row["trial_index"])
|
|
9656
|
-
except (KeyError, ValueError):
|
|
9657
|
-
continue
|
|
9658
|
-
|
|
9659
|
-
if row.get("trial_status", "").strip().upper() == "COMPLETED" and trial_index in idxs:
|
|
9660
|
-
result.append(row)
|
|
9661
|
-
return result
|
|
9662
|
-
|
|
9663
|
-
def pareto_front_table_get_columns(first_row: Dict[str, str]) -> Tuple[List[str], List[str]]:
|
|
9664
|
-
all_columns = list(first_row.keys())
|
|
9665
|
-
ignored_cols = set(special_col_names) - {"trial_index"}
|
|
9666
|
-
|
|
9667
|
-
param_cols = [col for col in all_columns if col not in ignored_cols and col not in arg_result_names and not col.startswith("OO_Info_")]
|
|
9668
|
-
result_cols = [col for col in arg_result_names if col in all_columns]
|
|
9669
|
-
return param_cols, result_cols
|
|
9670
|
-
|
|
9671
|
-
def pareto_front_table_add_headers(table: Table, param_cols: List[str], result_cols: List[str]) -> None:
|
|
9672
|
-
for col in param_cols:
|
|
9673
|
-
table.add_column(col, justify="center")
|
|
9674
|
-
for col in result_cols:
|
|
9675
|
-
table.add_column(Text(f"{col}", style="cyan"), justify="center")
|
|
9676
|
-
|
|
9677
|
-
def pareto_front_table_add_rows(table: Table, rows: List[Dict[str, str]], param_cols: List[str], result_cols: List[str]) -> None:
|
|
9678
|
-
for row in rows:
|
|
9679
|
-
values = [str(helpers.to_int_when_possible(row[col])) for col in param_cols]
|
|
9680
|
-
result_values = [Text(str(helpers.to_int_when_possible(row[col])), style="cyan") for col in result_cols]
|
|
9681
|
-
table.add_row(*values, *result_values, style="bold green")
|
|
9682
|
-
|
|
9683
|
-
def pareto_front_as_rich_table(idxs: list, metric_x: str, metric_y: str) -> Optional[Table]:
|
|
9684
|
-
if not os.path.exists(RESULT_CSV_FILE):
|
|
9685
|
-
print_debug(f"pareto_front_as_rich_table: File '{RESULT_CSV_FILE}' not found")
|
|
9686
|
-
return None
|
|
9687
|
-
|
|
9688
|
-
return create_pareto_front_table(idxs, metric_x, metric_y)
|
|
9689
|
-
|
|
9690
|
-
def supports_sixel() -> bool:
|
|
9691
|
-
term = os.environ.get("TERM", "").lower()
|
|
9692
|
-
if "xterm" in term or "mlterm" in term:
|
|
9693
|
-
return True
|
|
9694
|
-
|
|
9695
|
-
try:
|
|
9696
|
-
output = subprocess.run(["tput", "setab", "256"], capture_output=True, text=True, check=True)
|
|
9697
|
-
if output.returncode == 0 and "sixel" in output.stdout.lower():
|
|
9698
|
-
return True
|
|
9699
|
-
except (subprocess.CalledProcessError, FileNotFoundError):
|
|
9700
|
-
pass
|
|
9701
|
-
|
|
9702
|
-
return False
|
|
9703
|
-
|
|
9704
|
-
def plot_pareto_frontier_sixel(data: Any, x_metric: str, y_metric: str) -> None:
|
|
9705
|
-
if data is None:
|
|
9706
|
-
print("[italic yellow]The data seems to be empty. Cannot plot pareto frontier.[/]")
|
|
9707
|
-
return
|
|
9708
|
-
|
|
9709
|
-
if not supports_sixel():
|
|
9710
|
-
print(f"[italic yellow]Your console does not support sixel-images. Will not print Pareto-frontier as a matplotlib-sixel-plot for {x_metric}/{y_metric}.[/]")
|
|
9711
|
-
return
|
|
9712
|
-
|
|
9713
|
-
import matplotlib.pyplot as plt
|
|
9714
|
-
|
|
9715
|
-
means = data[x_metric][y_metric]["means"]
|
|
9716
|
-
|
|
9717
|
-
x_values = means[x_metric]
|
|
9718
|
-
y_values = means[y_metric]
|
|
9719
|
-
|
|
9720
|
-
fig, _ax = plt.subplots()
|
|
9721
|
-
|
|
9722
|
-
_ax.scatter(x_values, y_values, s=50, marker='x', c='blue', label='Data Points')
|
|
9723
|
-
|
|
9724
|
-
_ax.set_xlabel(x_metric)
|
|
9725
|
-
_ax.set_ylabel(y_metric)
|
|
9726
|
-
|
|
9727
|
-
_ax.set_title(f'Pareto-Front {x_metric}/{y_metric}')
|
|
9728
|
-
|
|
9729
|
-
_ax.ticklabel_format(style='plain', axis='both', useOffset=False)
|
|
9730
|
-
|
|
9731
|
-
with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp_file:
|
|
9732
|
-
plt.savefig(tmp_file.name, dpi=300)
|
|
9733
|
-
|
|
9734
|
-
print_image_to_cli(tmp_file.name, 1000)
|
|
9735
|
-
|
|
9736
|
-
plt.close(fig)
|
|
9737
|
-
|
|
9738
|
-
def pareto_front_general_validate_shapes(x: np.ndarray, y: np.ndarray) -> None:
|
|
9739
|
-
if x.shape != y.shape:
|
|
9740
|
-
raise ValueError("Input arrays x and y must have the same shape.")
|
|
9741
|
-
|
|
9742
|
-
def pareto_front_general_compare(
|
|
9743
|
-
xi: float, yi: float, xj: float, yj: float,
|
|
9744
|
-
x_minimize: bool, y_minimize: bool
|
|
9745
|
-
) -> bool:
|
|
9746
|
-
x_better_eq = xj <= xi if x_minimize else xj >= xi
|
|
9747
|
-
y_better_eq = yj <= yi if y_minimize else yj >= yi
|
|
9748
|
-
x_strictly_better = xj < xi if x_minimize else xj > xi
|
|
9749
|
-
y_strictly_better = yj < yi if y_minimize else yj > yi
|
|
9750
|
-
|
|
9751
|
-
return bool(x_better_eq and y_better_eq and (x_strictly_better or y_strictly_better))
|
|
9752
|
-
|
|
9753
|
-
def pareto_front_general_find_dominated(
|
|
9754
|
-
x: np.ndarray, y: np.ndarray, x_minimize: bool, y_minimize: bool
|
|
9755
|
-
) -> np.ndarray:
|
|
9756
|
-
num_points = len(x)
|
|
9757
|
-
is_dominated = np.zeros(num_points, dtype=bool)
|
|
9758
|
-
|
|
9759
|
-
for i in range(num_points):
|
|
9760
|
-
for j in range(num_points):
|
|
9761
|
-
if i == j:
|
|
9762
|
-
continue
|
|
9763
|
-
|
|
9764
|
-
if pareto_front_general_compare(x[i], y[i], x[j], y[j], x_minimize, y_minimize):
|
|
9765
|
-
is_dominated[i] = True
|
|
9766
|
-
break
|
|
9767
|
-
|
|
9768
|
-
return is_dominated
|
|
9769
|
-
|
|
9770
|
-
def pareto_front_general(
|
|
9771
|
-
x: np.ndarray,
|
|
9772
|
-
y: np.ndarray,
|
|
9773
|
-
x_minimize: bool = True,
|
|
9774
|
-
y_minimize: bool = True
|
|
9775
|
-
) -> np.ndarray:
|
|
9776
|
-
try:
|
|
9777
|
-
pareto_front_general_validate_shapes(x, y)
|
|
9778
|
-
is_dominated = pareto_front_general_find_dominated(x, y, x_minimize, y_minimize)
|
|
9779
|
-
return np.where(~is_dominated)[0]
|
|
9780
|
-
except Exception as e:
|
|
9781
|
-
print("Error in pareto_front_general:", str(e))
|
|
9782
|
-
return np.array([], dtype=int)
|
|
9783
|
-
|
|
9784
|
-
def pareto_front_aggregate_data(path_to_calculate: str) -> Optional[Dict[Tuple[int, str], Dict[str, Dict[str, float]]]]:
|
|
9785
|
-
results_csv_file = f"{path_to_calculate}/{RESULTS_CSV_FILENAME}"
|
|
9786
|
-
result_names_file = f"{path_to_calculate}/result_names.txt"
|
|
9787
|
-
|
|
9788
|
-
if not os.path.exists(results_csv_file) or not os.path.exists(result_names_file):
|
|
9789
|
-
return None
|
|
9790
|
-
|
|
9791
|
-
with open(result_names_file, mode="r", encoding="utf-8") as f:
|
|
9792
|
-
result_names = [line.strip() for line in f if line.strip()]
|
|
9793
|
-
|
|
9794
|
-
records: dict = defaultdict(lambda: {'means': {}})
|
|
9795
|
-
|
|
9796
|
-
with open(results_csv_file, encoding="utf-8", mode="r", newline='') as csvfile:
|
|
9797
|
-
reader = csv.DictReader(csvfile)
|
|
9798
|
-
for row in reader:
|
|
9799
|
-
trial_index = int(row['trial_index'])
|
|
9800
|
-
arm_name = row['arm_name']
|
|
9801
|
-
key = (trial_index, arm_name)
|
|
9802
|
-
|
|
9803
|
-
for metric in result_names:
|
|
9804
|
-
if metric in row:
|
|
9805
|
-
try:
|
|
9806
|
-
records[key]['means'][metric] = float(row[metric])
|
|
9807
|
-
except ValueError:
|
|
9808
|
-
continue
|
|
9809
|
-
|
|
9810
|
-
return records
|
|
9811
|
-
|
|
9812
|
-
def pareto_front_filter_complete_points(
|
|
9813
|
-
path_to_calculate: str,
|
|
9814
|
-
records: Dict[Tuple[int, str], Dict[str, Dict[str, float]]],
|
|
9815
|
-
primary_name: str,
|
|
9816
|
-
secondary_name: str
|
|
9817
|
-
) -> List[Tuple[Tuple[int, str], float, float]]:
|
|
9818
|
-
points = []
|
|
9819
|
-
for key, metrics in records.items():
|
|
9820
|
-
means = metrics['means']
|
|
9821
|
-
if primary_name in means and secondary_name in means:
|
|
9822
|
-
x_val = means[primary_name]
|
|
9823
|
-
y_val = means[secondary_name]
|
|
9824
|
-
points.append((key, x_val, y_val))
|
|
9825
|
-
if len(points) == 0:
|
|
9826
|
-
raise ValueError(f"No full data points with both objectives found in {path_to_calculate}.")
|
|
9827
|
-
return points
|
|
9828
|
-
|
|
9829
|
-
def pareto_front_transform_objectives(
|
|
9830
|
-
points: List[Tuple[Any, float, float]],
|
|
9831
|
-
primary_name: str,
|
|
9832
|
-
secondary_name: str
|
|
9833
|
-
) -> Tuple[np.ndarray, np.ndarray]:
|
|
9834
|
-
primary_idx = arg_result_names.index(primary_name)
|
|
9835
|
-
secondary_idx = arg_result_names.index(secondary_name)
|
|
9836
|
-
|
|
9837
|
-
x = np.array([p[1] for p in points])
|
|
9838
|
-
y = np.array([p[2] for p in points])
|
|
9839
|
-
|
|
9840
|
-
if arg_result_min_or_max[primary_idx] == "max":
|
|
9841
|
-
x = -x
|
|
9842
|
-
elif arg_result_min_or_max[primary_idx] != "min":
|
|
9843
|
-
raise ValueError(f"Unknown mode for {primary_name}: {arg_result_min_or_max[primary_idx]}")
|
|
9844
|
-
|
|
9845
|
-
if arg_result_min_or_max[secondary_idx] == "max":
|
|
9846
|
-
y = -y
|
|
9847
|
-
elif arg_result_min_or_max[secondary_idx] != "min":
|
|
9848
|
-
raise ValueError(f"Unknown mode for {secondary_name}: {arg_result_min_or_max[secondary_idx]}")
|
|
9849
|
-
|
|
9850
|
-
return x, y
|
|
9851
|
-
|
|
9852
|
-
def pareto_front_select_pareto_points(
|
|
9853
|
-
x: np.ndarray,
|
|
9854
|
-
y: np.ndarray,
|
|
9855
|
-
x_minimize: bool,
|
|
9856
|
-
y_minimize: bool,
|
|
9857
|
-
points: List[Tuple[Any, float, float]],
|
|
9858
|
-
num_points: int
|
|
9859
|
-
) -> List[Tuple[Any, float, float]]:
|
|
9860
|
-
indices = pareto_front_general(x, y, x_minimize, y_minimize)
|
|
9861
|
-
sorted_indices = indices[np.argsort(x[indices])]
|
|
9862
|
-
sorted_indices = sorted_indices[:num_points]
|
|
9863
|
-
selected_points = [points[i] for i in sorted_indices]
|
|
9864
|
-
return selected_points
|
|
9865
|
-
|
|
9866
|
-
def pareto_front_build_return_structure(
|
|
9867
|
-
path_to_calculate: str,
|
|
9868
|
-
selected_points: List[Tuple[Any, float, float]],
|
|
9869
|
-
records: Dict[Tuple[int, str], Dict[str, Dict[str, float]]],
|
|
9870
|
-
absolute_metrics: List[str],
|
|
9871
|
-
primary_name: str,
|
|
9872
|
-
secondary_name: str
|
|
9873
|
-
) -> dict:
|
|
9874
|
-
results_csv_file = f"{path_to_calculate}/{RESULTS_CSV_FILENAME}"
|
|
9875
|
-
result_names_file = f"{path_to_calculate}/result_names.txt"
|
|
9876
|
-
|
|
9877
|
-
with open(result_names_file, mode="r", encoding="utf-8") as f:
|
|
9878
|
-
result_names = [line.strip() for line in f if line.strip()]
|
|
9879
|
-
|
|
9880
|
-
csv_rows = {}
|
|
9881
|
-
with open(results_csv_file, mode="r", encoding="utf-8", newline='') as csvfile:
|
|
9882
|
-
reader = csv.DictReader(csvfile)
|
|
9883
|
-
for row in reader:
|
|
9884
|
-
trial_index = int(row['trial_index'])
|
|
9885
|
-
csv_rows[trial_index] = row
|
|
9899
|
+
for y in x["match_strings"]:
|
|
9900
|
+
if not isinstance(y, str):
|
|
9901
|
+
print_red("x['match_strings'] is not a string but {type(x['match_strings'])}")
|
|
9902
|
+
die_orchestrator_exit_code_206(_test)
|
|
9886
9903
|
|
|
9887
|
-
|
|
9888
|
-
|
|
9904
|
+
return data
|
|
9905
|
+
except Exception as e:
|
|
9906
|
+
print_red(f"Error while parse_experiment_parameters({_f}): {e}")
|
|
9907
|
+
else:
|
|
9908
|
+
print_red(f"{_f} could not be found")
|
|
9889
9909
|
|
|
9890
|
-
|
|
9891
|
-
idxs = []
|
|
9892
|
-
means_dict = defaultdict(list)
|
|
9910
|
+
return None
|
|
9893
9911
|
|
|
9894
|
-
|
|
9895
|
-
|
|
9896
|
-
|
|
9897
|
-
print_debug(f"pareto_front_build_return_structure: trial_index '{trial_index}' could not be found and row returned as None")
|
|
9898
|
-
continue
|
|
9912
|
+
def set_orchestrator() -> None:
    """Load the orchestrator configuration into the module-level ``orchestrator``.

    Does nothing when ``args.orchestrator_file`` is not set. When it is set but
    ``SYSTEM_HAS_SBATCH`` is false, only a warning is printed and
    ``orchestrator`` is left untouched.
    """
    global orchestrator

    with spinner("Setting orchestrator..."):
        if not args.orchestrator_file:
            return

        if not SYSTEM_HAS_SBATCH:
            print_yellow("--orchestrator_file will be ignored on non-sbatch-systems.")
            return

        orchestrator = parse_orchestrator_file(args.orchestrator_file, False)
|
|
9901
9921
|
|
|
9902
|
-
|
|
9903
|
-
|
|
9904
|
-
|
|
9905
|
-
try:
|
|
9906
|
-
param_dict[key] = int(value)
|
|
9907
|
-
except ValueError:
|
|
9908
|
-
try:
|
|
9909
|
-
param_dict[key] = float(value)
|
|
9910
|
-
except ValueError:
|
|
9911
|
-
param_dict[key] = value
|
|
9922
|
+
def check_if_has_random_steps() -> None:
    """Call ``_fatal_error`` (code 233) when a fresh run has no random steps.

    The check is skipped when the run is a continuation
    (``args.continue_previous_job`` or ``--continue`` on the command line),
    when ``args.num_random_steps`` is non-zero, or when ``args.model`` is one
    of ``EXTERNAL_GENERATOR``, ``SOBOL`` or ``PSEUDORANDOM``.
    """
    is_continued_run = args.continue_previous_job or "--continue" in sys.argv
    if is_continued_run:
        return

    lacks_random_steps = args.num_random_steps == 0 or not args.num_random_steps
    if not lacks_random_steps:
        return

    if args.model in ("EXTERNAL_GENERATOR", "SOBOL", "PSEUDORANDOM"):
        return

    _fatal_error("You have no random steps set. This is only allowed in continued jobs. To start, you need either some random steps, or a continued run.", 233)
|
|
9912
9925
|
|
|
9913
|
-
|
|
9926
|
+
def add_exclude_to_defective_nodes() -> None:
    """Pass every entry of the comma-separated ``args.exclude`` to ``count_defective_nodes``.

    Surrounding whitespace is stripped from each entry. Nothing happens when
    ``args.exclude`` is empty or unset.
    """
    with spinner("Adding excluded nodes..."):
        if not args.exclude:
            return

        for raw_node in args.exclude.split(','):
            count_defective_nodes(None, raw_node.strip())
|
|
9917
9933
|
|
|
9918
|
-
|
|
9919
|
-
|
|
9920
|
-
|
|
9921
|
-
|
|
9922
|
-
"param_dicts": param_dicts,
|
|
9923
|
-
"means": dict(means_dict),
|
|
9924
|
-
"idxs": idxs
|
|
9925
|
-
},
|
|
9926
|
-
"absolute_metrics": absolute_metrics
|
|
9927
|
-
}
|
|
9928
|
-
}
|
|
9934
|
+
def check_max_eval(_max_eval: int) -> None:
    """Call ``_fatal_error`` (code 19) when ``_max_eval`` is falsy (``0`` or ``None``).

    Args:
        _max_eval: The value to validate.
    """
    with spinner("Checking max_eval..."):
        if _max_eval:
            return

        _fatal_error("--max_eval needs to be set!", 19)
|
|
9929
9938
|
|
|
9930
|
-
|
|
9939
|
+
def parse_parameters() -> Any:
    """Return the experiment parameters given on the command line, if any.

    Returns:
        ``None`` when ``args.parameter`` is not set. Otherwise
        ``parse_experiment_parameters()`` is called and the module-level
        ``experiment_parameters`` is returned (presumably populated by that
        call -- verify against ``parse_experiment_parameters``).
    """
    if not args.parameter:
        return None

    parse_experiment_parameters()
    return experiment_parameters
|
|
9942
9946
|
|
|
9943
|
-
|
|
9944
|
-
|
|
9947
|
+
def supports_sixel() -> bool:
    """Guess whether the current terminal can display sixel graphics.

    Returns:
        ``True`` when ``$TERM`` (case-insensitive) contains ``xterm`` or
        ``mlterm``, or when the ``tput`` probe succeeds and its output mentions
        ``sixel``; ``False`` otherwise, including when ``tput`` is missing,
        cannot be executed, or exits with an error.
    """
    term = os.environ.get("TERM", "").lower()
    if "xterm" in term or "mlterm" in term:
        return True

    try:
        # check=True already raises on a non-zero exit status, so no separate
        # returncode test is needed once the call returns.
        # NOTE(review): it is unclear whether `tput setab 256` can ever print
        # the word "sixel" -- confirm that this probe is the intended one.
        probe = subprocess.run(["tput", "setab", "256"], capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError (tput not installed) as well as
        # PermissionError and similar launch failures; a failed probe simply
        # means "no sixel support detected".
        return False

    return "sixel" in probe.stdout.lower()
|
|
9952
9960
|
|
|
9953
9961
|
def save_experiment_state() -> None:
|
|
9954
9962
|
try:
|
|
@@ -9963,7 +9971,7 @@ def save_experiment_state() -> None:
|
|
|
9963
9971
|
def wait_for_state_file(state_path: str, min_size: int = 5, max_wait_seconds: int = 60) -> bool:
|
|
9964
9972
|
try:
|
|
9965
9973
|
if not os.path.exists(state_path):
|
|
9966
|
-
|
|
9974
|
+
print_debug(f"[ERROR] File '{state_path}' does not exist.")
|
|
9967
9975
|
return False
|
|
9968
9976
|
|
|
9969
9977
|
i = 0
|
|
@@ -10182,33 +10190,42 @@ def get_result_minimize_flag(path_to_calculate: str, resname: str) -> bool:
|
|
|
10182
10190
|
|
|
10183
10191
|
return minmax[index] == "min"
|
|
10184
10192
|
|
|
10185
|
-
def
|
|
10186
|
-
|
|
10193
|
+
def post_job_calculate_pareto_front() -> None:
|
|
10194
|
+
if not args.calculate_pareto_front_of_job:
|
|
10195
|
+
return
|
|
10187
10196
|
|
|
10188
|
-
|
|
10197
|
+
failure = False
|
|
10189
10198
|
|
|
10190
|
-
|
|
10199
|
+
_paths_to_calculate = []
|
|
10191
10200
|
|
|
10192
|
-
for
|
|
10193
|
-
|
|
10194
|
-
|
|
10195
|
-
metric_y = arg_result_names[j]
|
|
10201
|
+
for _path_to_calculate in list(set(args.calculate_pareto_front_of_job)):
|
|
10202
|
+
try:
|
|
10203
|
+
found_paths = find_results_paths(_path_to_calculate)
|
|
10196
10204
|
|
|
10197
|
-
|
|
10198
|
-
|
|
10205
|
+
for _fp in found_paths:
|
|
10206
|
+
if _fp not in _paths_to_calculate:
|
|
10207
|
+
_paths_to_calculate.append(_fp)
|
|
10208
|
+
except (FileNotFoundError, NotADirectoryError) as e:
|
|
10209
|
+
print_red(f"post_job_calculate_pareto_front: find_results_paths('{_path_to_calculate}') failed with {e}")
|
|
10199
10210
|
|
|
10200
|
-
|
|
10201
|
-
if metric_x not in pareto_front_data:
|
|
10202
|
-
pareto_front_data[metric_x] = {}
|
|
10211
|
+
failure = True
|
|
10203
10212
|
|
|
10204
|
-
|
|
10205
|
-
|
|
10206
|
-
|
|
10207
|
-
|
|
10208
|
-
print_red("Calculating Pareto-fronts was cancelled by pressing CTRL-c")
|
|
10209
|
-
skip = True
|
|
10213
|
+
for _path_to_calculate in _paths_to_calculate:
|
|
10214
|
+
for path_to_calculate in found_paths:
|
|
10215
|
+
if not job_calculate_pareto_front(path_to_calculate):
|
|
10216
|
+
failure = True
|
|
10210
10217
|
|
|
10211
|
-
|
|
10218
|
+
if failure:
|
|
10219
|
+
my_exit(24)
|
|
10220
|
+
|
|
10221
|
+
my_exit(0)
|
|
10222
|
+
|
|
10223
|
+
def pareto_front_as_rich_table(idxs: list, metric_x: str, metric_y: str) -> Optional[Table]:
|
|
10224
|
+
if not os.path.exists(RESULT_CSV_FILE):
|
|
10225
|
+
print_debug(f"pareto_front_as_rich_table: File '{RESULT_CSV_FILE}' not found")
|
|
10226
|
+
return None
|
|
10227
|
+
|
|
10228
|
+
return create_pareto_front_table(idxs, metric_x, metric_y)
|
|
10212
10229
|
|
|
10213
10230
|
def show_pareto_frontier_data(path_to_calculate: str, res_names: list, disable_sixel_and_table: bool = False) -> None:
|
|
10214
10231
|
if len(res_names) <= 1:
|
|
@@ -10552,112 +10569,6 @@ def find_results_paths(base_path: str) -> list:
|
|
|
10552
10569
|
|
|
10553
10570
|
return list(set(found_paths))
|
|
10554
10571
|
|
|
10555
|
-
def post_job_calculate_pareto_front() -> None:
|
|
10556
|
-
if not args.calculate_pareto_front_of_job:
|
|
10557
|
-
return
|
|
10558
|
-
|
|
10559
|
-
failure = False
|
|
10560
|
-
|
|
10561
|
-
_paths_to_calculate = []
|
|
10562
|
-
|
|
10563
|
-
for _path_to_calculate in list(set(args.calculate_pareto_front_of_job)):
|
|
10564
|
-
try:
|
|
10565
|
-
found_paths = find_results_paths(_path_to_calculate)
|
|
10566
|
-
|
|
10567
|
-
for _fp in found_paths:
|
|
10568
|
-
if _fp not in _paths_to_calculate:
|
|
10569
|
-
_paths_to_calculate.append(_fp)
|
|
10570
|
-
except (FileNotFoundError, NotADirectoryError) as e:
|
|
10571
|
-
print_red(f"post_job_calculate_pareto_front: find_results_paths('{_path_to_calculate}') failed with {e}")
|
|
10572
|
-
|
|
10573
|
-
failure = True
|
|
10574
|
-
|
|
10575
|
-
for _path_to_calculate in _paths_to_calculate:
|
|
10576
|
-
for path_to_calculate in found_paths:
|
|
10577
|
-
if not job_calculate_pareto_front(path_to_calculate):
|
|
10578
|
-
failure = True
|
|
10579
|
-
|
|
10580
|
-
if failure:
|
|
10581
|
-
my_exit(24)
|
|
10582
|
-
|
|
10583
|
-
my_exit(0)
|
|
10584
|
-
|
|
10585
|
-
def job_calculate_pareto_front(path_to_calculate: str, disable_sixel_and_table: bool = False) -> bool:
|
|
10586
|
-
pf_start_time = time.time()
|
|
10587
|
-
|
|
10588
|
-
if not path_to_calculate:
|
|
10589
|
-
return False
|
|
10590
|
-
|
|
10591
|
-
global CURRENT_RUN_FOLDER
|
|
10592
|
-
global RESULT_CSV_FILE
|
|
10593
|
-
global arg_result_names
|
|
10594
|
-
|
|
10595
|
-
if not path_to_calculate:
|
|
10596
|
-
print_red("Can only calculate pareto front of previous job when --calculate_pareto_front_of_job is set")
|
|
10597
|
-
return False
|
|
10598
|
-
|
|
10599
|
-
if not os.path.exists(path_to_calculate):
|
|
10600
|
-
print_red(f"Path '{path_to_calculate}' does not exist")
|
|
10601
|
-
return False
|
|
10602
|
-
|
|
10603
|
-
ax_client_json = f"{path_to_calculate}/state_files/ax_client.experiment.json"
|
|
10604
|
-
|
|
10605
|
-
if not os.path.exists(ax_client_json):
|
|
10606
|
-
print_red(f"Path '{ax_client_json}' not found")
|
|
10607
|
-
return False
|
|
10608
|
-
|
|
10609
|
-
checkpoint_file: str = f"{path_to_calculate}/state_files/checkpoint.json"
|
|
10610
|
-
if not os.path.exists(checkpoint_file):
|
|
10611
|
-
print_red(f"The checkpoint file '{checkpoint_file}' does not exist")
|
|
10612
|
-
return False
|
|
10613
|
-
|
|
10614
|
-
RESULT_CSV_FILE = f"{path_to_calculate}/{RESULTS_CSV_FILENAME}"
|
|
10615
|
-
if not os.path.exists(RESULT_CSV_FILE):
|
|
10616
|
-
print_red(f"{RESULT_CSV_FILE} not found")
|
|
10617
|
-
return False
|
|
10618
|
-
|
|
10619
|
-
res_names = []
|
|
10620
|
-
|
|
10621
|
-
res_names_file = f"{path_to_calculate}/result_names.txt"
|
|
10622
|
-
if not os.path.exists(res_names_file):
|
|
10623
|
-
print_red(f"File '{res_names_file}' does not exist")
|
|
10624
|
-
return False
|
|
10625
|
-
|
|
10626
|
-
try:
|
|
10627
|
-
with open(res_names_file, "r", encoding="utf-8") as file:
|
|
10628
|
-
lines = file.readlines()
|
|
10629
|
-
except Exception as e:
|
|
10630
|
-
print_red(f"Error reading file '{res_names_file}': {e}")
|
|
10631
|
-
return False
|
|
10632
|
-
|
|
10633
|
-
for line in lines:
|
|
10634
|
-
entry = line.strip()
|
|
10635
|
-
if entry != "":
|
|
10636
|
-
res_names.append(entry)
|
|
10637
|
-
|
|
10638
|
-
if len(res_names) < 2:
|
|
10639
|
-
print_red(f"Error: There are less than 2 result names (is: {len(res_names)}, {', '.join(res_names)}) in {path_to_calculate}. Cannot continue calculating the pareto front.")
|
|
10640
|
-
return False
|
|
10641
|
-
|
|
10642
|
-
load_username_to_args(path_to_calculate)
|
|
10643
|
-
|
|
10644
|
-
CURRENT_RUN_FOLDER = path_to_calculate
|
|
10645
|
-
|
|
10646
|
-
arg_result_names = res_names
|
|
10647
|
-
|
|
10648
|
-
load_experiment_parameters_from_checkpoint_file(checkpoint_file, False)
|
|
10649
|
-
|
|
10650
|
-
if experiment_parameters is None:
|
|
10651
|
-
return False
|
|
10652
|
-
|
|
10653
|
-
show_pareto_or_error_msg(path_to_calculate, res_names, disable_sixel_and_table)
|
|
10654
|
-
|
|
10655
|
-
pf_end_time = time.time()
|
|
10656
|
-
|
|
10657
|
-
print_debug(f"Calculating the Pareto-front took {pf_end_time - pf_start_time} seconds")
|
|
10658
|
-
|
|
10659
|
-
return True
|
|
10660
|
-
|
|
10661
10572
|
def set_arg_states_from_continue() -> None:
|
|
10662
10573
|
if args.continue_previous_job and not args.num_random_steps:
|
|
10663
10574
|
num_random_steps_file = f"{args.continue_previous_job}/state_files/num_random_steps"
|
|
@@ -10738,6 +10649,12 @@ def show_omniopt_call() -> None:
|
|
|
10738
10649
|
|
|
10739
10650
|
original_print(oo_call + " " + cleaned)
|
|
10740
10651
|
|
|
10652
|
+
if args.dependency is not None and args.dependency != "":
|
|
10653
|
+
print(f"Dependency: {args.dependency}")
|
|
10654
|
+
|
|
10655
|
+
if args.ui_url is not None and args.ui_url != "":
|
|
10656
|
+
print_yellow("--ui_url is deprecated. Do not use it anymore. It will be ignored and one day be removed.")
|
|
10657
|
+
|
|
10741
10658
|
def main() -> None:
|
|
10742
10659
|
global RESULT_CSV_FILE, LOGFILE_DEBUG_GET_NEXT_TRIALS
|
|
10743
10660
|
|