dragon-ml-toolbox 19.6.0__py3-none-any.whl → 19.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,7 @@ from evotorch.operators import functional as func_ops
20
20
  from ._SQL import DragonSQL
21
21
  from ._ML_inference import DragonInferenceHandler
22
22
  from ._ML_chaining_inference import DragonChainInference
23
+ from ._ML_configuration import DragonParetoConfig
23
24
  from ._optimization_tools import create_optimization_bounds, plot_optimal_feature_distributions_from_dataframe
24
25
  from ._math_utilities import discretize_categorical_values
25
26
  from ._utilities import save_dataframe_filename
@@ -57,26 +58,21 @@ class DragonParetoOptimizer:
57
58
  def __init__(self,
58
59
  inference_handler: Union[DragonInferenceHandler, DragonChainInference],
59
60
  schema: FeatureSchema,
60
- target_objectives: Dict[str, Literal["min", "max"]],
61
- continuous_bounds_map: Union[Dict[str, Tuple[float, float]], Dict[str, List[float]]],
62
- population_size: int = 400,
63
- discretize_start_at_zero: bool = True):
61
+ config: DragonParetoConfig):
64
62
  """
65
63
  Initialize the Pareto Optimizer.
66
64
 
67
65
  Args:
68
66
  inference_handler (DragonInferenceHandler | DragonChainInference): Validated model handler.
69
67
  schema (FeatureSchema): Feature schema for bounds and types.
70
- target_objectives (Dict[str, "min"|"max"]): Dictionary mapping target names to optimization direction.
71
- Example: {"price": "max", "error": "min"}
72
- continuous_bounds_map (Dict): Bounds for continuous features {name: (min, max)}.
73
- population_size (int): Size of the genetic population.
74
- discretize_start_at_zero (bool): Categorical encoding start index.
68
+ config (DragonParetoConfig): Configuration for the Pareto optimizer.
75
69
  """
76
70
  self.inference_handler = inference_handler
77
71
  self.schema = schema
78
- self.target_objectives = target_objectives
79
- self.discretize_start_at_zero = discretize_start_at_zero
72
+ self.config = config
73
+
74
+ self.target_objectives = config.target_objectives
75
+ self.discretize_start_at_zero = config.discretize_start_at_zero
80
76
 
81
77
  # Initialize state for results
82
78
  self.pareto_front: Optional[pd.DataFrame] = None
@@ -106,7 +102,7 @@ class DragonParetoOptimizer:
106
102
 
107
103
  available_targets = self.inference_handler.target_ids
108
104
 
109
- for name, direction in target_objectives.items():
105
+ for name, direction in self.target_objectives.items():
110
106
  if name not in available_targets:
111
107
  _LOGGER.error(f"Target '{name}' not found in model targets: {available_targets}")
112
108
  raise ValueError()
@@ -124,8 +120,8 @@ class DragonParetoOptimizer:
124
120
  # Uses the external tool which reads the schema to set correct bounds for both continuous and categorical
125
121
  bounds = create_optimization_bounds(
126
122
  schema=schema,
127
- continuous_bounds_map=continuous_bounds_map,
128
- start_at_zero=discretize_start_at_zero
123
+ continuous_bounds_map=config.continuous_bounds_map,
124
+ start_at_zero=self.discretize_start_at_zero
129
125
  )
130
126
  self.lower_bounds = list(bounds[0])
131
127
  self.upper_bounds = list(bounds[1])
@@ -136,7 +132,7 @@ class DragonParetoOptimizer:
136
132
  target_indices=self.target_indices, # Used by Standard Handler
137
133
  target_names=self.ordered_target_names, # Used by Chain Handler
138
134
  categorical_index_map=schema.categorical_index_map,
139
- discretize_start_at_zero=discretize_start_at_zero,
135
+ discretize_start_at_zero=self.discretize_start_at_zero,
140
136
  is_chain=self.is_chain
141
137
  )
142
138
 
@@ -155,7 +151,7 @@ class DragonParetoOptimizer:
155
151
  # GeneticAlgorithm. It automatically applies NSGA-II logic (Pareto sorting) when problem is multi-objective.
156
152
  self.algorithm = GeneticAlgorithm(
157
153
  self.problem,
158
- popsize=population_size,
154
+ popsize=config.population_size,
159
155
  operators=[
160
156
  SimulatedBinaryCrossOver(self.problem, tournament_size=3, eta=20.0, cross_over_rate=1.0),
161
157
  GaussianMutation(self.problem, stdev=0.1)
@@ -163,21 +159,17 @@ class DragonParetoOptimizer:
163
159
  re_evaluate=False # model is deterministic
164
160
  )
165
161
 
166
- def run(self,
167
- generations: int,
168
- save_dir: Union[str, Path],
169
- log_interval: int = 10) -> pd.DataFrame:
162
+ def run(self) -> pd.DataFrame:
170
163
  """
171
164
  Execute the optimization with progress tracking and periodic logging.
172
165
 
173
- Args:
174
- generations (int): Number of generations to evolve.
175
- save_dir (str|Path): Directory to save results and plots.
176
- log_interval (int): How often (in generations) to log population statistics.
177
-
178
166
  Returns:
179
167
  pd.DataFrame: A DataFrame containing the non-dominated solutions (Pareto Front).
180
168
  """
169
+ generations = self.config.generations
170
+ save_dir = self.config.save_directory
171
+ log_interval = self.config.log_interval
172
+
181
173
  save_path = make_fullpath(save_dir, make=True, enforce="directory")
182
174
  log_file = save_path / "optimization_log.txt"
183
175
 
@@ -189,26 +181,41 @@ class DragonParetoOptimizer:
189
181
  with open(log_file, "w") as f:
190
182
  f.write(f"Pareto Optimization Log - {generations} Generations\n")
191
183
  f.write("=" * 60 + "\n")
184
+
185
+ # History tracking for visualization
186
+ history_records = []
192
187
 
193
188
  # --- Optimization Loop with Progress Bar ---
194
189
  with tqdm(total=generations, desc="Evolving Pareto Front", unit="gen") as pbar:
195
190
  for gen in range(1, generations + 1):
196
191
  self.algorithm.step()
197
192
 
193
+ # Capture stats for history (every generation for smooth plots)
194
+ current_evals = self.algorithm.population.evals.clone() # type: ignore
195
+
196
+ gen_stats = {}
197
+ for i, target_name in enumerate(self.ordered_target_names):
198
+ vals = current_evals[:, i]
199
+ v_mean = float(vals.mean())
200
+ v_min = float(vals.min())
201
+ v_max = float(vals.max())
202
+
203
+ # Store for plotting
204
+ history_records.append({
205
+ "Generation": gen,
206
+ "Target": target_name,
207
+ "Mean": v_mean,
208
+ "Min": v_min,
209
+ "Max": v_max
210
+ })
211
+
212
+ gen_stats[target_name] = (v_mean, v_min, v_max)
213
+
198
214
  # Periodic Logging of Population Stats to FILE
199
215
  if gen % log_interval == 0 or gen == generations:
200
216
  stats_msg = [f"Gen {gen}:"]
201
-
202
- # Get current population values
203
- current_evals = self.algorithm.population.evals
204
-
205
- for i, target_name in enumerate(self.ordered_target_names):
206
- vals = current_evals[:, i]
207
- v_mean = float(vals.mean())
208
- v_min = float(vals.min())
209
- v_max = float(vals.max())
210
-
211
- stats_msg.append(f"{target_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
217
+ for t_name, (v_mean, v_min, v_max) in gen_stats.items():
218
+ stats_msg.append(f"{t_name}: {v_mean:.3f} (Range: {v_min:.3f}-{v_max:.3f})")
212
219
 
213
220
  log_line = " | ".join(stats_msg)
214
221
 
@@ -217,6 +224,12 @@ class DragonParetoOptimizer:
217
224
  f.write(log_line + "\n")
218
225
 
219
226
  pbar.update(1)
227
+
228
+ # --- Post-Optimization Visualization ---
229
+ if history_records:
230
+ _LOGGER.debug("Generating optimization history plots...")
231
+ history_df = pd.DataFrame(history_records)
232
+ self._plot_optimization_history(history_df, save_path)
220
233
 
221
234
  # --- Extract Pareto Front ---
222
235
  # Manually identify the Pareto front from the final population using domination counts
@@ -289,10 +302,6 @@ class DragonParetoOptimizer:
289
302
  return pareto_df
290
303
 
291
304
  def save_solutions(self,
292
- filename: str = "Pareto_Solutions",
293
- save_dir: Optional[Union[str, Path]] = None,
294
- columns_to_round: Optional[List[str]] = None,
295
- float_precision: int = 4,
296
305
  save_to_sql: bool = False,
297
306
  sql_table_name: Optional[str] = None,
298
307
  sql_if_exists: Literal['fail', 'replace', 'append'] = 'replace') -> None:
@@ -301,12 +310,8 @@ class DragonParetoOptimizer:
301
310
  for specific continuous columns. Optionally saves to a SQL database.
302
311
 
303
312
  Args:
304
- save_dir (str | Path | None): Directory to save the CSV. If None, uses the optimization directory.
305
- filename (str): Name of the file (without .csv extension).
306
- columns_to_round (List[str], optional): List of continuous column names that should be rounded to the nearest integer.
307
- float_precision (int): Number of decimal places to round standard float columns.
308
313
  save_to_sql (bool): If True, also writes the results to a SQLite database in the save_dir.
309
- sql_table_name (str, optional): Specific table name for SQL. If None, uses 'filename'.
314
+ sql_table_name (str, optional): Specific table name for SQL. If None, uses the solutions filename.
310
315
  sql_if_exists (str): Behavior if SQL table exists ('fail', 'replace', 'append').
311
316
  """
312
317
  if self.pareto_front is None:
@@ -314,11 +319,15 @@ class DragonParetoOptimizer:
314
319
  raise ValueError()
315
320
 
316
321
  # handle directory
317
- if save_dir is None:
318
- if self._metrics_dir is None:
319
- _LOGGER.error("No save directory specified and no optimization directory found.")
320
- raise ValueError()
321
- save_dir = self._metrics_dir
322
+ save_path = self._metrics_dir
323
+ if save_path is None:
324
+ _LOGGER.error("No save directory found. Cannot save solutions.")
325
+ raise ValueError()
326
+
327
+ # unpack values from config
328
+ filename = self.config.solutions_filename
329
+ columns_to_round = self.config.columns_to_round
330
+ float_precision = self.config.float_precision
322
331
 
323
332
  # Create a copy to avoid modifying the internal state
324
333
  df_to_save = self.pareto_front.copy()
@@ -354,8 +363,6 @@ class DragonParetoOptimizer:
354
363
  df_to_save[float_cols] = df_to_save[float_cols].round(float_precision)
355
364
 
356
365
  # Save CSV
357
- save_path = make_fullpath(save_dir, make=True, enforce="directory")
358
-
359
366
  # sanitize filename and add extension if missing
360
367
  sanitized_filename = sanitize_filename(filename)
361
368
  csv_filename = sanitized_filename if sanitized_filename.lower().endswith(".csv") else f"{sanitized_filename}.csv"
@@ -577,7 +584,7 @@ class DragonParetoOptimizer:
577
584
  """Standard 2D scatter plot."""
578
585
  x_name, y_name = self.ordered_target_names[0], self.ordered_target_names[1]
579
586
 
580
- plt.figure(figsize=(10, 8))
587
+ plt.figure(figsize=self.config.plot_size, dpi=ParetoOptimizationKeys.DPI)
581
588
 
582
589
  # Use a color gradient based on the Y-axis to make "better" values visually distinct
583
590
  sns.scatterplot(
@@ -592,7 +599,7 @@ class DragonParetoOptimizer:
592
599
  legend=False
593
600
  )
594
601
 
595
- plt.title(f"Pareto Front: {x_name} vs {y_name}", fontsize=14)
602
+ plt.title(f"Pareto Front: {x_name} vs {y_name}", fontsize=self.config.plot_font_size + 2, pad=ParetoOptimizationKeys.FONT_PAD)
596
603
  plt.grid(True, linestyle='--', alpha=0.6)
597
604
 
598
605
  # Add simple annotation for the 'corners' (extremes)
@@ -616,8 +623,7 @@ class DragonParetoOptimizer:
616
623
  x_target: Union[int, str],
617
624
  y_target: Union[int, str],
618
625
  z_target: Union[int, str],
619
- hue_target: Optional[Union[int, str]] = None,
620
- save_dir: Optional[Union[str, Path]] = None,):
626
+ hue_target: Optional[Union[int, str]] = None):
621
627
  """
622
628
  Public API to generate 3D visualizations for specific targets.
623
629
 
@@ -626,15 +632,11 @@ class DragonParetoOptimizer:
626
632
  y_target (int|str): Index or name of the target for the Y axis.
627
633
  z_target (int|str): Index or name of the target for the Z axis.
628
634
  hue_target (int|str, optional): Index or name of the target for coloring. Defaults to z_target if None.
629
- save_dir (str|Path, optional): Directory to save plots. Defaults to the directory used during optimization.
630
635
  """
631
- if save_dir is None:
632
- if self._metrics_dir is None:
633
- _LOGGER.error("No save directory specified and no previous optimization directory found.")
634
- raise ValueError()
635
- save_dir = self._metrics_dir
636
-
637
- save_path_root = make_fullpath(save_dir, make=True, enforce="directory")
636
+ if self._metrics_dir is None:
637
+ _LOGGER.error("No save directory specified and no previous optimization directory found.")
638
+ raise ValueError()
639
+ save_path_root = self._metrics_dir
638
640
 
639
641
  save_path = make_fullpath(save_path_root / ParetoOptimizationKeys.PARETO_PLOTS_DIR, make=True, enforce="directory")
640
642
 
@@ -716,6 +718,58 @@ class DragonParetoOptimizer:
716
718
  html_path = sub_dir_path / f"Pareto_3D_Interactive.html"
717
719
  fig_html.write_html(str(html_path))
718
720
 
721
+ def _plot_optimization_history(self, history_df: pd.DataFrame, save_dir: Path):
722
+ """
723
+ Generates convergence plots (Mean/Min/Max) for each objective over generations.
724
+
725
+ Args:
726
+ history_df: DataFrame with cols [Generation, Target, Mean, Min, Max]
727
+ save_dir: Base directory to save plots
728
+ """
729
+ # Create subdirectory for history plots
730
+ plot_dir = make_fullpath(save_dir / ParetoOptimizationKeys.HISTORY_PLOTS_DIR, make=True, enforce="directory")
731
+
732
+ unique_targets = history_df["Target"].unique()
733
+
734
+ for target in unique_targets:
735
+ subset = history_df[history_df["Target"] == target]
736
+
737
+ # Determine direction (just for annotation/context if needed, but plotting stats is neutral)
738
+ direction = self.target_objectives.get(target, "unknown")
739
+
740
+ plt.figure(figsize=self.config.plot_size, dpi=ParetoOptimizationKeys.DPI)
741
+
742
+ # Plot Mean
743
+ plt.plot(subset["Generation"], subset["Mean"], label="Population Mean", color="#4c72b0", linewidth=2)
744
+
745
+ # Plot Min/Max Range
746
+ plt.fill_between(
747
+ subset["Generation"],
748
+ subset["Min"],
749
+ subset["Max"],
750
+ color="#4c72b0",
751
+ alpha=0.15,
752
+ label="Min-Max Range"
753
+ )
754
+
755
+ # Plot extremes as dashed lines
756
+ plt.plot(subset["Generation"], subset["Min"], linestyle="--", color="#55a868", alpha=0.6, linewidth=1, label="Min")
757
+ plt.plot(subset["Generation"], subset["Max"], linestyle="--", color="#c44e52", alpha=0.6, linewidth=1, label="Max")
758
+
759
+ plt.title(f"Convergence History: {target} ({direction.upper()})", fontsize=self.config.plot_font_size + 2, pad=ParetoOptimizationKeys.FONT_PAD)
760
+ plt.xlabel("Generation", labelpad=ParetoOptimizationKeys.FONT_PAD, fontsize=self.config.plot_font_size)
761
+ plt.ylabel("Target Value", labelpad=ParetoOptimizationKeys.FONT_PAD, fontsize=self.config.plot_font_size)
762
+ plt.legend(loc='best', fontsize=self.config.plot_font_size)
763
+ plt.grid(True, linestyle="--", alpha=0.5)
764
+ plt.xticks(fontsize=self.config.plot_font_size - 4)
765
+ plt.yticks(fontsize=self.config.plot_font_size - 4)
766
+
767
+ plt.tight_layout()
768
+
769
+ fname = f"Convergence_{sanitize_filename(target)}.svg"
770
+ plt.savefig(plot_dir / fname, bbox_inches='tight')
771
+ plt.close()
772
+
719
773
  class _ParetoFitnessEvaluator:
720
774
  """
721
775
  Evaluates fitness for Multi-Objective optimization.
@@ -186,7 +186,7 @@ def sequence_to_sequence_metrics(
186
186
  _LOGGER.info(f"📝 Seq-to-Seq per-step report saved as '{report_path.name}'")
187
187
 
188
188
  # --- Create and save plot ---
189
- fig, ax1 = plt.subplots(figsize=format_config.plot_figsize, dpi=DPI_value)
189
+ fig, ax1 = plt.subplots(figsize=SEQUENCE_PLOT_SIZE, dpi=DPI_value)
190
190
 
191
191
  # Plot RMSE
192
192
  color_rmse = format_config.rmse_color
@@ -165,7 +165,7 @@ def segmentation_metrics(
165
165
  cm = confusion_matrix(y_true_flat, y_pred_flat, labels=labels)
166
166
 
167
167
  # Plot
168
- fig_cm, ax_cm = plt.subplots(figsize=(max(8, len(labels) * 0.8), max(8, len(labels) * 0.8)), dpi=100)
168
+ fig_cm, ax_cm = plt.subplots(figsize=(max(8, len(labels) * 0.8), max(8, len(labels) * 0.8)), dpi=DPI_value)
169
169
  disp = ConfusionMatrixDisplay(
170
170
  confusion_matrix=cm,
171
171
  display_labels=display_names
ml_tools/_core/_keys.py CHANGED
@@ -196,6 +196,11 @@ class ParetoOptimizationKeys:
196
196
  """Used by the ML optimization pareto module."""
197
197
  PARETO_PLOTS_DIR = "Pareto_Plots"
198
198
  SQL_DATABASE_FILENAME = "OptimizationResults.db"
199
+ HISTORY_PLOTS_DIR = "History"
200
+
201
+ # Plot Config values
202
+ FONT_PAD = 10
203
+ DPI = 400
199
204
 
200
205
 
201
206
  class OptimizationToolsKeys:
@@ -223,10 +228,19 @@ class SchemaKeys:
223
228
 
224
229
  class _EvaluationConfig:
225
230
  """Set config values for evaluation modules."""
226
- DPI = 250
227
- REGRESSION_PLOT_SIZE = (9, 6)
228
- SEQUENCE_PLOT_SIZE = (9, 6)
229
-
231
+ DPI = 400
232
+ LABEL_PADDING = 10
233
+ # large sizes for SVG layout to accommodate large fonts
234
+ REGRESSION_PLOT_SIZE = (12, 8)
235
+ SEQUENCE_PLOT_SIZE = (12, 8)
236
+ CLASSIFICATION_PLOT_SIZE = (10, 10)
237
+ # Loss plot
238
+ LOSS_PLOT_SIZE = (18, 9)
239
+ LOSS_PLOT_LABEL_SIZE = 24
240
+ LOSS_PLOT_TICK_SIZE = 22
241
+ LOSS_PLOT_LEGEND_SIZE = 24
242
+ # CM settings
243
+ CM_SIZE = (9, 8) # used for multi label binary classification confusion matrix
230
244
 
231
245
  class _OneHotOtherPlaceholder:
232
246
  """Used internally by GUI_tools."""