pso-segmentation 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+ """pso-segmentation package.
2
+
3
+ A robust, professional-grade Python package for segmentation optimization
4
+ using Particle Swarm Optimization (PSO). Designed for any continuous
5
+ variable; credit scoring is a common example, not a requirement.
6
+
7
+ Version: 0.1.0
8
+ """
9
+
10
+ from pso_segmentation.api import segment_scores
11
+ from pso_segmentation.io import (
12
+ export_metrics_to_json,
13
+ export_segmentation_to_csv,
14
+ import_segmentation_from_csv,
15
+ load_optimizer_state,
16
+ save_optimizer_state,
17
+ )
18
+ from pso_segmentation.objective import (
19
+ ObjectiveContext,
20
+ empty_segment_penalty,
21
+ make_objective,
22
+ monotonic_penalty,
23
+ segment_size_penalty,
24
+ )
25
+ from pso_segmentation.objective_functions_examples import (
26
+ example_fitness_custom_business_metric,
27
+ example_fitness_gini_focused,
28
+ example_fitness_r2_only,
29
+ example_fitness_r2_with_all_constraints,
30
+ example_fitness_r2_with_balance_penalty,
31
+ example_fitness_r2_with_monotonic_penalty,
32
+ )
33
+ from pso_segmentation.optimizer import OptimizerConfig, SegmentationOptimizer
34
+ from pso_segmentation.segmentation import SegmentationResult
35
+ from pso_segmentation.segmentation.computation import compute_metrics
36
+ from pso_segmentation.segmentation.validation import validate_cuts
37
+ from pso_segmentation.selection import (
38
+ SegmentCandidate,
39
+ SegmentSelectionResult,
40
+ select_n_segments,
41
+ )
42
+
43
# Package metadata.
__version__ = "0.1.0"
__author__ = "Léo Colin"
__email__ = "leocolin7002@gmail.com"

# Public API re-exported at package level (order is preserved for
# ``from pso_segmentation import *``).
__all__ = [
    # Functional entry point, optimizer, and result/selection types
    "segment_scores",
    "SegmentationOptimizer",
    "OptimizerConfig",
    "SegmentationResult",
    "SegmentCandidate",
    "SegmentSelectionResult",
    "select_n_segments",
    # Objective construction and penalty terms
    "ObjectiveContext",
    "make_objective",
    "monotonic_penalty",
    "segment_size_penalty",
    "empty_segment_penalty",
    # Ready-made example fitness functions
    "example_fitness_r2_only",
    "example_fitness_r2_with_monotonic_penalty",
    "example_fitness_r2_with_balance_penalty",
    "example_fitness_r2_with_all_constraints",
    "example_fitness_gini_focused",
    "example_fitness_custom_business_metric",
    # Metric computation and cut validation
    "compute_metrics",
    "validate_cuts",
    # Persistence / import-export helpers
    "export_segmentation_to_csv",
    "import_segmentation_from_csv",
    "save_optimizer_state",
    "load_optimizer_state",
    "export_metrics_to_json",
]
@@ -0,0 +1,112 @@
1
+ """Simple functional API for PSO-based segmentation.
2
+
3
+ This module provides a lightweight functional interface for quick segmentation tasks.
4
+ For more advanced use cases, see SegmentationOptimizer in the optimizer module.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Callable
10
+ from typing import Any
11
+
12
+ import numpy as np
13
+
14
+ from pso_segmentation.optimizer import OptimizerConfig, SegmentationOptimizer
15
+ from pso_segmentation.segmentation.metrics import SegmentationResult
16
+
17
+ # Type alias for NDArray with float64 dtype
18
+ NDArray = np.ndarray[Any, np.dtype[np.float64]]
19
+
20
+
21
def segment_scores(
    scores: NDArray,
    labels: NDArray,
    objective_func: Callable[[NDArray], float],
    config: OptimizerConfig | None = None,
) -> SegmentationResult:
    """Run a PSO-based segmentation in a single call.

    Thin functional wrapper around ``SegmentationOptimizer``: the inputs are
    coerced to float64 arrays and validated, an optimizer is built from
    ``config``, ``fit`` is called, and the optimizer's metrics are returned.
    Use ``SegmentationOptimizer`` directly when the fitted optimizer itself
    needs to be inspected.

    Parameters
    ----------
    scores : NDArray
        Continuous values to segment, expected shape ``(n_samples,)``
        (e.g. risk scores, probabilities, or any continuous signal).
        Converted with ``np.asarray(..., dtype=np.float64)``.
    labels : NDArray
        Target variable aligned with ``scores``, expected shape
        ``(n_samples,)``. Converted the same way as ``scores``.
    objective_func : Callable[[NDArray], float]
        Fitness function handed to ``SegmentationOptimizer.fit``. It receives
        the cut values as a 1D array and returns a scalar fitness (higher is
        better). ``make_objective`` builds a suitable callable.
    config : OptimizerConfig | None, optional
        PSO configuration. When None, ``OptimizerConfig()`` is constructed
        with no arguments, so the defaults are whatever ``OptimizerConfig``
        defines (NOTE(review): previously documented as pop_size=30,
        max_iter=100 - confirm against ``OptimizerConfig``).

    Returns
    -------
    SegmentationResult
        The value returned by ``SegmentationOptimizer.get_metrics()`` after
        fitting. Documented attributes (defined in the segmentation module,
        not verified here): ``r2``, ``n_segments``, ``segment_proportions``,
        ``target_mean_by_segment``, ``segment_sizes``, ``h_inter``,
        ``h_intra``.

    Raises
    ------
    ValueError
        If ``scores`` or ``labels`` is a scalar (0-dimensional), or if their
        first dimensions differ in length.

    Any exception raised by ``SegmentationOptimizer`` propagates unchanged.

    Examples
    --------
    >>> from pso_segmentation import make_objective, segment_scores
    >>> import numpy as np
    >>> scores = np.random.rand(1000)
    >>> labels = np.random.binomial(1, 0.3, 1000)
    >>> objective = make_objective(scores, labels, metric="r2")
    >>> result = segment_scores(scores, labels, objective)
    >>> print(f"R²: {result.r2:.3f}, Segments: {result.n_segments}")

    >>> # With custom config
    >>> from pso_segmentation import OptimizerConfig
    >>> config = OptimizerConfig(pop_size=100, max_iter=200)
    >>> result = segment_scores(scores, labels, objective, config)

    Notes
    -----
    - Custom constraints can be expressed as penalties inside the objective.
    - PSO is stochastic; results can vary across runs.
    """
    scores = np.asarray(scores, dtype=np.float64)
    labels = np.asarray(labels, dtype=np.float64)

    # A 0-d array has an empty shape tuple; indexing shape[0] would raise an
    # opaque IndexError, so reject scalars with the documented ValueError.
    if scores.ndim == 0 or labels.ndim == 0:
        msg = "scores and labels must be array-like with at least one dimension"
        raise ValueError(msg)

    if scores.shape[0] != labels.shape[0]:
        msg = f"Mismatched lengths: scores ({scores.shape[0]}) vs labels ({labels.shape[0]})"
        raise ValueError(msg)

    # Fall back to the library defaults when no configuration is supplied.
    if config is None:
        config = OptimizerConfig()

    optimizer = SegmentationOptimizer(config)
    optimizer.fit(scores, labels, objective_func)

    return optimizer.get_metrics()
@@ -0,0 +1,8 @@
1
+ """Core PSO module.
2
+
3
+ Exports the main PSO class and result dataclass.
4
+ """
5
+
6
+ from .pso import PSO, PSO_Result
7
+
8
+ __all__ = ["PSO", "PSO_Result"]
@@ -0,0 +1,290 @@
1
+ """Particle Swarm Optimization (PSO) implementation.
2
+
3
+ This module provides a minimal, robust PSO algorithm for continuous optimization.
4
+ The algorithm maximizes the objective function by iteratively updating particle
5
+ positions and velocities based on cognitive and social components.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+ from dataclasses import dataclass, field
12
+ from typing import Any, cast
13
+
14
+ import numpy as np
15
+
16
+ # Type alias for cleaner code
17
+ NDArray = np.ndarray[Any, np.dtype[np.float64]]
18
+
19
+
20
@dataclass
class PSO_Result:
    """Outcome of a single ``PSO.run()`` call.

    Attributes
    ----------
    best_position : NDArray
        Best position found by the swarm, shape ``(n_dim,)``.
    best_fitness : float
        Objective value at ``best_position``.
    n_iterations : int
        Number of iterations performed.
    converged : bool
        Convergence flag as reported by the optimizer.
    history : list[dict[str, Any]]
        Per-iteration records (iteration, best_fitness, best_position, ...).
        Defaults to a fresh empty list for every instance.
    """

    # Required fields, in positional order.
    best_position: NDArray
    best_fitness: float
    n_iterations: int
    converged: bool
    # default_factory gives each result its own list (no shared mutable default).
    history: list[dict[str, Any]] = field(default_factory=list)
43
+
44
+
45
class PSO:
    """Particle Swarm Optimization algorithm for continuous optimization.

    Global-best PSO with an inertia weight and cognitive/social coefficients.
    The objective function is *maximized*.

    Parameters
    ----------
    objective_func : Callable[[NDArray], float]
        Function to maximize. Takes array of shape (n_dim,) and returns float.
    n_dim : int
        Dimensionality of the optimization problem.
    pop_size : int, default=30
        Number of particles in the swarm (values below 1 are raised to 1).
    max_iter : int, default=100
        Maximum number of iterations (negative values are treated as 0).
    lb : NDArray, optional
        Lower bounds for each dimension. If None, uses -inf.
    ub : NDArray, optional
        Upper bounds for each dimension. If None, uses +inf.
    w : float, default=0.7
        Inertia weight. Controls influence of previous velocity.
    c1 : float, default=1.5
        Cognitive coefficient (attraction to personal best).
    c2 : float, default=1.5
        Social coefficient (attraction to global best).
    seed : int, optional
        Random seed for reproducibility. A seeded instance draws from its own
        private ``numpy.random.RandomState(seed)``; the process-wide NumPy RNG
        is not reseeded. When None, the global ``numpy.random`` stream is used.
    track_history : bool, default=True
        Whether to track optimization history.

    Attributes
    ----------
    particles : NDArray
        Current particle positions (pop_size, n_dim)
    velocities : NDArray
        Current particle velocities (pop_size, n_dim)
    personal_best : NDArray
        Best position for each particle (pop_size, n_dim)
    personal_best_fitness : NDArray
        Best fitness for each particle (pop_size,)
    best_position : NDArray
        Global best position found
    best_fitness : float
        Global best fitness found
    history : list[dict[str, Any]]
        Optimization history if track_history=True

    Raises
    ------
    ValueError
        If any lower bound is greater than or equal to its upper bound.

    Examples
    --------
    >>> def sphere(x: NDArray) -> float:
    ...     return -np.sum(x ** 2)  # Maximize negative sphere
    >>> pso = PSO(sphere, n_dim=3, max_iter=50)
    >>> result = pso.run()
    >>> print(f"Best fitness: {result.best_fitness}")
    >>> print(f"Best position: {result.best_position}")
    """

    particles: NDArray
    velocities: NDArray
    personal_best: NDArray
    personal_best_fitness: NDArray
    best_position: NDArray
    best_fitness: float
    lb: NDArray
    ub: NDArray
    history: list[dict[str, Any]]
    # Random source: a private RandomState when seeded, else the numpy.random module.
    _rng: Any

    def __init__(
        self,
        objective_func: Callable[[NDArray], float],
        n_dim: int,
        pop_size: int = 30,
        max_iter: int = 100,
        lb: NDArray | None = None,
        ub: NDArray | None = None,
        w: float = 0.7,
        c1: float = 1.5,
        c2: float = 1.5,
        seed: int | None = None,
        track_history: bool = True,
    ) -> None:
        """Initialize PSO optimizer (see the class docstring for parameters)."""
        self.objective_func = objective_func
        self.n_dim = n_dim
        self.pop_size = max(1, pop_size)
        self.max_iter = max(0, max_iter)
        self.w = w
        self.c1 = c1
        self.c2 = c2
        self.track_history = track_history

        # A seeded run owns a private legacy RandomState: it yields the same
        # stream that np.random.seed(seed) would, but without reseeding the
        # process-wide generator, so unrelated code and other PSO instances
        # cannot disturb (or be disturbed by) this optimizer. Unseeded runs
        # keep drawing from the global numpy.random stream.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

        # Bounds default to an unbounded search space.
        if lb is None:
            self.lb = np.full(n_dim, -np.inf, dtype=np.float64)
        else:
            self.lb = np.asarray(lb, dtype=np.float64)

        if ub is None:
            self.ub = np.full(n_dim, np.inf, dtype=np.float64)
        else:
            self.ub = np.asarray(ub, dtype=np.float64)

        if np.any(self.lb >= self.ub):
            msg = "Lower bounds must be strictly less than upper bounds"
            raise ValueError(msg)

        # Positions are drawn before velocities; the order fixes the random stream.
        self.particles = self._initialize_particles()
        self.velocities = self._initialize_velocities()

        # Personal bests start at the initial positions with -inf fitness;
        # run() overwrites both after the first evaluation.
        self.personal_best = self.particles.copy()
        self.personal_best_fitness = np.full(self.pop_size, -np.inf, dtype=np.float64)

        # Global best placeholder until run() evaluates the swarm.
        self.best_position = np.zeros(n_dim, dtype=np.float64)
        self.best_fitness = -np.inf

        self.history = []

    def _initialize_particles(self) -> NDArray:
        """Draw the initial particle positions.

        With fully finite bounds, positions are uniform within ``[lb, ub]``.
        If any bound is infinite, positions come from a standard normal and
        are then clipped, so a finite bound on the other side (e.g.
        ``lb=0, ub=inf``) is still respected.

        Returns
        -------
        NDArray
            Initial positions (pop_size, n_dim)
        """
        shape = (self.pop_size, self.n_dim)
        if np.any(np.isinf(self.lb)) or np.any(np.isinf(self.ub)):
            drawn: NDArray = self._rng.randn(*shape).astype(np.float64)
            # Clipping is a no-op for +/-inf bounds and enforces finite ones.
            return self._clip_to_bounds(drawn)

        particles: NDArray = self._rng.uniform(self.lb, self.ub, size=shape).astype(np.float64)
        return particles

    def _initialize_velocities(self) -> NDArray:
        """Initialize velocities uniformly in [-1, 1] per dimension."""
        velocities: NDArray = self._rng.uniform(
            -1, 1, size=(self.pop_size, self.n_dim)
        ).astype(np.float64)
        return velocities

    def _clip_to_bounds(self, particles: NDArray) -> NDArray:
        """Clip particles to specified bounds."""
        clipped: NDArray = np.clip(particles, self.lb, self.ub)
        return clipped

    def _evaluate(self, particles: NDArray) -> NDArray:
        """Evaluate objective function for all particles.

        Parameters
        ----------
        particles : NDArray
            Particle positions (pop_size, n_dim)

        Returns
        -------
        NDArray
            Fitness values (pop_size,)
        """
        # The objective is called once per particle (row), in order.
        fitness: list[float] = [self.objective_func(cast("NDArray", row)) for row in particles]
        return np.array(fitness, dtype=np.float64)

    def run(self) -> PSO_Result:
        """Run PSO optimization.

        Evaluates the current swarm, then performs ``max_iter`` velocity and
        position updates, clipping positions to the bounds after each move.

        Returns
        -------
        PSO_Result
            Optimization result with best position, fitness, and history.
            ``n_iterations`` is ``max_iter``. ``converged`` is True whenever
            at least one iteration was executed; no convergence tolerance is
            checked.
        """
        # Evaluate the starting swarm; it defines the initial personal bests.
        fitness = self._evaluate(self.particles)
        self.personal_best_fitness = fitness.copy()
        self.personal_best = self.particles.copy()

        # Initial global best.
        best_idx = int(np.argmax(fitness))
        self.best_position = self.particles[best_idx].copy()
        self.best_fitness = float(fitness[best_idx])

        swarm_shape = (self.pop_size, self.n_dim)
        for iteration in range(self.max_iter):
            # Independent random weights per particle and dimension.
            r1 = self._rng.uniform(0, 1, size=swarm_shape)
            r2 = self._rng.uniform(0, 1, size=swarm_shape)

            cognitive = self.c1 * r1 * (self.personal_best - self.particles)
            social = self.c2 * r2 * (self.best_position - self.particles)

            self.velocities = self.w * self.velocities + cognitive + social

            self.particles = self._clip_to_bounds(self.particles + self.velocities)

            fitness = self._evaluate(self.particles)

            # Update personal bests only where the new position is strictly better.
            improved = fitness > self.personal_best_fitness
            self.personal_best[improved] = self.particles[improved]
            self.personal_best_fitness[improved] = fitness[improved]

            # Update the global best from this iteration's best particle.
            current_best_idx = int(np.argmax(fitness))
            current_best_fitness = float(fitness[current_best_idx])

            if current_best_fitness > self.best_fitness:
                self.best_position = self.particles[current_best_idx].copy()
                self.best_fitness = current_best_fitness

            if self.track_history:
                self.history.append(
                    {
                        "iteration": iteration,
                        "best_fitness": self.best_fitness,
                        "best_position": self.best_position.tolist(),
                        "mean_fitness": float(np.mean(fitness)),
                        "std_fitness": float(np.std(fitness)),
                    }
                )

        # NOTE: this flag only records that the loop ran; no tolerance is checked.
        converged = self.max_iter > 0
        return PSO_Result(
            best_position=self.best_position.copy(),
            best_fitness=self.best_fitness,
            n_iterations=self.max_iter,
            converged=converged,
            history=self.history.copy(),
        )

    def get_history(self) -> list[dict[str, Any]]:
        """Get optimization history.

        Returns
        -------
        list[dict[str, Any]]
            Shallow copy of the history list (iteration number, best fitness,
            best position, and mean/std fitness per iteration)
        """
        return self.history.copy()