goad-py 0.8.5 pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goad_py/__init__.py +51 -0
- goad_py/_goad_py.pypy39-pp73-aarch64-linux-gnu.so +0 -0
- goad_py/convergence.py +835 -0
- goad_py/convergence_display.py +532 -0
- goad_py/goad_py.pyi +453 -0
- goad_py/phips_convergence.py +614 -0
- goad_py/unified_convergence.py +1337 -0
- goad_py-0.8.5.dist-info/METADATA +99 -0
- goad_py-0.8.5.dist-info/RECORD +10 -0
- goad_py-0.8.5.dist-info/WHEEL +5 -0
goad_py/convergence.py
ADDED
@@ -0,0 +1,835 @@
import contextlib
import os
import random
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np
from rich.console import Console

from . import _goad_py as goad
from .convergence_display import (
    ArrayConvergenceVariable,
    ConvergenceDisplay,
    ConvergenceVariable,
)


@dataclass
class Convergable:
    """Represents a variable to monitor for convergence."""

    variable: str  # 'asymmetry', 'scatt', 'ext', 'albedo', or Mueller element like 'S11', 'S12', etc.
    tolerance_type: str = "relative"  # 'relative' or 'absolute'
    tolerance: float = 0.01
    theta_indices: Optional[List[int]] = (
        None  # For Mueller elements: specific theta bin indices to check (None = all bins)
    )

    def __post_init__(self):
        # Scalar integrated parameters
        valid_scalars = {"asymmetry", "scatt", "ext", "albedo"}
        # Mueller matrix elements (S11, S12, ..., S44)
        valid_mueller = {f"S{i}{j}" for i in range(1, 5) for j in range(1, 5)}
        valid_variables = valid_scalars | valid_mueller

        if self.variable not in valid_variables:
            raise ValueError(
                f"Invalid variable '{self.variable}'. Must be one of {valid_scalars} or Mueller element (S11-S44)"
            )

        valid_types = {"relative", "absolute"}
        if self.tolerance_type not in valid_types:
            raise ValueError(
                f"Invalid tolerance_type '{self.tolerance_type}'. Must be one of {valid_types}"
            )

        if self.tolerance <= 0:
            raise ValueError(f"Tolerance must be positive, got {self.tolerance}")

        # Validate theta_indices only for Mueller elements
        if self.theta_indices is not None:
            if not self.is_mueller():
                raise ValueError("theta_indices can only be used with Mueller elements")
            # Convert range to list if needed
            if isinstance(self.theta_indices, range):
                self.theta_indices = list(self.theta_indices)
            if not isinstance(self.theta_indices, list):
                raise ValueError("theta_indices must be a list or range of integers")

    def is_mueller(self) -> bool:
        """Check if this convergable is a Mueller matrix element."""
        return self.variable.startswith("S") and len(self.variable) == 3


@dataclass
class ConvergenceResults:
    """Results from a convergence study."""

    converged: bool
    n_orientations: int
    values: Dict[str, float]  # Final mean values for each tracked variable
    sem_values: Dict[str, float]  # Final SEM values for each tracked variable
    mueller_1d: Optional[np.ndarray] = None
    mueller_2d: Optional[np.ndarray] = None
    convergence_history: List[Tuple[int, str, float]] = (
        None  # (n_orientations, variable, sem)
    )
    warning: Optional[str] = None


class Convergence:
    """Runs multiple MultiProblems until convergence criteria are met."""

    def __init__(
        self,
        settings: goad.Settings,
        convergables: List[Convergable],
        batch_size: int = 24,
        max_orientations: int = 100_000,
        min_batches: int = 10,
        mueller_1d: bool = True,
        mueller_2d: bool = False,
        log_file: Optional[str] = None,
    ):
        """
        Initialize a convergence study.

        Args:
            settings: GOAD settings for the simulation
            convergables: List of variables to monitor for convergence
            batch_size: Number of orientations per iteration
            max_orientations: Maximum total orientations before stopping
            min_batches: Minimum number of batches before allowing convergence
            mueller_1d: Whether to collect 1D Mueller matrices
            mueller_2d: Whether to collect 2D Mueller matrices
            log_file: Optional path to log file for convergence progress
        """
        self.settings = settings
        # Enable quiet mode to suppress Rust progress bars
        self.settings.quiet = True
        self.convergables = convergables
        self.batch_size = batch_size
        self.max_orientations = max_orientations
        self.min_batches = min_batches
        self.mueller_1d = mueller_1d
        self.mueller_2d = mueller_2d

        # Validate inputs
        if not convergables:
            raise ValueError("Must specify at least one convergable")

        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")

        if max_orientations <= 0:
            raise ValueError(
                f"max_orientations must be positive, got {max_orientations}"
            )

        if min_batches <= 0:
            raise ValueError(f"min_batches must be positive, got {min_batches}")

        # Initialize tracking variables
        self.n_orientations = 0
        self.convergence_history = []

        # Batch-based statistics tracking for rigorous SEM calculation
        self.batch_data = []  # List of batch statistics

        # Mueller matrix accumulation
        self.mueller_1d_sum = None
        self.mueller_2d_sum = None

        # Rich console
        self._console = Console()

        # Create display variables for the new display system
        display_variables = []
        for conv in self.convergables:
            if conv.is_mueller():
                display_variables.append(
                    ArrayConvergenceVariable(
                        name=conv.variable,
                        tolerance=conv.tolerance,
                        tolerance_type=conv.tolerance_type,
                        indices=conv.theta_indices,
                    )
                )
            else:
                display_variables.append(
                    ConvergenceVariable(
                        name=conv.variable,
                        tolerance=conv.tolerance,
                        tolerance_type=conv.tolerance_type,
                    )
                )

        # Initialize display system
        self._display = ConvergenceDisplay(
            variables=display_variables,
            batch_size=self.batch_size,
            min_batches=self.min_batches,
            convergence_type=self._get_convergence_type(),
            console=self._console,
            log_file=log_file,
        )

    def _update_statistics(self, results: goad.Results, batch_size: int):
        """Update statistics with new batch results.

        Args:
            results: Results from a MultiProblem run (pre-averaged over batch_size orientations)
            batch_size: Number of orientations in this batch
        """
        # Check for None values indicating Custom binning
        if (
            results.asymmetry is None
            or results.scat_cross is None
            or results.ext_cross is None
            or results.albedo is None
        ):
            raise ValueError(
                "Received None values for integrated properties. "
                "This likely means Custom binning scheme is being used. "
                "Convergence requires Simple or Interval binning schemes."
            )

        # Store batch data for proper statistical analysis
        batch_info = {"batch_size": batch_size, "values": {}, "weights": {}}

        # Always store all 4 integrated parameters (for unified API output)
        batch_info["values"]["asymmetry"] = results.asymmetry
        batch_info["weights"]["asymmetry"] = results.scat_cross
        batch_info["values"]["scatt"] = results.scat_cross
        batch_info["weights"]["scatt"] = 1.0  # Equal weighting
        batch_info["values"]["ext"] = results.ext_cross
        batch_info["weights"]["ext"] = 1.0  # Equal weighting
        batch_info["values"]["albedo"] = results.albedo
        batch_info["weights"]["albedo"] = results.ext_cross + results.scat_cross

        # Always store ALL 16 Mueller elements (for unified API output with full SEM)
        if self.mueller_1d and results.mueller_1d is not None:
            mueller_1d_array = np.array(results.mueller_1d)  # Shape: (n_theta, 16)

            # Store all 16 Mueller elements (S11, S12, ..., S44)
            for row in range(1, 5):
                for col in range(1, 5):
                    element_name = f"S{row}{col}"
                    mueller_idx = (row - 1) * 4 + (col - 1)
                    mueller_element = mueller_1d_array[
                        :, mueller_idx
                    ]  # Shape: (n_theta,)

                    batch_info["values"][element_name] = mueller_element
                    batch_info["weights"][element_name] = 1.0  # Equal weighting

            # Store theta bins if not already stored (for display purposes)
            if "mueller_theta_bins" not in batch_info and results.bins_1d is not None:
                batch_info["mueller_theta_bins"] = np.array(results.bins_1d)

        self.batch_data.append(batch_info)

        # Update Mueller matrices if enabled
        if self.mueller_1d and results.mueller_1d is not None:
            mueller_1d_array = np.array(results.mueller_1d)
            if self.mueller_1d_sum is None:
                self.mueller_1d_sum = mueller_1d_array * batch_size
            else:
                self.mueller_1d_sum += mueller_1d_array * batch_size

        if self.mueller_2d and results.mueller is not None:
            mueller_2d_array = np.array(results.mueller)
            if self.mueller_2d_sum is None:
                self.mueller_2d_sum = mueller_2d_array * batch_size
            else:
                self.mueller_2d_sum += mueller_2d_array * batch_size

        # Update total orientation count
        self.n_orientations += batch_size

    def _calculate_mean_and_sem_array(
        self, variable: str
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Calculate mean and SEM arrays for Mueller matrix elements across theta bins.

        Args:
            variable: Mueller element name (e.g., 'S11')

        Returns:
            Tuple of (mean_array, sem_array) where each is shape (n_theta,)
        """
        if not self.batch_data:
            return np.array([]), np.array([])

        # Extract batch values (each is an array of theta values)
        batch_arrays = []
        batch_sizes = []

        for batch in self.batch_data:
            if variable in batch["values"]:
                batch_arrays.append(batch["values"][variable])  # Shape: (n_theta,)
                batch_sizes.append(batch["batch_size"])

        if not batch_arrays:
            return np.array([]), np.array([])

        # Stack batches: shape (n_batches, n_theta)
        batch_arrays = np.array(batch_arrays)
        batch_sizes = np.array(batch_sizes)
        n_theta = batch_arrays.shape[1]

        if len(batch_arrays) < 2:
            # Can't estimate variance with < 2 batches
            mean_array = batch_arrays[0]
            sem_array = np.full(n_theta, float("inf"))
            return mean_array, sem_array

        # Calculate mean and SEM independently for each theta bin
        # Mean: weighted by batch size
        mean_array = np.average(
            batch_arrays, axis=0, weights=batch_sizes
        )  # Shape: (n_theta,)

        # Variance between batches at each theta
        batch_means_variance = np.var(batch_arrays, axis=0, ddof=1)  # Shape: (n_theta,)

        # Scale up to estimate population variance
        avg_batch_size = np.mean(batch_sizes)
        estimated_population_variance = batch_means_variance * avg_batch_size

        # Calculate SEM for total sample
        total_n = np.sum(batch_sizes)
        sem_array = np.sqrt(
            estimated_population_variance / (total_n - 1)
        )  # Shape: (n_theta,)

        return mean_array, sem_array

    def _calculate_mean_and_sem(self, variable: str) -> Tuple[float, float]:
        """Calculate mean and standard error of the mean for a variable using batch data.

        Args:
            variable: Variable name

        Returns:
            Tuple of (mean, sem)
        """
        if not self.batch_data:
            return 0.0, float("inf")

        # Extract batch values and weights
        batch_values = []
        batch_weights = []
        batch_sizes = []

        for batch in self.batch_data:
            if variable in batch["values"]:
                batch_values.append(batch["values"][variable])
                batch_weights.append(batch["weights"][variable])
                batch_sizes.append(batch["batch_size"])

        if not batch_values:
            return 0.0, float("inf")

        batch_values = np.array(batch_values)
        batch_weights = np.array(batch_weights)
        batch_sizes = np.array(batch_sizes)

        # For weighted variables (asymmetry, albedo), use weighted statistics
        if variable in ["asymmetry", "albedo"]:
            # Calculate weighted mean across batches
            # Each batch contributes: weight * batch_size * value
            total_weighted_sum = np.sum(batch_weights * batch_sizes * batch_values)
            total_weight = np.sum(batch_weights * batch_sizes)
            weighted_mean = total_weighted_sum / total_weight

            # Calculate weighted variance between batches
            if len(batch_values) < 2:
                return weighted_mean, float(
                    "inf"
                )  # Cannot estimate variance with < 2 batches

            # For batch means, we need to account for the effective weight of each batch
            effective_weights = batch_weights * batch_sizes
            weighted_variance_batch_means = np.sum(
                effective_weights * (batch_values - weighted_mean) ** 2
            ) / np.sum(effective_weights)

            # Scale up to estimate population variance
            # Batch means have variance = population_variance / average_batch_size
            # So population_variance ≈ batch_means_variance * average_batch_size
            avg_batch_size = np.average(batch_sizes, weights=effective_weights)
            estimated_population_variance = (
                weighted_variance_batch_means * avg_batch_size
            )

            # Calculate SEM for the total sample (using n-1 for sample standard error)
            total_n = np.sum(batch_sizes)
            sem = np.sqrt(estimated_population_variance / (total_n - 1))

            return weighted_mean, sem

        else:
            # For unweighted variables (scatt, ext), use simple batch statistics
            # Calculate mean of batch means, weighted by batch size
            total_sum = np.sum(batch_sizes * batch_values)
            total_n = np.sum(batch_sizes)
            mean = total_sum / total_n

            # Calculate variance between batch means
            if len(batch_values) < 2:
                return mean, float("inf")

            batch_means_variance = np.var(batch_values, ddof=1)

            # Scale up to estimate population variance
            # Batch means have variance = population_variance / average_batch_size
            # So population_variance ≈ batch_means_variance * average_batch_size
            avg_batch_size = np.mean(batch_sizes)
            estimated_population_variance = batch_means_variance * avg_batch_size

            # Calculate SEM for the total sample (using n-1 for sample standard error)
            sem = np.sqrt(estimated_population_variance / (total_n - 1))

            return mean, sem

    def _check_convergence(self) -> Dict[str, bool]:
        """Check if all convergence criteria are met.

        Returns:
            Dict mapping variable names to convergence status
        """
        converged = {}

        for conv in self.convergables:
            if conv.is_mueller():
                # Mueller element - check theta bins (all or specific indices)
                mean_array, sem_array = self._calculate_mean_and_sem_array(
                    conv.variable
                )

                if len(mean_array) == 0:
                    converged[conv.variable] = False
                    continue

                # Select theta bins to check
                if conv.theta_indices is not None:
                    # Check only specified indices
                    indices = [i for i in conv.theta_indices if i < len(mean_array)]
                    if not indices:
                        converged[conv.variable] = False
                        continue
                    mean_subset = mean_array[indices]
                    sem_subset = sem_array[indices]
                else:
                    # Check all bins
                    mean_subset = mean_array
                    sem_subset = sem_array

                # Check convergence at selected theta bins
                if conv.tolerance_type == "relative":
                    # Relative tolerance: SEM / |mean| < tolerance
                    relative_sem = np.where(
                        mean_subset != 0,
                        sem_subset / np.abs(mean_subset),
                        sem_subset / conv.tolerance,
                    )
                    converged[conv.variable] = np.all(relative_sem < conv.tolerance)
                else:
                    # Absolute tolerance: SEM < tolerance
                    converged[conv.variable] = np.all(sem_subset < conv.tolerance)
            else:
                # Scalar variable
                mean, sem = self._calculate_mean_and_sem(conv.variable)

                # Calculate tolerance based on type
                if conv.tolerance_type == "relative":
                    # Relative tolerance: SEM / |mean| < tolerance
                    if mean != 0:
                        relative_sem = sem / abs(mean)
                        converged[conv.variable] = relative_sem < conv.tolerance
                    else:
                        # If mean is zero, use absolute comparison
                        converged[conv.variable] = sem < conv.tolerance
                else:
                    # Absolute tolerance: SEM < tolerance
                    converged[conv.variable] = sem < conv.tolerance

        return converged

    def _all_converged(self) -> bool:
        """Check if all variables have converged.

        Returns:
            True if all variables meet their convergence criteria and minimum batches completed
        """
        # Check minimum batches requirement first
        if len(self.batch_data) < self.min_batches:
            return False

        converged_status = self._check_convergence()
        return all(converged_status.values())

    def _get_convergence_type(self) -> str:
        """Get the convergence type name for display."""
        class_name = self.__class__.__name__
        if class_name == "EnsembleConvergence":
            return "Ensemble"
        elif class_name == "Convergence":
            return "Standard"
        else:
            return class_name

    def _get_next_geometry(self, iteration: int) -> Tuple[str, Optional[str]]:
        """Hook method to get geometry for next batch.

        Args:
            iteration: Current iteration number

        Returns:
            Tuple of (geom_path, optional_display_info)
            - geom_path: Path to geometry file to use
            - optional_display_info: Optional string to display (e.g., "Geometry: hex.obj")
        """
        # Default implementation: use fixed geometry from settings
        return self.settings.geom_path, None

    def _handle_geometry_error(self, error: Exception, geom_path: str) -> bool:
        """Hook method to handle geometry loading errors.

        Args:
            error: The exception that occurred
            geom_path: Path to the geometry that failed

        Returns:
            True to skip this geometry and continue, False to raise the error
        """
        # Default implementation: re-raise error (fail fast for single geometry)
        return False

    def _get_theta_bins(self, variable: str) -> Optional[np.ndarray]:
        """Get theta bins for Mueller elements from batch data."""
        for batch in self.batch_data:
            if "mueller_theta_bins" in batch:
                return batch["mueller_theta_bins"]

        # Fallback: infer from array length
        mean_array, _ = self._calculate_mean_and_sem_array(variable)
        if len(mean_array) > 0:
            return np.linspace(0, 180, len(mean_array))

        return None

    def _update_convergence_history(self):
        """Update convergence history with current SEM values."""
        for conv in self.convergables:
            if conv.is_mueller():
                # Mueller element - track worst SEM
                mean_array, sem_array = self._calculate_mean_and_sem_array(
                    conv.variable
                )
                if len(mean_array) > 0:
                    if conv.tolerance_type == "relative":
                        relative_sem_array = np.where(
                            mean_array != 0,
                            sem_array / np.abs(mean_array),
                            float("inf"),
                        )
                        worst_sem = np.max(relative_sem_array)
                    else:
                        worst_sem = np.max(sem_array)

                    self.convergence_history.append(
                        (self.n_orientations, conv.variable, worst_sem)
                    )
            else:
                # Scalar variable
                mean, sem = self._calculate_mean_and_sem(conv.variable)
                if conv.tolerance_type == "relative" and mean != 0:
                    sem = sem / abs(mean)

                self.convergence_history.append(
                    (self.n_orientations, conv.variable, sem)
                )

    def run(self) -> ConvergenceResults:
        """Run the convergence study.

        Returns:
            ConvergenceResults containing final values and convergence status
        """
        iteration = 0
        converged = False
        warning = None

        # Create Live context for smooth updating display
        with self._display.create_live_context() as live:
            # Show initial display before first batch
            initial_display = self._display.build_display(
                iteration=0,
                n_orientations=self.n_orientations,
                get_stats=self._calculate_mean_and_sem,
                get_array_stats=self._calculate_mean_and_sem_array,
                get_bin_labels=self._get_theta_bins,
                power_ratio=None,
                geom_info=None,
            )
            live.update(initial_display)

            while not converged and self.n_orientations < self.max_orientations:
                iteration += 1

                # Get geometry for this batch (hook method - can be overridden)
                geom_path, geom_info = self._get_next_geometry(iteration)

                # Determine batch size for this iteration
                remaining = self.max_orientations - self.n_orientations
                batch_size = min(self.batch_size, remaining)

                # Set batch size
                orientations = goad.create_uniform_orientation(batch_size)

                # Set the geometry path and orientations for the settings
                self.settings.geom_path = geom_path
                self.settings.orientation = orientations

                # Run MultiProblem with error handling for bad geometries
                # Suppress Rust progress bars by redirecting stderr at fd level
                try:
                    mp = goad.MultiProblem(self.settings)
                    # Redirect stderr file descriptor to suppress Rust progress bars
                    stderr_fd = sys.stderr.fileno()
                    with open(os.devnull, "w") as devnull:
                        old_stderr_fd = os.dup(stderr_fd)
                        try:
                            os.dup2(devnull.fileno(), stderr_fd)
                            mp.py_solve()
                        finally:
                            os.dup2(old_stderr_fd, stderr_fd)
                            os.close(old_stderr_fd)
                except Exception as e:
                    # Geometry loading failed (bad faces, degenerate geometry, etc.)
                    # Check if subclass wants to handle this error (e.g., skip for ensemble)
                    if self._handle_geometry_error(e, geom_path):
                        # Skip this geometry and continue
                        continue
                    else:
                        # For single-geometry convergence, we can't skip - must raise error
                        error_msg = (
                            f"Failed to initialize MultiProblem with geometry '{geom_path}': {e}\n"
                            f"Please check geometry file for:\n"
                            f"  - Degenerate faces (area = 0)\n"
                            f"  - Non-planar geometry\n"
                            f"  - Faces that are too small\n"
                            f"  - Invalid mesh topology\n"
                            f"  - Geometry file corruption"
                        )
                        raise type(e)(error_msg) from e

                # Update statistics
                self._update_statistics(mp.results, batch_size)

                # Extract power ratio from results
                try:
                    powers = mp.results.powers  # It's a property, not a method
                    power_in = powers.get("input", 1.0)
                    power_out = powers.get("output", 0.0)
                    power_ratio = power_out / power_in if power_in > 0 else 0.0
                except Exception:
                    power_ratio = None

                # Update convergence history
                self._update_convergence_history()

                # Update live display with optional geometry info
                display = self._display.build_display(
                    iteration=iteration,
                    n_orientations=self.n_orientations,
                    get_stats=self._calculate_mean_and_sem,
                    get_array_stats=self._calculate_mean_and_sem_array,
                    get_bin_labels=self._get_theta_bins,
                    power_ratio=power_ratio,
                    geom_info=geom_info,
                )
                live.update(display)

                # Check convergence
                converged = self._all_converged()

        # Prepare final results
        if converged:
            print(f"\nConverged after {self.n_orientations} orientations.")
        else:
            warning = f"Maximum orientations ({self.max_orientations}) reached without convergence"
            print(f"\nWarning: {warning}")

        # Calculate final values and SEMs
        final_values = {}
        final_sems = {}
        for conv in self.convergables:
            if conv.is_mueller():
                mean_array, sem_array = self._calculate_mean_and_sem_array(
                    conv.variable
                )
                final_values[conv.variable] = mean_array
                final_sems[conv.variable] = sem_array
            else:
                mean, sem = self._calculate_mean_and_sem(conv.variable)
                final_values[conv.variable] = mean
                final_sems[conv.variable] = sem

        # Prepare Mueller matrices with SEM
        mueller_1d = None
        mueller_1d_sem = None
        mueller_2d = None

        if self.mueller_1d and self.mueller_1d_sum is not None:
            mueller_1d = self.mueller_1d_sum / self.n_orientations

            # Compute SEM for all 16 Mueller elements
            # mueller_1d shape: (n_theta, 16)
            n_theta = mueller_1d.shape[0]
            mueller_1d_sem = np.zeros_like(mueller_1d)

            for row in range(1, 5):
                for col in range(1, 5):
                    element_name = f"S{row}{col}"
                    mueller_idx = (row - 1) * 4 + (col - 1)

                    # Calculate mean and SEM for this element across all theta bins
                    mean_array, sem_array = self._calculate_mean_and_sem_array(
                        element_name
                    )

                    if len(sem_array) > 0:
                        mueller_1d_sem[:, mueller_idx] = sem_array

            # Store mueller_1d_sem in final_values for unified API access
            final_values["mueller_1d_sem"] = mueller_1d_sem

        if self.mueller_2d and self.mueller_2d_sum is not None:
            mueller_2d = self.mueller_2d_sum / self.n_orientations

        return ConvergenceResults(
            converged=converged,
            n_orientations=self.n_orientations,
            values=final_values,
            sem_values=final_sems,
            mueller_1d=mueller_1d,
            mueller_2d=mueller_2d,
            convergence_history=self.convergence_history,
            warning=warning,
        )


class EnsembleConvergence(Convergence):
    """Runs convergence study over an ensemble of particle geometries.

    Each batch randomly samples from a directory of geometry files,
    allowing convergence analysis of orientation-averaged and
    geometry-averaged scattering properties.
    """

    def __init__(
        self,
        settings: goad.Settings,
        convergables: List[Convergable],
        geom_dir: str,
        batch_size: int = 24,
        max_orientations: int = 100_000,
        min_batches: int = 10,
        mueller_1d: bool = True,
        mueller_2d: bool = False,
        log_file: Optional[str] = None,
    ):
        """
        Initialize an ensemble convergence study.

        Args:
            settings: GOAD settings for the simulation (geom_path will be overridden)
            convergables: List of variables to monitor for convergence
            geom_dir: Directory containing .obj geometry files
            batch_size: Number of orientations per iteration
            max_orientations: Maximum total orientations before stopping
            min_batches: Minimum number of batches before allowing convergence
            mueller_1d: Whether to collect 1D Mueller matrices
            mueller_2d: Whether to collect 2D Mueller matrices
        """
        # Discover all .obj files in directory
        geom_path = Path(geom_dir)
        if not geom_path.exists():
            raise ValueError(f"Geometry directory does not exist: {geom_dir}")

        if not geom_path.is_dir():
            raise ValueError(f"Path is not a directory: {geom_dir}")

        self.geom_files = sorted([f.name for f in geom_path.glob("*.obj")])

        if not self.geom_files:
            raise ValueError(f"No .obj files found in directory: {geom_dir}")

        self.geom_dir = str(geom_path.resolve())

        print(f"Found {len(self.geom_files)} geometry files in {self.geom_dir}")

        # Call parent constructor
        super().__init__(
            settings=settings,
            convergables=convergables,
            batch_size=batch_size,
            max_orientations=max_orientations,
            min_batches=min_batches,
            mueller_1d=mueller_1d,
            mueller_2d=mueller_2d,
            log_file=log_file,
        )

        # Track skipped geometries for error handling
        self.skipped_geometries = []

    def _get_next_geometry(self, iteration: int) -> Tuple[str, Optional[str]]:
        """Override to randomly select geometry from ensemble.

        Args:
            iteration: Current iteration number

        Returns:
            Tuple of (geom_path, display_info)
        """
        # Randomly select a geometry file for this batch
        geom_file = random.choice(self.geom_files)
        geom_path = os.path.join(self.geom_dir, geom_file)
        geom_info = f"Geom: {geom_file}"

        return geom_path, geom_info

    def _handle_geometry_error(self, error: Exception, geom_path: str) -> bool:
        """Override to skip bad geometries in ensemble mode.

        Args:
            error: The exception that occurred
            geom_path: Path to the geometry that failed

        Returns:
            True to skip this geometry and continue
        """
        # Extract just the filename
        geom_file = os.path.basename(geom_path)

        # Print warning and track skipped geometry
        print(f"\nWarning: Skipping geometry '{geom_file}': {error}")
        self.skipped_geometries.append(geom_file)

        # Check if all geometries have been skipped
        if len(self.skipped_geometries) >= len(self.geom_files):
            raise ValueError(
                f"All {len(self.geom_files)} geometry files failed to load. "
                "Please check geometry files for degenerate faces, non-planar geometry, "
                "or faces that are too small."
            )

        # Skip this geometry and continue
        return True