goad-py 0.5.1__cp38-abi3-win_amd64.whl → 0.5.5__cp38-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of goad-py might be problematic. Click here for more details.
- goad_py/__init__.py +46 -2
- goad_py/_goad_py.pyd +0 -0
- goad_py/convergence.py +583 -83
- goad_py/phips_convergence.py +580 -0
- goad_py/unified_convergence.py +1085 -0
- {goad_py-0.5.1.dist-info → goad_py-0.5.5.dist-info}/METADATA +3 -1
- goad_py-0.5.5.dist-info/RECORD +9 -0
- {goad_py-0.5.1.dist-info → goad_py-0.5.5.dist-info}/WHEEL +1 -1
- goad_py-0.5.1.dist-info/RECORD +0 -7
goad_py/convergence.py
CHANGED
|
@@ -2,38 +2,71 @@ from dataclasses import dataclass
|
|
|
2
2
|
from typing import List, Dict, Optional, Tuple
|
|
3
3
|
import numpy as np
|
|
4
4
|
from . import _goad_py as goad
|
|
5
|
+
import os
|
|
6
|
+
import random
|
|
7
|
+
from pathlib import Path
|
|
5
8
|
|
|
6
9
|
|
|
7
10
|
@dataclass
class Convergable:
    """Represents a variable to monitor for convergence.

    All fields are validated in ``__post_init__``; an invalid combination
    raises ``ValueError`` at construction time.
    """

    # 'asymmetry', 'scatt', 'ext', 'albedo', or Mueller element like 'S11', 'S12', etc.
    variable: str
    # 'relative' or 'absolute'
    tolerance_type: str = "relative"
    # Must be strictly positive.
    tolerance: float = 0.01
    # For Mueller elements: specific theta bin indices to check (None = all bins).
    # A ``range`` is accepted and converted to a list.
    theta_indices: Optional[List[int]] = None

    def __post_init__(self):
        """Validate the field values.

        Raises:
            ValueError: If ``variable`` or ``tolerance_type`` is unknown, if
                ``tolerance`` is not positive, if ``theta_indices`` is given
                for a non-Mueller variable, or if ``theta_indices`` is not a
                list/range made up of integers.
        """
        # Scalar integrated parameters
        valid_scalars = {"asymmetry", "scatt", "ext", "albedo"}
        # Mueller matrix elements (S11, S12, ..., S44)
        valid_mueller = {f"S{i}{j}" for i in range(1, 5) for j in range(1, 5)}
        valid_variables = valid_scalars | valid_mueller

        if self.variable not in valid_variables:
            raise ValueError(
                f"Invalid variable '{self.variable}'. Must be one of {valid_scalars} or Mueller element (S11-S44)"
            )

        valid_types = {"relative", "absolute"}
        if self.tolerance_type not in valid_types:
            raise ValueError(
                f"Invalid tolerance_type '{self.tolerance_type}'. Must be one of {valid_types}"
            )

        if self.tolerance <= 0:
            raise ValueError(f"Tolerance must be positive, got {self.tolerance}")

        # Validate theta_indices only for Mueller elements
        if self.theta_indices is not None:
            if not self.is_mueller():
                raise ValueError("theta_indices can only be used with Mueller elements")
            # Convert range to list if needed
            if isinstance(self.theta_indices, range):
                self.theta_indices = list(self.theta_indices)
            if not isinstance(self.theta_indices, list):
                raise ValueError("theta_indices must be a list or range of integers")
            # Enforce the "of integers" part of the contract up front.
            # Floats would only fail later when used to index an array, and
            # bools would be interpreted by NumPy as a boolean mask rather
            # than as bin positions.
            for index in self.theta_indices:
                if isinstance(index, bool) or not isinstance(index, (int, np.integer)):
                    raise ValueError(
                        "theta_indices must be a list or range of integers"
                    )

    def is_mueller(self) -> bool:
        """Check if this convergable is a Mueller matrix element."""
        return self.variable.startswith("S") and len(self.variable) == 3
|
|
55
|
+
|
|
26
56
|
|
|
27
57
|
@dataclass
class ConvergenceResults:
    """Results from a convergence study."""

    converged: bool
    n_orientations: int
    # Final mean values for each tracked variable. Scalar variables map to a
    # float; Mueller elements map to a per-theta array, and the study also
    # stores a "mueller_1d_sem" array here when 1D Mueller data is collected.
    values: Dict[str, float]
    # Final SEM values for each tracked variable (arrays for Mueller elements).
    sem_values: Dict[str, float]
    mueller_1d: Optional[np.ndarray] = None
    mueller_2d: Optional[np.ndarray] = None
    # (n_orientations, variable, sem) entries; defaults to None, so the
    # annotation is Optional to match the actual default.
    convergence_history: Optional[List[Tuple[int, str, float]]] = None
    warning: Optional[str] = None
|
|
38
71
|
|
|
39
72
|
|
|
@@ -48,7 +81,7 @@ class Convergence:
|
|
|
48
81
|
max_orientations: int = 100_000,
|
|
49
82
|
min_batches: int = 10,
|
|
50
83
|
mueller_1d: bool = True,
|
|
51
|
-
mueller_2d: bool = False
|
|
84
|
+
mueller_2d: bool = False,
|
|
52
85
|
):
|
|
53
86
|
"""
|
|
54
87
|
Initialize a convergence study.
|
|
@@ -78,8 +111,10 @@ class Convergence:
|
|
|
78
111
|
raise ValueError(f"batch_size must be positive, got {batch_size}")
|
|
79
112
|
|
|
80
113
|
if max_orientations <= 0:
|
|
81
|
-
raise ValueError(
|
|
82
|
-
|
|
114
|
+
raise ValueError(
|
|
115
|
+
f"max_orientations must be positive, got {max_orientations}"
|
|
116
|
+
)
|
|
117
|
+
|
|
83
118
|
if min_batches <= 0:
|
|
84
119
|
raise ValueError(f"min_batches must be positive, got {min_batches}")
|
|
85
120
|
|
|
@@ -102,8 +137,12 @@ class Convergence:
|
|
|
102
137
|
batch_size: Number of orientations in this batch
|
|
103
138
|
"""
|
|
104
139
|
# Check for None values indicating Custom binning
|
|
105
|
-
if (
|
|
106
|
-
results.
|
|
140
|
+
if (
|
|
141
|
+
results.asymmetry is None
|
|
142
|
+
or results.scat_cross is None
|
|
143
|
+
or results.ext_cross is None
|
|
144
|
+
or results.albedo is None
|
|
145
|
+
):
|
|
107
146
|
raise ValueError(
|
|
108
147
|
"Received None values for integrated properties. "
|
|
109
148
|
"This likely means Custom binning scheme is being used. "
|
|
@@ -111,26 +150,37 @@ class Convergence:
|
|
|
111
150
|
)
|
|
112
151
|
|
|
113
152
|
# Store batch data for proper statistical analysis
|
|
114
|
-
batch_info = {
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
153
|
+
batch_info = {"batch_size": batch_size, "values": {}, "weights": {}}
|
|
154
|
+
|
|
155
|
+
# Always store all 4 integrated parameters (for unified API output)
|
|
156
|
+
batch_info["values"]["asymmetry"] = results.asymmetry
|
|
157
|
+
batch_info["weights"]["asymmetry"] = results.scat_cross
|
|
158
|
+
batch_info["values"]["scatt"] = results.scat_cross
|
|
159
|
+
batch_info["weights"]["scatt"] = 1.0 # Equal weighting
|
|
160
|
+
batch_info["values"]["ext"] = results.ext_cross
|
|
161
|
+
batch_info["weights"]["ext"] = 1.0 # Equal weighting
|
|
162
|
+
batch_info["values"]["albedo"] = results.albedo
|
|
163
|
+
batch_info["weights"]["albedo"] = results.ext_cross + results.scat_cross
|
|
164
|
+
|
|
165
|
+
# Always store ALL 16 Mueller elements (for unified API output with full SEM)
|
|
166
|
+
if self.mueller_1d and results.mueller_1d is not None:
|
|
167
|
+
mueller_1d_array = np.array(results.mueller_1d) # Shape: (n_theta, 16)
|
|
119
168
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
169
|
+
# Store all 16 Mueller elements (S11, S12, ..., S44)
|
|
170
|
+
for row in range(1, 5):
|
|
171
|
+
for col in range(1, 5):
|
|
172
|
+
element_name = f"S{row}{col}"
|
|
173
|
+
mueller_idx = (row - 1) * 4 + (col - 1)
|
|
174
|
+
mueller_element = mueller_1d_array[
|
|
175
|
+
:, mueller_idx
|
|
176
|
+
] # Shape: (n_theta,)
|
|
177
|
+
|
|
178
|
+
batch_info["values"][element_name] = mueller_element
|
|
179
|
+
batch_info["weights"][element_name] = 1.0 # Equal weighting
|
|
180
|
+
|
|
181
|
+
# Store theta bins if not already stored (for display purposes)
|
|
182
|
+
if "mueller_theta_bins" not in batch_info and results.bins_1d is not None:
|
|
183
|
+
batch_info["mueller_theta_bins"] = np.array(results.bins_1d)
|
|
134
184
|
|
|
135
185
|
self.batch_data.append(batch_info)
|
|
136
186
|
|
|
@@ -152,6 +202,64 @@ class Convergence:
|
|
|
152
202
|
# Update total orientation count
|
|
153
203
|
self.n_orientations += batch_size
|
|
154
204
|
|
|
205
|
+
def _calculate_mean_and_sem_array(
|
|
206
|
+
self, variable: str
|
|
207
|
+
) -> Tuple[np.ndarray, np.ndarray]:
|
|
208
|
+
"""Calculate mean and SEM arrays for Mueller matrix elements across theta bins.
|
|
209
|
+
|
|
210
|
+
Args:
|
|
211
|
+
variable: Mueller element name (e.g., 'S11')
|
|
212
|
+
|
|
213
|
+
Returns:
|
|
214
|
+
Tuple of (mean_array, sem_array) where each is shape (n_theta,)
|
|
215
|
+
"""
|
|
216
|
+
if not self.batch_data:
|
|
217
|
+
return np.array([]), np.array([])
|
|
218
|
+
|
|
219
|
+
# Extract batch values (each is an array of theta values)
|
|
220
|
+
batch_arrays = []
|
|
221
|
+
batch_sizes = []
|
|
222
|
+
|
|
223
|
+
for batch in self.batch_data:
|
|
224
|
+
if variable in batch["values"]:
|
|
225
|
+
batch_arrays.append(batch["values"][variable]) # Shape: (n_theta,)
|
|
226
|
+
batch_sizes.append(batch["batch_size"])
|
|
227
|
+
|
|
228
|
+
if not batch_arrays:
|
|
229
|
+
return np.array([]), np.array([])
|
|
230
|
+
|
|
231
|
+
# Stack batches: shape (n_batches, n_theta)
|
|
232
|
+
batch_arrays = np.array(batch_arrays)
|
|
233
|
+
batch_sizes = np.array(batch_sizes)
|
|
234
|
+
n_theta = batch_arrays.shape[1]
|
|
235
|
+
|
|
236
|
+
if len(batch_arrays) < 2:
|
|
237
|
+
# Can't estimate variance with < 2 batches
|
|
238
|
+
mean_array = batch_arrays[0]
|
|
239
|
+
sem_array = np.full(n_theta, float("inf"))
|
|
240
|
+
return mean_array, sem_array
|
|
241
|
+
|
|
242
|
+
# Calculate mean and SEM independently for each theta bin
|
|
243
|
+
# Mean: weighted by batch size
|
|
244
|
+
mean_array = np.average(
|
|
245
|
+
batch_arrays, axis=0, weights=batch_sizes
|
|
246
|
+
) # Shape: (n_theta,)
|
|
247
|
+
|
|
248
|
+
# Variance between batches at each theta
|
|
249
|
+
batch_means_variance = np.var(batch_arrays, axis=0, ddof=1) # Shape: (n_theta,)
|
|
250
|
+
|
|
251
|
+
# Scale up to estimate population variance
|
|
252
|
+
avg_batch_size = np.mean(batch_sizes)
|
|
253
|
+
estimated_population_variance = batch_means_variance * avg_batch_size
|
|
254
|
+
|
|
255
|
+
# Calculate SEM for total sample
|
|
256
|
+
total_n = np.sum(batch_sizes)
|
|
257
|
+
sem_array = np.sqrt(
|
|
258
|
+
estimated_population_variance / (total_n - 1)
|
|
259
|
+
) # Shape: (n_theta,)
|
|
260
|
+
|
|
261
|
+
return mean_array, sem_array
|
|
262
|
+
|
|
155
263
|
def _calculate_mean_and_sem(self, variable: str) -> Tuple[float, float]:
|
|
156
264
|
"""Calculate mean and standard error of the mean for a variable using batch data.
|
|
157
265
|
|
|
@@ -162,7 +270,7 @@ class Convergence:
|
|
|
162
270
|
Tuple of (mean, sem)
|
|
163
271
|
"""
|
|
164
272
|
if not self.batch_data:
|
|
165
|
-
return 0.0, float(
|
|
273
|
+
return 0.0, float("inf")
|
|
166
274
|
|
|
167
275
|
# Extract batch values and weights
|
|
168
276
|
batch_values = []
|
|
@@ -170,20 +278,20 @@ class Convergence:
|
|
|
170
278
|
batch_sizes = []
|
|
171
279
|
|
|
172
280
|
for batch in self.batch_data:
|
|
173
|
-
if variable in batch[
|
|
174
|
-
batch_values.append(batch[
|
|
175
|
-
batch_weights.append(batch[
|
|
176
|
-
batch_sizes.append(batch[
|
|
281
|
+
if variable in batch["values"]:
|
|
282
|
+
batch_values.append(batch["values"][variable])
|
|
283
|
+
batch_weights.append(batch["weights"][variable])
|
|
284
|
+
batch_sizes.append(batch["batch_size"])
|
|
177
285
|
|
|
178
286
|
if not batch_values:
|
|
179
|
-
return 0.0, float(
|
|
287
|
+
return 0.0, float("inf")
|
|
180
288
|
|
|
181
289
|
batch_values = np.array(batch_values)
|
|
182
290
|
batch_weights = np.array(batch_weights)
|
|
183
291
|
batch_sizes = np.array(batch_sizes)
|
|
184
292
|
|
|
185
293
|
# For weighted variables (asymmetry, albedo), use weighted statistics
|
|
186
|
-
if variable in [
|
|
294
|
+
if variable in ["asymmetry", "albedo"]:
|
|
187
295
|
# Calculate weighted mean across batches
|
|
188
296
|
# Each batch contributes: weight * batch_size * value
|
|
189
297
|
total_weighted_sum = np.sum(batch_weights * batch_sizes * batch_values)
|
|
@@ -192,17 +300,23 @@ class Convergence:
|
|
|
192
300
|
|
|
193
301
|
# Calculate weighted variance between batches
|
|
194
302
|
if len(batch_values) < 2:
|
|
195
|
-
return weighted_mean, float(
|
|
303
|
+
return weighted_mean, float(
|
|
304
|
+
"inf"
|
|
305
|
+
) # Cannot estimate variance with < 2 batches
|
|
196
306
|
|
|
197
307
|
# For batch means, we need to account for the effective weight of each batch
|
|
198
308
|
effective_weights = batch_weights * batch_sizes
|
|
199
|
-
weighted_variance_batch_means = np.sum(
|
|
309
|
+
weighted_variance_batch_means = np.sum(
|
|
310
|
+
effective_weights * (batch_values - weighted_mean) ** 2
|
|
311
|
+
) / np.sum(effective_weights)
|
|
200
312
|
|
|
201
313
|
# Scale up to estimate population variance
|
|
202
314
|
# Batch means have variance = population_variance / average_batch_size
|
|
203
315
|
# So population_variance ≈ batch_means_variance * average_batch_size
|
|
204
316
|
avg_batch_size = np.average(batch_sizes, weights=effective_weights)
|
|
205
|
-
estimated_population_variance =
|
|
317
|
+
estimated_population_variance = (
|
|
318
|
+
weighted_variance_batch_means * avg_batch_size
|
|
319
|
+
)
|
|
206
320
|
|
|
207
321
|
# Calculate SEM for the total sample (using n-1 for sample standard error)
|
|
208
322
|
total_n = np.sum(batch_sizes)
|
|
@@ -219,7 +333,7 @@ class Convergence:
|
|
|
219
333
|
|
|
220
334
|
# Calculate variance between batch means
|
|
221
335
|
if len(batch_values) < 2:
|
|
222
|
-
return mean, float(
|
|
336
|
+
return mean, float("inf")
|
|
223
337
|
|
|
224
338
|
batch_means_variance = np.var(batch_values, ddof=1)
|
|
225
339
|
|
|
@@ -243,20 +357,58 @@ class Convergence:
|
|
|
243
357
|
converged = {}
|
|
244
358
|
|
|
245
359
|
for conv in self.convergables:
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
converged[conv.variable] =
|
|
360
|
+
if conv.is_mueller():
|
|
361
|
+
# Mueller element - check theta bins (all or specific indices)
|
|
362
|
+
mean_array, sem_array = self._calculate_mean_and_sem_array(
|
|
363
|
+
conv.variable
|
|
364
|
+
)
|
|
365
|
+
|
|
366
|
+
if len(mean_array) == 0:
|
|
367
|
+
converged[conv.variable] = False
|
|
368
|
+
continue
|
|
369
|
+
|
|
370
|
+
# Select theta bins to check
|
|
371
|
+
if conv.theta_indices is not None:
|
|
372
|
+
# Check only specified indices
|
|
373
|
+
indices = [i for i in conv.theta_indices if i < len(mean_array)]
|
|
374
|
+
if not indices:
|
|
375
|
+
converged[conv.variable] = False
|
|
376
|
+
continue
|
|
377
|
+
mean_subset = mean_array[indices]
|
|
378
|
+
sem_subset = sem_array[indices]
|
|
254
379
|
else:
|
|
255
|
-
#
|
|
256
|
-
|
|
380
|
+
# Check all bins
|
|
381
|
+
mean_subset = mean_array
|
|
382
|
+
sem_subset = sem_array
|
|
383
|
+
|
|
384
|
+
# Check convergence at selected theta bins
|
|
385
|
+
if conv.tolerance_type == "relative":
|
|
386
|
+
# Relative tolerance: SEM / |mean| < tolerance
|
|
387
|
+
relative_sem = np.where(
|
|
388
|
+
mean_subset != 0,
|
|
389
|
+
sem_subset / np.abs(mean_subset),
|
|
390
|
+
sem_subset / conv.tolerance,
|
|
391
|
+
)
|
|
392
|
+
converged[conv.variable] = np.all(relative_sem < conv.tolerance)
|
|
393
|
+
else:
|
|
394
|
+
# Absolute tolerance: SEM < tolerance
|
|
395
|
+
converged[conv.variable] = np.all(sem_subset < conv.tolerance)
|
|
257
396
|
else:
|
|
258
|
-
#
|
|
259
|
-
|
|
397
|
+
# Scalar variable
|
|
398
|
+
mean, sem = self._calculate_mean_and_sem(conv.variable)
|
|
399
|
+
|
|
400
|
+
# Calculate tolerance based on type
|
|
401
|
+
if conv.tolerance_type == "relative":
|
|
402
|
+
# Relative tolerance: SEM / |mean| < tolerance
|
|
403
|
+
if mean != 0:
|
|
404
|
+
relative_sem = sem / abs(mean)
|
|
405
|
+
converged[conv.variable] = relative_sem < conv.tolerance
|
|
406
|
+
else:
|
|
407
|
+
# If mean is zero, use absolute comparison
|
|
408
|
+
converged[conv.variable] = sem < conv.tolerance
|
|
409
|
+
else:
|
|
410
|
+
# Absolute tolerance: SEM < tolerance
|
|
411
|
+
converged[conv.variable] = sem < conv.tolerance
|
|
260
412
|
|
|
261
413
|
return converged
|
|
262
414
|
|
|
@@ -269,7 +421,7 @@ class Convergence:
|
|
|
269
421
|
# Check minimum batches requirement first
|
|
270
422
|
if len(self.batch_data) < self.min_batches:
|
|
271
423
|
return False
|
|
272
|
-
|
|
424
|
+
|
|
273
425
|
converged_status = self._check_convergence()
|
|
274
426
|
return all(converged_status.values())
|
|
275
427
|
|
|
@@ -279,38 +431,141 @@ class Convergence:
|
|
|
279
431
|
Args:
|
|
280
432
|
iteration: Current iteration number
|
|
281
433
|
"""
|
|
282
|
-
|
|
434
|
+
# Calculate minimum required orientations
|
|
435
|
+
min_required = self.min_batches * self.batch_size
|
|
436
|
+
|
|
437
|
+
# Show progress with min orientations requirement
|
|
438
|
+
if self.n_orientations < min_required:
|
|
439
|
+
print(
|
|
440
|
+
f"\nIteration {iteration} ({self.n_orientations}/{min_required} orientations, min not reached):"
|
|
441
|
+
)
|
|
442
|
+
else:
|
|
443
|
+
print(
|
|
444
|
+
f"\nIteration {iteration} ({self.n_orientations} orientations, min {min_required} reached):"
|
|
445
|
+
)
|
|
283
446
|
|
|
284
447
|
converged_status = self._check_convergence()
|
|
285
448
|
|
|
286
449
|
for conv in self.convergables:
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
450
|
+
if conv.is_mueller():
|
|
451
|
+
# Mueller element - show worst-case theta bin
|
|
452
|
+
mean_array, sem_array = self._calculate_mean_and_sem_array(
|
|
453
|
+
conv.variable
|
|
454
|
+
)
|
|
455
|
+
|
|
456
|
+
if len(mean_array) == 0:
|
|
457
|
+
print(f" {conv.variable:<10}: No data yet")
|
|
458
|
+
continue
|
|
459
|
+
|
|
460
|
+
# Get theta bins from results (assuming we have access to bins_1d)
|
|
461
|
+
if hasattr(self, "settings") and hasattr(self.settings, "binning"):
|
|
462
|
+
# We'll get theta values from the first batch's mueller_1d if available
|
|
463
|
+
theta_bins = None
|
|
464
|
+
for batch in self.batch_data:
|
|
465
|
+
if "mueller_theta_bins" in batch:
|
|
466
|
+
theta_bins = batch["mueller_theta_bins"]
|
|
467
|
+
break
|
|
468
|
+
if theta_bins is None:
|
|
469
|
+
theta_bins = np.arange(len(mean_array))
|
|
299
470
|
else:
|
|
300
|
-
|
|
301
|
-
|
|
471
|
+
theta_bins = np.arange(len(mean_array))
|
|
472
|
+
|
|
473
|
+
# Calculate relative SEM for each theta
|
|
474
|
+
if conv.tolerance_type == "relative":
|
|
475
|
+
relative_sem_array = np.where(
|
|
476
|
+
mean_array != 0, sem_array / np.abs(mean_array), float("inf")
|
|
477
|
+
)
|
|
478
|
+
worst_idx = np.argmax(relative_sem_array)
|
|
479
|
+
worst_sem = relative_sem_array[worst_idx]
|
|
480
|
+
target_str = f"{conv.tolerance * 100:.1f}%"
|
|
481
|
+
current_str = f"{worst_sem * 100:.2f}%"
|
|
482
|
+
else:
|
|
483
|
+
worst_idx = np.argmax(sem_array)
|
|
484
|
+
worst_sem = sem_array[worst_idx]
|
|
485
|
+
target_str = f"{conv.tolerance}"
|
|
486
|
+
current_str = f"{worst_sem:.4g}"
|
|
487
|
+
|
|
488
|
+
worst_theta = theta_bins[worst_idx]
|
|
489
|
+
worst_mean = mean_array[worst_idx]
|
|
490
|
+
|
|
491
|
+
# Count converged bins (either all or specified indices)
|
|
492
|
+
if conv.theta_indices is not None:
|
|
493
|
+
# Only checking specific bins
|
|
494
|
+
indices = [i for i in conv.theta_indices if i < len(mean_array)]
|
|
495
|
+
if conv.tolerance_type == "relative":
|
|
496
|
+
converged_bins = np.sum(
|
|
497
|
+
relative_sem_array[indices] < conv.tolerance
|
|
498
|
+
)
|
|
499
|
+
else:
|
|
500
|
+
converged_bins = np.sum(sem_array[indices] < conv.tolerance)
|
|
501
|
+
total_bins = len(indices)
|
|
502
|
+
bin_desc = (
|
|
503
|
+
f"θ={[theta_bins[i] for i in indices]}"
|
|
504
|
+
if len(indices) <= 3
|
|
505
|
+
else f"{len(indices)} bins"
|
|
506
|
+
)
|
|
507
|
+
else:
|
|
508
|
+
# Checking all bins
|
|
509
|
+
if conv.tolerance_type == "relative":
|
|
510
|
+
converged_bins = np.sum(relative_sem_array < conv.tolerance)
|
|
511
|
+
else:
|
|
512
|
+
converged_bins = np.sum(sem_array < conv.tolerance)
|
|
513
|
+
total_bins = len(mean_array)
|
|
514
|
+
bin_desc = f"{total_bins} bins"
|
|
515
|
+
|
|
516
|
+
status = "✓" if converged_status[conv.variable] else "❌"
|
|
517
|
+
|
|
518
|
+
# Print Mueller convergence info
|
|
519
|
+
if conv.theta_indices is not None and len(conv.theta_indices) <= 3:
|
|
520
|
+
# For small number of specific bins, show them explicitly
|
|
521
|
+
print(
|
|
522
|
+
f" {conv.variable:<10}: {converged_bins}/{total_bins} {bin_desc} | "
|
|
523
|
+
f"Worst θ={worst_theta:.1f}°: {worst_mean:.4g} | SEM: {current_str} (target: {target_str}) {status}"
|
|
524
|
+
)
|
|
525
|
+
else:
|
|
526
|
+
# For many bins, use standard format
|
|
527
|
+
print(
|
|
528
|
+
f" {conv.variable:<10}: {converged_bins}/{total_bins} bins converged | "
|
|
529
|
+
f"Worst θ={worst_theta:.1f}°: {worst_mean:.4g} | SEM: {current_str} (target: {target_str}) {status}"
|
|
530
|
+
)
|
|
531
|
+
|
|
532
|
+
# Add worst SEM to convergence history
|
|
533
|
+
self.convergence_history.append(
|
|
534
|
+
(self.n_orientations, conv.variable, worst_sem)
|
|
535
|
+
)
|
|
302
536
|
else:
|
|
303
|
-
|
|
304
|
-
|
|
537
|
+
# Scalar variable
|
|
538
|
+
mean, sem = self._calculate_mean_and_sem(conv.variable)
|
|
539
|
+
|
|
540
|
+
# Calculate 95% CI
|
|
541
|
+
ci_lower = mean - 1.96 * sem
|
|
542
|
+
ci_upper = mean + 1.96 * sem
|
|
543
|
+
|
|
544
|
+
# Format based on tolerance type
|
|
545
|
+
if conv.tolerance_type == "relative":
|
|
546
|
+
if mean != 0:
|
|
547
|
+
relative_sem = sem / abs(mean)
|
|
548
|
+
target_str = f"{conv.tolerance * 100:.1f}%"
|
|
549
|
+
current_str = f"{relative_sem * 100:.2f}%"
|
|
550
|
+
else:
|
|
551
|
+
target_str = f"{conv.tolerance} (abs, mean=0)"
|
|
552
|
+
current_str = f"{sem:.4g}"
|
|
553
|
+
else:
|
|
554
|
+
target_str = f"{conv.tolerance}"
|
|
555
|
+
current_str = f"{sem:.4g}"
|
|
305
556
|
|
|
306
|
-
|
|
307
|
-
|
|
557
|
+
# Status indicator
|
|
558
|
+
status = "✓" if converged_status[conv.variable] else "❌"
|
|
308
559
|
|
|
309
|
-
|
|
310
|
-
|
|
560
|
+
# Print line with mean, SEM, CI, and convergence status
|
|
561
|
+
print(
|
|
562
|
+
f" {conv.variable:<10}: {mean:.6f} ± {sem:.6f} [{ci_lower:.6f}, {ci_upper:.6f}] | SEM: {current_str} (target: {target_str}) {status}"
|
|
563
|
+
)
|
|
311
564
|
|
|
312
|
-
|
|
313
|
-
|
|
565
|
+
# Add to convergence history
|
|
566
|
+
self.convergence_history.append(
|
|
567
|
+
(self.n_orientations, conv.variable, sem)
|
|
568
|
+
)
|
|
314
569
|
|
|
315
570
|
def run(self) -> ConvergenceResults:
|
|
316
571
|
"""Run the convergence study.
|
|
@@ -358,15 +613,46 @@ class Convergence:
|
|
|
358
613
|
final_values = {}
|
|
359
614
|
final_sems = {}
|
|
360
615
|
for conv in self.convergables:
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
616
|
+
if conv.is_mueller():
|
|
617
|
+
mean_array, sem_array = self._calculate_mean_and_sem_array(
|
|
618
|
+
conv.variable
|
|
619
|
+
)
|
|
620
|
+
final_values[conv.variable] = mean_array
|
|
621
|
+
final_sems[conv.variable] = sem_array
|
|
622
|
+
else:
|
|
623
|
+
mean, sem = self._calculate_mean_and_sem(conv.variable)
|
|
624
|
+
final_values[conv.variable] = mean
|
|
625
|
+
final_sems[conv.variable] = sem
|
|
364
626
|
|
|
365
|
-
# Prepare Mueller matrices
|
|
627
|
+
# Prepare Mueller matrices with SEM
|
|
366
628
|
mueller_1d = None
|
|
629
|
+
mueller_1d_sem = None
|
|
367
630
|
mueller_2d = None
|
|
631
|
+
|
|
368
632
|
if self.mueller_1d and self.mueller_1d_sum is not None:
|
|
369
633
|
mueller_1d = self.mueller_1d_sum / self.n_orientations
|
|
634
|
+
|
|
635
|
+
# Compute SEM for all 16 Mueller elements
|
|
636
|
+
# mueller_1d shape: (n_theta, 16)
|
|
637
|
+
n_theta = mueller_1d.shape[0]
|
|
638
|
+
mueller_1d_sem = np.zeros_like(mueller_1d)
|
|
639
|
+
|
|
640
|
+
for row in range(1, 5):
|
|
641
|
+
for col in range(1, 5):
|
|
642
|
+
element_name = f"S{row}{col}"
|
|
643
|
+
mueller_idx = (row - 1) * 4 + (col - 1)
|
|
644
|
+
|
|
645
|
+
# Calculate mean and SEM for this element across all theta bins
|
|
646
|
+
mean_array, sem_array = self._calculate_mean_and_sem_array(
|
|
647
|
+
element_name
|
|
648
|
+
)
|
|
649
|
+
|
|
650
|
+
if len(sem_array) > 0:
|
|
651
|
+
mueller_1d_sem[:, mueller_idx] = sem_array
|
|
652
|
+
|
|
653
|
+
# Store mueller_1d_sem in final_values for unified API access
|
|
654
|
+
final_values["mueller_1d_sem"] = mueller_1d_sem
|
|
655
|
+
|
|
370
656
|
if self.mueller_2d and self.mueller_2d_sum is not None:
|
|
371
657
|
mueller_2d = self.mueller_2d_sum / self.n_orientations
|
|
372
658
|
|
|
@@ -378,5 +664,219 @@ class Convergence:
|
|
|
378
664
|
mueller_1d=mueller_1d,
|
|
379
665
|
mueller_2d=mueller_2d,
|
|
380
666
|
convergence_history=self.convergence_history,
|
|
381
|
-
warning=warning
|
|
667
|
+
warning=warning,
|
|
382
668
|
)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
class EnsembleConvergence(Convergence):
    """Runs convergence study over an ensemble of particle geometries.

    Each batch randomly samples from a directory of geometry files,
    allowing convergence analysis of orientation-averaged and
    geometry-averaged scattering properties.
    """

    def __init__(
        self,
        settings: goad.Settings,
        convergables: List[Convergable],
        geom_dir: str,
        batch_size: int = 24,
        max_orientations: int = 100_000,
        min_batches: int = 10,
        mueller_1d: bool = True,
        mueller_2d: bool = False,
    ):
        """
        Initialize an ensemble convergence study.

        Args:
            settings: GOAD settings for the simulation (geom_path will be overridden)
            convergables: List of variables to monitor for convergence
            geom_dir: Directory containing .obj geometry files
            batch_size: Number of orientations per iteration
            max_orientations: Maximum total orientations before stopping
            min_batches: Minimum number of batches before allowing convergence
            mueller_1d: Whether to collect 1D Mueller matrices
            mueller_2d: Whether to collect 2D Mueller matrices

        Raises:
            ValueError: If ``geom_dir`` does not exist, is not a directory,
                or contains no ``.obj`` files.
        """
        # Discover all .obj files in directory
        geom_path = Path(geom_dir)
        if not geom_path.exists():
            raise ValueError(f"Geometry directory does not exist: {geom_dir}")

        if not geom_path.is_dir():
            raise ValueError(f"Path is not a directory: {geom_dir}")

        # Sorted so the candidate list is stable across runs; entries that
        # merely end in ".obj" but are not files (e.g. a sub-directory) are
        # skipped because they cannot be loaded as a geometry.
        self.geom_files = sorted(
            f.name for f in geom_path.glob("*.obj") if f.is_file()
        )

        if not self.geom_files:
            raise ValueError(f"No .obj files found in directory: {geom_dir}")

        self.geom_dir = str(geom_path.resolve())

        print(f"Found {len(self.geom_files)} geometry files in {self.geom_dir}")

        # Call parent constructor
        super().__init__(
            settings=settings,
            convergables=convergables,
            batch_size=batch_size,
            max_orientations=max_orientations,
            min_batches=min_batches,
            mueller_1d=mueller_1d,
            mueller_2d=mueller_2d,
        )

    def run(self) -> ConvergenceResults:
        """Run the ensemble convergence study.

        Each batch iteration randomly selects a geometry file from the
        ensemble directory before running the orientation averaging.

        Returns:
            ConvergenceResults containing final ensemble-averaged values
        """
        iteration = 0
        converged = False
        warning = None

        while not converged and self.n_orientations < self.max_orientations:
            iteration += 1

            # Randomly select a geometry file for this batch
            geom_file = random.choice(self.geom_files)
            geom_path = os.path.join(self.geom_dir, geom_file)

            # Determine batch size for this iteration (the last batch is
            # clipped so the total never exceeds max_orientations)
            remaining = self.max_orientations - self.n_orientations
            batch_size = min(self.batch_size, remaining)

            # Create orientations for this batch
            orientations = goad.create_uniform_orientation(batch_size)

            # Update settings with selected geometry and orientations
            self.settings.geom_path = geom_path
            self.settings.orientation = orientations

            # Run MultiProblem with selected geometry
            mp = goad.MultiProblem(self.settings)
            mp.py_solve()

            # Update statistics
            self._update_statistics(mp.results, batch_size)

            # Print progress (with geometry info)
            min_required = self.min_batches * self.batch_size
            if self.n_orientations < min_required:
                print(
                    f"\nIteration {iteration} ({self.n_orientations}/{min_required} orientations, min not reached) - Geometry: {geom_file}"
                )
            else:
                print(
                    f"\nIteration {iteration} ({self.n_orientations} orientations, min {min_required} reached) - Geometry: {geom_file}"
                )
            self._print_progress_without_header(iteration)

            # Check convergence
            converged = self._all_converged()

        # Prepare final results
        if converged:
            print(f"\nConverged after {self.n_orientations} orientations.")
        else:
            warning = f"Maximum orientations ({self.max_orientations}) reached without convergence"
            print(f"\nWarning: {warning}")

        # Calculate final values and SEMs
        final_values = {}
        final_sems = {}
        for conv in self.convergables:
            if conv.is_mueller():
                mean_array, sem_array = self._calculate_mean_and_sem_array(
                    conv.variable
                )
                final_values[conv.variable] = mean_array
                final_sems[conv.variable] = sem_array
            else:
                mean, sem = self._calculate_mean_and_sem(conv.variable)
                final_values[conv.variable] = mean
                final_sems[conv.variable] = sem

        # Prepare Mueller matrices with SEM
        mueller_1d = None
        mueller_2d = None

        if self.mueller_1d and self.mueller_1d_sum is not None:
            mueller_1d = self.mueller_1d_sum / self.n_orientations

            # Compute SEM for all 16 Mueller elements; same layout as
            # mueller_1d, with column (row - 1) * 4 + (col - 1) per element.
            mueller_1d_sem = np.zeros_like(mueller_1d)

            for row in range(1, 5):
                for col in range(1, 5):
                    element_name = f"S{row}{col}"
                    mueller_idx = (row - 1) * 4 + (col - 1)

                    # Only the SEM is needed here; the mean is discarded.
                    _, sem_array = self._calculate_mean_and_sem_array(element_name)

                    # Elements with no stored batch data keep their zero column.
                    if len(sem_array) > 0:
                        mueller_1d_sem[:, mueller_idx] = sem_array

            # Store mueller_1d_sem in final_values for unified API access
            final_values["mueller_1d_sem"] = mueller_1d_sem

        if self.mueller_2d and self.mueller_2d_sum is not None:
            mueller_2d = self.mueller_2d_sum / self.n_orientations

        return ConvergenceResults(
            converged=converged,
            n_orientations=self.n_orientations,
            values=final_values,
            sem_values=final_sems,
            mueller_1d=mueller_1d,
            mueller_2d=mueller_2d,
            convergence_history=self.convergence_history,
            warning=warning,
        )

    def _print_progress_without_header(self, iteration: int):
        """Print convergence progress without iteration header (already printed with geometry).

        Scalar variables are reported as mean, SEM and 95% CI. Mueller
        elements hold one value per theta bin, so they are summarised by the
        worst bin among those being checked instead of being passed through
        the scalar statistics. One entry per convergable is appended to
        ``self.convergence_history``.

        Args:
            iteration: Current iteration number (unused; the caller prints
                the header line that contains it).
        """
        converged_status = self._check_convergence()

        for conv in self.convergables:
            # Status indicator
            status = "✓" if converged_status[conv.variable] else "❌"

            if conv.is_mueller():
                mean_array, sem_array = self._calculate_mean_and_sem_array(
                    conv.variable
                )

                if len(mean_array) == 0:
                    print(f" {conv.variable:<10}: No data yet")
                    continue

                # Restrict the report to the bins that take part in the
                # convergence decision.
                if conv.theta_indices is not None:
                    checked = np.array(
                        [i for i in conv.theta_indices if i < len(mean_array)],
                        dtype=int,
                    )
                else:
                    checked = np.arange(len(mean_array))

                if checked.size == 0:
                    print(f" {conv.variable:<10}: No valid theta bins selected {status}")
                    continue

                means = mean_array[checked]
                sems = sem_array[checked]

                if conv.tolerance_type == "relative":
                    # np.where evaluates both branches, so silence the
                    # divide-by-zero warnings for bins whose mean is zero;
                    # those bins are reported as infinitely uncertain.
                    with np.errstate(divide="ignore", invalid="ignore"):
                        metric = np.where(
                            means != 0, sems / np.abs(means), float("inf")
                        )
                    worst = int(np.argmax(metric))
                    worst_sem = metric[worst]
                    target_str = f"{conv.tolerance * 100:.1f}%"
                    current_str = f"{worst_sem * 100:.2f}%"
                else:
                    metric = sems
                    worst = int(np.argmax(metric))
                    worst_sem = metric[worst]
                    target_str = f"{conv.tolerance}"
                    current_str = f"{worst_sem:.4g}"

                converged_bins = int(np.sum(metric < conv.tolerance))

                print(
                    f" {conv.variable:<10}: {converged_bins}/{checked.size} bins converged | "
                    f"Worst bin {int(checked[worst])}: {means[worst]:.4g} | SEM: {current_str} (target: {target_str}) {status}"
                )

                # Add worst SEM to convergence history
                self.convergence_history.append(
                    (self.n_orientations, conv.variable, worst_sem)
                )
                continue

            # Scalar variable
            mean, sem = self._calculate_mean_and_sem(conv.variable)

            # Calculate 95% CI
            ci_lower = mean - 1.96 * sem
            ci_upper = mean + 1.96 * sem

            # Format based on tolerance type
            if conv.tolerance_type == "relative":
                if mean != 0:
                    relative_sem = sem / abs(mean)
                    target_str = f"{conv.tolerance * 100:.1f}%"
                    current_str = f"{relative_sem * 100:.2f}%"
                else:
                    target_str = f"{conv.tolerance} (abs, mean=0)"
                    current_str = f"{sem:.4g}"
            else:
                target_str = f"{conv.tolerance}"
                current_str = f"{sem:.4g}"

            # Print line with mean, SEM, CI, and convergence status
            print(
                f" {conv.variable:<10}: {mean:.6f} ± {sem:.6f} [{ci_lower:.6f}, {ci_upper:.6f}] | SEM: {current_str} (target: {target_str}) {status}"
            )

            # Add to convergence history
            self.convergence_history.append((self.n_orientations, conv.variable, sem))
|