goad-py 0.5.1__cp38-abi3-win_amd64.whl → 0.5.5__cp38-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of goad-py might be problematic. Click here for more details.

goad_py/convergence.py CHANGED
@@ -2,38 +2,71 @@ from dataclasses import dataclass
2
2
  from typing import List, Dict, Optional, Tuple
3
3
  import numpy as np
4
4
  from . import _goad_py as goad
5
+ import os
6
+ import random
7
+ from pathlib import Path
5
8
 
6
9
 
7
10
@dataclass
class Convergable:
    """Represents a variable to monitor for convergence.

    Attributes are validated in ``__post_init__``; any invalid combination
    raises ``ValueError``.
    """

    # 'asymmetry', 'scatt', 'ext', 'albedo', or a Mueller element 'S11'..'S44'
    variable: str
    # 'relative' or 'absolute'
    tolerance_type: str = "relative"
    # Must be strictly positive
    tolerance: float = 0.01
    # For Mueller elements only: theta bin indices to check (None = all bins)
    theta_indices: Optional[List[int]] = None

    def __post_init__(self):
        """Validate the fields and normalise ``theta_indices`` to a list.

        Raises:
            ValueError: If ``variable`` or ``tolerance_type`` is unknown, if
                ``tolerance`` is not positive, if ``theta_indices`` is given
                for a non-Mueller variable, or if ``theta_indices`` is not a
                sequence of integers.
        """
        # Scalar integrated parameters
        valid_scalars = {"asymmetry", "scatt", "ext", "albedo"}

        if self.variable not in valid_scalars and not self.is_mueller():
            raise ValueError(
                f"Invalid variable '{self.variable}'. Must be one of {valid_scalars} or Mueller element (S11-S44)"
            )

        valid_types = {"relative", "absolute"}
        if self.tolerance_type not in valid_types:
            raise ValueError(
                f"Invalid tolerance_type '{self.tolerance_type}'. Must be one of {valid_types}"
            )

        if self.tolerance <= 0:
            raise ValueError(f"Tolerance must be positive, got {self.tolerance}")

        # theta_indices only makes sense for per-theta (Mueller) quantities
        if self.theta_indices is not None:
            if not self.is_mueller():
                raise ValueError("theta_indices can only be used with Mueller elements")
            # Accept the common index containers and store a plain list
            if isinstance(self.theta_indices, (range, tuple, np.ndarray)):
                self.theta_indices = list(self.theta_indices)
            if not isinstance(self.theta_indices, list):
                raise ValueError("theta_indices must be a list or range of integers")
            # The message promises integers, so actually enforce it here
            # instead of failing later during array indexing.
            if not all(isinstance(i, (int, np.integer)) for i in self.theta_indices):
                raise ValueError("theta_indices must be a list or range of integers")

    def is_mueller(self) -> bool:
        """Check if this convergable is a Mueller matrix element (S11..S44)."""
        name = self.variable
        return (
            isinstance(name, str)
            and len(name) == 3
            and name[0] == "S"
            and name[1] in "1234"
            and name[2] in "1234"
        )
55
+
26
56
 
27
57
@dataclass
class ConvergenceResults:
    """Results from a convergence study."""

    # True when every tracked variable met its tolerance
    converged: bool
    # Total number of orientations that were averaged
    n_orientations: int
    # Final mean values for each tracked variable. Scalars for integrated
    # parameters; Mueller elements are stored as per-theta arrays, and a
    # "mueller_1d_sem" entry may hold the SEM array of the 1D Mueller matrix.
    values: Dict[str, float]
    # Final SEM values for each tracked variable (arrays for Mueller elements)
    sem_values: Dict[str, float]
    mueller_1d: Optional[np.ndarray] = None
    mueller_2d: Optional[np.ndarray] = None
    # (n_orientations, variable, sem) entries; None when no history was supplied
    convergence_history: Optional[List[Tuple[int, str, float]]] = None
    # Set when the study stopped without converging
    warning: Optional[str] = None
38
71
 
39
72
 
@@ -48,7 +81,7 @@ class Convergence:
48
81
  max_orientations: int = 100_000,
49
82
  min_batches: int = 10,
50
83
  mueller_1d: bool = True,
51
- mueller_2d: bool = False
84
+ mueller_2d: bool = False,
52
85
  ):
53
86
  """
54
87
  Initialize a convergence study.
@@ -78,8 +111,10 @@ class Convergence:
78
111
  raise ValueError(f"batch_size must be positive, got {batch_size}")
79
112
 
80
113
  if max_orientations <= 0:
81
- raise ValueError(f"max_orientations must be positive, got {max_orientations}")
82
-
114
+ raise ValueError(
115
+ f"max_orientations must be positive, got {max_orientations}"
116
+ )
117
+
83
118
  if min_batches <= 0:
84
119
  raise ValueError(f"min_batches must be positive, got {min_batches}")
85
120
 
@@ -102,8 +137,12 @@ class Convergence:
102
137
  batch_size: Number of orientations in this batch
103
138
  """
104
139
  # Check for None values indicating Custom binning
105
- if (results.asymmetry is None or results.scat_cross is None or
106
- results.ext_cross is None or results.albedo is None):
140
+ if (
141
+ results.asymmetry is None
142
+ or results.scat_cross is None
143
+ or results.ext_cross is None
144
+ or results.albedo is None
145
+ ):
107
146
  raise ValueError(
108
147
  "Received None values for integrated properties. "
109
148
  "This likely means Custom binning scheme is being used. "
@@ -111,26 +150,37 @@ class Convergence:
111
150
  )
112
151
 
113
152
  # Store batch data for proper statistical analysis
114
- batch_info = {
115
- 'batch_size': batch_size,
116
- 'values': {},
117
- 'weights': {}
118
- }
153
+ batch_info = {"batch_size": batch_size, "values": {}, "weights": {}}
154
+
155
+ # Always store all 4 integrated parameters (for unified API output)
156
+ batch_info["values"]["asymmetry"] = results.asymmetry
157
+ batch_info["weights"]["asymmetry"] = results.scat_cross
158
+ batch_info["values"]["scatt"] = results.scat_cross
159
+ batch_info["weights"]["scatt"] = 1.0 # Equal weighting
160
+ batch_info["values"]["ext"] = results.ext_cross
161
+ batch_info["weights"]["ext"] = 1.0 # Equal weighting
162
+ batch_info["values"]["albedo"] = results.albedo
163
+ batch_info["weights"]["albedo"] = results.ext_cross + results.scat_cross
164
+
165
+ # Always store ALL 16 Mueller elements (for unified API output with full SEM)
166
+ if self.mueller_1d and results.mueller_1d is not None:
167
+ mueller_1d_array = np.array(results.mueller_1d) # Shape: (n_theta, 16)
119
168
 
120
- # Store values and weights for tracked variables
121
- for conv in self.convergables:
122
- if conv.variable == 'asymmetry':
123
- batch_info['values']['asymmetry'] = results.asymmetry
124
- batch_info['weights']['asymmetry'] = results.scat_cross
125
- elif conv.variable == 'scatt':
126
- batch_info['values']['scatt'] = results.scat_cross
127
- batch_info['weights']['scatt'] = 1.0 # Equal weighting
128
- elif conv.variable == 'ext':
129
- batch_info['values']['ext'] = results.ext_cross
130
- batch_info['weights']['ext'] = 1.0 # Equal weighting
131
- elif conv.variable == 'albedo':
132
- batch_info['values']['albedo'] = results.albedo
133
- batch_info['weights']['albedo'] = results.ext_cross + results.scat_cross
169
+ # Store all 16 Mueller elements (S11, S12, ..., S44)
170
+ for row in range(1, 5):
171
+ for col in range(1, 5):
172
+ element_name = f"S{row}{col}"
173
+ mueller_idx = (row - 1) * 4 + (col - 1)
174
+ mueller_element = mueller_1d_array[
175
+ :, mueller_idx
176
+ ] # Shape: (n_theta,)
177
+
178
+ batch_info["values"][element_name] = mueller_element
179
+ batch_info["weights"][element_name] = 1.0 # Equal weighting
180
+
181
+ # Store theta bins if not already stored (for display purposes)
182
+ if "mueller_theta_bins" not in batch_info and results.bins_1d is not None:
183
+ batch_info["mueller_theta_bins"] = np.array(results.bins_1d)
134
184
 
135
185
  self.batch_data.append(batch_info)
136
186
 
@@ -152,6 +202,64 @@ class Convergence:
152
202
  # Update total orientation count
153
203
  self.n_orientations += batch_size
154
204
 
205
def _calculate_mean_and_sem_array(
    self, variable: str
) -> Tuple[np.ndarray, np.ndarray]:
    """Return per-theta mean and SEM arrays for a Mueller matrix element.

    Args:
        variable: Mueller element name (e.g., 'S11')

    Returns:
        Tuple of (mean_array, sem_array), one entry per theta bin. Both are
        empty when no batch recorded ``variable``; with a single batch the
        SEM entries are all ``inf`` because no variance can be estimated.
    """
    # Pair each batch's theta array with its batch size, skipping batches
    # that never stored this variable.
    recorded = [
        (entry["values"][variable], entry["batch_size"])
        for entry in self.batch_data
        if variable in entry["values"]
    ]
    if not recorded:
        return np.array([]), np.array([])

    # Rows are batches, columns are theta bins.
    per_batch = np.array([theta_values for theta_values, _ in recorded])
    sizes = np.array([size for _, size in recorded])
    n_bins = per_batch.shape[1]

    if per_batch.shape[0] < 2:
        # A single batch gives a mean but no spread estimate.
        return per_batch[0], np.full(n_bins, float("inf"))

    # Mean per theta bin, weighting each batch by its orientation count.
    weighted_mean = np.average(per_batch, axis=0, weights=sizes)

    # Sample variance of the batch values at each theta bin.
    between_batch_var = np.var(per_batch, axis=0, ddof=1)

    # Scale by the average batch size to estimate the population variance,
    # then convert to a standard error for the pooled sample.
    population_var = between_batch_var * np.mean(sizes)
    sem = np.sqrt(population_var / (np.sum(sizes) - 1))

    return weighted_mean, sem
262
+
155
263
  def _calculate_mean_and_sem(self, variable: str) -> Tuple[float, float]:
156
264
  """Calculate mean and standard error of the mean for a variable using batch data.
157
265
 
@@ -162,7 +270,7 @@ class Convergence:
162
270
  Tuple of (mean, sem)
163
271
  """
164
272
  if not self.batch_data:
165
- return 0.0, float('inf')
273
+ return 0.0, float("inf")
166
274
 
167
275
  # Extract batch values and weights
168
276
  batch_values = []
@@ -170,20 +278,20 @@ class Convergence:
170
278
  batch_sizes = []
171
279
 
172
280
  for batch in self.batch_data:
173
- if variable in batch['values']:
174
- batch_values.append(batch['values'][variable])
175
- batch_weights.append(batch['weights'][variable])
176
- batch_sizes.append(batch['batch_size'])
281
+ if variable in batch["values"]:
282
+ batch_values.append(batch["values"][variable])
283
+ batch_weights.append(batch["weights"][variable])
284
+ batch_sizes.append(batch["batch_size"])
177
285
 
178
286
  if not batch_values:
179
- return 0.0, float('inf')
287
+ return 0.0, float("inf")
180
288
 
181
289
  batch_values = np.array(batch_values)
182
290
  batch_weights = np.array(batch_weights)
183
291
  batch_sizes = np.array(batch_sizes)
184
292
 
185
293
  # For weighted variables (asymmetry, albedo), use weighted statistics
186
- if variable in ['asymmetry', 'albedo']:
294
+ if variable in ["asymmetry", "albedo"]:
187
295
  # Calculate weighted mean across batches
188
296
  # Each batch contributes: weight * batch_size * value
189
297
  total_weighted_sum = np.sum(batch_weights * batch_sizes * batch_values)
@@ -192,17 +300,23 @@ class Convergence:
192
300
 
193
301
  # Calculate weighted variance between batches
194
302
  if len(batch_values) < 2:
195
- return weighted_mean, float('inf') # Cannot estimate variance with < 2 batches
303
+ return weighted_mean, float(
304
+ "inf"
305
+ ) # Cannot estimate variance with < 2 batches
196
306
 
197
307
  # For batch means, we need to account for the effective weight of each batch
198
308
  effective_weights = batch_weights * batch_sizes
199
- weighted_variance_batch_means = np.sum(effective_weights * (batch_values - weighted_mean)**2) / np.sum(effective_weights)
309
+ weighted_variance_batch_means = np.sum(
310
+ effective_weights * (batch_values - weighted_mean) ** 2
311
+ ) / np.sum(effective_weights)
200
312
 
201
313
  # Scale up to estimate population variance
202
314
  # Batch means have variance = population_variance / average_batch_size
203
315
  # So population_variance ≈ batch_means_variance * average_batch_size
204
316
  avg_batch_size = np.average(batch_sizes, weights=effective_weights)
205
- estimated_population_variance = weighted_variance_batch_means * avg_batch_size
317
+ estimated_population_variance = (
318
+ weighted_variance_batch_means * avg_batch_size
319
+ )
206
320
 
207
321
  # Calculate SEM for the total sample (using n-1 for sample standard error)
208
322
  total_n = np.sum(batch_sizes)
@@ -219,7 +333,7 @@ class Convergence:
219
333
 
220
334
  # Calculate variance between batch means
221
335
  if len(batch_values) < 2:
222
- return mean, float('inf')
336
+ return mean, float("inf")
223
337
 
224
338
  batch_means_variance = np.var(batch_values, ddof=1)
225
339
 
@@ -243,20 +357,58 @@ class Convergence:
243
357
  converged = {}
244
358
 
245
359
  for conv in self.convergables:
246
- mean, sem = self._calculate_mean_and_sem(conv.variable)
247
-
248
- # Calculate tolerance based on type
249
- if conv.tolerance_type == 'relative':
250
- # Relative tolerance: SEM / |mean| < tolerance
251
- if mean != 0:
252
- relative_sem = sem / abs(mean)
253
- converged[conv.variable] = relative_sem < conv.tolerance
360
+ if conv.is_mueller():
361
+ # Mueller element - check theta bins (all or specific indices)
362
+ mean_array, sem_array = self._calculate_mean_and_sem_array(
363
+ conv.variable
364
+ )
365
+
366
+ if len(mean_array) == 0:
367
+ converged[conv.variable] = False
368
+ continue
369
+
370
+ # Select theta bins to check
371
+ if conv.theta_indices is not None:
372
+ # Check only specified indices
373
+ indices = [i for i in conv.theta_indices if i < len(mean_array)]
374
+ if not indices:
375
+ converged[conv.variable] = False
376
+ continue
377
+ mean_subset = mean_array[indices]
378
+ sem_subset = sem_array[indices]
254
379
  else:
255
- # If mean is zero, use absolute comparison
256
- converged[conv.variable] = sem < conv.tolerance
380
+ # Check all bins
381
+ mean_subset = mean_array
382
+ sem_subset = sem_array
383
+
384
+ # Check convergence at selected theta bins
385
+ if conv.tolerance_type == "relative":
386
+ # Relative tolerance: SEM / |mean| < tolerance
387
+ relative_sem = np.where(
388
+ mean_subset != 0,
389
+ sem_subset / np.abs(mean_subset),
390
+ sem_subset / conv.tolerance,
391
+ )
392
+ converged[conv.variable] = np.all(relative_sem < conv.tolerance)
393
+ else:
394
+ # Absolute tolerance: SEM < tolerance
395
+ converged[conv.variable] = np.all(sem_subset < conv.tolerance)
257
396
  else:
258
- # Absolute tolerance: SEM < tolerance
259
- converged[conv.variable] = sem < conv.tolerance
397
+ # Scalar variable
398
+ mean, sem = self._calculate_mean_and_sem(conv.variable)
399
+
400
+ # Calculate tolerance based on type
401
+ if conv.tolerance_type == "relative":
402
+ # Relative tolerance: SEM / |mean| < tolerance
403
+ if mean != 0:
404
+ relative_sem = sem / abs(mean)
405
+ converged[conv.variable] = relative_sem < conv.tolerance
406
+ else:
407
+ # If mean is zero, use absolute comparison
408
+ converged[conv.variable] = sem < conv.tolerance
409
+ else:
410
+ # Absolute tolerance: SEM < tolerance
411
+ converged[conv.variable] = sem < conv.tolerance
260
412
 
261
413
  return converged
262
414
 
@@ -269,7 +421,7 @@ class Convergence:
269
421
  # Check minimum batches requirement first
270
422
  if len(self.batch_data) < self.min_batches:
271
423
  return False
272
-
424
+
273
425
  converged_status = self._check_convergence()
274
426
  return all(converged_status.values())
275
427
 
@@ -279,38 +431,141 @@ class Convergence:
279
431
  Args:
280
432
  iteration: Current iteration number
281
433
  """
282
- print(f"\nIteration {iteration} ({self.n_orientations} orientations):")
434
+ # Calculate minimum required orientations
435
+ min_required = self.min_batches * self.batch_size
436
+
437
+ # Show progress with min orientations requirement
438
+ if self.n_orientations < min_required:
439
+ print(
440
+ f"\nIteration {iteration} ({self.n_orientations}/{min_required} orientations, min not reached):"
441
+ )
442
+ else:
443
+ print(
444
+ f"\nIteration {iteration} ({self.n_orientations} orientations, min {min_required} reached):"
445
+ )
283
446
 
284
447
  converged_status = self._check_convergence()
285
448
 
286
449
  for conv in self.convergables:
287
- mean, sem = self._calculate_mean_and_sem(conv.variable)
288
-
289
- # Calculate 95% CI
290
- ci_lower = mean - 1.96 * sem
291
- ci_upper = mean + 1.96 * sem
292
-
293
- # Format based on tolerance type
294
- if conv.tolerance_type == 'relative':
295
- if mean != 0:
296
- relative_sem = sem / abs(mean)
297
- target_str = f"{conv.tolerance*100:.1f}%"
298
- current_str = f"{relative_sem*100:.2f}%"
450
+ if conv.is_mueller():
451
+ # Mueller element - show worst-case theta bin
452
+ mean_array, sem_array = self._calculate_mean_and_sem_array(
453
+ conv.variable
454
+ )
455
+
456
+ if len(mean_array) == 0:
457
+ print(f" {conv.variable:<10}: No data yet")
458
+ continue
459
+
460
+ # Get theta bins from results (assuming we have access to bins_1d)
461
+ if hasattr(self, "settings") and hasattr(self.settings, "binning"):
462
+ # We'll get theta values from the first batch's mueller_1d if available
463
+ theta_bins = None
464
+ for batch in self.batch_data:
465
+ if "mueller_theta_bins" in batch:
466
+ theta_bins = batch["mueller_theta_bins"]
467
+ break
468
+ if theta_bins is None:
469
+ theta_bins = np.arange(len(mean_array))
299
470
  else:
300
- target_str = f"{conv.tolerance} (abs, mean=0)"
301
- current_str = f"{sem:.4g}"
471
+ theta_bins = np.arange(len(mean_array))
472
+
473
+ # Calculate relative SEM for each theta
474
+ if conv.tolerance_type == "relative":
475
+ relative_sem_array = np.where(
476
+ mean_array != 0, sem_array / np.abs(mean_array), float("inf")
477
+ )
478
+ worst_idx = np.argmax(relative_sem_array)
479
+ worst_sem = relative_sem_array[worst_idx]
480
+ target_str = f"{conv.tolerance * 100:.1f}%"
481
+ current_str = f"{worst_sem * 100:.2f}%"
482
+ else:
483
+ worst_idx = np.argmax(sem_array)
484
+ worst_sem = sem_array[worst_idx]
485
+ target_str = f"{conv.tolerance}"
486
+ current_str = f"{worst_sem:.4g}"
487
+
488
+ worst_theta = theta_bins[worst_idx]
489
+ worst_mean = mean_array[worst_idx]
490
+
491
+ # Count converged bins (either all or specified indices)
492
+ if conv.theta_indices is not None:
493
+ # Only checking specific bins
494
+ indices = [i for i in conv.theta_indices if i < len(mean_array)]
495
+ if conv.tolerance_type == "relative":
496
+ converged_bins = np.sum(
497
+ relative_sem_array[indices] < conv.tolerance
498
+ )
499
+ else:
500
+ converged_bins = np.sum(sem_array[indices] < conv.tolerance)
501
+ total_bins = len(indices)
502
+ bin_desc = (
503
+ f"θ={[theta_bins[i] for i in indices]}"
504
+ if len(indices) <= 3
505
+ else f"{len(indices)} bins"
506
+ )
507
+ else:
508
+ # Checking all bins
509
+ if conv.tolerance_type == "relative":
510
+ converged_bins = np.sum(relative_sem_array < conv.tolerance)
511
+ else:
512
+ converged_bins = np.sum(sem_array < conv.tolerance)
513
+ total_bins = len(mean_array)
514
+ bin_desc = f"{total_bins} bins"
515
+
516
+ status = "✓" if converged_status[conv.variable] else "❌"
517
+
518
+ # Print Mueller convergence info
519
+ if conv.theta_indices is not None and len(conv.theta_indices) <= 3:
520
+ # For small number of specific bins, show them explicitly
521
+ print(
522
+ f" {conv.variable:<10}: {converged_bins}/{total_bins} {bin_desc} | "
523
+ f"Worst θ={worst_theta:.1f}°: {worst_mean:.4g} | SEM: {current_str} (target: {target_str}) {status}"
524
+ )
525
+ else:
526
+ # For many bins, use standard format
527
+ print(
528
+ f" {conv.variable:<10}: {converged_bins}/{total_bins} bins converged | "
529
+ f"Worst θ={worst_theta:.1f}°: {worst_mean:.4g} | SEM: {current_str} (target: {target_str}) {status}"
530
+ )
531
+
532
+ # Add worst SEM to convergence history
533
+ self.convergence_history.append(
534
+ (self.n_orientations, conv.variable, worst_sem)
535
+ )
302
536
  else:
303
- target_str = f"{conv.tolerance}"
304
- current_str = f"{sem:.4g}"
537
+ # Scalar variable
538
+ mean, sem = self._calculate_mean_and_sem(conv.variable)
539
+
540
+ # Calculate 95% CI
541
+ ci_lower = mean - 1.96 * sem
542
+ ci_upper = mean + 1.96 * sem
543
+
544
+ # Format based on tolerance type
545
+ if conv.tolerance_type == "relative":
546
+ if mean != 0:
547
+ relative_sem = sem / abs(mean)
548
+ target_str = f"{conv.tolerance * 100:.1f}%"
549
+ current_str = f"{relative_sem * 100:.2f}%"
550
+ else:
551
+ target_str = f"{conv.tolerance} (abs, mean=0)"
552
+ current_str = f"{sem:.4g}"
553
+ else:
554
+ target_str = f"{conv.tolerance}"
555
+ current_str = f"{sem:.4g}"
305
556
 
306
- # Status indicator
307
- status = "✓" if converged_status[conv.variable] else "❌"
557
+ # Status indicator
558
+ status = "✓" if converged_status[conv.variable] else "❌"
308
559
 
309
- # Print line with mean, SEM, CI, and convergence status
310
- print(f" {conv.variable:<10}: {mean:.6f} ± {sem:.6f} [{ci_lower:.6f}, {ci_upper:.6f}] | SEM: {current_str} (target: {target_str}) {status}")
560
+ # Print line with mean, SEM, CI, and convergence status
561
+ print(
562
+ f" {conv.variable:<10}: {mean:.6f} ± {sem:.6f} [{ci_lower:.6f}, {ci_upper:.6f}] | SEM: {current_str} (target: {target_str}) {status}"
563
+ )
311
564
 
312
- # Add to convergence history
313
- self.convergence_history.append((self.n_orientations, conv.variable, sem))
565
+ # Add to convergence history
566
+ self.convergence_history.append(
567
+ (self.n_orientations, conv.variable, sem)
568
+ )
314
569
 
315
570
  def run(self) -> ConvergenceResults:
316
571
  """Run the convergence study.
@@ -358,15 +613,46 @@ class Convergence:
358
613
  final_values = {}
359
614
  final_sems = {}
360
615
  for conv in self.convergables:
361
- mean, sem = self._calculate_mean_and_sem(conv.variable)
362
- final_values[conv.variable] = mean
363
- final_sems[conv.variable] = sem
616
+ if conv.is_mueller():
617
+ mean_array, sem_array = self._calculate_mean_and_sem_array(
618
+ conv.variable
619
+ )
620
+ final_values[conv.variable] = mean_array
621
+ final_sems[conv.variable] = sem_array
622
+ else:
623
+ mean, sem = self._calculate_mean_and_sem(conv.variable)
624
+ final_values[conv.variable] = mean
625
+ final_sems[conv.variable] = sem
364
626
 
365
- # Prepare Mueller matrices
627
+ # Prepare Mueller matrices with SEM
366
628
  mueller_1d = None
629
+ mueller_1d_sem = None
367
630
  mueller_2d = None
631
+
368
632
  if self.mueller_1d and self.mueller_1d_sum is not None:
369
633
  mueller_1d = self.mueller_1d_sum / self.n_orientations
634
+
635
+ # Compute SEM for all 16 Mueller elements
636
+ # mueller_1d shape: (n_theta, 16)
637
+ n_theta = mueller_1d.shape[0]
638
+ mueller_1d_sem = np.zeros_like(mueller_1d)
639
+
640
+ for row in range(1, 5):
641
+ for col in range(1, 5):
642
+ element_name = f"S{row}{col}"
643
+ mueller_idx = (row - 1) * 4 + (col - 1)
644
+
645
+ # Calculate mean and SEM for this element across all theta bins
646
+ mean_array, sem_array = self._calculate_mean_and_sem_array(
647
+ element_name
648
+ )
649
+
650
+ if len(sem_array) > 0:
651
+ mueller_1d_sem[:, mueller_idx] = sem_array
652
+
653
+ # Store mueller_1d_sem in final_values for unified API access
654
+ final_values["mueller_1d_sem"] = mueller_1d_sem
655
+
370
656
  if self.mueller_2d and self.mueller_2d_sum is not None:
371
657
  mueller_2d = self.mueller_2d_sum / self.n_orientations
372
658
 
@@ -378,5 +664,219 @@ class Convergence:
378
664
  mueller_1d=mueller_1d,
379
665
  mueller_2d=mueller_2d,
380
666
  convergence_history=self.convergence_history,
381
- warning=warning
667
+ warning=warning,
382
668
  )
669
+
670
+
671
class EnsembleConvergence(Convergence):
    """Runs convergence study over an ensemble of particle geometries.

    Each batch randomly samples from a directory of geometry files,
    allowing convergence analysis of orientation-averaged and
    geometry-averaged scattering properties.
    """

    def __init__(
        self,
        settings: goad.Settings,
        convergables: List[Convergable],
        geom_dir: str,
        batch_size: int = 24,
        max_orientations: int = 100_000,
        min_batches: int = 10,
        mueller_1d: bool = True,
        mueller_2d: bool = False,
        seed: Optional[int] = None,
    ):
        """
        Initialize an ensemble convergence study.

        Args:
            settings: GOAD settings for the simulation (geom_path will be overridden)
            convergables: List of variables to monitor for convergence
            geom_dir: Directory containing .obj geometry files
            batch_size: Number of orientations per iteration
            max_orientations: Maximum total orientations before stopping
            min_batches: Minimum number of batches before allowing convergence
            mueller_1d: Whether to collect 1D Mueller matrices
            mueller_2d: Whether to collect 2D Mueller matrices
            seed: Optional seed for the geometry selection only. When None
                (default) the module-level ``random`` generator is used, as
                before. Orientation sampling is not affected by this seed.

        Raises:
            ValueError: If ``geom_dir`` does not exist, is not a directory,
                or contains no .obj files.
        """
        # Discover all .obj files in directory
        geom_path = Path(geom_dir)
        if not geom_path.exists():
            raise ValueError(f"Geometry directory does not exist: {geom_dir}")

        if not geom_path.is_dir():
            raise ValueError(f"Path is not a directory: {geom_dir}")

        # Sorted so the candidate list is deterministic; directories that
        # happen to be named "*.obj" are not geometry files and are skipped.
        self.geom_files = sorted(
            f.name for f in geom_path.glob("*.obj") if f.is_file()
        )

        if not self.geom_files:
            raise ValueError(f"No .obj files found in directory: {geom_dir}")

        self.geom_dir = str(geom_path.resolve())

        # Keep using the global generator unless a seed is requested, so
        # callers that seed ``random`` themselves see unchanged behaviour.
        self._rng = random if seed is None else random.Random(seed)

        print(f"Found {len(self.geom_files)} geometry files in {self.geom_dir}")

        # Call parent constructor
        super().__init__(
            settings=settings,
            convergables=convergables,
            batch_size=batch_size,
            max_orientations=max_orientations,
            min_batches=min_batches,
            mueller_1d=mueller_1d,
            mueller_2d=mueller_2d,
        )

    def run(self) -> ConvergenceResults:
        """Run the ensemble convergence study.

        Each batch iteration randomly selects a geometry file from the
        ensemble directory before running the orientation averaging.

        Returns:
            ConvergenceResults containing final ensemble-averaged values
        """
        iteration = 0
        converged = False
        warning = None
        # Loop-invariant: orientations needed before convergence may be declared
        min_required = self.min_batches * self.batch_size

        while not converged and self.n_orientations < self.max_orientations:
            iteration += 1

            # Randomly select a geometry file for this batch
            geom_file = self._rng.choice(self.geom_files)
            geom_path = os.path.join(self.geom_dir, geom_file)

            # Never exceed max_orientations with the final batch
            remaining = self.max_orientations - self.n_orientations
            batch_size = min(self.batch_size, remaining)

            # Update settings with selected geometry and fresh orientations
            self.settings.geom_path = geom_path
            self.settings.orientation = goad.create_uniform_orientation(batch_size)

            # Run MultiProblem with selected geometry
            mp = goad.MultiProblem(self.settings)
            mp.py_solve()

            # Update statistics
            self._update_statistics(mp.results, batch_size)

            # Print progress (with geometry info)
            if self.n_orientations < min_required:
                progress = f"{self.n_orientations}/{min_required} orientations, min not reached"
            else:
                progress = f"{self.n_orientations} orientations, min {min_required} reached"
            print(f"\nIteration {iteration} ({progress}) - Geometry: {geom_file}")
            self._print_progress_without_header(iteration)

            # Check convergence
            converged = self._all_converged()

        # Prepare final results
        if converged:
            print(f"\nConverged after {self.n_orientations} orientations.")
        else:
            warning = f"Maximum orientations ({self.max_orientations}) reached without convergence"
            print(f"\nWarning: {warning}")

        final_values, final_sems = self._ensemble_final_statistics()

        mueller_1d = None
        mueller_2d = None

        if self.mueller_1d and self.mueller_1d_sum is not None:
            mueller_1d = self.mueller_1d_sum / self.n_orientations
            # Store mueller_1d_sem in final_values for unified API access
            final_values["mueller_1d_sem"] = self._ensemble_mueller_1d_sem(mueller_1d)

        if self.mueller_2d and self.mueller_2d_sum is not None:
            mueller_2d = self.mueller_2d_sum / self.n_orientations

        return ConvergenceResults(
            converged=converged,
            n_orientations=self.n_orientations,
            values=final_values,
            sem_values=final_sems,
            mueller_1d=mueller_1d,
            mueller_2d=mueller_2d,
            convergence_history=self.convergence_history,
            warning=warning,
        )

    def _ensemble_final_statistics(self) -> Tuple[Dict[str, object], Dict[str, object]]:
        """Collect final mean and SEM for every tracked convergable.

        Mueller elements yield per-theta arrays; scalar variables yield floats.
        """
        final_values = {}
        final_sems = {}
        for conv in self.convergables:
            if conv.is_mueller():
                mean, sem = self._calculate_mean_and_sem_array(conv.variable)
            else:
                mean, sem = self._calculate_mean_and_sem(conv.variable)
            final_values[conv.variable] = mean
            final_sems[conv.variable] = sem
        return final_values, final_sems

    def _ensemble_mueller_1d_sem(self, mueller_1d: np.ndarray) -> np.ndarray:
        """Build the SEM array matching ``mueller_1d`` for all 16 elements.

        Columns for elements without recorded batch data stay zero.
        """
        mueller_1d_sem = np.zeros_like(mueller_1d)
        for row in range(1, 5):
            for col in range(1, 5):
                # Column layout is row-major: S11 -> 0, S12 -> 1, ..., S44 -> 15
                mueller_idx = (row - 1) * 4 + (col - 1)
                _, sem_array = self._calculate_mean_and_sem_array(f"S{row}{col}")
                if len(sem_array) > 0:
                    mueller_1d_sem[:, mueller_idx] = sem_array
        return mueller_1d_sem

    def _print_progress_without_header(self, iteration: int):
        """Print convergence progress without iteration header (already printed with geometry).

        Scalar variables are reported with mean, SEM and a 95% interval.
        Mueller elements hold one value per theta bin, so they are summarised
        by their worst bin instead of being passed to the scalar statistics.
        Every reported SEM is appended to ``self.convergence_history``.
        """
        converged_status = self._check_convergence()

        for conv in self.convergables:
            # Status indicator
            status = "✓" if converged_status[conv.variable] else "❌"

            if conv.is_mueller():
                worst_sem = self._ensemble_print_mueller_progress(conv, status)
                if worst_sem is not None:
                    self.convergence_history.append(
                        (self.n_orientations, conv.variable, worst_sem)
                    )
                continue

            mean, sem = self._calculate_mean_and_sem(conv.variable)

            # Calculate 95% CI
            ci_lower = mean - 1.96 * sem
            ci_upper = mean + 1.96 * sem

            # Format based on tolerance type
            if conv.tolerance_type == "relative":
                if mean != 0:
                    relative_sem = sem / abs(mean)
                    target_str = f"{conv.tolerance * 100:.1f}%"
                    current_str = f"{relative_sem * 100:.2f}%"
                else:
                    target_str = f"{conv.tolerance} (abs, mean=0)"
                    current_str = f"{sem:.4g}"
            else:
                target_str = f"{conv.tolerance}"
                current_str = f"{sem:.4g}"

            # Print line with mean, SEM, CI, and convergence status
            print(
                f" {conv.variable:<10}: {mean:.6f} ± {sem:.6f} [{ci_lower:.6f}, {ci_upper:.6f}] | SEM: {current_str} (target: {target_str}) {status}"
            )

            # Add to convergence history
            self.convergence_history.append((self.n_orientations, conv.variable, sem))

    def _ensemble_print_mueller_progress(self, conv, status: str) -> Optional[float]:
        """Print a worst-bin summary line for one Mueller convergable.

        Returns the worst (relative or absolute) SEM among the checked theta
        bins, or None when there is nothing to report yet.
        """
        mean_array, sem_array = self._calculate_mean_and_sem_array(conv.variable)

        if len(mean_array) == 0:
            print(f" {conv.variable:<10}: No data yet")
            return None

        # Restrict to the requested bins, dropping indices past the last bin
        if conv.theta_indices is not None:
            indices = [i for i in conv.theta_indices if i < len(mean_array)]
        else:
            indices = list(range(len(mean_array)))

        if not indices:
            print(f" {conv.variable:<10}: No valid theta bins selected {status}")
            return None

        means = mean_array[indices]
        sems = sem_array[indices]

        if conv.tolerance_type == "relative":
            # np.where evaluates both branches, so silence the 0-division
            # warnings for bins whose mean is exactly zero (reported as inf).
            with np.errstate(divide="ignore", invalid="ignore"):
                metric = np.where(means != 0, sems / np.abs(means), float("inf"))
            worst_pos = int(np.argmax(metric))
            target_str = f"{conv.tolerance * 100:.1f}%"
            current_str = f"{metric[worst_pos] * 100:.2f}%"
        else:
            metric = sems
            worst_pos = int(np.argmax(metric))
            target_str = f"{conv.tolerance}"
            current_str = f"{metric[worst_pos]:.4g}"

        converged_bins = int(np.sum(metric < conv.tolerance))

        print(
            f" {conv.variable:<10}: {converged_bins}/{len(indices)} bins converged | "
            f"Worst bin {indices[worst_pos]}: {means[worst_pos]:.4g} | SEM: {current_str} (target: {target_str}) {status}"
        )
        return float(metric[worst_pos])