integrate-module 0.97.0__tar.gz → 0.98.0__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (24)
  1. {integrate_module-0.97.0/integrate_module.egg-info → integrate_module-0.98.0}/PKG-INFO +2 -1
  2. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/__init__.py +2 -0
  3. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate.py +43 -15
  4. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_borehole.py +2 -2
  5. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_io.py +97 -44
  6. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_plot.py +48 -7
  7. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_query.py +204 -31
  8. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_rejection.py +40 -2
  9. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_rejection_cli.py +11 -4
  10. integrate_module-0.98.0/integrate/integrate_rejection_jax.py +494 -0
  11. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_timing_cli.py +22 -2
  12. {integrate_module-0.97.0 → integrate_module-0.98.0/integrate_module.egg-info}/PKG-INFO +2 -1
  13. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/SOURCES.txt +1 -0
  14. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/requires.txt +1 -0
  15. {integrate_module-0.97.0 → integrate_module-0.98.0}/pyproject.toml +2 -1
  16. {integrate_module-0.97.0 → integrate_module-0.98.0}/LICENSE +0 -0
  17. {integrate_module-0.97.0 → integrate_module-0.98.0}/README.md +0 -0
  18. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/gex.py +0 -0
  19. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_hdf5_info_cli.py +0 -0
  20. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/dependency_links.txt +0 -0
  21. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/entry_points.txt +0 -0
  22. {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/top_level.txt +0 -0
  23. {integrate_module-0.97.0 → integrate_module-0.98.0}/setup.cfg +0 -0
  24. {integrate_module-0.97.0 → integrate_module-0.98.0}/tests/test_likelihood_multinomial.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: integrate_module
3
- Version: 0.97.0
3
+ Version: 0.98.0
4
4
  Summary: Localized probabilistic data integration
5
5
  Author-email: Thomas Mejer Hansen <tmeha@geo.au.dk>
6
6
  License: MIT
@@ -26,6 +26,7 @@ Requires-Dist: jupyter>=1.0.0
26
26
  Requires-Dist: jupytext
27
27
  Requires-Dist: pandas
28
28
  Requires-Dist: pyvista
29
+ Requires-Dist: litellm
29
30
  Provides-Extra: dev
30
31
  Requires-Dist: pytest; extra == "dev"
31
32
  Requires-Dist: black; extra == "dev"
@@ -78,6 +78,7 @@ from integrate.integrate_io import check_data
78
78
  from integrate.integrate_io import merge_prior
79
79
  from integrate.integrate_io import merge_data
80
80
  from integrate.integrate_io import merge_posterior
81
+ from integrate.integrate_io import filter_prior
81
82
  from integrate.integrate_io import read_usf
82
83
  from integrate.integrate_io import read_usf_mul
83
84
  from integrate.integrate_io import test_read_usf
@@ -124,6 +125,7 @@ from integrate.integrate_query import save_query
124
125
  from integrate.integrate_query import load_query
125
126
  from integrate.integrate_query import get_prior_model_info
126
127
  from integrate.integrate_query import query_from_text
128
+ from integrate.integrate_query import query_test_llm
127
129
 
128
130
  # Import gex module functions
129
131
  from integrate.gex import read_gex as read_gex2
@@ -337,7 +337,8 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
337
337
  - ``/Mx/Mean`` [Np, Nm] Arithmetic mean of posterior realizations.
338
338
  - ``/Mx/LogMean`` [Np, Nm] Geometric mean (exp of mean of log values).
339
339
  - ``/Mx/Median`` [Np, Nm] Median of posterior realizations.
340
- - ``/Mx/Std`` [Np, Nm] Standard deviation of log10(posterior).
340
+ - ``/Mx/Std`` [Np, Nm] Standard deviation of posterior realizations.
341
+ - ``/Mx/LogStd`` [Np, Nm] Standard deviation of log10(posterior realizations).
341
342
  - ``/Mx/HarmonicMean`` [Np, Nm] Trimmed harmonic mean: conductivity samples
342
343
  are trimmed 10% each tail, averaged, then inverted back to resistivity.
343
344
  - ``/Mx/KL`` [Np, Nm] KL divergence in bits. Only written when
@@ -471,6 +472,7 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
471
472
  M_logmean = np.full((nsounding, nm), np.nan)
472
473
  M_mean = np.full((nsounding, nm), np.nan)
473
474
  M_std = np.full((nsounding, nm), np.nan)
475
+ M_logstd = np.full((nsounding, nm), np.nan)
474
476
  M_median = np.full((nsounding, nm), np.nan)
475
477
  M_harmonicmean = np.full((nsounding, nm), np.nan)
476
478
  if computeKL_continuous:
@@ -508,7 +510,8 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
508
510
  M_mean[iid,:] = np.mean(m_post, axis=0)
509
511
  M_median[iid,:] = np.median(m_post, axis=0)
510
512
  with np.errstate(invalid='ignore', divide='ignore'):
511
- M_std[iid,:] = np.std(np.log10(np.maximum(m_post, 1e-10)), axis=0)
513
+ M_logstd[iid,:] = np.std(np.log10(np.maximum(m_post, 1e-10)), axis=0)
514
+ M_std[iid,:] = np.std(m_post, axis=0)
512
515
  _c = 1.0 / np.maximum(m_post, 1e-10)
513
516
  _k = int(np.floor(0.10 * _c.shape[0]))
514
517
  _cs = np.sort(_c, axis=0)
@@ -563,10 +566,9 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
563
566
  # Geometric Mean: exp(mean(log(x)))
564
567
  M_logmean[current_iids, :] = np.exp(np.mean(log_cube, axis=1))
565
568
 
566
- # Std of Log10:
567
- # Math identity: std(log10(x)) = std(ln(x) / ln(10)) = std(ln(x)) * (1/ln(10))
568
- # We reuse 'log_cube' and multiply by constant (faster than re-calculating log10)
569
- M_std[current_iids, :] = np.std(log_cube, axis=1) * INV_LOG_10
569
+ # LogStd: std(log10(x)) = std(ln(x)) * (1/ln(10)); reuse log_cube for speed
570
+ M_logstd[current_iids, :] = np.std(log_cube, axis=1) * INV_LOG_10
571
+ M_std[current_iids, :] = np.std(m_cube, axis=1)
570
572
 
571
573
  # Harmonic mean (trimmed 10% each tail in conductivity space)
572
574
  _c = 1.0 / np.maximum(m_cube, 1e-10)
@@ -579,7 +581,7 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
579
581
 
580
582
 
581
583
  # Create datasets
582
- for stat in ['Mean', 'Median', 'Std', 'LogMean', 'HarmonicMean']:
584
+ for stat in ['Mean', 'Median', 'Std', 'LogStd', 'LogMean', 'HarmonicMean']:
583
585
  if stat not in f_post:
584
586
  dset = '/%s/%s' % (name,stat)
585
587
  if dset not in f_post:
@@ -591,6 +593,7 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
591
593
  f_post['/%s/%s' % (name,'Mean')][:] = M_mean
592
594
  f_post['/%s/%s' % (name,'Median')][:] = M_median
593
595
  f_post['/%s/%s' % (name,'Std')][:] = M_std
596
+ f_post['/%s/%s' % (name,'LogStd')][:] = M_logstd
594
597
  f_post['/%s/%s' % (name,'HarmonicMean')][:] = M_harmonicmean
595
598
  if computeKL_continuous:
596
599
  dset = '/%s/KL' % name
@@ -1721,14 +1724,15 @@ def prior_model_layered(lay_dist='uniform', dz = 1, z_max = 90,
1721
1724
 
1722
1725
  # Apply boundaries if any exist
1723
1726
  if n_boundaries > 0:
1724
- boundaries = i_boundaries_all[i, :n_boundaries]
1727
+ boundaries = np.sort(i_boundaries_all[i, :n_boundaries])
1725
1728
  for j in range(n_boundaries):
1726
1729
  M_rho[i, boundaries[j]:] = rho_all[i, j + 1]
1727
1730
 
1728
1731
  # Save sparse representation if requested
1729
1732
  if save_sparse:
1730
1733
  if n_boundaries > 0:
1731
- m_current = np.concatenate((z_boundaries_all[i, :n_boundaries], rho_all[i, :n_lay]))
1734
+ z_sorted = np.sort(z_boundaries_all[i, :n_boundaries])
1735
+ m_current = np.concatenate((z_sorted, rho_all[i, :n_lay]))
1732
1736
  else:
1733
1737
  m_current = rho_all[i, :n_lay]
1734
1738
  M_rho_sparse[i, 0:len(m_current)] = m_current
@@ -3031,7 +3035,7 @@ def allocate_large_page():
3031
3035
  return None
3032
3036
 
3033
3037
 
3034
- def timing_compute(N_arr=[], Nproc_arr=[]):
3038
+ def timing_compute(N_arr=[], Nproc_arr=[], backend='numpy', NcpuForward=0):
3035
3039
  """
3036
3040
  Execute timing benchmark for INTEGRATE workflow components.
3037
3041
 
@@ -3046,7 +3050,11 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
3046
3050
  Default is [100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000, 5000000].
3047
3051
  Nproc_arr : array_like, optional
3048
3052
  Array of processor counts to test. Default is powers of 2 up to available CPUs.
3049
-
3053
+ NcpuForward : int, optional
3054
+ Fixed number of CPUs to use for forward modeling. When > 0, forward modeling always
3055
+ uses this many CPUs regardless of the current Nproc_arr entry. The inversion
3056
+ (rejection sampling) still varies over Nproc_arr. Default is 0 (use Nproc_arr value).
3057
+
3050
3058
  Returns
3051
3059
  -------
3052
3060
  str
@@ -3119,9 +3127,28 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
3119
3127
 
3120
3128
  print("Testing on %d data sets of size(s):" % len(N_arr), N_arr)
3121
3129
  print("Testing on %d sets of core(s):" % len(Nproc_arr), Nproc_arr)
3130
+ if NcpuForward > 0:
3131
+ print("Forward modeling fixed to %d CPUs (inversion varies over Nproc_arr)" % NcpuForward)
3122
3132
 
3123
3133
 
3124
- file_out = 'timing_%s-%s-%dcore_Nproc%d_N%d.npz' % (hostname,system,Ncpu,len(Nproc_arr), len(N_arr))
3134
+ print("Rejection sampling backend: %s" % backend)
3135
+ backend_label = backend
3136
+ if backend == 'jax':
3137
+ import os
3138
+ jax_platform_env = os.environ.get('JAX_PLATFORMS', '').strip().lower()
3139
+ if jax_platform_env in ('gpu', 'cuda', 'rocm'):
3140
+ backend_label = 'jax_gpu'
3141
+ elif jax_platform_env == 'cpu':
3142
+ backend_label = 'jax_cpu'
3143
+ else:
3144
+ try:
3145
+ import jax
3146
+ jax_device = jax.default_backend()
3147
+ backend_label = 'jax_gpu' if jax_device == 'gpu' else 'jax_cpu'
3148
+ except Exception:
3149
+ backend_label = 'jax_cpu'
3150
+ print("JAX backend label: %s (JAX_PLATFORMS='%s')" % (backend_label, jax_platform_env))
3151
+ file_out = 'timing_%s-%s-%dcore_Nproc%d_N%d_%s.npz' % (hostname,system,Ncpu,len(Nproc_arr), len(N_arr), backend_label)
3125
3152
  print("Writing results to %s " % file_out)
3126
3153
 
3127
3154
  ## TIMING
@@ -3170,14 +3197,15 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
3170
3197
  #ig.plot_prior_stats(f_prior_h5)
3171
3198
  #% A2. Compute prior DATA
3172
3199
  t0_forward = time.time()
3173
- f_prior_data_h5 = ig.prior_data_gaaem(f_prior_h5, file_gex, Ncpu=Ncpu, showInfo=showInfo)
3200
+ Ncpu_fwd = NcpuForward if NcpuForward > 0 else Ncpu
3201
+ f_prior_data_h5 = ig.prior_data_gaaem(f_prior_h5, file_gex, Ncpu=Ncpu_fwd, showInfo=showInfo)
3174
3202
  T_forward[i,j]=time.time()-t0_forward
3175
3203
 
3176
3204
  #% READY FOR INVERSION
3177
3205
  N_use = 1000000
3178
3206
  t0_rejection = time.time()
3179
3207
  if testRejection:
3180
- f_post_h5 = ig.integrate_rejection(f_prior_data_h5, f_data_h5, N_use = N_use, parallel=1, updatePostStat=False, Ncpu=Ncpu, showInfo=showInfo)
3208
+ f_post_h5 = ig.integrate_rejection(f_prior_data_h5, f_data_h5, N_use=N_use, parallel=1, updatePostStat=False, Ncpu=Ncpu, showInfo=showInfo, backend=backend)
3181
3209
  T_rejection[i,j]=time.time()-t0_rejection
3182
3210
 
3183
3211
  #% Compute some generic statistic of the posterior distribution (Mean, Median, Std)
@@ -3187,7 +3215,7 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
3187
3215
  T_poststat[i,j]=time.time()-t0_poststat
3188
3216
 
3189
3217
  T_total = T_prior + T_forward + T_rejection + T_poststat
3190
- np.savez(file_out, T_total=T_total, T_prior=T_prior, T_forward=T_forward, T_rejection=T_rejection, T_poststat=T_poststat, N_arr=N_arr, Nproc_arr=Nproc_arr, nobs=nobs)
3218
+ np.savez(file_out, T_total=T_total, T_prior=T_prior, T_forward=T_forward, T_rejection=T_rejection, T_poststat=T_poststat, N_arr=N_arr, Nproc_arr=Nproc_arr, nobs=nobs, backend=backend)
3191
3219
 
3192
3220
 
3193
3221
  return file_out
@@ -763,8 +763,8 @@ def get_weight_from_position(f_data_h5,x_well=0,y_well=0, i_ref=-1, r_dis = 400,
763
763
  w_data = np.exp(-1*sum_dd**2/r_data**2)
764
764
 
765
765
 
766
- # COmpute the distance from d_ref to all other points
767
- dis = np.sqrt((X-X[i_ref])**2 + (Y-Y[i_ref])**2)
766
+ # Compute the distance from each data point to the actual borehole location
767
+ dis = np.sqrt((X-x_well)**2 + (Y-y_well)**2)
768
768
  w_dis = np.exp(-1*dis**2/r_dis**2)
769
769
 
770
770
  w_combined = w_data * w_dis
@@ -2420,34 +2420,47 @@ def copy_prior(input_filename, output_filename, idx=None, N_use=None, loadtomem=
2420
2420
  return output_filename
2421
2421
 
2422
2422
 
2423
- def filter_prior(f_prior_h5, type='nonnegative_data', id=1,
2424
- f_prior_filtered_h5='', **kwargs):
2423
+ def filter_prior(f_prior_h5, index_use=None, *, type=None, id=1,
2424
+ f_prior_filtered_h5='', makeCopy=True, f_prior_out_h5=None, **kwargs):
2425
2425
  """
2426
2426
  Filter prior realizations and write the result to a new HDF5 file.
2427
2427
 
2428
- Removes rows (realizations) from all M and D datasets in a prior file
2429
- based on a criterion evaluated on a chosen D dataset. The filtered file
2430
- is a complete, self-consistent prior that can be used directly in place
2431
- of the original.
2428
+ Removes rows (realizations) from all M and D datasets in a prior file,
2429
+ either by supplying an explicit index array (``index_use``) or by applying
2430
+ a named criterion to a D dataset (``type``). The filtered file is a
2431
+ complete, self-consistent prior that can be used directly in place of the
2432
+ original.
2432
2433
 
2433
2434
  Parameters
2434
2435
  ----------
2435
2436
  f_prior_h5 : str
2436
2437
  Path to the input prior HDF5 file.
2438
+ index_use : array-like, optional
2439
+ Explicit array of row indices to keep. When provided, ``type`` and
2440
+ ``id`` are ignored. If ``None`` and ``makeCopy=True``, all rows are
2441
+ kept (i.e. the result is a full copy). Default is ``None``.
2437
2442
  type : str, optional
2438
- Filter criterion to apply. Supported values:
2443
+ Filter criterion used when ``index_use`` is ``None``. Supported values:
2439
2444
 
2440
2445
  ``'nonnegative_data'``
2441
2446
  Keep only realizations where every value in ``/D{id}`` is >= 0.
2442
2447
  Useful after forward modelling to remove unphysical responses.
2443
2448
 
2444
- Default is ``'nonnegative_data'``.
2449
+ When ``None`` (default) and ``index_use`` is also ``None``, all rows
2450
+ are kept (equivalent to a full copy).
2445
2451
  id : int, optional
2446
- Index of the D dataset used for filtering (e.g. ``id=1`` uses ``/D1``).
2447
- Default is 1.
2452
+ Index of the D dataset used for criterion-based filtering
2453
+ (e.g. ``id=1`` uses ``/D1``). Default is 1.
2448
2454
  f_prior_filtered_h5 : str, optional
2449
- Output filename. If empty, auto-generates as
2450
- ``<stem>_filtered_<type>.h5``. Default is ``''``.
2455
+ Output filename (kept for backward compatibility). Superseded by
2456
+ ``f_prior_out_h5`` when both are provided. Default is ``''``.
2457
+ makeCopy : bool, optional
2458
+ If ``True`` (default), write to a new file and leave the original
2459
+ untouched. If ``False``, overwrite the original file in-place.
2460
+ f_prior_out_h5 : str, optional
2461
+ Output filename. Auto-generated when ``None`` (default):
2462
+ ``<stem>_filtered_index.h5`` for index-based filtering or
2463
+ ``<stem>_filtered_<type>.h5`` for criterion-based filtering.
2451
2464
  **kwargs
2452
2465
  showInfo : int, optional
2453
2466
  Verbosity level (default 0). Passed through to ``copy_prior``.
@@ -2460,50 +2473,73 @@ def filter_prior(f_prior_h5, type='nonnegative_data', id=1,
2460
2473
  Raises
2461
2474
  ------
2462
2475
  KeyError
2463
- If ``/D{id}`` is not found in the input file.
2476
+ If ``/D{id}`` is not found in the input file (criterion mode only).
2464
2477
  ValueError
2465
- If an unknown ``type`` is specified.
2478
+ If an unknown ``type`` is specified (criterion mode only).
2466
2479
 
2467
2480
  Examples
2468
2481
  --------
2469
- >>> f_prior_filtered = ig.filter_prior(f_prior_h5, type='nonnegative_data', id=1)
2470
-
2471
- Notes
2472
- -----
2473
- Filtering is delegated to ``copy_prior``, which preserves all dataset
2474
- attributes and applies compression. New filter types can be added by
2475
- extending the ``if/elif`` block that computes ``idx``.
2482
+ >>> f_out = ig.filter_prior(f_prior_h5, index_use=np.arange(1000))
2483
+ >>> f_out = ig.filter_prior(f_prior_h5, type='nonnegative_data', id=1)
2476
2484
  """
2477
2485
  import numpy as np
2478
2486
  import os
2479
2487
 
2480
2488
  showInfo = kwargs.get('showInfo', 0)
2481
2489
 
2482
- if not f_prior_filtered_h5:
2490
+ # Determine output path
2491
+ if f_prior_out_h5 is not None:
2492
+ out_file = f_prior_out_h5
2493
+ elif f_prior_filtered_h5:
2494
+ out_file = f_prior_filtered_h5
2495
+ elif not makeCopy:
2496
+ out_file = f_prior_h5 + '.tmp'
2497
+ else:
2483
2498
  stem = os.path.splitext(f_prior_h5)[0]
2484
- f_prior_filtered_h5 = '%s_filtered_%s.h5' % (stem, type)
2499
+ if index_use is not None:
2500
+ out_file = '%s_filtered_index.h5' % stem
2501
+ elif type is not None:
2502
+ out_file = '%s_filtered_%s.h5' % (stem, type)
2503
+ else:
2504
+ out_file = '%s_filtered.h5' % stem
2485
2505
 
2486
- Dname = '/D%d' % id
2506
+ # Determine indices
2507
+ if index_use is not None:
2508
+ idx = np.asarray(index_use)
2509
+ with h5py.File(f_prior_h5, 'r') as f:
2510
+ first = next(k for k in f if isinstance(f[k], h5py.Dataset))
2511
+ N_in = f[first].shape[0]
2512
+ N_out = len(idx)
2513
+ if showInfo >= 0:
2514
+ print("filter_prior [index_use]: keeping %d / %d realizations (%.1f%%)"
2515
+ % (N_out, N_in, 100.0 * N_out / N_in))
2516
+ elif type is not None:
2517
+ Dname = '/D%d' % id
2518
+ with h5py.File(f_prior_h5, 'r') as f:
2519
+ if Dname not in f:
2520
+ raise KeyError("Dataset '%s' not found in %s" % (Dname, f_prior_h5))
2521
+ D = f[Dname][:]
2487
2522
 
2488
- with h5py.File(f_prior_h5, 'r') as f:
2489
- if Dname not in f:
2490
- raise KeyError("Dataset '%s' not found in %s" % (Dname, f_prior_h5))
2491
- D = f[Dname][:]
2523
+ if type == 'nonnegative_data':
2524
+ idx = np.where(np.all(D >= 0, axis=1))[0]
2525
+ else:
2526
+ raise ValueError("Unknown filter type: '%s'" % type)
2492
2527
 
2493
- if type == 'nonnegative_data':
2494
- idx = np.where(np.all(D >= 0, axis=1))[0]
2528
+ N_in = D.shape[0]
2529
+ N_out = len(idx)
2530
+ if showInfo >= 0:
2531
+ print("filter_prior [%s on %s]: keeping %d / %d realizations (%.1f%%)"
2532
+ % (type, Dname, N_out, N_in, 100.0 * N_out / N_in))
2495
2533
  else:
2496
- raise ValueError("Unknown filter type: '%s'" % type)
2534
+ idx = None
2497
2535
 
2498
- N_in = D.shape[0]
2499
- N_out = len(idx)
2500
- if showInfo >= 0:
2501
- print("filter_prior [%s on %s]: keeping %d / %d realizations (%.1f%%)"
2502
- % (type, Dname, N_out, N_in, 100.0 * N_out / N_in))
2536
+ copy_prior(f_prior_h5, out_file, idx=idx, **kwargs)
2503
2537
 
2504
- copy_prior(f_prior_h5, f_prior_filtered_h5, idx=idx, **kwargs)
2538
+ if not makeCopy:
2539
+ os.replace(out_file, f_prior_h5)
2540
+ return f_prior_h5
2505
2541
 
2506
- return f_prior_filtered_h5
2542
+ return out_file
2507
2543
 
2508
2544
 
2509
2545
  def hdf5_scan(file_path):
@@ -2736,7 +2772,6 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
2736
2772
  filelist.append('TX07_20230828_2x4_RC20-33.gex')
2737
2773
  filelist.append('TX07_20230906_2x4_RC20-33.gex')
2738
2774
  filelist.append('TX07_20231016_2x4_RC20-33.gex')
2739
-
2740
2775
 
2741
2776
  if (loadAll or loadType=='shapefiles'):
2742
2777
  #filelist.append('Begravet dal.zip')
@@ -2770,6 +2805,22 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
2770
2805
  filelist.append('POST_DAUGAARD_AVG_prior_detailed_outvalleys_N2000000_dmax90_TX07_20231016_2x4_RC20-33_Nh280_Nf12_Nu2000000_aT1.h5')
2771
2806
  filelist.append('prior_detailed_inout_N4000000_dmax90_TX07_20231016_2x4_RC20-33_Nh280_Nf12.h5')
2772
2807
 
2808
+ if (loadAll or loadType=='WB_sharp'):
2809
+ filelist.append('SCI7_40_ml_sharp2_I02_MOD_syn.xyz')
2810
+ filelist.append('SCI7_40_ml_sharp2_I02_MOD_dat.xyz')
2811
+ filelist.append('SCI7_40_ml_sharp2_I02_MOD_inv.xyz')
2812
+ filelist.append('SCI7_40_ml_sharp2_I02_MOD.xyz')
2813
+ filelist.append('SCI7_40_ml_sharp2_Daugaard.gdb')
2814
+
2815
+ if (loadAll or loadType=='WB_smooth'):
2816
+ filelist.append('SCI7_40_ml_Daugaard_I01_MOD_syn.xyz')
2817
+ filelist.append('SCI7_40_ml_Daugaard_I01_MOD_dat.xyz')
2818
+ filelist.append('SCI7_40_ml_Daugaard_I01_MOD_inv.xyz')
2819
+ filelist.append('SCI7_40_ml_Daugaard_I01_MOD.xyz')
2820
+ filelist.append('SCI7_40_ml_Daugaard_I01.gdb')
2821
+
2822
+
2823
+
2773
2824
  elif case=='ESBJERG':
2774
2825
 
2775
2826
  if len(filelist)==0:
@@ -2900,8 +2951,10 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
2900
2951
  filelist.append('20240911_eksterngps_AVG_export.xyz')
2901
2952
  filelist.append('20241210_AVG_export.xyz')
2902
2953
  filelist.append('20241210_InternGPS_AVG_export.xyz')
2903
- filelist.append('Sdr_Felding_prior_standard_N1000000_dmax90_20260417_0929.h5')
2954
+ #filelist.append('Sdr_Felding_prior_standard_N1000000_dmax90_20260417_0929.h5')
2955
+ #filelist.append('Sdr_Felding_prior_240426_N1000000_dmax90_20260424_1521.h5')
2904
2956
  filelist.append('SdrFelding_boreholes.json')
2957
+ filelist.append('Sdr_Felding_prior_210526_N1000000_dmax90_20260521_1616.h5')
2905
2958
 
2906
2959
 
2907
2960
 
@@ -4793,7 +4846,7 @@ def _analyze_data_file(f, print_line, load_data=False):
4793
4846
  print_line()
4794
4847
 
4795
4848
  # Data groups
4796
- data_groups = sorted([key for key in f.keys() if key.startswith('D') and len(key) == 2])
4849
+ data_groups = sorted([key for key in f.keys() if key.startswith('D') and key[1:].isdigit()], key=lambda k: int(k[1:]))
4797
4850
  print_line(f"Data Groups: {len(data_groups)} found", 0)
4798
4851
  print_line()
4799
4852
 
@@ -4834,7 +4887,7 @@ def _analyze_prior_file(f, print_line, load_data=False):
4834
4887
 
4835
4888
  # Determine number of realizations
4836
4889
  N = None
4837
- model_keys = sorted([key for key in f.keys() if key.startswith('M') and len(key) == 2])
4890
+ model_keys = sorted([key for key in f.keys() if key.startswith('M') and key[1:].isdigit()], key=lambda k: int(k[1:]))
4838
4891
  if model_keys:
4839
4892
  N = f[model_keys[0]].shape[0]
4840
4893
 
@@ -4894,7 +4947,7 @@ def _analyze_prior_file(f, print_line, load_data=False):
4894
4947
  print_line()
4895
4948
 
4896
4949
  # Data groups
4897
- data_groups = sorted([key for key in f.keys() if key.startswith('D') and len(key) == 2])
4950
+ data_groups = sorted([key for key in f.keys() if key.startswith('D') and key[1:].isdigit()], key=lambda k: int(k[1:]))
4898
4951
  print_line(f"Data Realizations: {len(data_groups)} found", 0)
4899
4952
  print_line()
4900
4953
 
@@ -4980,7 +5033,7 @@ def _analyze_post_file(f, print_line, load_data=False):
4980
5033
  print_line()
4981
5034
 
4982
5035
  # Model statistics
4983
- model_keys = sorted([key for key in f.keys() if key.startswith('M') and len(key) == 2])
5036
+ model_keys = sorted([key for key in f.keys() if key.startswith('M') and key[1:].isdigit()], key=lambda k: int(k[1:]))
4984
5037
  print_line(f"Model Parameter Statistics: {len(model_keys)} found", 0)
4985
5038
  print_line()
4986
5039
 
@@ -1465,6 +1465,9 @@ def plot_profile_discrete(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xaxis
1465
1465
  If True, plot KL divergence instead of entropy in the entropy panel.
1466
1466
  KL is plotted only if the ``/Mx/KL`` dataset exists; otherwise entropy
1467
1467
  is used as fallback (default False).
1468
+ fontsize : int or float, optional
1469
+ Font size applied to all text elements (titles, axis labels, colorbar labels,
1470
+ tick labels). If None, matplotlib's current default is used (default None).
1468
1471
 
1469
1472
  Returns
1470
1473
  -------
@@ -1525,6 +1528,7 @@ def plot_profile_discrete(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xaxis
1525
1528
  entropy_max = kwargs.get('entropy_max', None) # Will set default after loading Entropy
1526
1529
  show_n_unique = kwargs.get('show_n_unique', False) # Show number of unique realizations
1527
1530
  plot_kl = kwargs.get('plot_kl', False) # Plot KL divergence instead of entropy
1531
+ fontsize = kwargs.get('fontsize', None)
1528
1532
 
1529
1533
  # Default to showing all panels
1530
1534
  if panels is None:
@@ -1942,8 +1946,16 @@ def plot_profile_discrete(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xaxis
1942
1946
  else:
1943
1947
  ax[2].legend(loc='upper right')
1944
1948
 
1949
+ ax[2].set_xlabel({'x': 'X (m)', 'y': 'Y (m)', 'id': 'ID', 'index': 'Index'}.get(xaxis, xaxis))
1945
1950
  plt.grid(True)
1946
1951
 
1952
+ if fontsize is not None:
1953
+ import matplotlib.text as _mtext
1954
+ for _t in fig.findobj(_mtext.Text):
1955
+ _t.set_fontsize(fontsize)
1956
+ for _ax in fig.get_axes():
1957
+ _ax.tick_params(labelsize=fontsize)
1958
+
1947
1959
  plt.tight_layout()
1948
1960
 
1949
1961
  # Create an invisible colorbar for the last subplot to maintain alignment
@@ -2009,13 +2021,15 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2009
2021
  - ['std']: Only standard deviation
2010
2022
  - ['stats']: Only temperature and log-likelihood
2011
2023
  - Any combination of the above (e.g., ['value', 'stats'])
2012
- Accepted panel names: 'value', 'median', 'mean', 'std', 'uncertainty', 'stats', 'temperature', 't'
2024
+ Accepted panel names: 'value', 'median', 'mean', 'harmonicmean', 'std', 'uncertainty', 'stats', 'temperature', 't'
2025
+ Using a statistic name (``'median'``, ``'mean'``, ``'harmonicmean'``) as the panel name also
2026
+ selects that statistic as the plotted value, overriding the default ``key``.
2013
2027
  hardcopy : bool, optional
2014
2028
  Save plot as PNG file (default False).
2015
2029
  cmap : str or colormap, optional
2016
2030
  Color scheme for plotting (default ``'jet'``).
2017
2031
  key : str, optional
2018
- Statistic to plot: ``'Mean'``, ``'Median'``, or ``'HarmonicMean'`` (default ``'Median'``).
2032
+ Statistic to plot in the value panel: ``'Mean'``, ``'Median'``, or ``'HarmonicMean'`` (default ``'HarmonicMean'``).
2019
2033
  alpha : float, optional
2020
2034
  Transparency scaling factor based on normalized standard deviation (0.0=no
2021
2035
  transparency, default; 1.0=full uncertainty-based transparency).
@@ -2036,6 +2050,9 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2036
2050
  plot_kl : bool, optional
2037
2051
  If True, plot KL divergence instead of standard deviation in the std panel.
2038
2052
  Falls back to Std if ``/Mx/KL`` does not exist (default False).
2053
+ fontsize : int or float, optional
2054
+ Font size applied to all text elements (titles, axis labels, colorbar labels,
2055
+ tick labels). If None, matplotlib's current default is used (default None).
2039
2056
 
2040
2057
  Returns
2041
2058
  -------
@@ -2083,11 +2100,12 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2083
2100
  kwargs.setdefault('clim', None)
2084
2101
 
2085
2102
  alpha = kwargs.get('alpha',0.0)
2086
- key = kwargs.get('key','Median')
2103
+ key = kwargs.get('key','HarmonicMean')
2087
2104
  txt = kwargs.get('txt','')
2088
2105
  showInfo = kwargs.get('showInfo', 0)
2089
2106
  show_n_unique = kwargs.get('show_n_unique', False) # Show number of unique realizations
2090
2107
  plot_kl = kwargs.get('plot_kl', False) # Plot KL divergence instead of Std
2108
+ fontsize = kwargs.get('fontsize', None)
2091
2109
 
2092
2110
  # Default to showing all panels
2093
2111
  if panels is None:
@@ -2096,8 +2114,17 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2096
2114
  # Normalize panel names to lowercase
2097
2115
  panels = [p.lower() for p in panels]
2098
2116
 
2117
+ # Infer key from panel name if not explicitly provided
2118
+ if 'key' not in kwargs:
2119
+ if 'median' in panels:
2120
+ key = 'Median'
2121
+ elif 'mean' in panels:
2122
+ key = 'Mean'
2123
+ elif 'harmonicmean' in panels:
2124
+ key = 'HarmonicMean'
2125
+
2099
2126
  # Determine which panels to show
2100
- show_value = any(p in panels for p in ['value', 'median', 'mean'])
2127
+ show_value = any(p in panels for p in ['value', 'median', 'mean', 'harmonicmean'])
2101
2128
  show_std = any(p in panels for p in ['std', 'uncertainty'])
2102
2129
  show_stats = any(p in panels for p in ['stats', 't', 'temperature'])
2103
2130
 
@@ -2162,6 +2189,10 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2162
2189
  except KeyError:
2163
2190
  HarmonicMean=None
2164
2191
  Std=f_post[Mstr+'/Std'][:].T
2192
+ try:
2193
+ LogStd=f_post[Mstr+'/LogStd'][:].T
2194
+ except KeyError:
2195
+ LogStd=None
2165
2196
  T=f_post['/T'][:].T
2166
2197
  try:
2167
2198
  CHI2=f_post['/CHI2'][:]
@@ -2492,7 +2523,8 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2492
2523
  fig.colorbar(im3, ax=ax[isp], label='KL Divergence (bits)')
2493
2524
  else:
2494
2525
  # STD
2495
- std_data = Std[:,ii]
2526
+ std_src = LogStd if LogStd is not None else Std
2527
+ std_data = std_src[:,ii]
2496
2528
  if gap_alpha is not None:
2497
2529
  std_data = np.ma.masked_where(gap_alpha == 0.0, std_data)
2498
2530
  std_cmap, _ = get_colormap_and_limits('entropy')
@@ -2500,9 +2532,10 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2500
2532
  cmap=std_cmap,
2501
2533
  shading='auto')
2502
2534
  im3.set_clim(0,1)
2503
- ax[isp].set_title('Std %s' % name)
2535
+ std_label = 'LogStd' if LogStd is not None else 'log₁₀ Std'
2536
+ ax[isp].set_title('LogStd %s' % name)
2504
2537
  ax[isp].set_ylabel('Elevation (m)')
2505
- fig.colorbar(im3, ax=ax[isp], label='Standard deviation (Ohm.m)')
2538
+ fig.colorbar(im3, ax=ax[isp], label=std_label)
2506
2539
 
2507
2540
  # Handle single parameter case (nm <= 1)
2508
2541
  if show_value and nm<=1:
@@ -2579,8 +2612,16 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
2579
2612
  else:
2580
2613
  ax[2].legend(loc='upper right')
2581
2614
 
2615
+ ax[2].set_xlabel({'x': 'X (m)', 'y': 'Y (m)', 'id': 'ID', 'index': 'Index'}.get(xaxis, xaxis))
2582
2616
  plt.grid(True)
2583
2617
 
2618
+ if fontsize is not None:
2619
+ import matplotlib.text as _mtext
2620
+ for _t in fig.findobj(_mtext.Text):
2621
+ _t.set_fontsize(fontsize)
2622
+ for _ax in fig.get_axes():
2623
+ _ax.tick_params(labelsize=fontsize)
2624
+
2584
2625
  plt.tight_layout()
2585
2626
 
2586
2627
  if show_stats and nm>1: