integrate-module 0.97.0__tar.gz → 0.98.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {integrate_module-0.97.0/integrate_module.egg-info → integrate_module-0.98.0}/PKG-INFO +2 -1
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/__init__.py +2 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate.py +43 -15
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_borehole.py +2 -2
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_io.py +97 -44
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_plot.py +48 -7
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_query.py +204 -31
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_rejection.py +40 -2
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_rejection_cli.py +11 -4
- integrate_module-0.98.0/integrate/integrate_rejection_jax.py +494 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_timing_cli.py +22 -2
- {integrate_module-0.97.0 → integrate_module-0.98.0/integrate_module.egg-info}/PKG-INFO +2 -1
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/SOURCES.txt +1 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/requires.txt +1 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/pyproject.toml +2 -1
- {integrate_module-0.97.0 → integrate_module-0.98.0}/LICENSE +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/README.md +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/gex.py +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate/integrate_hdf5_info_cli.py +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/dependency_links.txt +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/entry_points.txt +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/integrate_module.egg-info/top_level.txt +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/setup.cfg +0 -0
- {integrate_module-0.97.0 → integrate_module-0.98.0}/tests/test_likelihood_multinomial.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: integrate_module
|
|
3
|
-
Version: 0.97.0
|
|
3
|
+
Version: 0.98.0
|
|
4
4
|
Summary: Localized probabilistic data integration
|
|
5
5
|
Author-email: Thomas Mejer Hansen <tmeha@geo.au.dk>
|
|
6
6
|
License: MIT
|
|
@@ -26,6 +26,7 @@ Requires-Dist: jupyter>=1.0.0
|
|
|
26
26
|
Requires-Dist: jupytext
|
|
27
27
|
Requires-Dist: pandas
|
|
28
28
|
Requires-Dist: pyvista
|
|
29
|
+
Requires-Dist: litellm
|
|
29
30
|
Provides-Extra: dev
|
|
30
31
|
Requires-Dist: pytest; extra == "dev"
|
|
31
32
|
Requires-Dist: black; extra == "dev"
|
|
@@ -78,6 +78,7 @@ from integrate.integrate_io import check_data
|
|
|
78
78
|
from integrate.integrate_io import merge_prior
|
|
79
79
|
from integrate.integrate_io import merge_data
|
|
80
80
|
from integrate.integrate_io import merge_posterior
|
|
81
|
+
from integrate.integrate_io import filter_prior
|
|
81
82
|
from integrate.integrate_io import read_usf
|
|
82
83
|
from integrate.integrate_io import read_usf_mul
|
|
83
84
|
from integrate.integrate_io import test_read_usf
|
|
@@ -124,6 +125,7 @@ from integrate.integrate_query import save_query
|
|
|
124
125
|
from integrate.integrate_query import load_query
|
|
125
126
|
from integrate.integrate_query import get_prior_model_info
|
|
126
127
|
from integrate.integrate_query import query_from_text
|
|
128
|
+
from integrate.integrate_query import query_test_llm
|
|
127
129
|
|
|
128
130
|
# Import gex module functions
|
|
129
131
|
from integrate.gex import read_gex as read_gex2
|
|
@@ -337,7 +337,8 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
|
|
|
337
337
|
- ``/Mx/Mean`` [Np, Nm] Arithmetic mean of posterior realizations.
|
|
338
338
|
- ``/Mx/LogMean`` [Np, Nm] Geometric mean (exp of mean of log values).
|
|
339
339
|
- ``/Mx/Median`` [Np, Nm] Median of posterior realizations.
|
|
340
|
-
- ``/Mx/Std`` [Np, Nm] Standard deviation of
|
|
340
|
+
- ``/Mx/Std`` [Np, Nm] Standard deviation of posterior realizations.
|
|
341
|
+
- ``/Mx/LogStd`` [Np, Nm] Standard deviation of log10(posterior realizations).
|
|
341
342
|
- ``/Mx/HarmonicMean`` [Np, Nm] Trimmed harmonic mean: conductivity samples
|
|
342
343
|
are trimmed 10% each tail, averaged, then inverted back to resistivity.
|
|
343
344
|
- ``/Mx/KL`` [Np, Nm] KL divergence in bits. Only written when
|
|
@@ -471,6 +472,7 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
|
|
|
471
472
|
M_logmean = np.full((nsounding, nm), np.nan)
|
|
472
473
|
M_mean = np.full((nsounding, nm), np.nan)
|
|
473
474
|
M_std = np.full((nsounding, nm), np.nan)
|
|
475
|
+
M_logstd = np.full((nsounding, nm), np.nan)
|
|
474
476
|
M_median = np.full((nsounding, nm), np.nan)
|
|
475
477
|
M_harmonicmean = np.full((nsounding, nm), np.nan)
|
|
476
478
|
if computeKL_continuous:
|
|
@@ -508,7 +510,8 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
|
|
|
508
510
|
M_mean[iid,:] = np.mean(m_post, axis=0)
|
|
509
511
|
M_median[iid,:] = np.median(m_post, axis=0)
|
|
510
512
|
with np.errstate(invalid='ignore', divide='ignore'):
|
|
511
|
-
|
|
513
|
+
M_logstd[iid,:] = np.std(np.log10(np.maximum(m_post, 1e-10)), axis=0)
|
|
514
|
+
M_std[iid,:] = np.std(m_post, axis=0)
|
|
512
515
|
_c = 1.0 / np.maximum(m_post, 1e-10)
|
|
513
516
|
_k = int(np.floor(0.10 * _c.shape[0]))
|
|
514
517
|
_cs = np.sort(_c, axis=0)
|
|
@@ -563,10 +566,9 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
|
|
|
563
566
|
# Geometric Mean: exp(mean(log(x)))
|
|
564
567
|
M_logmean[current_iids, :] = np.exp(np.mean(log_cube, axis=1))
|
|
565
568
|
|
|
566
|
-
#
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
M_std[current_iids, :] = np.std(log_cube, axis=1) * INV_LOG_10
|
|
569
|
+
# LogStd: std(log10(x)) = std(ln(x)) * (1/ln(10)); reuse log_cube for speed
|
|
570
|
+
M_logstd[current_iids, :] = np.std(log_cube, axis=1) * INV_LOG_10
|
|
571
|
+
M_std[current_iids, :] = np.std(m_cube, axis=1)
|
|
570
572
|
|
|
571
573
|
# Harmonic mean (trimmed 10% each tail in conductivity space)
|
|
572
574
|
_c = 1.0 / np.maximum(m_cube, 1e-10)
|
|
@@ -579,7 +581,7 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
|
|
|
579
581
|
|
|
580
582
|
|
|
581
583
|
# Create datasets
|
|
582
|
-
for stat in ['Mean', 'Median', 'Std', 'LogMean', 'HarmonicMean']:
|
|
584
|
+
for stat in ['Mean', 'Median', 'Std', 'LogStd', 'LogMean', 'HarmonicMean']:
|
|
583
585
|
if stat not in f_post:
|
|
584
586
|
dset = '/%s/%s' % (name,stat)
|
|
585
587
|
if dset not in f_post:
|
|
@@ -591,6 +593,7 @@ def integrate_posterior_stats(f_post_h5='POST.h5', ip_range=None, **kwargs):
|
|
|
591
593
|
f_post['/%s/%s' % (name,'Mean')][:] = M_mean
|
|
592
594
|
f_post['/%s/%s' % (name,'Median')][:] = M_median
|
|
593
595
|
f_post['/%s/%s' % (name,'Std')][:] = M_std
|
|
596
|
+
f_post['/%s/%s' % (name,'LogStd')][:] = M_logstd
|
|
594
597
|
f_post['/%s/%s' % (name,'HarmonicMean')][:] = M_harmonicmean
|
|
595
598
|
if computeKL_continuous:
|
|
596
599
|
dset = '/%s/KL' % name
|
|
@@ -1721,14 +1724,15 @@ def prior_model_layered(lay_dist='uniform', dz = 1, z_max = 90,
|
|
|
1721
1724
|
|
|
1722
1725
|
# Apply boundaries if any exist
|
|
1723
1726
|
if n_boundaries > 0:
|
|
1724
|
-
boundaries = i_boundaries_all[i, :n_boundaries]
|
|
1727
|
+
boundaries = np.sort(i_boundaries_all[i, :n_boundaries])
|
|
1725
1728
|
for j in range(n_boundaries):
|
|
1726
1729
|
M_rho[i, boundaries[j]:] = rho_all[i, j + 1]
|
|
1727
1730
|
|
|
1728
1731
|
# Save sparse representation if requested
|
|
1729
1732
|
if save_sparse:
|
|
1730
1733
|
if n_boundaries > 0:
|
|
1731
|
-
|
|
1734
|
+
z_sorted = np.sort(z_boundaries_all[i, :n_boundaries])
|
|
1735
|
+
m_current = np.concatenate((z_sorted, rho_all[i, :n_lay]))
|
|
1732
1736
|
else:
|
|
1733
1737
|
m_current = rho_all[i, :n_lay]
|
|
1734
1738
|
M_rho_sparse[i, 0:len(m_current)] = m_current
|
|
@@ -3031,7 +3035,7 @@ def allocate_large_page():
|
|
|
3031
3035
|
return None
|
|
3032
3036
|
|
|
3033
3037
|
|
|
3034
|
-
def timing_compute(N_arr=[], Nproc_arr=[]):
|
|
3038
|
+
def timing_compute(N_arr=[], Nproc_arr=[], backend='numpy', NcpuForward=0):
|
|
3035
3039
|
"""
|
|
3036
3040
|
Execute timing benchmark for INTEGRATE workflow components.
|
|
3037
3041
|
|
|
@@ -3046,7 +3050,11 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
|
|
|
3046
3050
|
Default is [100, 500, 1000, 5000, 10000, 50000, 100000, 500000, 1000000, 5000000].
|
|
3047
3051
|
Nproc_arr : array_like, optional
|
|
3048
3052
|
Array of processor counts to test. Default is powers of 2 up to available CPUs.
|
|
3049
|
-
|
|
3053
|
+
NcpuForward : int, optional
|
|
3054
|
+
Fixed number of CPUs to use for forward modeling. When > 0, forward modeling always
|
|
3055
|
+
uses this many CPUs regardless of the current Nproc_arr entry. The inversion
|
|
3056
|
+
(rejection sampling) still varies over Nproc_arr. Default is 0 (use Nproc_arr value).
|
|
3057
|
+
|
|
3050
3058
|
Returns
|
|
3051
3059
|
-------
|
|
3052
3060
|
str
|
|
@@ -3119,9 +3127,28 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
|
|
|
3119
3127
|
|
|
3120
3128
|
print("Testing on %d data sets of size(s):" % len(N_arr), N_arr)
|
|
3121
3129
|
print("Testing on %d sets of core(s):" % len(Nproc_arr), Nproc_arr)
|
|
3130
|
+
if NcpuForward > 0:
|
|
3131
|
+
print("Forward modeling fixed to %d CPUs (inversion varies over Nproc_arr)" % NcpuForward)
|
|
3122
3132
|
|
|
3123
3133
|
|
|
3124
|
-
|
|
3134
|
+
print("Rejection sampling backend: %s" % backend)
|
|
3135
|
+
backend_label = backend
|
|
3136
|
+
if backend == 'jax':
|
|
3137
|
+
import os
|
|
3138
|
+
jax_platform_env = os.environ.get('JAX_PLATFORMS', '').strip().lower()
|
|
3139
|
+
if jax_platform_env in ('gpu', 'cuda', 'rocm'):
|
|
3140
|
+
backend_label = 'jax_gpu'
|
|
3141
|
+
elif jax_platform_env == 'cpu':
|
|
3142
|
+
backend_label = 'jax_cpu'
|
|
3143
|
+
else:
|
|
3144
|
+
try:
|
|
3145
|
+
import jax
|
|
3146
|
+
jax_device = jax.default_backend()
|
|
3147
|
+
backend_label = 'jax_gpu' if jax_device == 'gpu' else 'jax_cpu'
|
|
3148
|
+
except Exception:
|
|
3149
|
+
backend_label = 'jax_cpu'
|
|
3150
|
+
print("JAX backend label: %s (JAX_PLATFORMS='%s')" % (backend_label, jax_platform_env))
|
|
3151
|
+
file_out = 'timing_%s-%s-%dcore_Nproc%d_N%d_%s.npz' % (hostname,system,Ncpu,len(Nproc_arr), len(N_arr), backend_label)
|
|
3125
3152
|
print("Writing results to %s " % file_out)
|
|
3126
3153
|
|
|
3127
3154
|
## TIMING
|
|
@@ -3170,14 +3197,15 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
|
|
|
3170
3197
|
#ig.plot_prior_stats(f_prior_h5)
|
|
3171
3198
|
#% A2. Compute prior DATA
|
|
3172
3199
|
t0_forward = time.time()
|
|
3173
|
-
|
|
3200
|
+
Ncpu_fwd = NcpuForward if NcpuForward > 0 else Ncpu
|
|
3201
|
+
f_prior_data_h5 = ig.prior_data_gaaem(f_prior_h5, file_gex, Ncpu=Ncpu_fwd, showInfo=showInfo)
|
|
3174
3202
|
T_forward[i,j]=time.time()-t0_forward
|
|
3175
3203
|
|
|
3176
3204
|
#% READY FOR INVERSION
|
|
3177
3205
|
N_use = 1000000
|
|
3178
3206
|
t0_rejection = time.time()
|
|
3179
3207
|
if testRejection:
|
|
3180
|
-
f_post_h5 = ig.integrate_rejection(f_prior_data_h5, f_data_h5, N_use
|
|
3208
|
+
f_post_h5 = ig.integrate_rejection(f_prior_data_h5, f_data_h5, N_use=N_use, parallel=1, updatePostStat=False, Ncpu=Ncpu, showInfo=showInfo, backend=backend)
|
|
3181
3209
|
T_rejection[i,j]=time.time()-t0_rejection
|
|
3182
3210
|
|
|
3183
3211
|
#% Compute some generic statistic of the posterior distribution (Mean, Median, Std)
|
|
@@ -3187,7 +3215,7 @@ def timing_compute(N_arr=[], Nproc_arr=[]):
|
|
|
3187
3215
|
T_poststat[i,j]=time.time()-t0_poststat
|
|
3188
3216
|
|
|
3189
3217
|
T_total = T_prior + T_forward + T_rejection + T_poststat
|
|
3190
|
-
np.savez(file_out, T_total=T_total, T_prior=T_prior, T_forward=T_forward, T_rejection=T_rejection, T_poststat=T_poststat, N_arr=N_arr, Nproc_arr=Nproc_arr, nobs=nobs)
|
|
3218
|
+
np.savez(file_out, T_total=T_total, T_prior=T_prior, T_forward=T_forward, T_rejection=T_rejection, T_poststat=T_poststat, N_arr=N_arr, Nproc_arr=Nproc_arr, nobs=nobs, backend=backend)
|
|
3191
3219
|
|
|
3192
3220
|
|
|
3193
3221
|
return file_out
|
|
@@ -763,8 +763,8 @@ def get_weight_from_position(f_data_h5,x_well=0,y_well=0, i_ref=-1, r_dis = 400,
|
|
|
763
763
|
w_data = np.exp(-1*sum_dd**2/r_data**2)
|
|
764
764
|
|
|
765
765
|
|
|
766
|
-
#
|
|
767
|
-
dis = np.sqrt((X-
|
|
766
|
+
# Compute the distance from each data point to the actual borehole location
|
|
767
|
+
dis = np.sqrt((X-x_well)**2 + (Y-y_well)**2)
|
|
768
768
|
w_dis = np.exp(-1*dis**2/r_dis**2)
|
|
769
769
|
|
|
770
770
|
w_combined = w_data * w_dis
|
|
@@ -2420,34 +2420,47 @@ def copy_prior(input_filename, output_filename, idx=None, N_use=None, loadtomem=
|
|
|
2420
2420
|
return output_filename
|
|
2421
2421
|
|
|
2422
2422
|
|
|
2423
|
-
def filter_prior(f_prior_h5, type='nonnegative_data', id=1,
|
|
2424
|
-
f_prior_filtered_h5='', **kwargs):
|
|
2423
|
+
def filter_prior(f_prior_h5, index_use=None, *, type=None, id=1,
|
|
2424
|
+
f_prior_filtered_h5='', makeCopy=True, f_prior_out_h5=None, **kwargs):
|
|
2425
2425
|
"""
|
|
2426
2426
|
Filter prior realizations and write the result to a new HDF5 file.
|
|
2427
2427
|
|
|
2428
|
-
Removes rows (realizations) from all M and D datasets in a prior file
|
|
2429
|
-
|
|
2430
|
-
|
|
2431
|
-
of the
|
|
2428
|
+
Removes rows (realizations) from all M and D datasets in a prior file,
|
|
2429
|
+
either by supplying an explicit index array (``index_use``) or by applying
|
|
2430
|
+
a named criterion to a D dataset (``type``). The filtered file is a
|
|
2431
|
+
complete, self-consistent prior that can be used directly in place of the
|
|
2432
|
+
original.
|
|
2432
2433
|
|
|
2433
2434
|
Parameters
|
|
2434
2435
|
----------
|
|
2435
2436
|
f_prior_h5 : str
|
|
2436
2437
|
Path to the input prior HDF5 file.
|
|
2438
|
+
index_use : array-like, optional
|
|
2439
|
+
Explicit array of row indices to keep. When provided, ``type`` and
|
|
2440
|
+
``id`` are ignored. If ``None`` and ``makeCopy=True``, all rows are
|
|
2441
|
+
kept (i.e. the result is a full copy). Default is ``None``.
|
|
2437
2442
|
type : str, optional
|
|
2438
|
-
Filter criterion
|
|
2443
|
+
Filter criterion used when ``index_use`` is ``None``. Supported values:
|
|
2439
2444
|
|
|
2440
2445
|
``'nonnegative_data'``
|
|
2441
2446
|
Keep only realizations where every value in ``/D{id}`` is >= 0.
|
|
2442
2447
|
Useful after forward modelling to remove unphysical responses.
|
|
2443
2448
|
|
|
2444
|
-
|
|
2449
|
+
When ``None`` (default) and ``index_use`` is also ``None``, all rows
|
|
2450
|
+
are kept (equivalent to a full copy).
|
|
2445
2451
|
id : int, optional
|
|
2446
|
-
Index of the D dataset used for filtering
|
|
2447
|
-
Default is 1.
|
|
2452
|
+
Index of the D dataset used for criterion-based filtering
|
|
2453
|
+
(e.g. ``id=1`` uses ``/D1``). Default is 1.
|
|
2448
2454
|
f_prior_filtered_h5 : str, optional
|
|
2449
|
-
Output filename
|
|
2450
|
-
|
|
2455
|
+
Output filename (kept for backward compatibility). Superseded by
|
|
2456
|
+
``f_prior_out_h5`` when both are provided. Default is ``''``.
|
|
2457
|
+
makeCopy : bool, optional
|
|
2458
|
+
If ``True`` (default), write to a new file and leave the original
|
|
2459
|
+
untouched. If ``False``, overwrite the original file in-place.
|
|
2460
|
+
f_prior_out_h5 : str, optional
|
|
2461
|
+
Output filename. Auto-generated when ``None`` (default):
|
|
2462
|
+
``<stem>_filtered_index.h5`` for index-based filtering or
|
|
2463
|
+
``<stem>_filtered_<type>.h5`` for criterion-based filtering.
|
|
2451
2464
|
**kwargs
|
|
2452
2465
|
showInfo : int, optional
|
|
2453
2466
|
Verbosity level (default 0). Passed through to ``copy_prior``.
|
|
@@ -2460,50 +2473,73 @@ def filter_prior(f_prior_h5, type='nonnegative_data', id=1,
|
|
|
2460
2473
|
Raises
|
|
2461
2474
|
------
|
|
2462
2475
|
KeyError
|
|
2463
|
-
If ``/D{id}`` is not found in the input file.
|
|
2476
|
+
If ``/D{id}`` is not found in the input file (criterion mode only).
|
|
2464
2477
|
ValueError
|
|
2465
|
-
If an unknown ``type`` is specified.
|
|
2478
|
+
If an unknown ``type`` is specified (criterion mode only).
|
|
2466
2479
|
|
|
2467
2480
|
Examples
|
|
2468
2481
|
--------
|
|
2469
|
-
>>>
|
|
2470
|
-
|
|
2471
|
-
Notes
|
|
2472
|
-
-----
|
|
2473
|
-
Filtering is delegated to ``copy_prior``, which preserves all dataset
|
|
2474
|
-
attributes and applies compression. New filter types can be added by
|
|
2475
|
-
extending the ``if/elif`` block that computes ``idx``.
|
|
2482
|
+
>>> f_out = ig.filter_prior(f_prior_h5, index_use=np.arange(1000))
|
|
2483
|
+
>>> f_out = ig.filter_prior(f_prior_h5, type='nonnegative_data', id=1)
|
|
2476
2484
|
"""
|
|
2477
2485
|
import numpy as np
|
|
2478
2486
|
import os
|
|
2479
2487
|
|
|
2480
2488
|
showInfo = kwargs.get('showInfo', 0)
|
|
2481
2489
|
|
|
2482
|
-
|
|
2490
|
+
# Determine output path
|
|
2491
|
+
if f_prior_out_h5 is not None:
|
|
2492
|
+
out_file = f_prior_out_h5
|
|
2493
|
+
elif f_prior_filtered_h5:
|
|
2494
|
+
out_file = f_prior_filtered_h5
|
|
2495
|
+
elif not makeCopy:
|
|
2496
|
+
out_file = f_prior_h5 + '.tmp'
|
|
2497
|
+
else:
|
|
2483
2498
|
stem = os.path.splitext(f_prior_h5)[0]
|
|
2484
|
-
|
|
2499
|
+
if index_use is not None:
|
|
2500
|
+
out_file = '%s_filtered_index.h5' % stem
|
|
2501
|
+
elif type is not None:
|
|
2502
|
+
out_file = '%s_filtered_%s.h5' % (stem, type)
|
|
2503
|
+
else:
|
|
2504
|
+
out_file = '%s_filtered.h5' % stem
|
|
2485
2505
|
|
|
2486
|
-
|
|
2506
|
+
# Determine indices
|
|
2507
|
+
if index_use is not None:
|
|
2508
|
+
idx = np.asarray(index_use)
|
|
2509
|
+
with h5py.File(f_prior_h5, 'r') as f:
|
|
2510
|
+
first = next(k for k in f if isinstance(f[k], h5py.Dataset))
|
|
2511
|
+
N_in = f[first].shape[0]
|
|
2512
|
+
N_out = len(idx)
|
|
2513
|
+
if showInfo >= 0:
|
|
2514
|
+
print("filter_prior [index_use]: keeping %d / %d realizations (%.1f%%)"
|
|
2515
|
+
% (N_out, N_in, 100.0 * N_out / N_in))
|
|
2516
|
+
elif type is not None:
|
|
2517
|
+
Dname = '/D%d' % id
|
|
2518
|
+
with h5py.File(f_prior_h5, 'r') as f:
|
|
2519
|
+
if Dname not in f:
|
|
2520
|
+
raise KeyError("Dataset '%s' not found in %s" % (Dname, f_prior_h5))
|
|
2521
|
+
D = f[Dname][:]
|
|
2487
2522
|
|
|
2488
|
-
|
|
2489
|
-
|
|
2490
|
-
|
|
2491
|
-
|
|
2523
|
+
if type == 'nonnegative_data':
|
|
2524
|
+
idx = np.where(np.all(D >= 0, axis=1))[0]
|
|
2525
|
+
else:
|
|
2526
|
+
raise ValueError("Unknown filter type: '%s'" % type)
|
|
2492
2527
|
|
|
2493
|
-
|
|
2494
|
-
|
|
2528
|
+
N_in = D.shape[0]
|
|
2529
|
+
N_out = len(idx)
|
|
2530
|
+
if showInfo >= 0:
|
|
2531
|
+
print("filter_prior [%s on %s]: keeping %d / %d realizations (%.1f%%)"
|
|
2532
|
+
% (type, Dname, N_out, N_in, 100.0 * N_out / N_in))
|
|
2495
2533
|
else:
|
|
2496
|
-
|
|
2534
|
+
idx = None
|
|
2497
2535
|
|
|
2498
|
-
|
|
2499
|
-
N_out = len(idx)
|
|
2500
|
-
if showInfo >= 0:
|
|
2501
|
-
print("filter_prior [%s on %s]: keeping %d / %d realizations (%.1f%%)"
|
|
2502
|
-
% (type, Dname, N_out, N_in, 100.0 * N_out / N_in))
|
|
2536
|
+
copy_prior(f_prior_h5, out_file, idx=idx, **kwargs)
|
|
2503
2537
|
|
|
2504
|
-
|
|
2538
|
+
if not makeCopy:
|
|
2539
|
+
os.replace(out_file, f_prior_h5)
|
|
2540
|
+
return f_prior_h5
|
|
2505
2541
|
|
|
2506
|
-
return
|
|
2542
|
+
return out_file
|
|
2507
2543
|
|
|
2508
2544
|
|
|
2509
2545
|
def hdf5_scan(file_path):
|
|
@@ -2736,7 +2772,6 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
|
|
|
2736
2772
|
filelist.append('TX07_20230828_2x4_RC20-33.gex')
|
|
2737
2773
|
filelist.append('TX07_20230906_2x4_RC20-33.gex')
|
|
2738
2774
|
filelist.append('TX07_20231016_2x4_RC20-33.gex')
|
|
2739
|
-
|
|
2740
2775
|
|
|
2741
2776
|
if (loadAll or loadType=='shapefiles'):
|
|
2742
2777
|
#filelist.append('Begravet dal.zip')
|
|
@@ -2770,6 +2805,22 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
|
|
|
2770
2805
|
filelist.append('POST_DAUGAARD_AVG_prior_detailed_outvalleys_N2000000_dmax90_TX07_20231016_2x4_RC20-33_Nh280_Nf12_Nu2000000_aT1.h5')
|
|
2771
2806
|
filelist.append('prior_detailed_inout_N4000000_dmax90_TX07_20231016_2x4_RC20-33_Nh280_Nf12.h5')
|
|
2772
2807
|
|
|
2808
|
+
if (loadAll or loadType=='WB_sharp'):
|
|
2809
|
+
filelist.append('SCI7_40_ml_sharp2_I02_MOD_syn.xyz')
|
|
2810
|
+
filelist.append('SCI7_40_ml_sharp2_I02_MOD_dat.xyz')
|
|
2811
|
+
filelist.append('SCI7_40_ml_sharp2_I02_MOD_inv.xyz')
|
|
2812
|
+
filelist.append('SCI7_40_ml_sharp2_I02_MOD.xyz')
|
|
2813
|
+
filelist.append('SCI7_40_ml_sharp2_Daugaard.gdb')
|
|
2814
|
+
|
|
2815
|
+
if (loadAll or loadType=='WB_smooth'):
|
|
2816
|
+
filelist.append('SCI7_40_ml_Daugaard_I01_MOD_syn.xyz')
|
|
2817
|
+
filelist.append('SCI7_40_ml_Daugaard_I01_MOD_dat.xyz')
|
|
2818
|
+
filelist.append('SCI7_40_ml_Daugaard_I01_MOD_inv.xyz')
|
|
2819
|
+
filelist.append('SCI7_40_ml_Daugaard_I01_MOD.xyz')
|
|
2820
|
+
filelist.append('SCI7_40_ml_Daugaard_I01.gdb')
|
|
2821
|
+
|
|
2822
|
+
|
|
2823
|
+
|
|
2773
2824
|
elif case=='ESBJERG':
|
|
2774
2825
|
|
|
2775
2826
|
if len(filelist)==0:
|
|
@@ -2900,8 +2951,10 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
|
|
|
2900
2951
|
filelist.append('20240911_eksterngps_AVG_export.xyz')
|
|
2901
2952
|
filelist.append('20241210_AVG_export.xyz')
|
|
2902
2953
|
filelist.append('20241210_InternGPS_AVG_export.xyz')
|
|
2903
|
-
filelist.append('Sdr_Felding_prior_standard_N1000000_dmax90_20260417_0929.h5')
|
|
2954
|
+
#filelist.append('Sdr_Felding_prior_standard_N1000000_dmax90_20260417_0929.h5')
|
|
2955
|
+
#filelist.append('Sdr_Felding_prior_240426_N1000000_dmax90_20260424_1521.h5')
|
|
2904
2956
|
filelist.append('SdrFelding_boreholes.json')
|
|
2957
|
+
filelist.append('Sdr_Felding_prior_210526_N1000000_dmax90_20260521_1616.h5')
|
|
2905
2958
|
|
|
2906
2959
|
|
|
2907
2960
|
|
|
@@ -4793,7 +4846,7 @@ def _analyze_data_file(f, print_line, load_data=False):
|
|
|
4793
4846
|
print_line()
|
|
4794
4847
|
|
|
4795
4848
|
# Data groups
|
|
4796
|
-
data_groups = sorted([key for key in f.keys() if key.startswith('D') and
|
|
4849
|
+
data_groups = sorted([key for key in f.keys() if key.startswith('D') and key[1:].isdigit()], key=lambda k: int(k[1:]))
|
|
4797
4850
|
print_line(f"Data Groups: {len(data_groups)} found", 0)
|
|
4798
4851
|
print_line()
|
|
4799
4852
|
|
|
@@ -4834,7 +4887,7 @@ def _analyze_prior_file(f, print_line, load_data=False):
|
|
|
4834
4887
|
|
|
4835
4888
|
# Determine number of realizations
|
|
4836
4889
|
N = None
|
|
4837
|
-
model_keys = sorted([key for key in f.keys() if key.startswith('M') and
|
|
4890
|
+
model_keys = sorted([key for key in f.keys() if key.startswith('M') and key[1:].isdigit()], key=lambda k: int(k[1:]))
|
|
4838
4891
|
if model_keys:
|
|
4839
4892
|
N = f[model_keys[0]].shape[0]
|
|
4840
4893
|
|
|
@@ -4894,7 +4947,7 @@ def _analyze_prior_file(f, print_line, load_data=False):
|
|
|
4894
4947
|
print_line()
|
|
4895
4948
|
|
|
4896
4949
|
# Data groups
|
|
4897
|
-
data_groups = sorted([key for key in f.keys() if key.startswith('D') and
|
|
4950
|
+
data_groups = sorted([key for key in f.keys() if key.startswith('D') and key[1:].isdigit()], key=lambda k: int(k[1:]))
|
|
4898
4951
|
print_line(f"Data Realizations: {len(data_groups)} found", 0)
|
|
4899
4952
|
print_line()
|
|
4900
4953
|
|
|
@@ -4980,7 +5033,7 @@ def _analyze_post_file(f, print_line, load_data=False):
|
|
|
4980
5033
|
print_line()
|
|
4981
5034
|
|
|
4982
5035
|
# Model statistics
|
|
4983
|
-
model_keys = sorted([key for key in f.keys() if key.startswith('M') and
|
|
5036
|
+
model_keys = sorted([key for key in f.keys() if key.startswith('M') and key[1:].isdigit()], key=lambda k: int(k[1:]))
|
|
4984
5037
|
print_line(f"Model Parameter Statistics: {len(model_keys)} found", 0)
|
|
4985
5038
|
print_line()
|
|
4986
5039
|
|
|
@@ -1465,6 +1465,9 @@ def plot_profile_discrete(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xaxis
|
|
|
1465
1465
|
If True, plot KL divergence instead of entropy in the entropy panel.
|
|
1466
1466
|
KL is plotted only if the ``/Mx/KL`` dataset exists; otherwise entropy
|
|
1467
1467
|
is used as fallback (default False).
|
|
1468
|
+
fontsize : int or float, optional
|
|
1469
|
+
Font size applied to all text elements (titles, axis labels, colorbar labels,
|
|
1470
|
+
tick labels). If None, matplotlib's current default is used (default None).
|
|
1468
1471
|
|
|
1469
1472
|
Returns
|
|
1470
1473
|
-------
|
|
@@ -1525,6 +1528,7 @@ def plot_profile_discrete(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xaxis
|
|
|
1525
1528
|
entropy_max = kwargs.get('entropy_max', None) # Will set default after loading Entropy
|
|
1526
1529
|
show_n_unique = kwargs.get('show_n_unique', False) # Show number of unique realizations
|
|
1527
1530
|
plot_kl = kwargs.get('plot_kl', False) # Plot KL divergence instead of entropy
|
|
1531
|
+
fontsize = kwargs.get('fontsize', None)
|
|
1528
1532
|
|
|
1529
1533
|
# Default to showing all panels
|
|
1530
1534
|
if panels is None:
|
|
@@ -1942,8 +1946,16 @@ def plot_profile_discrete(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xaxis
|
|
|
1942
1946
|
else:
|
|
1943
1947
|
ax[2].legend(loc='upper right')
|
|
1944
1948
|
|
|
1949
|
+
ax[2].set_xlabel({'x': 'X (m)', 'y': 'Y (m)', 'id': 'ID', 'index': 'Index'}.get(xaxis, xaxis))
|
|
1945
1950
|
plt.grid(True)
|
|
1946
1951
|
|
|
1952
|
+
if fontsize is not None:
|
|
1953
|
+
import matplotlib.text as _mtext
|
|
1954
|
+
for _t in fig.findobj(_mtext.Text):
|
|
1955
|
+
_t.set_fontsize(fontsize)
|
|
1956
|
+
for _ax in fig.get_axes():
|
|
1957
|
+
_ax.tick_params(labelsize=fontsize)
|
|
1958
|
+
|
|
1947
1959
|
plt.tight_layout()
|
|
1948
1960
|
|
|
1949
1961
|
# Create an invisible colorbar for the last subplot to maintain alignment
|
|
@@ -2009,13 +2021,15 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2009
2021
|
- ['std']: Only standard deviation
|
|
2010
2022
|
- ['stats']: Only temperature and log-likelihood
|
|
2011
2023
|
- Any combination of the above (e.g., ['value', 'stats'])
|
|
2012
|
-
Accepted panel names: 'value', 'median', 'mean', 'std', 'uncertainty', 'stats', 'temperature', 't'
|
|
2024
|
+
Accepted panel names: 'value', 'median', 'mean', 'harmonicmean', 'std', 'uncertainty', 'stats', 'temperature', 't'
|
|
2025
|
+
Using a statistic name (``'median'``, ``'mean'``, ``'harmonicmean'``) as the panel name also
|
|
2026
|
+
selects that statistic as the plotted value, overriding the default ``key``.
|
|
2013
2027
|
hardcopy : bool, optional
|
|
2014
2028
|
Save plot as PNG file (default False).
|
|
2015
2029
|
cmap : str or colormap, optional
|
|
2016
2030
|
Color scheme for plotting (default ``'jet'``).
|
|
2017
2031
|
key : str, optional
|
|
2018
|
-
Statistic to plot: ``'Mean'``, ``'Median'``, or ``'HarmonicMean'`` (default ``'
|
|
2032
|
+
Statistic to plot in the value panel: ``'Mean'``, ``'Median'``, or ``'HarmonicMean'`` (default ``'HarmonicMean'``).
|
|
2019
2033
|
alpha : float, optional
|
|
2020
2034
|
Transparency scaling factor based on normalized standard deviation (0.0=no
|
|
2021
2035
|
transparency, default; 1.0=full uncertainty-based transparency).
|
|
@@ -2036,6 +2050,9 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2036
2050
|
plot_kl : bool, optional
|
|
2037
2051
|
If True, plot KL divergence instead of standard deviation in the std panel.
|
|
2038
2052
|
Falls back to Std if ``/Mx/KL`` does not exist (default False).
|
|
2053
|
+
fontsize : int or float, optional
|
|
2054
|
+
Font size applied to all text elements (titles, axis labels, colorbar labels,
|
|
2055
|
+
tick labels). If None, matplotlib's current default is used (default None).
|
|
2039
2056
|
|
|
2040
2057
|
Returns
|
|
2041
2058
|
-------
|
|
@@ -2083,11 +2100,12 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2083
2100
|
kwargs.setdefault('clim', None)
|
|
2084
2101
|
|
|
2085
2102
|
alpha = kwargs.get('alpha',0.0)
|
|
2086
|
-
key = kwargs.get('key','
|
|
2103
|
+
key = kwargs.get('key','HarmonicMean')
|
|
2087
2104
|
txt = kwargs.get('txt','')
|
|
2088
2105
|
showInfo = kwargs.get('showInfo', 0)
|
|
2089
2106
|
show_n_unique = kwargs.get('show_n_unique', False) # Show number of unique realizations
|
|
2090
2107
|
plot_kl = kwargs.get('plot_kl', False) # Plot KL divergence instead of Std
|
|
2108
|
+
fontsize = kwargs.get('fontsize', None)
|
|
2091
2109
|
|
|
2092
2110
|
# Default to showing all panels
|
|
2093
2111
|
if panels is None:
|
|
@@ -2096,8 +2114,17 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2096
2114
|
# Normalize panel names to lowercase
|
|
2097
2115
|
panels = [p.lower() for p in panels]
|
|
2098
2116
|
|
|
2117
|
+
# Infer key from panel name if not explicitly provided
|
|
2118
|
+
if 'key' not in kwargs:
|
|
2119
|
+
if 'median' in panels:
|
|
2120
|
+
key = 'Median'
|
|
2121
|
+
elif 'mean' in panels:
|
|
2122
|
+
key = 'Mean'
|
|
2123
|
+
elif 'harmonicmean' in panels:
|
|
2124
|
+
key = 'HarmonicMean'
|
|
2125
|
+
|
|
2099
2126
|
# Determine which panels to show
|
|
2100
|
-
show_value = any(p in panels for p in ['value', 'median', 'mean'])
|
|
2127
|
+
show_value = any(p in panels for p in ['value', 'median', 'mean', 'harmonicmean'])
|
|
2101
2128
|
show_std = any(p in panels for p in ['std', 'uncertainty'])
|
|
2102
2129
|
show_stats = any(p in panels for p in ['stats', 't', 'temperature'])
|
|
2103
2130
|
|
|
@@ -2162,6 +2189,10 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2162
2189
|
except KeyError:
|
|
2163
2190
|
HarmonicMean=None
|
|
2164
2191
|
Std=f_post[Mstr+'/Std'][:].T
|
|
2192
|
+
try:
|
|
2193
|
+
LogStd=f_post[Mstr+'/LogStd'][:].T
|
|
2194
|
+
except KeyError:
|
|
2195
|
+
LogStd=None
|
|
2165
2196
|
T=f_post['/T'][:].T
|
|
2166
2197
|
try:
|
|
2167
2198
|
CHI2=f_post['/CHI2'][:]
|
|
@@ -2492,7 +2523,8 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2492
2523
|
fig.colorbar(im3, ax=ax[isp], label='KL Divergence (bits)')
|
|
2493
2524
|
else:
|
|
2494
2525
|
# STD
|
|
2495
|
-
|
|
2526
|
+
std_src = LogStd if LogStd is not None else Std
|
|
2527
|
+
std_data = std_src[:,ii]
|
|
2496
2528
|
if gap_alpha is not None:
|
|
2497
2529
|
std_data = np.ma.masked_where(gap_alpha == 0.0, std_data)
|
|
2498
2530
|
std_cmap, _ = get_colormap_and_limits('entropy')
|
|
@@ -2500,9 +2532,10 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2500
2532
|
cmap=std_cmap,
|
|
2501
2533
|
shading='auto')
|
|
2502
2534
|
im3.set_clim(0,1)
|
|
2503
|
-
|
|
2535
|
+
std_label = 'LogStd' if LogStd is not None else 'log₁₀ Std'
|
|
2536
|
+
ax[isp].set_title('LogStd %s' % name)
|
|
2504
2537
|
ax[isp].set_ylabel('Elevation (m)')
|
|
2505
|
-
fig.colorbar(im3, ax=ax[isp], label=
|
|
2538
|
+
fig.colorbar(im3, ax=ax[isp], label=std_label)
|
|
2506
2539
|
|
|
2507
2540
|
# Handle single parameter case (nm <= 1)
|
|
2508
2541
|
if show_value and nm<=1:
|
|
@@ -2579,8 +2612,16 @@ def plot_profile_continuous(f_post_h5, i1=1, i2=1e+9, ii=np.array(()), im=1, xax
|
|
|
2579
2612
|
else:
|
|
2580
2613
|
ax[2].legend(loc='upper right')
|
|
2581
2614
|
|
|
2615
|
+
ax[2].set_xlabel({'x': 'X (m)', 'y': 'Y (m)', 'id': 'ID', 'index': 'Index'}.get(xaxis, xaxis))
|
|
2582
2616
|
plt.grid(True)
|
|
2583
2617
|
|
|
2618
|
+
if fontsize is not None:
|
|
2619
|
+
import matplotlib.text as _mtext
|
|
2620
|
+
for _t in fig.findobj(_mtext.Text):
|
|
2621
|
+
_t.set_fontsize(fontsize)
|
|
2622
|
+
for _ax in fig.get_axes():
|
|
2623
|
+
_ax.tick_params(labelsize=fontsize)
|
|
2624
|
+
|
|
2584
2625
|
plt.tight_layout()
|
|
2585
2626
|
|
|
2586
2627
|
if show_stats and nm>1:
|