integrate-module 0.96.0__tar.gz → 0.97.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {integrate_module-0.96.0/integrate_module.egg-info → integrate_module-0.97.0}/PKG-INFO +1 -1
  2. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate.py +11 -1
  3. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_io.py +190 -26
  4. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_plot.py +16 -8
  5. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_rejection.py +5 -5
  6. {integrate_module-0.96.0 → integrate_module-0.97.0/integrate_module.egg-info}/PKG-INFO +1 -1
  7. {integrate_module-0.96.0 → integrate_module-0.97.0}/pyproject.toml +1 -1
  8. {integrate_module-0.96.0 → integrate_module-0.97.0}/LICENSE +0 -0
  9. {integrate_module-0.96.0 → integrate_module-0.97.0}/README.md +0 -0
  10. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/__init__.py +0 -0
  11. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/gex.py +0 -0
  12. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_borehole.py +0 -0
  13. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_hdf5_info_cli.py +0 -0
  14. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_query.py +0 -0
  15. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_rejection_cli.py +0 -0
  16. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate/integrate_timing_cli.py +0 -0
  17. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate_module.egg-info/SOURCES.txt +0 -0
  18. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate_module.egg-info/dependency_links.txt +0 -0
  19. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate_module.egg-info/entry_points.txt +0 -0
  20. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate_module.egg-info/requires.txt +0 -0
  21. {integrate_module-0.96.0 → integrate_module-0.97.0}/integrate_module.egg-info/top_level.txt +0 -0
  22. {integrate_module-0.96.0 → integrate_module-0.97.0}/setup.cfg +0 -0
  23. {integrate_module-0.96.0 → integrate_module-0.97.0}/tests/test_likelihood_multinomial.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: integrate_module
3
- Version: 0.96.0
3
+ Version: 0.97.0
4
4
  Summary: Localized probabilistic data integration
5
5
  Author-email: Thomas Mejer Hansen <tmeha@geo.au.dk>
6
6
  License: MIT
@@ -1173,7 +1173,7 @@ def get_process_handle_count():
1173
1173
  import os
1174
1174
  return psutil.Process(os.getpid()).num_handles()
1175
1175
 
1176
- def prior_data_gaaem(f_prior_h5, file_gex=None, stmfiles=None, N=0, doMakePriorCopy=True, im=1, id=1, im_height=0, Nhank=280, Nfreq=12, is_log=False, parallel=True, **kwargs):
1176
+ def prior_data_gaaem(f_prior_h5, file_gex=None, stmfiles=None, N=0, doMakePriorCopy=True, im=1, id=1, im_height=0, Nhank=280, Nfreq=12, is_log=False, parallel=True, force_replace=False, **kwargs):
1177
1177
  """
1178
1178
  Generate prior data for the GA-AEM method.
1179
1179
 
@@ -1211,6 +1211,10 @@ def prior_data_gaaem(f_prior_h5, file_gex=None, stmfiles=None, N=0, doMakePriorC
1211
1211
  Ncpu : int, optional
1212
1212
  Number of CPUs to use for parallel processing. Default is 0, which
1213
1213
  uses all available CPUs. Only used when parallel=True.
1214
+ force_replace : bool, optional
1215
+ If True, delete an existing /D{id} dataset before writing.
1216
+ If False (default), print a warning and return early if the
1217
+ dataset already exists.
1214
1218
  showInfo : int, optional
1215
1219
  Level of verbosity for output (0=silent, 1=normal, 2=verbose).
1216
1220
 
@@ -1440,6 +1444,12 @@ def prior_data_gaaem(f_prior_h5, file_gex=None, stmfiles=None, N=0, doMakePriorC
1440
1444
 
1441
1445
  # Write D to f_prior['/D1']
1442
1446
  with h5py.File(f_prior_data_h5, 'a') as f_prior:
1447
+ if Dname in f_prior:
1448
+ if force_replace:
1449
+ del f_prior[Dname]
1450
+ else:
1451
+ print("Key '%s' already exists in %s. Use force_replace=True to overwrite." % (Dname, f_prior_data_h5))
1452
+ return f_prior_data_h5
1443
1453
  f_prior[Dname] = D
1444
1454
 
1445
1455
  # Add method, type, file_ex, and im as attributes to '/D1'
@@ -538,6 +538,10 @@ def save_prior_data(f_prior_h5, D_new, id=None, force_delete=False,
538
538
  print("Key '%s' already exists. Use force_delete=True to overwrite." % key)
539
539
  return False
540
540
 
541
+ # Ensure 2D array: reshape 1D (N,) to (N, 1)
542
+ if D_new.ndim == 1:
543
+ D_new = D_new.reshape(-1, 1)
544
+
541
545
  # Write the new data
542
546
  # Convert to 32-bit float for better memory efficiency if the data is floating point
543
547
  if np.issubdtype(D_new.dtype, np.floating):
@@ -2415,6 +2419,93 @@ def copy_prior(input_filename, output_filename, idx=None, N_use=None, loadtomem=
2415
2419
 
2416
2420
  return output_filename
2417
2421
 
2422
+
2423
+ def filter_prior(f_prior_h5, type='nonnegative_data', id=1,
2424
+ f_prior_filtered_h5='', **kwargs):
2425
+ """
2426
+ Filter prior realizations and write the result to a new HDF5 file.
2427
+
2428
+ Removes rows (realizations) from all M and D datasets in a prior file
2429
+ based on a criterion evaluated on a chosen D dataset. The filtered file
2430
+ is a complete, self-consistent prior that can be used directly in place
2431
+ of the original.
2432
+
2433
+ Parameters
2434
+ ----------
2435
+ f_prior_h5 : str
2436
+ Path to the input prior HDF5 file.
2437
+ type : str, optional
2438
+ Filter criterion to apply. Supported values:
2439
+
2440
+ ``'nonnegative_data'``
2441
+ Keep only realizations where every value in ``/D{id}`` is >= 0.
2442
+ Useful after forward modelling to remove unphysical responses.
2443
+
2444
+ Default is ``'nonnegative_data'``.
2445
+ id : int, optional
2446
+ Index of the D dataset used for filtering (e.g. ``id=1`` uses ``/D1``).
2447
+ Default is 1.
2448
+ f_prior_filtered_h5 : str, optional
2449
+ Output filename. If empty, auto-generates as
2450
+ ``<stem>_filtered_<type>.h5``. Default is ``''``.
2451
+ **kwargs
2452
+ showInfo : int, optional
2453
+ Verbosity level (default 0). Passed through to ``copy_prior``.
2454
+
2455
+ Returns
2456
+ -------
2457
+ str
2458
+ Path to the filtered output HDF5 file.
2459
+
2460
+ Raises
2461
+ ------
2462
+ KeyError
2463
+ If ``/D{id}`` is not found in the input file.
2464
+ ValueError
2465
+ If an unknown ``type`` is specified.
2466
+
2467
+ Examples
2468
+ --------
2469
+ >>> f_prior_filtered = ig.filter_prior(f_prior_h5, type='nonnegative_data', id=1)
2470
+
2471
+ Notes
2472
+ -----
2473
+ Filtering is delegated to ``copy_prior``, which preserves all dataset
2474
+ attributes and applies compression. New filter types can be added by
2475
+ extending the ``if/elif`` block that computes ``idx``.
2476
+ """
2477
+ import numpy as np
2478
+ import os
2479
+
2480
+ showInfo = kwargs.get('showInfo', 0)
2481
+
2482
+ if not f_prior_filtered_h5:
2483
+ stem = os.path.splitext(f_prior_h5)[0]
2484
+ f_prior_filtered_h5 = '%s_filtered_%s.h5' % (stem, type)
2485
+
2486
+ Dname = '/D%d' % id
2487
+
2488
+ with h5py.File(f_prior_h5, 'r') as f:
2489
+ if Dname not in f:
2490
+ raise KeyError("Dataset '%s' not found in %s" % (Dname, f_prior_h5))
2491
+ D = f[Dname][:]
2492
+
2493
+ if type == 'nonnegative_data':
2494
+ idx = np.where(np.all(D >= 0, axis=1))[0]
2495
+ else:
2496
+ raise ValueError("Unknown filter type: '%s'" % type)
2497
+
2498
+ N_in = D.shape[0]
2499
+ N_out = len(idx)
2500
+ if showInfo >= 0:
2501
+ print("filter_prior [%s on %s]: keeping %d / %d realizations (%.1f%%)"
2502
+ % (type, Dname, N_out, N_in, 100.0 * N_out / N_in))
2503
+
2504
+ copy_prior(f_prior_h5, f_prior_filtered_h5, idx=idx, **kwargs)
2505
+
2506
+ return f_prior_filtered_h5
2507
+
2508
+
2418
2509
  def hdf5_scan(file_path):
2419
2510
  """
2420
2511
  Scans an HDF5 file and prints information about datasets (including their size) and attributes.
@@ -2499,13 +2590,15 @@ def download_file(url, download_dir, use_checksum=False, **kwargs):
2499
2590
  return
2500
2591
 
2501
2592
  # Download and save the file
2502
- print(f'Downloading {file_name}')
2593
+ if showInfo>0:
2594
+ print(f'Downloading {file_name}')
2503
2595
  response = requests.get(url)
2504
2596
  response.raise_for_status() # Check if the request was successful
2505
2597
 
2506
2598
  with open(file_path, 'wb') as file:
2507
2599
  file.write(response.content)
2508
- print(f'Downloaded {file_name}')
2600
+ if showInfo>-1:
2601
+ print(f'Downloaded {file_name}')
2509
2602
 
2510
2603
  # Check if checksum verification is enabled
2511
2604
  if use_checksum:
@@ -2787,6 +2880,31 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
2787
2880
  if loadAll:
2788
2881
  filelist.append('haderup_N1000000_dmax90_dz1.h5')
2789
2882
 
2883
+ elif case=='SOENDER_FELDING':
2884
+ filelist.append('README_SOENDER_FELDING')
2885
+ filelist.append('TX07_20240802_2x4_RC20-39.gex')
2886
+ filelist.append('TX07_20240802_2x4_RC20-39_eksternGPS.gex')
2887
+ filelist.append('TX07_20240912_2x4_RC20-39_eksterngps.gex')
2888
+ filelist.append('TX07_20241014_2x4_RC20_33_and_57_EksternGPS.gex')
2889
+ filelist.append('TX07_20241202_2x4_RC20_57.gex')
2890
+ filelist.append('TX07_20241202_2x4_RC20_57_EksternGPS.gex')
2891
+ filelist.append('20240819_AVG_export.xyz')
2892
+ filelist.append('20240820_AVG_export.xyz')
2893
+ filelist.append('20240821_AVG_export.xyz')
2894
+ filelist.append('20240911_AVG_export.xyz')
2895
+ filelist.append('20240924_AVG_export.xyz')
2896
+ filelist.append('20240924_test_AVG_export.xyz')
2897
+ filelist.append('20241007_AVG_export.xyz')
2898
+ filelist.append('20241008_AVG_export.xyz')
2899
+ filelist.append('20241029_AVG_export.xyz')
2900
+ filelist.append('20240911_eksterngps_AVG_export.xyz')
2901
+ filelist.append('20241210_AVG_export.xyz')
2902
+ filelist.append('20241210_InternGPS_AVG_export.xyz')
2903
+ filelist.append('Sdr_Felding_prior_standard_N1000000_dmax90_20260417_0929.h5')
2904
+ filelist.append('SdrFelding_boreholes.json')
2905
+
2906
+
2907
+
2790
2908
  else:
2791
2909
 
2792
2910
  filelist = []
@@ -2798,10 +2916,9 @@ def get_case_data(case='DAUGAARD', loadAll=False, loadType='', filelist=None, **
2798
2916
 
2799
2917
  urlErda = 'https://anon.erda.au.dk/share_redirect/dxOLKDtoul'
2800
2918
  urlErdaCase = '%s/%s' % (urlErda,case)
2801
- for remotefile in filelist:
2802
- #print(remotefile)
2919
+ from tqdm import tqdm
2920
+ for remotefile in tqdm(filelist, desc='Downloading %s' % case):
2803
2921
  remoteurl = '%s/%s' % (urlErdaCase,remotefile)
2804
- #remoteurl = 'https://anon.erda.au.dk/share_redirect/dxOLKDtoul/%s/%s' % (case,remotefile)
2805
2922
  download_file(remoteurl,'.',showInfo=showInfo)
2806
2923
  if showInfo>-1:
2807
2924
  print('--> Got data for case: %s' % case)
@@ -3051,7 +3168,7 @@ def save_data_gaussian(D_obs, D_std = [], d_std=[], Cd=[], id=1, id_prior=None,
3051
3168
  return f_data_h5
3052
3169
 
3053
3170
 
3054
- def xyz_to_h5(file_xyz, file_gex, f_data_h5=None, i_lm_skip=None, i_hm_skip=None, nan_value=None, showInfo=0, disregardFullNan=True):
3171
+ def xyz_to_h5(file_xyz, file_gex, f_data_h5=None, i_lm_skip=None, i_hm_skip=None, nan_value=None, showInfo=0, disregardFullNan=True, data_obs=None, data_std=None):
3055
3172
  """
3056
3173
  Convert Aarhus Workbench XYZ export file(s) to an INTEGRATE HDF5 data file.
3057
3174
 
@@ -3094,6 +3211,15 @@ def xyz_to_h5(file_xyz, file_gex, f_data_h5=None, i_lm_skip=None, i_hm_skip=None
3094
3211
  disregardFullNan : bool, optional
3095
3212
  If True (default), soundings where all gates are NaN are excluded
3096
3213
  from the output HDF5 file.
3214
+ data_obs : list of str, optional
3215
+ Flightlines column names (case-insensitive) to write as additional
3216
+ data blocks. The first entry becomes ``/D2/d_obs``, the second
3217
+ ``/D3/d_obs``, and so on. Example: ``['RX_ALTITUDE', 'TX_ALTITUDE']``.
3218
+ data_std : list of str or None, optional
3219
+ Flightlines column names for the corresponding standard deviations,
3220
+ same length as ``data_obs``. Use ``None`` for an individual entry to
3221
+ fall back to ``0.05 * |d_obs|`` for that column. If the whole
3222
+ parameter is omitted, all columns default to ``0.05 * |d_obs|``.
3097
3223
 
3098
3224
  Returns
3099
3225
  -------
@@ -3182,6 +3308,10 @@ def xyz_to_h5(file_xyz, file_gex, f_data_h5=None, i_lm_skip=None, i_hm_skip=None
3182
3308
  ld = {k: pd.concat([xyz.layer_data[k] for xyz in xyz_list], ignore_index=True)
3183
3309
  for k in xyz_list[0].layer_data}
3184
3310
 
3311
+ # Handle XYZ files that use 'x'/'y' instead of 'utmx'/'utmy'
3312
+ if 'utmx' not in fl.columns and 'x' in fl.columns:
3313
+ fl = fl.rename(columns={'x': 'utmx', 'y': 'utmy'})
3314
+
3185
3315
  # Determine dummy/missing value: explicit arg > XYZ header > fallback 9999
3186
3316
  if nan_value is None:
3187
3317
  nan_value = xyz_list[0].model_info.get('dummy', 9999)
@@ -3245,17 +3375,17 @@ def xyz_to_h5(file_xyz, file_gex, f_data_h5=None, i_lm_skip=None, i_hm_skip=None
3245
3375
  d_std[:, n_lm + j_arr[0]] = d_std_high
3246
3376
 
3247
3377
  # --- exclude all-NaN soundings ---
3248
- if disregardFullNan:
3249
- keep = ~np.all(np.isnan(d_obs), axis=1)
3378
+ keep = ~np.all(np.isnan(d_obs), axis=1) if disregardFullNan else np.ones(len(d_obs), dtype=bool)
3379
+ if disregardFullNan and showInfo >= 1:
3250
3380
  n_removed = np.sum(~keep)
3251
- if showInfo >= 1 and n_removed > 0:
3381
+ if n_removed > 0:
3252
3382
  print('Removed %d all-NaN soundings (%d remaining)' % (n_removed, np.sum(keep)))
3253
- d_obs = d_obs[keep]
3254
- d_std = d_std[keep]
3255
- UTMX = UTMX[keep]
3256
- UTMY = UTMY[keep]
3257
- LINE = LINE[keep]
3258
- ELEVATION = ELEVATION[keep]
3383
+ d_obs = d_obs[keep]
3384
+ d_std = d_std[keep]
3385
+ UTMX = UTMX[keep]
3386
+ UTMY = UTMY[keep]
3387
+ LINE = LINE[keep]
3388
+ ELEVATION = ELEVATION[keep]
3259
3389
 
3260
3390
  # --- write HDF5 ---
3261
3391
  save_data_gaussian(
@@ -3273,10 +3403,29 @@ def xyz_to_h5(file_xyz, file_gex, f_data_h5=None, i_lm_skip=None, i_hm_skip=None
3273
3403
  if n_channels >= 2:
3274
3404
  hf.create_dataset('/D1/i_hm', data=np.arange(i_hm_start, i_hm_end))
3275
3405
 
3406
+ # --- write additional data columns as D2, D3, ... ---
3407
+ if data_obs is not None:
3408
+ _data_std = data_std if data_std is not None else [None] * len(data_obs)
3409
+ for i, col_obs in enumerate(data_obs):
3410
+ obs = fl[col_obs.lower()].values[ch1_pos][keep].reshape(-1, 1).astype(float)
3411
+ std_col = _data_std[i]
3412
+ if std_col is not None:
3413
+ std = fl[std_col.lower()].values[ch1_pos][keep].reshape(-1, 1).astype(float)
3414
+ else:
3415
+ std = 0.05 * np.abs(obs)
3416
+ save_data_gaussian(
3417
+ obs, D_std=std,
3418
+ f_data_h5=f_data_h5,
3419
+ id=i + 2,
3420
+ name=col_obs,
3421
+ delete_if_exist=False,
3422
+ showInfo=showInfo,
3423
+ )
3424
+
3276
3425
  return f_data_h5
3277
3426
 
3278
3427
 
3279
- def save_data_multinomial(D_obs, i_use=None, id=[], id_prior=None, f_data_h5='data.h5', compression=None, compression_opts=None, **kwargs):
3428
+ def save_data_multinomial(D_obs, i_use=None, id=[], id_prior=None, f_data_h5='data.h5', name=None, compression=None, compression_opts=None, **kwargs):
3280
3429
  """
3281
3430
  Save observed data to an HDF5 file in a specified group with a multinomial noise model.
3282
3431
 
@@ -3288,6 +3437,9 @@ def save_data_multinomial(D_obs, i_use=None, id=[], id_prior=None, f_data_h5='d
3288
3437
  :type id_prior: int, optional
3289
3438
  :param f_data_h5: The path to the HDF5 file where the data will be written. Default is 'data.h5'.
3290
3439
  :type f_data_h5: str, optional
3440
+ :param name: Optional human-readable name for this dataset (e.g. 'Lithology'). Stored as
3441
+ the ``name`` attribute on the HDF5 group and used by plotting routines for titles.
3442
+ :type name: str, optional
3291
3443
  :param kwargs: Additional keyword arguments.
3292
3444
  :return: The path to the HDF5 file where the data was written.
3293
3445
  :rtype: str
@@ -3374,7 +3526,9 @@ def save_data_multinomial(D_obs, i_use=None, id=[], id_prior=None, f_data_h5='d
3374
3526
 
3375
3527
  # write attribute noise_model as 'multinomial'
3376
3528
  f['/%s/' % D_str].attrs['noise_model'] = 'multinomial'
3377
-
3529
+ if name is not None:
3530
+ f['/%s/' % D_str].attrs['name'] = name
3531
+
3378
3532
  return id, f_data_h5
3379
3533
 
3380
3534
 
@@ -3561,20 +3715,30 @@ def merge_data(f_data, f_gex='', delta_line=0, f_data_merged_h5='', **kwargs):
3561
3715
  X, Y, LINE, ELEVATION = ig.get_geometry(f_data_h5)
3562
3716
  D = ig.load_data(f_data_h5, showInfo=showInfo)
3563
3717
 
3564
- # append data
3565
- Xc = np.append(Xc, X)
3566
- Yc = np.append(Yc, Y)
3567
- LINEc = np.append(LINEc, LINE+i*delta_line)
3568
- ELEVATIONc = np.append(ELEVATIONc, ELEVATION)
3569
-
3718
+ # attempt data merge before touching geometry
3719
+ merge_ok = True
3720
+ d_obs_new = list(d_obs_c)
3721
+ d_std_new = list(d_std_c)
3570
3722
  for id in range(len(d_obs_c)):
3571
- #print(id)
3572
3723
  try:
3573
- d_obs_c[id] = np.vstack((d_obs_c[id], np.atleast_2d(D['d_obs'][id])))
3574
- d_std_c[id] = np.vstack((d_std_c[id], np.atleast_2d(D['d_std'][id])))
3724
+ d_obs_new[id] = np.vstack((d_obs_c[id], np.atleast_2d(D['d_obs'][id])))
3725
+ d_std_new[id] = np.vstack((d_std_c[id], np.atleast_2d(D['d_std'][id])))
3575
3726
  except:
3727
+ merge_ok = False
3576
3728
  if showInfo>-1:
3577
3729
  print("!!!!! Could not merge %s" % f_data_h5)
3730
+ break
3731
+
3732
+ if not merge_ok:
3733
+ continue
3734
+
3735
+ # only append geometry when data merged successfully
3736
+ d_obs_c = d_obs_new
3737
+ d_std_c = d_std_new
3738
+ Xc = np.append(Xc, X)
3739
+ Yc = np.append(Yc, Y)
3740
+ LINEc = np.append(LINEc, LINE+i*delta_line)
3741
+ ELEVATIONc = np.append(ELEVATIONc, ELEVATION)
3578
3742
 
3579
3743
  Xc = np.atleast_2d(Xc).T
3580
3744
  Yc = np.atleast_2d(Yc).T
@@ -2765,6 +2765,8 @@ def plot_data(f_data_h5, i_plot=[], Dkey=[], plType='imshow', uselog=True, **kwa
2765
2765
  import matplotlib
2766
2766
  import h5py
2767
2767
 
2768
+ showInfo = kwargs.get('showInfo', -1)
2769
+
2768
2770
  # Check if the data file f_data_h5 exists
2769
2771
  if not os.path.exists(f_data_h5):
2770
2772
  print("plot_data: File %s does not exist" % f_data_h5)
@@ -2778,11 +2780,13 @@ def plot_data(f_data_h5, i_plot=[], Dkey=[], plType='imshow', uselog=True, **kwa
2778
2780
  Dkeys = []
2779
2781
  for key in f_data.keys():
2780
2782
  if key[0]=='D':
2781
- print("plot_data: Found data set %s" % key)
2783
+ if showInfo>0:
2784
+ print("plot_data: Found data set %s" % key)
2782
2785
  Dkeys.append(key)
2783
2786
  nd += 1
2784
2787
  Dkey=Dkeys[0]
2785
- print("plot_data: Using data set %s" % Dkey)
2788
+ if showInfo>0:
2789
+ print("plot_data: Using data set %s" % Dkey)
2786
2790
 
2787
2791
  noise_model = f_data['/%s' % Dkey].attrs['noise_model']
2788
2792
 
@@ -2810,9 +2814,6 @@ def plot_data(f_data_h5, i_plot=[], Dkey=[], plType='imshow', uselog=True, **kwa
2810
2814
  # remove all values in i_plot that are smaller than 0
2811
2815
  i_plot = i_plot[i_plot>=0]
2812
2816
 
2813
- # reaplce values larger than 1 with nan in d_std
2814
- d_std[d_std>1] = np.nan
2815
-
2816
2817
  # find number of nan values on d_obs
2817
2818
  non_nan = np.sum(~np.isnan(d_obs), axis=1)
2818
2819
 
@@ -2981,6 +2982,12 @@ def plot_data_prior(f_prior_data_h5,
2981
2982
  obs_data = None
2982
2983
  is_1d = False
2983
2984
 
2985
+ # Read name attribute from observed data group
2986
+ dh5_str_name = 'D%d' % id_data
2987
+ name_attr = f_data[dh5_str_name].attrs.get('name', None) if dh5_str_name in f_data else None
2988
+ if isinstance(name_attr, bytes):
2989
+ name_attr = name_attr.decode('utf-8')
2990
+
2984
2991
  # Load prior data
2985
2992
  dh5_str_prior = 'D%d' % (id)
2986
2993
  if dh5_str_prior in f_prior_data:
@@ -3031,7 +3038,8 @@ def plot_data_prior(f_prior_data_h5,
3031
3038
  plt.xlabel('Data Value')
3032
3039
  plt.ylabel('Probability Density')
3033
3040
  plt.legend()
3034
- plt.title('Prior data vs Observed data (1D Histogram)')
3041
+ name_suffix = ': %s' % name_attr if name_attr else ''
3042
+ plt.title('D%d%s: Prior vs Observed (1D Histogram)' % (id_data, name_suffix))
3035
3043
  else:
3036
3044
  # Original 2D line plot
3037
3045
  if prior_data is not None:
@@ -3044,7 +3052,8 @@ def plot_data_prior(f_prior_data_h5,
3044
3052
 
3045
3053
  plt.xlabel('Data #')
3046
3054
  plt.ylabel('Data Value')
3047
- plt.title('Prior data (black) and observed data (red)')
3055
+ name_suffix = ': %s' % name_attr if name_attr else ''
3056
+ plt.title('D%d%s: Prior (black) vs Observed (red)' % (id_data, name_suffix))
3048
3057
 
3049
3058
  if ylim is not None:
3050
3059
  if is_1d:
@@ -4763,6 +4772,5 @@ def plot_boreholes(W, f_prior_h5=None, Mstr='/M2', hardcopy=False, **kwargs):
4763
4772
  if showInfo >= 0:
4764
4773
  print(f'plot_boreholes: saved {out}')
4765
4774
 
4766
- plt.show()
4767
4775
  return fig
4768
4776
 
@@ -31,7 +31,7 @@ def integrate_rejection(f_prior_h5='prior.h5',
31
31
  N_use=100000000000,
32
32
  id_use=[],
33
33
  ip_range=[],
34
- nr=400,
34
+ nr=1000,
35
35
  autoT=1,
36
36
  T_base = 1,
37
37
  Nchunks=0,
@@ -73,7 +73,7 @@ def integrate_rejection(f_prior_h5='prior.h5',
73
73
  Default is empty list.
74
74
  nr : int, optional
75
75
  Number of posterior samples to retain per data point.
76
- Default is 400.
76
+ Default is 1000.
77
77
  autoT : int, optional
78
78
  Automatic temperature estimation method (1=enabled, 0=disabled).
79
79
  Default is 1.
@@ -413,7 +413,7 @@ def integrate_rejection_range(D,
413
413
  N_use=None,
414
414
  id_use=[],
415
415
  ip_range=[],
416
- nr=400,
416
+ nr=1000,
417
417
  autoT=1,
418
418
  T_base = 1,
419
419
  T_N_above=10,
@@ -448,7 +448,7 @@ def integrate_rejection_range(D,
448
448
  Default is empty list.
449
449
  nr : int, optional
450
450
  Number of posterior samples to retain per data point.
451
- Default is 400.
451
+ Default is 1000.
452
452
  autoT : int, optional
453
453
  Automatic temperature estimation method (1=enabled, 0=disabled).
454
454
  Default is 1.
@@ -1769,4 +1769,4 @@ def compute_hypothesis_probability(f_post_h5_list, **kwargs):
1769
1769
  print(f" - Hypothesis {i+1}: mean P = {np.nanmean(P[:, i]):.4f}, "
1770
1770
  f"median P = {np.nanmedian(P[:, i]):.4f}")
1771
1771
 
1772
- return P, mode, entropy_values
1772
+ return P, mode, entropy_values
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: integrate_module
3
- Version: 0.96.0
3
+ Version: 0.97.0
4
4
  Summary: Localized probabilistic data integration
5
5
  Author-email: Thomas Mejer Hansen <tmeha@geo.au.dk>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "integrate_module"
7
- version = "0.96.0"
7
+ version = "0.97.0"
8
8
  description = "Localized probabilistic data integration"
9
9
  readme = { file = "README.md", content-type = "text/markdown" }
10
10
  requires-python = ">=3.10"