integrate_module 0.99.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1127 @@
1
+
2
def compute_P_obs_discrete(depth_top=None, depth_bottom=None, lithology_obs=None, z=None, class_id=None, lithology_prob=0.8, P_prior=None, W=None):
    """
    Compute discrete observation probability matrix from depth intervals and lithology observations.

    Every depth point in ``z`` that falls inside an observation interval
    (``depth_top[i] <= z < depth_bottom[i]``) gets the probability
    ``lithology_prob`` for the observed class, while the remaining probability
    mass is shared equally between the other classes. If intervals overlap,
    the interval that comes last in the input wins.

    Parameters
    ----------
    depth_top : array-like, optional
        Array of top depths for each observation interval. Required if W is not provided.
    depth_bottom : array-like, optional
        Array of bottom depths for each observation interval. Required if W is not provided.
    lithology_obs : array-like, optional
        Array of observed lithology class IDs for each interval. Required if W is not provided.
    z : array-like
        Array of depth/position values where probabilities are computed.
        Always required (it is not read from W).
    class_id : array-like
        Array of unique class identifiers (e.g., [0, 1, 2] for 3 lithology types).
        Always required (it is not read from W).
    lithology_prob : float or array-like, optional
        Probability assigned to the observed class. Can be:
        - float: Same probability for all intervals (default is 0.8)
        - array: Array of probabilities, one per interval (must match length of lithology_obs)
    P_prior : array-like, optional
        Prior probability matrix of shape (nclass, nm). When given, a float copy
        of it is used for depths not covered by any observation. If None,
        uncovered depths contain NaN. Default is None.
    W : dict, optional
        Well/borehole dictionary containing observation data. Keys that are
        present override the corresponding individual parameters:
        - 'depth_top': Array of top depths
        - 'depth_bottom': Array of bottom depths
        - 'class_obs': Array of observed class IDs (e.g., lithology, soil type)
        - 'class_prob': Probability or array of probabilities (optional)
        - 'X', 'Y': Well coordinates (not used in this function)
        Default is None.

    Returns
    -------
    P_obs : ndarray
        Float matrix of shape (nclass, nm), where nclass is the number of
        classes and nm the number of depth points. Covered depth points hold
        lithology_prob for the observed class and (1 - lithology_prob) / (nclass - 1)
        for every other class. Uncovered depth points hold NaN, or the prior
        when P_prior is given.

    Raises
    ------
    ValueError
        If required inputs are missing, if depth_top, depth_bottom and
        lithology_obs differ in length, if lithology_prob is an array whose
        length does not match lithology_obs, or if P_prior does not have
        shape (nclass, nm).

    Examples
    --------
    >>> # Traditional usage with individual parameters
    >>> depth_top = [0, 10, 20]
    >>> depth_bottom = [10, 20, 30]
    >>> lithology_obs = [1, 2, 1]  # clay, sand, clay
    >>> z = np.arange(30)
    >>> class_id = [0, 1, 2]  # gravel, clay, sand
    >>> P_obs = compute_P_obs_discrete(depth_top, depth_bottom, lithology_obs, z, class_id)
    >>> print(P_obs.shape)  # (3, 30)

    >>> # With different probabilities per interval
    >>> lithology_prob = [0.9, 0.7, 0.85]  # Higher confidence in first interval
    >>> P_obs = compute_P_obs_discrete(depth_top, depth_bottom, lithology_obs, z, class_id, lithology_prob=lithology_prob)

    >>> # Using well dictionary (cleaner interface)
    >>> W = {'depth_top': [0, 10, 20], 'depth_bottom': [10, 20, 30],
    ...      'class_obs': [1, 2, 1], 'class_prob': [0.9, 0.7, 0.85],
    ...      'X': 543000.0, 'Y': 6175800.0}
    >>> P_obs = compute_P_obs_discrete(z=z, class_id=class_id, W=W)
    """
    import numpy as np

    # Entries present in W take precedence over the individual arguments.
    if W is not None:
        if 'depth_top' in W:
            depth_top = W['depth_top']
        if 'depth_bottom' in W:
            depth_bottom = W['depth_bottom']
        if 'class_obs' in W:
            lithology_obs = W['class_obs']
        if 'class_prob' in W:
            lithology_prob = W['class_prob']

    # Validate required parameters
    if depth_top is None or depth_bottom is None or lithology_obs is None:
        raise ValueError("depth_top, depth_bottom, and lithology_obs must be provided either as arguments or in W dictionary")
    if z is None or class_id is None:
        raise ValueError("z and class_id are required parameters")

    n_intervals = len(lithology_obs)
    if len(depth_top) != n_intervals or len(depth_bottom) != n_intervals:
        raise ValueError(
            f"depth_top ({len(depth_top)}), depth_bottom ({len(depth_bottom)}) and "
            f"lithology_obs ({n_intervals}) must have the same length"
        )

    z_arr = np.asarray(z)
    nm = len(z_arr)
    nclass = len(class_id)

    # A scalar (or length-1) probability is broadcast to all intervals.
    lithology_prob_array = np.atleast_1d(lithology_prob)
    if len(lithology_prob_array) == 1:
        lithology_prob_array = np.full(n_intervals, lithology_prob_array[0])
    elif len(lithology_prob_array) != n_intervals:
        raise ValueError(f"lithology_prob array length ({len(lithology_prob_array)}) must match lithology_obs length ({len(lithology_obs)})")

    # Start from the prior (as a float copy, so the caller's array is never
    # modified and integer priors are not truncated) or from all-NaN.
    if P_prior is not None:
        P_obs = np.array(P_prior, dtype=float)
        if P_obs.shape != (nclass, nm):
            raise ValueError(
                f"P_prior must have shape ({nclass}, {nm}), got {P_obs.shape}"
            )
    else:
        P_obs = np.full((nclass, nm), np.nan)

    # Intervals are processed in input order so that, where they overlap,
    # the last interval overwrites earlier ones.
    for i in range(n_intervals):
        inside = (z_arr >= depth_top[i]) & (z_arr < depth_bottom[i])
        if not inside.any():
            continue

        p_hit = lithology_prob_array[i]
        # With a single class there is no "other" class to share the
        # remaining mass; avoid dividing by zero.
        p_miss = (1 - p_hit) / (nclass - 1) if nclass > 1 else 0.0

        is_observed = np.array([c == lithology_obs[i] for c in class_id], dtype=bool)
        column = np.where(is_observed, p_hit, p_miss)
        P_obs[:, inside] = column[:, None]

    return P_obs
126
+
127
def _compute_mode_sequential(M_lithology, z, depth_top, depth_bottom, nl, showInfo=1):
    """
    Sequential computation of mode lithology for each realization and depth interval.

    For every interval, the indices of the entries of ``z`` closest to
    ``depth_top[i]`` and ``depth_bottom[i]`` are looked up once. For each
    realization the most frequent value in ``M_lithology[im][id_top:id_bottom]``
    is stored. When both depths map to the same index, the single value at
    that index is used instead.

    Parameters
    ----------
    M_lithology : sequence of 1-D sequences
        Lithology models, indexed first by realization and then by depth index.
    z : array-like
        Depth values corresponding to the depth axis of ``M_lithology``.
    depth_top, depth_bottom : array-like
        Top and bottom depth of each interval.
    nl : int
        Number of columns of the output array.
    showInfo : int, optional
        0: silent; 1: one summary line; >1: tqdm progress bar and timing.

    Returns
    -------
    lithology_mode : ndarray of int, shape (nreal, nl)
        Mode class per realization and interval.

    Raises
    ------
    ValueError
        If the top of an interval maps to a deeper index than its bottom,
        so that the interval contains no depth points.
    """
    import numpy as np
    import time

    z_arr = np.asarray(z)
    nreal = len(M_lithology)
    lithology_mode = np.zeros((nreal, nl), dtype=int)

    # Show info based on showInfo level
    if showInfo == 1:
        print(f'compute_P_obs_sparse: Processing {nreal} realizations, {nl} intervals')

    # Start timing
    t_start = time.time()

    # The index range of each interval depends only on z and the interval
    # depths, so it is computed once instead of once per realization.
    index_ranges = []
    for i in range(len(depth_top)):
        id_top = int(np.argmin(np.abs(z_arr - depth_top[i])))
        id_bottom = int(np.argmin(np.abs(z_arr - depth_bottom[i])))
        index_ranges.append((id_top, id_bottom))

    iterator = np.arange(nreal)
    if showInfo > 1:
        # tqdm is only needed (and therefore only imported) for the progress bar.
        from tqdm import tqdm
        iterator = tqdm(iterator, desc='compute_P_obs_sparse')

    for im in iterator:
        M_test = M_lithology[im]
        for i, (id_top, id_bottom) in enumerate(index_ranges):
            if id_top == id_bottom:
                lithology_mode_layer = M_test[id_top]
            elif id_top > id_bottom:
                raise ValueError(
                    f"interval {i} is empty: depth_top maps to index {id_top} "
                    f"but depth_bottom maps to index {id_bottom}"
                )
            else:
                # Most frequent lithology in this layer (ties resolve to the
                # smallest value, since np.unique returns sorted values).
                values, counts = np.unique(M_test[id_top:id_bottom], return_counts=True)
                lithology_mode_layer = values[np.argmax(counts)]

            lithology_mode[im, i] = lithology_mode_layer

    # Show timing information if showInfo > 1
    if showInfo > 1:
        t_elapsed = time.time() - t_start
        time_per_model = t_elapsed / nreal if nreal else 0.0
        print(f'compute_P_obs_sparse: Total runtime: {t_elapsed:.2f} seconds')
        print(f'compute_P_obs_sparse: Time per model: {time_per_model*1000:.2f} ms')

    return lithology_mode
179
+
180
def _compute_mode_worker(args):
    """
    Worker function for parallel processing of lithology mode computation.

    Processes the chunk of realizations assigned to this worker.

    Parameters
    ----------
    args : tuple
        ``(chunk_indices, shared_memory_refs, z, depth_top, depth_bottom, nl)``:
        the realization indices to process, the references passed to
        ``ig.reconstruct_shared_arrays`` to obtain the lithology array, the
        depth axis, the interval top/bottom depths, and the number of intervals.

    Returns
    -------
    lithology_mode_chunk : ndarray of int, shape (len(chunk_indices), nl)
        Mode class per realization of the chunk and interval.

    Raises
    ------
    ValueError
        If the top of an interval maps to a deeper index than its bottom,
        so that the interval contains no depth points.
    """
    import numpy as np
    import integrate as ig

    # Unpack arguments
    chunk_indices, shared_memory_refs, z, depth_top, depth_bottom, nl = args

    # Reconstruct shared array
    [M_lithology], worker_shm_objects = ig.reconstruct_shared_arrays(shared_memory_refs)

    try:
        z_arr = np.asarray(z)

        # The index range of each interval is the same for all realizations,
        # so it is computed once per chunk instead of once per realization.
        index_ranges = []
        for i in range(nl):
            id_top = int(np.argmin(np.abs(z_arr - depth_top[i])))
            id_bottom = int(np.argmin(np.abs(z_arr - depth_bottom[i])))
            index_ranges.append((id_top, id_bottom))

        # Initialize output for this chunk
        lithology_mode_chunk = np.zeros((len(chunk_indices), nl), dtype=int)

        # Process assigned realizations
        for local_idx, im in enumerate(chunk_indices):
            M_test = M_lithology[im]
            for i, (id_top, id_bottom) in enumerate(index_ranges):
                if id_top == id_bottom:
                    lithology_mode_chunk[local_idx, i] = M_test[id_top]
                elif id_top > id_bottom:
                    raise ValueError(
                        f"interval {i} is empty: depth_top maps to index {id_top} "
                        f"but depth_bottom maps to index {id_bottom}"
                    )
                else:
                    values, counts = np.unique(M_test[id_top:id_bottom], return_counts=True)
                    lithology_mode_chunk[local_idx, i] = values[np.argmax(counts)]

        return lithology_mode_chunk

    finally:
        # Close this worker's handles to the shared memory, even on error.
        for shm in worker_shm_objects:
            shm.close()
221
+
222
def _compute_mode_parallel(M_lithology, z, depth_top, depth_bottom, nl, Ncpu, showInfo=1):
    """
    Parallel computation of mode lithology using multiprocessing.

    The realization indices are split into chunks, each chunk is processed by
    ``_compute_mode_worker`` in a ``multiprocessing.Pool``, and the per-chunk
    results are concatenated in order. ``M_lithology`` is handed to the
    workers through ``ig.create_shared_memory``.

    Parameters
    ----------
    M_lithology : ndarray
        Lithology models, one realization per row.
    z : array-like
        Depth values corresponding to the depth axis of ``M_lithology``.
    depth_top, depth_bottom : array-like
        Top and bottom depth of each interval.
    nl : int
        Number of intervals.
    Ncpu : int
        Number of worker processes. Values below 1 select
        ``multiprocessing.cpu_count()``.
    showInfo : int, optional
        0: silent; 1: one summary line; >1: tqdm progress bar over chunks
        and timing information.

    Returns
    -------
    lithology_mode : ndarray of int, shape (nreal, nl)
        Mode class per realization and interval.
    """
    import numpy as np
    import multiprocessing
    from multiprocessing import Pool
    import integrate as ig
    import time

    # Setup
    if Ncpu < 1:
        Ncpu = multiprocessing.cpu_count()

    nreal = len(M_lithology)

    # Show info based on showInfo level
    if showInfo == 1:
        print(f'compute_P_obs_sparse: Processing {nreal} realizations, {nl} intervals (parallel, {Ncpu} CPUs)')
    elif showInfo > 1:
        print(f'compute_P_obs_sparse: Processing {nreal} realizations, {nl} intervals')
        print(f'compute_P_obs_sparse: Using {Ncpu} CPU cores in parallel mode')

    # Start timing
    t_start = time.time()

    # Create shared memory for M_lithology
    shared_memory_refs, shm_objects = ig.create_shared_memory([M_lithology])

    try:
        # Many small chunks give more frequent progress updates and better
        # load balancing; the cap limits per-chunk overhead.
        min_chunk_size = 100  # Minimum realizations per chunk
        max_chunks = 500  # Cap to avoid excessive overhead
        n_chunks = min(max(nreal // min_chunk_size, Ncpu), max_chunks)
        # Never create more chunks than realizations (they would be empty),
        # but always keep at least one so array_split is well defined.
        n_chunks = max(1, min(n_chunks, nreal))

        if showInfo > 1:
            print(f'compute_P_obs_sparse: Splitting into {n_chunks} chunks for {Ncpu} workers')

        realization_chunks = np.array_split(np.arange(nreal), n_chunks)

        # Create worker arguments (include small arrays directly)
        worker_args = [
            (chunk_indices, shared_memory_refs, z, depth_top, depth_bottom, nl)
            for chunk_indices in realization_chunks
        ]

        # Execute in parallel
        with Pool(processes=Ncpu) as p:
            if showInfo > 1:
                # tqdm is only needed (and therefore only imported) here.
                from tqdm import tqdm
                # imap yields results in submission order as they complete,
                # which lets tqdm show progress.
                results = list(tqdm(
                    p.imap(_compute_mode_worker, worker_args),
                    total=len(worker_args),
                    desc='compute_P_obs_sparse (parallel chunks)',
                    unit='chunk'
                ))
            else:
                # Use regular map without progress tracking
                results = p.map(_compute_mode_worker, worker_args)

        # Concatenate results (chunk order equals realization order)
        lithology_mode = np.concatenate(results, axis=0)

        # Show timing information if showInfo > 1
        if showInfo > 1:
            t_elapsed = time.time() - t_start
            time_per_model = t_elapsed / nreal if nreal else 0.0
            print(f'compute_P_obs_sparse: Total runtime: {t_elapsed:.2f} seconds')
            print(f'compute_P_obs_sparse: Time per model: {time_per_model*1000:.2f} ms')
            print(f'compute_P_obs_sparse: Speedup with {Ncpu} cores vs sequential: ~{Ncpu*0.7:.1f}x (estimated)')

        return lithology_mode

    finally:
        # Cleanup shared memory, also when a worker raised.
        ig.cleanup_shared_memory(shm_objects)
302
+
303
def welllog_compute_P_obs_class_mode(M_lithology=None, depth_top=None, depth_bottom=None, lithology_obs=None, z=None, class_id=None, lithology_prob=0.8, W=None, parallel=False, Ncpu=-1, showInfo=1):
    """
    Compute observation probability matrix from well log class observations, optionally extracting mode class from prior models.

    Two things are computed:

    1. ``P_obs``: one probability column per observation interval, with
       ``lithology_prob`` for the observed class and the remaining mass shared
       equally between the other classes.
    2. ``class_mode`` (only when ``M_lithology`` is given): for every prior
       realization and interval, the most frequent (mode) class within the
       interval.

    **Simplified Usage**: When called without ``M_lithology`` (e.g. only
    ``W=W`` and ``class_id``), only ``P_obs`` is computed and the class mode
    is returned as None.

    Parameters
    ----------
    M_lithology : ndarray, optional
        Array of lithology models from prior ensemble, shape (nreal, nz). If None,
        only P_obs is computed and the mode is returned as None. Default is None.
    depth_top : array-like, optional
        Array of top depths for each observation interval. Required if W is not provided.
    depth_bottom : array-like, optional
        Array of bottom depths for each observation interval. Required if W is not provided.
    lithology_obs : array-like, optional
        Array of observed lithology class IDs for each interval. Required if W is not provided.
    z : array-like, optional
        Array of depth/position values corresponding to M_lithology depth discretization.
        Required if M_lithology is provided.
    class_id : array-like
        Array of unique class identifiers (e.g., [0, 1, 2] for 3 lithology types).
        Always required (it is not read from W).
    lithology_prob : float or array-like, optional
        Probability assigned to the observed class. Can be:
        - float: Same probability for all intervals (default is 0.8)
        - array: Array of probabilities, one per interval (must match length of lithology_obs)
    W : dict, optional
        Well/borehole dictionary containing observation data. Keys that are
        present override the corresponding individual parameters:
        - 'depth_top': Array of top depths
        - 'depth_bottom': Array of bottom depths
        - 'class_obs': Array of observed class IDs (e.g., lithology, soil type)
        - 'class_prob': Probability or array of probabilities (optional)
        - 'X', 'Y': Well coordinates (not used in this function)
        Default is None.
    parallel : bool, optional
        Request parallel mode computation. The parallel path is only taken when
        ``ig.use_parallel()`` also returns a true value; otherwise the
        sequential path is used. Only relevant when M_lithology is provided.
        Default is False.
    Ncpu : int, optional
        Number of CPU cores to use for parallel processing. Default is -1 (auto-detect).
        Only used when the parallel path is taken.
    showInfo : int, optional
        Control information output level of the mode computation. Default is 1.
        - 0: No information printed
        - 1: Single line info (number of realizations, intervals)
        - >1: Progress bar with tqdm, runtime statistics, and time per model

    Returns
    -------
    P_obs : ndarray
        Probability matrix of shape (nclass, n_obs) where nclass is the number of classes
        and n_obs is the number of observation intervals. Each column represents the
        probability distribution for one depth interval.
    class_mode : ndarray or None
        If M_lithology is provided: array of mode class values of shape (nreal, n_obs).
        If M_lithology is None: None.

    Raises
    ------
    ValueError
        If required inputs are missing, or if lithology_prob is an array whose
        length does not match lithology_obs.

    Examples
    --------
    >>> # Full usage: Load prior lithology models and compute mode
    >>> M_lithology = f_prior['M2'][:]  # Shape: (100000, 50)
    >>> z = np.linspace(0, 100, 50)
    >>> class_id = [0, 1, 2]  # sand, clay, gravel
    >>>
    >>> # Define observations
    >>> depth_top = [0, 20, 40]
    >>> depth_bottom = [20, 40, 60]
    >>> lithology_obs = [1, 0, 1]  # clay, sand, clay
    >>>
    >>> P_obs, class_mode = welllog_compute_P_obs_class_mode(M_lithology, depth_top, depth_bottom,
    ...                                                      lithology_obs, z, class_id)
    >>> print(P_obs.shape)       # (3, 3) - 3 classes, 3 observations
    >>> print(class_mode.shape)  # (100000, 3) - mode for each realization and interval

    >>> # Simplified usage: Only compute P_obs using well dictionary
    >>> W = {'depth_top': [0, 20, 40], 'depth_bottom': [20, 40, 60],
    ...      'class_obs': [1, 0, 1], 'class_prob': [0.9, 0.8, 0.85],
    ...      'X': 543000.0, 'Y': 6175800.0}
    >>> P_obs, class_mode = welllog_compute_P_obs_class_mode(W=W, class_id=class_id)
    >>> print(P_obs.shape)   # (3, 3) - 3 classes, 3 observations
    >>> print(class_mode)    # None (no prior models provided)

    >>> # Using parallel processing for large ensembles
    >>> P_obs, class_mode = welllog_compute_P_obs_class_mode(M_lithology, depth_top, depth_bottom,
    ...                                                      lithology_obs, z, class_id,
    ...                                                      parallel=True, Ncpu=8)

    Notes
    -----
    The class mode for each depth interval is obtained by finding the depth
    indices closest to the interval boundaries, extracting the class values
    between them, and taking the most frequent class.

    P_obs depends only on the observed classes and their probabilities; it
    does not depend on M_lithology.
    """
    import numpy as np

    # Entries present in W take precedence over the individual arguments.
    if W is not None:
        if 'depth_top' in W:
            depth_top = W['depth_top']
        if 'depth_bottom' in W:
            depth_bottom = W['depth_bottom']
        if 'class_obs' in W:
            lithology_obs = W['class_obs']
        if 'class_prob' in W:
            lithology_prob = W['class_prob']

    # Validate required parameters
    if depth_top is None or depth_bottom is None or lithology_obs is None:
        raise ValueError("depth_top, depth_bottom, and lithology_obs must be provided either as arguments or in W dictionary")
    if class_id is None:
        raise ValueError("class_id is required parameter")

    # Validate z is provided when M_lithology is provided
    if M_lithology is not None and z is None:
        raise ValueError("z is required when M_lithology is provided")

    # Get dimensions
    nclass = len(class_id)
    nl = len(lithology_obs)
    n_obs = nl

    # A scalar (or length-1) probability is broadcast to all intervals.
    lithology_prob_array = np.atleast_1d(lithology_prob)
    if len(lithology_prob_array) == 1:
        lithology_prob_array = np.full(n_obs, lithology_prob_array[0])
    elif len(lithology_prob_array) != n_obs:
        raise ValueError(f"lithology_prob array length ({len(lithology_prob_array)}) must match lithology_obs length ({n_obs})")

    # Compute mode lithology only if M_lithology is provided
    lithology_mode = None
    if M_lithology is not None:
        import integrate as ig
        if parallel and ig.use_parallel():
            # Parallel execution path
            lithology_mode = _compute_mode_parallel(M_lithology, z, depth_top, depth_bottom, nl, Ncpu, showInfo)
        else:
            # Sequential execution path
            lithology_mode = _compute_mode_sequential(M_lithology, z, depth_top, depth_bottom, nl, showInfo)

    # Convert observed lithologies to P_obs probabilities, one column per interval.
    P_obs = np.full((nclass, n_obs), np.nan)
    for i in range(n_obs):
        p_hit = lithology_prob_array[i]
        # With a single class there is no "other" class to share the
        # remaining mass; avoid dividing by zero.
        p_miss = (1 - p_hit) / (nclass - 1) if nclass > 1 else 0.0

        is_observed = np.array([c == lithology_obs[i] for c in class_id], dtype=bool)
        P_obs[:, i] = np.where(is_observed, p_hit, p_miss)

    return P_obs, lithology_mode
501
+
502
def rescale_P_obs_temperature(P_obs, T=1.0):
    """
    Rescale discrete observation probabilities by temperature and renormalize.

    Each probability is raised to the power (1/T), after which every column is
    renormalized so that its non-NaN entries sum to 1. Higher temperatures
    (T > 1) flatten the distribution, lower temperatures (T < 1) sharpen it.

    Parameters
    ----------
    P_obs : array-like
        Probability matrix of shape (nclass, nm) where nclass is the number of classes
        and nm is the number of model parameters (e.g., depth points).
        Each column should represent a probability distribution over classes.
        The input is not modified; integer input is converted to float.
    T : float, optional
        Temperature parameter for annealing, must be positive. Default is 1.0.
        - T = 1.0: No power scaling (columns are still renormalized)
        - T > 1.0: Flattens distribution (less certain)
        - T < 1.0: Sharpens distribution (more certain)

    Returns
    -------
    P_obs_scaled : ndarray
        Float matrix of shape (nclass, nm). Columns whose non-NaN entries have
        a positive sum are renormalized to sum to 1; other columns (all-NaN
        or all-zero) are left as they are. NaN entries stay NaN.

    Raises
    ------
    ValueError
        If T is not positive, or if P_obs is not two-dimensional.

    Examples
    --------
    >>> P_obs = np.array([[0.8, 0.6, 0.5],
    ...                   [0.1, 0.2, 0.3],
    ...                   [0.1, 0.2, 0.2]])
    >>> P_scaled = rescale_P_obs_temperature(P_obs, T=2.0)
    >>> print(P_scaled)  # More uniform distribution
    >>> P_scaled = rescale_P_obs_temperature(P_obs, T=0.5)
    >>> print(P_scaled)  # Sharper distribution

    Notes
    -----
    The temperature scaling is:
        P_new(c) ∝ P_old(c)^(1/T)

    After scaling, each column is renormalized:
        P_new(c) = P_new(c) / sum_c(P_new(c))
    """
    import numpy as np

    T_arr = np.asarray(T, dtype=float)
    # T = 0 would divide by zero and T < 0 would invert the distribution.
    if np.any(T_arr <= 0):
        raise ValueError(f"T must be positive, got {T}")

    # Float copy: never modifies the caller's array and avoids integer
    # truncation when the normalized values are written back.
    P_obs_scaled = np.array(P_obs, dtype=float)
    if P_obs_scaled.ndim != 2:
        raise ValueError(
            f"P_obs must be 2-dimensional (nclass, nm), got shape {P_obs_scaled.shape}"
        )

    # Apply temperature scaling: p^(1/T); skipped when T is exactly 1.
    if np.any(T_arr != 1.0):
        P_obs_scaled = np.power(P_obs_scaled, 1.0 / T_arr)

    # Renormalize every column whose (NaN-ignoring) sum is positive.
    # All-NaN columns sum to 0 under nansum and are therefore left untouched.
    col_sum = np.nansum(P_obs_scaled, axis=0)
    normalizable = col_sum > 0
    P_obs_scaled[:, normalizable] /= col_sum[normalizable]

    return P_obs_scaled
574
+
575
def Pobs_to_datagrid(P_obs, X, Y, f_data_h5, r_data=10, r_dis=100, doPlot=False,
                     nan_freq=0.8, r_data_i_use=None):
    """
    Convert point-based discrete probability observations to gridded data with weight-based tempering.

    The probability matrix observed at (X, Y) (e.g., a borehole) is copied to
    every survey location, tempered by a temperature derived from the combined
    weight returned by ``ig.get_weight_from_position``. Locations with a low
    weight get a high temperature (flatter probabilities); locations whose
    temperature reaches the cap of 100 are excluded.

    Parameters
    ----------
    P_obs : ndarray
        Probability matrix of shape (nclass, nm) where nclass is the number of classes
        and nm is the number of model parameters (e.g., depth points).
    X : float
        X coordinate (e.g., UTM Easting) of the observation point.
    Y : float
        Y coordinate (e.g., UTM Northing) of the observation point.
    f_data_h5 : str
        Path to HDF5 data file containing survey geometry (X, Y coordinates).
    r_data : float, optional
        Passed on as ``r_data`` to ``get_weight_from_position``. Default is 10.
    r_dis : float, optional
        Passed on as ``r_dis`` to ``get_weight_from_position``. Default is 100.
    doPlot : bool, optional
        Passed on as ``doPlot`` to ``get_weight_from_position``. Default is False.
    nan_freq : float, optional
        Passed on as ``nan_freq`` to ``get_weight_from_position``. Default 0.8.
    r_data_i_use : array-like of int or None, optional
        Passed on as ``r_data_i_use`` to ``get_weight_from_position``.
        Default None.

    Returns
    -------
    d_obs : ndarray
        Gridded observation data of shape (nd, nclass, nm) where nd is the number
        of spatial locations in the survey. Used locations hold the
        temperature-scaled probabilities; all other locations hold NaN.
    i_use : ndarray
        Array of shape (nd, 1) with 1 for locations that should be used and 0
        for locations that should be ignored. Locations with temperature < 100
        are used.
    T_all : ndarray
        Temperature applied at each location, capped at 100. It has the shape
        of the combined weights returned by ``get_weight_from_position``
        (presumably one value per survey location; verify against that function).

    Notes
    -----
    1. Weights are computed with ``get_weight_from_position()``.
    2. The combined weight is converted to a temperature: T = 1 / w_combined.
    3. The temperature is capped at 100.
    4. For each location:

       - If T < 100: include the location (i_use=1) and apply temperature scaling
       - Otherwise: exclude the location (i_use=0) and leave its data as NaN

    Examples
    --------
    >>> # Borehole observation at specific location
    >>> P_obs = compute_P_obs_discrete(depth_top, depth_bottom, lithology, z, class_id)
    >>> X_well, Y_well = 543000.0, 6175800.0
    >>> d_obs, i_use, T_all = Pobs_to_datagrid(P_obs, X_well, Y_well, 'survey_data.h5',
    ...                                        r_data=10, r_dis=100)
    >>> # Write to data file
    >>> ig.save_data_multinomial(d_obs, i_use=i_use, id=2, f_data_h5='survey_data.h5')

    See Also
    --------
    rescale_P_obs_temperature : Temperature scaling function
    compute_P_obs_discrete : Create P_obs from depth intervals
    get_weight_from_position : Weighting function
    """
    import numpy as np
    import integrate as ig

    # Only the number of survey locations is needed from the geometry.
    X_grid, _, _, _ = ig.get_geometry(f_data_h5)
    nd = len(X_grid)
    nclass, nm = P_obs.shape

    # Initialize output arrays: nothing used, all data NaN.
    i_use = np.zeros((nd, 1))
    d_obs = np.full((nd, nclass, nm), np.nan)

    # Only the combined weight is used below; the remaining return values
    # are ignored.
    w_combined, _w_dis, _w_data, _w_extra = ig.get_weight_from_position(
        f_data_h5, X, Y, r_data=r_data, r_dis=r_dis, doPlot=doPlot,
        nan_freq=nan_freq, r_data_i_use=r_data_i_use
    )

    # Low weight -> high temperature (weaker influence). A zero weight gives
    # an infinite temperature, which the cap below turns into 100, so the
    # divide-by-zero warning is suppressed on purpose.
    with np.errstate(divide='ignore'):
        T_all = 1 / w_combined

    # Cap maximum temperature at 100 (beyond this, the location is excluded).
    T_all[T_all > 100] = 100

    # Apply temperature scaling to each location.
    for ip in range(nd):
        T = T_all[ip]

        # NaN temperatures fail this comparison and are therefore excluded too.
        if T < 100:
            i_use[ip] = 1
            d_obs[ip, :, :] = rescale_P_obs_temperature(P_obs, T=T)

    return d_obs, i_use, T_all
703
+
704
+
705
+
706
def get_weight_from_position(f_data_h5, x_well=0, y_well=0, i_ref=-1, r_dis=400, r_data=2,
                             useLog=True, doPlot=False, plFile=None, showInfo=0,
                             nan_freq=0.8, r_data_i_use=None):
    """Calculate weights based on distance and data similarity to a reference point.

    This function computes three sets of weights:
    1. Combined weights based on both spatial distance and data similarity
    2. Distance-based weights
    3. Data similarity weights

    Parameters
    ----------
    f_data_h5 : str
        Path to the HDF5 file passed to ``ig.get_geometry`` and ``ig.load_data``.
    x_well : float, optional
        X coordinate of reference point (well). Default 0.
    y_well : float, optional
        Y coordinate of reference point (well). Default 0.
    i_ref : int, optional
        Index of the reference sounding. Default -1, meaning the sounding
        closest to (x_well, y_well) is selected automatically.
    r_dis : float, optional
        Geographic XY distance range for spatial weighting. Default 400.
    r_data : float, optional
        Data-space similarity range parameter for data weighting. Default 2.
    useLog : bool, optional
        Apply log10 transform to data before computing similarity. Default True.
    doPlot : bool, optional
        Create and save a three-panel diagnostic weight plot. Default False.
        matplotlib is only imported when this is True.
    plFile : str or None, optional
        Output filename for the diagnostic plot. Auto-generated from the well
        position, reference index and ranges if None.
    showInfo : int, optional
        Verbosity level, forwarded to ``ig.load_data``. Default 0.
    nan_freq : float, optional
        NaN-frequency threshold for automatic gate selection. Only gates whose
        fraction of non-NaN values across all soundings exceeds this threshold
        are used in the data-distance computation. Default 0.8.
        Ignored when ``r_data_i_use`` is provided.
    r_data_i_use : array-like of int or None, optional
        Explicit gate/channel indices to use for the data-distance computation.
        When provided, overrides the ``nan_freq`` automatic selection.
        A NaN check at the reference sounding is still applied.
        Default None (use ``nan_freq`` threshold).

    Returns
    -------
    w_combined : ndarray
        Product of ``w_data`` and ``w_dis``.
    w_dis : ndarray
        Geographic distance-based weights, one per position in X/Y.
    w_data : ndarray
        Data similarity-based weights, one per sounding.
    i_ref : int
        Index of the reference sounding used.

    Notes
    -----
    Weights are calculated using Gaussian functions:
    - Distance weights: exp(-dis² / r_dis²)
    - Data weights: exp(-sum_dd² / r_data²)
    where dis is the XY distance to (x_well, y_well) and sum_dd is the sum over
    the selected gates of the absolute difference to the reference sounding.

    The gate sum uses ``np.sum``, which propagates NaN: a sounding with a NaN
    in any selected gate gets a NaN data weight (and NaN combined weight).
    """
    import integrate as ig
    import numpy as np

    X, Y, _, _ = ig.get_geometry(f_data_h5)
    DATA = ig.load_data(f_data_h5, showInfo=showInfo)
    # Only the first data set in the file is used for the similarity measure.
    d_obs = DATA['d_obs'][0]

    # Index of the position in X and Y with the smallest distance to the well
    if i_ref == -1:
        i_ref = np.argmin((X - x_well)**2 + (Y - y_well)**2)

    # Select gates to use for the data-distance computation
    if r_data_i_use is not None:
        gates = np.asarray(r_data_i_use, dtype=int)
    else:
        n_not_nan = np.sum(~np.isnan(d_obs), axis=0)
        n_not_nan_freq = n_not_nan / d_obs.shape[0]
        gates = np.where(n_not_nan_freq > nan_freq)[0]
    # Remove gates that are NaN at the reference sounding
    gates = gates[~np.isnan(d_obs[i_ref, gates])]

    if useLog:
        d_ref = np.log10(d_obs[i_ref, gates])
        d_test = np.log10(d_obs[:, gates])
    else:
        d_ref = d_obs[i_ref, gates]
        d_test = d_obs[:, gates]

    # Data-space distance: summed absolute difference to the reference sounding
    sum_dd = np.sum(np.abs(d_test - d_ref), axis=1)
    w_data = np.exp(-1 * sum_dd**2 / r_data**2)

    # Geographic distance is measured to the actual borehole location,
    # not to the (possibly offset) reference sounding.
    dis = np.sqrt((X - x_well)**2 + (Y - y_well)**2)
    w_dis = np.exp(-1 * dis**2 / r_dis**2)

    w_combined = w_data * w_dis

    if doPlot:
        # Imported lazily so that the weights can be computed without
        # matplotlib when no plot is requested.
        import matplotlib.pyplot as plt

        cmap = 'hot_r'
        # (weights, panel title, scatter marker size)
        panels = (
            (w_combined, 'Combined weights', 1),
            (w_dis, 'XY distance weights', 1),
            (w_data, 'Data distance weights', 0.2),
        )

        plt.figure(figsize=(15, 5))
        for i_panel, (w, title, marker_size) in enumerate(panels):
            plt.subplot(1, 3, i_panel + 1)
            plt.plot(X, Y, '.', markersize=1.02, color='lightgray')

            # Only draw points that carry a non-negligible weight
            i_show = np.where(w > 0.001)[0]
            plt.scatter(X[i_show], Y[i_show], c=w[i_show], s=marker_size, cmap=cmap,
                        vmin=0, vmax=1, marker='.', zorder=3)
            plt.title(title)
            plt.axis('equal')
            plt.colorbar()
            plt.grid()
            # Concentric white/black/white markers keep the well visible on any background
            plt.plot(x_well, y_well, 'wo', zorder=6, markersize=2)
            plt.plot(x_well, y_well, 'ko', zorder=5, markersize=4)
            plt.plot(x_well, y_well, 'wo', zorder=4, markersize=6)

        plt.suptitle('Weights')
        plt.xlabel('X')
        plt.ylabel('Y')
        if plFile is None:
            plFile = 'weights_%d_%d_%d_rdis%d_rdata%d.png' % (x_well, y_well, i_ref, r_dis, r_data)
        plt.savefig(plFile, dpi=300)

    return w_combined, w_dis, w_data, i_ref
845
+
846
+
847
def prior_data_borehole_class_mode(f_prior_h5, im_prior, BH, parallel=False, **kwargs):
    """
    Build mode-class prior data for one borehole and append it to the prior file.

    The dataset ``M{im_prior}`` is read from f_prior_h5 together with its
    ``x`` and ``class_id`` attributes. These are handed to
    welllog_compute_P_obs_class_mode along with the borehole dictionary, and
    the second value it returns (the per-realization class modes) is written
    back to f_prior_h5 through ``ig.save_prior_data``.

    Parameters
    ----------
    f_prior_h5 : str
        Path to the prior HDF5 file.
    im_prior : int
        Index of the discrete model parameter (e.g. 2 for /M2 lithology).
    BH : dict
        Borehole dictionary, forwarded unchanged as ``W`` to
        welllog_compute_P_obs_class_mode.
    parallel : bool, optional
        Forwarded to welllog_compute_P_obs_class_mode. Default is False.
    **kwargs
        showInfo : int, optional
            Verbosity level. Default is 1.

    Returns
    -------
    P_obs : ndarray
        First value returned by welllog_compute_P_obs_class_mode
        (documented upstream as shape (nclass, n_intervals)).
    id_prior : int
        Value returned by ``ig.save_prior_data`` for the stored data set.
    """
    import h5py
    import integrate as ig

    verbosity = kwargs.get('showInfo', 1)
    model_key = 'M%d' % im_prior

    # Pull everything needed out of the file before it is closed again.
    with h5py.File(f_prior_h5, 'r') as h5:
        model = h5[model_key]
        depth = model.attrs['x']
        classes = model.attrs['class_id']
        realizations = model[:]

    mode_result = welllog_compute_P_obs_class_mode(
        realizations,
        z=depth,
        class_id=classes,
        W=BH,
        parallel=parallel,
        showInfo=verbosity,
    )
    P_obs, class_mode = mode_result

    id_prior = ig.save_prior_data(f_prior_h5, class_mode, showInfo=verbosity)
    return P_obs, id_prior
895
+
896
+
897
def prior_data_borehole_class_layer(f_prior_h5, im_prior, BH, **kwargs):
    """
    Compute layer-probability prior data for a borehole using identity mapping.

    Uses compute_P_obs_discrete (no prior ensemble needed) and stores an
    identity prior data reference in f_prior_h5 via ``ig.prior_data_identity``.

    Parameters
    ----------
    f_prior_h5 : str
        Path to the prior HDF5 file.
    im_prior : int
        Index of the discrete model parameter (e.g. 2 for /M2 lithology).
    BH : dict or None
        Borehole dictionary, forwarded as ``W`` to compute_P_obs_discrete.
        When None, nothing is read or written and ``(None, None)`` is returned.
    **kwargs
        showInfo : int, optional
            Verbosity level. Default is 1. With ``showInfo=0`` the function
            is silent, including for the BH-is-None notice.

    Returns
    -------
    P_obs : ndarray or None
        Probability matrix returned by compute_P_obs_discrete (presumably one
        column per depth point in the model's ``x`` attribute; confirm
        against that function).
    id_prior : int or None
        Second value returned by ``ig.prior_data_identity``.
    """
    showInfo = kwargs.get('showInfo', 1)

    # Guard first: a missing borehole needs neither the file nor the heavy imports.
    if BH is None:
        if showInfo > 0:
            print('prior_data_borehole_class_layer: BH is None, returning None')
        return None, None

    import h5py
    import integrate as ig

    with h5py.File(f_prior_h5, 'r') as f:
        model = f['M%d' % im_prior]
        z = model.attrs['x']
        class_id = model.attrs['class_id']

    P_obs = compute_P_obs_discrete(z=z, class_id=class_id, W=BH)

    # The file name returned by prior_data_identity is not needed here.
    _, id_prior = ig.prior_data_identity(
        f_prior_h5, im=im_prior, doMakePriorCopy=False, showInfo=showInfo)

    return P_obs, id_prior
943
+
944
+
945
def prior_data_borehole(f_prior_h5, im_prior, BH, parallel=False, **kwargs):
    """
    Dispatch prior-data computation for one borehole based on ``BH['method']``.

    Supported values of ``BH['method']``:

    - ``'mode_probability'`` -> :func:`prior_data_borehole_class_mode`
    - ``'layer_probability'`` -> :func:`prior_data_borehole_class_layer`
    - ``'class_exact'`` / ``'layer_probability_independent'`` -> NotImplementedError

    Parameters
    ----------
    f_prior_h5 : str
        Path to the prior HDF5 file.
    im_prior : int
        Index of the discrete model parameter (e.g. 2 for /M2 lithology).
    BH : dict or None
        Borehole dictionary. Key ``'method'`` selects the integration approach
        and defaults to ``'mode_probability'`` if absent. When BH is None the
        function returns ``(None, None)`` without doing anything.
    parallel : bool, optional
        Forwarded to the ``'mode_probability'`` implementation. Default False.
    **kwargs
        Forwarded to the selected implementation.
        showInfo : int, optional
            Verbosity level. Default is 1.

    Returns
    -------
    P_obs : ndarray or None
        First value returned by the selected implementation.
    id_prior : int or None
        Second value returned by the selected implementation.

    Raises
    ------
    NotImplementedError
        For the recognised but unimplemented methods listed above.
    ValueError
        For any other unrecognised method.

    Examples
    --------
    >>> P_obs, id_prior = ig.prior_data_borehole(f_prior_h5, im_prior=2, BH=BH, parallel=True)
    >>> # Then extrapolate to survey grid:
    >>> d_obs, i_use, T_use = ig.Pobs_to_datagrid(P_obs, BH['X'], BH['Y'], f_data_h5)
    >>> id_out, _ = ig.save_data_multinomial(d_obs, i_use=i_use, id_prior=id_prior, f_data_h5=f_data_h5)
    """
    # No borehole: nothing to compute, nothing to report.
    if BH is None:
        return None, None

    verbosity = kwargs.get('showInfo', 1)
    method = BH.get('method', 'mode_probability')

    if verbosity > 0:
        borehole_name = BH.get('name', '?')
        print('prior_data_borehole: borehole=%s method=%s' % (borehole_name, method))

    if method == 'mode_probability':
        return prior_data_borehole_class_mode(
            f_prior_h5, im_prior, BH, parallel=parallel, **kwargs)

    if method == 'layer_probability':
        return prior_data_borehole_class_layer(f_prior_h5, im_prior, BH, **kwargs)

    planned_methods = ('class_exact', 'layer_probability_independent')
    if method in planned_methods:
        raise NotImplementedError("Method '%s' not implemented yet" % method)

    raise ValueError("Unknown method: %s" % method)
1005
+
1006
+
1007
def save_borehole_data(f_prior_h5, f_data_h5, BH, **kwargs):
    """
    Compute and save prior and observed data for a single borehole in one call.

    Combines the three-step borehole ingestion workflow into a single function:

    1. Compute prior data with the method selected by ``BH['method']`` and
       save it to f_prior_h5 (via :func:`prior_data_borehole`)
    2. Extrapolate point observations to the survey grid with
       distance-based weighting (via :func:`Pobs_to_datagrid`)
    3. Save the gridded observations to f_data_h5
       (via ``ig.save_data_multinomial``)

    Parameters
    ----------
    f_prior_h5 : str
        Path to the prior HDF5 file.
    f_data_h5 : str
        Path to the observed-data HDF5 file.
    BH : dict or None
        Borehole dictionary. ``BH['X']`` and ``BH['Y']`` are required; the
        remaining content is consumed by :func:`prior_data_borehole`. When BH
        is None nothing is computed or saved and ``(None, None)`` is
        returned, matching :func:`prior_data_borehole`.

        Four optional keys supply defaults for the weighting parameters when
        the corresponding kwarg is not passed explicitly:

        * ``range_data`` (float, optional) — data-space similarity radius.
          Survey points whose data response is similar to the borehole
          location receive higher weight; more dissimilar points are
          down-weighted. Default: 1,000,000 (effectively no cutoff).
        * ``range_dis`` (float, optional) — geographic XY distance range used
          to fade out the borehole's influence. Default: 300.
        * ``nan_freq`` (float, optional) — NaN-frequency threshold for automatic
          data-gate selection. Default: 0.8.
        * ``r_data_i_use`` (list of int, optional) — explicit gate indices for
          data-distance computation; overrides ``nan_freq`` when provided.
    **kwargs
        im_prior : int, optional
            Index of the discrete model parameter in f_prior_h5 (e.g. 2 for /M2).
            Default is 2.
        parallel : bool, optional
            Enable parallel mode computation. Default is False.
        r_data : float, optional
            Data-space similarity radius. Resolution order: explicit kwarg >
            BH['range_data'] > 1,000,000 (no cutoff).
        r_dis : float, optional
            Geographic XY fade-out distance. Resolution order: explicit kwarg >
            BH['range_dis'] > 300.
        nan_freq : float, optional
            NaN-frequency threshold for automatic data-gate selection.
            Resolution order: explicit kwarg > BH['nan_freq'] > 0.8.
            Ignored when ``r_data_i_use`` is set.
        r_data_i_use : list of int or None, optional
            Explicit gate/channel indices to use for data-distance computation.
            Overrides ``nan_freq`` when provided. Resolution order: explicit
            kwarg > BH['r_data_i_use'] > None.
        doPlot : bool, optional
            Plot distance-weight maps. Default is False.
        showInfo : int, optional
            Verbosity level (0 = silent, 1 = one summary line per borehole).
            Default is 1.

    Returns
    -------
    id_prior : int or None
        Dataset index returned by :func:`prior_data_borehole`
        (None when BH is None).
    id_out : int or None
        First value returned by ``ig.save_data_multinomial``
        (None when BH is None).

    Examples
    --------
    >>> # Single borehole
    >>> id_prior, id_data = ig.save_borehole_data(f_prior_h5, f_data_h5, BH)

    >>> # All boreholes — collect data IDs for joint inversion
    >>> id_borehole_list = []
    >>> for BH in BHOLES:
    ...     _, id_out = ig.save_borehole_data(f_prior_h5, f_data_h5, BH,
    ...                                       r_data=2, r_dis=300, parallel=True)
    ...     id_borehole_list.append(id_out)
    >>> f_post_h5 = ig.integrate_rejection(f_prior_h5, f_data_h5,
    ...                                    id_use=[1] + id_borehole_list)
    """
    showInfo = kwargs.get('showInfo', 1)

    # A missing borehole is tolerated (as in prior_data_borehole) instead of
    # failing with AttributeError on BH.get below.
    if BH is None:
        if showInfo > 0:
            print('save_borehole_data: BH is None, nothing saved')
        return None, None

    import integrate as ig

    im_prior = kwargs.get('im_prior', 2)
    parallel = kwargs.get('parallel', False)
    doPlot = kwargs.get('doPlot', False)

    # Weighting parameters: explicit kwarg wins, then the borehole's own
    # setting, then the built-in default.
    r_data = kwargs.get('r_data', BH.get('range_data', 1_000_000))
    r_dis = kwargs.get('r_dis', BH.get('range_dis', 300))
    nan_freq = kwargs.get('nan_freq', BH.get('nan_freq', 0.8))
    r_data_i_use = kwargs.get('r_data_i_use', BH.get('r_data_i_use', None))

    # Step 1: compute prior data (method chosen by BH['method']) and save to f_prior_h5
    P_obs, id_prior = prior_data_borehole(
        f_prior_h5, im_prior, BH,
        parallel=parallel, showInfo=showInfo)

    # Step 2: extrapolate point observations to the survey grid
    d_obs, i_use, T_use = Pobs_to_datagrid(
        P_obs, BH['X'], BH['Y'], f_data_h5,
        r_data=r_data, r_dis=r_dis, doPlot=doPlot,
        nan_freq=nan_freq, r_data_i_use=r_data_i_use)

    # Step 3: save gridded observations to f_data_h5
    id_out, _ = ig.save_data_multinomial(
        D_obs=d_obs,
        i_use=i_use,
        id_prior=id_prior,
        f_data_h5=f_data_h5,
        showInfo=showInfo)

    if showInfo > 0:
        print('save_borehole_data: %s → prior D%d, data D%d'
              % (BH.get('name', '?'), id_prior, id_out))

    return id_prior, id_out