integrate_module 0.99.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- integrate/__init__.py +144 -0
- integrate/gex.py +402 -0
- integrate/integrate.py +4063 -0
- integrate/integrate_borehole.py +1127 -0
- integrate/integrate_hdf5_info_cli.py +122 -0
- integrate/integrate_io.py +5293 -0
- integrate/integrate_plot.py +4986 -0
- integrate/integrate_query.py +1609 -0
- integrate/integrate_rejection.py +1836 -0
- integrate/integrate_rejection_cli.py +210 -0
- integrate/integrate_rejection_jax.py +494 -0
- integrate/integrate_timing_cli.py +407 -0
- integrate/integrate_www_cli.py +8 -0
- integrate_module-0.99.1.dist-info/METADATA +229 -0
- integrate_module-0.99.1.dist-info/RECORD +19 -0
- integrate_module-0.99.1.dist-info/WHEEL +5 -0
- integrate_module-0.99.1.dist-info/entry_points.txt +5 -0
- integrate_module-0.99.1.dist-info/licenses/LICENSE +21 -0
- integrate_module-0.99.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
INTEGRATE Timing CLI
|
|
4
|
+
|
|
5
|
+
Command-line interface for timing benchmarks of the INTEGRATE workflow.
|
|
6
|
+
This module imports timing functions from the main integrate module.
|
|
7
|
+
|
|
8
|
+
Author: Thomas Mejer Hansen
|
|
9
|
+
Email: tmeha@geo.au.dk
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
# Configure matplotlib for non-interactive plotting
|
|
13
|
+
import matplotlib
|
|
14
|
+
matplotlib.use('Agg') # Use non-interactive backend
|
|
15
|
+
import matplotlib.pyplot as plt
|
|
16
|
+
|
|
17
|
+
# Import timing functions from integrate module
|
|
18
|
+
try:
|
|
19
|
+
# Try relative import first (when run as module)
|
|
20
|
+
from . import integrate as ig
|
|
21
|
+
from .integrate import timing_compute, timing_plot, allocate_large_page
|
|
22
|
+
except ImportError:
|
|
23
|
+
try:
|
|
24
|
+
# Try absolute import (when run directly)
|
|
25
|
+
import integrate as ig
|
|
26
|
+
from integrate import timing_compute, timing_plot, allocate_large_page
|
|
27
|
+
except ImportError:
|
|
28
|
+
print("Error: Could not import integrate module. Please ensure it is properly installed.")
|
|
29
|
+
import sys
|
|
30
|
+
sys.exit(1)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def print_timing_summary(f_timing):
    """Print a concise summary of timing benchmark results.

    Parameters
    ----------
    f_timing : str or path-like
        Path to an ``.npz`` archive holding the arrays ``T_prior``,
        ``T_forward``, ``T_rejection``, ``T_poststat`` (each indexed as
        ``[dataset size, CPU count]``), ``N_arr`` and ``Nproc_arr``.
        ``T_total``, ``nobs`` and ``backend`` are optional; when absent the
        total is rebuilt from the four stage timings, ``nobs`` falls back to
        11693 and the backend is reported as ``'numpy'``.

    Returns
    -------
    None
        The report is written to stdout. Any failure while reading or
        summarising the file is reported as a single error line instead of
        being raised, so callers can continue with plotting.
    """
    import numpy as np

    try:
        # NpzFile reads members lazily and keeps the archive open, so every
        # array is pulled out inside the ``with`` block and the file handle is
        # released before reporting starts.
        with np.load(f_timing) as data:
            T_prior = data['T_prior']
            T_forward = data['T_forward']
            T_rejection = data['T_rejection']
            T_poststat = data['T_poststat']
            N_arr = data['N_arr']
            Nproc_arr = data['Nproc_arr']

            # Optional entries: KeyError means the entry is missing,
            # ValueError means np.load refused to decode it (e.g. an object
            # array while pickling is disabled). Anything else is a real error
            # and is left to the outer handler.
            try:
                T_total = data['T_total']
            except (KeyError, ValueError):
                T_total = T_prior + T_forward + T_rejection + T_poststat

            try:
                nobs = data['nobs']
            except (KeyError, ValueError):
                nobs = 11693  # Default fallback value

            try:
                backend = str(data['backend'])
            except (KeyError, ValueError):
                backend = 'numpy'

        print(f"\n{'='*60}")
        print(f"INTEGRATE Timing Summary: {f_timing}")
        print(f"Rejection backend: {backend}")
        print(f"{'='*60}")

        print(f"\nDataset sizes tested: {N_arr.astype(int)}")
        print(f"CPU counts tested: {Nproc_arr.astype(int)}")

        # Calculate key performance metrics
        T_forward_sounding_per_sec = N_arr[:, np.newaxis] / T_forward
        T_forward_sounding_per_sec_per_cpu = (
            T_forward_sounding_per_sec / Nproc_arr[np.newaxis, :]
        )

        # For rejection sampling the relevant rate is data soundings per
        # second, not lookup-table models per second.
        T_rejection_per_data = nobs / T_rejection
        T_rejection_per_data_per_cpu = (
            T_rejection_per_data / Nproc_arr[np.newaxis, :]
        )

        # NaN marks configurations that were not measured.
        forward_valid = ~np.isnan(T_forward_sounding_per_sec)
        rejection_valid = ~np.isnan(T_rejection_per_data)

        # The rejection and overall sections both report on the largest
        # lookup table only.
        max_n_idx = np.argmax(N_arr)

        if np.any(forward_valid):
            max_forward_total = np.nanmax(T_forward_sounding_per_sec)
            max_forward_per_cpu = np.nanmax(T_forward_sounding_per_sec_per_cpu)

            print("\n🚀 FORWARD MODELING PERFORMANCE:")
            print(f" Max soundings/sec (all CPUs): {max_forward_total:.1f}")
            print(f" Max soundings/sec/CPU: {max_forward_per_cpu:.2f}")

            # Best rate over all dataset sizes, per CPU count
            print("\n Forward Performance by CPU Count:")
            for j, ncpu in enumerate(Nproc_arr):
                if np.any(~np.isnan(T_forward_sounding_per_sec[:, j])):
                    best_perf = np.nanmax(T_forward_sounding_per_sec[:, j])
                    best_perf_per_cpu = best_perf / ncpu
                    print(f" {int(ncpu):2d} CPUs: {best_perf:8.1f} sounds/sec ({best_perf_per_cpu:.2f} per CPU)")

        if np.any(rejection_valid):
            T_rejection_largest = T_rejection_per_data[max_n_idx, :]
            T_rejection_largest_per_cpu = T_rejection_per_data_per_cpu[max_n_idx, :]

            if np.any(~np.isnan(T_rejection_largest)):
                max_rejection_total = np.nanmax(T_rejection_largest)
                max_rejection_per_cpu = np.nanmax(T_rejection_largest_per_cpu)

                # Which CPU count achieved each maximum
                max_total_cpus = int(Nproc_arr[np.nanargmax(T_rejection_largest)])
                max_per_cpu_cpus = int(Nproc_arr[np.nanargmax(T_rejection_largest_per_cpu)])

                print(f"\n⚡ REJECTION SAMPLING PERFORMANCE (Largest Lookup Table: {int(N_arr[max_n_idx]):,} models):")
                print(f" Max data soundings/sec (all CPUs): {max_rejection_total:.1f} (achieved with {max_total_cpus} CPUs)")
                print(f" Max data soundings/sec/CPU: {max_rejection_per_cpu:.2f} (achieved with {max_per_cpu_cpus} CPUs)")

                print("\n Rejection Performance by CPU Count (Largest Lookup Table):")
                for j, ncpu in enumerate(Nproc_arr):
                    if not np.isnan(T_rejection_largest[j]):
                        best_perf = T_rejection_largest[j]
                        best_perf_per_cpu = best_perf / ncpu
                        print(f" {int(ncpu):2d} CPUs: {best_perf:8.1f} data sounds/sec ({best_perf_per_cpu:.2f} per CPU)")
                    elif int(ncpu) == 1:
                        # Always show the 1 CPU entry, even without data, as
                        # the reference point.
                        print(f" {int(ncpu):2d} CPUs: No valid data")
            else:
                print(f"\n⚡ REJECTION SAMPLING PERFORMANCE: No valid data for largest lookup table ({int(N_arr[max_n_idx]):,} models)")

        # Overall timing breakdown for the best case on the largest lookup table
        if np.any(~np.isnan(T_total)):
            T_total_largest = T_total[max_n_idx, :]

            if np.any(~np.isnan(T_total_largest)):
                # Best CPU count = highest end-to-end throughput
                best_cpu_idx = np.nanargmax(N_arr[max_n_idx] / T_total_largest)
                best_cpu = Nproc_arr[best_cpu_idx]
                best_n = N_arr[max_n_idx]

                t_pri = T_prior[max_n_idx, best_cpu_idx]
                t_fwd = T_forward[max_n_idx, best_cpu_idx]
                t_rej = T_rejection[max_n_idx, best_cpu_idx]
                t_post = T_poststat[max_n_idx, best_cpu_idx]
                # The breakdown total is the sum of the four stages so the
                # percentages below add up to 100.
                t_tot = t_pri + t_fwd + t_rej + t_post

                print(f"\n📊 BEST OVERALL PERFORMANCE (Largest Lookup Table: {int(best_n):,} models):")
                print(f" Best CPU configuration: {int(best_cpu)} CPUs")
                print(f" Total throughput: {best_n/t_tot:.1f} models/sec")
                print("\n Time breakdown:")
                print(f" Prior generation: {t_pri:6.1f}s ({100*t_pri/t_tot:4.1f}%)")
                print(f" Forward modeling: {t_fwd:6.1f}s ({100*t_fwd/t_tot:4.1f}%)")
                print(f" Rejection sample: {t_rej:6.1f}s ({100*t_rej/t_tot:4.1f}%)")
                print(f" Post statistics: {t_post:6.1f}s ({100*t_post/t_tot:4.1f}%)")
                print(f" Total time: {t_tot:6.1f}s")
            else:
                print(f"\n📊 BEST OVERALL PERFORMANCE: No valid data for largest lookup table ({int(N_arr[max_n_idx]):,} models)")

        print(f"\n{'='*60}")

    except Exception as e:
        # Best-effort report: a broken timing file must not abort plotting.
        print(f"Error reading timing summary from {f_timing}: {str(e)}")
|
|
170
|
+
|
|
171
|
+
# %% The main function
|
|
172
|
+
def main():
    """Entry point for the integrate_timing command.

    Parses ``sys.argv`` and runs one of two sub-commands:

    * ``plot`` -- summarise and plot an existing timing ``.npz`` file, or
      every ``.npz`` file in the current directory with ``--all``.
    * ``time`` -- run the benchmark through ``timing_compute`` and then
      summarise and plot the file it returns.

    Exits with status 0 after printing help (no sub-command given) and with
    status 1 when ``plot`` is given a missing file or no file at all.
    """
    import glob
    import multiprocessing
    import os
    import platform
    import sys

    multiprocessing.freeze_support()

    if platform.system() == 'Windows':
        # Only the start method is set here; no process limit is applied.
        # set_start_method raises RuntimeError when the method was already
        # fixed (e.g. main() called twice in one interpreter); that is
        # harmless for this tool, so it is ignored.
        try:
            multiprocessing.set_start_method('spawn')
        except RuntimeError:
            pass

        # Optional - can help with some multiprocessing issues
        # NOTE(review): the original nesting of this assignment could not be
        # recovered from the reviewed text; confirm it belongs inside the
        # Windows branch.
        os.environ['PYTHONWARNINGS'] = 'ignore:semaphore_tracker:UserWarning'

    parser = _build_timing_parser()

    # Legacy spelling: a lone '-time' gets a hint instead of an argparse error.
    if '-time' in sys.argv and len(sys.argv) == 2:
        print("Please specify a size for the timing benchmark:")
        print(" small - Quick test with minimal resources")
        print(" medium - Balanced benchmark (default)")
        print(" large - Comprehensive benchmark (may take hours)")
        print("\nExample: integrate_timing time medium")
        sys.exit(0)

    args = parser.parse_args()

    # Show help when no command is specified
    if args.command is None:
        parser.print_help()
        sys.exit(0)

    show_summary = not args.no_summary

    if args.command == 'plot':
        if args.all:
            # Plot all NPZ files in the current directory; a failure on one
            # file is reported and the loop moves on to the next.
            for f in glob.glob('*.npz'):
                _summarize_and_plot(
                    f, show_summary,
                    f"Successfully plotted: {f}",
                    f"Error plotting {f}",
                )
        elif args.file:
            if not os.path.exists(args.file):
                print(f"File not found: {args.file}")
                sys.exit(1)
            _summarize_and_plot(
                args.file, show_summary,
                f"Successfully plotted: {args.file}",
                f"Error plotting {args.file}",
            )
        else:
            # Neither a file nor --all: usage error. Status 1 matches what
            # this invocation returned before the 'file' default was fixed.
            print("Please specify a file to plot or use --all")
            sys.exit(1)

    elif args.command == 'time':
        Nproc_arr = _select_cpu_counts(args)
        N_arr = _select_dataset_sizes(args)

        f_timing = timing_compute(
            N_arr=N_arr,
            Nproc_arr=Nproc_arr,
            backend=args.backend,
            NcpuForward=args.NcpuForward,
        )

        _summarize_and_plot(
            f_timing, show_summary,
            "Timing plots saved successfully.",
            "Error generating timing plots",
        )


def _build_timing_parser():
    """Return the argparse parser with the ``plot`` and ``time`` sub-commands."""
    import argparse

    parser = argparse.ArgumentParser(
        description='INTEGRATE timing benchmark tool',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
INTEGRATE Timing Benchmark Tool

This tool benchmarks the performance of the complete INTEGRATE workflow including:
1. Prior model generation (layered geological models)
2. Forward modeling using GA-AEM electromagnetic simulation
3. Rejection sampling for Bayesian inversion
4. Posterior statistics computation

USAGE EXAMPLES:

Basic benchmarks:
integrate_timing time small # Quick test with default settings
integrate_timing time medium # Balanced benchmark
integrate_timing time large # Comprehensive benchmark

Custom dataset sizes:
integrate_timing time small --Nmin 5000 # Test with 5000 models
integrate_timing time small --N 100000 # Test with exactly 100000 models
integrate_timing time medium --Nmin 10000 # Medium test starting from 10000 models

Custom CPU configurations:
integrate_timing time small --Ncpu 16 # Test with exactly 16 CPUs
integrate_timing time medium --cpu-scale linear # Test all CPU counts [1,2,3,...,64]
integrate_timing time large --cpu-scale log # Test log scale [1,2,4,8,16,32,64]

Combined options:
integrate_timing time small --Ncpu 32 --Nmin 50000 # 50k models on 32 CPUs
integrate_timing time medium --N 25000 --cpu-scale linear # 25k models, all CPU counts

Plotting results:
integrate_timing plot timing_results.npz # Plot specific results file
integrate_timing plot --all # Plot all .npz files in directory

PARAMETER PRIORITY:
- Dataset sizes: --N (highest) > --Nmin > default
- CPU counts: --Ncpu (highest) > --cpu-scale/--Nmin > default

BENCHMARK SIZES:
- small: ~1,000 models, quick test
- medium: 1,000-100,000 models, balanced test
- large: 10,000-1,000,000 models, comprehensive test

Results are saved as .npz files and automatically plotted with performance analysis.
"""
    )

    # Create subparsers for different command groups
    subparsers = parser.add_subparsers(dest='command', help='Command to run')

    # Plot command. 'file' defaults to None so that "plot" without a file
    # reaches the usage message instead of looking for a file named 'time'.
    plot_parser = subparsers.add_parser('plot', help='Plot timing results from benchmark files')
    plot_parser.add_argument('file', nargs='?', default=None,
                             help='NPZ file containing timing results to plot')
    plot_parser.add_argument('--all', action='store_true',
                             help='Plot all NPZ timing files in the current directory')
    plot_parser.add_argument('--no-summary', action='store_true',
                             help='Disable timing summary output (enabled by default)')

    # Time command
    time_parser = subparsers.add_parser('time', help='Run performance benchmark of INTEGRATE workflow')
    time_parser.add_argument('size', choices=['small', 'medium', 'large'],
                             default='medium', nargs='?',
                             help='Benchmark size: small (~1k models, quick), medium (1k-100k models), large (10k-1M models)')
    time_parser.add_argument('--cpu-scale', choices=['linear', 'log'],
                             default='log',
                             help='CPU scaling method: linear tests [1,2,3,...,Ncpu], log tests [1,2,4,8,...,Ncpu] (default: log)')
    time_parser.add_argument('--Nmin', type=int, default=0,
                             help='Dataset size control: For small benchmark, use exactly this many models. For medium/large, use as starting point in range (default: use benchmark defaults)')
    time_parser.add_argument('--Ncpu', type=int, default=0,
                             help='Use exactly this many CPU cores, overriding all other CPU options (default: 0, use scaling)')
    time_parser.add_argument('--N', type=int, default=0,
                             help='Use exactly this dataset size (number of models), overriding size and Nmin options (default: 0, use size-based defaults)')
    time_parser.add_argument('--no-summary', action='store_true',
                             help='Disable timing summary output (enabled by default)')
    time_parser.add_argument('--backend', choices=['numpy', 'jax'], default='numpy',
                             help='Rejection sampling backend: numpy (default) or jax')
    time_parser.add_argument('--NcpuForward', type=int, default=0,
                             help='Fix the number of CPUs used for forward modeling only. '
                                  'When set, forward modeling always uses this many CPUs while '
                                  'inversion (rejection sampling) varies over the usual CPU range. '
                                  '(default: 0, forward uses same count as inversion)')
    return parser


def _select_cpu_counts(args):
    """Return the array of process counts to benchmark for the ``time`` command."""
    import numpy as np

    # An explicit --Ncpu overrides every other CPU option.
    if args.Ncpu > 0:
        return np.array([args.Ncpu])

    # JAX doesn't use multiprocessing: default to a single "process" entry.
    if args.backend == 'jax':
        return np.array([1])

    import psutil

    # cpu_count(logical=False) returns None when the physical core count
    # cannot be determined; fall back to logical cores, then to one.
    Ncpu = psutil.cpu_count(logical=False) or psutil.cpu_count(logical=True) or 1

    if args.cpu_scale == 'linear':
        return np.arange(1, Ncpu + 1)

    # Log scaling: powers of two up to Ncpu, plus Ncpu itself.
    k = int(np.floor(np.log2(Ncpu)))
    Nproc_arr = 2 ** np.linspace(0, k, k + 1)
    Nproc_arr = np.append(Nproc_arr, Ncpu)
    return np.unique(Nproc_arr)


def _select_dataset_sizes(args):
    """Return the array of lookup-table sizes to benchmark for the ``time`` command."""
    import numpy as np

    # Priority: --N, then --Nmin, then the defaults of the chosen size.
    if args.N > 0:
        return np.array([args.N])

    if args.size == 'small':
        # With --Nmin the small benchmark uses exactly that many models.
        return np.array([args.Nmin]) if args.Nmin > 0 else np.array([1000])

    # medium: 9 log-spaced sizes up to 1e5; large: 7 log-spaced sizes up to 1e6.
    # 'size' is restricted to small/medium/large by the parser.
    if args.size == 'medium':
        stop_exp, n_sizes = 5, 9
    else:
        stop_exp, n_sizes = 6, 7

    # --Nmin replaces the default starting size of 1000 models.
    start_exp = np.log10(args.Nmin) if args.Nmin > 0 else 3
    return np.ceil(np.logspace(start_exp, stop_exp, n_sizes))


def _summarize_and_plot(f_timing, show_summary, success_msg, error_prefix):
    """Print the optional summary, plot one timing file, and always close figures."""
    try:
        if show_summary:
            print_timing_summary(f_timing)

        timing_plot(f_timing)
        print(success_msg)
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"{error_prefix}: {str(e)}")
    finally:
        plt.close('all')  # Ensure figures are closed even on error
|
|
405
|
+
|
|
406
|
+
if __name__ == '__main__':
|
|
407
|
+
main()
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: integrate_module
|
|
3
|
+
Version: 0.99.1
|
|
4
|
+
Summary: Localized probabilistic data integration
|
|
5
|
+
Author-email: Thomas Mejer Hansen <tmeha@geo.au.dk>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/AUProbGeo/integrate_module
|
|
8
|
+
Project-URL: Documentation, https://auprobgeo.github.io/integrate_module/
|
|
9
|
+
Project-URL: Repository, https://github.com/AUProbGeo/integrate_module
|
|
10
|
+
Project-URL: Issues, https://github.com/AUProbGeo/integrate_module/issues
|
|
11
|
+
Keywords: inversion,electromagnetic,geophysics,geology,prior,tarantola
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: matplotlib
|
|
18
|
+
Requires-Dist: h5py
|
|
19
|
+
Requires-Dist: scipy
|
|
20
|
+
Requires-Dist: psutil
|
|
21
|
+
Requires-Dist: tqdm
|
|
22
|
+
Requires-Dist: requests
|
|
23
|
+
Requires-Dist: geoprior1d>=0.9
|
|
24
|
+
Requires-Dist: ga-aem-forward-win; platform_system == "Windows"
|
|
25
|
+
Requires-Dist: jupyter>=1.0.0
|
|
26
|
+
Requires-Dist: jupytext
|
|
27
|
+
Requires-Dist: ipykernel
|
|
28
|
+
Requires-Dist: pandas
|
|
29
|
+
Requires-Dist: pyvista
|
|
30
|
+
Requires-Dist: litellm
|
|
31
|
+
Requires-Dist: streamlit
|
|
32
|
+
Requires-Dist: libaarhusxyz
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest; extra == "dev"
|
|
35
|
+
Requires-Dist: black; extra == "dev"
|
|
36
|
+
Provides-Extra: docs
|
|
37
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
38
|
+
Requires-Dist: nbsphinx; extra == "docs"
|
|
39
|
+
Requires-Dist: sphinx-gallery; extra == "docs"
|
|
40
|
+
Requires-Dist: pandoc; extra == "docs"
|
|
41
|
+
Requires-Dist: myst-parser; extra == "docs"
|
|
42
|
+
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
43
|
+
Requires-Dist: furo; extra == "docs"
|
|
44
|
+
Requires-Dist: tomli; python_version < "3.11" and extra == "docs"
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
# INTEGRATE Python Module
|
|
48
|
+
|
|
49
|
+
[Documentation build status](https://github.com/AUProbGeo/integrate_module/actions/workflows/docs.yml)
|
|
50
|
+
[PyPI version](https://badge.fury.io/py/integrate-module)
|
|
51
|
+
[Documentation](https://auprobgeo.github.io/integrate_module/)
|
|
52
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
53
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
54
|
+
|
|
55
|
+
This repository contains the INTEGRATE Python module for localized probabilistic data integration in geophysics.
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
## Installation
|
|
59
|
+
|
|
60
|
+
Assuming you already have Python 3.10+ installed:
|
|
61
|
+
|
|
62
|
+
pip install integrate_module
|
|
63
|
+
|
|
64
|
+
On Windows, this will also install the Python wrapper for GA-AEM (1D EM forward modeling - GPL v2 code): [ga-aem-forward-win](https://pypi.org/project/ga-aem-forward-win/)
|
|
65
|
+
|
|
66
|
+
On Linux/macOS, you will need to install GA-AEM manually.
|
|
67
|
+
|
|
68
|
+
### Using uv (recommended, from PyPI)
|
|
69
|
+
|
|
70
|
+
[uv](https://github.com/astral-sh/uv) is a fast Python package manager. Install it first if needed:
|
|
71
|
+
|
|
72
|
+
# Install uv (Linux/macOS)
|
|
73
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
74
|
+
# or: pip install uv
|
|
75
|
+
|
|
76
|
+
# Create virtual environment in .venv/ inside the module root
|
|
77
|
+
cd path/to/integrate_module
|
|
78
|
+
uv venv .venv --python 3.11
|
|
79
|
+
|
|
80
|
+
# Activate
|
|
81
|
+
source .venv/bin/activate # Linux/macOS
|
|
82
|
+
.venv\Scripts\activate # Windows
|
|
83
|
+
|
|
84
|
+
# Install integrate module
|
|
85
|
+
uv pip install integrate_module
|
|
86
|
+
|
|
87
|
+
### Using uv (from source)
|
|
88
|
+
|
|
89
|
+
# Install uv (Linux/macOS)
|
|
90
|
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
91
|
+
# or: pip install uv
|
|
92
|
+
|
|
93
|
+
# Create .venv and install all dependencies in one step (recommended for development)
|
|
94
|
+
cd path/to/integrate_module
|
|
95
|
+
uv sync
|
|
96
|
+
|
|
97
|
+
# Activate
|
|
98
|
+
source .venv/bin/activate # Linux/macOS
|
|
99
|
+
.venv\Scripts\activate # Windows
|
|
100
|
+
|
|
101
|
+
### Using pip + venv (from PyPI, on Ubuntu)
|
|
102
|
+
|
|
103
|
+
# Install python3-venv
|
|
104
|
+
sudo apt install python3-venv
|
|
105
|
+
|
|
106
|
+
# Create virtual environment in .venv/ inside the module root
|
|
107
|
+
cd path/to/integrate_module
|
|
108
|
+
python3 -m venv .venv
|
|
109
|
+
source .venv/bin/activate
|
|
110
|
+
pip install --upgrade pip
|
|
111
|
+
|
|
112
|
+
# Install integrate module
|
|
113
|
+
pip install integrate_module
|
|
114
|
+
|
|
115
|
+
### Using pip + venv (from source, on Ubuntu)
|
|
116
|
+
|
|
117
|
+
# Install python3-venv
|
|
118
|
+
sudo apt install python3-venv
|
|
119
|
+
|
|
120
|
+
# Create virtual environment in .venv/ inside the module root
|
|
121
|
+
cd path/to/integrate_module
|
|
122
|
+
python3 -m venv .venv
|
|
123
|
+
source .venv/bin/activate
|
|
124
|
+
pip install --upgrade pip
|
|
125
|
+
|
|
126
|
+
# Install integrate module from source
|
|
127
|
+
pip install -e .
|
|
128
|
+
|
|
129
|
+
### Installing documentation dependencies
|
|
130
|
+
|
|
131
|
+
To also install the packages needed to build the Sphinx documentation, use the `docs` extra:
|
|
132
|
+
|
|
133
|
+
# With uv (from source)
|
|
134
|
+
uv sync --extra docs
|
|
135
|
+
|
|
136
|
+
# With uv pip (from source)
|
|
137
|
+
uv pip install -e ".[docs]"
|
|
138
|
+
|
|
139
|
+
# With pip (from source)
|
|
140
|
+
pip install -e ".[docs]"
|
|
141
|
+
|
|
142
|
+
### Using Conda + pip (from PyPI)
|
|
143
|
+
|
|
144
|
+
Create a Conda environment (called integrate) and install the required modules:
|
|
145
|
+
|
|
146
|
+
conda create --name integrate python=3.10 numpy pandas matplotlib scipy tqdm requests h5py psutil
|
|
147
|
+
conda activate integrate
|
|
148
|
+
pip install integrate_module
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
### Using Conda + pip (from source)
|
|
152
|
+
|
|
153
|
+
Create a Conda environment (called integrate) and install the required modules:
|
|
154
|
+
|
|
155
|
+
conda create --name integrate python=3.10 numpy pandas matplotlib scipy tqdm requests h5py psutil
|
|
156
|
+
conda activate integrate
|
|
157
|
+
pip install -e .
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
## GA-AEM
|
|
161
|
+
|
|
162
|
+
In order to use GA-AEM for forward EM modeling, the 'gatdaem1d' Python module must be installed. Follow instructions at [https://github.com/GeoscienceAustralia/ga-aem](https://github.com/GeoscienceAustralia/ga-aem) or use the information below.
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
### PyPI package for Windows
|
|
166
|
+
|
|
167
|
+
On Windows, the [ga-aem-forward-win](https://pypi.org/project/ga-aem-forward-win/) package will be automatically installed, providing access to the GA-AEM forward code. It can be installed manually using:
|
|
168
|
+
|
|
169
|
+
pip install ga-aem-forward-win
|
|
170
|
+
|
|
171
|
+
### Pre-compiled Python module for Windows
|
|
172
|
+
|
|
173
|
+
1. Download the pre-compiled version of GA-AEM for Windows from the latest release: https://github.com/GeoscienceAustralia/ga-aem/releases (GA-AEM.zip)
|
|
174
|
+
|
|
175
|
+
2. Download precompiled FFTW3 Windows DLLs from https://www.fftw.org/install/windows.html (fftw-3.3.5-dll64.zip)
|
|
176
|
+
|
|
177
|
+
3. Extract both archives:
|
|
178
|
+
- `unzip GA-AEM.zip` to get GA-AEM
|
|
179
|
+
- `unzip fftw-3.3.5-dll64.zip` to get fftw-3.3.5-dll64
|
|
180
|
+
|
|
181
|
+
4. Copy FFTW3 DLLs to GA-AEM Python directory:
|
|
182
|
+
|
|
183
|
+
cp fftw-3.3.5-dll64/*.dll GA-AEM/python/gatdaem1d/
|
|
184
|
+
|
|
185
|
+
5. Install the Python gatdaem1d module:
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
cd GA-AEM/python/
|
|
189
|
+
pip install -e .
|
|
190
|
+
|
|
191
|
+
# Test the installation
|
|
192
|
+
cd examples
|
|
193
|
+
python integrate_skytem.py
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
### Compile GA-AEM Python module on Debian/Ubuntu/Linux
|
|
198
|
+
|
|
199
|
+
A script that downloads and installs GA-AEM is located in `scripts/cmake_build_script_DebianUbuntu_gatdaem1d.sh`. This script has been tested and confirmed to work on both Debian and Ubuntu distributions. Be sure to use the appropriate Python environment and then run:
|
|
200
|
+
|
|
201
|
+
sh scripts/cmake_build_script_DebianUbuntu_gatdaem1d.sh
|
|
202
|
+
cd ga-aem/install-ubuntu/python
|
|
203
|
+
pip install .
|
|
204
|
+
|
|
205
|
+
### Compile GA-AEM Python module on macOS/Homebrew
|
|
206
|
+
|
|
207
|
+
First install Homebrew, then run:
|
|
208
|
+
|
|
209
|
+
sh ./scripts/cmake_build_script_homebrew_gatdaem1d.sh
|
|
210
|
+
cd ga-aem/install-homebrew/python
|
|
211
|
+
pip install .
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
## Development
|
|
215
|
+
|
|
216
|
+
The `main` branch is the most stable, with less frequent updates but larger changes.
|
|
217
|
+
|
|
218
|
+
The `develop` branch contains the current development code and may be updated frequently. Some functions and examples may be broken.
|
|
219
|
+
|
|
220
|
+
An extra set of tests and examples is located in the `experimental` sub-branch: [https://github.com/cultpenguin/integrate_module_experimental/](https://github.com/cultpenguin/integrate_module_experimental/).
|
|
221
|
+
Please ask the developers for access to this branch if needed. To clone the main repository with the experimental branch, use:
|
|
222
|
+
|
|
223
|
+
git clone --recurse-submodules git@github.com:AUProbGeo/integrate_module.git
|
|
224
|
+
|
|
225
|
+
You may need to run the following command to update the submodules:
|
|
226
|
+
|
|
227
|
+
cd experimental
|
|
228
|
+
git submodule update --init --recursive
|
|
229
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
integrate/__init__.py,sha256=kk6NhOnQ0A5pHB-7ZNDVum1ur_xIjH-B-TZ_LVRCFMY,7436
|
|
2
|
+
integrate/gex.py,sha256=aKbHDomn0MYr8bEh7zNdM0xuqN0O6tqJ4Y2J2vXEz5k,14884
|
|
3
|
+
integrate/integrate.py,sha256=AYD6Iy2oM9xf6q3QDfDUZTQaS5DRKidCXPWTyiawR3A,154088
|
|
4
|
+
integrate/integrate_borehole.py,sha256=Xg7qWLBooQQnAvKWOTwHtQsPyvlQUsUaa4VwIjVucfM,47867
|
|
5
|
+
integrate/integrate_hdf5_info_cli.py,sha256=kZlmvBdPfYFQz2TSB9cXUWeSLZqqd_BeGRVliN6H5eE,3626
|
|
6
|
+
integrate/integrate_io.py,sha256=YrlgBhVhLjCISDXqpwj0pdL6NWnocL392GF2CUGVfKA,210753
|
|
7
|
+
integrate/integrate_plot.py,sha256=F95D5Agie8BnknxFKgEjmPcSy0MsYM_MNNPlXqA41eA,231645
|
|
8
|
+
integrate/integrate_query.py,sha256=-i6aE42ampBF8rrlUtxH4HJtp_0uO5JtxQ-EU5secYM,62376
|
|
9
|
+
integrate/integrate_rejection.py,sha256=QyK_X_WKqkMzBzdbUR7wO7qGCOnFu6vs2Dr7nZAHq2M,71681
|
|
10
|
+
integrate/integrate_rejection_cli.py,sha256=4eKIuEJ98gmf9mLh67u4EE6o23k0H_wS6tczirf6olY,7209
|
|
11
|
+
integrate/integrate_rejection_jax.py,sha256=1YZpANJfr4Fn82MdA6HGRorD3E4tewEaHfKEplWjpYc,20334
|
|
12
|
+
integrate/integrate_timing_cli.py,sha256=-wjKHguPSmyKfn-SGI12Rqbw2iQy_j7bw0sGvgYd8EY,18352
|
|
13
|
+
integrate/integrate_www_cli.py,sha256=p628dV7rPgdci9xJqxP4_fYThkHLf1KJyaO3o1p3cnQ,229
|
|
14
|
+
integrate_module-0.99.1.dist-info/licenses/LICENSE,sha256=xHDEkdbQBsz88r_VgzuhyDYogRg-np2_KfPrJ3clT24,1108
|
|
15
|
+
integrate_module-0.99.1.dist-info/METADATA,sha256=GqTYNlZXmSliwIJlGtSfMsON-M1sdtd1sV3-apqdiTM,7909
|
|
16
|
+
integrate_module-0.99.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
17
|
+
integrate_module-0.99.1.dist-info/entry_points.txt,sha256=w-KqeDaAv6vV4UwT5t1SQ2IwtCEofTJOGZjsjCOuycc,230
|
|
18
|
+
integrate_module-0.99.1.dist-info/top_level.txt,sha256=MALr2Hqk_f1nuZOLr0uhce4jtKD7dPLSuRXD3Plchtc,10
|
|
19
|
+
integrate_module-0.99.1.dist-info/RECORD,,
|