pivtools 0.1.3__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. pivtools-0.1.3.dist-info/METADATA +222 -0
  2. pivtools-0.1.3.dist-info/RECORD +127 -0
  3. pivtools-0.1.3.dist-info/WHEEL +5 -0
  4. pivtools-0.1.3.dist-info/entry_points.txt +3 -0
  5. pivtools-0.1.3.dist-info/top_level.txt +3 -0
  6. pivtools_cli/__init__.py +5 -0
  7. pivtools_cli/_build_marker.c +25 -0
  8. pivtools_cli/_build_marker.cp311-win_amd64.pyd +0 -0
  9. pivtools_cli/cli.py +225 -0
  10. pivtools_cli/example.py +139 -0
  11. pivtools_cli/lib/PIV_2d_cross_correlate.c +334 -0
  12. pivtools_cli/lib/PIV_2d_cross_correlate.h +22 -0
  13. pivtools_cli/lib/common.h +36 -0
  14. pivtools_cli/lib/interp2custom.c +146 -0
  15. pivtools_cli/lib/interp2custom.h +48 -0
  16. pivtools_cli/lib/peak_locate_gsl.c +711 -0
  17. pivtools_cli/lib/peak_locate_gsl.h +40 -0
  18. pivtools_cli/lib/peak_locate_gsl_print.c +736 -0
  19. pivtools_cli/lib/peak_locate_lm.c +751 -0
  20. pivtools_cli/lib/peak_locate_lm.h +27 -0
  21. pivtools_cli/lib/xcorr.c +342 -0
  22. pivtools_cli/lib/xcorr.h +31 -0
  23. pivtools_cli/lib/xcorr_cache.c +78 -0
  24. pivtools_cli/lib/xcorr_cache.h +26 -0
  25. pivtools_cli/piv/interp2custom/interp2custom.py +69 -0
  26. pivtools_cli/piv/piv.py +240 -0
  27. pivtools_cli/piv/piv_backend/base.py +825 -0
  28. pivtools_cli/piv/piv_backend/cpu_instantaneous.py +1005 -0
  29. pivtools_cli/piv/piv_backend/factory.py +28 -0
  30. pivtools_cli/piv/piv_backend/gpu_instantaneous.py +15 -0
  31. pivtools_cli/piv/piv_backend/infilling.py +445 -0
  32. pivtools_cli/piv/piv_backend/outlier_detection.py +306 -0
  33. pivtools_cli/piv/piv_backend/profile_cpu_instantaneous.py +230 -0
  34. pivtools_cli/piv/piv_result.py +40 -0
  35. pivtools_cli/piv/save_results.py +342 -0
  36. pivtools_cli/piv_cluster/cluster.py +108 -0
  37. pivtools_cli/preprocessing/filters.py +399 -0
  38. pivtools_cli/preprocessing/preprocess.py +79 -0
  39. pivtools_cli/tests/helpers.py +107 -0
  40. pivtools_cli/tests/instantaneous_piv/test_piv_integration.py +167 -0
  41. pivtools_cli/tests/instantaneous_piv/test_piv_integration_multi.py +553 -0
  42. pivtools_cli/tests/preprocessing/test_filters.py +41 -0
  43. pivtools_core/__init__.py +5 -0
  44. pivtools_core/config.py +703 -0
  45. pivtools_core/config.yaml +135 -0
  46. pivtools_core/image_handling/__init__.py +0 -0
  47. pivtools_core/image_handling/load_images.py +464 -0
  48. pivtools_core/image_handling/readers/__init__.py +53 -0
  49. pivtools_core/image_handling/readers/generic_readers.py +50 -0
  50. pivtools_core/image_handling/readers/lavision_reader.py +190 -0
  51. pivtools_core/image_handling/readers/registry.py +24 -0
  52. pivtools_core/paths.py +49 -0
  53. pivtools_core/vector_loading.py +248 -0
  54. pivtools_gui/__init__.py +3 -0
  55. pivtools_gui/app.py +687 -0
  56. pivtools_gui/calibration/__init__.py +0 -0
  57. pivtools_gui/calibration/app/__init__.py +0 -0
  58. pivtools_gui/calibration/app/views.py +1186 -0
  59. pivtools_gui/calibration/calibration_planar/planar_calibration_production.py +570 -0
  60. pivtools_gui/calibration/vector_calibration_production.py +544 -0
  61. pivtools_gui/config.py +703 -0
  62. pivtools_gui/image_handling/__init__.py +0 -0
  63. pivtools_gui/image_handling/load_images.py +464 -0
  64. pivtools_gui/image_handling/readers/__init__.py +53 -0
  65. pivtools_gui/image_handling/readers/generic_readers.py +50 -0
  66. pivtools_gui/image_handling/readers/lavision_reader.py +190 -0
  67. pivtools_gui/image_handling/readers/registry.py +24 -0
  68. pivtools_gui/masking/__init__.py +0 -0
  69. pivtools_gui/masking/app/__init__.py +0 -0
  70. pivtools_gui/masking/app/views.py +123 -0
  71. pivtools_gui/paths.py +49 -0
  72. pivtools_gui/piv_runner.py +261 -0
  73. pivtools_gui/pivtools.py +58 -0
  74. pivtools_gui/plotting/__init__.py +0 -0
  75. pivtools_gui/plotting/app/__init__.py +0 -0
  76. pivtools_gui/plotting/app/views.py +1671 -0
  77. pivtools_gui/plotting/plot_maker.py +220 -0
  78. pivtools_gui/post_processing/POD/__init__.py +0 -0
  79. pivtools_gui/post_processing/POD/app/__init__.py +0 -0
  80. pivtools_gui/post_processing/POD/app/views.py +647 -0
  81. pivtools_gui/post_processing/POD/pod_decompose.py +979 -0
  82. pivtools_gui/post_processing/POD/views.py +1096 -0
  83. pivtools_gui/post_processing/__init__.py +0 -0
  84. pivtools_gui/static/404.html +1 -0
  85. pivtools_gui/static/_next/static/chunks/117-d5793c8e79de5511.js +2 -0
  86. pivtools_gui/static/_next/static/chunks/484-cfa8b9348ce4f00e.js +1 -0
  87. pivtools_gui/static/_next/static/chunks/869-320a6b9bdafbb6d3.js +1 -0
  88. pivtools_gui/static/_next/static/chunks/app/_not-found/page-12f067ceb7415e55.js +1 -0
  89. pivtools_gui/static/_next/static/chunks/app/layout-b907d5f31ac82e9d.js +1 -0
  90. pivtools_gui/static/_next/static/chunks/app/page-334cc4e8444cde2f.js +1 -0
  91. pivtools_gui/static/_next/static/chunks/fd9d1056-ad15f396ddf9b7e5.js +1 -0
  92. pivtools_gui/static/_next/static/chunks/framework-f66176bb897dc684.js +1 -0
  93. pivtools_gui/static/_next/static/chunks/main-a1b3ced4d5f6d998.js +1 -0
  94. pivtools_gui/static/_next/static/chunks/main-app-8a63c6f5e7baee11.js +1 -0
  95. pivtools_gui/static/_next/static/chunks/pages/_app-72b849fbd24ac258.js +1 -0
  96. pivtools_gui/static/_next/static/chunks/pages/_error-7ba65e1336b92748.js +1 -0
  97. pivtools_gui/static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  98. pivtools_gui/static/_next/static/chunks/webpack-4a8ca7c99e9bb3d8.js +1 -0
  99. pivtools_gui/static/_next/static/css/7d3f2337d7ea12a5.css +3 -0
  100. pivtools_gui/static/_next/static/vQeR20OUdSSKlK4vukC4q/_buildManifest.js +1 -0
  101. pivtools_gui/static/_next/static/vQeR20OUdSSKlK4vukC4q/_ssgManifest.js +1 -0
  102. pivtools_gui/static/file.svg +1 -0
  103. pivtools_gui/static/globe.svg +1 -0
  104. pivtools_gui/static/grid.svg +8 -0
  105. pivtools_gui/static/index.html +1 -0
  106. pivtools_gui/static/index.txt +8 -0
  107. pivtools_gui/static/next.svg +1 -0
  108. pivtools_gui/static/vercel.svg +1 -0
  109. pivtools_gui/static/window.svg +1 -0
  110. pivtools_gui/stereo_reconstruction/__init__.py +0 -0
  111. pivtools_gui/stereo_reconstruction/app/__init__.py +0 -0
  112. pivtools_gui/stereo_reconstruction/app/views.py +1985 -0
  113. pivtools_gui/stereo_reconstruction/stereo_calibration_production.py +606 -0
  114. pivtools_gui/stereo_reconstruction/stereo_reconstruction_production.py +544 -0
  115. pivtools_gui/utils.py +63 -0
  116. pivtools_gui/vector_loading.py +248 -0
  117. pivtools_gui/vector_merging/__init__.py +1 -0
  118. pivtools_gui/vector_merging/app/__init__.py +1 -0
  119. pivtools_gui/vector_merging/app/views.py +759 -0
  120. pivtools_gui/vector_statistics/app/__init__.py +1 -0
  121. pivtools_gui/vector_statistics/app/views.py +710 -0
  122. pivtools_gui/vector_statistics/ensemble_statistics.py +49 -0
  123. pivtools_gui/vector_statistics/instantaneous_statistics.py +311 -0
  124. pivtools_gui/video_maker/__init__.py +0 -0
  125. pivtools_gui/video_maker/app/__init__.py +0 -0
  126. pivtools_gui/video_maker/app/views.py +436 -0
  127. pivtools_gui/video_maker/video_maker.py +662 -0
@@ -0,0 +1,139 @@
1
+ import logging
2
+ import os
3
+ import sys
4
+ import tracemalloc
5
+ import time
6
+ from pathlib import Path
7
+
8
+ # Add src to path for unified imports
9
+ from pivtools_core.config import Config
10
+ from pivtools_core.image_handling.load_images import load_images, load_mask_for_camera
11
+ from pivtools_core.image_handling.load_images import compute_vector_mask
12
+
13
+ from pivtools_cli.piv.piv import perform_piv_and_save
14
+ from pivtools_cli.piv.save_results import (
15
+ save_coordinates_from_config_distributed,
16
+ get_output_path,
17
+ )
18
+ from pivtools_cli.piv_cluster.cluster import start_cluster
19
+ from pivtools_cli.preprocessing.preprocess import preprocess_images
20
+
21
if __name__ == "__main__":
    # Example driver: run instantaneous PIV for every configured camera on a
    # Dask cluster, save the vector fields and coordinates, and report the
    # total wall-clock time.
    start_time = time.time()  # Start timer

    config = Config()
    # os.environ only accepts str values; str() is a no-op when the config
    # already stores the thread count as a string.
    os.environ["OMP_NUM_THREADS"] = str(config.omp_threads)
    os.environ["MALLOC_TRIM_THRESHOLD_"] = "0"
    if config.debug:
        tracemalloc.start()

    try:
        cluster, client = start_cluster(
            n_workers_per_node=config.dask_workers_per_node,
            threads_per_worker=config.dask_threads_per_worker,
            memory_limit=config.dask_memory_limit,
            config=config,
        )
        logging.info("Dask cluster started successfully")

    except Exception as e:
        logging.error("Error starting Dask cluster: %s", e)
        # sys.exit rather than the interactive exit() helper, which is only
        # injected by the site module and may be missing (python -S, frozen apps).
        sys.exit(1)

    try:

        info = client.scheduler_info()
        for w, meta in info["workers"].items():
            logging.info("Dask Worker Info:")
            logging.info("Worker %s", w)
            logging.info("  pid: %s", meta.get("pid"))
            logging.info("  host: %s", meta.get("host"))
            logging.info("  local_dir: %s", meta.get("local_directory"))
            logging.info("  nanny: %s", meta.get("nanny"))

        camera_numbers = config.camera_numbers
        # Only the first configured source/base path pair is processed here.
        source_path = config.source_paths[0]
        base_path = config.base_paths[0]

        for camera_num in camera_numbers:
            logging.info("Processing camera: Cam%d", camera_num)

            # Load images from source path (lazy loading - no memory consumption yet)
            images = load_images(camera_num, config, source=source_path)

            # Preprocess images (applies filters from config)
            # This intelligently handles batching:
            # - Batch filters (time, pod): rechunks to batch_size
            # - Single-image filters: keeps single-image chunks
            # - No filters: skips preprocessing entirely
            processed_images = preprocess_images(images, config)

            # Load mask once per camera (if masking is enabled)
            mask = load_mask_for_camera(camera_num, config, source_path_idx=0)

            # Pre-compute vector masks once per camera (if masking is enabled)
            vector_masks = None
            if config.masking_enabled and mask is not None:
                logging.info("Pre-computing vector masks for Cam%d", camera_num)
                vector_masks = compute_vector_mask(mask, config)
                logging.info("Vector masks computed: %d passes", len(vector_masks))

            # Get output path for this camera (uncalibrated PIV)
            # Path: base_path/uncalibrated_piv/{num_images}/Cam{camera_num}/instantaneous
            output_path = get_output_path(
                config,
                camera_num,
                use_uncalibrated=True
            )

            # Perform PIV and save in parallel on workers with TRUE lazy loading
            # Each worker processes ONE image at a time: load → PIV → save → free
            # Memory per worker: ~280 MB (constant regardless of total images)
            # Dask scheduler handles task distribution automatically
            saved_paths, scattered_cache = perform_piv_and_save(
                processed_images,  # Use preprocessed images
                config,
                client,
                output_path,
                start_frame=1,
                runs_to_save=config.instantaneous_runs_0based,
                vector_masks=vector_masks,  # Pass pre-computed vector masks
            )

            # Submit coordinate saving task (runs once per camera) with shared cache
            coords_future = client.submit(
                save_coordinates_from_config_distributed,
                config,
                output_path,
                scattered_cache,  # Use the same scattered cache
                config.instantaneous_runs_0based,
            )

            # All PIV processing completed with true lazy loading!
            # Workers processed images one-by-one, keeping memory footprint minimal
            logging.info(
                "PIV and save completed: %d frames saved to %s",
                len(saved_paths), output_path
            )

            # Wait for coordinates to be saved
            coords_future.result()
            logging.info("Coordinates saved to %s", output_path)

        if config.debug:
            current, peak = tracemalloc.get_traced_memory()
            print(f"Current memory usage: {current / 10**6:.2f} MB")
            print(f"Peak memory usage: {peak / 10**6:.2f} MB")

            # Tracing was only started in debug mode, so only stop it here.
            tracemalloc.stop()
    except Exception as e:
        import traceback
        print(f"Error: {e}", flush=True)
        print("Traceback:", flush=True)
        traceback.print_exc()
    finally:
        client.close()
        end_time = time.time()  # End timer
        elapsed = end_time - start_time
        print(f"Total elapsed time: {elapsed:.2f} seconds", flush=True)
@@ -0,0 +1,334 @@
1
+ #include "PIV_2d_cross_correlate.h"
2
+ #include "common.h"
3
+ #include "xcorr.h"
4
+ #include "xcorr_cache.h" /* FFTW wisdom caching */
5
+ #include "peak_locate_lm.h" /* Fast LM solver instead of GSL */
6
+ #include <omp.h>
7
+ #include <math.h>
8
+ #include <stdlib.h>
9
+ #include <string.h>
10
+
11
+ unsigned char bulkxcorr2d(const float *fImageA, const float *fImageB, const float *fMask, const int *nImageSize,
12
+ const float *fWinCtrsX, const float *fWinCtrsY, const int *nWindows, float *fWindowWeightA, bool bEnsemble,
13
+ const float *fWindowWeightB, const int *nWindowSize, int nPeaks, int iPeakFinder,
14
+ float *fPkLocX, float *fPkLocY, float *fPkHeight, float *fSx, float *fSy, float *fSxy, float *fCorrelPlane_Out)
15
+ {
16
+ int i, j, ii, jj, x, y;
17
+ int xmin, ymin;
18
+ int iWindowIdx, nWindowsTotal;
19
+ float *fWindowA, *fWindowB;
20
+ float *fCorrelPlane, *fStd, *fCorrelWeight;
21
+ float *fPeakLoc;
22
+ float fMeanA, fMeanB, fEnergyA, fEnergyB, fEnergyNorm;
23
+ int nPxPerWindow, n;
24
+ unsigned uError;
25
+ sPlan sCCPlan;
26
+ /* Removed peak_finder_lock - LM solver is thread-safe without locks */
27
+
28
+ /* calculate correlation plane weighting matrix
29
+ * according to Raffel et al., the weight factors can be obtained
30
+ * by convolving the image weighting function with itself
31
+ */
32
+ nPxPerWindow = nWindowSize[0] * nWindowSize[1];
33
+ fCorrelWeight = (float*)malloc(nPxPerWindow * sizeof(float));
34
+ if (!fCorrelWeight) { return ERROR_NOMEM; }
35
+ uError = convolve(fWindowWeightB, fWindowWeightB, fCorrelWeight, nWindowSize);
36
+ if (uError)
37
+ {
38
+ free(fCorrelWeight);
39
+ return uError;
40
+ }
41
+
42
+ for(n = 0; n < nPxPerWindow; ++n)
43
+ fCorrelWeight[n] = nPxPerWindow / fCorrelWeight[n];
44
+
45
+
46
+ nWindowsTotal = nWindows[0] * nWindows[1];
47
+
48
+ /* Initialize FFTW threading and wisdom cache */
49
+ fftwf_init_threads();
50
+ fftwf_plan_with_nthreads(1);
51
+
52
+ /* Load FFTW wisdom for optimized plans */
53
+ char wisdom_path[512];
54
+ xcorr_cache_get_default_wisdom_path(wisdom_path, sizeof(wisdom_path));
55
+ xcorr_cache_init(wisdom_path);
56
+
57
+ /* fork here, parallelise */
58
+
59
+ /* fork here, parallelise */
60
+ uError = ERROR_NONE;
61
+ // printf(" Max threads: %d\n", omp_get_max_threads());
62
+
63
+ #pragma omp parallel \
64
+ private(i, j, n, ii, jj, x, y, \
65
+ xmin, ymin, \
66
+ iWindowIdx, \
67
+ fWindowA, fWindowB, fCorrelPlane, fStd, fPeakLoc, \
68
+ fMeanA, fMeanB, fEnergyA, fEnergyB, fEnergyNorm, \
69
+ sCCPlan) \
70
+ shared(fImageA, fImageB, fMask, nImageSize, \
71
+ fWinCtrsX, fWinCtrsY, nWindows, bEnsemble, \
72
+ fCorrelWeight, fWindowWeightA, fWindowWeightB, nWindowSize, nPeaks, iPeakFinder, \
73
+ fPkLocX, fPkLocY, fPkHeight, fSx, fSy, fSxy, \
74
+ nWindowsTotal, nPxPerWindow, fCorrelPlane_Out) \
75
+ default(none) \
76
+ reduction(|:uError) \
77
+ num_threads(omp_get_max_threads())
78
+ {
79
+ /* Allocate memory for correlation windows
80
+ * Use aligned allocation for better cache performance
81
+ */
82
+ uError = ERROR_NONE;
83
+ fCorrelPlane = (float*)fftwf_malloc(nPxPerWindow * sizeof(float));
84
+ fWindowA = (float*)fftwf_malloc(nPxPerWindow * sizeof(float));
85
+ fWindowB = (float*)fftwf_malloc(nPxPerWindow * sizeof(float));
86
+ fStd = (float*)malloc(3 * nPeaks * sizeof(float));
87
+ fPeakLoc = (float*)malloc(3 * nPeaks * sizeof(float));
88
+
89
+
90
+ if(!fWindowA || !fWindowB || !fCorrelPlane || !fPeakLoc || !fStd)
91
+ {
92
+ uError = ERROR_NOMEM;
93
+ goto thread_cleanup;
94
+ }
95
+
96
+ /* create cross-correlation plan for this thread */
97
+ memset(&sCCPlan, 0, sizeof(sCCPlan));
98
+ #pragma omp critical
99
+ {
100
+ fftwf_plan_with_nthreads(1);
101
+ uError = xcorr_create_plan(nWindowSize, &sCCPlan);
102
+ }
103
+ if(uError)
104
+ goto thread_cleanup;
105
+
106
+ /* condense to one loop to make parallelisation easier */
107
+ #pragma omp for schedule(static, CHUNKSIZE) nowait
108
+ for(iWindowIdx = 0; iWindowIdx < nWindowsTotal; ++iWindowIdx)
109
+ {
110
+ /* Coordinate system (matching MATLAB and Python):
111
+ * - nWindows[0] = number of windows in Y (height) direction
112
+ * - nWindows[1] = number of windows in X (width) direction
113
+ * - fWinCtrsY[jj] = Y-coordinate of window center (row index)
114
+ * - fWinCtrsX[ii] = X-coordinate of window center (column index)
115
+ * - Row-major linearization: index = jj * nWindows[1] + ii
116
+ */
117
+
118
+ /* get index in window center arrays
119
+ * For row-major: linearIdx = row*nCols + col
120
+ * Here: iWindowIdx = jj*nWindows[1] + ii
121
+ */
122
+ ii = iWindowIdx % nWindows[1]; // Column index (X)
123
+ jj = iWindowIdx / nWindows[1]; // Row index (Y)
124
+
125
+ /* Mask uses same row-major indexing */
126
+ int mask_idx = jj * nWindows[1] + ii;
127
+
128
+ if (mask_idx < 0 || mask_idx >= nWindows[0] * nWindows[1])
129
+ {
130
+ uError = ERROR_OUT_OF_BOUNDS;
131
+ //goto thread_cleanup;
132
+ }
133
+ // Check if the mask value at this index is 1
134
+ if (fMask[mask_idx] == 1)
135
+ {
136
+ continue; // Skip this window if the mask value is 1
137
+ }
138
+
139
+ /* Extract correlation window from images
140
+ * Window center coordinates (0-based array indices):
141
+ * - fWinCtrsX[ii] is the X-coordinate (column) of the window center
142
+ * For a 128-pixel window at the left edge: center = 63.5
143
+ * - fWinCtrsY[jj] is the Y-coordinate (row) of the window center
144
+ * Window size:
145
+ * - nWindowSize[0] = window height (number of rows)
146
+ * - nWindowSize[1] = window width (number of columns)
147
+ * Image dimensions:
148
+ * - nImageSize[0] = image height (number of rows)
149
+ * - nImageSize[1] = image width (number of columns)
150
+ *
151
+ * Window extraction:
152
+ * - For a window of size N centered at position C:
153
+ * window covers pixels from floor(C - (N-1)/2 + 0.5) to floor(C + (N-1)/2 + 0.5)
154
+ * - Example: N=128, C=63.5 -> floor(63.5 - 63.5 + 0.5) = 0 to floor(63.5 + 63.5 + 0.5) = 127
155
+ */
156
+
157
+ /* Calculate top-left corner of window in image coordinates */
158
+ int row_min = (int)floor(fWinCtrsY[jj] - ((float)nWindowSize[0]-1.0)/2.0 + 0.5);
159
+ int col_min = (int)floor(fWinCtrsX[ii] - ((float)nWindowSize[1]-1.0)/2.0 + 0.5);
160
+
161
+ /* Bounds check to prevent segfault */
162
+ if (row_min < 0 || col_min < 0 ||
163
+ row_min + nWindowSize[0] > nImageSize[0] ||
164
+ col_min + nWindowSize[1] > nImageSize[1]) {
165
+ uError = ERROR_OUT_OF_BOUNDS;
166
+ continue; /* Skip this window */
167
+ }
168
+
169
+ /* Extract window pixels: iterate over window rows and columns */
170
+ for(int row_win = 0; row_win < nWindowSize[0]; ++row_win)
171
+ {
172
+ int row_img = row_min + row_win;
173
+ for(int col_win = 0; col_win < nWindowSize[1]; ++col_win)
174
+ {
175
+ int col_img = col_min + col_win;
176
+ /* Row-major indexing: array[row, col] -> row*width + col */
177
+ fWindowA[SUB2IND_2D(row_win, col_win, nWindowSize[1])] =
178
+ fImageA[SUB2IND_2D(row_img, col_img, nImageSize[1])];
179
+ fWindowB[SUB2IND_2D(row_win, col_win, nWindowSize[1])] =
180
+ fImageB[SUB2IND_2D(row_img, col_img, nImageSize[1])];
181
+ }
182
+ }
183
+ /* Pre-multiply by weighting window and compute mean
184
+ * Using SIMD hints for vectorization
185
+ */
186
+ fMeanA = 0;
187
+ fMeanB = 0;
188
+ #pragma omp simd reduction(+:fMeanA,fMeanB)
189
+ for(n = 0; n < nPxPerWindow; ++n)
190
+ {
191
+ fWindowA[n] *= fWindowWeightA[n];
192
+ fWindowB[n] *= fWindowWeightB[n];
193
+ fMeanA += fWindowA[n];
194
+ fMeanB += fWindowB[n];
195
+ }
196
+ fMeanA = fMeanA / (float)nPxPerWindow;
197
+ fMeanB = fMeanB / (float)nPxPerWindow;
198
+
199
+ /* Subtract mean and calculate signal energy for peak normalisation
200
+ * Using SIMD hints for vectorization
201
+ */
202
+ fEnergyA = 0;
203
+ fEnergyB = 0;
204
+ if (!bEnsemble) {
205
+ #pragma omp simd reduction(+:fEnergyA,fEnergyB)
206
+ for(n = 0; n < nPxPerWindow; ++n)
207
+ {
208
+ fWindowA[n] -= fMeanA;
209
+ fWindowB[n] -= fMeanB;
210
+ fEnergyA += fWindowA[n]*fWindowA[n];
211
+ fEnergyB += fWindowB[n]*fWindowB[n];
212
+ }
213
+ } else {
214
+ #pragma omp simd reduction(+:fEnergyA,fEnergyB)
215
+ for(n = 0; n < nPxPerWindow; ++n)
216
+ {
217
+ fEnergyA += fWindowA[n]*fWindowA[n];
218
+ fEnergyB += fWindowB[n]*fWindowB[n];
219
+ }
220
+ }
221
+ fEnergyNorm = 1 / (float)sqrt(fEnergyA * fEnergyB);
222
+
223
+ /* Cross-correlate */
224
+ xcorr_preplanned(fWindowB, fWindowA, fCorrelPlane, &sCCPlan);
225
+
226
+ /* Apply correlation plane weighting with SIMD vectorization */
227
+ if (!bEnsemble) {
228
+ #pragma omp simd
229
+ for (n = 0; n < nPxPerWindow; ++n)
230
+ {
231
+ fCorrelPlane[n] *= fCorrelWeight[n];
232
+ }
233
+ }
234
+
235
+
236
+
237
+
238
+
239
+ memcpy(&fCorrelPlane_Out[nPxPerWindow * iWindowIdx], fCorrelPlane, nPxPerWindow * sizeof(float));
240
+
241
+
242
+
243
+ /* Call peak finder - LM solver is fully thread-safe, no locks needed */
244
+ if (!bEnsemble) {
245
+ lsqpeaklocate_lm(fCorrelPlane, nWindowSize, fPeakLoc, nPeaks, iPeakFinder, fStd);
246
+
247
+
248
+ /* Save displacement and peak height
249
+ * Output arrays have shape [nPeaks, nWindows[0], nWindows[1]]
250
+ * where nWindows[0] = Y-windows, nWindows[1] = X-windows
251
+ * Peak locations from lsqpeaklocate_lm:
252
+ * - fPeakLoc[0, n, 3] = row offset (Y) from window center
253
+ * - fPeakLoc[1, n, 3] = column offset (X) from window center
254
+ * - fPeakLoc[2, n, 3] = peak magnitude
255
+ */
256
+ for(n = 0; n < nPeaks; ++n)
257
+ {
258
+ /* Calculate linear index for this peak and window
259
+ * For 3D array [nPeaks, nRows, nCols] in row-major:
260
+ * index = peak*nRows*nCols + row*nCols + col
261
+ */
262
+ int out_idx = n * nWindows[0] * nWindows[1] + jj * nWindows[1] + ii;
263
+
264
+ /* Peak location in correlation plane (centered at window size/2)
265
+ * Subtract window center to get displacement from window center
266
+ * fPeakLoc dimensions: [3, nPeaks] in row-major
267
+ */
268
+ float peak_row = fPeakLoc[SUB2IND_2D(0, n, nPeaks)]; // Y-displacement
269
+ float peak_col = fPeakLoc[SUB2IND_2D(1, n, nPeaks)]; // X-displacement
270
+ float peak_mag = fPeakLoc[SUB2IND_2D(2, n, nPeaks)];
271
+
272
+ /* Store displacements (subtract window center to get offset) */
273
+ fPkLocX[out_idx] = peak_col - nWindowSize[1]/2.0f; // X is column
274
+ fPkLocY[out_idx] = peak_row - nWindowSize[0]/2.0f; // Y is row
275
+
276
+ /* Store standard deviations */
277
+ fSx[out_idx] = fStd[SUB2IND_2D(0, n, nPeaks)];
278
+ fSy[out_idx] = fStd[SUB2IND_2D(1, n, nPeaks)];
279
+ fSxy[out_idx] = fStd[SUB2IND_2D(2, n, nPeaks)];
280
+
281
+ /* Normalize peak height by window weight and energy content */
282
+ int pk_row = MIN(MAX(0, (int)peak_row), nWindowSize[0]-1);
283
+ int pk_col = MIN(MAX(0, (int)peak_col), nWindowSize[1]-1);
284
+ fPkHeight[out_idx] = peak_mag * fEnergyNorm /
285
+ fCorrelWeight[SUB2IND_2D(pk_row, pk_col, nWindowSize[1])];
286
+ }
287
+ }
288
+ }
289
+
290
+ /* Cleanup memory and other resources before leaving the thread */
291
+ thread_cleanup:
292
+ #pragma omp critical
293
+ {
294
+ xcorr_destroy_plan(&sCCPlan);
295
+ }
296
+ if(fWindowA) fftwf_free(fWindowA);
297
+ if(fStd) free(fStd);
298
+ if(fWindowB) fftwf_free(fWindowB);
299
+ if(fCorrelPlane) fftwf_free(fCorrelPlane);
300
+ if(fPeakLoc) free(fPeakLoc);
301
+
302
+ } /* end parallelised section */
303
+
304
+ /* Save wisdom for future runs */
305
+ xcorr_cache_save_wisdom(wisdom_path);
306
+
307
+ return uError;
308
+ }
309
+
310
/* fminvec, find minimum element in vector
 *
 * fVec  array holding at least n floats
 * n     number of elements to scan
 *
 * Returns the smallest of the first n elements. Returns NAN when fVec is
 * NULL or n <= 0 instead of reading fVec[0] from an empty vector.
 */
float fminvec(const float *fVec, int n)
{
    int i;
    float ret;

    if(!fVec || n <= 0)
        return NAN;

    ret = fVec[0];
    for(i = 1; i < n; ++i)
    {
        /* keep ret only when it compares strictly smaller, otherwise take
         * the candidate (same selection as ret < fVec[i] ? ret : fVec[i])
         */
        if(!(ret < fVec[i]))
            ret = fVec[i];
    }

    return ret;
}
322
+
323
/* fmaxvec, find maximum element in vector
 *
 * fVec  array holding at least n floats
 * n     number of elements to scan
 *
 * Returns the largest of the first n elements. Returns NAN when fVec is
 * NULL or n <= 0 instead of reading fVec[0] from an empty vector.
 */
float fmaxvec(const float *fVec, int n)
{
    int i;
    float ret;

    if(!fVec || n <= 0)
        return NAN;

    ret = fVec[0];
    for(i = 1; i < n; ++i)
    {
        /* keep ret only when it compares strictly larger, otherwise take
         * the candidate (same selection as ret > fVec[i] ? ret : fVec[i])
         */
        if(!(ret > fVec[i]))
            ret = fVec[i];
    }

    return ret;
}
@@ -0,0 +1,22 @@
1
#ifndef PIV_2D_XCORR_H
#define PIV_2D_XCORR_H
#include <stdbool.h>

/* EXPORT expands to __declspec(dllexport) on Windows builds and to nothing
 * on every other platform.
 */
#if defined(_WIN32)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif

/**** function declarations ****/

/* Bulk 2D cross-correlation of all interrogation windows of an image pair. */
EXPORT unsigned char bulkxcorr2d(const float *fImageA, const float *fImageB,const float *fMask, const int *nImageSize,
                                 const float *fWinCtrsX, const float *fWinCtrsY, const int *nWindows, float * fWindowWeightA, bool bEnsemble,
                                 const float *fWindowWeightB, const int *nWindowSize, int nPeaks, int iPeakFinder,
                                 float *fPkLocX, float *fPkLocY, float *fPkHeight, float *fSx, float *fSy, float *fSxy, float *fCorrelPlane_Out);

/* Smallest / largest element among the first n entries of fVec. */
EXPORT float fminvec(const float *fVec, int n);
EXPORT float fmaxvec(const float *fVec, int n);

#endif /* PIV_2D_XCORR_H */
@@ -0,0 +1,36 @@
1
#ifndef COMMON_H
#define COMMON_H

/**** useful macros ****/
/* Row-major (C-contiguous) indexing for 2D arrays
 * For array[i,j] with shape [M, N]:
 * - i is the row index (0 to M-1), corresponds to Y/height
 * - j is the column index (0 to N-1), corresponds to X/width
 * - Linear index = i*N + j
 */
#define SUB2IND_2D(i, j, N) ((i)*(N) + (j))
/* Row-major index into a 3D array whose two trailing dimensions are M and N */
#define SUB2IND_3D(i, j, k, M, N) ((i)*(M)*(N) + (j)*(N) + (k))

/* MIN/MAX evaluate their arguments more than once: do not pass expressions
 * with side effects. Guarded because some system headers (for example
 * <sys/param.h>) define the same names.
 */
#ifndef MIN
#define MIN(A,B) ((A)<(B)?(A):(B))
#endif
#ifndef MAX
#define MAX(A,B) ((A)>(B)?(A):(B))
#endif

/**** defines ****/
#define PI 3.14159265f
#define SQRT_PI 1.77245385f
/* TRUE/FALSE are also provided by some platform headers (for example the
 * Windows SDK), hence the guards.
 */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
#ifndef NAN
#define NAN (0.0f/0.0f)
#endif

/* Chunk size used by the static OpenMP loop schedules */
#define CHUNKSIZE 256
/* Expands to a call, so <omp.h> must be included wherever this is used */
#define NUMTHREADS_MAX omp_get_max_threads()

/* Error codes. ERROR_NOMEM .. ERROR_NOPLAN are distinct bits. Note that
 * ERROR_OUT_OF_BOUNDS (9) is not: it equals ERROR_NOMEM | ERROR_NOPLAN, so
 * once codes from several threads have been OR-ed together it cannot be told
 * apart from that combination.
 */
#define ERROR_NONE 0
#define ERROR_NOMEM 1
#define ERROR_NOPLAN_FWD 2
#define ERROR_NOPLAN_BWD 4
#define ERROR_NOPLAN 8
#define ERROR_OUT_OF_BOUNDS 9

#endif
@@ -0,0 +1,146 @@
1
+ #include "interp2custom.h"
2
+
3
+ /**** global variables ****/
4
+ int g_iLUTIn_interptialised = 0;
5
+ int g_iKernelHalfSize = 5;
6
+ int g_iKernelType = 0;
7
+ float g_fGaussKernelStd = 0.65f;
8
+ float *g_fLUT = NULL;
9
+
10
+ /**** sinc helper ****/
11
+ float sinc(float x) {
12
+ return x == 0.0f ? 1.0f : sinf(PI*x)/(PI*x);
13
+ }
14
+
15
+ /**** 1D interpolation ****/
16
+ float interp1custom(float *y, float *fFilterCoefficients) {
17
+ int m;
18
+ float yi = 0;
19
+ #pragma omp simd
20
+ for(m = 0; m < KERNEL_SIZE; ++m) {
21
+ yi += y[m] * fFilterCoefficients[m];
22
+ }
23
+ return yi;
24
+ }
25
+
26
+ /**** vectorized 1D interpolation ****/
27
+ void interp1custom_vec(float *y, float *yi, int N, float *fFilterCoefficients) {
28
+ int m, n;
29
+ memset(yi, 0, sizeof(float) * N);
30
+ for(m = 0; m < KERNEL_SIZE; ++m) {
31
+ #pragma omp simd
32
+ for(n = 0; n < N; ++n) {
33
+ yi[n] += y[m*N + n] * fFilterCoefficients[m];
34
+ }
35
+ }
36
+ }
37
+
38
+ /**** generate LUT ****/
39
+ void interp1custom_generatelut(int iKernelType, int iKernelSize, float fOptions) {
40
+ int m, n;
41
+ float delta, A;
42
+ int iKernelHalfSize = iKernelSize / 2;
43
+ int bRegenerate = !g_iLUTIn_interptialised || iKernelType != g_iKernelType || iKernelHalfSize != g_iKernelHalfSize;
44
+ if(iKernelType == KERNEL_GAUSSIAN)
45
+ bRegenerate = bRegenerate || fOptions != g_fGaussKernelStd;
46
+
47
+ if(!bRegenerate) return;
48
+
49
+ if(g_iLUTIn_interptialised && g_fLUT) free(g_fLUT);
50
+ g_fLUT = (float*)malloc(NLUT * iKernelSize * sizeof(float));
51
+
52
+ switch(iKernelType) {
53
+ case KERNEL_LANCZOS:
54
+ for(n = 1; n < NLUT; ++n) {
55
+ delta = ((float)n)/((float)NLUT);
56
+ for(m = -iKernelHalfSize+1; m <= iKernelHalfSize; ++m) {
57
+ g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1] = sinc(delta - m) * sinc((delta - m)/(float)iKernelHalfSize);
58
+ }
59
+ }
60
+ memset(g_fLUT, 0, sizeof(float)*iKernelSize);
61
+ g_fLUT[iKernelHalfSize-1] = 1;
62
+ break;
63
+ case KERNEL_GAUSSIAN:
64
+ for(n = 0; n < NLUT; ++n) {
65
+ delta = ((float)n)/((float)NLUT);
66
+ A = 0;
67
+ for(m = -iKernelHalfSize+1; m <= iKernelHalfSize; ++m) {
68
+ g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1] = expf(-powf((m-delta)/fOptions, 2));
69
+ A += g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1];
70
+ }
71
+ A = 1/A;
72
+ for(m = -iKernelHalfSize+1; m <= iKernelHalfSize; ++m)
73
+ g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1] *= A;
74
+ }
75
+ break;
76
+ }
77
+ g_iKernelType = iKernelType;
78
+ g_fGaussKernelStd = fOptions;
79
+ g_iKernelHalfSize = iKernelHalfSize;
80
+ g_iLUTIn_interptialised = 1;
81
+ }
82
+
83
+ /**** destroy LUT ****/
84
+ void interp1custom_destroylut(void) {
85
+ if(g_iLUTIn_interptialised && g_fLUT) {
86
+ free(g_fLUT);
87
+ g_fLUT = NULL;
88
+ }
89
+ }
90
+
91
+ /**** 2D interpolation ****
+  * Evaluates the 2D array y at n_interp fractional positions
+  * (f_i[n], f_j[n]) and writes one interpolated value per position to yi[n].
+  * N holds the two array dimensions that the positions are checked against.
+  * Positions outside [0, N[0]-1] x [0, N[1]-1] produce 0.
+  * Filter coefficients are read from the global table g_fLUT.
+  * NOTE(review): g_fLUT is dereferenced without a NULL check - presumably
+  * interp1custom_generatelut() must have been called first; confirm.
+  ****/
92
+ void interp2custom(const float *y, size_t *N, const float *f_i, const float *f_j, float *yi, int n_interp) {
93
+ int n, i, j;
94
+ float delta[2];
95
+ int m[2];
96
+ float *fFilterCoefficients[2];
97
+ float *yi_stage0, *yi_stage1;
98
+ int i_min, i_max, j_min, j_max;
99
+
100
+ #pragma omp parallel default(none) \
101
+ private(n,i,j,i_min,i_max,j_min,j_max,delta,m,fFilterCoefficients,yi_stage0,yi_stage1) \
102
+ shared(y,N,f_i,f_j,yi,n_interp,g_fLUT,g_iKernelHalfSize) \
103
+ num_threads(NUMTHREADS_MAX)
104
+ {
105
+ yi_stage0 = (float*)malloc(KERNEL_SIZE*KERNEL_SIZE*sizeof(float)); /* per-thread scratch for the sample neighbourhood; NOTE(review): malloc result is not checked */
106
+ yi_stage1 = (float*)malloc(KERNEL_SIZE*sizeof(float)); /* per-thread scratch for the first interpolation pass; also unchecked */
107
+
108
+ #pragma omp for schedule(static, CHUNKSIZE)
109
+ for(n = 0; n < n_interp; ++n) {
110
+ if(f_i[n] < 0 || f_i[n] > N[0]-1 || f_j[n] < 0 || f_j[n] > N[1]-1) { /* position outside the array: output 0 */
111
+ yi[n] = 0;
112
+ continue;
113
+ }
114
+
115
+ m[0] = (int)f_i[n]; /* integer part of the position */
116
+ m[1] = (int)f_j[n];
117
+ delta[0] = f_i[n] - m[0]; /* fractional part of the position */
118
+ delta[1] = f_j[n] - m[1];
119
+
120
+ for(i = 0; i < 2; ++i) {
121
+ j = (int)((float)NLUT*delta[i]); /* LUT row for this fractional offset */
122
+ j = MIN(MAX(j,0),NLUT-1); /* clamp to a valid LUT row */
123
+ fFilterCoefficients[i] = &g_fLUT[j * KERNEL_SIZE];
124
+ }
125
+
126
+ i_min = MAX(0, m[0]-KERNEL_HALF_SIZE+1); /* clamp the kernel footprint to the array bounds */
127
+ j_min = MAX(0, m[1]-KERNEL_HALF_SIZE+1);
128
+ i_max = MIN(N[0]-1, m[0]+KERNEL_HALF_SIZE);
129
+ j_max = MIN(N[1]-1, m[1]+KERNEL_HALF_SIZE);
130
+
131
+ memset(yi_stage0, 0, KERNEL_SIZE*KERNEL_SIZE*sizeof(float)); /* entries not overwritten by the copy below stay 0 */
132
+
133
+ for(j = j_min; j <= j_max; ++j) {
134
+ memcpy(&yi_stage0[SUB2IND_2D(i_min-m[0]+KERNEL_HALF_SIZE-1, j-m[1]+KERNEL_HALF_SIZE-1, KERNEL_SIZE)],
135
+ &y[SUB2IND_2D(i_min,j,N[0])],
136
+ (i_max-i_min+1)*sizeof(float)); /* NOTE(review): copies (i_max-i_min+1) contiguous floats per j; whether consecutive i are contiguous depends on the SUB2IND_2D definition in scope (not visible in this file) - confirm */
137
+ }
138
+
139
+ interp1custom_vec(yi_stage0, yi_stage1, KERNEL_SIZE, fFilterCoefficients[1]); /* first pass: reduce the neighbourhood to KERNEL_SIZE values with the f_j coefficients */
140
+ yi[n] = interp1custom(yi_stage1, fFilterCoefficients[0]); /* second pass: reduce to the final value with the f_i coefficients */
141
+ }
142
+
143
+ free(yi_stage0);
144
+ free(yi_stage1);
145
+ }
146
+ }