pivtools 0.1.3__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pivtools-0.1.3.dist-info/METADATA +222 -0
- pivtools-0.1.3.dist-info/RECORD +127 -0
- pivtools-0.1.3.dist-info/WHEEL +5 -0
- pivtools-0.1.3.dist-info/entry_points.txt +3 -0
- pivtools-0.1.3.dist-info/top_level.txt +3 -0
- pivtools_cli/__init__.py +5 -0
- pivtools_cli/_build_marker.c +25 -0
- pivtools_cli/_build_marker.cp311-win_amd64.pyd +0 -0
- pivtools_cli/cli.py +225 -0
- pivtools_cli/example.py +139 -0
- pivtools_cli/lib/PIV_2d_cross_correlate.c +334 -0
- pivtools_cli/lib/PIV_2d_cross_correlate.h +22 -0
- pivtools_cli/lib/common.h +36 -0
- pivtools_cli/lib/interp2custom.c +146 -0
- pivtools_cli/lib/interp2custom.h +48 -0
- pivtools_cli/lib/peak_locate_gsl.c +711 -0
- pivtools_cli/lib/peak_locate_gsl.h +40 -0
- pivtools_cli/lib/peak_locate_gsl_print.c +736 -0
- pivtools_cli/lib/peak_locate_lm.c +751 -0
- pivtools_cli/lib/peak_locate_lm.h +27 -0
- pivtools_cli/lib/xcorr.c +342 -0
- pivtools_cli/lib/xcorr.h +31 -0
- pivtools_cli/lib/xcorr_cache.c +78 -0
- pivtools_cli/lib/xcorr_cache.h +26 -0
- pivtools_cli/piv/interp2custom/interp2custom.py +69 -0
- pivtools_cli/piv/piv.py +240 -0
- pivtools_cli/piv/piv_backend/base.py +825 -0
- pivtools_cli/piv/piv_backend/cpu_instantaneous.py +1005 -0
- pivtools_cli/piv/piv_backend/factory.py +28 -0
- pivtools_cli/piv/piv_backend/gpu_instantaneous.py +15 -0
- pivtools_cli/piv/piv_backend/infilling.py +445 -0
- pivtools_cli/piv/piv_backend/outlier_detection.py +306 -0
- pivtools_cli/piv/piv_backend/profile_cpu_instantaneous.py +230 -0
- pivtools_cli/piv/piv_result.py +40 -0
- pivtools_cli/piv/save_results.py +342 -0
- pivtools_cli/piv_cluster/cluster.py +108 -0
- pivtools_cli/preprocessing/filters.py +399 -0
- pivtools_cli/preprocessing/preprocess.py +79 -0
- pivtools_cli/tests/helpers.py +107 -0
- pivtools_cli/tests/instantaneous_piv/test_piv_integration.py +167 -0
- pivtools_cli/tests/instantaneous_piv/test_piv_integration_multi.py +553 -0
- pivtools_cli/tests/preprocessing/test_filters.py +41 -0
- pivtools_core/__init__.py +5 -0
- pivtools_core/config.py +703 -0
- pivtools_core/config.yaml +135 -0
- pivtools_core/image_handling/__init__.py +0 -0
- pivtools_core/image_handling/load_images.py +464 -0
- pivtools_core/image_handling/readers/__init__.py +53 -0
- pivtools_core/image_handling/readers/generic_readers.py +50 -0
- pivtools_core/image_handling/readers/lavision_reader.py +190 -0
- pivtools_core/image_handling/readers/registry.py +24 -0
- pivtools_core/paths.py +49 -0
- pivtools_core/vector_loading.py +248 -0
- pivtools_gui/__init__.py +3 -0
- pivtools_gui/app.py +687 -0
- pivtools_gui/calibration/__init__.py +0 -0
- pivtools_gui/calibration/app/__init__.py +0 -0
- pivtools_gui/calibration/app/views.py +1186 -0
- pivtools_gui/calibration/calibration_planar/planar_calibration_production.py +570 -0
- pivtools_gui/calibration/vector_calibration_production.py +544 -0
- pivtools_gui/config.py +703 -0
- pivtools_gui/image_handling/__init__.py +0 -0
- pivtools_gui/image_handling/load_images.py +464 -0
- pivtools_gui/image_handling/readers/__init__.py +53 -0
- pivtools_gui/image_handling/readers/generic_readers.py +50 -0
- pivtools_gui/image_handling/readers/lavision_reader.py +190 -0
- pivtools_gui/image_handling/readers/registry.py +24 -0
- pivtools_gui/masking/__init__.py +0 -0
- pivtools_gui/masking/app/__init__.py +0 -0
- pivtools_gui/masking/app/views.py +123 -0
- pivtools_gui/paths.py +49 -0
- pivtools_gui/piv_runner.py +261 -0
- pivtools_gui/pivtools.py +58 -0
- pivtools_gui/plotting/__init__.py +0 -0
- pivtools_gui/plotting/app/__init__.py +0 -0
- pivtools_gui/plotting/app/views.py +1671 -0
- pivtools_gui/plotting/plot_maker.py +220 -0
- pivtools_gui/post_processing/POD/__init__.py +0 -0
- pivtools_gui/post_processing/POD/app/__init__.py +0 -0
- pivtools_gui/post_processing/POD/app/views.py +647 -0
- pivtools_gui/post_processing/POD/pod_decompose.py +979 -0
- pivtools_gui/post_processing/POD/views.py +1096 -0
- pivtools_gui/post_processing/__init__.py +0 -0
- pivtools_gui/static/404.html +1 -0
- pivtools_gui/static/_next/static/chunks/117-d5793c8e79de5511.js +2 -0
- pivtools_gui/static/_next/static/chunks/484-cfa8b9348ce4f00e.js +1 -0
- pivtools_gui/static/_next/static/chunks/869-320a6b9bdafbb6d3.js +1 -0
- pivtools_gui/static/_next/static/chunks/app/_not-found/page-12f067ceb7415e55.js +1 -0
- pivtools_gui/static/_next/static/chunks/app/layout-b907d5f31ac82e9d.js +1 -0
- pivtools_gui/static/_next/static/chunks/app/page-334cc4e8444cde2f.js +1 -0
- pivtools_gui/static/_next/static/chunks/fd9d1056-ad15f396ddf9b7e5.js +1 -0
- pivtools_gui/static/_next/static/chunks/framework-f66176bb897dc684.js +1 -0
- pivtools_gui/static/_next/static/chunks/main-a1b3ced4d5f6d998.js +1 -0
- pivtools_gui/static/_next/static/chunks/main-app-8a63c6f5e7baee11.js +1 -0
- pivtools_gui/static/_next/static/chunks/pages/_app-72b849fbd24ac258.js +1 -0
- pivtools_gui/static/_next/static/chunks/pages/_error-7ba65e1336b92748.js +1 -0
- pivtools_gui/static/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- pivtools_gui/static/_next/static/chunks/webpack-4a8ca7c99e9bb3d8.js +1 -0
- pivtools_gui/static/_next/static/css/7d3f2337d7ea12a5.css +3 -0
- pivtools_gui/static/_next/static/vQeR20OUdSSKlK4vukC4q/_buildManifest.js +1 -0
- pivtools_gui/static/_next/static/vQeR20OUdSSKlK4vukC4q/_ssgManifest.js +1 -0
- pivtools_gui/static/file.svg +1 -0
- pivtools_gui/static/globe.svg +1 -0
- pivtools_gui/static/grid.svg +8 -0
- pivtools_gui/static/index.html +1 -0
- pivtools_gui/static/index.txt +8 -0
- pivtools_gui/static/next.svg +1 -0
- pivtools_gui/static/vercel.svg +1 -0
- pivtools_gui/static/window.svg +1 -0
- pivtools_gui/stereo_reconstruction/__init__.py +0 -0
- pivtools_gui/stereo_reconstruction/app/__init__.py +0 -0
- pivtools_gui/stereo_reconstruction/app/views.py +1985 -0
- pivtools_gui/stereo_reconstruction/stereo_calibration_production.py +606 -0
- pivtools_gui/stereo_reconstruction/stereo_reconstruction_production.py +544 -0
- pivtools_gui/utils.py +63 -0
- pivtools_gui/vector_loading.py +248 -0
- pivtools_gui/vector_merging/__init__.py +1 -0
- pivtools_gui/vector_merging/app/__init__.py +1 -0
- pivtools_gui/vector_merging/app/views.py +759 -0
- pivtools_gui/vector_statistics/app/__init__.py +1 -0
- pivtools_gui/vector_statistics/app/views.py +710 -0
- pivtools_gui/vector_statistics/ensemble_statistics.py +49 -0
- pivtools_gui/vector_statistics/instantaneous_statistics.py +311 -0
- pivtools_gui/video_maker/__init__.py +0 -0
- pivtools_gui/video_maker/app/__init__.py +0 -0
- pivtools_gui/video_maker/app/views.py +436 -0
- pivtools_gui/video_maker/video_maker.py +662 -0
pivtools_cli/example.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import tracemalloc
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Add src to path for unified imports
|
|
9
|
+
from pivtools_core.config import Config
|
|
10
|
+
from pivtools_core.image_handling.load_images import load_images, load_mask_for_camera
|
|
11
|
+
from pivtools_core.image_handling.load_images import compute_vector_mask
|
|
12
|
+
|
|
13
|
+
from pivtools_cli.piv.piv import perform_piv_and_save
|
|
14
|
+
from pivtools_cli.piv.save_results import (
|
|
15
|
+
save_coordinates_from_config_distributed,
|
|
16
|
+
get_output_path,
|
|
17
|
+
)
|
|
18
|
+
from pivtools_cli.piv_cluster.cluster import start_cluster
|
|
19
|
+
from pivtools_cli.preprocessing.preprocess import preprocess_images
|
|
20
|
+
|
|
21
|
+
if __name__ == "__main__":
    # Example driver: run instantaneous PIV for every configured camera on a
    # Dask cluster, save the vectors and coordinates, and report the total
    # wall-clock time.
    start_time = time.time()  # Start timer
    exit_code = 0

    config = Config()
    # os.environ only accepts strings; str() is a no-op when the configured
    # value already is one.
    os.environ["OMP_NUM_THREADS"] = str(config.omp_threads)
    os.environ["MALLOC_TRIM_THRESHOLD_"] = "0"
    if config.debug:
        tracemalloc.start()

    try:
        cluster, client = start_cluster(
            n_workers_per_node=config.dask_workers_per_node,
            threads_per_worker=config.dask_threads_per_worker,
            memory_limit=config.dask_memory_limit,
            config=config,
        )
        logging.info("Dask cluster started successfully")
    except Exception as e:
        logging.error("Error starting Dask cluster: %s", e)
        # sys.exit is always available; the bare exit() helper only exists
        # when the site module has been loaded.
        sys.exit(1)

    try:
        info = client.scheduler_info()
        for w, meta in info["workers"].items():
            logging.info("Dask Worker Info:")
            logging.info("Worker %s", w)
            logging.info("  pid: %s", meta.get("pid"))
            logging.info("  host: %s", meta.get("host"))
            logging.info("  local_dir: %s", meta.get("local_directory"))
            logging.info("  nanny: %s", meta.get("nanny"))

        camera_numbers = config.camera_numbers
        source_path = config.source_paths[0]

        for camera_num in camera_numbers:
            logging.info("Processing camera: Cam%d", camera_num)

            # Load images from source path (lazy loading - no memory consumption yet)
            images = load_images(camera_num, config, source=source_path)

            # Preprocess images (applies filters from config)
            # This intelligently handles batching:
            # - Batch filters (time, pod): rechunks to batch_size
            # - Single-image filters: keeps single-image chunks
            # - No filters: skips preprocessing entirely
            processed_images = preprocess_images(images, config)

            # Load mask once per camera (if masking is enabled)
            mask = load_mask_for_camera(camera_num, config, source_path_idx=0)

            # Pre-compute vector masks once per camera (if masking is enabled)
            vector_masks = None
            if config.masking_enabled and mask is not None:
                logging.info("Pre-computing vector masks for Cam%d", camera_num)
                vector_masks = compute_vector_mask(mask, config)
                logging.info("Vector masks computed: %d passes", len(vector_masks))

            # Get output path for this camera (uncalibrated PIV)
            # Path: base_path/uncalibrated_piv/{num_images}/Cam{camera_num}/instantaneous
            output_path = get_output_path(
                config,
                camera_num,
                use_uncalibrated=True
            )

            # Perform PIV and save in parallel on workers with TRUE lazy loading
            # Each worker processes ONE image at a time: load → PIV → save → free
            # Memory per worker: ~280 MB (constant regardless of total images)
            # Dask scheduler handles task distribution automatically
            saved_paths, scattered_cache = perform_piv_and_save(
                processed_images,  # Use preprocessed images
                config,
                client,
                output_path,
                start_frame=1,
                runs_to_save=config.instantaneous_runs_0based,
                vector_masks=vector_masks,  # Pass pre-computed vector masks
            )

            # Submit coordinate saving task (runs once per camera) with shared cache
            coords_future = client.submit(
                save_coordinates_from_config_distributed,
                config,
                output_path,
                scattered_cache,  # Use the same scattered cache
                config.instantaneous_runs_0based,
            )

            # All PIV processing completed with true lazy loading!
            # Workers processed images one-by-one, keeping memory footprint minimal
            logging.info(
                "PIV and save completed: %d frames saved to %s",
                len(saved_paths), output_path
            )

            # Wait for coordinates to be saved
            coords_future.result()
            logging.info("Coordinates saved to %s", output_path)

        if config.debug:
            current, peak = tracemalloc.get_traced_memory()
            print(f"Current memory usage: {current / 10**6:.2f} MB")
            print(f"Peak memory usage: {peak / 10**6:.2f} MB")

        # Harmless when tracing was never started.
        tracemalloc.stop()
    except Exception as e:
        import traceback
        print(f"Error: {e}", flush=True)
        print("Traceback:", flush=True)
        traceback.print_exc()
        # Remember the failure so the process does not report success.
        exit_code = 1
    finally:
        client.close()
        end_time = time.time()  # End timer
        elapsed = end_time - start_time
        print(f"Total elapsed time: {elapsed:.2f} seconds", flush=True)

    if exit_code:
        sys.exit(exit_code)
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
#include "PIV_2d_cross_correlate.h"
|
|
2
|
+
#include "common.h"
|
|
3
|
+
#include "xcorr.h"
|
|
4
|
+
#include "xcorr_cache.h" /* FFTW wisdom caching */
|
|
5
|
+
#include "peak_locate_lm.h" /* Fast LM solver instead of GSL */
|
|
6
|
+
#include <omp.h>
|
|
7
|
+
#include <math.h>
|
|
8
|
+
#include <stdlib.h>
|
|
9
|
+
#include <string.h>
|
|
10
|
+
|
|
11
|
+
unsigned char bulkxcorr2d(const float *fImageA, const float *fImageB, const float *fMask, const int *nImageSize,
|
|
12
|
+
const float *fWinCtrsX, const float *fWinCtrsY, const int *nWindows, float *fWindowWeightA, bool bEnsemble,
|
|
13
|
+
const float *fWindowWeightB, const int *nWindowSize, int nPeaks, int iPeakFinder,
|
|
14
|
+
float *fPkLocX, float *fPkLocY, float *fPkHeight, float *fSx, float *fSy, float *fSxy, float *fCorrelPlane_Out)
|
|
15
|
+
{
|
|
16
|
+
int i, j, ii, jj, x, y;
|
|
17
|
+
int xmin, ymin;
|
|
18
|
+
int iWindowIdx, nWindowsTotal;
|
|
19
|
+
float *fWindowA, *fWindowB;
|
|
20
|
+
float *fCorrelPlane, *fStd, *fCorrelWeight;
|
|
21
|
+
float *fPeakLoc;
|
|
22
|
+
float fMeanA, fMeanB, fEnergyA, fEnergyB, fEnergyNorm;
|
|
23
|
+
int nPxPerWindow, n;
|
|
24
|
+
unsigned uError;
|
|
25
|
+
sPlan sCCPlan;
|
|
26
|
+
/* Removed peak_finder_lock - LM solver is thread-safe without locks */
|
|
27
|
+
|
|
28
|
+
/* calculate correlation plane weighting matrix
|
|
29
|
+
* according to Raffel et al., the weight factors can be obtained
|
|
30
|
+
* by convolving the image weighting function with itself
|
|
31
|
+
*/
|
|
32
|
+
nPxPerWindow = nWindowSize[0] * nWindowSize[1];
|
|
33
|
+
fCorrelWeight = (float*)malloc(nPxPerWindow * sizeof(float));
|
|
34
|
+
if (!fCorrelWeight) { return ERROR_NOMEM; }
|
|
35
|
+
uError = convolve(fWindowWeightB, fWindowWeightB, fCorrelWeight, nWindowSize);
|
|
36
|
+
if (uError)
|
|
37
|
+
{
|
|
38
|
+
free(fCorrelWeight);
|
|
39
|
+
return uError;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
for(n = 0; n < nPxPerWindow; ++n)
|
|
43
|
+
fCorrelWeight[n] = nPxPerWindow / fCorrelWeight[n];
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
nWindowsTotal = nWindows[0] * nWindows[1];
|
|
47
|
+
|
|
48
|
+
/* Initialize FFTW threading and wisdom cache */
|
|
49
|
+
fftwf_init_threads();
|
|
50
|
+
fftwf_plan_with_nthreads(1);
|
|
51
|
+
|
|
52
|
+
/* Load FFTW wisdom for optimized plans */
|
|
53
|
+
char wisdom_path[512];
|
|
54
|
+
xcorr_cache_get_default_wisdom_path(wisdom_path, sizeof(wisdom_path));
|
|
55
|
+
xcorr_cache_init(wisdom_path);
|
|
56
|
+
|
|
57
|
+
/* fork here, parallelise */
|
|
58
|
+
|
|
59
|
+
/* fork here, parallelise */
|
|
60
|
+
uError = ERROR_NONE;
|
|
61
|
+
// printf(" Max threads: %d\n", omp_get_max_threads());
|
|
62
|
+
|
|
63
|
+
#pragma omp parallel \
|
|
64
|
+
private(i, j, n, ii, jj, x, y, \
|
|
65
|
+
xmin, ymin, \
|
|
66
|
+
iWindowIdx, \
|
|
67
|
+
fWindowA, fWindowB, fCorrelPlane, fStd, fPeakLoc, \
|
|
68
|
+
fMeanA, fMeanB, fEnergyA, fEnergyB, fEnergyNorm, \
|
|
69
|
+
sCCPlan) \
|
|
70
|
+
shared(fImageA, fImageB, fMask, nImageSize, \
|
|
71
|
+
fWinCtrsX, fWinCtrsY, nWindows, bEnsemble, \
|
|
72
|
+
fCorrelWeight, fWindowWeightA, fWindowWeightB, nWindowSize, nPeaks, iPeakFinder, \
|
|
73
|
+
fPkLocX, fPkLocY, fPkHeight, fSx, fSy, fSxy, \
|
|
74
|
+
nWindowsTotal, nPxPerWindow, fCorrelPlane_Out) \
|
|
75
|
+
default(none) \
|
|
76
|
+
reduction(|:uError) \
|
|
77
|
+
num_threads(omp_get_max_threads())
|
|
78
|
+
{
|
|
79
|
+
/* Allocate memory for correlation windows
|
|
80
|
+
* Use aligned allocation for better cache performance
|
|
81
|
+
*/
|
|
82
|
+
uError = ERROR_NONE;
|
|
83
|
+
fCorrelPlane = (float*)fftwf_malloc(nPxPerWindow * sizeof(float));
|
|
84
|
+
fWindowA = (float*)fftwf_malloc(nPxPerWindow * sizeof(float));
|
|
85
|
+
fWindowB = (float*)fftwf_malloc(nPxPerWindow * sizeof(float));
|
|
86
|
+
fStd = (float*)malloc(3 * nPeaks * sizeof(float));
|
|
87
|
+
fPeakLoc = (float*)malloc(3 * nPeaks * sizeof(float));
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
if(!fWindowA || !fWindowB || !fCorrelPlane || !fPeakLoc || !fStd)
|
|
91
|
+
{
|
|
92
|
+
uError = ERROR_NOMEM;
|
|
93
|
+
goto thread_cleanup;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/* create cross-correlation plan for this thread */
|
|
97
|
+
memset(&sCCPlan, 0, sizeof(sCCPlan));
|
|
98
|
+
#pragma omp critical
|
|
99
|
+
{
|
|
100
|
+
fftwf_plan_with_nthreads(1);
|
|
101
|
+
uError = xcorr_create_plan(nWindowSize, &sCCPlan);
|
|
102
|
+
}
|
|
103
|
+
if(uError)
|
|
104
|
+
goto thread_cleanup;
|
|
105
|
+
|
|
106
|
+
/* condense to one loop to make parallelisation easier */
|
|
107
|
+
#pragma omp for schedule(static, CHUNKSIZE) nowait
|
|
108
|
+
for(iWindowIdx = 0; iWindowIdx < nWindowsTotal; ++iWindowIdx)
|
|
109
|
+
{
|
|
110
|
+
/* Coordinate system (matching MATLAB and Python):
|
|
111
|
+
* - nWindows[0] = number of windows in Y (height) direction
|
|
112
|
+
* - nWindows[1] = number of windows in X (width) direction
|
|
113
|
+
* - fWinCtrsY[jj] = Y-coordinate of window center (row index)
|
|
114
|
+
* - fWinCtrsX[ii] = X-coordinate of window center (column index)
|
|
115
|
+
* - Row-major linearization: index = jj * nWindows[1] + ii
|
|
116
|
+
*/
|
|
117
|
+
|
|
118
|
+
/* get index in window center arrays
|
|
119
|
+
* For row-major: linearIdx = row*nCols + col
|
|
120
|
+
* Here: iWindowIdx = jj*nWindows[1] + ii
|
|
121
|
+
*/
|
|
122
|
+
ii = iWindowIdx % nWindows[1]; // Column index (X)
|
|
123
|
+
jj = iWindowIdx / nWindows[1]; // Row index (Y)
|
|
124
|
+
|
|
125
|
+
/* Mask uses same row-major indexing */
|
|
126
|
+
int mask_idx = jj * nWindows[1] + ii;
|
|
127
|
+
|
|
128
|
+
if (mask_idx < 0 || mask_idx >= nWindows[0] * nWindows[1])
|
|
129
|
+
{
|
|
130
|
+
uError = ERROR_OUT_OF_BOUNDS;
|
|
131
|
+
//goto thread_cleanup;
|
|
132
|
+
}
|
|
133
|
+
// Check if the mask value at this index is 1
|
|
134
|
+
if (fMask[mask_idx] == 1)
|
|
135
|
+
{
|
|
136
|
+
continue; // Skip this window if the mask value is 1
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/* Extract correlation window from images
|
|
140
|
+
* Window center coordinates (0-based array indices):
|
|
141
|
+
* - fWinCtrsX[ii] is the X-coordinate (column) of the window center
|
|
142
|
+
* For a 128-pixel window at the left edge: center = 63.5
|
|
143
|
+
* - fWinCtrsY[jj] is the Y-coordinate (row) of the window center
|
|
144
|
+
* Window size:
|
|
145
|
+
* - nWindowSize[0] = window height (number of rows)
|
|
146
|
+
* - nWindowSize[1] = window width (number of columns)
|
|
147
|
+
* Image dimensions:
|
|
148
|
+
* - nImageSize[0] = image height (number of rows)
|
|
149
|
+
* - nImageSize[1] = image width (number of columns)
|
|
150
|
+
*
|
|
151
|
+
* Window extraction:
|
|
152
|
+
* - For a window of size N centered at position C:
|
|
153
|
+
* window covers pixels from floor(C - (N-1)/2 + 0.5) to floor(C + (N-1)/2 + 0.5)
|
|
154
|
+
* - Example: N=128, C=63.5 -> floor(63.5 - 63.5 + 0.5) = 0 to floor(63.5 + 63.5 + 0.5) = 127
|
|
155
|
+
*/
|
|
156
|
+
|
|
157
|
+
/* Calculate top-left corner of window in image coordinates */
|
|
158
|
+
int row_min = (int)floor(fWinCtrsY[jj] - ((float)nWindowSize[0]-1.0)/2.0 + 0.5);
|
|
159
|
+
int col_min = (int)floor(fWinCtrsX[ii] - ((float)nWindowSize[1]-1.0)/2.0 + 0.5);
|
|
160
|
+
|
|
161
|
+
/* Bounds check to prevent segfault */
|
|
162
|
+
if (row_min < 0 || col_min < 0 ||
|
|
163
|
+
row_min + nWindowSize[0] > nImageSize[0] ||
|
|
164
|
+
col_min + nWindowSize[1] > nImageSize[1]) {
|
|
165
|
+
uError = ERROR_OUT_OF_BOUNDS;
|
|
166
|
+
continue; /* Skip this window */
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/* Extract window pixels: iterate over window rows and columns */
|
|
170
|
+
for(int row_win = 0; row_win < nWindowSize[0]; ++row_win)
|
|
171
|
+
{
|
|
172
|
+
int row_img = row_min + row_win;
|
|
173
|
+
for(int col_win = 0; col_win < nWindowSize[1]; ++col_win)
|
|
174
|
+
{
|
|
175
|
+
int col_img = col_min + col_win;
|
|
176
|
+
/* Row-major indexing: array[row, col] -> row*width + col */
|
|
177
|
+
fWindowA[SUB2IND_2D(row_win, col_win, nWindowSize[1])] =
|
|
178
|
+
fImageA[SUB2IND_2D(row_img, col_img, nImageSize[1])];
|
|
179
|
+
fWindowB[SUB2IND_2D(row_win, col_win, nWindowSize[1])] =
|
|
180
|
+
fImageB[SUB2IND_2D(row_img, col_img, nImageSize[1])];
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
/* Pre-multiply by weighting window and compute mean
|
|
184
|
+
* Using SIMD hints for vectorization
|
|
185
|
+
*/
|
|
186
|
+
fMeanA = 0;
|
|
187
|
+
fMeanB = 0;
|
|
188
|
+
#pragma omp simd reduction(+:fMeanA,fMeanB)
|
|
189
|
+
for(n = 0; n < nPxPerWindow; ++n)
|
|
190
|
+
{
|
|
191
|
+
fWindowA[n] *= fWindowWeightA[n];
|
|
192
|
+
fWindowB[n] *= fWindowWeightB[n];
|
|
193
|
+
fMeanA += fWindowA[n];
|
|
194
|
+
fMeanB += fWindowB[n];
|
|
195
|
+
}
|
|
196
|
+
fMeanA = fMeanA / (float)nPxPerWindow;
|
|
197
|
+
fMeanB = fMeanB / (float)nPxPerWindow;
|
|
198
|
+
|
|
199
|
+
/* Subtract mean and calculate signal energy for peak normalisation
|
|
200
|
+
* Using SIMD hints for vectorization
|
|
201
|
+
*/
|
|
202
|
+
fEnergyA = 0;
|
|
203
|
+
fEnergyB = 0;
|
|
204
|
+
if (!bEnsemble) {
|
|
205
|
+
#pragma omp simd reduction(+:fEnergyA,fEnergyB)
|
|
206
|
+
for(n = 0; n < nPxPerWindow; ++n)
|
|
207
|
+
{
|
|
208
|
+
fWindowA[n] -= fMeanA;
|
|
209
|
+
fWindowB[n] -= fMeanB;
|
|
210
|
+
fEnergyA += fWindowA[n]*fWindowA[n];
|
|
211
|
+
fEnergyB += fWindowB[n]*fWindowB[n];
|
|
212
|
+
}
|
|
213
|
+
} else {
|
|
214
|
+
#pragma omp simd reduction(+:fEnergyA,fEnergyB)
|
|
215
|
+
for(n = 0; n < nPxPerWindow; ++n)
|
|
216
|
+
{
|
|
217
|
+
fEnergyA += fWindowA[n]*fWindowA[n];
|
|
218
|
+
fEnergyB += fWindowB[n]*fWindowB[n];
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
fEnergyNorm = 1 / (float)sqrt(fEnergyA * fEnergyB);
|
|
222
|
+
|
|
223
|
+
/* Cross-correlate */
|
|
224
|
+
xcorr_preplanned(fWindowB, fWindowA, fCorrelPlane, &sCCPlan);
|
|
225
|
+
|
|
226
|
+
/* Apply correlation plane weighting with SIMD vectorization */
|
|
227
|
+
if (!bEnsemble) {
|
|
228
|
+
#pragma omp simd
|
|
229
|
+
for (n = 0; n < nPxPerWindow; ++n)
|
|
230
|
+
{
|
|
231
|
+
fCorrelPlane[n] *= fCorrelWeight[n];
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
memcpy(&fCorrelPlane_Out[nPxPerWindow * iWindowIdx], fCorrelPlane, nPxPerWindow * sizeof(float));
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
/* Call peak finder - LM solver is fully thread-safe, no locks needed */
|
|
244
|
+
if (!bEnsemble) {
|
|
245
|
+
lsqpeaklocate_lm(fCorrelPlane, nWindowSize, fPeakLoc, nPeaks, iPeakFinder, fStd);
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
/* Save displacement and peak height
|
|
249
|
+
* Output arrays have shape [nPeaks, nWindows[0], nWindows[1]]
|
|
250
|
+
* where nWindows[0] = Y-windows, nWindows[1] = X-windows
|
|
251
|
+
* Peak locations from lsqpeaklocate_lm:
|
|
252
|
+
* - fPeakLoc[0, n, 3] = row offset (Y) from window center
|
|
253
|
+
* - fPeakLoc[1, n, 3] = column offset (X) from window center
|
|
254
|
+
* - fPeakLoc[2, n, 3] = peak magnitude
|
|
255
|
+
*/
|
|
256
|
+
for(n = 0; n < nPeaks; ++n)
|
|
257
|
+
{
|
|
258
|
+
/* Calculate linear index for this peak and window
|
|
259
|
+
* For 3D array [nPeaks, nRows, nCols] in row-major:
|
|
260
|
+
* index = peak*nRows*nCols + row*nCols + col
|
|
261
|
+
*/
|
|
262
|
+
int out_idx = n * nWindows[0] * nWindows[1] + jj * nWindows[1] + ii;
|
|
263
|
+
|
|
264
|
+
/* Peak location in correlation plane (centered at window size/2)
|
|
265
|
+
* Subtract window center to get displacement from window center
|
|
266
|
+
* fPeakLoc dimensions: [3, nPeaks] in row-major
|
|
267
|
+
*/
|
|
268
|
+
float peak_row = fPeakLoc[SUB2IND_2D(0, n, nPeaks)]; // Y-displacement
|
|
269
|
+
float peak_col = fPeakLoc[SUB2IND_2D(1, n, nPeaks)]; // X-displacement
|
|
270
|
+
float peak_mag = fPeakLoc[SUB2IND_2D(2, n, nPeaks)];
|
|
271
|
+
|
|
272
|
+
/* Store displacements (subtract window center to get offset) */
|
|
273
|
+
fPkLocX[out_idx] = peak_col - nWindowSize[1]/2.0f; // X is column
|
|
274
|
+
fPkLocY[out_idx] = peak_row - nWindowSize[0]/2.0f; // Y is row
|
|
275
|
+
|
|
276
|
+
/* Store standard deviations */
|
|
277
|
+
fSx[out_idx] = fStd[SUB2IND_2D(0, n, nPeaks)];
|
|
278
|
+
fSy[out_idx] = fStd[SUB2IND_2D(1, n, nPeaks)];
|
|
279
|
+
fSxy[out_idx] = fStd[SUB2IND_2D(2, n, nPeaks)];
|
|
280
|
+
|
|
281
|
+
/* Normalize peak height by window weight and energy content */
|
|
282
|
+
int pk_row = MIN(MAX(0, (int)peak_row), nWindowSize[0]-1);
|
|
283
|
+
int pk_col = MIN(MAX(0, (int)peak_col), nWindowSize[1]-1);
|
|
284
|
+
fPkHeight[out_idx] = peak_mag * fEnergyNorm /
|
|
285
|
+
fCorrelWeight[SUB2IND_2D(pk_row, pk_col, nWindowSize[1])];
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
/* Cleanup memory and other resources before leaving the thread */
|
|
291
|
+
thread_cleanup:
|
|
292
|
+
#pragma omp critical
|
|
293
|
+
{
|
|
294
|
+
xcorr_destroy_plan(&sCCPlan);
|
|
295
|
+
}
|
|
296
|
+
if(fWindowA) fftwf_free(fWindowA);
|
|
297
|
+
if(fStd) free(fStd);
|
|
298
|
+
if(fWindowB) fftwf_free(fWindowB);
|
|
299
|
+
if(fCorrelPlane) fftwf_free(fCorrelPlane);
|
|
300
|
+
if(fPeakLoc) free(fPeakLoc);
|
|
301
|
+
|
|
302
|
+
} /* end parallelised section */
|
|
303
|
+
|
|
304
|
+
/* Save wisdom for future runs */
|
|
305
|
+
xcorr_cache_save_wisdom(wisdom_path);
|
|
306
|
+
|
|
307
|
+
return uError;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/* fminvec, find minimum element in vector
 *
 * fVec: array of n floats.
 * Returns the smallest element, or NAN when fVec is NULL or n < 1
 * (the vector has no element to return).
 */
float fminvec(const float *fVec, int n)
{
    int i;
    float ret;

    /* Never read fVec[0] from an empty or missing vector. */
    if(!fVec || n < 1)
        return NAN;

    ret = fVec[0];
    for(i = 1; i < n; ++i)
        ret = (ret < fVec[i]) ? ret : fVec[i]; /* same selection as MIN(ret, fVec[i]) */

    return ret;
}
|
|
322
|
+
|
|
323
|
+
/* fmaxvec, find maximum element in vector
 *
 * fVec: array of n floats.
 * Returns the largest element, or NAN when fVec is NULL or n < 1
 * (the vector has no element to return).
 */
float fmaxvec(const float *fVec, int n)
{
    int i;
    float ret;

    /* Never read fVec[0] from an empty or missing vector. */
    if(!fVec || n < 1)
        return NAN;

    ret = fVec[0];
    for(i = 1; i < n; ++i)
        ret = (ret > fVec[i]) ? ret : fVec[i]; /* same selection as MAX(ret, fVec[i]) */

    return ret;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#ifndef PIV_2D_XCORR_H
#define PIV_2D_XCORR_H
#include <stdbool.h>

/* EXPORT marks the public entry points as DLL exports on Windows and
 * expands to nothing on other platforms. */
#ifdef _WIN32
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif

/**** function declarations ****/

/* Cross-correlate all interrogation windows of an image pair.
 * nImageSize, nWindows and nWindowSize are two-element {rows, columns}
 * arrays; the fPk*, fS* and fCorrelPlane_Out pointers are outputs.
 * Returns 0 (ERROR_NONE) on success, otherwise an error code from common.h. */
EXPORT unsigned char bulkxcorr2d(const float *fImageA, const float *fImageB,const float *fMask, const int *nImageSize,
                                 const float *fWinCtrsX, const float *fWinCtrsY, const int *nWindows, float * fWindowWeightA, bool bEnsemble,
                                 const float *fWindowWeightB, const int *nWindowSize, int nPeaks, int iPeakFinder,
                                 float *fPkLocX, float *fPkLocY, float *fPkHeight, float *fSx, float *fSy, float *fSxy, float *fCorrelPlane_Out);


/* Smallest / largest element of the first n entries of fVec. */
EXPORT float fminvec(const float *fVec, int n);
EXPORT float fmaxvec(const float *fVec, int n);

#endif
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#ifndef COMMON_H
#define COMMON_H

/**** useful macros ****/
/* Row-major (C-contiguous) indexing for 2D arrays
 * For array[i,j] with shape [M, N]:
 *   - i is the row index (0 to M-1), corresponds to Y/height
 *   - j is the column index (0 to N-1), corresponds to X/width
 *   - Linear index = i*N + j
 */
#define SUB2IND_2D(i, j, N) ((i)*(N) + (j))
/* Row-major index into a 3D array of shape [L, M, N]: i*M*N + j*N + k */
#define SUB2IND_3D(i, j, k, M, N) ((i)*(M)*(N) + (j)*(N) + (k))

/* NOTE: function-like macros - each argument may be evaluated twice, so do
 * not pass expressions with side effects. */
#define MIN(A,B) ((A)<(B)?(A):(B))
#define MAX(A,B) ((A)>(B)?(A):(B))

/**** defines ****/
#define PI 3.14159265f
#define SQRT_PI 1.77245385f
#define TRUE 1
#define FALSE 0
/* Fallback for toolchains whose <math.h> does not provide NAN */
#ifndef NAN
#define NAN (0.0f/0.0f)
#endif

/* Chunk size used in the schedule(static, CHUNKSIZE) OpenMP loops */
#define CHUNKSIZE 256
/* Expands to a call to omp_get_max_threads(); users must include <omp.h> */
#define NUMTHREADS_MAX omp_get_max_threads()

/* Error codes.  bulkxcorr2d combines per-thread codes with a bitwise OR
 * reduction, so the codes act as bit flags there.
 * NOTE(review): ERROR_OUT_OF_BOUNDS (9) equals ERROR_NOMEM | ERROR_NOPLAN
 * and therefore cannot be told apart from that combination once OR-ed. */
#define ERROR_NONE 0
#define ERROR_NOMEM 1
#define ERROR_NOPLAN_FWD 2
#define ERROR_NOPLAN_BWD 4
#define ERROR_NOPLAN 8
#define ERROR_OUT_OF_BOUNDS 9

#endif
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
#include "interp2custom.h"
|
|
2
|
+
|
|
3
|
+
/**** global variables ****/
/* State of the filter-coefficient lookup table (LUT) shared by the
 * functions in this file; interp1custom_generatelut() updates all of them. */
int g_iLUTIn_interptialised = 0;   /* set to 1 once the LUT has been generated */
int g_iKernelHalfSize = 5;         /* kernel half size the LUT was last generated for */
int g_iKernelType = 0;             /* kernel type the LUT was last generated for */
float g_fGaussKernelStd = 0.65f;   /* fOptions value passed to the last generation */
float *g_fLUT = NULL;              /* heap-allocated coefficient table, NULL when absent */
|
|
9
|
+
|
|
10
|
+
/**** sinc helper ****/
|
|
11
|
+
float sinc(float x) {
|
|
12
|
+
return x == 0.0f ? 1.0f : sinf(PI*x)/(PI*x);
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**** 1D interpolation ****/
|
|
16
|
+
float interp1custom(float *y, float *fFilterCoefficients) {
|
|
17
|
+
int m;
|
|
18
|
+
float yi = 0;
|
|
19
|
+
#pragma omp simd
|
|
20
|
+
for(m = 0; m < KERNEL_SIZE; ++m) {
|
|
21
|
+
yi += y[m] * fFilterCoefficients[m];
|
|
22
|
+
}
|
|
23
|
+
return yi;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/**** vectorized 1D interpolation ****/
|
|
27
|
+
void interp1custom_vec(float *y, float *yi, int N, float *fFilterCoefficients) {
|
|
28
|
+
int m, n;
|
|
29
|
+
memset(yi, 0, sizeof(float) * N);
|
|
30
|
+
for(m = 0; m < KERNEL_SIZE; ++m) {
|
|
31
|
+
#pragma omp simd
|
|
32
|
+
for(n = 0; n < N; ++n) {
|
|
33
|
+
yi[n] += y[m*N + n] * fFilterCoefficients[m];
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
/**** generate LUT ****/
|
|
39
|
+
void interp1custom_generatelut(int iKernelType, int iKernelSize, float fOptions) {
|
|
40
|
+
int m, n;
|
|
41
|
+
float delta, A;
|
|
42
|
+
int iKernelHalfSize = iKernelSize / 2;
|
|
43
|
+
int bRegenerate = !g_iLUTIn_interptialised || iKernelType != g_iKernelType || iKernelHalfSize != g_iKernelHalfSize;
|
|
44
|
+
if(iKernelType == KERNEL_GAUSSIAN)
|
|
45
|
+
bRegenerate = bRegenerate || fOptions != g_fGaussKernelStd;
|
|
46
|
+
|
|
47
|
+
if(!bRegenerate) return;
|
|
48
|
+
|
|
49
|
+
if(g_iLUTIn_interptialised && g_fLUT) free(g_fLUT);
|
|
50
|
+
g_fLUT = (float*)malloc(NLUT * iKernelSize * sizeof(float));
|
|
51
|
+
|
|
52
|
+
switch(iKernelType) {
|
|
53
|
+
case KERNEL_LANCZOS:
|
|
54
|
+
for(n = 1; n < NLUT; ++n) {
|
|
55
|
+
delta = ((float)n)/((float)NLUT);
|
|
56
|
+
for(m = -iKernelHalfSize+1; m <= iKernelHalfSize; ++m) {
|
|
57
|
+
g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1] = sinc(delta - m) * sinc((delta - m)/(float)iKernelHalfSize);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
memset(g_fLUT, 0, sizeof(float)*iKernelSize);
|
|
61
|
+
g_fLUT[iKernelHalfSize-1] = 1;
|
|
62
|
+
break;
|
|
63
|
+
case KERNEL_GAUSSIAN:
|
|
64
|
+
for(n = 0; n < NLUT; ++n) {
|
|
65
|
+
delta = ((float)n)/((float)NLUT);
|
|
66
|
+
A = 0;
|
|
67
|
+
for(m = -iKernelHalfSize+1; m <= iKernelHalfSize; ++m) {
|
|
68
|
+
g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1] = expf(-powf((m-delta)/fOptions, 2));
|
|
69
|
+
A += g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1];
|
|
70
|
+
}
|
|
71
|
+
A = 1/A;
|
|
72
|
+
for(m = -iKernelHalfSize+1; m <= iKernelHalfSize; ++m)
|
|
73
|
+
g_fLUT[n * iKernelSize + m + iKernelHalfSize - 1] *= A;
|
|
74
|
+
}
|
|
75
|
+
break;
|
|
76
|
+
}
|
|
77
|
+
g_iKernelType = iKernelType;
|
|
78
|
+
g_fGaussKernelStd = fOptions;
|
|
79
|
+
g_iKernelHalfSize = iKernelHalfSize;
|
|
80
|
+
g_iLUTIn_interptialised = 1;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
/**** destroy LUT ****/
|
|
84
|
+
void interp1custom_destroylut(void) {
|
|
85
|
+
if(g_iLUTIn_interptialised && g_fLUT) {
|
|
86
|
+
free(g_fLUT);
|
|
87
|
+
g_fLUT = NULL;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**** 2D interpolation ****/
/* Interpolate the 2D array y at n_interp fractional positions.
 *
 * y:        input samples
 * N:        two-element array with the extents of y
 * f_i, f_j: n_interp fractional coordinates along the first / second extent
 * yi:       output, n_interp interpolated values
 * n_interp: number of positions
 *
 * Positions outside [0, N[0]-1] x [0, N[1]-1] produce 0.  The filter
 * coefficients are read from the global table g_fLUT, so
 * interp1custom_generatelut() has to be called first.
 */
void interp2custom(const float *y, size_t *N, const float *f_i, const float *f_j, float *yi, int n_interp) {
    int n, i, j;
    float delta[2];
    int m[2];
    float *fFilterCoefficients[2];
    float *yi_stage0, *yi_stage1;
    int i_min, i_max, j_min, j_max;

    #pragma omp parallel default(none) \
        private(n,i,j,i_min,i_max,j_min,j_max,delta,m,fFilterCoefficients,yi_stage0,yi_stage1) \
        shared(y,N,f_i,f_j,yi,n_interp,g_fLUT,g_iKernelHalfSize) \
        num_threads(NUMTHREADS_MAX)
    {
        /* Per-thread scratch: the sample neighbourhood and the result of the
         * first filtering pass.
         * NOTE(review): the malloc results are not checked before use. */
        yi_stage0 = (float*)malloc(KERNEL_SIZE*KERNEL_SIZE*sizeof(float));
        yi_stage1 = (float*)malloc(KERNEL_SIZE*sizeof(float));

        #pragma omp for schedule(static, CHUNKSIZE)
        for(n = 0; n < n_interp; ++n) {
            /* positions outside the array yield 0 */
            if(f_i[n] < 0 || f_i[n] > N[0]-1 || f_j[n] < 0 || f_j[n] > N[1]-1) {
                yi[n] = 0;
                continue;
            }

            /* split each coordinate into integer part m and fraction delta */
            m[0] = (int)f_i[n];
            m[1] = (int)f_j[n];
            delta[0] = f_i[n] - m[0];
            delta[1] = f_j[n] - m[1];

            /* pick the LUT row for each fraction, clamped to [0, NLUT-1] */
            for(i = 0; i < 2; ++i) {
                j = (int)((float)NLUT*delta[i]);
                j = MIN(MAX(j,0),NLUT-1);
                fFilterCoefficients[i] = &g_fLUT[j * KERNEL_SIZE];
            }

            /* neighbourhood around (m[0], m[1]), clipped to the array */
            i_min = MAX(0, m[0]-KERNEL_HALF_SIZE+1);
            j_min = MAX(0, m[1]-KERNEL_HALF_SIZE+1);
            i_max = MIN(N[0]-1, m[0]+KERNEL_HALF_SIZE);
            j_max = MIN(N[1]-1, m[1]+KERNEL_HALF_SIZE);

            /* zero first so the clipped-away part of the neighbourhood contributes 0 */
            memset(yi_stage0, 0, KERNEL_SIZE*KERNEL_SIZE*sizeof(float));

            /* NOTE(review): each copy takes (i_max-i_min+1) contiguous floats
             * starting at SUB2IND_2D(i_min, j, N[0]).  With the row-major
             * SUB2IND_2D from common.h (i*N + j) a contiguous run advances j,
             * not i, and the stride would be N[1] - confirm which macro
             * definition and array layout this function is meant to use. */
            for(j = j_min; j <= j_max; ++j) {
                memcpy(&yi_stage0[SUB2IND_2D(i_min-m[0]+KERNEL_HALF_SIZE-1, j-m[1]+KERNEL_HALF_SIZE-1, KERNEL_SIZE)],
                       &y[SUB2IND_2D(i_min,j,N[0])],
                       (i_max-i_min+1)*sizeof(float));
            }

            /* separable filter: second-coordinate coefficients over the
             * neighbourhood, then first-coordinate coefficients over the result */
            interp1custom_vec(yi_stage0, yi_stage1, KERNEL_SIZE, fFilterCoefficients[1]);
            yi[n] = interp1custom(yi_stage1, fFilterCoefficients[0]);
        }

        free(yi_stage0);
        free(yi_stage1);
    }
}
|