peclet-flow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- peclet_flow-0.1.0/.github/workflows/ci.yml +66 -0
- peclet_flow-0.1.0/.github/workflows/docs.yml +59 -0
- peclet_flow-0.1.0/.github/workflows/quality.yml +26 -0
- peclet_flow-0.1.0/.github/workflows/release.yml +53 -0
- peclet_flow-0.1.0/.gitignore +77 -0
- peclet_flow-0.1.0/AGENTS.md +41 -0
- peclet_flow-0.1.0/CLAUDE.md +246 -0
- peclet_flow-0.1.0/CMakeLists.txt +67 -0
- peclet_flow-0.1.0/GEMINI.md +2 -0
- peclet_flow-0.1.0/LICENSE +21 -0
- peclet_flow-0.1.0/PKG-INFO +93 -0
- peclet_flow-0.1.0/README.md +79 -0
- peclet_flow-0.1.0/build_project.sh +6 -0
- peclet_flow-0.1.0/cfd_utils/__init__.py +52 -0
- peclet_flow-0.1.0/cfd_utils/vti.py +80 -0
- peclet_flow-0.1.0/cmake/PecletDeps.cmake +91 -0
- peclet_flow-0.1.0/codebase_to_text.py +90 -0
- peclet_flow-0.1.0/doc/Robust_Scaled_IBM_Solver.tex +379 -0
- peclet_flow-0.1.0/doc/colocated_study/staggered_vs_colocated.md +87 -0
- peclet_flow-0.1.0/doc/flow_colocated_plan.md +182 -0
- peclet_flow-0.1.0/doc/flow_multigrid_plan.md +319 -0
- peclet_flow-0.1.0/doc/ibm_overlay.md +84 -0
- peclet_flow-0.1.0/doc/packing_multires_resolution_report.md +104 -0
- peclet_flow-0.1.0/doc/packing_parameter_study_workflow.md +395 -0
- peclet_flow-0.1.0/doc/velocity_mg_plan.md +205 -0
- peclet_flow-0.1.0/docs/Doxyfile +68 -0
- peclet_flow-0.1.0/examples/sdflow_intro.ipynb +205 -0
- peclet_flow-0.1.0/notebooks/generate_notebook.py +362 -0
- peclet_flow-0.1.0/notebooks/generate_poiseuille_notebook.py +300 -0
- peclet_flow-0.1.0/notebooks/verification_incremental_pressure.ipynb +310 -0
- peclet_flow-0.1.0/notebooks/verification_periodic_spheres.ipynb +1011 -0
- peclet_flow-0.1.0/notebooks/verify_poiseuille.ipynb +329 -0
- peclet_flow-0.1.0/packaging/flow_init.py +18 -0
- peclet_flow-0.1.0/packaging/flow_pnm_init.py +7 -0
- peclet_flow-0.1.0/pyproject.toml +62 -0
- peclet_flow-0.1.0/scripts/bench_velocity_mg_256.py +82 -0
- peclet_flow-0.1.0/scripts/bench_velocity_mg_bfs.py +89 -0
- peclet_flow-0.1.0/scripts/compare_amr_sdflow_field.py +85 -0
- peclet_flow-0.1.0/scripts/state_initialization.py +101 -0
- peclet_flow-0.1.0/scripts/study_avg_velocity_spheres.py +131 -0
- peclet_flow-0.1.0/scripts/test_extraction.py +69 -0
- peclet_flow-0.1.0/scripts/test_ibm_exactness.py +88 -0
- peclet_flow-0.1.0/scripts/test_ibm_polynomials.py +171 -0
- peclet_flow-0.1.0/scripts/test_vmg_cavity.py +62 -0
- peclet_flow-0.1.0/scripts/three_solver_study.py +88 -0
- peclet_flow-0.1.0/scripts/tune_periodic_spheres_multigrid.py +230 -0
- peclet_flow-0.1.0/scripts/tune_sdf_packing_multigrid.py +215 -0
- peclet_flow-0.1.0/scripts/validate_zick_homsy_sdflow.py +106 -0
- peclet_flow-0.1.0/scripts/verify_bfs_sdflow.py +135 -0
- peclet_flow-0.1.0/scripts/verify_channel_sdflow.py +98 -0
- peclet_flow-0.1.0/scripts/verify_chebyshev_sdflow.py +70 -0
- peclet_flow-0.1.0/scripts/verify_colocated_bfs.py +93 -0
- peclet_flow-0.1.0/scripts/verify_colocated_channel.py +83 -0
- peclet_flow-0.1.0/scripts/verify_colocated_lid_cavity.py +68 -0
- peclet_flow-0.1.0/scripts/verify_colocated_poiseuille.py +73 -0
- peclet_flow-0.1.0/scripts/verify_colocated_spheres.py +91 -0
- peclet_flow-0.1.0/scripts/verify_colocated_taylor_green.py +85 -0
- peclet_flow-0.1.0/scripts/verify_ibm_math.py +127 -0
- peclet_flow-0.1.0/scripts/verify_ibm_sandwich.py +138 -0
- peclet_flow-0.1.0/scripts/verify_implicit_advection_sdflow.py +82 -0
- peclet_flow-0.1.0/scripts/verify_lid_cavity_sdflow.py +88 -0
- peclet_flow-0.1.0/scripts/verify_periodic_spheres_sdflow.py +101 -0
- peclet_flow-0.1.0/scripts/verify_poiseuille_sdflow.py +81 -0
- peclet_flow-0.1.0/scripts/verify_segmentation.py +67 -0
- peclet_flow-0.1.0/scripts/verify_velocity_mg_staircase_packing_sdflow.py +103 -0
- peclet_flow-0.1.0/scripts/verify_velocity_mg_staircase_zh_sdflow.py +106 -0
- peclet_flow-0.1.0/scripts/verify_velocity_mg_upwind_cavity_sdflow.py +100 -0
- peclet_flow-0.1.0/scripts/verify_velocity_mg_upwind_sdflow.py +90 -0
- peclet_flow-0.1.0/sdflow.pyi +70 -0
- peclet_flow-0.1.0/src/colocated_advection.hpp +102 -0
- peclet_flow-0.1.0/src/cut_cell_ibm.hpp +214 -0
- peclet_flow-0.1.0/src/flow_bindings.cpp +173 -0
- peclet_flow-0.1.0/src/flow_ibm.hpp +727 -0
- peclet_flow-0.1.0/src/flow_reference.hpp +211 -0
- peclet_flow-0.1.0/src/grid_layout.hpp +87 -0
- peclet_flow-0.1.0/src/mac_approx_projection.hpp +74 -0
- peclet_flow-0.1.0/src/mac_bc.hpp +219 -0
- peclet_flow-0.1.0/src/mac_cutcell.hpp +92 -0
- peclet_flow-0.1.0/src/mac_cutcell_mg.hpp +505 -0
- peclet_flow-0.1.0/src/mac_ibm.hpp +146 -0
- peclet_flow-0.1.0/src/mac_mg.hpp +166 -0
- peclet_flow-0.1.0/src/mac_pressure.hpp +115 -0
- peclet_flow-0.1.0/src/mac_reductions.hpp +90 -0
- peclet_flow-0.1.0/src/mac_stencils.hpp +83 -0
- peclet_flow-0.1.0/src/mac_transfer.hpp +100 -0
- peclet_flow-0.1.0/src/mac_velocity_mg.hpp +448 -0
- peclet_flow-0.1.0/src/pnm_bindings.cpp +101 -0
- peclet_flow-0.1.0/src/pore_extraction.hpp +375 -0
- peclet_flow-0.1.0/src/sdf_reader.cpp +154 -0
- peclet_flow-0.1.0/src/sdf_reader.h +25 -0
- peclet_flow-0.1.0/src/staggered_advection.hpp +155 -0
- peclet_flow-0.1.0/tests/cuda_bench/CMakeLists.txt +7 -0
- peclet_flow-0.1.0/tests/cuda_bench/bench_rbgs.cu +59 -0
- peclet_flow-0.1.0/tests/kokkos/CMakeLists.txt +90 -0
- peclet_flow-0.1.0/tests/kokkos/bench_rbgs.cpp +45 -0
- peclet_flow-0.1.0/tests/kokkos/test_advection.cpp +83 -0
- peclet_flow-0.1.0/tests/kokkos/test_bc.cpp +95 -0
- peclet_flow-0.1.0/tests/kokkos/test_cutcell.cpp +82 -0
- peclet_flow-0.1.0/tests/kokkos/test_fou_operator.cpp +65 -0
- peclet_flow-0.1.0/tests/kokkos/test_ibm.cpp +96 -0
- peclet_flow-0.1.0/tests/kokkos/test_ibm_apply.cpp +103 -0
- peclet_flow-0.1.0/tests/kokkos/test_ibm_overlay.cpp +93 -0
- peclet_flow-0.1.0/tests/kokkos/test_mg.cpp +59 -0
- peclet_flow-0.1.0/tests/kokkos/test_poiseuille.cpp +95 -0
- peclet_flow-0.1.0/tests/kokkos/test_poiseuille_ibm.cpp +122 -0
- peclet_flow-0.1.0/tests/kokkos/test_reductions.cpp +75 -0
- peclet_flow-0.1.0/tests/kokkos/test_sdflow_tg.cpp +115 -0
- peclet_flow-0.1.0/tests/kokkos/test_stencils.cpp +93 -0
- peclet_flow-0.1.0/tests/kokkos/test_transfer.cpp +111 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/CMakeLists.txt +49 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_cutcellmg_mpi.cpp +96 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_distributed_diffusion.cpp +132 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_distributed_mg.cpp +175 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_distributed_pressure.cpp +175 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_sdflow_colocated_mpi.cpp +113 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_sdflow_mpi.cpp +113 -0
- peclet_flow-0.1.0/tests/kokkos_mpi/test_velocitymg_mpi.cpp +93 -0
- peclet_flow-0.1.0/tests/regression/README.md +56 -0
- peclet_flow-0.1.0/tests/regression/perf_baseline.json +219 -0
- peclet_flow-0.1.0/tests/regression/perf_baseline_colocated.json +220 -0
- peclet_flow-0.1.0/tests/regression/sdflow_regression.py +290 -0
- peclet_flow-0.1.0/tests/study/mem_gpu.json +38 -0
- peclet_flow-0.1.0/tests/study/results_cpu.json +675 -0
- peclet_flow-0.1.0/tests/study/results_gpu.json +675 -0
- peclet_flow-0.1.0/tests/study/staggered_vs_colocated.py +458 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
# Build the Kokkos (OpenMP/Serial) backend from source, then build the sdflow + pnm Python modules and
|
|
4
|
+
# run the single-rank Kokkos kernel unit tests. The GPU (CUDA/HIP) backends and the multi-rank MPI
|
|
5
|
+
# tests are NOT exercisable on free GitHub runners (no GPU); this CI covers the portable host build and
|
|
6
|
+
# the CPU kernel tests, which is what catches the vast majority of regressions. The single-rank modules
|
|
7
|
+
# never link MPI/core (the distributed path is gated behind CFD_MPI), so Kokkos + pybind11 is
|
|
8
|
+
# the whole dependency set here.
|
|
9
|
+
|
|
10
|
+
on:
|
|
11
|
+
push:
|
|
12
|
+
branches: [main, "**"]
|
|
13
|
+
pull_request:
|
|
14
|
+
workflow_dispatch:
|
|
15
|
+
|
|
16
|
+
env:
|
|
17
|
+
KOKKOS_TAG: "5.1.1" # keep in lockstep with ../tools/bootstrap_deps.sh
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
build-test:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Install toolchain
|
|
26
|
+
run: |
|
|
27
|
+
sudo apt-get update
|
|
28
|
+
sudo apt-get install -y cmake ninja-build libomp-dev python3-dev python3-pip
|
|
29
|
+
python3 -m pip install --upgrade pip pybind11 numpy
|
|
30
|
+
|
|
31
|
+
- name: Cache Kokkos install
|
|
32
|
+
id: kokkos-cache
|
|
33
|
+
uses: actions/cache@v4
|
|
34
|
+
with:
|
|
35
|
+
path: ${{ github.workspace }}/kokkos-install
|
|
36
|
+
key: kokkos-${{ env.KOKKOS_TAG }}-openmp-${{ runner.os }}
|
|
37
|
+
|
|
38
|
+
- name: Build Kokkos (OpenMP + Serial)
|
|
39
|
+
if: steps.kokkos-cache.outputs.cache-hit != 'true'
|
|
40
|
+
run: |
|
|
41
|
+
git clone --depth 1 --branch "$KOKKOS_TAG" https://github.com/kokkos/kokkos.git /tmp/kokkos
|
|
42
|
+
cmake -S /tmp/kokkos -B /tmp/kokkos/build -G Ninja \
|
|
43
|
+
-DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_STANDARD=20 \
|
|
44
|
+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
|
45
|
+
-DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_SERIAL=ON \
|
|
46
|
+
-DCMAKE_INSTALL_PREFIX="$GITHUB_WORKSPACE/kokkos-install"
|
|
47
|
+
cmake --build /tmp/kokkos/build -j
|
|
48
|
+
cmake --install /tmp/kokkos/build
|
|
49
|
+
|
|
50
|
+
- name: Build sdflow + pnm modules
|
|
51
|
+
run: |
|
|
52
|
+
cmake -S . -B build \
|
|
53
|
+
-DCMAKE_PREFIX_PATH="$GITHUB_WORKSPACE/kokkos-install;$(python3 -m pybind11 --cmakedir)"
|
|
54
|
+
cmake --build build -j
|
|
55
|
+
|
|
56
|
+
- name: Import smoke test
|
|
57
|
+
run: PYTHONPATH=$PWD/build python3 -c "import sdflow, pnm; print('sdflow + pnm import OK')"
|
|
58
|
+
|
|
59
|
+
- name: Build + run Kokkos kernel unit tests (tests/kokkos)
|
|
60
|
+
run: |
|
|
61
|
+
# Kokkos-only unit suite (no MPI / core): the ported cut-cell IBM, advection,
|
|
62
|
+
# multigrid-transfer, BC and reduction kernels, each checked against a serial reference.
|
|
63
|
+
cmake -S tests/kokkos -B build_kokkos \
|
|
64
|
+
-DCMAKE_PREFIX_PATH="$GITHUB_WORKSPACE/kokkos-install"
|
|
65
|
+
cmake --build build_kokkos -j
|
|
66
|
+
ctest --test-dir build_kokkos --output-on-failure
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
name: Documentation
|
|
2
|
+
|
|
3
|
+
# Build the Doxygen API documentation (Kokkos C++ in src/, Python in scripts/ & tests/) and publish it
|
|
4
|
+
# to GitHub Pages. Requires Pages to be enabled for the repository with "Source: GitHub Actions"
|
|
5
|
+
# (Settings -> Pages). Runs on every push to main; can also be triggered manually. Doxygen only parses
|
|
6
|
+
# the sources (no Kokkos toolchain needed), so this runs on a plain ubuntu runner.
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
branches: [main]
|
|
11
|
+
paths:
|
|
12
|
+
- 'src/**'
|
|
13
|
+
- 'scripts/**'
|
|
14
|
+
- 'tests/**'
|
|
15
|
+
- 'docs/**'
|
|
16
|
+
- 'README.md'
|
|
17
|
+
- '.github/workflows/docs.yml'
|
|
18
|
+
workflow_dispatch:
|
|
19
|
+
|
|
20
|
+
permissions:
|
|
21
|
+
contents: read
|
|
22
|
+
pages: write
|
|
23
|
+
id-token: write
|
|
24
|
+
|
|
25
|
+
# allow one concurrent deployment; cancel an in-progress run for a newer push
|
|
26
|
+
concurrency:
|
|
27
|
+
group: pages
|
|
28
|
+
cancel-in-progress: true
|
|
29
|
+
|
|
30
|
+
jobs:
|
|
31
|
+
build:
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
steps:
|
|
34
|
+
- name: Checkout
|
|
35
|
+
uses: actions/checkout@v4
|
|
36
|
+
|
|
37
|
+
- name: Install Doxygen + Graphviz
|
|
38
|
+
run: |
|
|
39
|
+
sudo apt-get update
|
|
40
|
+
sudo apt-get install -y doxygen graphviz
|
|
41
|
+
|
|
42
|
+
- name: Build documentation
|
|
43
|
+
run: doxygen docs/Doxyfile
|
|
44
|
+
|
|
45
|
+
- name: Upload Pages artifact
|
|
46
|
+
uses: actions/upload-pages-artifact@v3
|
|
47
|
+
with:
|
|
48
|
+
path: docs/html
|
|
49
|
+
|
|
50
|
+
deploy:
|
|
51
|
+
needs: build
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
environment:
|
|
54
|
+
name: github-pages
|
|
55
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
56
|
+
steps:
|
|
57
|
+
- name: Deploy to GitHub Pages
|
|
58
|
+
id: deployment
|
|
59
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Quality
|
|
2
|
+
|
|
3
|
+
# Lightweight, non-breaking code-quality gates. The Python job hard-fails on *critical* errors only
|
|
4
|
+
# (syntax errors, undefined names, bad string formatting / comparisons — the classic stop-the-build
|
|
5
|
+
# set), so it catches real bugs without drowning research scripts in style nits.
|
|
6
|
+
|
|
7
|
+
on:
|
|
8
|
+
push:
|
|
9
|
+
branches: [main, "**"]
|
|
10
|
+
pull_request:
|
|
11
|
+
workflow_dispatch:
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
python-lint:
|
|
15
|
+
name: ruff (critical errors)
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
- run: pip install ruff
|
|
23
|
+
- name: Ruff critical-error check
|
|
24
|
+
run: |
|
|
25
|
+
ruff check . --select E9,F63,F7,F82 \
|
|
26
|
+
--exclude ".venv,venv,build,build_*,__pycache__,_deps,extern,legacy,notebooks"
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
# Build the sdist + self-contained OpenMP CPU wheels and publish to PyPI (peclet-flow) on a version tag.
|
|
4
|
+
# Uses PyPI Trusted Publishing (OIDC) — configure the publisher on PyPI first; no API token secret needed.
|
|
5
|
+
# The wheels FetchContent-build Kokkos (OpenMP+Serial) + the core headers inside each build
|
|
6
|
+
# (PECLET_VENDOR_DEPS=ON, from pyproject's [tool.cibuildwheel]); GPU/HIP + MPI are the source (sdist) path.
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
tags: ["v*"]
|
|
11
|
+
workflow_dispatch:
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
sdist:
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
- uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.12"
|
|
21
|
+
- run: pip install build
|
|
22
|
+
- run: python -m build --sdist
|
|
23
|
+
- uses: actions/upload-artifact@v4
|
|
24
|
+
with:
|
|
25
|
+
name: sdist
|
|
26
|
+
path: dist/*.tar.gz
|
|
27
|
+
|
|
28
|
+
wheels:
|
|
29
|
+
name: wheels (manylinux_2_28 x86_64)
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v4
|
|
33
|
+
# cibuildwheel reads [tool.cibuildwheel] from pyproject.toml (PECLET_VENDOR_DEPS=ON): each wheel
|
|
34
|
+
# compiles Kokkos as a subproject, so builds are slow but self-contained.
|
|
35
|
+
- uses: pypa/cibuildwheel@v2.21
|
|
36
|
+
- uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: wheels-linux
|
|
39
|
+
path: wheelhouse/*.whl
|
|
40
|
+
|
|
41
|
+
publish:
|
|
42
|
+
needs: [sdist, wheels]
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
|
|
45
|
+
environment: pypi
|
|
46
|
+
permissions:
|
|
47
|
+
id-token: write # required for trusted publishing
|
|
48
|
+
steps:
|
|
49
|
+
- uses: actions/download-artifact@v4
|
|
50
|
+
with:
|
|
51
|
+
path: dist
|
|
52
|
+
merge-multiple: true
|
|
53
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Build directories
|
|
2
|
+
build/
|
|
3
|
+
build_*/
|
|
4
|
+
bin/
|
|
5
|
+
lib/
|
|
6
|
+
*.so
|
|
7
|
+
*.a
|
|
8
|
+
*.o
|
|
9
|
+
*.log
|
|
10
|
+
|
|
11
|
+
# Python
|
|
12
|
+
__pycache__/
|
|
13
|
+
*.pyc
|
|
14
|
+
*.pyo
|
|
15
|
+
*.pyd
|
|
16
|
+
.Python
|
|
17
|
+
env/
|
|
18
|
+
venv/
|
|
19
|
+
.env
|
|
20
|
+
.venv
|
|
21
|
+
*.egg-info/
|
|
22
|
+
|
|
23
|
+
# IDE/Editor
|
|
24
|
+
.vscode/
|
|
25
|
+
.idea/
|
|
26
|
+
*.swp
|
|
27
|
+
*~
|
|
28
|
+
.DS_Store
|
|
29
|
+
|
|
30
|
+
# CMake
|
|
31
|
+
CMakeCache.txt
|
|
32
|
+
CMakeFiles/
|
|
33
|
+
cmake_install.cmake
|
|
34
|
+
Makefile
|
|
35
|
+
install_manifest.txt
|
|
36
|
+
_deps/
|
|
37
|
+
|
|
38
|
+
# Cache
|
|
39
|
+
.cache/
|
|
40
|
+
|
|
41
|
+
# Data
|
|
42
|
+
data/
|
|
43
|
+
|
|
44
|
+
# Output files
|
|
45
|
+
*.vtp
|
|
46
|
+
*.vti
|
|
47
|
+
*.edges
|
|
48
|
+
*.npz
|
|
49
|
+
output/
|
|
50
|
+
*.png
|
|
51
|
+
*.csv
|
|
52
|
+
*.pvsm
|
|
53
|
+
|
|
54
|
+
# Generated documentation (built by Doxygen / CI)
|
|
55
|
+
docs/html/
|
|
56
|
+
docs/latex/
|
|
57
|
+
doxygen_warn.txt
|
|
58
|
+
|
|
59
|
+
# Agent/scratch cruft — old planning notes, AI context dumps, session logs (keep these out of
|
|
60
|
+
# `git add -A`). Tracked design docs (e.g. doc/flow_*_plan.md) are unaffected; force-add a new
|
|
61
|
+
# plan with `git add -f` if it should be versioned.
|
|
62
|
+
.codex
|
|
63
|
+
codebase_context.txt
|
|
64
|
+
output.txt
|
|
65
|
+
doc/tmp.txt
|
|
66
|
+
doc/refactor_claude.txt
|
|
67
|
+
doc/session-*.json
|
|
68
|
+
doc/plan_*.md
|
|
69
|
+
doc/implementation_plan_*.md
|
|
70
|
+
doc/*_handoff.md
|
|
71
|
+
doc/*_review*.md
|
|
72
|
+
doc/*investigation*.md
|
|
73
|
+
doc/IBM_Implementation_Plan.md
|
|
74
|
+
doc/continuation_force_sweep_*.md
|
|
75
|
+
doc/pressure_from_phi_plan.md
|
|
76
|
+
doc/pymrm_ghost_cell_ibm_transfer.md
|
|
77
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Repository Guidelines
|
|
2
|
+
|
|
3
|
+
## Project Structure & Module Organization
|
|
4
|
+
- `src/`: header-only **Kokkos** C++ sources for the CFD solver + **nanobind** bindings (`.hpp` + `.cpp`; no `.cu/.cuh` — CUDA retired).
|
|
5
|
+
- `tests/`: C++ Kokkos kernel tests (`tests/kokkos/`) + multi-rank ctests (`tests/kokkos_mpi/test_*.cpp`) + Python regression and study scripts (`tests/regression/`, `tests/study/`); the pore-extraction Python scripts live in `scripts/`.
|
|
6
|
+
- `scripts/`: `flow` verification/analysis scripts (e.g., `verify_poiseuille_sdflow.py`).
|
|
7
|
+
- `build/`: CMake build output (expects `build/peclet.flow.*.so` + `build/pnm.*.so`).
|
|
8
|
+
- `doc/`, `notebooks/`, `data/`: design notes, experiments, and input datasets.
|
|
9
|
+
|
|
10
|
+
## Build, Test, and Development Commands
|
|
11
|
+
```bash
|
|
12
|
+
# Build/install the Python extensions via scikit-build-core (Kokkos prefix from ../tools/bootstrap_deps.sh)
|
|
13
|
+
CMAKE_PREFIX_PATH="$PWD/../extern/install/<backend>" pip install .
|
|
14
|
+
# Or a dev cmake build (nanobind found via the active interpreter; CMake 3.24+):
|
|
15
|
+
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PWD/../extern/install/<backend>" && cmake --build build -j
|
|
16
|
+
|
|
17
|
+
# Activate venv and run flow verification (canonical scripts are scripts/verify_*_sdflow.py)
|
|
18
|
+
source .venv/bin/activate
|
|
19
|
+
PYTHONPATH=$PWD/build python scripts/verify_poiseuille_sdflow.py
|
|
20
|
+
PYTHONPATH=$PWD/build python scripts/verify_periodic_spheres_sdflow.py
|
|
21
|
+
PYTHONPATH=$PWD/build python scripts/verify_lid_cavity_sdflow.py
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Coding Style & Naming Conventions
|
|
25
|
+
- C++ uses 2-space indentation and K&R-style braces; device code is plain Kokkos C++ in `.hpp` headers.
|
|
26
|
+
- Python uses 4-space indentation.
|
|
27
|
+
- Files and functions use `snake_case`; types/classes use `PascalCase`.
|
|
28
|
+
- Kokkos device work is `parallel_for`/`parallel_reduce` over `Kokkos::View`s; keep functor types suffixed `_kernel`/`_op`.
|
|
29
|
+
|
|
30
|
+
## Testing Guidelines
|
|
31
|
+
- Tests are C++ Kokkos ctests (`tests/kokkos/`, `tests/kokkos_mpi/`) plus executable Python scripts (`tests/regression/`, `scripts/test_*.py`).
|
|
32
|
+
- Name tests `test_*` (`test_*.cpp` for C++ ctests, `test_*.py` for Python scripts) and keep Python import paths pointed at `build/`.
|
|
33
|
+
- No formal coverage target; prefer adding a small validation script when fixing numerics.
|
|
34
|
+
|
|
35
|
+
## Commit & Pull Request Guidelines
|
|
36
|
+
- Recent history uses Conventional Commit prefixes (`feat:`, `fix:`); follow this when possible.
|
|
37
|
+
- Keep commits scoped to one change; include the numerical/physical intent in the message.
|
|
38
|
+
- PRs should describe the change, list commands run, and attach plots or outputs if results change.
|
|
39
|
+
|
|
40
|
+
## Agent Notes
|
|
41
|
+
- For more detailed architecture and build guidance, see `CLAUDE.md`.
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
Performance-portable incompressible Navier-Stokes CFD solver for porous media simulations. Uses a staggered MAC grid with the Immersed Boundary Method (IBM) over complex geometries defined by Signed Distance Functions (SDF), with cut-cell pressure projection. Built on **Kokkos** — the same source runs on **CUDA, HIP (AMD/LUMI), and OpenMP** backends, selected at build time by the install prefix.
|
|
8
|
+
|
|
9
|
+
**`flow` is THE solver** (`src/flow_bindings.cpp` → `peclet::flow::IbmSolver` in `src/flow_ibm.hpp`):
|
|
10
|
+
cut-cell IBM physics on a staggered MAC grid, with a grid-independent geometric **multigrid** pressure
|
|
11
|
+
solve, and a multi-rank MPI path (core grid halo). It solves the equations in **physical units**
|
|
12
|
+
(density `rho`, dynamic viscosity `mu`, physical pressure `p`). See the "MPI / flow" section below.
|
|
13
|
+
|
|
14
|
+
The CUDA implementation was **retired** (Kokkos became canonical, 2026-06): `flow` was validated
|
|
15
|
+
bit-identical to the CUDA solver (to machine precision), and also against the Zick & Homsy sphere-array Stokes drag,
|
|
16
|
+
before the CUDA sources were deleted. Restore point: the git tag `pre-cuda-retirement`. The cut-cell IBM
|
|
17
|
+
primitives live in `src/cut_cell_ibm.hpp`; the operator headers are `src/mac_*.hpp` + `src/flow_ibm.hpp`.
|
|
18
|
+
|
|
19
|
+
**`pnm` is the pore-network-extraction module** (`src/pnm_bindings.cpp` + `src/pore_extraction.hpp`
|
|
20
|
+
Kokkos compute + the pure-C++ `src/sdf_reader.cpp` VTI reader): `SDFReader`, `extract_pores`,
|
|
21
|
+
`segment_volume`, `extract_topology_gpu` — the repo's namesake "pnm_from_sdf" feature, unrelated to the CFD solve.
|
|
22
|
+
|
|
23
|
+
## Build Commands
|
|
24
|
+
|
|
25
|
+
Kokkos is found via `find_package` against the bootstrapped install prefix
|
|
26
|
+
(`../extern/install/<backend>`, built once by `../tools/bootstrap_deps.sh` — a **hard build dependency**;
|
|
27
|
+
backend = `nvidia-cuda` / `host-openmp` / `lumi-hip`); **nanobind** is provisioned by the shared
|
|
28
|
+
`SuiteNanobind` helper (found through the active Python interpreter, no cmakedir prefix needed). When building with
|
|
29
|
+
`nvcc`, put it on `PATH` (`export PATH=/usr/local/cuda-13.2/bin:$PATH`).
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
source .venv/bin/activate
|
|
33
|
+
# Canonical: build + install both modules (flow solver + pnm) via scikit-build-core.
|
|
34
|
+
CMAKE_PREFIX_PATH="$PWD/../extern/install/nvidia-cuda" pip install .
|
|
35
|
+
|
|
36
|
+
# Or a dev cmake build (single-rank Python modules):
|
|
37
|
+
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PWD/../extern/install/nvidia-cuda"
|
|
38
|
+
cmake --build build -j
|
|
39
|
+
# Output: build/peclet.flow.*.so (the CFD solver) + build/pnm.*.so (pore extraction)
|
|
40
|
+
|
|
41
|
+
# OpenMP backend: same source, just swap the prefix (extern/install/host-openmp).
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Requirements:** Kokkos 5.x (C++20), CMake 3.24+, Python 3.10+, nanobind + scikit-build-core;
|
|
45
|
+
`../core` (header-only) + MPI for the multi-rank test suite. The Kokkos/ArborX install prefix is
|
|
46
|
+
produced by `../tools/bootstrap_deps.sh`.
|
|
47
|
+
|
|
48
|
+
## Running Tests and Verification
|
|
49
|
+
|
|
50
|
+
Drive `flow` verification from Python:
|
|
51
|
+
```bash
|
|
52
|
+
source .venv/bin/activate
|
|
53
|
+
export PYTHONPATH=$PWD/build
|
|
54
|
+
python scripts/verify_periodic_spheres_sdflow.py # cut-cell Stokes through a sphere packing
|
|
55
|
+
python scripts/verify_poiseuille_sdflow.py # analytic parabola
|
|
56
|
+
python scripts/verify_lid_cavity_sdflow.py # lid cavity vs Ghia
|
|
57
|
+
python scripts/verify_channel_sdflow.py # developing channel
|
|
58
|
+
python scripts/verify_bfs_sdflow.py # backward-facing step
|
|
59
|
+
python scripts/verify_chebyshev_sdflow.py # Chebyshev pressure driver == MG-PCG
|
|
60
|
+
python scripts/validate_zick_homsy_sdflow.py # external ground truth (Z&H drag)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
C++ kernel + multi-rank test suites (own `find_package` projects; build against the same prefix):
|
|
64
|
+
```bash
|
|
65
|
+
# Single-rank Kokkos kernel unit tests:
|
|
66
|
+
cmake -S tests/kokkos -B build_kokkos -DCMAKE_PREFIX_PATH=$PWD/../extern/install/nvidia-cuda
|
|
67
|
+
cmake --build build_kokkos -j && ctest --test-dir build_kokkos --output-on-failure # 14 tests
|
|
68
|
+
# Multi-rank (MPI) tests, np=1,2,4:
|
|
69
|
+
cmake -S tests/kokkos_mpi -B build_kmpi -DCMAKE_PREFIX_PATH=$PWD/../extern/install/nvidia-cuda \
|
|
70
|
+
-DMPIEXEC_EXECUTABLE=/usr/bin/mpirun
|
|
71
|
+
cmake --build build_kmpi -j && ctest --test-dir build_kmpi --output-on-failure # 18 tests (6 x np)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Single-GPU **accuracy + efficiency regression suite** (grid-convergence + recorded solver-iteration
|
|
75
|
+
counts, checked against a saved baseline so regressions are caught — Z&H sphere, random-sphere bed,
|
|
76
|
+
hollow-ring bed): `PYTHONPATH=$PWD/build python tests/regression/sdflow_regression.py` (`--update` to
|
|
77
|
+
re-record the baseline). See [`tests/regression/README.md`](tests/regression/README.md).
|
|
78
|
+
|
|
79
|
+
Pore-network extraction (the `pnm` module): `python scripts/test_extraction.py`,
|
|
80
|
+
`python scripts/verify_segmentation.py`.
|
|
81
|
+
|
|
82
|
+
## Architecture
|
|
83
|
+
|
|
84
|
+
### Memory Layout
|
|
85
|
+
|
|
86
|
+
- Linear indexing: `I = x + y*nx + z*nx*ny` (x is fastest)
|
|
87
|
+
- Python arrays: Fortran order `order='F'` with shape `(nx, ny, nz)`
|
|
88
|
+
- Periodic boundaries with wrapping: `(x % res.x + res.x) % res.x`
|
|
89
|
+
|
|
90
|
+
### Numerical Method (`flow`)
|
|
91
|
+
|
|
92
|
+
The physical incompressible momentum equation `rho*(du/dt + (u.grad)u) = -grad(p) + mu*Lap(u) + f`, solved
|
|
93
|
+
each step (semi-implicit), **scaled by 1/dt** (the "divided" convention — the operator is `(rho/dt)*I -
|
|
94
|
+
mu*Lap`, well-conditioned at large dt / steady state):
|
|
95
|
+
|
|
96
|
+
1. **Advection**: explicit Koren TVD, or implicit-FOU + deferred-correction TVD (`set_implicit_advection`)
|
|
97
|
+
2. **Diffusion**: backward-Euler implicit, Red-Black Gauss-Seidel (or opt-in velocity multigrid)
|
|
98
|
+
3. **Pressure projection**: `u = u* - grad(phi)`, `Lap(phi) = div(u*)`; physical pressure `p = (rho/dt)*phi`,
|
|
99
|
+
accumulated rotationally under the default incremental-pressure scheme. Geometric multigrid (V-cycle /
|
|
100
|
+
MG-PCG / Chebyshev).
|
|
101
|
+
4. **IBM**: Robust-Scaled cut-cell method with D_rescale for near-wall handling.
|
|
102
|
+
|
|
103
|
+
### Key Source Files
|
|
104
|
+
|
|
105
|
+
All Kokkos, header-only (`namespace flow`), C++20.
|
|
106
|
+
|
|
107
|
+
**`flow` (the CFD solver):**
|
|
108
|
+
- `src/flow_ibm.hpp` - `peclet::flow::IbmSolver`: the solver (diffusion, projection, three pressure drivers, Picard, MPI)
|
|
109
|
+
- `src/mac_cutcell_mg.hpp` - `CutcellMG`: geometric pressure MG (V-cycle / PCG / Chebyshev), MPI-folded
|
|
110
|
+
- `src/mac_velocity_mg.hpp` - `VelocityMG`: velocity multigrid (staircase / upwind / domain-BC), MPI-folded
|
|
111
|
+
- `src/mac_ibm.hpp`, `src/mac_cutcell.hpp`, `src/mac_pressure.hpp`, `src/mac_bc.hpp`, `src/mac_reductions.hpp` - IBM stencil, cut-cell openness, projection, domain BCs, reductions
|
|
112
|
+
- `src/cut_cell_ibm.hpp` - the Robust-Scaled cut-cell IBM overlay (`poly_*`, K/M/X/Nbc/R, D_rescale)
|
|
113
|
+
- `src/staggered_advection.hpp` - `sadv::advect`: staggered Koren TVD advection (+ implicit-FOU operator)
|
|
114
|
+
- `src/flow_bindings.cpp` - the `flow` nanobind module: `peclet.flow.Solver` (staggered MAC, default) and
|
|
115
|
+
`peclet.flow.SolverColocated` (collocated/cell-centered velocities via the `GridLayout` policy + ABC
|
|
116
|
+
approximate projection — identical Python API; see [`doc/flow_colocated_plan.md`](doc/flow_colocated_plan.md))
|
|
117
|
+
|
|
118
|
+
**`pnm` (pore-network extraction):**
|
|
119
|
+
- `src/pore_extraction.hpp` (`namespace pnm`, Kokkos compute), `src/sdf_reader.cpp` / `.h` (pure-C++ VTI reader)
|
|
120
|
+
- `src/pnm_bindings.cpp` - the `pnm` nanobind module (`SDFReader`, `extract_pores`, `segment_volume`, ...)
|
|
121
|
+
|
|
122
|
+
## Python API Usage (`flow`)
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import peclet.flow
|
|
126
|
+
s = peclet.flow.Solver(nx, ny, nz)
|
|
127
|
+
s.set_rho(1.0); s.set_mu(0.01); s.set_dt(60.0) # physical units; fix before geometry
|
|
128
|
+
s.set_body_force(1e-2, 0, 0) # force per unit volume
|
|
129
|
+
s.set_solid(sdf, cutcell_pressure=True, pressure_coarse="rediscretized") # SDF [x,y,z], <0 inside
|
|
130
|
+
for _ in range(n_steps):
|
|
131
|
+
s.step()
|
|
132
|
+
u = s.get_u() # 3-D numpy array [x,y,z]; p = s.get_p() is the physical pressure
|
|
133
|
+
```
|
|
134
|
+
See the "Pressure solver options" table below and `scripts/*_sdflow.py` for the full API.
|
|
135
|
+
|
|
136
|
+
## Conventions
|
|
137
|
+
|
|
138
|
+
- **SDF sign**: Negative inside solid, positive in fluid
|
|
139
|
+
- **Kokkos kernels**: `parallel_for` / `parallel_reduce` over `Kokkos::View`s (`MDRangePolicy` for 3-D loops); device sources are `.hpp` compiled as C++ (the launch compiler routes through `nvcc`/`hipcc`), never `.cu`
|
|
140
|
+
- **Staggered grid**: u at (i+1/2,j,k), v at (i,j+1/2,k), w at (i,j,k+1/2), p at cell centers
|
|
141
|
+
|
|
142
|
+
## MPI / flow (the CFD solver, core integration)
|
|
143
|
+
|
|
144
|
+
The **`flow`** solver (`peclet::flow::IbmSolver`) is built on the shared `core` library (sibling repo
|
|
145
|
+
`../core`), whose **Kokkos** grid halo (`peclet::core::halo::GridHalo`) carries the
|
|
146
|
+
multi-rank ghost exchange. The single-rank Python module is built by the main `CMakeLists.txt`; the
|
|
147
|
+
multi-rank path is exercised by the `tests/kokkos_mpi` ctests (gated behind `PECLET_FLOW_MPI`, so the single-rank
|
|
148
|
+
module is byte-identical). It was validated bit-identical (machine precision) to the retired CUDA solver
|
|
149
|
+
and against external analytics.
|
|
150
|
+
|
|
151
|
+
Key pieces (all `src/*.hpp`, Kokkos, header-only, `namespace flow`):
|
|
152
|
+
- `peclet::core::halo::GridHalo` (core) — per-level ORB block ghost exchange for the
|
|
153
|
+
`double` cell-fields on the extended local block. cfd's x-fastest layout matches `peclet::core::Field3D`.
|
|
154
|
+
- `staggered_advection.hpp` — `sadv::advect`: staggered Koren TVD advection, templated on a field accessor.
|
|
155
|
+
- `flow_ibm.hpp` — `peclet::flow::IbmSolver`: the solver. `step()` does per-component implicit diffusion
|
|
156
|
+
(RB-GS or velocity-MG, halo exchange between sweeps) + cut-cell incremental-rotational projection, with
|
|
157
|
+
`set_advection`/`set_implicit_advection`, `set_body_force`, `set_solid` (cut-cell IBM no-slip), domain
|
|
158
|
+
BCs, and `initMpi(gnx,gny,gnz,comm)` for the multi-rank step.
|
|
159
|
+
|
|
160
|
+
### Pressure solver options (the `flow` module)
|
|
161
|
+
|
|
162
|
+
The cut-cell pressure Poisson is solved by a geometric **multigrid** (`mac_cutcell_mg.hpp`, `CutcellMG`)
|
|
163
|
+
whose smoother is **Red-Black Gauss-Seidel** and whose coarse operator is the **rediscretized** cut-cell
|
|
164
|
+
operator. Three outer drivers wrap that V-cycle — **select one per solver**:
|
|
165
|
+
|
|
166
|
+
| driver | select with | use |
|
|
167
|
+
|---|---|---|
|
|
168
|
+
| **Standalone V-cycle** | default (neither below set) | multi-rank default. `set_pressure_multigrid(True, levels=1)` ⇒ pure RB-GS (no coarse grid) |
|
|
169
|
+
| **MG-PCG** | `set_pressure_pcg(True, max_iter, rtol)` | **single-GPU default** (auto-enabled on 1 rank); ~1.2× faster than the V-cycle to a fixed tolerance |
|
|
170
|
+
| **Chebyshev** | `set_pressure_chebyshev(True, max_iter, rtol)` | communication-light (no per-iteration global dot-products) — for large multi-GPU where PCG's reductions are latency-bound. ≈ PCG iteration count; bounds estimated once on step 1 |
|
|
171
|
+
|
|
172
|
+
- **PCG and Chebyshev are mutually exclusive** (last set wins); either overrides the single-rank auto-PCG
|
|
173
|
+
default. With neither active (the multi-rank default), the solve is `n_pois` standalone V-cycles.
|
|
174
|
+
- Coarse-operator mode: `set_solid(..., pressure_coarse="rediscretized")` (default; also `"galerkin"` /
|
|
175
|
+
`"const"`). `set_pressure_multigrid(on, levels)` sets the multigrid depth (`levels=1` == pure RB-GS).
|
|
176
|
+
- `set_pressure_warmstart(True)` seeds each solve from the previous step's φ (opt-in, off by default).
|
|
177
|
+
- Validated against Zick & Homsy SC-sphere drag. Design + benchmarks:
|
|
178
|
+
[`doc/flow_multigrid_plan.md`](doc/flow_multigrid_plan.md).
|
|
179
|
+
|
|
180
|
+
### Domain boundary conditions
|
|
181
|
+
|
|
182
|
+
Beyond periodic + IBM no-slip on immersed solids, flow has **native per-face domain BCs** (`mac_bc.hpp`):
|
|
183
|
+
`set_domain_bc(face, type, vx, vy, vz)` for the 6 faces (0=−x,1=+x,2=−y,3=+y,4=−z,5=+z); `type` 0=periodic
|
|
184
|
+
(default), 1=no-slip wall, 2=Dirichlet velocity / inflow, 3=outflow. Velocity ghosts are filled in the
|
|
185
|
+
MAC-staggered convention. Tangential walls use a **face-fold** in the implicit diffusion (drop the wall
|
|
186
|
+
face, fold its β into the diagonal + RHS) so `u_inner` stays implicit — no Gauss–Seidel lag; explicit
|
|
187
|
+
advection keeps the reflection ghost. Call **before** geometry/first step. For a domain-BC problem with no
|
|
188
|
+
immersed solid, use `set_pressure_geometry(all_fluid_sdf)` (the cut-cell pressure operator without the IBM).
|
|
189
|
+
|
|
190
|
+
**Open boundaries** (outflow, or inflow with a non-zero normal velocity) split the face openness into two
|
|
191
|
+
roles: the **operator** openness α (pressure matrix) is 0 at walls + inflow (Neumann) and open at outflow
|
|
192
|
+
(Dirichlet p=0, ghost held at 0 → non-singular, mean-removal off); the **flux** openness β
|
|
193
|
+
(divergence/correction) stays open at inflow + outflow so their flux is counted. Outflow velocity is
|
|
194
|
+
zero-gradient (∂/∂n=0); the projection corrects the outflow face so mass leaves.
|
|
195
|
+
|
|
196
|
+
**Non-uniform inlets:** `set_domain_bc_profile(face, profile[Nb,Nc,3])` prescribes a per-position inlet
|
|
197
|
+
velocity over the face's perpendicular plane (sets the face to inflow). Used for a parabolic channel inlet
|
|
198
|
+
or the **backward-facing step**, whose step is realized purely as the inlet condition — the developed
|
|
199
|
+
parabola over the open upper half, zero over the step face (no immersed solid needed).
|
|
200
|
+
|
|
201
|
+
**Validated:** lid-driven cavity vs Ghia et al. Re=100 to ~0.7% rms (`scripts/verify_lid_cavity_sdflow.py`);
|
|
202
|
+
developing plane channel (uniform inlet → parabolic Poiseuille outlet, `u_max/U_mean`→1.5, exact mass
|
|
203
|
+
conservation, machine-precision divergence; `scripts/verify_channel_sdflow.py`); backward-facing step
|
|
204
|
+
(Gartling expansion-ratio-2, `scripts/verify_bfs_sdflow.py`) — reattachment `x_r/S` 5.3 (Re_S=100) → 8.3
|
|
205
|
+
(Re_S=200) on the Armaly/Biswas curve; `PECLET_FLOW_BFS_RE800=1` pushes the run to the Gartling Re=800 benchmark.
|
|
206
|
+
|
|
207
|
+
The **rediscretized geometric pressure multigrid is multilevel on these non-periodic domains** (not just the
|
|
208
|
+
periodic/IBM case): each coarse level re-imposes the boundary face openness (Neumann wall/inflow → 0,
|
|
209
|
+
Dirichlet outflow → open) and the trilinear prolongation fills the non-periodic boundary ghosts
|
|
210
|
+
(Neumann → zero-gradient, Dirichlet → 0). Gated on `has_bc_`, so the periodic/IBM path is byte-identical.
|
|
211
|
+
Convergence is grid-independent — e.g. a 256×64 channel at a fixed 10 V-cycles/step drives the flux
|
|
212
|
+
divergence from `2e-3` (1 level) to `5e-7` (3 levels) at ~the same cost.
|
|
213
|
+
|
|
214
|
+
**Semi-coarsening** handles thin (quasi-2D) grids: uniform 2:1 coarsening caps an `nz=4` grid at 2 levels,
|
|
215
|
+
so `init(..., semi=true)` halves an axis only while it stays even and ≥2 — a thin axis freezes while the
|
|
216
|
+
wide axes keep coarsening (`MGLevel::ratio`/`cfac`; the transfer + openness kernels take a per-axis
|
|
217
|
+
`int3 ratio`, the operator uses per-axis `idx2/cfac²`). The solver enables it only for native-BC problems
|
|
218
|
+
(`has_domain_bc_`, `semi_level_count`); the periodic/IBM porous path stays uniform + `clamp_levels`, so it
|
|
219
|
+
is byte-identical. A quasi-2D 256×64×4 channel now builds up to 8 levels (was 2): raw V-cycle flux
|
|
220
|
+
divergence at a fixed 8 cycles drops `1.7e-4`→`8.6e-13`. The BC verify scripts request `levels=8`
|
|
221
|
+
(auto-capped). *Follow-ups:* convective outflow for unsteady wakes, multi-rank inlet-profile scatter
|
|
222
|
+
(validated single-rank).
|
|
223
|
+
|
|
224
|
+
Validated against analytics (Taylor–Green ~2e-15, Poiseuille, momentum conservation) **and against Zick &
|
|
225
|
+
Homsy sphere-array drag**; the multi-rank step is bit-exact with the single-rank step — **18 `tests/kokkos_mpi`
|
|
226
|
+
ctests, real multi-rank np=1,2,4, on CUDA + OpenMP**.
|
|
227
|
+
|
|
228
|
+
Build/test the multi-rank ctests:
|
|
229
|
+
```bash
|
|
230
|
+
export PATH=/usr/local/cuda-13.2/bin:$PATH
|
|
231
|
+
cmake -S tests/kokkos_mpi -B build_kmpi \
|
|
232
|
+
-DCMAKE_PREFIX_PATH=$PWD/../extern/install/nvidia-cuda \
|
|
233
|
+
-DMPIEXEC_EXECUTABLE=/usr/bin/mpirun
|
|
234
|
+
cmake --build build_kmpi -j
|
|
235
|
+
ctest --test-dir build_kmpi --output-on-failure
|
|
236
|
+
```
|
|
237
|
+
**Force `-DMPIEXEC_EXECUTABLE=/usr/bin/mpirun`** — FindMPI may pick ParaView's bundled `mpiexec` on
|
|
238
|
+
`PATH`, which launches the OpenMPI-linked test binaries as singletons (so `*_np4` silently runs 4×np=1).
|
|
239
|
+
|
|
240
|
+
**Status:** `peclet::flow::IbmSolver`/`flow` is the full solver — the Robust-Scaled cut-cell IBM, a grid-independent
|
|
241
|
+
geometric **multigrid** pressure solve (rediscretized cut-cell coarse operator; three selectable outer
|
|
242
|
+
drivers), velocity multigrid, implicit-FOU + Picard, all domain BCs, and a bit-exact multi-rank step
|
|
243
|
+
(`CutcellMG` + `VelocityMG` MPI-folded). The CUDA implementation is **retired** (restore tag
|
|
244
|
+
`pre-cuda-retirement`). **Remaining open items:** the large-np scaling work — an **agglomerated coarse
|
|
245
|
+
solve** and the communication-light **Chebyshev** accelerator's at-scale benchmark, both needing real
|
|
246
|
+
multi-GPU hardware.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.24)
|
|
2
|
+
project(peclet_flow LANGUAGES CXX)
|
|
3
|
+
|
|
4
|
+
# Canonical cfd-gpu build: the Kokkos `sdflow` cut-cell IBM Navier-Stokes solver + the `pnm`
|
|
5
|
+
# pore-network extraction, as importable Python modules. Kokkos provides the backend (CUDA / HIP / OpenMP),
|
|
6
|
+
# selected by the install prefix (extern/install/<backend>, built by ../tools/bootstrap_deps.sh -- a HARD
|
|
7
|
+
# build dependency).
|
|
8
|
+
#
|
|
9
|
+
# Build (single-rank Python modules); nanobind is found via the active interpreter (SuiteNanobind):
|
|
10
|
+
# cmake -S . -B build -DCMAKE_PREFIX_PATH="$PWD/../extern/install/nvidia-cuda"
|
|
11
|
+
# cmake --build build -j -> build/peclet/flow/_flow.*.so + build/peclet/flow/pnm/_pnm.*.so
|
|
12
|
+
#
|
|
13
|
+
# Multi-rank MPI is exercised by the tests/kokkos_mpi ctest suite (its own find_package project); the
|
|
14
|
+
# single-rank kernel unit tests live in tests/kokkos. See CLAUDE.md.
|
|
15
|
+
|
|
16
|
+
set(CMAKE_CXX_STANDARD 20)
|
|
17
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
18
|
+
if(NOT CMAKE_BUILD_TYPE)
|
|
19
|
+
set(CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE)
|
|
20
|
+
endif()
|
|
21
|
+
|
|
22
|
+
# Dependencies via the vendored PecletDeps helper: an installed Kokkos prefix + sibling checkout for the
|
|
23
|
+
# dev/suite build, or FetchContent-built Kokkos + fetched core headers for a self-contained
|
|
24
|
+
# sdist/wheel (cibuildwheel). See cmake/PecletDeps.cmake.
|
|
25
|
+
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
|
26
|
+
include(PecletDeps)
|
|
27
|
+
peclet_require_kokkos()
|
|
28
|
+
peclet_require_nanobind()
|
|
29
|
+
peclet_sibling_include(peclet-core "${PECLET_TPX_TAG}" "../core" TPX_INC)  # TPX_INC is consumed by both targets' include directories below. NOTE(review): PECLET_TPX_TAG is not set in this file -- presumably provided by PecletDeps.cmake or the command line; confirm.
|
|
30
|
+
|
|
31
|
+
# sdflow -- the cut-cell IBM Navier-Stokes solver (single-rank Python module).
|
|
32
|
+
# NB_STATIC: bundle nanobind's runtime into the module (no shared libnanobind to ship); the Kokkos
|
|
33
|
+
# device path is routed by the launch compiler regardless.
|
|
34
|
+
# NOMINSIZE: nanobind's default -Os size optimization is rejected by nvcc ("'s': expected a number")
|
|
35
|
+
# since the Kokkos device sources compile as CXX through the launch compiler.
|
|
36
|
+
# The two extensions are assembled into the PEP-420 peclet namespace: the solver as `peclet.flow` (the
|
|
37
|
+
# private `_flow` extension re-exported by peclet/flow/__init__.py) and pore extraction as
|
|
38
|
+
# `peclet.flow.pnm`. The package __init__.py files are kept as plain files under packaging/ (OUTSIDE any
|
|
39
|
+
# importable peclet/ dir, so an incomplete source package can never shadow the installed one) and staged
|
|
40
|
+
# into <build>/peclet/... so `PYTHONPATH=<build> python …` (`import peclet.flow`) works in the dev loop
|
|
41
|
+
# too. The SKBUILD install rules (guarded below) place them into the wheel.
|
|
42
|
+
nanobind_add_module(sdflow NB_STATIC NOMINSIZE src/flow_bindings.cpp)
|
|
43
|
+
set_target_properties(sdflow PROPERTIES OUTPUT_NAME _flow # -> peclet.flow._flow (NB_MODULE(_flow))
|
|
44
|
+
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/peclet/flow")
|
|
45
|
+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/packaging/flow_init.py
|
|
46
|
+
${CMAKE_CURRENT_BINARY_DIR}/peclet/flow/__init__.py COPYONLY)
|
|
47
|
+
target_include_directories(sdflow PRIVATE src "${TPX_INC}")
|
|
48
|
+
target_link_libraries(sdflow PRIVATE Kokkos::kokkos)
|
|
49
|
+
|
|
50
|
+
# pnm -- pore-network extraction (Kokkos GPU compute + pure-C++ SDFReader VTI reader).
|
|
51
|
+
nanobind_add_module(pnm NB_STATIC NOMINSIZE src/pnm_bindings.cpp src/sdf_reader.cpp)
|
|
52
|
+
set_target_properties(pnm PROPERTIES OUTPUT_NAME _pnm # -> peclet.flow.pnm._pnm (NB_MODULE(_pnm))
|
|
53
|
+
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/peclet/flow/pnm")
|
|
54
|
+
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/packaging/flow_pnm_init.py
|
|
55
|
+
${CMAKE_CURRENT_BINARY_DIR}/peclet/flow/pnm/__init__.py COPYONLY)
|
|
56
|
+
target_include_directories(pnm PRIVATE src "${TPX_INC}")
|
|
57
|
+
target_link_libraries(pnm PRIVATE Kokkos::kokkos)
|
|
58
|
+
|
|
59
|
+
# --- pip / scikit-build-core install rule -------------------------------------------------------
|
|
60
|
+
if(DEFINED SKBUILD)
|
|
61
|
+
install(TARGETS sdflow LIBRARY DESTINATION peclet/flow COMPONENT python)
|
|
62
|
+
install(TARGETS pnm LIBRARY DESTINATION peclet/flow/pnm COMPONENT python)
|
|
63
|
+
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/flow_init.py
|
|
64
|
+
DESTINATION peclet/flow RENAME __init__.py COMPONENT python)
|
|
65
|
+
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/packaging/flow_pnm_init.py
|
|
66
|
+
DESTINATION peclet/flow/pnm RENAME __init__.py COMPONENT python)
|
|
67
|
+
endif()
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
# Gemini Added Memories
|
|
2
|
+
- Workflow Protocol: For any issue, I must follow this sequence: 1. Form Hypothesis, 2. Investigate, 3. Report Findings, 4. Propose Plan, 5. **STOP AND WAIT for User Agreement**, 6. Execute Plan. I am strictly forbidden from combining Steps 4 and 6. I must never assume approval.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Frank Peters
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|