arraymorph 0.2.0b1__tar.gz → 0.2.0b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. arraymorph-0.2.0b1/.github/workflows/build-lib_array_morph-and-pypi-package.yaml → arraymorph-0.2.0b2/.github/workflows/build.yaml +93 -70
  2. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/.gitignore +10 -0
  3. arraymorph-0.2.0b2/PKG-INFO +343 -0
  4. arraymorph-0.2.0b2/README.md +330 -0
  5. arraymorph-0.2.0b2/justfile +56 -0
  6. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/CMakeLists.txt +18 -13
  7. arraymorph-0.2.0b2/lib/README.md +64 -0
  8. arraymorph-0.2.0b2/lib/justfile +37 -0
  9. arraymorph-0.2.0b2/lib/vcpkg.json +12 -0
  10. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/pyproject.toml +1 -1
  11. arraymorph-0.2.0b1/.github/workflows/build.yml +0 -62
  12. arraymorph-0.2.0b1/PKG-INFO +0 -142
  13. arraymorph-0.2.0b1/README.md +0 -129
  14. arraymorph-0.2.0b1/justfile +0 -63
  15. arraymorph-0.2.0b1/lib/README.md +0 -17
  16. arraymorph-0.2.0b1/lib/conanfile.py +0 -41
  17. arraymorph-0.2.0b1/lib/justfile +0 -40
  18. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/.actrc +0 -0
  19. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  20. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/.github/ISSUE_TEMPLATE/config.yaml +0 -0
  21. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/.python-version +0 -0
  22. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/CONAN-INTEGRATION.md +0 -0
  23. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/LICENSE +0 -0
  24. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/env-example.txt +0 -0
  25. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/examples/python/read.py +0 -0
  26. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/examples/python/write.py +0 -0
  27. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/.clangd +0 -0
  28. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/core/constants.h +0 -0
  29. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/core/logger.h +0 -0
  30. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/core/operators.h +0 -0
  31. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/core/utils.h +0 -0
  32. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/chunk_obj.h +0 -0
  33. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/dataset_callbacks.h +0 -0
  34. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/dataset_obj.h +0 -0
  35. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/file_callbacks.h +0 -0
  36. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/group_callbacks.h +0 -0
  37. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/initialize.h +0 -0
  38. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/include/arraymorph/s3vl/vol_connector.h +0 -0
  39. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/meta.yaml +0 -0
  40. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/scripts/extract_perspective.py +0 -0
  41. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/CMakeLists.txt +0 -0
  42. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/core/constants.cc +0 -0
  43. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/core/operators.cc +0 -0
  44. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/core/utils.cc +0 -0
  45. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/s3vl/chunk_obj.cc +0 -0
  46. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/s3vl/dataset_callbacks.cc +0 -0
  47. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/s3vl/dataset_obj.cc +0 -0
  48. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/s3vl/file_callbacks.cc +0 -0
  49. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/s3vl/group_callbacks.cc +0 -0
  50. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/lib/src/s3vl/vol_connector.cc +0 -0
  51. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/main.py +0 -0
  52. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/release.yaml +0 -0
  53. {arraymorph-0.2.0b1 → arraymorph-0.2.0b2}/src/arraymorph/__init__.py +0 -0
@@ -1,6 +1,8 @@
1
1
  name: Build, Test, and Publish
2
2
 
3
3
  on:
4
+ push:
5
+ branches: [main]
4
6
  pull_request:
5
7
  branches: [main]
6
8
  release:
@@ -11,52 +13,95 @@ jobs:
11
13
  build_wheels:
12
14
  name: Build (${{ matrix.os }} / ${{ matrix.arch }} / py${{ matrix.python }})
13
15
  runs-on: ${{ matrix.runner }}
16
+ container: ${{ matrix.container || '' }}
14
17
  strategy:
15
18
  fail-fast: false
16
19
  matrix:
17
20
  include:
18
- # Linux x86_64
19
- - { os: linux, arch: x86_64, runner: ubuntu-latest, python: "3.9" }
20
- - { os: linux, arch: x86_64, runner: ubuntu-latest, python: "3.10" }
21
- - { os: linux, arch: x86_64, runner: ubuntu-latest, python: "3.11" }
22
- - { os: linux, arch: x86_64, runner: ubuntu-latest, python: "3.12" }
23
- - { os: linux, arch: x86_64, runner: ubuntu-latest, python: "3.13" }
24
- - { os: linux, arch: x86_64, runner: ubuntu-latest, python: "3.14" }
25
- # Linux aarch64
21
+ # Linux x86_64 (manylinux_2_28 container)
22
+ - {
23
+ os: linux,
24
+ arch: x86_64,
25
+ runner: ubuntu-latest,
26
+ container: "quay.io/pypa/manylinux_2_28_x86_64",
27
+ python: "3.9",
28
+ }
29
+ - {
30
+ os: linux,
31
+ arch: x86_64,
32
+ runner: ubuntu-latest,
33
+ container: "quay.io/pypa/manylinux_2_28_x86_64",
34
+ python: "3.10",
35
+ }
36
+ - {
37
+ os: linux,
38
+ arch: x86_64,
39
+ runner: ubuntu-latest,
40
+ container: "quay.io/pypa/manylinux_2_28_x86_64",
41
+ python: "3.11",
42
+ }
43
+ - {
44
+ os: linux,
45
+ arch: x86_64,
46
+ runner: ubuntu-latest,
47
+ container: "quay.io/pypa/manylinux_2_28_x86_64",
48
+ python: "3.12",
49
+ }
50
+ - {
51
+ os: linux,
52
+ arch: x86_64,
53
+ runner: ubuntu-latest,
54
+ container: "quay.io/pypa/manylinux_2_28_x86_64",
55
+ python: "3.13",
56
+ }
57
+ - {
58
+ os: linux,
59
+ arch: x86_64,
60
+ runner: ubuntu-latest,
61
+ container: "quay.io/pypa/manylinux_2_28_x86_64",
62
+ python: "3.14",
63
+ }
64
+ # Linux aarch64 (manylinux_2_28 container)
26
65
  - {
27
66
  os: linux,
28
67
  arch: aarch64,
29
68
  runner: ubuntu-24.04-arm,
69
+ container: "quay.io/pypa/manylinux_2_28_aarch64",
30
70
  python: "3.9",
31
71
  }
32
72
  - {
33
73
  os: linux,
34
74
  arch: aarch64,
35
75
  runner: ubuntu-24.04-arm,
76
+ container: "quay.io/pypa/manylinux_2_28_aarch64",
36
77
  python: "3.10",
37
78
  }
38
79
  - {
39
80
  os: linux,
40
81
  arch: aarch64,
41
82
  runner: ubuntu-24.04-arm,
83
+ container: "quay.io/pypa/manylinux_2_28_aarch64",
42
84
  python: "3.11",
43
85
  }
44
86
  - {
45
87
  os: linux,
46
88
  arch: aarch64,
47
89
  runner: ubuntu-24.04-arm,
90
+ container: "quay.io/pypa/manylinux_2_28_aarch64",
48
91
  python: "3.12",
49
92
  }
50
93
  - {
51
94
  os: linux,
52
95
  arch: aarch64,
53
96
  runner: ubuntu-24.04-arm,
97
+ container: "quay.io/pypa/manylinux_2_28_aarch64",
54
98
  python: "3.13",
55
99
  }
56
100
  - {
57
101
  os: linux,
58
102
  arch: aarch64,
59
103
  runner: ubuntu-24.04-arm,
104
+ container: "quay.io/pypa/manylinux_2_28_aarch64",
60
105
  python: "3.14",
61
106
  }
62
107
  # macOS arm64
@@ -71,7 +116,7 @@ jobs:
71
116
  - name: Checkout
72
117
  uses: actions/checkout@v4
73
118
  with:
74
- fetch-depth: 0 # setuptools-scm needs full history
119
+ fetch-depth: 0
75
120
 
76
121
  - name: Install uv
77
122
  uses: astral-sh/setup-uv@v7
@@ -85,93 +130,71 @@ jobs:
85
130
  # 1. System deps
86
131
  # ──────────────────────────────────────────────
87
132
 
88
- - name: Install system deps (Linux)
133
+ - name: Install system deps (Linux manylinux)
134
+ if: runner.os == 'Linux'
135
+ run: |
136
+ yum -y install \
137
+ git curl ca-certificates \
138
+ zip unzip tar \
139
+ cmake ninja-build pkgconfig \
140
+ gcc gcc-c++ make \
141
+ perl perl-IPC-Cmd perl-ExtUtils-MakeMaker \
142
+ kernel-headers
143
+
144
+ - name: Sanity check compilers
89
145
  if: runner.os == 'Linux'
90
146
  run: |
91
- sudo apt-get update
92
- sudo apt-get install -y \
93
- cmake ninja-build \
94
- pkg-config patchelf \
95
- libx11-dev libx11-xcb-dev libfontenc-dev \
96
- libice-dev libsm-dev libxau-dev libxaw7-dev \
97
- libxcomposite-dev libxcursor-dev libxdamage-dev \
98
- libxdmcp-dev libxext-dev libxfixes-dev libxi-dev \
99
- libxinerama-dev libxkbfile-dev libxmu-dev \
100
- libxmuu-dev libxpm-dev libxrandr-dev libxrender-dev \
101
- libxres-dev libxss-dev libxt-dev libxtst-dev \
102
- libxv-dev libxxf86vm-dev libxcb-glx0-dev \
103
- libxcb-render0-dev libxcb-render-util0-dev \
104
- libxcb-xkb-dev libxcb-icccm4-dev libxcb-image0-dev \
105
- libxcb-keysyms1-dev libxcb-randr0-dev libxcb-shape0-dev \
106
- libxcb-sync-dev libxcb-xfixes0-dev libxcb-xinerama0-dev \
107
- libxcb-dri3-dev uuid-dev libxcb-cursor-dev \
108
- libxcb-dri2-0-dev libxcb-present-dev \
109
- libxcb-composite0-dev libxcb-ewmh-dev libxcb-res0-dev \
110
- libasound2-dev
147
+ gcc --version
148
+ g++ --version
149
+ perl -MIPC::Cmd -e 'print "IPC::Cmd OK\n"'
111
150
 
112
151
  - name: Install system deps (macOS)
113
152
  if: runner.os == 'macOS'
114
- run: brew install ninja cmake
153
+ run: brew install ninja cmake curl openssl
115
154
 
116
155
  - name: Install Python tools
117
156
  run: |
118
157
  uv venv --python ${{ matrix.python }}
119
- echo "${{ github.workspace }}/.venv/bin" >> $GITHUB_PATH
158
+ echo "${GITHUB_WORKSPACE}/.venv/bin" >> $GITHUB_PATH
120
159
  uv pip install \
121
160
  scikit-build-core setuptools-scm h5py \
122
161
  build auditwheel delocate
123
162
 
124
163
  # ──────────────────────────────────────────────
125
- # 2. Conan: install C++ deps (cached per platform)
164
+ # 2. vcpkg: install C++ deps (cached per platform)
126
165
  # ──────────────────────────────────────────────
127
166
 
128
- - name: Cache Conan packages
129
- uses: actions/cache@v4
130
- with:
131
- path: ~/.conan2
132
- key: conan-${{ matrix.os }}-${{ matrix.arch }}-${{ hashFiles('lib/conanfile.py') }}
133
- restore-keys: conan-${{ matrix.os }}-${{ matrix.arch }}-
134
-
135
- - name: Conan install
136
- working-directory: lib
167
+ - name: Bootstrap vcpkg
137
168
  run: |
138
- uv tool install conan
139
- conan profile detect --force
140
- conan install . --build=missing -of build \
141
- -c tools.system.package_manager:mode=install \
142
- -c tools.system.package_manager:sudo=True
169
+ git clone https://github.com/microsoft/vcpkg.git ${GITHUB_WORKSPACE}/vcpkg
170
+ ${GITHUB_WORKSPACE}/vcpkg/bootstrap-vcpkg.sh
171
+ echo "VCPKG_ROOT=${GITHUB_WORKSPACE}/vcpkg" >> $GITHUB_ENV
172
+ echo "CMAKE_TOOLCHAIN_FILE=${GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV
143
173
 
144
- - name: Find Conan toolchain
145
- run: |
146
- TOOLCHAIN=$(find ${{ github.workspace }}/lib/build -name "conan_toolchain.cmake" | head -1)
147
- if [ -z "$TOOLCHAIN" ]; then
148
- echo "ERROR: conan_toolchain.cmake not found"
149
- find ${{ github.workspace }}/lib/build -type f -name "*.cmake" || true
150
- exit 1
151
- fi
152
- echo "CMAKE_TOOLCHAIN_FILE=$TOOLCHAIN" >> $GITHUB_ENV
153
- echo "Found toolchain at: $TOOLCHAIN"
174
+ - name: Cache vcpkg packages
175
+ uses: actions/cache@v4
176
+ with:
177
+ path: ~/.cache/vcpkg/archives
178
+ key: vcpkg-${{ matrix.os }}-${{ matrix.arch }}-${{ hashFiles('lib/vcpkg.json') }}
179
+ restore-keys: vcpkg-${{ matrix.os }}-${{ matrix.arch }}-
154
180
 
155
181
  # ──────────────────────────────────────────────
156
182
  # 3. Discover h5py HDF5 + build wheel
157
183
  # ──────────────────────────────────────────────
158
-
159
184
  - name: Discover h5py HDF5 location
160
185
  run: |
161
- H5PY_HDF5_DIR=$(python3 -c "
186
+ HDF5_DIR=$(${GITHUB_WORKSPACE}/.venv/bin/python -c "
162
187
  import h5py, os
163
188
  d = os.path.dirname(h5py.__file__)
164
189
  dylibs = os.path.join(d, '.dylibs')
165
190
  libs = os.path.join(os.path.dirname(d), 'h5py.libs')
166
191
  print(dylibs if os.path.exists(dylibs) else libs)
167
192
  ")
168
- echo "H5PY_HDF5_DIR=$H5PY_HDF5_DIR" >> $GITHUB_ENV
169
- echo "Discovered h5py HDF5 at: $H5PY_HDF5_DIR"
170
- ls -la "$H5PY_HDF5_DIR"
193
+ echo "HDF5_DIR=$HDF5_DIR" >> $GITHUB_ENV
171
194
 
172
195
  - name: Build wheel
173
- run: |
174
- uv build --wheel --no-build-isolation --python ${{ github.workspace }}/.venv/bin/python
196
+ run: uv build --wheel --no-build-isolation --python ${GITHUB_WORKSPACE}/.venv/bin/python
197
+
175
198
  # ──────────────────────────────────────────────
176
199
  # 4. Repair wheel for PyPI
177
200
  # ──────────────────────────────────────────────
@@ -179,7 +202,7 @@ jobs:
179
202
  - name: Repair wheel (Linux)
180
203
  if: runner.os == 'Linux'
181
204
  run: |
182
- export LD_LIBRARY_PATH="${H5PY_HDF5_DIR}:${LD_LIBRARY_PATH}"
205
+ export LD_LIBRARY_PATH="${HDF5_DIR}:${LD_LIBRARY_PATH}"
183
206
  auditwheel show dist/*.whl
184
207
  auditwheel repair dist/*.whl -w wheelhouse/ \
185
208
  --exclude libhdf5.so \
@@ -190,7 +213,7 @@ jobs:
190
213
  - name: Repair wheel (macOS)
191
214
  if: runner.os == 'macOS'
192
215
  run: |
193
- export DYLD_LIBRARY_PATH="${H5PY_HDF5_DIR}:${DYLD_LIBRARY_PATH}"
216
+ export DYLD_LIBRARY_PATH="${HDF5_DIR}:${DYLD_LIBRARY_PATH}"
194
217
  delocate-listdeps dist/*.whl
195
218
  delocate-wheel -w wheelhouse/ dist/*.whl \
196
219
  --exclude libhdf5 \
@@ -223,7 +246,7 @@ jobs:
223
246
  ext="${lib_file##*.}"
224
247
  cp "$lib_file" "lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext"
225
248
  echo "LIB_ARTIFACT=lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext" >> $GITHUB_ENV
226
-
249
+
227
250
  - name: Fix HDF5 paths in standalone binary (macOS)
228
251
  if: runner.os == 'macOS' && github.event_name == 'release' && matrix.python == '3.12'
229
252
  run: |
@@ -233,7 +256,7 @@ jobs:
233
256
  install_name_tool -change "$HDF5_REF" "@rpath/$HDF5_FILENAME" "$LIB"
234
257
  echo "Fixed: $HDF5_REF → @rpath/$HDF5_FILENAME"
235
258
  otool -L "$LIB" | grep hdf5
236
-
259
+
237
260
  - name: Fix HDF5 paths in standalone binary (Linux)
238
261
  if: runner.os == 'Linux' && github.event_name == 'release' && matrix.python == '3.12'
239
262
  run: |
@@ -244,7 +267,7 @@ jobs:
244
267
  "$LIB"
245
268
  echo "Fixed HDF5 dependency"
246
269
  ldd "$LIB" | grep hdf5 || patchelf --print-needed "$LIB" | grep hdf5
247
-
270
+
248
271
  - name: Attach native library to GitHub release
249
272
  if: github.event_name == 'release' && matrix.python == '3.12'
250
273
  uses: softprops/action-gh-release@v2
@@ -346,7 +369,7 @@ jobs:
346
369
 
347
370
  publish:
348
371
  name: Publish to PyPI
349
- needs: [test_testpypi] # ← now waits for TestPyPI to pass
372
+ needs: [test_testpypi]
350
373
  runs-on: ubuntu-latest
351
374
  if: github.event_name == 'release'
352
375
  environment:
@@ -72,3 +72,13 @@ uv.lock
72
72
  *.flv
73
73
  *.mov
74
74
  *.wmv
75
+
76
+ # vcpkg
77
+ lib/vcpkg_installed/
78
+ vcpkg_installed/
79
+
80
+ # Conan generated (legacy)
81
+ lib/activate.sh
82
+ lib/deactivate.sh
83
+ .conan2/
84
+ conan.conf
@@ -0,0 +1,343 @@
1
+ Metadata-Version: 2.2
2
+ Name: arraymorph
3
+ Version: 0.2.0b2
4
+ Summary: HDF5 VOL connector for cloud object storage (AWS S3, Azure Blob)
5
+ Author: ruochenj123, wangtg2013
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ICICLE-ai/ArrayMorph
8
+ Project-URL: Repository, https://github.com/ICICLE-ai/ArrayMorph
9
+ Project-URL: Issues, https://github.com/ICICLE-ai/ArrayMorph/issues
10
+ Requires-Python: >=3.9
11
+ Requires-Dist: h5py>=3.11.0
12
+ Description-Content-Type: text/markdown
13
+
14
+ # ArrayMorph
15
+
16
+ [![Build Status](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yaml/badge.svg)](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yaml)
17
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
18
+
19
+ ArrayMorph enables efficient storage and retrieval of array data from cloud object stores, supporting AWS S3 and Azure Blob Storage. It is an HDF5 Virtual Object Layer (VOL) plugin that transparently routes HDF5 file operations to cloud storage — existing h5py or HDF5 C++ code works unchanged once the plugin is loaded.
20
+
21
+ **Tag**: CI4AI
22
+
23
+ ---
24
+
25
+ # How-To Guides
26
+
27
+ ## Install ArrayMorph
28
+
29
+ ```bash
30
+ pip install arraymorph
31
+ ```
32
+
33
+ Once installed, jump straight to [Configure credentials for AWS S3](#configure-credentials-for-aws-s3) or [Azure](#configure-credentials-for-azure-blob-storage) below.
34
+
35
+ If you need the standalone `lib_arraymorph` binary, you can [download a pre-built release](#download-a-pre-built-lib_arraymorph) or [build from source](#build-from-source).
36
+
37
+ ## Configure credentials for AWS S3
38
+
39
+ Use the Python API before opening any HDF5 files:
40
+
41
+ ```python
42
+ import arraymorph
43
+
44
+ arraymorph.configure_s3(
45
+ bucket="my-bucket",
46
+ access_key="MY_ACCESS_KEY",
47
+ secret_key="MY_SECRET_KEY",
48
+ region="us-east-1",
49
+ use_tls=True,
50
+ )
51
+ arraymorph.enable()
52
+ ```
53
+
54
+ Or set environment variables directly:
55
+
56
+ ```bash
57
+ export STORAGE_PLATFORM=S3
58
+ export BUCKET_NAME=my-bucket
59
+ export AWS_ACCESS_KEY_ID=MY_ACCESS_KEY
60
+ export AWS_SECRET_ACCESS_KEY=MY_SECRET_KEY
61
+ export AWS_REGION=us-east-1
62
+ export HDF5_PLUGIN_PATH=$(python -c "import arraymorph; print(arraymorph.get_plugin_path())")
63
+ export HDF5_VOL_CONNECTOR=arraymorph
64
+ ```
65
+
66
+ ## Configure credentials for Azure Blob Storage
67
+
68
+ ```python
69
+ import arraymorph
70
+
71
+ arraymorph.configure_azure(
72
+ container="my-container",
73
+ connection_string="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net",
74
+ )
75
+ arraymorph.enable()
76
+ ```
77
+
78
+ Or set environment variables directly:
79
+
80
+ ```bash
81
+ export STORAGE_PLATFORM=Azure
82
+ export BUCKET_NAME=my-container
83
+ export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;..."
84
+ export HDF5_PLUGIN_PATH=$(python -c "import arraymorph; print(arraymorph.get_plugin_path())")
85
+ export HDF5_VOL_CONNECTOR=arraymorph
86
+ ```
87
+
88
+ ## Use an S3-compatible object store (MinIO, Ceph, Garage)
89
+
90
+ Pass `endpoint`, `addressing_style=True`, and `use_signed_payloads=True` to match the requirements of most self-hosted S3-compatible stores:
91
+
92
+ ```python
93
+ import arraymorph
94
+
95
+ arraymorph.configure_s3(
96
+ bucket="my-bucket",
97
+ access_key="MY_ACCESS_KEY",
98
+ secret_key="MY_SECRET_KEY",
99
+ endpoint="http://localhost:9000",
100
+ region="us-east-1",
101
+ use_tls=False,
102
+ addressing_style=True,
103
+ use_signed_payloads=True,
104
+ )
105
+ arraymorph.enable()
106
+ ```
107
+
108
+ ## Download a pre-built lib_arraymorph
109
+
110
+ Each [GitHub release](https://github.com/ICICLE-ai/ArrayMorph/releases) attaches standalone pre-compiled binaries of `lib_arraymorph` for all supported platforms:
111
+
112
+ | File | Platform |
113
+ | ---------------------------------- | ------------------- |
114
+ | `lib_arraymorph-linux-x86_64.so` | Linux x86_64 |
115
+ | `lib_arraymorph-linux-aarch64.so` | Linux aarch64 |
116
+ | `lib_arraymorph-macos-arm64.dylib` | macOS Apple Silicon |
117
+
118
+ Download the file for your platform from the release assets and set `HDF5_PLUGIN_PATH` to the directory containing it before calling `arraymorph.enable()` or setting `HDF5_VOL_CONNECTOR` manually.
119
+
120
+ ## Build from source
121
+
122
+ Use this path if you want to compile `lib_arraymorph` yourself — for example to target a specific platform, contribute changes, or build a custom wheel.
123
+
124
+ ### Prerequisites
125
+
126
+ - [vcpkg](https://github.com/microsoft/vcpkg) — installs the AWS and Azure C++ SDKs via CMake
127
+ - [CMake](https://cmake.org) and [Ninja](https://ninja-build.org)
128
+ - [uv](https://docs.astral.sh/uv/) — Python package manager
129
+
130
+ ### Step 1 — Clone and create a virtual environment
131
+
132
+ ```bash
133
+ git clone https://github.com/ICICLE-ai/ArrayMorph.git
134
+ cd ArrayMorph
135
+ uv venv
136
+ source .venv/bin/activate
137
+ ```
138
+
139
+ ### Step 2 — Install h5py
140
+
141
+ `lib_arraymorph` links against an HDF5 shared library at build time. Rather than requiring a separate system-wide HDF5 installation, the build system points CMake at the `.so` / `.dylib` that h5py already bundles. Install h5py first so those libraries are present:
142
+
143
+ ```bash
144
+ uv pip install h5py
145
+ ```
146
+
147
+ On macOS the bundled libraries land in `.venv/lib/python*/site-packages/h5py/.dylibs/`; on Linux in `.venv/lib/python*/site-packages/h5py.libs/`.
148
+
149
+ ### Step 3 — Configure and build the shared library
150
+
151
+ ```bash
152
+ export HDF5_DIR=$(.venv/bin/python -c "import h5py,os; d=os.path.dirname(h5py.__file__); print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))")
153
+
154
+ cmake -B lib/build -S lib \
155
+ -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \
156
+ -DCMAKE_BUILD_TYPE=Release \
157
+ -G Ninja
158
+
159
+ cmake --build lib/build
160
+ ```
161
+
162
+ This produces `lib/build/lib_arraymorph.dylib` on macOS or `lib/build/lib_arraymorph.so` on Linux.
163
+
164
+ ### Optional — Python package
165
+
166
+ If you also want to use the Python API, install the package in editable mode:
167
+
168
+ ```bash
169
+ HDF5_DIR=$HDF5_DIR \
170
+ CMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \
171
+ uv pip install -e .
172
+ ```
173
+
174
+ Or build a redistributable wheel:
175
+
176
+ ```bash
177
+ HDF5_DIR=$HDF5_DIR \
178
+ CMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \
179
+ uv build --wheel --no-build-isolation
180
+ ```
181
+
182
+ The wheel is written to `dist/`. Install it in any environment with:
183
+
184
+ ```bash
185
+ pip install dist/arraymorph-*.whl
186
+ ```
187
+
188
+ ---
189
+
190
+ # Tutorials
191
+
192
+ ## Write and read a chunked array on AWS S3
193
+
194
+ This tutorial walks through writing a 2-D NumPy array to a cloud HDF5 file and reading a slice of it back.
195
+
196
+ ### Prerequisites
197
+
198
+ - An AWS account with an S3 bucket, or an S3-compatible object store
199
+ - ArrayMorph installed (`pip install arraymorph`)
200
+
201
+ ### Step 1 — Configure and enable ArrayMorph
202
+
203
+ ```python
204
+ import arraymorph
205
+
206
+ arraymorph.configure_s3(
207
+ bucket="my-bucket",
208
+ access_key="MY_ACCESS_KEY",
209
+ secret_key="MY_SECRET_KEY",
210
+ region="us-east-1",
211
+ use_tls=True,
212
+ )
213
+ arraymorph.enable()
214
+ ```
215
+
216
+ `arraymorph.enable()` sets `HDF5_PLUGIN_PATH` and `HDF5_VOL_CONNECTOR` in the current process. Any `h5py.File(...)` call made after this point is routed through ArrayMorph.
217
+
218
+ ### Step 2 — Write array data
219
+
220
+ ```python
221
+ import h5py
222
+ import numpy as np
223
+
224
+ data = np.fromfunction(lambda i, j: i + j, (100, 100), dtype="i4")
225
+
226
+ with h5py.File("demo.h5", "w") as f:
227
+ f.create_dataset("values", data=data, chunks=(10, 10))
228
+ ```
229
+
230
+ Each 10×10 chunk is stored as a separate object in your S3 bucket.
231
+
232
+ ### Step 3 — Read a slice back
233
+
234
+ ```python
235
+ import h5py
236
+
237
+ with h5py.File("demo.h5", "r") as f:
238
+ dset = f["values"]
239
+ print(dset.dtype) # int32
240
+ print(dset[5:15, 5:15]) # fetches only the chunks that overlap this slice
241
+ ```
242
+
243
+ Only the chunks that overlap the requested hyperslab are fetched from cloud storage — no full-file download occurs.
244
+
245
+ ---
246
+
247
+ # Explanation
248
+
249
+ ## How ArrayMorph works
250
+
251
+ ArrayMorph is implemented as an HDF5 **Virtual Object Layer (VOL)** connector. The VOL is an abstraction layer inside the HDF5 library that separates the public API from the storage implementation. By providing a plugin that registers itself as a VOL connector, ArrayMorph intercepts every HDF5 file operation before it reaches the native POSIX layer.
252
+
253
+ When `arraymorph.enable()` is called:
254
+
255
+ 1. `HDF5_PLUGIN_PATH` is set to the directory containing the compiled shared library (`lib_arraymorph.so` / `lib_arraymorph.dylib`).
256
+ 2. `HDF5_VOL_CONNECTOR=arraymorph` tells HDF5 to load and activate that plugin for all subsequent file operations.
257
+
258
+ From this point, a call like `h5py.File("demo.h5", "w")` does not touch the local filesystem. Instead, the VOL connector:
259
+
260
+ 1. Reads cloud credentials from environment variables and constructs an AWS S3 or Azure Blob client (selected by `STORAGE_PLATFORM`).
261
+ 2. On dataset read/write, translates the HDF5 hyperslab selection into a list of chunks and dispatches asynchronous get/put requests against the object store — one object per chunk.
262
+
263
+ ### Chunked storage model
264
+
265
+ HDF5 datasets are divided into fixed-size chunks (e.g. `chunks=(64, 64)` for a 2-D dataset). ArrayMorph stores each chunk as an independent object in the bucket. The object key encodes the dataset path and chunk coordinates, so a partial read only fetches the chunks that overlap the requested slice. For large chunks, ArrayMorph can issue byte-range requests to retrieve only the needed bytes within a chunk object.
266
+
267
+ ### Async I/O
268
+
269
+ Both the S3 and Azure backends use asynchronous operations dispatched to a thread pool. This allows ArrayMorph to fetch multiple chunks in parallel, which is important for workloads that access many chunks per read (e.g. strided access patterns in machine learning data loaders).
270
+
271
+ ### Compatibility
272
+
273
+ Because the interception happens at the VOL layer, no changes to application code are required. Any program that opens HDF5 files with h5py or the HDF5 C++ API will automatically use ArrayMorph once the plugin is loaded.
274
+
275
+ ---
276
+
277
+ # References
278
+
279
+ ## Python API
280
+
281
+ ### `arraymorph.enable() -> None`
282
+
283
+ Sets `HDF5_PLUGIN_PATH` and `HDF5_VOL_CONNECTOR` in the current process environment. Must be called before any `h5py.File(...)` call.
284
+
285
+ ### `arraymorph.get_plugin_path() -> str`
286
+
287
+ Returns the directory containing the compiled VOL plugin. Useful when you need to set `HDF5_PLUGIN_PATH` manually.
288
+
289
+ ### `arraymorph.configure_s3(bucket, access_key, secret_key, endpoint=None, region="us-east-2", use_tls=False, addressing_style=False, use_signed_payloads=False) -> None`
290
+
291
+ Configures the S3 client. All parameters are written to environment variables consumed by the C++ plugin at file-open time.
292
+
293
+ | Parameter | Environment variable | Default | Description |
294
+ | --------------------- | ------------------------- | ----------- | ---------------------------------------------------- |
295
+ | `bucket` | `BUCKET_NAME` | — | S3 bucket name |
296
+ | `access_key` | `AWS_ACCESS_KEY_ID` | — | Access key ID |
297
+ | `secret_key` | `AWS_SECRET_ACCESS_KEY` | — | Secret access key |
298
+ | `endpoint` | `AWS_ENDPOINT_URL_S3` | AWS default | Custom endpoint for S3-compatible stores |
299
+ | `region` | `AWS_REGION` | `us-east-2` | SigV4 signing region |
300
+ | `use_tls` | `AWS_USE_TLS` | `false` | Use HTTPS when `True` |
301
+ | `addressing_style` | `AWS_S3_ADDRESSING_STYLE` | `virtual` | `path` when `True`; required for most non-AWS stores |
302
+ | `use_signed_payloads` | `AWS_SIGNED_PAYLOADS` | `false` | Include request body in SigV4 signature |
303
+
304
+ ### `arraymorph.configure_azure(container, connection_string=None) -> None`
305
+
306
+ Configures the Azure Blob client.
307
+
308
+ | Parameter | Environment variable | Default | Description |
309
+ | ------------------- | --------------------------------- | -------- | ------------------------------- |
310
+ | `container` | `BUCKET_NAME` | — | Azure container name |
311
+ | `connection_string` | `AZURE_STORAGE_CONNECTION_STRING` | From env | Azure Storage connection string |
312
+
313
+ ## Environment variables
314
+
315
+ All configuration can be applied via environment variables without using the Python API. This is useful when running HDF5 C++ programs directly.
316
+
317
+ | Variable | Description |
318
+ | --------------------------------- | --------------------------------------------------- |
319
+ | `HDF5_PLUGIN_PATH` | Directory containing `lib_arraymorph.so` / `.dylib` |
320
+ | `HDF5_VOL_CONNECTOR` | Must be `arraymorph` to activate the plugin |
321
+ | `STORAGE_PLATFORM` | `S3` (default) or `Azure` |
322
+ | `BUCKET_NAME` | Bucket or container name |
323
+ | `AWS_ACCESS_KEY_ID` | S3 access key |
324
+ | `AWS_SECRET_ACCESS_KEY` | S3 secret key |
325
+ | `AWS_REGION` | SigV4 signing region |
326
+ | `AWS_ENDPOINT_URL_S3` | Custom S3-compatible endpoint URL |
327
+ | `AWS_USE_TLS` | `true` / `false` |
328
+ | `AWS_S3_ADDRESSING_STYLE` | `path` or `virtual` |
329
+ | `AWS_SIGNED_PAYLOADS` | `true` / `false` |
330
+ | `AZURE_STORAGE_CONNECTION_STRING` | Azure connection string |
331
+
332
+ ## External references
333
+
334
+ - [HDF5 VOL connectors](https://docs.hdfgroup.org/hdf5/develop/_v_o_l.html)
335
+ - [AWS SDK for C++](https://github.com/aws/aws-sdk-cpp)
336
+ - [Azure SDK for C++](https://github.com/Azure/azure-sdk-for-cpp)
337
+ - [h5py documentation](https://docs.h5py.org/en/stable/)
338
+
339
+ ---
340
+
341
+ ## Acknowledgements
342
+
343
+ This project is supported by the National Science Foundation (NSF) funded AI institute for Intelligent Cyberinfrastructure with Computational Learning in the Environment (ICICLE) (OAC 2112606).