arraymorph 0.2.0b1__tar.gz → 0.2.0b3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arraymorph-0.2.0b1/.github/workflows/build-lib_array_morph-and-pypi-package.yaml → arraymorph-0.2.0b3/.github/workflows/build.yaml +93 -70
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/.gitignore +10 -0
- arraymorph-0.2.0b3/CHANGELOG.md +15 -0
- arraymorph-0.2.0b3/PKG-INFO +343 -0
- arraymorph-0.2.0b3/README.md +330 -0
- arraymorph-0.2.0b3/justfile +56 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/CMakeLists.txt +18 -13
- arraymorph-0.2.0b3/lib/README.md +64 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/initialize.h +8 -1
- arraymorph-0.2.0b3/lib/justfile +37 -0
- arraymorph-0.2.0b3/lib/vcpkg.json +12 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/pyproject.toml +1 -1
- arraymorph-0.2.0b1/.github/workflows/build.yml +0 -62
- arraymorph-0.2.0b1/PKG-INFO +0 -142
- arraymorph-0.2.0b1/README.md +0 -129
- arraymorph-0.2.0b1/justfile +0 -63
- arraymorph-0.2.0b1/lib/README.md +0 -17
- arraymorph-0.2.0b1/lib/conanfile.py +0 -41
- arraymorph-0.2.0b1/lib/justfile +0 -40
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/.actrc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/.github/ISSUE_TEMPLATE/config.yaml +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/.python-version +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/CONAN-INTEGRATION.md +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/LICENSE +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/env-example.txt +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/examples/python/read.py +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/examples/python/write.py +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/.clangd +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/core/constants.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/core/logger.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/core/operators.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/core/utils.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/chunk_obj.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/dataset_callbacks.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/dataset_obj.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/file_callbacks.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/group_callbacks.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/include/arraymorph/s3vl/vol_connector.h +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/meta.yaml +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/scripts/extract_perspective.py +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/CMakeLists.txt +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/core/constants.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/core/operators.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/core/utils.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/s3vl/chunk_obj.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/s3vl/dataset_callbacks.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/s3vl/dataset_obj.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/s3vl/file_callbacks.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/s3vl/group_callbacks.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/lib/src/s3vl/vol_connector.cc +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/main.py +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/release.yaml +0 -0
- {arraymorph-0.2.0b1 → arraymorph-0.2.0b3}/src/arraymorph/__init__.py +0 -0
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
name: Build, Test, and Publish
|
|
2
2
|
|
|
3
3
|
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
4
6
|
pull_request:
|
|
5
7
|
branches: [main]
|
|
6
8
|
release:
|
|
@@ -11,52 +13,95 @@ jobs:
|
|
|
11
13
|
build_wheels:
|
|
12
14
|
name: Build (${{ matrix.os }} / ${{ matrix.arch }} / py${{ matrix.python }})
|
|
13
15
|
runs-on: ${{ matrix.runner }}
|
|
16
|
+
container: ${{ matrix.container || '' }}
|
|
14
17
|
strategy:
|
|
15
18
|
fail-fast: false
|
|
16
19
|
matrix:
|
|
17
20
|
include:
|
|
18
|
-
# Linux x86_64
|
|
19
|
-
- {
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
21
|
+
# Linux x86_64 (manylinux_2_28 container)
|
|
22
|
+
- {
|
|
23
|
+
os: linux,
|
|
24
|
+
arch: x86_64,
|
|
25
|
+
runner: ubuntu-latest,
|
|
26
|
+
container: "quay.io/pypa/manylinux_2_28_x86_64",
|
|
27
|
+
python: "3.9",
|
|
28
|
+
}
|
|
29
|
+
- {
|
|
30
|
+
os: linux,
|
|
31
|
+
arch: x86_64,
|
|
32
|
+
runner: ubuntu-latest,
|
|
33
|
+
container: "quay.io/pypa/manylinux_2_28_x86_64",
|
|
34
|
+
python: "3.10",
|
|
35
|
+
}
|
|
36
|
+
- {
|
|
37
|
+
os: linux,
|
|
38
|
+
arch: x86_64,
|
|
39
|
+
runner: ubuntu-latest,
|
|
40
|
+
container: "quay.io/pypa/manylinux_2_28_x86_64",
|
|
41
|
+
python: "3.11",
|
|
42
|
+
}
|
|
43
|
+
- {
|
|
44
|
+
os: linux,
|
|
45
|
+
arch: x86_64,
|
|
46
|
+
runner: ubuntu-latest,
|
|
47
|
+
container: "quay.io/pypa/manylinux_2_28_x86_64",
|
|
48
|
+
python: "3.12",
|
|
49
|
+
}
|
|
50
|
+
- {
|
|
51
|
+
os: linux,
|
|
52
|
+
arch: x86_64,
|
|
53
|
+
runner: ubuntu-latest,
|
|
54
|
+
container: "quay.io/pypa/manylinux_2_28_x86_64",
|
|
55
|
+
python: "3.13",
|
|
56
|
+
}
|
|
57
|
+
- {
|
|
58
|
+
os: linux,
|
|
59
|
+
arch: x86_64,
|
|
60
|
+
runner: ubuntu-latest,
|
|
61
|
+
container: "quay.io/pypa/manylinux_2_28_x86_64",
|
|
62
|
+
python: "3.14",
|
|
63
|
+
}
|
|
64
|
+
# Linux aarch64 (manylinux_2_28 container)
|
|
26
65
|
- {
|
|
27
66
|
os: linux,
|
|
28
67
|
arch: aarch64,
|
|
29
68
|
runner: ubuntu-24.04-arm,
|
|
69
|
+
container: "quay.io/pypa/manylinux_2_28_aarch64",
|
|
30
70
|
python: "3.9",
|
|
31
71
|
}
|
|
32
72
|
- {
|
|
33
73
|
os: linux,
|
|
34
74
|
arch: aarch64,
|
|
35
75
|
runner: ubuntu-24.04-arm,
|
|
76
|
+
container: "quay.io/pypa/manylinux_2_28_aarch64",
|
|
36
77
|
python: "3.10",
|
|
37
78
|
}
|
|
38
79
|
- {
|
|
39
80
|
os: linux,
|
|
40
81
|
arch: aarch64,
|
|
41
82
|
runner: ubuntu-24.04-arm,
|
|
83
|
+
container: "quay.io/pypa/manylinux_2_28_aarch64",
|
|
42
84
|
python: "3.11",
|
|
43
85
|
}
|
|
44
86
|
- {
|
|
45
87
|
os: linux,
|
|
46
88
|
arch: aarch64,
|
|
47
89
|
runner: ubuntu-24.04-arm,
|
|
90
|
+
container: "quay.io/pypa/manylinux_2_28_aarch64",
|
|
48
91
|
python: "3.12",
|
|
49
92
|
}
|
|
50
93
|
- {
|
|
51
94
|
os: linux,
|
|
52
95
|
arch: aarch64,
|
|
53
96
|
runner: ubuntu-24.04-arm,
|
|
97
|
+
container: "quay.io/pypa/manylinux_2_28_aarch64",
|
|
54
98
|
python: "3.13",
|
|
55
99
|
}
|
|
56
100
|
- {
|
|
57
101
|
os: linux,
|
|
58
102
|
arch: aarch64,
|
|
59
103
|
runner: ubuntu-24.04-arm,
|
|
104
|
+
container: "quay.io/pypa/manylinux_2_28_aarch64",
|
|
60
105
|
python: "3.14",
|
|
61
106
|
}
|
|
62
107
|
# macOS arm64
|
|
@@ -71,7 +116,7 @@ jobs:
|
|
|
71
116
|
- name: Checkout
|
|
72
117
|
uses: actions/checkout@v4
|
|
73
118
|
with:
|
|
74
|
-
fetch-depth: 0
|
|
119
|
+
fetch-depth: 0
|
|
75
120
|
|
|
76
121
|
- name: Install uv
|
|
77
122
|
uses: astral-sh/setup-uv@v7
|
|
@@ -85,93 +130,71 @@ jobs:
|
|
|
85
130
|
# 1. System deps
|
|
86
131
|
# ──────────────────────────────────────────────
|
|
87
132
|
|
|
88
|
-
- name: Install system deps (Linux)
|
|
133
|
+
- name: Install system deps (Linux manylinux)
|
|
134
|
+
if: runner.os == 'Linux'
|
|
135
|
+
run: |
|
|
136
|
+
yum -y install \
|
|
137
|
+
git curl ca-certificates \
|
|
138
|
+
zip unzip tar \
|
|
139
|
+
cmake ninja-build pkgconfig \
|
|
140
|
+
gcc gcc-c++ make \
|
|
141
|
+
perl perl-IPC-Cmd perl-ExtUtils-MakeMaker \
|
|
142
|
+
kernel-headers
|
|
143
|
+
|
|
144
|
+
- name: Sanity check compilers
|
|
89
145
|
if: runner.os == 'Linux'
|
|
90
146
|
run: |
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
pkg-config patchelf \
|
|
95
|
-
libx11-dev libx11-xcb-dev libfontenc-dev \
|
|
96
|
-
libice-dev libsm-dev libxau-dev libxaw7-dev \
|
|
97
|
-
libxcomposite-dev libxcursor-dev libxdamage-dev \
|
|
98
|
-
libxdmcp-dev libxext-dev libxfixes-dev libxi-dev \
|
|
99
|
-
libxinerama-dev libxkbfile-dev libxmu-dev \
|
|
100
|
-
libxmuu-dev libxpm-dev libxrandr-dev libxrender-dev \
|
|
101
|
-
libxres-dev libxss-dev libxt-dev libxtst-dev \
|
|
102
|
-
libxv-dev libxxf86vm-dev libxcb-glx0-dev \
|
|
103
|
-
libxcb-render0-dev libxcb-render-util0-dev \
|
|
104
|
-
libxcb-xkb-dev libxcb-icccm4-dev libxcb-image0-dev \
|
|
105
|
-
libxcb-keysyms1-dev libxcb-randr0-dev libxcb-shape0-dev \
|
|
106
|
-
libxcb-sync-dev libxcb-xfixes0-dev libxcb-xinerama0-dev \
|
|
107
|
-
libxcb-dri3-dev uuid-dev libxcb-cursor-dev \
|
|
108
|
-
libxcb-dri2-0-dev libxcb-present-dev \
|
|
109
|
-
libxcb-composite0-dev libxcb-ewmh-dev libxcb-res0-dev \
|
|
110
|
-
libasound2-dev
|
|
147
|
+
gcc --version
|
|
148
|
+
g++ --version
|
|
149
|
+
perl -MIPC::Cmd -e 'print "IPC::Cmd OK\n"'
|
|
111
150
|
|
|
112
151
|
- name: Install system deps (macOS)
|
|
113
152
|
if: runner.os == 'macOS'
|
|
114
|
-
run: brew install ninja cmake
|
|
153
|
+
run: brew install ninja cmake curl openssl
|
|
115
154
|
|
|
116
155
|
- name: Install Python tools
|
|
117
156
|
run: |
|
|
118
157
|
uv venv --python ${{ matrix.python }}
|
|
119
|
-
echo "${
|
|
158
|
+
echo "${GITHUB_WORKSPACE}/.venv/bin" >> $GITHUB_PATH
|
|
120
159
|
uv pip install \
|
|
121
160
|
scikit-build-core setuptools-scm h5py \
|
|
122
161
|
build auditwheel delocate
|
|
123
162
|
|
|
124
163
|
# ──────────────────────────────────────────────
|
|
125
|
-
# 2.
|
|
164
|
+
# 2. vcpkg: install C++ deps (cached per platform)
|
|
126
165
|
# ──────────────────────────────────────────────
|
|
127
166
|
|
|
128
|
-
- name:
|
|
129
|
-
uses: actions/cache@v4
|
|
130
|
-
with:
|
|
131
|
-
path: ~/.conan2
|
|
132
|
-
key: conan-${{ matrix.os }}-${{ matrix.arch }}-${{ hashFiles('lib/conanfile.py') }}
|
|
133
|
-
restore-keys: conan-${{ matrix.os }}-${{ matrix.arch }}-
|
|
134
|
-
|
|
135
|
-
- name: Conan install
|
|
136
|
-
working-directory: lib
|
|
167
|
+
- name: Bootstrap vcpkg
|
|
137
168
|
run: |
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
-c tools.system.package_manager:sudo=True
|
|
169
|
+
git clone https://github.com/microsoft/vcpkg.git ${GITHUB_WORKSPACE}/vcpkg
|
|
170
|
+
${GITHUB_WORKSPACE}/vcpkg/bootstrap-vcpkg.sh
|
|
171
|
+
echo "VCPKG_ROOT=${GITHUB_WORKSPACE}/vcpkg" >> $GITHUB_ENV
|
|
172
|
+
echo "CMAKE_TOOLCHAIN_FILE=${GITHUB_WORKSPACE}/vcpkg/scripts/buildsystems/vcpkg.cmake" >> $GITHUB_ENV
|
|
143
173
|
|
|
144
|
-
- name:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
exit 1
|
|
151
|
-
fi
|
|
152
|
-
echo "CMAKE_TOOLCHAIN_FILE=$TOOLCHAIN" >> $GITHUB_ENV
|
|
153
|
-
echo "Found toolchain at: $TOOLCHAIN"
|
|
174
|
+
- name: Cache vcpkg packages
|
|
175
|
+
uses: actions/cache@v4
|
|
176
|
+
with:
|
|
177
|
+
path: ~/.cache/vcpkg/archives
|
|
178
|
+
key: vcpkg-${{ matrix.os }}-${{ matrix.arch }}-${{ hashFiles('lib/vcpkg.json') }}
|
|
179
|
+
restore-keys: vcpkg-${{ matrix.os }}-${{ matrix.arch }}-
|
|
154
180
|
|
|
155
181
|
# ──────────────────────────────────────────────
|
|
156
182
|
# 3. Discover h5py HDF5 + build wheel
|
|
157
183
|
# ──────────────────────────────────────────────
|
|
158
|
-
|
|
159
184
|
- name: Discover h5py HDF5 location
|
|
160
185
|
run: |
|
|
161
|
-
|
|
186
|
+
HDF5_DIR=$(${GITHUB_WORKSPACE}/.venv/bin/python -c "
|
|
162
187
|
import h5py, os
|
|
163
188
|
d = os.path.dirname(h5py.__file__)
|
|
164
189
|
dylibs = os.path.join(d, '.dylibs')
|
|
165
190
|
libs = os.path.join(os.path.dirname(d), 'h5py.libs')
|
|
166
191
|
print(dylibs if os.path.exists(dylibs) else libs)
|
|
167
192
|
")
|
|
168
|
-
echo "
|
|
169
|
-
echo "Discovered h5py HDF5 at: $H5PY_HDF5_DIR"
|
|
170
|
-
ls -la "$H5PY_HDF5_DIR"
|
|
193
|
+
echo "HDF5_DIR=$HDF5_DIR" >> $GITHUB_ENV
|
|
171
194
|
|
|
172
195
|
- name: Build wheel
|
|
173
|
-
run:
|
|
174
|
-
|
|
196
|
+
run: uv build --wheel --no-build-isolation --python ${GITHUB_WORKSPACE}/.venv/bin/python
|
|
197
|
+
|
|
175
198
|
# ──────────────────────────────────────────────
|
|
176
199
|
# 4. Repair wheel for PyPI
|
|
177
200
|
# ──────────────────────────────────────────────
|
|
@@ -179,7 +202,7 @@ jobs:
|
|
|
179
202
|
- name: Repair wheel (Linux)
|
|
180
203
|
if: runner.os == 'Linux'
|
|
181
204
|
run: |
|
|
182
|
-
export LD_LIBRARY_PATH="${
|
|
205
|
+
export LD_LIBRARY_PATH="${HDF5_DIR}:${LD_LIBRARY_PATH}"
|
|
183
206
|
auditwheel show dist/*.whl
|
|
184
207
|
auditwheel repair dist/*.whl -w wheelhouse/ \
|
|
185
208
|
--exclude libhdf5.so \
|
|
@@ -190,7 +213,7 @@ jobs:
|
|
|
190
213
|
- name: Repair wheel (macOS)
|
|
191
214
|
if: runner.os == 'macOS'
|
|
192
215
|
run: |
|
|
193
|
-
export DYLD_LIBRARY_PATH="${
|
|
216
|
+
export DYLD_LIBRARY_PATH="${HDF5_DIR}:${DYLD_LIBRARY_PATH}"
|
|
194
217
|
delocate-listdeps dist/*.whl
|
|
195
218
|
delocate-wheel -w wheelhouse/ dist/*.whl \
|
|
196
219
|
--exclude libhdf5 \
|
|
@@ -223,7 +246,7 @@ jobs:
|
|
|
223
246
|
ext="${lib_file##*.}"
|
|
224
247
|
cp "$lib_file" "lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext"
|
|
225
248
|
echo "LIB_ARTIFACT=lib_arraymorph-${{ matrix.os }}-${{ matrix.arch }}.$ext" >> $GITHUB_ENV
|
|
226
|
-
|
|
249
|
+
|
|
227
250
|
- name: Fix HDF5 paths in standalone binary (macOS)
|
|
228
251
|
if: runner.os == 'macOS' && github.event_name == 'release' && matrix.python == '3.12'
|
|
229
252
|
run: |
|
|
@@ -233,7 +256,7 @@ jobs:
|
|
|
233
256
|
install_name_tool -change "$HDF5_REF" "@rpath/$HDF5_FILENAME" "$LIB"
|
|
234
257
|
echo "Fixed: $HDF5_REF → @rpath/$HDF5_FILENAME"
|
|
235
258
|
otool -L "$LIB" | grep hdf5
|
|
236
|
-
|
|
259
|
+
|
|
237
260
|
- name: Fix HDF5 paths in standalone binary (Linux)
|
|
238
261
|
if: runner.os == 'Linux' && github.event_name == 'release' && matrix.python == '3.12'
|
|
239
262
|
run: |
|
|
@@ -244,7 +267,7 @@ jobs:
|
|
|
244
267
|
"$LIB"
|
|
245
268
|
echo "Fixed HDF5 dependency"
|
|
246
269
|
ldd "$LIB" | grep hdf5 || patchelf --print-needed "$LIB" | grep hdf5
|
|
247
|
-
|
|
270
|
+
|
|
248
271
|
- name: Attach native library to GitHub release
|
|
249
272
|
if: github.event_name == 'release' && matrix.python == '3.12'
|
|
250
273
|
uses: softprops/action-gh-release@v2
|
|
@@ -346,7 +369,7 @@ jobs:
|
|
|
346
369
|
|
|
347
370
|
publish:
|
|
348
371
|
name: Publish to PyPI
|
|
349
|
-
needs: [test_testpypi]
|
|
372
|
+
needs: [test_testpypi]
|
|
350
373
|
runs-on: ubuntu-latest
|
|
351
374
|
if: github.event_name == 'release'
|
|
352
375
|
environment:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [Unreleased]
|
|
6
|
+
|
|
7
|
+
> **⚠️ Pre-release** — API may change. Feedback welcome via [GitHub Issues](https://github.com/ICICLE-ai/ArrayMorph/issues).
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
- **Python Package & API**: ArrayMorph is now available via `pip install arraymorph`. You can dynamically configure AWS S3, Azure Blob Storage, or any S3-compatible endpoint directly from Python (`arraymorph.configure_s3(...)` and `arraymorph.configure_azure(...)`).
|
|
11
|
+
- **Pre-built Binaries**: Pre-compiled binaries of `lib_arraymorph` are now attached to GitHub releases for Linux (x86_64, aarch64) and macOS (Apple Silicon).
|
|
12
|
+
- **Expanded Documentation**: The README has been overhauled with comprehensive How-To guides, tutorials, and a detailed explanation of ArrayMorph's chunked storage model and async I/O.
|
|
13
|
+
|
|
14
|
+
### Changed
|
|
15
|
+
- **Simplified Build System**: The build system has been revamped. It now uses `uv` for Python environments and `vcpkg` for fetching C++ SDK dependencies, which makes building from source much simpler.
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: arraymorph
|
|
3
|
+
Version: 0.2.0b3
|
|
4
|
+
Summary: HDF5 VOL connector for cloud object storage (AWS S3, Azure Blob)
|
|
5
|
+
Author: ruochenj123, wangtg2013
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ICICLE-ai/ArrayMorph
|
|
8
|
+
Project-URL: Repository, https://github.com/ICICLE-ai/ArrayMorph
|
|
9
|
+
Project-URL: Issues, https://github.com/ICICLE-ai/ArrayMorph/issues
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Requires-Dist: h5py>=3.11.0
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# ArrayMorph
|
|
15
|
+
|
|
16
|
+
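[![Build, Test, and Publish](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yaml/badge.svg)](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yaml)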
|
|
17
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
18
|
+
|
|
19
|
+
ArrayMorph enables efficient storage and retrieval of array data from cloud object stores, supporting AWS S3 and Azure Blob Storage. It is an HDF5 Virtual Object Layer (VOL) plugin that transparently routes HDF5 file operations to cloud storage — existing h5py or HDF5 C++ code works unchanged once the plugin is loaded.
|
|
20
|
+
|
|
21
|
+
**Tag**: CI4AI
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
# How-To Guides
|
|
26
|
+
|
|
27
|
+
## Install ArrayMorph
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install arraymorph
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Once installed, jump straight to [Configure credentials for AWS S3](#configure-credentials-for-aws-s3) or [Azure](#configure-credentials-for-azure-blob-storage) below.
|
|
34
|
+
|
|
35
|
+
If you need the standalone `lib_arraymorph` binary, you can [download a pre-built release](#download-a-pre-built-lib_arraymorph) or [build from source](#build-from-source).
|
|
36
|
+
|
|
37
|
+
## Configure credentials for AWS S3
|
|
38
|
+
|
|
39
|
+
Use the Python API before opening any HDF5 files:
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import arraymorph
|
|
43
|
+
|
|
44
|
+
arraymorph.configure_s3(
|
|
45
|
+
bucket="my-bucket",
|
|
46
|
+
access_key="MY_ACCESS_KEY",
|
|
47
|
+
secret_key="MY_SECRET_KEY",
|
|
48
|
+
region="us-east-1",
|
|
49
|
+
use_tls=True,
|
|
50
|
+
)
|
|
51
|
+
arraymorph.enable()
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Or set environment variables directly:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
export STORAGE_PLATFORM=S3
|
|
58
|
+
export BUCKET_NAME=my-bucket
|
|
59
|
+
export AWS_ACCESS_KEY_ID=MY_ACCESS_KEY
|
|
60
|
+
export AWS_SECRET_ACCESS_KEY=MY_SECRET_KEY
|
|
61
|
+
export AWS_REGION=us-east-1
|
|
62
|
+
export HDF5_PLUGIN_PATH=$(python -c "import arraymorph; print(arraymorph.get_plugin_path())")
|
|
63
|
+
export HDF5_VOL_CONNECTOR=arraymorph
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Configure credentials for Azure Blob Storage
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import arraymorph
|
|
70
|
+
|
|
71
|
+
arraymorph.configure_azure(
|
|
72
|
+
container="my-container",
|
|
73
|
+
connection_string="DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net",
|
|
74
|
+
)
|
|
75
|
+
arraymorph.enable()
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Or set environment variables directly:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
export STORAGE_PLATFORM=Azure
|
|
82
|
+
export BUCKET_NAME=my-container
|
|
83
|
+
export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=https;..."
|
|
84
|
+
export HDF5_PLUGIN_PATH=$(python -c "import arraymorph; print(arraymorph.get_plugin_path())")
|
|
85
|
+
export HDF5_VOL_CONNECTOR=arraymorph
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Use an S3-compatible object store (MinIO, Ceph, Garage)
|
|
89
|
+
|
|
90
|
+
Pass `endpoint`, `addressing_style=True` (which selects path-style addressing), and `use_signed_payloads=True` to match the requirements of most self-hosted S3-compatible stores:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import arraymorph
|
|
94
|
+
|
|
95
|
+
arraymorph.configure_s3(
|
|
96
|
+
bucket="my-bucket",
|
|
97
|
+
access_key="MY_ACCESS_KEY",
|
|
98
|
+
secret_key="MY_SECRET_KEY",
|
|
99
|
+
endpoint="http://localhost:9000",
|
|
100
|
+
region="us-east-1",
|
|
101
|
+
use_tls=False,
|
|
102
|
+
addressing_style=True,
|
|
103
|
+
use_signed_payloads=True,
|
|
104
|
+
)
|
|
105
|
+
arraymorph.enable()
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Download a pre-built lib_arraymorph
|
|
109
|
+
|
|
110
|
+
Each [GitHub release](https://github.com/ICICLE-ai/ArrayMorph/releases) attaches standalone pre-compiled binaries of `lib_arraymorph` for all supported platforms:
|
|
111
|
+
|
|
112
|
+
| File | Platform |
|
|
113
|
+
| ---------------------------------- | ------------------- |
|
|
114
|
+
| `lib_arraymorph-linux-x86_64.so` | Linux x86_64 |
|
|
115
|
+
| `lib_arraymorph-linux-aarch64.so` | Linux aarch64 |
|
|
116
|
+
| `lib_arraymorph-macos-arm64.dylib` | macOS Apple Silicon |
|
|
117
|
+
|
|
118
|
+
Download the file for your platform from the release assets and set `HDF5_PLUGIN_PATH` to the directory containing it before calling `arraymorph.enable()` or setting `HDF5_VOL_CONNECTOR` manually.
|
|
119
|
+
|
|
120
|
+
## Build from source
|
|
121
|
+
|
|
122
|
+
Use this path if you want to compile `lib_arraymorph` yourself — for example to target a specific platform, contribute changes, or build a custom wheel.
|
|
123
|
+
|
|
124
|
+
### Prerequisites
|
|
125
|
+
|
|
126
|
+
- [vcpkg](https://github.com/microsoft/vcpkg) — installs the AWS and Azure C++ SDKs via CMake
|
|
127
|
+
- [CMake](https://cmake.org) and [Ninja](https://ninja-build.org)
|
|
128
|
+
- [uv](https://docs.astral.sh/uv/) — Python package manager
|
|
129
|
+
|
|
130
|
+
### Step 1 — Clone and create a virtual environment
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
git clone https://github.com/ICICLE-ai/ArrayMorph.git
|
|
134
|
+
cd ArrayMorph
|
|
135
|
+
uv venv
|
|
136
|
+
source .venv/bin/activate
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Step 2 — Install h5py
|
|
140
|
+
|
|
141
|
+
`lib_arraymorph` links against an HDF5 shared library at build time. Rather than requiring a separate system-wide HDF5 installation, the build system points CMake at the `.so` / `.dylib` that h5py already bundles. Install h5py first so those libraries are present:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
uv pip install h5py
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
On macOS the bundled libraries land in `.venv/lib/python*/site-packages/h5py/.dylibs/`; on Linux in `.venv/lib/python*/site-packages/h5py.libs/`.
|
|
148
|
+
|
|
149
|
+
### Step 3 — Configure and build the shared library
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
export HDF5_DIR=$(.venv/bin/python -c "import h5py,os; d=os.path.dirname(h5py.__file__); print(os.path.join(d,'.dylibs') if os.path.exists(os.path.join(d,'.dylibs')) else os.path.join(os.path.dirname(d),'h5py.libs'))")
|
|
153
|
+
|
|
154
|
+
cmake -B lib/build -S lib \
|
|
155
|
+
-DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \
|
|
156
|
+
-DCMAKE_BUILD_TYPE=Release \
|
|
157
|
+
-G Ninja
|
|
158
|
+
|
|
159
|
+
cmake --build lib/build
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
This produces `lib/build/lib_arraymorph.dylib` on macOS or `lib/build/lib_arraymorph.so` on Linux.
|
|
163
|
+
|
|
164
|
+
### Optional — Python package
|
|
165
|
+
|
|
166
|
+
If you also want to use the Python API, install the package in editable mode:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
HDF5_DIR=$HDF5_DIR \
|
|
170
|
+
CMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \
|
|
171
|
+
uv pip install -e .
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Or build a redistributable wheel:
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
HDF5_DIR=$HDF5_DIR \
|
|
178
|
+
CMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT:-~/.vcpkg}/scripts/buildsystems/vcpkg.cmake \
|
|
179
|
+
uv build --wheel --no-build-isolation
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
The wheel is written to `dist/`. Install it in any environment with:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
pip install dist/arraymorph-*.whl
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
# Tutorials
|
|
191
|
+
|
|
192
|
+
## Write and read a chunked array on AWS S3
|
|
193
|
+
|
|
194
|
+
This tutorial walks through writing a 2-D NumPy array to a cloud HDF5 file and reading a slice of it back.
|
|
195
|
+
|
|
196
|
+
### Prerequisites
|
|
197
|
+
|
|
198
|
+
- An AWS account with an S3 bucket, or an S3-compatible object store
|
|
199
|
+
- ArrayMorph installed (`pip install arraymorph`)
|
|
200
|
+
|
|
201
|
+
### Step 1 — Configure and enable ArrayMorph
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
import arraymorph
|
|
205
|
+
|
|
206
|
+
arraymorph.configure_s3(
|
|
207
|
+
bucket="my-bucket",
|
|
208
|
+
access_key="MY_ACCESS_KEY",
|
|
209
|
+
secret_key="MY_SECRET_KEY",
|
|
210
|
+
region="us-east-1",
|
|
211
|
+
use_tls=True,
|
|
212
|
+
)
|
|
213
|
+
arraymorph.enable()
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
`arraymorph.enable()` sets `HDF5_PLUGIN_PATH` and `HDF5_VOL_CONNECTOR` in the current process. Any `h5py.File(...)` call made after this point is routed through ArrayMorph.
|
|
217
|
+
|
|
218
|
+
### Step 2 — Write array data
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
import h5py
|
|
222
|
+
import numpy as np
|
|
223
|
+
|
|
224
|
+
data = np.fromfunction(lambda i, j: i + j, (100, 100), dtype="i4")
|
|
225
|
+
|
|
226
|
+
with h5py.File("demo.h5", "w") as f:
|
|
227
|
+
f.create_dataset("values", data=data, chunks=(10, 10))
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
Each 10×10 chunk is stored as a separate object in your S3 bucket.
|
|
231
|
+
|
|
232
|
+
### Step 3 — Read a slice back
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
import h5py
|
|
236
|
+
|
|
237
|
+
with h5py.File("demo.h5", "r") as f:
|
|
238
|
+
dset = f["values"]
|
|
239
|
+
print(dset.dtype) # int32
|
|
240
|
+
print(dset[5:15, 5:15]) # fetches only the chunks that overlap this slice
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
Only the chunks that overlap the requested hyperslab are fetched from cloud storage — no full-file download occurs.
|
|
244
|
+
|
|
245
|
+
---
|
|
246
|
+
|
|
247
|
+
# Explanation
|
|
248
|
+
|
|
249
|
+
## How ArrayMorph works
|
|
250
|
+
|
|
251
|
+
ArrayMorph is implemented as an HDF5 **Virtual Object Layer (VOL)** connector. The VOL is an abstraction layer inside the HDF5 library that separates the public API from the storage implementation. By providing a plugin that registers itself as a VOL connector, ArrayMorph intercepts every HDF5 file operation before it reaches the native POSIX layer.
|
|
252
|
+
|
|
253
|
+
When `arraymorph.enable()` is called:
|
|
254
|
+
|
|
255
|
+
1. `HDF5_PLUGIN_PATH` is set to the directory containing the compiled shared library (`lib_arraymorph.so` / `lib_arraymorph.dylib`).
|
|
256
|
+
2. `HDF5_VOL_CONNECTOR=arraymorph` tells HDF5 to load and activate that plugin for all subsequent file operations.
|
|
257
|
+
|
|
258
|
+
From this point, a call like `h5py.File("demo.h5", "w")` does not touch the local filesystem. Instead, the VOL connector:
|
|
259
|
+
|
|
260
|
+
1. Reads cloud credentials from environment variables and constructs an AWS S3 or Azure Blob client (selected by `STORAGE_PLATFORM`).
|
|
261
|
+
2. On dataset read/write, translates the HDF5 hyperslab selection into a list of chunks and dispatches asynchronous get/put requests against the object store — one object per chunk.
|
|
262
|
+
|
|
263
|
+
### Chunked storage model
|
|
264
|
+
|
|
265
|
+
HDF5 datasets are divided into fixed-size chunks (e.g. `chunks=(64, 64)` for a 2-D dataset). ArrayMorph stores each chunk as an independent object in the bucket. The object key encodes the dataset path and chunk coordinates, so a partial read only fetches the chunks that overlap the requested slice. For large chunks, ArrayMorph can issue byte-range requests to retrieve only the needed bytes within a chunk object.
|
|
266
|
+
|
|
267
|
+
### Async I/O
|
|
268
|
+
|
|
269
|
+
Both the S3 and Azure backends use asynchronous operations dispatched to a thread pool. This allows ArrayMorph to fetch multiple chunks in parallel, which is important for workloads that access many chunks per read (e.g. strided access patterns in machine learning data loaders).
|
|
270
|
+
|
|
271
|
+
### Compatibility
|
|
272
|
+
|
|
273
|
+
Because the interception happens at the VOL layer, no changes to application code are required. Any program that opens HDF5 files with h5py or the HDF5 C++ API will automatically use ArrayMorph once the plugin is loaded.
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
# References
|
|
278
|
+
|
|
279
|
+
## Python API
|
|
280
|
+
|
|
281
|
+
### `arraymorph.enable() -> None`
|
|
282
|
+
|
|
283
|
+
Sets `HDF5_PLUGIN_PATH` and `HDF5_VOL_CONNECTOR` in the current process environment. Must be called before any `h5py.File(...)` call.
|
|
284
|
+
|
|
285
|
+
### `arraymorph.get_plugin_path() -> str`
|
|
286
|
+
|
|
287
|
+
Returns the directory containing the compiled VOL plugin. Useful when you need to set `HDF5_PLUGIN_PATH` manually.
|
|
288
|
+
|
|
289
|
+
### `arraymorph.configure_s3(bucket, access_key, secret_key, endpoint=None, region="us-east-2", use_tls=False, addressing_style=False, use_signed_payloads=False) -> None`
|
|
290
|
+
|
|
291
|
+
Configures the S3 client. All parameters are written to environment variables consumed by the C++ plugin at file-open time.
|
|
292
|
+
|
|
293
|
+
| Parameter | Environment variable | Default | Description |
|
|
294
|
+
| --------------------- | ------------------------- | ----------- | ---------------------------------------------------- |
|
|
295
|
+
| `bucket` | `BUCKET_NAME` | — | S3 bucket name |
|
|
296
|
+
| `access_key` | `AWS_ACCESS_KEY_ID` | — | Access key ID |
|
|
297
|
+
| `secret_key` | `AWS_SECRET_ACCESS_KEY` | — | Secret access key |
|
|
298
|
+
| `endpoint` | `AWS_ENDPOINT_URL_S3` | AWS default | Custom endpoint for S3-compatible stores |
|
|
299
|
+
| `region` | `AWS_REGION` | `us-east-2` | SigV4 signing region |
|
|
300
|
+
| `use_tls` | `AWS_USE_TLS` | `false` | Use HTTPS when `True` |
|
|
301
|
+
| `addressing_style` | `AWS_S3_ADDRESSING_STYLE` | `virtual` | `path` when `True`; required for most non-AWS stores |
|
|
302
|
+
| `use_signed_payloads` | `AWS_SIGNED_PAYLOADS` | `false` | Include request body in SigV4 signature |
|
|
303
|
+
|
|
304
|
+
### `arraymorph.configure_azure(container, connection_string=None) -> None`
|
|
305
|
+
|
|
306
|
+
Configures the Azure Blob client.
|
|
307
|
+
|
|
308
|
+
| Parameter | Environment variable | Default | Description |
|
|
309
|
+
| ------------------- | --------------------------------- | -------- | ------------------------------- |
|
|
310
|
+
| `container` | `BUCKET_NAME` | — | Azure container name |
|
|
311
|
+
| `connection_string` | `AZURE_STORAGE_CONNECTION_STRING` | From env | Azure Storage connection string |
|
|
312
|
+
|
|
313
|
+
## Environment variables
|
|
314
|
+
|
|
315
|
+
All configuration can be applied via environment variables without using the Python API. This is useful when running HDF5 C++ programs directly.
|
|
316
|
+
|
|
317
|
+
| Variable | Description |
|
|
318
|
+
| --------------------------------- | --------------------------------------------------- |
|
|
319
|
+
| `HDF5_PLUGIN_PATH` | Directory containing `lib_arraymorph.so` / `.dylib` |
|
|
320
|
+
| `HDF5_VOL_CONNECTOR` | Must be `arraymorph` to activate the plugin |
|
|
321
|
+
| `STORAGE_PLATFORM` | `S3` (default) or `Azure` |
|
|
322
|
+
| `BUCKET_NAME` | Bucket or container name |
|
|
323
|
+
| `AWS_ACCESS_KEY_ID` | S3 access key |
|
|
324
|
+
| `AWS_SECRET_ACCESS_KEY` | S3 secret key |
|
|
325
|
+
| `AWS_REGION` | SigV4 signing region |
|
|
326
|
+
| `AWS_ENDPOINT_URL_S3` | Custom S3-compatible endpoint URL |
|
|
327
|
+
| `AWS_USE_TLS` | `true` / `false` |
|
|
328
|
+
| `AWS_S3_ADDRESSING_STYLE` | `path` or `virtual` |
|
|
329
|
+
| `AWS_SIGNED_PAYLOADS` | `true` / `false` |
|
|
330
|
+
| `AZURE_STORAGE_CONNECTION_STRING` | Azure connection string |
|
|
331
|
+
|
|
332
|
+
## External references
|
|
333
|
+
|
|
334
|
+
- [HDF5 VOL connectors](https://docs.hdfgroup.org/hdf5/develop/_v_o_l.html)
|
|
335
|
+
- [AWS SDK for C++](https://github.com/aws/aws-sdk-cpp)
|
|
336
|
+
- [Azure SDK for C++](https://github.com/Azure/azure-sdk-for-cpp)
|
|
337
|
+
- [h5py documentation](https://docs.h5py.org/en/stable/)
|
|
338
|
+
|
|
339
|
+
---
|
|
340
|
+
|
|
341
|
+
## Acknowledgements
|
|
342
|
+
|
|
343
|
+
This project is supported by the AI Institute for Intelligent Cyberinfrastructure with Computational Learning in the Environment (ICICLE), funded by the National Science Foundation (NSF) under award OAC 2112606.
|