arraymorph 0.2.0b2.dev0__cp314-cp314-macosx_15_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arraymorph/__init__.py ADDED
@@ -0,0 +1,161 @@
1
+ """
2
+ ArrayMorph - HDF5 VOL connector for cloud object storage.
3
+
4
+ Supports AWS S3 and Azure Blob Storage via HDF5's Virtual Object Layer.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from pathlib import Path
11
+
12
+ __version__ = "0.2.0"
13
+
14
+ # The compiled VOL plugin lives next to this file after installation
15
+ _PLUGIN_DIR = str(Path(__file__).parent / "lib")
16
+
17
+
18
def get_plugin_path() -> str:
    """Return the directory holding the ArrayMorph VOL plugin (.so/.dylib).

    Handy for configuring HDF5 manually:
    >>> import arraymorph
    >>> os.environ["HDF5_PLUGIN_PATH"] = arraymorph.get_plugin_path()
    """
    plugin_dir = _PLUGIN_DIR
    return plugin_dir
26
+
27
+
28
def enable() -> None:
    """Point HDF5 at the ArrayMorph VOL connector via environment variables.

    Exports HDF5_PLUGIN_PATH and HDF5_VOL_CONNECTOR so every subsequent
    h5py call is routed through the ArrayMorph VOL connector.

    Usage:
        >>> import arraymorph
        >>> arraymorph.enable()
        >>> import h5py
        >>> f = h5py.File("s3://bucket/data.h5", "r")
    """
    env = os.environ
    env["HDF5_PLUGIN_PATH"] = _PLUGIN_DIR
    env["HDF5_VOL_CONNECTOR"] = "arraymorph"
42
+
43
+
44
def configure_s3(
    bucket: str,
    access_key: str = "",
    secret_key: str = "",
    endpoint: str | None = None,
    region: str = "us-east-2",
    use_tls: bool = False,
    addressing_style: bool = False,
    use_signed_payloads: bool = False,
) -> None:
    """Configure AWS S3 credentials and client behavior for ArrayMorph.

    Sets the environment variables read by the VOL connector's S3 client
    at initialization time. Call this before any h5py file operations.

    Credentials may be passed explicitly or pre-exported as
    AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY; explicit arguments win.

    Args:
        bucket: Name of the S3 bucket where HDF5 files are stored.
            Maps to: BUCKET_NAME
        access_key: Access key ID for authentication with the S3 service.
            Falls back to the AWS_ACCESS_KEY_ID environment variable.
            Maps to: AWS_ACCESS_KEY_ID
        secret_key: Secret access key paired with access_key for authentication.
            Falls back to the AWS_SECRET_ACCESS_KEY environment variable.
            Maps to: AWS_SECRET_ACCESS_KEY
        endpoint: Custom S3-compatible endpoint URL (e.g. 'http://localhost:3900').
            When None, the S3 client targets the default AWS endpoint. Required
            for any non-AWS S3-compatible object store (MinIO, Ceph, etc.).
            Maps to: AWS_ENDPOINT_URL_S3
        region: Region label used in SigV4 request signing. Must match the region
            your bucket or S3-compatible store is configured with — a mismatch
            produces signature validation errors. Defaults to 'us-east-2'.
            Maps to: AWS_REGION
        use_tls: Whether to use HTTPS (True) or HTTP (False) for S3 connections.
            Set to False for object stores that do not have TLS configured.
            Defaults to False.
            Maps to: AWS_USE_TLS
        addressing_style: URL addressing style for the S3 client. When True,
            uses path-style ('endpoint/bucket/key'). When False, uses
            virtual-hosted style ('bucket.endpoint/key'), which can cause the
            S3 client to misinterpret the HDF5 filename as the bucket name.
            Most S3-compatible stores require path-style addressing.
            Defaults to False.
            Maps to: AWS_S3_ADDRESSING_STYLE
        use_signed_payloads: Whether to include the request body in the SigV4
            signature (PayloadSigningPolicy::Always). Some S3-compatible stores
            require signed payloads and will reject requests with signature
            validation errors if this is disabled. Defaults to False.
            Maps to: AWS_SIGNED_PAYLOADS

    Raises:
        ValueError: If no credentials are supplied, either as arguments or
            via the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
            variables.

    Example:
        >>> import arraymorph
        >>> arraymorph.configure_s3(
        ...     bucket="my-bucket",
        ...     access_key="my-access-key",
        ...     secret_key="my-secret-key",
        ...     endpoint="http://localhost:3900",
        ...     region="us-east-1",
        ...     use_tls=False,
        ...     addressing_style=True,
        ...     use_signed_payloads=True,
        ... )
        >>> arraymorph.enable()
    """
    # Fall back to already-exported AWS credentials. The error message below
    # promises this fallback, and configure_azure() behaves the same way with
    # AZURE_STORAGE_CONNECTION_STRING; previously the fallback was missing.
    access_key = access_key or os.environ.get("AWS_ACCESS_KEY_ID", "")
    secret_key = secret_key or os.environ.get("AWS_SECRET_ACCESS_KEY", "")

    if not (access_key and secret_key):
        raise ValueError(
            "configure_s3() requires both 'access_key' and 'secret_key'. "
            "Set them explicitly or export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
            "before calling this function."
        )

    os.environ["AWS_ACCESS_KEY_ID"] = access_key
    os.environ["AWS_SECRET_ACCESS_KEY"] = secret_key
    os.environ["STORAGE_PLATFORM"] = "S3"
    os.environ["BUCKET_NAME"] = bucket
    os.environ["AWS_REGION"] = region

    if endpoint:
        os.environ["AWS_ENDPOINT_URL_S3"] = endpoint

    # The C++ side parses lowercase "true"/"false" strings.
    os.environ["AWS_USE_TLS"] = str(use_tls).lower()
    os.environ["AWS_S3_ADDRESSING_STYLE"] = "path" if addressing_style else "virtual"
    os.environ["AWS_SIGNED_PAYLOADS"] = str(use_signed_payloads).lower()
124
+
125
+
126
def configure_azure(
    container: str,
    connection_string: str | None = None,
) -> None:
    """Configure Azure Blob Storage credentials for ArrayMorph.

    Exports the environment variables the VOL connector's Azure client
    reads when it initializes, so call this before any h5py file operation.

    Args:
        container: Azure Blob Storage container holding the HDF5 files.
            Maps to: BUCKET_NAME
        connection_string: Azure Storage connection string used to authenticate
            and locate the storage account. When None, the connector falls back
            to a pre-exported AZURE_STORAGE_CONNECTION_STRING environment
            variable. Maps to: AZURE_STORAGE_CONNECTION_STRING

    Raises:
        ValueError: If no connection string is supplied and none is already
            exported in the environment.

    Example:
        >>> import arraymorph
        >>> arraymorph.configure_azure(
        ...     container="my-container",
        ...     connection_string="DefaultEndpointsProtocol=https;AccountName=...",
        ... )
        >>> arraymorph.enable()
    """
    env_has_credentials = bool(os.environ.get("AZURE_STORAGE_CONNECTION_STRING"))
    if not (connection_string or env_has_credentials):
        raise ValueError(
            "configure_azure() requires a 'connection_string'. "
            "Set it explicitly or export AZURE_STORAGE_CONNECTION_STRING "
            "before calling this function."
        )

    env = os.environ
    env["STORAGE_PLATFORM"] = "Azure"
    env["BUCKET_NAME"] = container
    if connection_string:
        env["AZURE_STORAGE_CONNECTION_STRING"] = connection_string
Binary file
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.2
2
+ Name: arraymorph
3
+ Version: 0.2.0b2.dev0
4
+ Summary: HDF5 VOL connector for cloud object storage (AWS S3, Azure Blob)
5
+ Author: ruochenj123, wangtg2013
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ICICLE-ai/ArrayMorph
8
+ Project-URL: Repository, https://github.com/ICICLE-ai/ArrayMorph
9
+ Project-URL: Issues, https://github.com/ICICLE-ai/ArrayMorph/issues
10
+ Requires-Python: >=3.9
11
+ Requires-Dist: h5py>=3.11.0
12
+ Description-Content-Type: text/markdown
13
+
14
+ # ArrayMorph
15
+
16
+ [![Build Status](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yml/badge.svg)](https://github.com/ICICLE-ai/arraymorph/actions/workflows/build.yml)
17
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
18
+
19
+ ArrayMorph is a software system for efficiently managing array data stored on cloud object storage. It supports both the HDF5 C++ API and the h5py API. Data returned by the h5py API comes back as NumPy arrays, so users can access array data stored in the cloud and feed it into machine learning pipelines seamlessly.
20
+
21
+ **Tag**: CI4AI
22
+
23
+ ---
24
+
25
+ # How-To Guides
26
+
27
+ ## Install dependencies
28
+
29
+ It is recommended to use Conda (and conda-forge) for managing dependencies.
30
+
31
+ 1. Install [Miniconda](https://docs.anaconda.com/miniconda/)
32
+ 2. Install [conda-build](https://docs.conda.io/projects/conda-build/en/stable/install-conda-build.html) for installing local conda packages
33
+ 3. Create and activate environment with dependencies:
34
+ ```bash
35
+ conda create -n arraymorph conda-forge::gxx=9
36
+ conda activate arraymorph
37
+ conda install -n arraymorph cmake conda-forge::hdf5=1.14.2 conda-forge::aws-sdk-cpp conda-forge::azure-storage-blobs-cpp conda-forge::h5py
38
+ ```
39
+
40
+ ## Install ArrayMorph via ArrayMorph local conda package
41
+ ```bash
42
+ git clone https://github.com/ICICLE-ai/arraymorph.git
43
+ cd arraymorph/arraymorph_channel
44
+ conda index .
45
+ conda install -n arraymorph arraymorph -c file://$(pwd) -c conda-forge
46
+ ```
47
+
48
+ ## Install ArrayMorph from source code
49
+
50
+ ### Build ArrayMorph
51
+ ```bash
52
+ git clone https://github.com/ICICLE-ai/arraymorph.git
53
+ cd arraymorph/arraymorph
54
+ cmake -B ./build -S . -DCMAKE_PREFIX_PATH=$CONDA_PREFIX
55
+ cd build
56
+ make
57
+ ```
58
+
59
+ ### Enable VOL plugin:
60
+ ```bash
61
+ export HDF5_PLUGIN_PATH=/path/to/arraymorph/arraymorph/build/src
62
+ export HDF5_VOL_CONNECTOR=arraymorph
63
+ ```
64
+
65
+ ## Configure Environment for Cloud Access
66
+
67
+ ### AWS Configuration:
68
+ ```bash
69
+ export STORAGE_PLATFORM=S3
70
+ export BUCKET_NAME=XXXXXX
71
+ export AWS_ACCESS_KEY_ID=XXXXXX
72
+ export AWS_SECRET_ACCESS_KEY=XXXXXX
73
+ export AWS_REGION=us-east-2 # or your bucket's region
74
+ ```
75
+
76
+ ### Azure Configuration:
77
+ ```bash
78
+ export STORAGE_PLATFORM=Azure
79
+ export BUCKET_NAME=XXXXXX
80
+ export AZURE_STORAGE_CONNECTION_STRING=XXXXXX
81
+ ```
82
+
83
+ ---
84
+
85
+ # Tutorials
86
+
87
+ ## Run a simple example: Writing and Reading HDF5 files from Cloud
88
+
89
+ ### Prerequisites:
90
+ - AWS or Azure cloud account with credentials
91
+ - S3 bucket or Azure container
92
+ - ArrayMorph dependencies installed
93
+
94
+ ### Steps:
95
+ 1. Activate conda environment
96
+ ```bash
97
+ conda activate arraymorph
98
+ ```
99
+
100
+ 2. Write sample HDF5 data to the cloud
101
+ ```bash
102
+ cd examples/python
103
+ python3 write.py
104
+ ```
105
+
106
+ 3. Read data back from cloud HDF5 file
107
+ ```bash
108
+ cd examples/python
109
+ python3 read.py
110
+ ```
111
+ ---
112
+
113
+ # Explanation
114
+
115
+ ### How ArrayMorph Works
116
+
117
+ ArrayMorph plugs into the HDF5 stack using a VOL (Virtual Object Layer) plugin that intercepts file operations and routes them to cloud object storage instead of local files. This allows existing HDF5 APIs (both C++ and h5py in Python) to operate on cloud-based data seamlessly, enabling transparent cloud access for scientific or ML pipelines.
118
+
119
+ It supports:
120
+ - Cloud backends: AWS S3 and Azure Blob
121
+ - File formats: Current binary data stream (we plan to extend to other formats like jpg in the future)
122
+ - Languages: C++ and Python (via h5py compatibility)
123
+
124
+ The system is designed to be efficient in latency-sensitive scenarios and aims to integrate well with large-scale distributed training and inference.
125
+
126
+ ---
127
+
128
+ ## References
129
+
130
+ - [HDF5 VOL connectors](https://docs.hdfgroup.org/hdf5/develop/_v_o_l.html)
131
+ - [AWS SDK for C++](https://github.com/aws/aws-sdk-cpp)
132
+ - [Azure SDK for C++](https://github.com/Azure/azure-sdk-for-cpp)
133
+ - [h5py documentation](https://docs.h5py.org/en/stable/)
134
+ - [conda-forge](https://conda-forge.org/)
135
+
136
+ ---
137
+
138
+ ## Acknowledgements
139
+
140
+ This project is supported by:
141
+
142
+ *National Science Foundation (NSF) funded AI institute for Intelligent Cyberinfrastructure with Computational Learning in the Environment (ICICLE) (OAC 2112606)*
@@ -0,0 +1,6 @@
1
+ arraymorph/__init__.py,sha256=Gz0-ZYGOoSpLbsLBlPIiF5buuizi-Q9iq-HHbsWlI44,6257
2
+ arraymorph/lib/lib_arraymorph.dylib,sha256=ujM3DRZPhmlbzC5PIxcOuBkHQayxhezywk6hrYjgevM,14191016
3
+ arraymorph-0.2.0b2.dev0.dist-info/RECORD,,
4
+ arraymorph-0.2.0b2.dev0.dist-info/WHEEL,sha256=nEGB58_ldWTXGbP8ePRzOy9ovTHE0yL0yPMzHJ-Wakw,141
5
+ arraymorph-0.2.0b2.dev0.dist-info/METADATA,sha256=Br1w8Hol-GWPFZLlDrtSuBRA-KoLiz9e198JNo3MPyg,4526
6
+ arraymorph-0.2.0b2.dev0.dist-info/licenses/LICENSE,sha256=dfxSWfn7Ool0X832DV85QnXw3rUwggqXZ1vVDGyxeEA,1145
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: scikit-build-core 0.12.0
3
+ Root-Is-Purelib: false
4
+ Tag: cp314-cp314-macosx_15_0_arm64
5
+ Generator: delocate 0.13.0
6
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Intelligent Cyberinfrastructure with Computational Learning in the Environment -- ICICLE
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.