fluence-hpc 0.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fluence_hpc-0.0.0/PKG-INFO +111 -0
- fluence_hpc-0.0.0/README.md +96 -0
- fluence_hpc-0.0.0/fluence/__init__.py +16 -0
- fluence_hpc-0.0.0/fluence/interceptor.py +38 -0
- fluence_hpc-0.0.0/fluence/providers/__init__.py +27 -0
- fluence_hpc-0.0.0/fluence/providers/base.py +117 -0
- fluence_hpc-0.0.0/fluence/providers/braket.py +122 -0
- fluence_hpc-0.0.0/fluence/sidecar.py +99 -0
- fluence_hpc-0.0.0/fluence/sitecustomize.py +12 -0
- fluence_hpc-0.0.0/fluence/stage.py +59 -0
- fluence_hpc-0.0.0/fluence/ungate.py +73 -0
- fluence_hpc-0.0.0/fluence_hpc.egg-info/PKG-INFO +111 -0
- fluence_hpc-0.0.0/fluence_hpc.egg-info/SOURCES.txt +18 -0
- fluence_hpc-0.0.0/fluence_hpc.egg-info/dependency_links.txt +1 -0
- fluence_hpc-0.0.0/fluence_hpc.egg-info/entry_points.txt +2 -0
- fluence_hpc-0.0.0/fluence_hpc.egg-info/requires.txt +8 -0
- fluence_hpc-0.0.0/fluence_hpc.egg-info/top_level.txt +1 -0
- fluence_hpc-0.0.0/pyproject.toml +31 -0
- fluence_hpc-0.0.0/setup.cfg +4 -0
- fluence_hpc-0.0.0/tests/test_fluence.py +108 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fluence-hpc
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Fluence quantum-classical scheduling coordination library (sidecar + interceptor + providers)
|
|
5
|
+
Author: Fluence / converged-computing
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Provides-Extra: braket
|
|
10
|
+
Requires-Dist: amazon-braket-sdk; extra == "braket"
|
|
11
|
+
Requires-Dist: boto3; extra == "braket"
|
|
12
|
+
Provides-Extra: all
|
|
13
|
+
Requires-Dist: amazon-braket-sdk; extra == "all"
|
|
14
|
+
Requires-Dist: boto3; extra == "all"
|
|
15
|
+
|
|
16
|
+
# fluence (Python)
|
|
17
|
+
|
|
18
|
+
Quantum-classical scheduling coordination library for the Fluence Kubernetes
|
|
19
|
+
scheduler. Import name `fluence`; distributed on PyPI as `fluence-hpc`.
|
|
20
|
+
|
|
21
|
+
This package is **built into the Fluence sidecar image** and **staged into user
|
|
22
|
+
application containers at admission time** — users never install it.
|
|
23
|
+
|
|
24
|
+
## What it does
|
|
25
|
+
|
|
26
|
+
A hybrid quantum-classical workflow submits work to two queues: the Kubernetes
|
|
27
|
+
scheduler (classical) and a QPU vendor API (quantum). Classical worker pods would
|
|
28
|
+
idle while the QPU queue drains. Fluence gates the workers until the quantum task
|
|
29
|
+
is about to run, then releases them. This library is the runtime half:
|
|
30
|
+
|
|
31
|
+
- **interceptor** (`fluence.interceptor`) — runs inside the user container,
|
|
32
|
+
monkey-patches the vendor SDK submit call to tag each task with the pod UID.
|
|
33
|
+
- **sidecar** (`fluence.sidecar`) — runs in a sidecar container, discovers the
|
|
34
|
+
tagged task, polls queue position, and ungates the classical workers when the
|
|
35
|
+
task is ready (or, in observe-only mode, just records the queue position).
|
|
36
|
+
- **providers** (`fluence.providers`) — per-vendor plug-ins implementing both
|
|
37
|
+
halves. Providers self-register on import.
|
|
38
|
+
|
|
39
|
+
## Delivery (Model C)
|
|
40
|
+
|
|
41
|
+
The interceptor must run in the user's container, which does **not** have this
|
|
42
|
+
package installed. Rather than require a user install or concatenate a text
|
|
43
|
+
snippet, the Fluence webhook:
|
|
44
|
+
|
|
45
|
+
1. injects an **init container** (the sidecar image) running
|
|
46
|
+
`python -m fluence.stage <dir>`, which copies the pure-Python `fluence`
|
|
47
|
+
package plus a `sitecustomize.py` into a shared `emptyDir`;
|
|
48
|
+
2. mounts that volume into the user container and prepends `<dir>` to
|
|
49
|
+
`PYTHONPATH`.
|
|
50
|
+
|
|
51
|
+
Python imports `sitecustomize` automatically on every interpreter start
|
|
52
|
+
(`python app.py` included — unlike `PYTHONSTARTUP`, which only fires for
|
|
53
|
+
interactive sessions), so `import fluence.interceptor` runs before user code.
|
|
54
|
+
The interceptor patches whichever vendor SDK is present and fail-soft skips the
|
|
55
|
+
rest. No user code changes, no vendor SDKs added to the user image.
|
|
56
|
+
|
|
57
|
+
## Adding a provider
|
|
58
|
+
|
|
59
|
+
Add one module under `fluence/providers/` that subclasses `Provider`, implements
|
|
60
|
+
`install_interceptor` (tag hook), `matches`, `find_my_task`, `is_ready_to_ungate`,
|
|
61
|
+
`queue_position` (optional), and `job_id`, and calls `register(PROVIDER)`. Import
|
|
62
|
+
it from `fluence/providers/__init__.py`. Nothing else changes.
|
|
63
|
+
|
|
64
|
+
## Tests
|
|
65
|
+
|
|
66
|
+
python3 python/tests/test_fluence.py
|
|
67
|
+
|
|
68
|
+
## Building and releasing
|
|
69
|
+
|
|
70
|
+
The package is distributed on PyPI as `fluence-hpc` (the project name `fluence` is
|
|
71
|
+
already taken on PyPI). It is also baked into the sidecar image, so a release
|
|
72
|
+
moves the package version and the image tag together.
|
|
73
|
+
|
|
74
|
+
### Build the distributions
|
|
75
|
+
|
|
76
|
+
From `python/`:
|
|
77
|
+
|
|
78
|
+
pip install --upgrade build twine
|
|
79
|
+
python -m build
|
|
80
|
+
|
|
81
|
+
This produces `dist/fluence_hpc-<version>-py3-none-any.whl` and
|
|
82
|
+
`dist/fluence_hpc-<version>.tar.gz`. Upload both.
|
|
83
|
+
|
|
84
|
+
### Test on TestPyPI first
|
|
85
|
+
|
|
86
|
+
twine upload --repository testpypi dist/*
|
|
87
|
+
pip install --index-url https://test.pypi.org/simple/ fluence-hpc
|
|
88
|
+
python -c "import fluence; print(fluence.__version__)"
|
|
89
|
+
|
|
90
|
+
### Release to PyPI
|
|
91
|
+
|
|
92
|
+
twine upload dist/*
|
|
93
|
+
|
|
94
|
+
After this, `pip install fluence-hpc` works anywhere and imports as `fluence`.
|
|
95
|
+
|
|
96
|
+
### Versioning
|
|
97
|
+
|
|
98
|
+
Bump `version` in `pyproject.toml` and `__version__` in `fluence/__init__.py`
|
|
99
|
+
together (PyPI refuses to overwrite an existing version). Because the package is
|
|
100
|
+
version-locked into the sidecar image, tag the release so the image and the
|
|
101
|
+
package share a version — e.g. a `v0.1.1` git tag triggers both the
|
|
102
|
+
`sidecar-build-deploy` workflow (image) and a PyPI publish.
|
|
103
|
+
|
|
104
|
+
### Automated release (recommended)
|
|
105
|
+
|
|
106
|
+
Prefer GitHub Actions with PyPI Trusted Publishing (OIDC) over manual token
|
|
107
|
+
uploads: register the repo + workflow once on PyPI, then a release workflow
|
|
108
|
+
triggered by a version tag builds with `python -m build` and uploads with
|
|
109
|
+
`pypa/gh-action-pypi-publish` — no stored secret. The Docker image is built by
|
|
110
|
+
`.github/workflows/sidecar-build-deploy.yaml` on the same tag, keeping the
|
|
111
|
+
package version and image tag in lockstep.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# fluence (Python)
|
|
2
|
+
|
|
3
|
+
Quantum-classical scheduling coordination library for the Fluence Kubernetes
|
|
4
|
+
scheduler. Import name `fluence`; distributed on PyPI as `fluence-hpc`.
|
|
5
|
+
|
|
6
|
+
This package is **built into the Fluence sidecar image** and **staged into user
|
|
7
|
+
application containers at admission time** — users never install it.
|
|
8
|
+
|
|
9
|
+
## What it does
|
|
10
|
+
|
|
11
|
+
A hybrid quantum-classical workflow submits work to two queues: the Kubernetes
|
|
12
|
+
scheduler (classical) and a QPU vendor API (quantum). Classical worker pods would
|
|
13
|
+
idle while the QPU queue drains. Fluence gates the workers until the quantum task
|
|
14
|
+
is about to run, then releases them. This library is the runtime half:
|
|
15
|
+
|
|
16
|
+
- **interceptor** (`fluence.interceptor`) — runs inside the user container,
|
|
17
|
+
monkey-patches the vendor SDK submit call to tag each task with the pod UID.
|
|
18
|
+
- **sidecar** (`fluence.sidecar`) — runs in a sidecar container, discovers the
|
|
19
|
+
tagged task, polls queue position, and ungates the classical workers when the
|
|
20
|
+
task is ready (or, in observe-only mode, just records the queue position).
|
|
21
|
+
- **providers** (`fluence.providers`) — per-vendor plug-ins implementing both
|
|
22
|
+
halves. Providers self-register on import.
|
|
23
|
+
|
|
24
|
+
## Delivery (Model C)
|
|
25
|
+
|
|
26
|
+
The interceptor must run in the user's container, which does **not** have this
|
|
27
|
+
package installed. Rather than require a user install or concatenate a text
|
|
28
|
+
snippet, the Fluence webhook:
|
|
29
|
+
|
|
30
|
+
1. injects an **init container** (the sidecar image) running
|
|
31
|
+
`python -m fluence.stage <dir>`, which copies the pure-Python `fluence`
|
|
32
|
+
package plus a `sitecustomize.py` into a shared `emptyDir`;
|
|
33
|
+
2. mounts that volume into the user container and prepends `<dir>` to
|
|
34
|
+
`PYTHONPATH`.
|
|
35
|
+
|
|
36
|
+
Python imports `sitecustomize` automatically on every interpreter start
|
|
37
|
+
(`python app.py` included — unlike `PYTHONSTARTUP`, which only fires for
|
|
38
|
+
interactive sessions), so `import fluence.interceptor` runs before user code.
|
|
39
|
+
The interceptor patches whichever vendor SDK is present and fail-soft skips the
|
|
40
|
+
rest. No user code changes, no vendor SDKs added to the user image.
|
|
41
|
+
|
|
42
|
+
## Adding a provider
|
|
43
|
+
|
|
44
|
+
Add one module under `fluence/providers/` that subclasses `Provider`, implements
|
|
45
|
+
`install_interceptor` (tag hook), `matches`, `find_my_task`, `is_ready_to_ungate`,
|
|
46
|
+
`queue_position` (optional), and `job_id`, and calls `register(PROVIDER)`. Import
|
|
47
|
+
it from `fluence/providers/__init__.py`. Nothing else changes.
|
|
48
|
+
|
|
49
|
+
## Tests
|
|
50
|
+
|
|
51
|
+
python3 python/tests/test_fluence.py
|
|
52
|
+
|
|
53
|
+
## Building and releasing
|
|
54
|
+
|
|
55
|
+
The package is distributed on PyPI as `fluence-hpc` (the project name `fluence` is
|
|
56
|
+
already taken on PyPI). It is also baked into the sidecar image, so a release
|
|
57
|
+
moves the package version and the image tag together.
|
|
58
|
+
|
|
59
|
+
### Build the distributions
|
|
60
|
+
|
|
61
|
+
From `python/`:
|
|
62
|
+
|
|
63
|
+
pip install --upgrade build twine
|
|
64
|
+
python -m build
|
|
65
|
+
|
|
66
|
+
This produces `dist/fluence_hpc-<version>-py3-none-any.whl` and
|
|
67
|
+
`dist/fluence_hpc-<version>.tar.gz`. Upload both.
|
|
68
|
+
|
|
69
|
+
### Test on TestPyPI first
|
|
70
|
+
|
|
71
|
+
twine upload --repository testpypi dist/*
|
|
72
|
+
pip install --index-url https://test.pypi.org/simple/ fluence-hpc
|
|
73
|
+
python -c "import fluence; print(fluence.__version__)"
|
|
74
|
+
|
|
75
|
+
### Release to PyPI
|
|
76
|
+
|
|
77
|
+
twine upload dist/*
|
|
78
|
+
|
|
79
|
+
After this, `pip install fluence-hpc` works anywhere and imports as `fluence`.
|
|
80
|
+
|
|
81
|
+
### Versioning
|
|
82
|
+
|
|
83
|
+
Bump `version` in `pyproject.toml` and `__version__` in `fluence/__init__.py`
|
|
84
|
+
together (PyPI refuses to overwrite an existing version). Because the package is
|
|
85
|
+
version-locked into the sidecar image, tag the release so the image and the
|
|
86
|
+
package share a version — e.g. a `v0.1.1` git tag triggers both the
|
|
87
|
+
`sidecar-build-deploy` workflow (image) and a PyPI publish.
|
|
88
|
+
|
|
89
|
+
### Automated release (recommended)
|
|
90
|
+
|
|
91
|
+
Prefer GitHub Actions with PyPI Trusted Publishing (OIDC) over manual token
|
|
92
|
+
uploads: register the repo + workflow once on PyPI, then a release workflow
|
|
93
|
+
triggered by a version tag builds with `python -m build` and uploads with
|
|
94
|
+
`pypa/gh-action-pypi-publish` — no stored secret. The Docker image is built by
|
|
95
|
+
`.github/workflows/sidecar-build-deploy.yaml` on the same tag, keeping the
|
|
96
|
+
package version and image tag in lockstep.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence — quantum-classical scheduling coordination for the Fluence Kubernetes
|
|
3
|
+
scheduler.
|
|
4
|
+
|
|
5
|
+
This package is built into the Fluence sidecar image and staged into user
|
|
6
|
+
application containers at admission time (via an init container + shared volume
|
|
7
|
+
on PYTHONPATH), so the interceptor runs with zero user code changes.
|
|
8
|
+
|
|
9
|
+
Submodules:
|
|
10
|
+
fluence.providers provider interface + registry (per-vendor plug-ins)
|
|
11
|
+
fluence.interceptor runs every registered provider's submit-time tag hook
|
|
12
|
+
fluence.sidecar the sidecar coordination main loop
|
|
13
|
+
fluence.ungate generic worker ungating (Kubernetes patch logic)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# Must match `version` in pyproject.toml. This distribution is published as
# 0.0.0, so the runtime version string reports the same value.
__version__ = "0.0.0"
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.interceptor — installs every registered provider's submit-time tag hook.
|
|
3
|
+
|
|
4
|
+
Runs inside the user's application container, triggered automatically by a
|
|
5
|
+
sitecustomize.py on PYTHONPATH (staged there by the Fluence init container). On
|
|
6
|
+
import it asks every registered provider to install its interceptor; each
|
|
7
|
+
provider fail-soft skips if its vendor SDK is not present in this container.
|
|
8
|
+
|
|
9
|
+
This module's import must never raise — sitecustomize guards it, but we also
|
|
10
|
+
guard here so a single provider bug cannot affect the user application.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def install() -> None:
    """Ask every registered provider to install its submit-time tag hook.

    The pod UID is read from ``FLUENCE_POD_UID`` (empty string when unset) and
    handed to each provider's ``install_interceptor``. This function never
    raises: if the provider registry cannot be imported, or an individual
    provider's hook fails, the problem is reported and skipped.

    Diagnostics are written to stderr (as ``sitecustomize`` does) because this
    runs inside the user's interpreter on every start-up, and writing to
    stdout would mix fluence messages into the application's own output.
    """
    import sys  # local import keeps the module-level import block untouched

    def _report(message: str) -> None:
        print(message, file=sys.stderr, flush=True)

    pod_uid = os.environ.get("FLUENCE_POD_UID", "")
    try:
        from fluence.providers import all_providers
        providers = all_providers()
    except Exception as e:  # pragma: no cover - defensive
        _report(f"[fluence] interceptor: providers unavailable: {e}")
        return

    for provider in providers:
        try:
            if provider.install_interceptor(pod_uid):
                _report(f"[fluence] interceptor installed for provider "
                        f"{provider.name!r} (pod_uid={pod_uid})")
        except Exception as e:
            # A provider's hook must never break the user app.
            _report(f"[fluence] interceptor for {provider.name!r} skipped: {e}")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Install on import.
|
|
38
|
+
install()
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.providers — provider registry.
|
|
3
|
+
|
|
4
|
+
Importing this package imports every provider submodule, each of which calls
|
|
5
|
+
fluence.providers.base.register() at import time. This is the single extension
|
|
6
|
+
point: to add a vendor, drop a new module here that defines a Provider subclass
|
|
7
|
+
and calls register() — nothing else in the codebase needs to change.
|
|
8
|
+
|
|
9
|
+
Provider discovery is by explicit submodule import below (simple and debuggable).
|
|
10
|
+
Importing a provider module never fails on a missing vendor SDK: the SDK is only
|
|
11
|
+
imported lazily inside the methods that need it.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from fluence.providers.base import ( # noqa: F401
|
|
15
|
+
Provider,
|
|
16
|
+
Task,
|
|
17
|
+
TAG_KEY,
|
|
18
|
+
log,
|
|
19
|
+
register,
|
|
20
|
+
all_providers,
|
|
21
|
+
resolve,
|
|
22
|
+
resolve_from_env,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Import provider modules so they self-register. Add new providers here.
|
|
26
|
+
from fluence.providers import braket # noqa: F401,E402
|
|
27
|
+
# from fluence.providers import ibm # noqa: F401 (when implemented)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.providers.base — the provider interface and registration machinery.
|
|
3
|
+
|
|
4
|
+
A provider is a per-vendor plug-in (AWS Braket, IBM Qiskit Runtime, ...) that
|
|
5
|
+
implements two halves of the quantum-coordination mechanism:
|
|
6
|
+
|
|
7
|
+
- INTERCEPTOR hook (`install_interceptor`): runs inside the user's application
|
|
8
|
+
container; monkey-patches the vendor SDK's submit call to stamp the shared
|
|
9
|
+
`fluence-pod-uid` tag on every task. Must fail-soft if the vendor SDK is not
|
|
10
|
+
importable in that container.
|
|
11
|
+
|
|
12
|
+
- SIDECAR methods (`matches`, `find_my_task`, `is_ready_to_ungate`,
|
|
13
|
+
`queue_position`, `job_id`): run inside the Fluence sidecar container; find
|
|
14
|
+
the tagged task, poll readiness, and yield a vendor-neutral job id.
|
|
15
|
+
|
|
16
|
+
Providers self-register by calling `register()` at import time. The package
|
|
17
|
+
imports every provider submodule (see fluence.providers.__init__) so importing
|
|
18
|
+
the package registers them all. Registration is the single extension point:
|
|
19
|
+
adding a vendor is one new module that calls register().
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import os
|
|
25
|
+
from datetime import datetime, timezone
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Shared convention between every interceptor hook and every find_my_task.
|
|
29
|
+
# The interceptor stamps this tag key with the pod UID; the sidecar searches
|
|
30
|
+
# for it. Changing it is a coordinated change across all providers.
|
|
31
|
+
TAG_KEY = "fluence-pod-uid"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def log(msg: str) -> None:
    """Print *msg* to stdout, prefixed with ``[fluence]`` and a UTC timestamp.

    The timestamp has one-second resolution and a literal ``Z`` suffix; the
    stream is flushed on every call.
    """
    stamp = f"{datetime.now(timezone.utc):%Y-%m-%dT%H:%M:%SZ}"
    print(f"[fluence] {stamp} {msg}", flush=True)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Task:
    """
    Base type for a provider's handle to a single vendor quantum task.

    Each provider defines its own subclass and returns it from find_my_task.
    The framework never looks inside the handle; it only passes it back to the
    provider that produced it. Vendor identifiers (ARN, job id) are stored on
    the subclass.
    """
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Provider:
    """Contract implemented by each quantum vendor plug-in.

    A provider has an interceptor half (executed in the user's container) and
    a sidecar half (executed in the Fluence sidecar container). Every method
    except ``queue_position`` raises ``NotImplementedError`` here and must be
    overridden by the vendor subclass.
    """

    #: short stable name, e.g. "braket", "ibm"
    name: str = "base"

    # ── interceptor half (runs in the user container) ──────────────────────────

    def install_interceptor(self, pod_uid: str) -> bool:
        """Patch the vendor SDK's submit call so tasks carry TAG_KEY=<pod_uid>.

        Implementations return True once the patch is in place and False when
        the vendor SDK is not available (fail-soft); they must not raise.
        """
        raise NotImplementedError

    # ── sidecar half (runs in the sidecar container) ───────────────────────────

    def matches(self, vendor: str, backend: str) -> bool:
        """Report whether this provider is responsible for *vendor*/*backend*.

        Both values are resolved at runtime from the pod's backend annotation.
        """
        raise NotImplementedError

    def find_my_task(self, pod_uid: str, backend: str, timeout: int) -> "Task | None":
        """Locate the vendor task tagged TAG_KEY=<pod_uid>.

        Implementations poll until the task is found or *timeout* elapses and
        return an opaque Task, or None when nothing was found.
        """
        raise NotImplementedError

    def is_ready_to_ungate(self, task: "Task") -> bool:
        """Report whether the workers should be released now.

        That is the case when the queue position is 1 or the task is already
        RUNNING or in a terminal state. Every vendor can implement this.
        """
        raise NotImplementedError

    def queue_position(self, task: "Task") -> "int | None":
        """Return the integer queue position (1 means next), or None.

        This is optional telemetry: the default returns None for vendors that
        expose no position, and the ungate decision does not depend on it.
        """
        return None

    def job_id(self, task: "Task") -> str:
        """Return the stable, vendor-neutral id given to workers at ungate time."""
        raise NotImplementedError
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ── registry ────────────────────────────────────────────────────────────────────
|
|
91
|
+
|
|
92
|
+
_REGISTRY: "list[Provider]" = []
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def register(provider: Provider) -> None:
    """Append *provider* to the module-level registry.

    Provider modules call this once at import time; entries are kept in the
    order they were registered.
    """
    _REGISTRY.extend((provider,))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def all_providers() -> "list[Provider]":
    """Return a new list holding the registered providers in registration order.

    The result is a copy, so mutating it leaves the registry untouched.
    """
    return _REGISTRY.copy()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def resolve(vendor: str = "", backend: str = "") -> "Provider | None":
    """Return the first registered provider whose ``matches`` accepts the pair.

    Providers are consulted in registration order. A provider whose
    ``matches`` raises is logged and treated as a non-match, so one faulty
    plug-in cannot break resolution. Returns None when no provider matches.
    """

    def _accepts(candidate) -> bool:
        try:
            return bool(candidate.matches(vendor, backend))
        except Exception as e:  # a provider's matches() must never break resolution
            log(f"provider {candidate.name!r} matches() error: {e}")
            return False

    return next((candidate for candidate in _REGISTRY if _accepts(candidate)), None)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def resolve_from_env() -> "Provider | None":
    """Resolve a provider from ``FLUXION_VENDOR`` and ``FLUXION_BACKEND``.

    Either variable defaults to the empty string when it is not set.
    """
    env = os.environ
    vendor = env.get("FLUXION_VENDOR", "")
    backend = env.get("FLUXION_BACKEND", "")
    return resolve(vendor, backend)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.providers.braket — AWS Braket provider.
|
|
3
|
+
|
|
4
|
+
Holds both halves of the Braket coordination mechanism:
|
|
5
|
+
- install_interceptor: patches AwsDevice.run() to stamp the pod-uid tag
|
|
6
|
+
(runs in the user container; fail-soft if amazon-braket-sdk is absent).
|
|
7
|
+
- sidecar methods: discover the tagged task, poll queue position, yield the
|
|
8
|
+
task ARN as the (vendor-neutral-typed) job id.
|
|
9
|
+
|
|
10
|
+
Self-registers via register(PROVIDER) at import. Importing this module never
|
|
11
|
+
requires the braket SDK; SDK imports are lazy, inside the methods.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import time
|
|
18
|
+
|
|
19
|
+
from fluence.providers.base import Provider, Task, TAG_KEY, log, register
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BraketTask(Task):
    """Task handle for AWS Braket; carries the quantum task ARN."""

    def __init__(self, arn: str):
        # The ARN is the only state this handle holds.
        self.arn = arn
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _region_from_arn(arn: str) -> str:
|
|
28
|
+
parts = arn.split(":")
|
|
29
|
+
region = parts[3] if len(parts) > 3 and parts[3] else ""
|
|
30
|
+
return region or os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class BraketProvider(Provider):
    """AWS Braket provider: submit-time tag hook plus sidecar-side discovery.

    The vendor SDKs (``braket`` and ``boto3``) are imported lazily inside the
    methods that use them, so importing this module never requires either.
    """

    name = "braket"

    # ── interceptor half ───────────────────────────────────────────────────────

    def install_interceptor(self, pod_uid: str) -> bool:
        """Patch ``AwsDevice.run`` to add the TAG_KEY=<pod_uid> tag to each task.

        Returns True once the patch is installed, or False when the braket SDK
        cannot be imported in this container (fail-soft). When *pod_uid* is
        empty the wrapper forwards the call unchanged.
        """
        try:
            from braket.aws import AwsDevice
        except ImportError:
            return False  # braket SDK not in this container — fail-soft

        import functools  # local import: only needed once the SDK is present

        original_run = AwsDevice.run

        @functools.wraps(original_run)
        def patched_run(self, task_specification, *args, **kwargs):
            if pod_uid:
                # Copy the tags so the caller's dict is never mutated, and
                # tolerate an explicit tags=None from user code.
                tags = dict(kwargs.get("tags") or {})
                tags[TAG_KEY] = pod_uid
                kwargs["tags"] = tags
            return original_run(self, task_specification, *args, **kwargs)

        AwsDevice.run = patched_run
        return True

    # ── sidecar half ───────────────────────────────────────────────────────────

    def matches(self, vendor: str, backend: str) -> bool:
        """True when the vendor is "braket" or the backend mentions braket.

        Comparison is case-insensitive and None is treated as empty. A Braket
        device ARN (``arn:aws:braket...``) matches via the substring test.
        """
        v, b = (vendor or "").lower(), (backend or "").lower()
        return v == "braket" or "braket" in b

    def _client(self, backend: str):
        """Build a boto3 ``braket`` client for the region implied by *backend*.

        The region comes from the ARN when *backend* is one, otherwise from
        ``AWS_DEFAULT_REGION`` (default ``"us-east-1"``).
        """
        import boto3
        backend = backend or ""  # matches() accepts None, so tolerate it here too
        if backend.startswith("arn:"):
            region = _region_from_arn(backend)
        else:
            region = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
        return boto3.client("braket", region_name=region)

    def find_my_task(self, pod_uid, backend, timeout):
        """Poll Braket for the task tagged TAG_KEY=<pod_uid>.

        Searches roughly every 10 seconds until a task is found or *timeout*
        seconds have elapsed. When *backend* is a Braket device ARN the search
        is also filtered by that device. If several tasks match, the one with
        the latest ``createdAt`` is used. Search errors are logged and
        retried. Returns a BraketTask, or None on timeout.
        """
        backend = backend or ""
        client = self._client(backend)
        log(f"[braket] searching for task tagged {TAG_KEY}={pod_uid}")
        deadline = time.time() + timeout
        device_arn = backend if backend.startswith("arn:aws:braket") else None

        # The filters do not change between attempts, so build them once.
        filters = [{"name": f"tags:{TAG_KEY}", "operator": "EQUAL",
                    "values": [pod_uid]}]
        if device_arn:
            filters.append({"name": "deviceArn", "operator": "EQUAL",
                            "values": [device_arn]})

        while time.time() < deadline:
            try:
                resp = client.search_quantum_tasks(filters=filters, maxResults=10)
                tasks = resp.get("quantumTasks", [])
                if tasks:
                    newest = max(tasks, key=lambda t: t.get("createdAt", ""))
                    arn = newest["quantumTaskArn"]
                    log(f"[braket] found task by tag: {arn}")
                    return BraketTask(arn)
            except Exception as e:
                log(f"[braket] search error (will retry): {e}")
            # Never sleep past the deadline: a short timeout should not be
            # stretched by a full 10-second pause.
            time.sleep(max(0.0, min(10.0, deadline - time.time())))
        log("[braket] task discovery timed out")
        return None

    def _aws_task(self, task: BraketTask):
        """Return an ``AwsQuantumTask`` for *task* with an event loop installed.

        One event loop is created per provider and reused on every call;
        creating a fresh loop per poll would leak loops for the lifetime of
        the sidecar.
        """
        import asyncio
        # NOTE(review): presumably the SDK expects a current event loop in
        # this thread — confirm against the braket SDK before removing.
        loop = getattr(self, "_loop", None)
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            self._loop = loop
        asyncio.set_event_loop(loop)
        from braket.aws import AwsQuantumTask
        return AwsQuantumTask(arn=task.arn)

    def is_ready_to_ungate(self, task: BraketTask) -> bool:
        """True when the task is RUNNING/terminal or its queue position is 1.

        Errors from the state lookup propagate to the caller; a failure while
        reading the queue position is treated as "not ready yet".
        """
        t = self._aws_task(task)
        if t.state() in ("RUNNING", "COMPLETED", "FAILED", "CANCELLED"):
            return True
        try:
            return str(t.queue_position().queue_position) == "1"
        except Exception:
            return False

    def queue_position(self, task: BraketTask):
        """Return the queue position as an int, or None.

        None is returned when the position is absent, is not a plain decimal
        number, or cannot be fetched.
        """
        try:
            pos = self._aws_task(task).queue_position().queue_position
            return int(pos) if pos is not None and str(pos).isdigit() else None
        except Exception:
            return None

    def job_id(self, task: BraketTask) -> str:
        """Return the task ARN, which serves as the vendor-neutral job id."""
        return task.arn
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
PROVIDER = BraketProvider()
|
|
122
|
+
register(PROVIDER)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.sidecar — provider-agnostic quantum coordination sidecar main loop.
|
|
3
|
+
|
|
4
|
+
Injected by the Fluence webhook into the quantum-submitting pod. Resolves its
|
|
5
|
+
vendor at runtime from the backend annotation, discovers the task the user
|
|
6
|
+
application submitted (tagged by the interceptor), polls readiness, and either
|
|
7
|
+
ungates gated workers (gang mode) or just logs the queue-position series
|
|
8
|
+
(observe-only mode).
|
|
9
|
+
|
|
10
|
+
Entry point: `fluence-sidecar` console script (see pyproject.toml) -> main().
|
|
11
|
+
|
|
12
|
+
Environment (injected by the Fluence webhook):
|
|
13
|
+
FLUENCE_POD_UID UID of this pod (matches interceptor tag)
|
|
14
|
+
FLUENCE_NAMESPACE Kubernetes namespace
|
|
15
|
+
FLUENCE_GATED_PODS comma-separated gated worker names
|
|
16
|
+
FLUENCE_OBSERVE "true" for observe-only telemetry mode
|
|
17
|
+
FLUXION_BACKEND / FLUXION_VENDOR scheduler-chosen backend / vendor
|
|
18
|
+
FLUENCE_TASK_DISCOVERY_TIMEOUT seconds to wait for discovery (default 300)
|
|
19
|
+
FLUENCE_POLL_INTERVAL seconds between polls (default 30)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import os
|
|
25
|
+
import sys
|
|
26
|
+
import time
|
|
27
|
+
|
|
28
|
+
from fluence.providers import resolve_from_env
|
|
29
|
+
from fluence.providers.base import log
|
|
30
|
+
from fluence.ungate import ungate_pods, gated_pods_from_env, namespace_from_env
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _poll(provider, task, poll_interval, ungate):
    """Block until *provider* reports *task* ready, logging position changes.

    Each pass asks the provider whether the task is ready; if so the final
    position is logged and the function returns. Otherwise the queue position
    is logged whenever it differs from the previous one. Provider errors are
    logged and retried. *poll_interval* is the pause in seconds between
    passes; *ungate* only selects the mode label in the first log line.
    """
    mode = "observe-only" if not ungate else "gang"
    log(f"{mode} mode: polling queue position")
    # Unique sentinel so the first observed position (even None) is logged.
    previous = object()
    while True:
        try:
            if not provider.is_ready_to_ungate(task):
                current = provider.queue_position(task)
                if current != previous:
                    log(f"queue position: {current}")
                    previous = current
            else:
                log(f"task ready (position={provider.queue_position(task)})")
                return
        except Exception as e:
            log(f"poll error (will retry): {e}")
        time.sleep(poll_interval)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _env_int(name: str, default: int) -> int:
    """Read integer environment variable *name*, or *default* if unset/invalid."""
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        log(f"WARNING: {name}={raw!r} is not an integer; using default {default}")
        return default


def main():
    """Run the sidecar: resolve the provider, find the task, poll, then ungate.

    Configuration is read from the environment (see the module docstring).
    In observe-only mode the queue position is logged and nothing is ungated.
    In gang mode the gated workers are ungated once the task is ready. If the
    provider cannot be resolved or the task cannot be discovered, gated
    workers are still ungated (with an empty job id) so they are not left
    gated indefinitely, and the process exits with status 1.
    """
    pod_uid = os.environ.get("FLUENCE_POD_UID", "")
    backend = os.environ.get("FLUXION_BACKEND", "")
    observe = os.environ.get("FLUENCE_OBSERVE", "").lower() == "true"
    # A malformed value must not crash the sidecar before workers can be
    # released, so fall back to the defaults instead of raising.
    discovery_timeout = _env_int("FLUENCE_TASK_DISCOVERY_TIMEOUT", 300)
    poll_interval = _env_int("FLUENCE_POLL_INTERVAL", 30)

    namespace = namespace_from_env()
    gated_pods = gated_pods_from_env()

    log("starting fluence quantum sidecar")
    log(f"  pod_uid={pod_uid} namespace={namespace} backend={backend} "
        f"observe={observe} gated_pods={gated_pods}")

    provider = resolve_from_env()
    if provider is None:
        log("ERROR: could not resolve a quantum provider from the backend")
        if gated_pods and not observe:
            ungate_pods(gated_pods, "", namespace)
        sys.exit(1)
    log(f"resolved provider: {provider.name}")

    if not observe and not gated_pods:
        log("no gated workers and not observe mode — nothing to do")
        return

    try:
        task = provider.find_my_task(pod_uid, backend, discovery_timeout)
    except Exception as e:
        # A discovery crash (e.g. a missing SDK) is handled like "not found"
        # so the deadlock-avoidance path below still runs.
        log(f"ERROR: task discovery failed: {e}")
        task = None
    if task is None:
        log("ERROR: could not discover quantum task")
        if gated_pods and not observe:
            log("ungating workers anyway to avoid deadlock")
            ungate_pods(gated_pods, "", namespace)
        sys.exit(1)

    job_id = provider.job_id(task)
    log(f"discovered task, job_id={job_id}")

    _poll(provider, task, poll_interval, ungate=not observe)

    if observe:
        log("observe-only run complete")
        return

    ungate_pods(gated_pods, job_id, namespace)
    log("done — workers ungated")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
if __name__ == "__main__":
|
|
99
|
+
main()
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# fluence sitecustomize — staged onto the user container's PYTHONPATH by the
|
|
2
|
+
# Fluence init container. Python imports `sitecustomize` automatically on every
|
|
3
|
+
# interpreter start (interactive OR script), so this runs the interceptor with
|
|
4
|
+
# zero user code changes and without relying on PYTHONSTARTUP (which only fires
|
|
5
|
+
# for interactive sessions).
|
|
6
|
+
#
|
|
7
|
+
# Guarded so a fluence-side error can never break the user's application.
|
|
8
|
+
# Importing fluence.interceptor installs the provider hooks as a side effect.
# Any failure is reported on stderr and swallowed so interpreter start-up in
# the user's container always continues.
# NOTE(review): this file sits first on PYTHONPATH, so it presumably shadows
# any sitecustomize the user image already provides — confirm whether the
# original needs to be chained.
try:
    import fluence.interceptor  # noqa: F401  (import side-effect installs hooks)
except Exception as _err:  # pragma: no cover
    import sys
    print(f"[fluence] interceptor skipped: {_err}", file=sys.stderr, flush=True)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.stage — init-container entrypoint for Model C delivery.
|
|
3
|
+
|
|
4
|
+
The Fluence webhook injects an init container (the sidecar image, which has
|
|
5
|
+
`fluence` installed) that runs `python -m fluence.stage <dest>`. This copies the
|
|
6
|
+
installed `fluence` package plus a `sitecustomize.py` into <dest>, a shared
|
|
7
|
+
emptyDir volume. The webhook mounts that volume into the user's application
|
|
8
|
+
container and prepends <dest> to PYTHONPATH. Python then auto-imports
|
|
9
|
+
sitecustomize on startup, which imports fluence.interceptor — tagging the user's
|
|
10
|
+
quantum tasks with zero user code changes and no vendor SDK requirement on our
|
|
11
|
+
side (the interceptor patches whatever SDK the user already has).
|
|
12
|
+
|
|
13
|
+
Replaces the old build-interceptor.sh: assembly is real package staging, not
|
|
14
|
+
text concatenation.
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
python -m fluence.stage /opt/fluence-staged
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import os
|
|
23
|
+
import shutil
|
|
24
|
+
import sys
|
|
25
|
+
|
|
26
|
+
import fluence
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def stage(dest: str) -> None:
    """
    Stage the installed `fluence` package and a top-level sitecustomize.py.

    Creates *dest* if needed, replaces any existing `<dest>/fluence` with a
    fresh copy of the installed package (skipping `__pycache__`, `*.pyc` and
    `tests`), then copies the package's `sitecustomize.py` to
    `<dest>/sitecustomize.py` and prints a confirmation line.
    """
    os.makedirs(dest, exist_ok=True)

    # Only the pure-Python package is copied (no vendor SDKs), so the user
    # container's own dependencies are left untouched.
    installed_pkg = os.path.dirname(os.path.abspath(fluence.__file__))
    staged_pkg = os.path.join(dest, "fluence")
    skip = shutil.ignore_patterns("__pycache__", "*.pyc", "tests")

    # Always start from a clean copy rather than merging into a stale one.
    if os.path.exists(staged_pkg):
        shutil.rmtree(staged_pkg)
    shutil.copytree(installed_pkg, staged_pkg, ignore=skip)

    # sitecustomize.py must sit at the top of <dest>, next to the package and
    # not inside it, for Python's site machinery to import it at startup.
    shutil.copyfile(
        os.path.join(installed_pkg, "sitecustomize.py"),
        os.path.join(dest, "sitecustomize.py"),
    )

    print(f"[fluence] staged package + sitecustomize into {dest}", flush=True)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def main(argv=None):
    """
    Command-line entry point: stage into the directory named by the first argument.

    *argv* defaults to `sys.argv[1:]`. When no argument is given, the
    destination comes from the FLUENCE_STAGE_DIR environment variable, falling
    back to `/opt/fluence-staged`.
    """
    args = sys.argv[1:] if argv is None else argv
    if args:
        target = args[0]
    else:
        target = os.environ.get("FLUENCE_STAGE_DIR", "/opt/fluence-staged")
    stage(target)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
if __name__ == "__main__":
|
|
59
|
+
main()
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fluence.ungate — generic worker ungating (Kubernetes side).
|
|
3
|
+
|
|
4
|
+
Once the sidecar determines the quantum task is ready, it ungates the gated
|
|
5
|
+
classical worker pods: stamp the vendor-neutral job-id annotation, set the
|
|
6
|
+
high-priority class, and remove the scheduling gate atomically. This is pure
|
|
7
|
+
Kubernetes plumbing — no vendor specifics.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import subprocess
|
|
15
|
+
|
|
16
|
+
from fluence.providers.base import log
|
|
17
|
+
|
|
18
|
+
JOB_ID_ANNOTATION = "fluence.flux-framework.org/quantum-job-id"
|
|
19
|
+
QUANTUM_GATE_NAME = "quantum.braket/ready"
|
|
20
|
+
PRIORITY_CLASS = "fluence-quantum-classical"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def kubectl(args):
    """
    Run `kubectl` with *args* and return its stripped standard output.

    Args:
        args: sequence of command-line arguments placed after `kubectl`.

    Returns:
        The command's stdout with surrounding whitespace removed.

    Raises:
        RuntimeError: if the command exits non-zero, or if it cannot be
            started at all (for example the `kubectl` binary is not on PATH).
    """
    try:
        result = subprocess.run(["kubectl"] + list(args), capture_output=True, text=True)
    except OSError as e:
        # Callers catch RuntimeError only. Without this, a missing or
        # non-executable binary would escape their best-effort handling and
        # abort the whole ungate loop.
        raise RuntimeError(f"kubectl {' '.join(args)} failed: {e}") from e
    if result.returncode != 0:
        raise RuntimeError(f"kubectl {' '.join(args)} failed: {result.stderr.strip()}")
    return result.stdout.strip()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def ungate_pods(gated_pods, job_id, namespace):
    """
    Release each gated worker pod so it can be scheduled.

    For every non-blank name in *gated_pods*:
      1. If *job_id* is set, annotate the pod with it under JOB_ID_ANNOTATION
         (the vendor-neutral key the worker uses to locate the quantum
         result); otherwise log a warning.
      2. Apply a single JSON patch that sets the high-priority class and
         removes the scheduling gate together. Priority is set here, not in
         the webhook, to avoid the admission controller conflict where
         priority:0 is already defaulted.

    Failures of either kubectl call are logged as warnings and do not stop
    processing of the remaining pods.
    """
    # The patch body does not depend on the pod, so serialize it once.
    # NOTE(review): the gate is removed by position (index 0), not by name;
    # presumably the quantum gate is the pod's only/first gate - confirm.
    release_patch = json.dumps([
        {"op": "add", "path": "/spec/priorityClassName", "value": PRIORITY_CLASS},
        {"op": "remove", "path": "/spec/schedulingGates/0"},
    ])

    for raw_name in gated_pods:
        name = raw_name.strip()
        if not name:
            continue
        log(f"ungating pod: {name}")

        if not job_id:
            log(f" WARNING: no job id to patch onto {name}")
        else:
            annotate_args = [
                "annotate", "pod", name, "-n", namespace,
                f"{JOB_ID_ANNOTATION}={job_id}", "--overwrite",
            ]
            try:
                kubectl(annotate_args)
                log(f" patched job id onto {name}: {job_id}")
            except RuntimeError as err:
                log(f" WARNING: could not annotate {name}: {err}")

        patch_args = [
            "patch", "pod", name, "-n", namespace,
            "--type=json", f"-p={release_patch}",
        ]
        try:
            kubectl(patch_args)
            log(f" set priority and removed gate from {name}")
        except RuntimeError as err:
            log(f" WARNING: could not patch {name}: {err}")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def gated_pods_from_env():
    """Return the pod names listed in FLUENCE_GATED_PODS (comma-separated), trimmed, blanks dropped."""
    raw = os.environ.get("FLUENCE_GATED_PODS", "")
    trimmed = (piece.strip() for piece in raw.split(","))
    return [name for name in trimmed if name]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def namespace_from_env():
    """Return the value of FLUENCE_NAMESPACE, or "default" when the variable is not set."""
    try:
        return os.environ["FLUENCE_NAMESPACE"]
    except KeyError:
        return "default"
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fluence-hpc
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Fluence quantum-classical scheduling coordination library (sidecar + interceptor + providers)
|
|
5
|
+
Author: Fluence / converged-computing
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Provides-Extra: braket
|
|
10
|
+
Requires-Dist: amazon-braket-sdk; extra == "braket"
|
|
11
|
+
Requires-Dist: boto3; extra == "braket"
|
|
12
|
+
Provides-Extra: all
|
|
13
|
+
Requires-Dist: amazon-braket-sdk; extra == "all"
|
|
14
|
+
Requires-Dist: boto3; extra == "all"
|
|
15
|
+
|
|
16
|
+
# fluence (Python)
|
|
17
|
+
|
|
18
|
+
Quantum-classical scheduling coordination library for the Fluence Kubernetes
|
|
19
|
+
scheduler. Import name `fluence`; distributed on PyPI as `fluence-hpc`.
|
|
20
|
+
|
|
21
|
+
This package is **built into the Fluence sidecar image** and **staged into user
|
|
22
|
+
application containers at admission time** — users never install it.
|
|
23
|
+
|
|
24
|
+
## What it does
|
|
25
|
+
|
|
26
|
+
A hybrid quantum-classical workflow submits work to two queues: the Kubernetes
|
|
27
|
+
scheduler (classical) and a QPU vendor API (quantum). Classical worker pods would
|
|
28
|
+
idle while the QPU queue drains. Fluence gates the workers until the quantum task
|
|
29
|
+
is about to run, then releases them. This library is the runtime half:
|
|
30
|
+
|
|
31
|
+
- **interceptor** (`fluence.interceptor`) — runs inside the user container,
|
|
32
|
+
monkey-patches the vendor SDK submit call to tag each task with the pod UID.
|
|
33
|
+
- **sidecar** (`fluence.sidecar`) — runs in a sidecar container, discovers the
|
|
34
|
+
tagged task, polls queue position, and ungates the classical workers when the
|
|
35
|
+
task is ready (or, in observe-only mode, just records the queue position).
|
|
36
|
+
- **providers** (`fluence.providers`) — per-vendor plug-ins implementing both
|
|
37
|
+
halves. Providers self-register on import.
|
|
38
|
+
|
|
39
|
+
## Delivery (Model C)
|
|
40
|
+
|
|
41
|
+
The interceptor must run in the user's container, which does **not** have this
|
|
42
|
+
package installed. Rather than require a user install or concatenate a text
|
|
43
|
+
snippet, the Fluence webhook:
|
|
44
|
+
|
|
45
|
+
1. injects an **init container** (the sidecar image) running
|
|
46
|
+
`python -m fluence.stage <dir>`, which copies the pure-Python `fluence`
|
|
47
|
+
package plus a `sitecustomize.py` into a shared `emptyDir`;
|
|
48
|
+
2. mounts that volume into the user container and prepends `<dir>` to
|
|
49
|
+
`PYTHONPATH`.
|
|
50
|
+
|
|
51
|
+
Python imports `sitecustomize` automatically on every interpreter start
|
|
52
|
+
(`python app.py` included — unlike `PYTHONSTARTUP`, which only fires for
|
|
53
|
+
interactive sessions), so `import fluence.interceptor` runs before user code.
|
|
54
|
+
The interceptor patches whichever vendor SDK is present and fail-soft skips the
|
|
55
|
+
rest. No user code changes, no vendor SDKs added to the user image.
|
|
56
|
+
|
|
57
|
+
## Adding a provider
|
|
58
|
+
|
|
59
|
+
Add one module under `fluence/providers/` that subclasses `Provider`, implements
|
|
60
|
+
`install_interceptor` (tag hook), `matches`, `find_my_task`, `is_ready_to_ungate`,
|
|
61
|
+
`queue_position` (optional), and `job_id`, and calls `register(PROVIDER)`. Import
|
|
62
|
+
it from `fluence/providers/__init__.py`. Nothing else changes.
|
|
63
|
+
|
|
64
|
+
## Tests
|
|
65
|
+
|
|
66
|
+
python3 python/tests/test_fluence.py
|
|
67
|
+
|
|
68
|
+
## Building and releasing
|
|
69
|
+
|
|
70
|
+
The package is distributed on PyPI as `fluence-hpc` (the project name `fluence` is
|
|
71
|
+
already taken on PyPI). It is also baked into the sidecar image, so a release
|
|
72
|
+
moves the package version and the image tag together.
|
|
73
|
+
|
|
74
|
+
### Build the distributions
|
|
75
|
+
|
|
76
|
+
From `python/`:
|
|
77
|
+
|
|
78
|
+
pip install --upgrade build twine
|
|
79
|
+
python -m build
|
|
80
|
+
|
|
81
|
+
This produces `dist/fluence_hpc-<version>-py3-none-any.whl` and
|
|
82
|
+
`dist/fluence_hpc-<version>.tar.gz`. Upload both.
|
|
83
|
+
|
|
84
|
+
### Test on TestPyPI first
|
|
85
|
+
|
|
86
|
+
twine upload --repository testpypi dist/*
|
|
87
|
+
pip install --index-url https://test.pypi.org/simple/ fluence-hpc
|
|
88
|
+
python -c "import fluence; print(fluence.__version__)"
|
|
89
|
+
|
|
90
|
+
### Release to PyPI
|
|
91
|
+
|
|
92
|
+
twine upload dist/*
|
|
93
|
+
|
|
94
|
+
After this, `pip install fluence-hpc` works anywhere and imports as `fluence`.
|
|
95
|
+
|
|
96
|
+
### Versioning
|
|
97
|
+
|
|
98
|
+
Bump `version` in `pyproject.toml` and `__version__` in `fluence/__init__.py`
|
|
99
|
+
together (PyPI refuses to overwrite an existing version). Because the package is
|
|
100
|
+
version-locked into the sidecar image, tag the release so the image and the
|
|
101
|
+
package share a version — e.g. a `v0.1.1` git tag triggers both the
|
|
102
|
+
`sidecar-build-deploy` workflow (image) and a PyPI publish.
|
|
103
|
+
|
|
104
|
+
### Automated release (recommended)
|
|
105
|
+
|
|
106
|
+
Prefer GitHub Actions with PyPI Trusted Publishing (OIDC) over manual token
|
|
107
|
+
uploads: register the repo + workflow once on PyPI, then a release workflow
|
|
108
|
+
triggered by a version tag builds with `python -m build` and uploads with
|
|
109
|
+
`pypa/gh-action-pypi-publish` — no stored secret. The Docker image is built by
|
|
110
|
+
`.github/workflows/sidecar-build-deploy.yaml` on the same tag, keeping the
|
|
111
|
+
package version and image tag in lockstep.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
fluence/__init__.py
|
|
4
|
+
fluence/interceptor.py
|
|
5
|
+
fluence/sidecar.py
|
|
6
|
+
fluence/sitecustomize.py
|
|
7
|
+
fluence/stage.py
|
|
8
|
+
fluence/ungate.py
|
|
9
|
+
fluence/providers/__init__.py
|
|
10
|
+
fluence/providers/base.py
|
|
11
|
+
fluence/providers/braket.py
|
|
12
|
+
fluence_hpc.egg-info/PKG-INFO
|
|
13
|
+
fluence_hpc.egg-info/SOURCES.txt
|
|
14
|
+
fluence_hpc.egg-info/dependency_links.txt
|
|
15
|
+
fluence_hpc.egg-info/entry_points.txt
|
|
16
|
+
fluence_hpc.egg-info/requires.txt
|
|
17
|
+
fluence_hpc.egg-info/top_level.txt
|
|
18
|
+
tests/test_fluence.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fluence
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
# PyPI distribution name (the project name `fluence` is taken on PyPI, so we
|
|
7
|
+
# distribute under a different name but import as `fluence`).
|
|
8
|
+
name = "fluence-hpc"
|
|
9
|
+
version = "0.0.0"
|
|
10
|
+
description = "Fluence quantum-classical scheduling coordination library (sidecar + interceptor + providers)"
|
|
11
|
+
readme = "README.md"
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
license = { text = "MIT" }
|
|
14
|
+
authors = [{ name = "Fluence / converged-computing" }]
|
|
15
|
+
|
|
16
|
+
# No hard runtime dependencies. Vendor SDKs (amazon-braket-sdk, qiskit-ibm-
|
|
17
|
+
# runtime, ...) are optional extras installed in the SIDECAR image only. The
|
|
18
|
+
# interceptor never imports an SDK it isn't already running alongside.
|
|
19
|
+
dependencies = []
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
braket = ["amazon-braket-sdk", "boto3"]
|
|
23
|
+
# ibm = ["qiskit-ibm-runtime"]
|
|
24
|
+
all = ["amazon-braket-sdk", "boto3"]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
fluence-sidecar = "fluence.sidecar:main"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
where = ["."]
|
|
31
|
+
include = ["fluence*"]
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for the fluence package. Run: python3 -m pytest python/tests/
|
|
3
|
+
None of these require a vendor SDK — they exercise registration, resolution,
|
|
4
|
+
fail-soft interceptor behavior, and the staging mechanism.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
import tempfile
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_braket_registered():
    """A provider named "braket" is among the registered providers."""
    from fluence.providers import all_providers

    assert any(provider.name == "braket" for provider in all_providers())
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_resolve_braket_by_vendor():
    """Resolving with vendor "braket" and an empty backend yields the braket provider."""
    from fluence.providers import resolve

    provider = resolve("braket", "")
    assert provider.name == "braket"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_resolve_braket_by_arn_backend():
    """With no vendor given, a Braket device ARN as backend still resolves to braket."""
    from fluence.providers import resolve

    device_arn = "arn:aws:braket:us-east-1::device/quantum-simulator/amazon/sv1"
    provider = resolve("", device_arn)
    assert provider is not None
    assert provider.name == "braket"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_resolve_unknown_returns_none():
    """An unrecognized vendor/backend pair resolves to None."""
    from fluence.providers import resolve

    unresolved = resolve("nope", "nope")
    assert unresolved is None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_interceptor_failsoft_without_sdk():
    """Importing (and re-importing) the interceptor must not raise when braket is absent."""
    from importlib import reload

    import fluence.interceptor as interceptor

    # Reloading re-runs install(); the test passes as long as nothing raises.
    reload(interceptor)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_braket_install_interceptor_failsoft():
    """
    Without a Braket SDK, install_interceptor returns False and never raises.

    The assertion only holds when no `braket` package is importable, so the
    check is skipped in environments that do have the SDK (for example an
    image built with the `braket` extra) instead of failing spuriously there.
    """
    import importlib.util

    from fluence.providers.braket import PROVIDER

    if importlib.util.find_spec("braket") is not None:
        # Precondition "SDK absent" does not hold; nothing to verify here.
        return

    # No braket SDK installed -> returns False, never raises.
    assert PROVIDER.install_interceptor("uid") is False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_job_id_is_arn():
    """The braket provider reports the task ARN as the job id."""
    from fluence.providers.braket import BraketTask, PROVIDER

    task_arn = "arn:aws:braket:us-east-1::quantum-task/abc-123"
    task = BraketTask(task_arn)
    assert PROVIDER.job_id(task) == task_arn
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_stage_produces_importable_package():
    """
    `python -m fluence.stage <dest>` creates an importable `fluence` package
    plus a top-level `sitecustomize.py`, and leaves out tests and bytecode.
    """
    with tempfile.TemporaryDirectory() as d:
        subprocess.run([sys.executable, "-m", "fluence.stage", d], check=True)
        staged_pkg = os.path.join(d, "fluence")

        assert os.path.isfile(os.path.join(d, "sitecustomize.py"))
        assert os.path.isfile(os.path.join(staged_pkg, "__init__.py"))
        assert os.path.isdir(os.path.join(staged_pkg, "providers"))

        # __pycache__ and tests must be excluded from the staged copy.
        assert not os.path.exists(os.path.join(staged_pkg, "tests"))
        for _root, dirnames, filenames in os.walk(staged_pkg):
            assert "__pycache__" not in dirnames
            assert not [name for name in filenames if name.endswith(".pyc")]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_staged_sitecustomize_runs_interceptor():
    """
    End-to-end check of the staged delivery path.

    Stage the package, then run a script in a subprocess whose PYTHONPATH holds
    only the staged directory and a fake braket SDK. The staged sitecustomize
    must import the interceptor, which must patch AwsDevice.run so that the
    pod-uid tag shows up in the call's `tags` keyword argument.
    """
    with tempfile.TemporaryDirectory() as d:
        staged = os.path.join(d, "staged")
        subprocess.run([sys.executable, "-m", "fluence.stage", staged], check=True)

        # Minimal stand-in for the vendor SDK: braket/ and braket/aws/ packages
        # with an AwsDevice whose run() echoes the tags it received.
        fakesdk = os.path.join(d, "fakesdk")
        aws_pkg = os.path.join(fakesdk, "braket", "aws")
        os.makedirs(aws_pkg)
        with open(os.path.join(fakesdk, "braket", "__init__.py"), "w"):
            pass
        with open(os.path.join(aws_pkg, "__init__.py"), "w") as f:
            # The method and its body must be nested for the module to compile.
            f.write(
                "class AwsDevice:\n"
                "    def run(self, spec, *a, **k):\n"
                "        print('TAGS', k.get('tags'))\n"
            )

        app = os.path.join(d, "app.py")
        with open(app, "w") as f:
            f.write("from braket.aws import AwsDevice\nAwsDevice().run('c')\n")

        # Deliberately small environment so only the staged dir and the fake
        # SDK are added to the import path.
        env = {
            "PATH": os.environ.get("PATH", ""),
            "HOME": d,
            "FLUENCE_POD_UID": "pod-xyz",
            "PYTHONPATH": staged + os.pathsep + fakesdk,
        }
        out = subprocess.run([sys.executable, app], env=env,
                             capture_output=True, text=True)
        assert "fluence-pod-uid" in out.stdout, out.stdout + out.stderr
        assert "pod-xyz" in out.stdout, out.stdout + out.stderr
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
if __name__ == "__main__":
    # Minimal runner so the file can be executed directly without pytest:
    # call every module-level test_* callable in name order, report PASS/FAIL
    # per test, print a summary, and exit non-zero if anything failed.
    cases = [obj for name, obj in sorted(globals().items()) if name.startswith("test_")]
    failures = 0
    for case in cases:
        try:
            case()
            print(f"PASS {case.__name__}")
        except Exception as exc:
            failures = failures + 1
            print(f"FAIL {case.__name__}: {exc}")
    print(f"\n{len(cases) - failures}/{len(cases)} passed")
    sys.exit(1 if failures else 0)
|