cap-sdk-python 2.5.2__tar.gz → 2.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/PKG-INFO +110 -1
  2. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/README.md +107 -0
  3. cap_sdk_python-2.5.4/cap/__init__.py +145 -0
  4. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/bus.py +13 -0
  5. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/client.py +9 -0
  6. cap_sdk_python-2.5.4/cap/errors.py +116 -0
  7. cap_sdk_python-2.5.4/cap/heartbeat.py +153 -0
  8. cap_sdk_python-2.5.4/cap/metrics.py +33 -0
  9. cap_sdk_python-2.5.4/cap/middleware.py +50 -0
  10. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/alert_pb2_grpc.py +1 -1
  11. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/buspacket_pb2_grpc.py +1 -1
  12. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/handshake_pb2_grpc.py +1 -1
  13. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/heartbeat_pb2_grpc.py +1 -1
  14. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/job_pb2_grpc.py +1 -1
  15. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/safety_pb2_grpc.py +1 -1
  16. cap_sdk_python-2.5.4/cap/progress.py +100 -0
  17. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/runtime.py +193 -81
  18. cap_sdk_python-2.5.4/cap/testing.py +125 -0
  19. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/worker.py +51 -7
  20. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap_sdk_python.egg-info/PKG-INFO +110 -1
  21. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap_sdk_python.egg-info/SOURCES.txt +12 -0
  22. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap_sdk_python.egg-info/requires.txt +3 -0
  23. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/pyproject.toml +4 -1
  24. cap_sdk_python-2.5.4/tests/test_errors.py +46 -0
  25. cap_sdk_python-2.5.4/tests/test_heartbeat.py +311 -0
  26. cap_sdk_python-2.5.4/tests/test_metrics.py +117 -0
  27. cap_sdk_python-2.5.4/tests/test_middleware.py +162 -0
  28. cap_sdk_python-2.5.4/tests/test_progress.py +260 -0
  29. cap_sdk_python-2.5.4/tests/test_testing.py +45 -0
  30. cap_sdk_python-2.5.2/cap/__init__.py +0 -70
  31. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/__init__.py +0 -0
  32. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/__init__.py +0 -0
  33. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/__init__.py +0 -0
  34. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/__init__.py +0 -0
  35. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/alert_pb2.py +0 -0
  36. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/buspacket_pb2.py +0 -0
  37. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/handshake_pb2.py +0 -0
  38. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/heartbeat_pb2.py +0 -0
  39. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/job_pb2.py +0 -0
  40. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/pb/cordum/agent/v1/safety_pb2.py +0 -0
  41. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/subjects.py +0 -0
  42. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap/validate.py +0 -0
  43. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap_sdk_python.egg-info/dependency_links.txt +0 -0
  44. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/cap_sdk_python.egg-info/top_level.txt +0 -0
  45. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/setup.cfg +0 -0
  46. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/tests/test_conformance.py +0 -0
  47. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/tests/test_runtime.py +0 -0
  48. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/tests/test_sdk.py +0 -0
  49. {cap_sdk_python-2.5.2 → cap_sdk_python-2.5.4}/tests/test_validate.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cap-sdk-python
3
- Version: 2.5.2
3
+ Version: 2.5.4
4
4
  Summary: CAP (Cordum Agent Protocol) Python SDK
5
5
  Author-email: Cordum <eng@cordum.io>
6
6
  License-Expression: Apache-2.0
@@ -16,6 +16,8 @@ Requires-Dist: nats-py>=2.6.0
16
16
  Requires-Dist: cryptography>=41.0.0
17
17
  Requires-Dist: pydantic>=2.6.0
18
18
  Requires-Dist: redis>=5.0.0
19
+ Provides-Extra: dev
20
+ Requires-Dist: pdoc>=14.0; extra == "dev"
19
21
 
20
22
  # CAP Python SDK
21
23
 
@@ -97,6 +99,27 @@ Asyncio-first SDK with NATS helpers for CAP workers and clients.
97
99
 
98
100
  Swap out `cap.bus` if you need a different transport.
99
101
 
102
+ ## Testing
103
+
104
+ The `cap.testing` module lets you test handlers without running NATS or Redis.
105
+
106
+ ```python
107
+ from cap.testing import run_handler
108
+ from cap.pb.cordum.agent.v1 import job_pb2
109
+
110
+ async def test_echo():
111
+ result = await run_handler(
112
+ lambda ctx, data: {"echo": data["prompt"]},
113
+ {"prompt": "hello"},
114
+ topic="job.echo",
115
+ )
116
+ assert result.status == job_pb2.JOB_STATUS_SUCCEEDED
117
+ ```
118
+
119
+ - `run_handler(handler, input, **options)` — runs a single handler invocation and returns the `JobResult`.
120
+ - `create_test_agent(**options)` — returns `(agent, mock_nats, store)` pre-wired with `MockNATS` + `InMemoryBlobStore`.
121
+ - `MockNATS` — in-memory NATS mock for custom test setups.
122
+
100
123
  ## Runtime (High-Level SDK)
101
124
  The runtime hides NATS/Redis plumbing and gives you typed handlers.
102
125
 
@@ -120,6 +143,92 @@ async def summarize(ctx: Context, data: Input) -> Output:
120
143
  asyncio.run(agent.run())
121
144
  ```
122
145
 
146
+ ### Middleware
147
+
148
+ Add cross-cutting concerns (logging, auth, metrics) without modifying handlers:
149
+
150
+ ```python
151
+ from cap.middleware import logging_middleware
152
+
153
+ # Built-in logging middleware
154
+ agent.use(logging_middleware())
155
+
156
+ # Custom middleware
157
+ async def timing(ctx, data, next_fn):
158
+ import time
159
+ start = time.monotonic()
160
+ result = await next_fn(ctx, data)
161
+ elapsed = time.monotonic() - start
162
+ print(f"job {ctx.job_id} took {elapsed:.3f}s")
163
+ return result
164
+
165
+ agent.use(timing)
166
+ ```
167
+
168
+ Middleware executes in registration order (FIFO). Each can inspect context,
169
+ measure timing, or short-circuit by returning without calling `next_fn`.
170
+
123
171
  ### Environment
124
172
  - `NATS_URL` (default `nats://127.0.0.1:4222`)
125
173
  - `REDIS_URL` (default `redis://127.0.0.1:6379/0`)
174
+
175
+ ## Generating API Docs
176
+
177
+ Generate HTML API reference locally using [pdoc](https://pdoc.dev/):
178
+
179
+ ```bash
180
+ pip install cap-sdk-python[dev]
181
+ pdoc ./cap --output-dir docs
182
+ ```
183
+
184
+ Output is written to `docs/` (gitignored). Open `docs/index.html` to browse.
185
+
186
+ ## Observability
187
+
188
+ ### Structured Logging
189
+ The runtime Agent and Worker use `logging.Logger` (stdlib) for structured logging. All log calls include contextual fields (`job_id`, `trace_id`, `topic`, `sender_id`). Pass a custom logger or leave as default:
190
+
191
+ ```python
192
+ import logging
193
+ from cap.runtime import Agent
194
+
195
+ logger = logging.getLogger("my-agent")
196
+ logger.setLevel(logging.DEBUG)
197
+ agent = Agent(logger=logger)
198
+ ```
199
+
200
+ ### MetricsHook
201
+ Implement the `MetricsHook` protocol to integrate with Prometheus, OpenTelemetry, or any metrics system:
202
+
203
+ ```python
204
+ from typing import Protocol
205
+
206
+ class MetricsHook(Protocol):
207
+ def on_job_received(self, job_id: str, topic: str) -> None: ...
208
+ def on_job_completed(self, job_id: str, duration_ms: int, status: str) -> None: ...
209
+ def on_job_failed(self, job_id: str, error_msg: str) -> None: ...
210
+ def on_heartbeat_sent(self, worker_id: str) -> None: ...
211
+ ```
212
+
213
+ The default is `NoopMetrics` (zero overhead). Example Prometheus integration:
214
+
215
+ ```python
216
+ from cap.runtime import Agent
217
+
218
+ class PromMetrics:
219
+ def on_job_received(self, job_id, topic):
220
+ jobs_received.labels(topic=topic).inc()
221
+
222
+ def on_job_completed(self, job_id, duration_ms, status):
223
+ job_duration.labels(status=status).observe(duration_ms)
224
+
225
+ def on_job_failed(self, job_id, error_msg):
226
+ jobs_failed.inc()
227
+
228
+ def on_heartbeat_sent(self, worker_id):
229
+ pass
230
+
231
+ agent = Agent(metrics=PromMetrics())
232
+ ```
233
+
234
+ The `trace_id` is propagated through all log and metrics calls for distributed tracing correlation.
@@ -78,6 +78,27 @@ Asyncio-first SDK with NATS helpers for CAP workers and clients.
78
78
 
79
79
  Swap out `cap.bus` if you need a different transport.
80
80
 
81
+ ## Testing
82
+
83
+ The `cap.testing` module lets you test handlers without running NATS or Redis.
84
+
85
+ ```python
86
+ from cap.testing import run_handler
87
+ from cap.pb.cordum.agent.v1 import job_pb2
88
+
89
+ async def test_echo():
90
+ result = await run_handler(
91
+ lambda ctx, data: {"echo": data["prompt"]},
92
+ {"prompt": "hello"},
93
+ topic="job.echo",
94
+ )
95
+ assert result.status == job_pb2.JOB_STATUS_SUCCEEDED
96
+ ```
97
+
98
+ - `run_handler(handler, input, **options)` — runs a single handler invocation and returns the `JobResult`.
99
+ - `create_test_agent(**options)` — returns `(agent, mock_nats, store)` pre-wired with `MockNATS` + `InMemoryBlobStore`.
100
+ - `MockNATS` — in-memory NATS mock for custom test setups.
101
+
81
102
  ## Runtime (High-Level SDK)
82
103
  The runtime hides NATS/Redis plumbing and gives you typed handlers.
83
104
 
@@ -101,6 +122,92 @@ async def summarize(ctx: Context, data: Input) -> Output:
101
122
  asyncio.run(agent.run())
102
123
  ```
103
124
 
125
+ ### Middleware
126
+
127
+ Add cross-cutting concerns (logging, auth, metrics) without modifying handlers:
128
+
129
+ ```python
130
+ from cap.middleware import logging_middleware
131
+
132
+ # Built-in logging middleware
133
+ agent.use(logging_middleware())
134
+
135
+ # Custom middleware
136
+ async def timing(ctx, data, next_fn):
137
+ import time
138
+ start = time.monotonic()
139
+ result = await next_fn(ctx, data)
140
+ elapsed = time.monotonic() - start
141
+ print(f"job {ctx.job_id} took {elapsed:.3f}s")
142
+ return result
143
+
144
+ agent.use(timing)
145
+ ```
146
+
147
+ Middleware executes in registration order (FIFO). Each can inspect context,
148
+ measure timing, or short-circuit by returning without calling `next_fn`.
149
+
104
150
  ### Environment
105
151
  - `NATS_URL` (default `nats://127.0.0.1:4222`)
106
152
  - `REDIS_URL` (default `redis://127.0.0.1:6379/0`)
153
+
154
+ ## Generating API Docs
155
+
156
+ Generate HTML API reference locally using [pdoc](https://pdoc.dev/):
157
+
158
+ ```bash
159
+ pip install cap-sdk-python[dev]
160
+ pdoc ./cap --output-dir docs
161
+ ```
162
+
163
+ Output is written to `docs/` (gitignored). Open `docs/index.html` to browse.
164
+
165
+ ## Observability
166
+
167
+ ### Structured Logging
168
+ The runtime Agent and Worker use `logging.Logger` (stdlib) for structured logging. All log calls include contextual fields (`job_id`, `trace_id`, `topic`, `sender_id`). Pass a custom logger or leave as default:
169
+
170
+ ```python
171
+ import logging
172
+ from cap.runtime import Agent
173
+
174
+ logger = logging.getLogger("my-agent")
175
+ logger.setLevel(logging.DEBUG)
176
+ agent = Agent(logger=logger)
177
+ ```
178
+
179
+ ### MetricsHook
180
+ Implement the `MetricsHook` protocol to integrate with Prometheus, OpenTelemetry, or any metrics system:
181
+
182
+ ```python
183
+ from typing import Protocol
184
+
185
+ class MetricsHook(Protocol):
186
+ def on_job_received(self, job_id: str, topic: str) -> None: ...
187
+ def on_job_completed(self, job_id: str, duration_ms: int, status: str) -> None: ...
188
+ def on_job_failed(self, job_id: str, error_msg: str) -> None: ...
189
+ def on_heartbeat_sent(self, worker_id: str) -> None: ...
190
+ ```
191
+
192
+ The default is `NoopMetrics` (zero overhead). Example Prometheus integration:
193
+
194
+ ```python
195
+ from cap.runtime import Agent
196
+
197
+ class PromMetrics:
198
+ def on_job_received(self, job_id, topic):
199
+ jobs_received.labels(topic=topic).inc()
200
+
201
+ def on_job_completed(self, job_id, duration_ms, status):
202
+ job_duration.labels(status=status).observe(duration_ms)
203
+
204
+ def on_job_failed(self, job_id, error_msg):
205
+ jobs_failed.inc()
206
+
207
+ def on_heartbeat_sent(self, worker_id):
208
+ pass
209
+
210
+ agent = Agent(metrics=PromMetrics())
211
+ ```
212
+
213
+ The `trace_id` is propagated through all log and metrics calls for distributed tracing correlation.
@@ -0,0 +1,145 @@
1
+ """CAP (Cordum Agent Protocol) SDK for Python.
2
+
3
+ Provides helpers for submitting jobs, running workers, and building
4
+ high-level agents on the CAP bus.
5
+ """
6
+
7
+ import sys
8
+ import types
9
+
10
try:
    # Probe for the real module; when it imports cleanly nothing else happens.
    from google.protobuf import runtime_version as _runtime_version  # noqa: F401
except Exception:
    # Fallback: register a minimal stand-in under the same dotted name.
    # NOTE(review): presumably this keeps the bundled generated pb2 modules
    # importable on protobuf releases without `runtime_version` -- confirm.
    try:
        import google.protobuf as _protobuf
    except Exception:
        _protobuf = None

    class _Domain:
        """Stand-in for ``runtime_version.Domain``; only ``PUBLIC`` is provided."""

        PUBLIC = 0

    def _validate(*_args, **_kwargs):
        """Accept any arguments and perform no validation."""
        return None

    # Use a real module object (not a SimpleNamespace) so the sys.modules
    # entry carries ``__name__`` and behaves like a module for import tooling.
    _shim = types.ModuleType("google.protobuf.runtime_version")
    _shim.Domain = _Domain
    _shim.ValidateProtobufRuntimeVersion = _validate
    sys.modules["google.protobuf.runtime_version"] = _shim
    if _protobuf is not None:
        # Also expose it as an attribute of the parent package so that
        # ``from google.protobuf import runtime_version`` resolves.
        setattr(_protobuf, "runtime_version", _shim)
31
+
32
+ from .client import submit_job
33
+ from .worker import run_worker
34
+ from .bus import connect_nats
35
+ from .runtime import Agent, Context, BlobStore, RedisBlobStore, InMemoryBlobStore
36
+ from .middleware import Middleware, NextFn, logging_middleware
37
+ from .metrics import MetricsHook, NoopMetrics
38
+ from .heartbeat import (
39
+ heartbeat_payload,
40
+ heartbeat_payload_with_memory,
41
+ heartbeat_payload_with_progress,
42
+ emit_heartbeat,
43
+ heartbeat_loop,
44
+ )
45
+ from .progress import (
46
+ progress_payload,
47
+ cancel_payload,
48
+ emit_progress,
49
+ emit_cancel,
50
+ )
51
+ from .validate import (
52
+ ValidationError,
53
+ validate_job_request,
54
+ validate_job_result,
55
+ validate_bus_packet,
56
+ )
57
+ from .errors import (
58
+ CAPError,
59
+ VersionMismatchError,
60
+ MalformedPacketError,
61
+ UnknownPayloadError,
62
+ SignatureInvalidError,
63
+ SignatureMissingError,
64
+ JobTimeoutError,
65
+ ResourceExhaustedError,
66
+ PermissionDeniedError,
67
+ InvalidInputError,
68
+ JobNotFoundError,
69
+ DuplicateJobError,
70
+ WorkerUnavailableError,
71
+ SafetyDeniedError,
72
+ PolicyViolationError,
73
+ RiskTagBlockedError,
74
+ PublishFailedError,
75
+ SubscribeFailedError,
76
+ ConnectionLostError,
77
+ )
78
+ from .subjects import (
79
+ SUBJECT_SUBMIT,
80
+ SUBJECT_RESULT,
81
+ SUBJECT_HEARTBEAT,
82
+ SUBJECT_ALERT,
83
+ SUBJECT_PROGRESS,
84
+ SUBJECT_CANCEL,
85
+ SUBJECT_DLQ,
86
+ SUBJECT_WORKFLOW_EVENT,
87
+ SUBJECT_HANDSHAKE,
88
+ )
89
+
90
# Public names re-exported by the package, grouped by the submodule they are
# imported from above.
__all__ = [
    # .client / .worker / .bus
    "submit_job",
    "run_worker",
    "connect_nats",
    # .runtime
    "Agent",
    "Context",
    "BlobStore",
    "RedisBlobStore",
    "InMemoryBlobStore",
    # .middleware
    "Middleware",
    "NextFn",
    "logging_middleware",
    # .metrics
    "MetricsHook",
    "NoopMetrics",
    # .heartbeat
    "heartbeat_payload",
    "heartbeat_payload_with_memory",
    "heartbeat_payload_with_progress",
    "emit_heartbeat",
    "heartbeat_loop",
    # .progress
    "progress_payload",
    "cancel_payload",
    "emit_progress",
    "emit_cancel",
    # .validate
    "ValidationError",
    "validate_job_request",
    "validate_job_result",
    "validate_bus_packet",
    # .subjects
    "SUBJECT_SUBMIT",
    "SUBJECT_RESULT",
    "SUBJECT_HEARTBEAT",
    "SUBJECT_ALERT",
    "SUBJECT_PROGRESS",
    "SUBJECT_CANCEL",
    "SUBJECT_DLQ",
    "SUBJECT_WORKFLOW_EVENT",
    "SUBJECT_HANDSHAKE",
    # .errors
    "CAPError",
    "VersionMismatchError",
    "MalformedPacketError",
    "UnknownPayloadError",
    "SignatureInvalidError",
    "SignatureMissingError",
    "JobTimeoutError",
    "ResourceExhaustedError",
    "PermissionDeniedError",
    "InvalidInputError",
    "JobNotFoundError",
    "DuplicateJobError",
    "WorkerUnavailableError",
    "SafetyDeniedError",
    "PolicyViolationError",
    "RiskTagBlockedError",
    "PublishFailedError",
    "SubscribeFailedError",
    "ConnectionLostError",
]
@@ -3,6 +3,8 @@ from typing import Optional
3
3
 
4
4
 
5
5
  class NATSConfig:
6
+ """NATS connection configuration."""
7
+
6
8
  def __init__(
7
9
  self,
8
10
  url: str,
@@ -19,6 +21,17 @@ class NATSConfig:
19
21
 
20
22
 
21
23
  async def connect_nats(cfg: NATSConfig):
24
+ """Open a NATS connection using the provided configuration.
25
+
26
+ Args:
27
+ cfg: Connection settings.
28
+
29
+ Returns:
30
+ A connected NATS client.
31
+
32
+ Raises:
33
+ RuntimeError: If the ``nats-py`` package is not installed.
34
+ """
22
35
  try:
23
36
  import nats # type: ignore
24
37
  except ImportError as exc:
@@ -17,6 +17,15 @@ async def submit_job(
17
17
  sender_id: str,
18
18
  private_key: Optional[ec.EllipticCurvePrivateKey] = None,
19
19
  ):
20
+ """Publish a JobRequest onto the CAP submit subject.
21
+
22
+ Args:
23
+ nc: An active NATS connection.
24
+ job_request: A protobuf JobRequest message.
25
+ trace_id: Distributed trace identifier propagated through the bus.
26
+ sender_id: Identity of the sender (used in the BusPacket envelope).
27
+ private_key: Optional ECDSA private key for signing the packet.
28
+ """
20
29
  ts = timestamp_pb2.Timestamp()
21
30
  ts.GetCurrentTime()
22
31
  packet = buspacket_pb2.BusPacket()
@@ -0,0 +1,116 @@
1
+ """Typed error classes matching the CAP ErrorCode registry.
2
+
3
+ See spec/13-error-codes.md for the full taxonomy.
4
+ """
5
+
6
+
7
class CAPError(Exception):
    """Root of the CAP error hierarchy.

    Each subclass overrides the two class attributes below with its own
    symbolic and numeric identifiers.
    """

    # Symbolic error-code name.
    code: str = "ERROR_CODE_UNSPECIFIED"
    # Numeric error code paired with ``code``.
    numeric_code: int = 0

    def __init__(self, message: str) -> None:
        """Create the error with a single human-readable *message*."""
        super().__init__(message)


# Protocol errors (100-199)


class VersionMismatchError(CAPError):
    """Protocol error 100: protocol version mismatch."""

    code = "ERROR_CODE_PROTOCOL_VERSION_MISMATCH"
    numeric_code = 100


class MalformedPacketError(CAPError):
    """Protocol error 101: malformed packet."""

    code = "ERROR_CODE_PROTOCOL_MALFORMED_PACKET"
    numeric_code = 101


class UnknownPayloadError(CAPError):
    """Protocol error 102: unknown payload."""

    code = "ERROR_CODE_PROTOCOL_UNKNOWN_PAYLOAD"
    numeric_code = 102


class SignatureInvalidError(CAPError):
    """Protocol error 103: signature invalid."""

    code = "ERROR_CODE_PROTOCOL_SIGNATURE_INVALID"
    numeric_code = 103


class SignatureMissingError(CAPError):
    """Protocol error 104: signature missing."""

    code = "ERROR_CODE_PROTOCOL_SIGNATURE_MISSING"
    numeric_code = 104


# Job errors (200-299)


class JobTimeoutError(CAPError):
    """Job error 200: timeout."""

    code = "ERROR_CODE_JOB_TIMEOUT"
    numeric_code = 200


class ResourceExhaustedError(CAPError):
    """Job error 201: resource exhausted."""

    code = "ERROR_CODE_JOB_RESOURCE_EXHAUSTED"
    numeric_code = 201


class PermissionDeniedError(CAPError):
    """Job error 202: permission denied."""

    code = "ERROR_CODE_JOB_PERMISSION_DENIED"
    numeric_code = 202


class InvalidInputError(CAPError):
    """Job error 203: invalid input."""

    code = "ERROR_CODE_JOB_INVALID_INPUT"
    numeric_code = 203


class JobNotFoundError(CAPError):
    """Job error 204: job not found."""

    code = "ERROR_CODE_JOB_NOT_FOUND"
    numeric_code = 204


class DuplicateJobError(CAPError):
    """Job error 205: duplicate job."""

    code = "ERROR_CODE_JOB_DUPLICATE"
    numeric_code = 205


class WorkerUnavailableError(CAPError):
    """Job error 206: worker unavailable."""

    code = "ERROR_CODE_JOB_WORKER_UNAVAILABLE"
    numeric_code = 206


# Safety errors (300-399)


class SafetyDeniedError(CAPError):
    """Safety error 300: denied."""

    code = "ERROR_CODE_SAFETY_DENIED"
    numeric_code = 300


class PolicyViolationError(CAPError):
    """Safety error 301: policy violation."""

    code = "ERROR_CODE_SAFETY_POLICY_VIOLATION"
    numeric_code = 301


class RiskTagBlockedError(CAPError):
    """Safety error 302: risk tag blocked."""

    code = "ERROR_CODE_SAFETY_RISK_TAG_BLOCKED"
    numeric_code = 302


# Transport errors (400-499)


class PublishFailedError(CAPError):
    """Transport error 400: publish failed."""

    code = "ERROR_CODE_TRANSPORT_PUBLISH_FAILED"
    numeric_code = 400


class SubscribeFailedError(CAPError):
    """Transport error 401: subscribe failed."""

    code = "ERROR_CODE_TRANSPORT_SUBSCRIBE_FAILED"
    numeric_code = 401


class ConnectionLostError(CAPError):
    """Transport error 402: connection lost."""

    code = "ERROR_CODE_TRANSPORT_CONNECTION_LOST"
    numeric_code = 402
@@ -0,0 +1,153 @@
1
+ """Heartbeat helpers for CAP Python SDK.
2
+
3
+ These helpers build and publish heartbeat BusPacket envelopes.
4
+ """
5
+
6
+ import asyncio
7
+ import logging
8
+ from typing import Callable, Optional
9
+
10
+ from cryptography.hazmat.primitives import hashes
11
+ from cryptography.hazmat.primitives.asymmetric import ec
12
+ from google.protobuf import timestamp_pb2
13
+
14
+ from cap.client import DEFAULT_PROTOCOL_VERSION
15
+ from cap.metrics import MetricsHook
16
+ from cap.pb.cordum.agent.v1 import buspacket_pb2, heartbeat_pb2
17
+ from cap.subjects import SUBJECT_HEARTBEAT
18
+
19
+ _logger = logging.getLogger("cap.heartbeat")
20
+
21
+
22
def heartbeat_payload(
    worker_id: str,
    pool: str,
    active_jobs: int,
    max_parallel: int,
    cpu_load: float,
) -> bytes:
    """Return a serialized heartbeat packet reporting CPU load only.

    Delegates to ``heartbeat_payload_with_progress`` and leaves the memory
    and progress fields at that function's defaults.
    """
    # Arguments are forwarded in the delegate's declared parameter order.
    return heartbeat_payload_with_progress(
        worker_id, pool, active_jobs, max_parallel, cpu_load
    )
37
+
38
+
39
def heartbeat_payload_with_memory(
    worker_id: str,
    pool: str,
    active_jobs: int,
    max_parallel: int,
    cpu_load: float,
    memory_load: float,
) -> bytes:
    """Return a serialized heartbeat packet reporting CPU and memory load.

    Delegates to ``heartbeat_payload_with_progress`` and leaves the progress
    fields at that function's defaults.
    """
    # Arguments are forwarded in the delegate's declared parameter order.
    return heartbeat_payload_with_progress(
        worker_id, pool, active_jobs, max_parallel, cpu_load, memory_load
    )
56
+
57
+
58
def heartbeat_payload_with_progress(
    worker_id: str,
    pool: str,
    active_jobs: int,
    max_parallel: int,
    cpu_load: float,
    memory_load: float = 0.0,
    progress_pct: int = 0,
    last_memo: str = "",
) -> bytes:
    """Serialize a heartbeat ``BusPacket`` with optional memory/progress data.

    The envelope uses ``worker_id`` as its sender, ``DEFAULT_PROTOCOL_VERSION``
    as its protocol version and the current time as ``created_at``.

    Returns:
        The packet bytes, serialized deterministically.
    """
    now = timestamp_pb2.Timestamp()
    now.GetCurrentTime()

    beat = heartbeat_pb2.Heartbeat(
        worker_id=worker_id,
        pool=pool,
        active_jobs=active_jobs,
        max_parallel_jobs=max_parallel,
        cpu_load=cpu_load,
        memory_load=memory_load,
        progress_pct=progress_pct,
        last_memo=last_memo,
    )

    envelope = buspacket_pb2.BusPacket()
    envelope.sender_id = worker_id
    envelope.protocol_version = DEFAULT_PROTOCOL_VERSION
    envelope.created_at.CopyFrom(now)
    envelope.heartbeat.CopyFrom(beat)
    return envelope.SerializeToString(deterministic=True)
89
+
90
+
91
async def emit_heartbeat(
    nc,
    payload: bytes,
    private_key: Optional[ec.EllipticCurvePrivateKey] = None,
) -> None:
    """Publish a single heartbeat packet on ``SUBJECT_HEARTBEAT``.

    Args:
        nc: Connection object exposing an awaitable ``publish(subject, data)``.
        payload: Serialized ``BusPacket`` bytes.
        private_key: When given, the packet is re-parsed, signed and
            re-serialized before publishing; otherwise ``payload`` is sent
            unchanged.
    """
    if private_key is None:
        await nc.publish(SUBJECT_HEARTBEAT, payload)
        return

    envelope = buspacket_pb2.BusPacket()
    envelope.ParseFromString(payload)
    # Sign the deterministic serialization of the packet with its signature
    # field cleared, using ECDSA over SHA-256.
    envelope.ClearField("signature")
    to_sign = envelope.SerializeToString(deterministic=True)
    envelope.signature = private_key.sign(to_sign, ec.ECDSA(hashes.SHA256()))
    signed = envelope.SerializeToString(deterministic=True)

    await nc.publish(SUBJECT_HEARTBEAT, signed)
107
+
108
+
109
async def heartbeat_loop(
    nc,
    payload_fn: Callable[[], bytes],
    interval: float = 5.0,
    private_key: Optional[ec.EllipticCurvePrivateKey] = None,
    metrics: MetricsHook | None = None,
    cancel_event: asyncio.Event | None = None,
) -> None:
    """Emit heartbeat packets periodically until cancelled.

    Each iteration waits ``interval`` seconds, then builds a payload with
    ``payload_fn`` and publishes it via ``emit_heartbeat``.

    Args:
        nc: Connection object passed through to ``emit_heartbeat``.
        payload_fn: Zero-argument callable returning serialized packet bytes.
        interval: Seconds between heartbeats; negative values are treated as 0.
        private_key: Optional signing key passed through to ``emit_heartbeat``.
        metrics: Optional hook; ``on_heartbeat_sent`` is called after each
            successful publish.
        cancel_event: Optional event; once set, the loop returns instead of
            emitting further heartbeats.

    Raises:
        asyncio.CancelledError: Propagated when the surrounding task is
            cancelled. Any other exception raised while building or
            publishing a heartbeat is logged and the loop continues.
    """
    sleep_interval = max(0.0, interval)

    while True:
        if cancel_event is not None and cancel_event.is_set():
            return

        if cancel_event is None:
            await asyncio.sleep(sleep_interval)
        else:
            # Wait for either the interval to elapse or the event to fire.
            # wait_for cancels the inner waiter both on timeout and when this
            # coroutine itself is cancelled, so no helper task is left pending.
            try:
                await asyncio.wait_for(cancel_event.wait(), timeout=sleep_interval)
            except asyncio.TimeoutError:
                pass  # interval elapsed without cancellation; emit below
            else:
                if cancel_event.is_set():
                    return

        try:
            payload = payload_fn()
            await emit_heartbeat(nc=nc, payload=payload, private_key=private_key)
            if metrics is not None:
                # Recover the worker id from the payload itself, falling back
                # to the envelope sender when the heartbeat field is empty.
                packet = buspacket_pb2.BusPacket()
                packet.ParseFromString(payload)
                worker_id = packet.heartbeat.worker_id or packet.sender_id
                metrics.on_heartbeat_sent(worker_id)
        except asyncio.CancelledError:
            raise
        except Exception:
            # Best effort: one failed heartbeat must not stop the loop.
            _logger.exception("heartbeat emission failed")