carl-agent-server 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- carl_agent_server-0.1.0/.github/workflows/ci.yml +41 -0
- carl_agent_server-0.1.0/.github/workflows/release.yml +85 -0
- carl_agent_server-0.1.0/.gitignore +26 -0
- carl_agent_server-0.1.0/LICENSE +21 -0
- carl_agent_server-0.1.0/PKG-INFO +201 -0
- carl_agent_server-0.1.0/README.md +175 -0
- carl_agent_server-0.1.0/pyproject.toml +66 -0
- carl_agent_server-0.1.0/src/carl_agent_server/__init__.py +38 -0
- carl_agent_server-0.1.0/src/carl_agent_server/agent.py +688 -0
- carl_agent_server-0.1.0/src/carl_agent_server/app.py +297 -0
- carl_agent_server-0.1.0/src/carl_agent_server/chain_source.py +126 -0
- carl_agent_server-0.1.0/src/carl_agent_server/cli.py +95 -0
- carl_agent_server-0.1.0/src/carl_agent_server/cost.py +36 -0
- carl_agent_server-0.1.0/src/carl_agent_server/hub.py +228 -0
- carl_agent_server-0.1.0/src/carl_agent_server/llm.py +39 -0
- carl_agent_server-0.1.0/src/carl_agent_server/models.py +254 -0
- carl_agent_server-0.1.0/src/carl_agent_server/run_records.py +88 -0
- carl_agent_server-0.1.0/src/carl_agent_server/sessions.py +115 -0
- carl_agent_server-0.1.0/src/carl_agent_server/timeouts.py +48 -0
- carl_agent_server-0.1.0/src/carl_agent_server/tools.py +110 -0
- carl_agent_server-0.1.0/tests/__init__.py +0 -0
- carl_agent_server-0.1.0/tests/conftest.py +88 -0
- carl_agent_server-0.1.0/tests/test_app.py +113 -0
- carl_agent_server-0.1.0/tests/test_async_runs.py +94 -0
- carl_agent_server-0.1.0/tests/test_attached.py +213 -0
- carl_agent_server-0.1.0/tests/test_auth.py +85 -0
- carl_agent_server-0.1.0/tests/test_chat.py +123 -0
- carl_agent_server-0.1.0/tests/test_cli.py +69 -0
- carl_agent_server-0.1.0/tests/test_hub.py +154 -0
- carl_agent_server-0.1.0/tests/test_human_input.py +162 -0
- carl_agent_server-0.1.0/tests/test_metrics.py +125 -0
- carl_agent_server-0.1.0/tests/test_run_records.py +75 -0
- carl_agent_server-0.1.0/tests/test_schedule.py +102 -0
- carl_agent_server-0.1.0/tests/test_sessions.py +94 -0
- carl_agent_server-0.1.0/tests/test_timeouts.py +99 -0
- carl_agent_server-0.1.0/tests/test_tools.py +34 -0
- carl_agent_server-0.1.0/uv.lock +770 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
lint-and-test:
|
|
13
|
+
name: Lint + tests (py${{ matrix.python }})
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
fail-fast: false
|
|
17
|
+
matrix:
|
|
18
|
+
python: ["3.12", "3.13"]
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout repository
|
|
21
|
+
uses: actions/checkout@v5
|
|
22
|
+
|
|
23
|
+
- name: Install uv
|
|
24
|
+
uses: astral-sh/setup-uv@v6
|
|
25
|
+
with:
|
|
26
|
+
enable-cache: true
|
|
27
|
+
|
|
28
|
+
- name: Set up Python ${{ matrix.python }}
|
|
29
|
+
run: uv python install ${{ matrix.python }}
|
|
30
|
+
|
|
31
|
+
- name: Sync dev dependencies
|
|
32
|
+
run: uv sync --group dev
|
|
33
|
+
|
|
34
|
+
- name: Run ruff
|
|
35
|
+
run: uv run ruff check src/ tests/
|
|
36
|
+
|
|
37
|
+
- name: Run mypy
|
|
38
|
+
run: uv run mypy src/
|
|
39
|
+
|
|
40
|
+
- name: Run pytest
|
|
41
|
+
run: uv run pytest tests/ -q --maxfail=1
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
name: Release to PyPI
|
|
2
|
+
|
|
3
|
+
# Runs only after the CI workflow (lint + tests) finishes successfully on
|
|
4
|
+
# main. If CI was green and the pyproject version isn't on PyPI yet, build
|
|
5
|
+
# and publish `carl-agent-server`. Tests therefore always gate the release.
|
|
6
|
+
on:
|
|
7
|
+
workflow_run:
|
|
8
|
+
workflows: ["CI"]
|
|
9
|
+
types: [completed]
|
|
10
|
+
branches: [main]
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
check-version:
|
|
14
|
+
# Skip entirely unless the triggering CI run passed.
|
|
15
|
+
if: ${{ github.event.workflow_run.conclusion == 'success' }}
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
outputs:
|
|
18
|
+
should_publish: ${{ steps.check.outputs.should_publish }}
|
|
19
|
+
version: ${{ steps.check.outputs.version }}
|
|
20
|
+
steps:
|
|
21
|
+
- uses: actions/checkout@v5
|
|
22
|
+
with:
|
|
23
|
+
# Build/inspect the exact commit CI tested, not the branch tip.
|
|
24
|
+
ref: ${{ github.event.workflow_run.head_sha }}
|
|
25
|
+
|
|
26
|
+
- name: Check whether version is already on PyPI
|
|
27
|
+
id: check
|
|
28
|
+
run: |
|
|
29
|
+
version=$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "(.*)"/\1/')
|
|
30
|
+
echo "pyproject version: $version"
|
|
31
|
+
echo "version=$version" >> "$GITHUB_OUTPUT"
|
|
32
|
+
|
|
33
|
+
status=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/carl-agent-server/$version/json")
|
|
34
|
+
echo "PyPI lookup HTTP status: $status"
|
|
35
|
+
case "$status" in
|
|
36
|
+
200)
|
|
37
|
+
echo "Version $version already on PyPI — nothing to publish."
|
|
38
|
+
echo "should_publish=false" >> "$GITHUB_OUTPUT"
|
|
39
|
+
;;
|
|
40
|
+
404)
|
|
41
|
+
echo "Version $version not on PyPI — will publish."
|
|
42
|
+
echo "should_publish=true" >> "$GITHUB_OUTPUT"
|
|
43
|
+
;;
|
|
44
|
+
*)
|
|
45
|
+
echo "Unexpected status $status from PyPI; refusing to guess." >&2
|
|
46
|
+
exit 1
|
|
47
|
+
;;
|
|
48
|
+
esac
|
|
49
|
+
|
|
50
|
+
release:
|
|
51
|
+
needs: check-version
|
|
52
|
+
if: needs.check-version.outputs.should_publish == 'true'
|
|
53
|
+
runs-on: ubuntu-latest
|
|
54
|
+
permissions:
|
|
55
|
+
contents: write # push the v<version> tag
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v5
|
|
58
|
+
with:
|
|
59
|
+
ref: ${{ github.event.workflow_run.head_sha }}
|
|
60
|
+
|
|
61
|
+
- name: Set up Python
|
|
62
|
+
uses: actions/setup-python@v6
|
|
63
|
+
with:
|
|
64
|
+
python-version: "3.12"
|
|
65
|
+
|
|
66
|
+
- name: Install build tooling
|
|
67
|
+
run: python -m pip install --upgrade build
|
|
68
|
+
|
|
69
|
+
- name: Build sdist + wheel
|
|
70
|
+
run: python -m build
|
|
71
|
+
|
|
72
|
+
- name: Publish to PyPI
|
|
73
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
74
|
+
with:
|
|
75
|
+
password: ${{ secrets.PYPI_API_TOKEN }}
|
|
76
|
+
|
|
77
|
+
- name: Tag the release
|
|
78
|
+
run: |
|
|
79
|
+
version="${{ needs.check-version.outputs.version }}"
|
|
80
|
+
if git ls-remote --exit-code --tags origin "refs/tags/v$version" >/dev/null 2>&1; then
|
|
81
|
+
echo "Tag v$version already exists — skipping."
|
|
82
|
+
else
|
|
83
|
+
git tag "v$version"
|
|
84
|
+
git push origin "v$version"
|
|
85
|
+
fi
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
|
|
8
|
+
# Virtual environments
|
|
9
|
+
.venv/
|
|
10
|
+
venv/
|
|
11
|
+
|
|
12
|
+
# Tooling caches
|
|
13
|
+
.pytest_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.coverage
|
|
17
|
+
|
|
18
|
+
# IDE / OS
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
.DS_Store
|
|
22
|
+
__MACOSX/
|
|
23
|
+
|
|
24
|
+
# Local state
|
|
25
|
+
*.log
|
|
26
|
+
agent-hub.json
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AIRI Institute
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: carl-agent-server
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Serve CARL reasoning chains as HTTP agents (FastAPI) — per-agent Swagger, hub with hot-reload from gigaevo Memory
|
|
5
|
+
Project-URL: Homepage, https://github.com/pyshka501/carl-agent-server
|
|
6
|
+
Project-URL: Repository, https://github.com/pyshka501/carl-agent-server
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: agent,carl,deployment,fastapi,reasoning
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Software Development
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Requires-Dist: fastapi>=0.115
|
|
20
|
+
Requires-Dist: gigaevo-client>=0.3.0
|
|
21
|
+
Requires-Dist: httpx>=0.27
|
|
22
|
+
Requires-Dist: mmar-carl>=0.3.0
|
|
23
|
+
Requires-Dist: pydantic>=2.0
|
|
24
|
+
Requires-Dist: uvicorn>=0.30
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# carl-agent-server
|
|
28
|
+
|
|
29
|
+
Serve [CARL](https://github.com/Glazkoff/carl) reasoning chains as HTTP
|
|
30
|
+
**agents**. One chain = one agent: a FastAPI facade with `/invoke`, `/info`,
|
|
31
|
+
`/healthz` and its **own Swagger** (`/docs`) — the OpenAPI title, description
|
|
32
|
+
and version come from the chain's metadata, so the docs page reads as *that
|
|
33
|
+
agent's* documentation and users can try the agent right from `/docs`.
|
|
34
|
+
|
|
35
|
+
Part of the MMAR ecosystem: chains are authored in CARE / MAGE, stored &
|
|
36
|
+
versioned in **gigaevo Memory** (channels: `latest` → `stable`), executed by
|
|
37
|
+
[mmar-carl](https://pypi.org/project/mmar-carl/). In *attached* mode an agent
|
|
38
|
+
follows a Memory channel and **hot-reloads on `promote`/`pin`** — promote a
|
|
39
|
+
new version and the running agent picks it up with zero downtime; pin the old
|
|
40
|
+
version to roll back. A hot-reloaded version that fails preflight is rejected
|
|
41
|
+
and the healthy one keeps serving. Updates arrive via SSE push with a polling
|
|
42
|
+
safety net (see [Hot-reload](#hot-reload-attached-mode)).
|
|
43
|
+
|
|
44
|
+
## Quickstart — the hub (recommended)
|
|
45
|
+
|
|
46
|
+
One lightweight process hosts N agents; each gets its own Swagger at
|
|
47
|
+
`/agents/<name>/docs`. Deploy/undeploy at runtime via the control API:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uv sync --group dev
|
|
51
|
+
|
|
52
|
+
AGENT_LLM_API_KEY=sk-... AGENT_LLM_MODEL=openai/gpt-4o \
|
|
53
|
+
uv run carl-agent-hub serve --port 8080
|
|
54
|
+
|
|
55
|
+
# deploy a chain JSON from disk (offline source)
|
|
56
|
+
curl -X POST localhost:8080/deployments \
|
|
57
|
+
-H 'content-type: application/json' \
|
|
58
|
+
-d '{"name": "demo", "chain_file": "./chain.json"}'
|
|
59
|
+
|
|
60
|
+
# deploy a chain entity from Memory, following its stable channel
|
|
61
|
+
curl -X POST localhost:8080/deployments \
|
|
62
|
+
-H 'content-type: application/json' \
|
|
63
|
+
-d '{"name": "weather", "entity_id": "<chain-uuid>", "channel": "stable"}'
|
|
64
|
+
|
|
65
|
+
open http://localhost:8080/agents/demo/docs # try POST /invoke from Swagger
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Deployments persist to `--state-file` (default `~/.care/agent-hub.json`) and
|
|
69
|
+
are restored on restart; `--no-persist` disables that.
|
|
70
|
+
|
|
71
|
+
### Hub control API
|
|
72
|
+
|
|
73
|
+
| Endpoint | Purpose |
|
|
74
|
+
|---|---|
|
|
75
|
+
| `GET /deployments` | list deployments (name, url, version, ready, runs) |
|
|
76
|
+
| `POST /deployments` | deploy (body = deployment spec; 409 duplicate, 422 unloadable) |
|
|
77
|
+
| `GET /deployments/{name}` | one deployment |
|
|
78
|
+
| `POST /deployments/{name}/reload` | re-fetch + preflight + swap now |
|
|
79
|
+
| `DELETE /deployments/{name}` | undeploy (unmounts the agent) |
|
|
80
|
+
| `GET /healthz` | hub liveness |
|
|
81
|
+
|
|
82
|
+
## Solo mode (one agent = one process)
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
AGENT_LLM_API_KEY=... AGENT_LLM_MODEL=openai/gpt-4o \
|
|
86
|
+
uv run carl-agent serve --chain-file ./chain.json --name demo --port 8001
|
|
87
|
+
# or attached: --entity-id <uuid> --channel stable
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Agent API (under `/agents/<name>` in the hub, or the root in solo mode)
|
|
91
|
+
|
|
92
|
+
| Endpoint | Purpose |
|
|
93
|
+
|---|---|
|
|
94
|
+
| `POST /invoke` | run the chain (`?mode=sync` default; `?mode=async` → 202 + run_id) |
|
|
95
|
+
| `POST /chat` | converse with the agent (`{message, session_id?}`); the dialogue so far is fed into the chain each turn — the chain is unchanged. Omit `session_id` to start a session (returned in the reply); sessions evict after the idle TTL |
|
|
96
|
+
| `GET /runs/{id}` | run status/result (answer, steps, tokens, time) |
|
|
97
|
+
| `POST /runs/{id}/input` | resume a run paused on a `human_input` step (status `waiting`) — `{value}`. Async-invoke flow: invoke `?mode=async`, poll until `waiting`, then provide input |
|
|
98
|
+
| `GET /runs/{id}/events` | SSE step stream (replays history, ends with `result`) |
|
|
99
|
+
| `DELETE /runs/{id}` | cooperative cancel of a running run |
|
|
100
|
+
| `GET /schedule` | the deployment's auto-invoke schedule + firing stats (D3) |
|
|
101
|
+
| `POST /schedule/trigger` | fire one scheduled run now (manual trigger) |
|
|
102
|
+
| `GET /metrics` | usage + cost: run counts, total tokens, total USD, budget + remaining (D4) |
|
|
103
|
+
| `GET /info` | agent card (name, version, channel, required tools, readiness) |
|
|
104
|
+
| `GET /healthz` / `GET /readyz` | liveness / readiness (with the reason when 503) |
|
|
105
|
+
| `GET /docs` | this agent's own Swagger |
|
|
106
|
+
|
|
107
|
+
## Environment
|
|
108
|
+
|
|
109
|
+
| Variable | Purpose |
|
|
110
|
+
|---|---|
|
|
111
|
+
| `AGENT_LLM_API_KEY` / `AGENT_LLM_MODEL` / `AGENT_LLM_BASE_URL` | OpenAI-compatible LLM the chains run on |
|
|
112
|
+
| `AGENT_MEMORY_URL` / `AGENT_MEMORY_API_KEY` | gigaevo Memory (attached mode) |
|
|
113
|
+
| `AGENT_WEB_SEARCH_API_KEY` | enables the `web_search` builtin tool (Tavily) |
|
|
114
|
+
|
|
115
|
+
Per-deployment overrides (`llm_model`, `llm_api_key`, `memory_url`, …) exist on
|
|
116
|
+
the deployment spec, but prefer env vars: hub specs persist to the state file
|
|
117
|
+
verbatim, and secrets belong in the environment, not on disk.
|
|
118
|
+
|
|
119
|
+
## Auth
|
|
120
|
+
|
|
121
|
+
Set a per-agent `api_key` on the deployment (CARE's `/deploy` generates one)
|
|
122
|
+
and `/invoke`, `/chat`, `/runs/*` require it via `X-API-Key: <key>` (or
|
|
123
|
+
`Authorization: Bearer <key>`); `/healthz`, `/readyz`, `/info`, `/docs` stay
|
|
124
|
+
open. Loopback requests (127.0.0.1/::1) skip the check unless
|
|
125
|
+
`auth_allow_localhost=false`. No `api_key` set → auth is off (localhost demo).
|
|
126
|
+
Solo: `carl-agent serve --api-key <key>` (or `AGENT_API_KEY`). The hub's
|
|
127
|
+
state file holds these keys and is written with mode `600` (owner read/write only).
|
|
128
|
+
|
|
129
|
+
## Hot-reload (attached mode)
|
|
130
|
+
|
|
131
|
+
An attached agent follows its Memory channel through **two mechanisms**:
|
|
132
|
+
|
|
133
|
+
- **SSE watcher** (fast path): a `gigaevo_client` subscription to
|
|
134
|
+
`/v1/events/stream` filtered on the entity — a `promoted`/`pinned` event
|
|
135
|
+
triggers a reload within ~a second.
|
|
136
|
+
- **Poll fallback** (safety net): every `poll_fallback_s` (default **60s**,
|
|
137
|
+
`0` disables) the agent compares the channel's current `version_id` to the
|
|
138
|
+
serving one and reloads on drift.
|
|
139
|
+
|
|
140
|
+
The fallback exists because the SSE subscription can die **silently**:
|
|
141
|
+
`gigaevo_client` (≤0.3.0) retries a failing `/v1/events/stream` in a loop
|
|
142
|
+
without ever surfacing the error — e.g. when a stale Memory deployment routes
|
|
143
|
+
that path into the generic `/v1/{entity_type}/{entity_id}` handler (400), the
|
|
144
|
+
watcher looks armed but no event is ever delivered. With the fallback, a
|
|
145
|
+
missed promote is picked up within a minute instead of never; `POST
|
|
146
|
+
/deployments/{name}/reload` remains the immediate manual lever.
|
|
147
|
+
|
|
148
|
+
To check the live events endpoint a deployment is watching:
|
|
149
|
+
`curl -N <memory-url>/v1/events/stream` must hold the connection open and
|
|
150
|
+
print `entity_changed` events on promote — an instant JSON error means the
|
|
151
|
+
Memory deployment is broken/stale and only the poll fallback (and manual
|
|
152
|
+
reload) will move versions.
|
|
153
|
+
|
|
154
|
+
Both paths funnel into one swap-safe reload: fetch → parse → preflight → swap,
|
|
155
|
+
and a failed candidate never evicts the serving chain.
|
|
156
|
+
|
|
157
|
+
## Timeouts
|
|
158
|
+
|
|
159
|
+
Two layers bound a run. `chain_timeout_s` (default 300s) is the agent's hard
|
|
160
|
+
wall-clock deadline for the whole run. `step_timeout_s` (default 60s) is a
|
|
161
|
+
default per-step timeout injected at load into any step the author left
|
|
162
|
+
unbounded — capped never to exceed the chain-level `timeout`, so it tightens
|
|
163
|
+
but never loosens authored intent. Together, they make a single hung step fail fast at
|
|
164
|
+
the step level instead of burning the whole run budget.
|
|
165
|
+
|
|
166
|
+
## Schedules
|
|
167
|
+
|
|
168
|
+
A deployment can carry a `schedule` (`{interval_s, input, enabled}`) and the
|
|
169
|
+
agent auto-invokes its chain on that cadence — the in-template scheduler,
|
|
170
|
+
lifecycle-bound (starts on activation, stops on shutdown; survives a single
|
|
171
|
+
run's failure, skips ticks while not ready). `GET /schedule` reports it,
|
|
172
|
+
`POST /schedule/trigger` fires one run now. For external cron/batch, use
|
|
173
|
+
`care run` instead; an inbound HTTP trigger is just `POST /invoke`.
|
|
174
|
+
|
|
175
|
+
## Cost & budgets
|
|
176
|
+
|
|
177
|
+
Set per-1k token prices on the deployment (`price_per_1k_input_usd`,
|
|
178
|
+
`price_per_1k_output_usd`) and each run's USD cost is computed from its token
|
|
179
|
+
usage and stamped on the run record; `GET /metrics` reports run counts, total
|
|
180
|
+
tokens and total spend. An optional `budget_usd` cap refuses further runs with
|
|
181
|
+
**402** once spent (needs pricing to take effect; the scheduler skips ticks
|
|
182
|
+
while over budget).
|
|
183
|
+
|
|
184
|
+
## Tools
|
|
185
|
+
|
|
186
|
+
Deployed agents ship a **read-only** builtin tool set: `calculator`,
|
|
187
|
+
`current_datetime`, `fetch_url`, `http_request` (GET/HEAD only — mutating
|
|
188
|
+
methods raise) and `web_search` (when a key is configured). Mutating tools
|
|
189
|
+
(e.g. `run_python`) are deliberately not registered in deployments.
|
|
190
|
+
|
|
191
|
+
## Development
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
uv run pytest tests/ -q
|
|
195
|
+
uv run ruff check src/ tests/
|
|
196
|
+
uv run mypy src/
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Status: Phase A of the production-mode plan is nearly complete (agent core,
|
|
200
|
+
async/SSE runs, attached hot-reload, the hub, CLIs). Next: run-records to
|
|
201
|
+
Memory, then the CARE control-plane integration (`/deploy` from the TUI).
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# carl-agent-server
|
|
2
|
+
|
|
3
|
+
Serve [CARL](https://github.com/Glazkoff/carl) reasoning chains as HTTP
|
|
4
|
+
**agents**. One chain = one agent: a FastAPI facade with `/invoke`, `/info`,
|
|
5
|
+
`/healthz` and its **own Swagger** (`/docs`) — the OpenAPI title, description
|
|
6
|
+
and version come from the chain's metadata, so the docs page reads as *that
|
|
7
|
+
agent's* documentation and users can try the agent right from `/docs`.
|
|
8
|
+
|
|
9
|
+
Part of the MMAR ecosystem: chains are authored in CARE / MAGE, stored &
|
|
10
|
+
versioned in **gigaevo Memory** (channels: `latest` → `stable`), executed by
|
|
11
|
+
[mmar-carl](https://pypi.org/project/mmar-carl/). In *attached* mode an agent
|
|
12
|
+
follows a Memory channel and **hot-reloads on `promote`/`pin`** — promote a
|
|
13
|
+
new version and the running agent picks it up with zero downtime; pin the old
|
|
14
|
+
version to roll back. A hot-reloaded version that fails preflight is rejected
|
|
15
|
+
and the healthy one keeps serving. Updates arrive via SSE push with a polling
|
|
16
|
+
safety net (see [Hot-reload](#hot-reload-attached-mode)).
|
|
17
|
+
|
|
18
|
+
## Quickstart — the hub (recommended)
|
|
19
|
+
|
|
20
|
+
One lightweight process hosts N agents; each gets its own Swagger at
|
|
21
|
+
`/agents/<name>/docs`. Deploy/undeploy at runtime via the control API:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
uv sync --group dev
|
|
25
|
+
|
|
26
|
+
AGENT_LLM_API_KEY=sk-... AGENT_LLM_MODEL=openai/gpt-4o \
|
|
27
|
+
uv run carl-agent-hub serve --port 8080
|
|
28
|
+
|
|
29
|
+
# deploy a chain JSON from disk (offline source)
|
|
30
|
+
curl -X POST localhost:8080/deployments \
|
|
31
|
+
-H 'content-type: application/json' \
|
|
32
|
+
-d '{"name": "demo", "chain_file": "./chain.json"}'
|
|
33
|
+
|
|
34
|
+
# deploy a chain entity from Memory, following its stable channel
|
|
35
|
+
curl -X POST localhost:8080/deployments \
|
|
36
|
+
-H 'content-type: application/json' \
|
|
37
|
+
-d '{"name": "weather", "entity_id": "<chain-uuid>", "channel": "stable"}'
|
|
38
|
+
|
|
39
|
+
open http://localhost:8080/agents/demo/docs # try POST /invoke from Swagger
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Deployments persist to `--state-file` (default `~/.care/agent-hub.json`) and
|
|
43
|
+
are restored on restart; `--no-persist` disables that.
|
|
44
|
+
|
|
45
|
+
### Hub control API
|
|
46
|
+
|
|
47
|
+
| Endpoint | Purpose |
|
|
48
|
+
|---|---|
|
|
49
|
+
| `GET /deployments` | list deployments (name, url, version, ready, runs) |
|
|
50
|
+
| `POST /deployments` | deploy (body = deployment spec; 409 duplicate, 422 unloadable) |
|
|
51
|
+
| `GET /deployments/{name}` | one deployment |
|
|
52
|
+
| `POST /deployments/{name}/reload` | re-fetch + preflight + swap now |
|
|
53
|
+
| `DELETE /deployments/{name}` | undeploy (unmounts the agent) |
|
|
54
|
+
| `GET /healthz` | hub liveness |
|
|
55
|
+
|
|
56
|
+
## Solo mode (one agent = one process)
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
AGENT_LLM_API_KEY=... AGENT_LLM_MODEL=openai/gpt-4o \
|
|
60
|
+
uv run carl-agent serve --chain-file ./chain.json --name demo --port 8001
|
|
61
|
+
# or attached: --entity-id <uuid> --channel stable
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Agent API (under `/agents/<name>` in the hub, or the root in solo mode)
|
|
65
|
+
|
|
66
|
+
| Endpoint | Purpose |
|
|
67
|
+
|---|---|
|
|
68
|
+
| `POST /invoke` | run the chain (`?mode=sync` default; `?mode=async` → 202 + run_id) |
|
|
69
|
+
| `POST /chat` | converse with the agent (`{message, session_id?}`); the dialogue so far is fed into the chain each turn — the chain is unchanged. Omit `session_id` to start a session (returned in the reply); sessions evict after the idle TTL |
|
|
70
|
+
| `GET /runs/{id}` | run status/result (answer, steps, tokens, time) |
|
|
71
|
+
| `POST /runs/{id}/input` | resume a run paused on a `human_input` step (status `waiting`) — `{value}`. Async-invoke flow: invoke `?mode=async`, poll until `waiting`, then provide input |
|
|
72
|
+
| `GET /runs/{id}/events` | SSE step stream (replays history, ends with `result`) |
|
|
73
|
+
| `DELETE /runs/{id}` | cooperative cancel of a running run |
|
|
74
|
+
| `GET /schedule` | the deployment's auto-invoke schedule + firing stats (D3) |
|
|
75
|
+
| `POST /schedule/trigger` | fire one scheduled run now (manual trigger) |
|
|
76
|
+
| `GET /metrics` | usage + cost: run counts, total tokens, total USD, budget + remaining (D4) |
|
|
77
|
+
| `GET /info` | agent card (name, version, channel, required tools, readiness) |
|
|
78
|
+
| `GET /healthz` / `GET /readyz` | liveness / readiness (with the reason when 503) |
|
|
79
|
+
| `GET /docs` | this agent's own Swagger |
|
|
80
|
+
|
|
81
|
+
## Environment
|
|
82
|
+
|
|
83
|
+
| Variable | Purpose |
|
|
84
|
+
|---|---|
|
|
85
|
+
| `AGENT_LLM_API_KEY` / `AGENT_LLM_MODEL` / `AGENT_LLM_BASE_URL` | OpenAI-compatible LLM the chains run on |
|
|
86
|
+
| `AGENT_MEMORY_URL` / `AGENT_MEMORY_API_KEY` | gigaevo Memory (attached mode) |
|
|
87
|
+
| `AGENT_WEB_SEARCH_API_KEY` | enables the `web_search` builtin tool (Tavily) |
|
|
88
|
+
|
|
89
|
+
Per-deployment overrides (`llm_model`, `llm_api_key`, `memory_url`, …) exist on
|
|
90
|
+
the deployment spec, but prefer env vars: hub specs persist to the state file
|
|
91
|
+
verbatim, and secrets belong in the environment, not on disk.
|
|
92
|
+
|
|
93
|
+
## Auth
|
|
94
|
+
|
|
95
|
+
Set a per-agent `api_key` on the deployment (CARE's `/deploy` generates one)
|
|
96
|
+
and `/invoke`, `/chat`, `/runs/*` require it via `X-API-Key: <key>` (or
|
|
97
|
+
`Authorization: Bearer <key>`); `/healthz`, `/readyz`, `/info`, `/docs` stay
|
|
98
|
+
open. Loopback requests (127.0.0.1/::1) skip the check unless
|
|
99
|
+
`auth_allow_localhost=false`. No `api_key` set → auth is off (localhost demo).
|
|
100
|
+
Solo: `carl-agent serve --api-key <key>` (or `AGENT_API_KEY`). The hub's
|
|
101
|
+
state file holds these keys and is written with mode `600` (owner read/write only).
|
|
102
|
+
|
|
103
|
+
## Hot-reload (attached mode)
|
|
104
|
+
|
|
105
|
+
An attached agent follows its Memory channel through **two mechanisms**:
|
|
106
|
+
|
|
107
|
+
- **SSE watcher** (fast path): a `gigaevo_client` subscription to
|
|
108
|
+
`/v1/events/stream` filtered on the entity — a `promoted`/`pinned` event
|
|
109
|
+
triggers a reload within ~a second.
|
|
110
|
+
- **Poll fallback** (safety net): every `poll_fallback_s` (default **60s**,
|
|
111
|
+
`0` disables) the agent compares the channel's current `version_id` to the
|
|
112
|
+
serving one and reloads on drift.
|
|
113
|
+
|
|
114
|
+
The fallback exists because the SSE subscription can die **silently**:
|
|
115
|
+
`gigaevo_client` (≤0.3.0) retries a failing `/v1/events/stream` in a loop
|
|
116
|
+
without ever surfacing the error — e.g. when a stale Memory deployment routes
|
|
117
|
+
that path into the generic `/v1/{entity_type}/{entity_id}` handler (400), the
|
|
118
|
+
watcher looks armed but no event is ever delivered. With the fallback, a
|
|
119
|
+
missed promote is picked up within a minute instead of never; `POST
|
|
120
|
+
/deployments/{name}/reload` remains the immediate manual lever.
|
|
121
|
+
|
|
122
|
+
To check the live events endpoint a deployment is watching:
|
|
123
|
+
`curl -N <memory-url>/v1/events/stream` must hold the connection open and
|
|
124
|
+
print `entity_changed` events on promote — an instant JSON error means the
|
|
125
|
+
Memory deployment is broken/stale and only the poll fallback (and manual
|
|
126
|
+
reload) will move versions.
|
|
127
|
+
|
|
128
|
+
Both paths funnel into one swap-safe reload: fetch → parse → preflight → swap,
|
|
129
|
+
and a failed candidate never evicts the serving chain.
|
|
130
|
+
|
|
131
|
+
## Timeouts
|
|
132
|
+
|
|
133
|
+
Two layers bound a run. `chain_timeout_s` (default 300s) is the agent's hard
|
|
134
|
+
wall-clock deadline for the whole run. `step_timeout_s` (default 60s) is a
|
|
135
|
+
default per-step timeout injected at load into any step the author left
|
|
136
|
+
unbounded — capped never to exceed the chain-level `timeout`, so it tightens
|
|
137
|
+
but never loosens authored intent. Together, they make a single hung step fail fast at
|
|
138
|
+
the step level instead of burning the whole run budget.
|
|
139
|
+
|
|
140
|
+
## Schedules
|
|
141
|
+
|
|
142
|
+
A deployment can carry a `schedule` (`{interval_s, input, enabled}`) and the
|
|
143
|
+
agent auto-invokes its chain on that cadence — the in-template scheduler,
|
|
144
|
+
lifecycle-bound (starts on activation, stops on shutdown; survives a single
|
|
145
|
+
run's failure, skips ticks while not ready). `GET /schedule` reports it,
|
|
146
|
+
`POST /schedule/trigger` fires one run now. For external cron/batch, use
|
|
147
|
+
`care run` instead; an inbound HTTP trigger is just `POST /invoke`.
|
|
148
|
+
|
|
149
|
+
## Cost & budgets
|
|
150
|
+
|
|
151
|
+
Set per-1k token prices on the deployment (`price_per_1k_input_usd`,
|
|
152
|
+
`price_per_1k_output_usd`) and each run's USD cost is computed from its token
|
|
153
|
+
usage and stamped on the run record; `GET /metrics` reports run counts, total
|
|
154
|
+
tokens and total spend. An optional `budget_usd` cap refuses further runs with
|
|
155
|
+
**402** once spent (needs pricing to take effect; the scheduler skips ticks
|
|
156
|
+
while over budget).
|
|
157
|
+
|
|
158
|
+
## Tools
|
|
159
|
+
|
|
160
|
+
Deployed agents ship a **read-only** builtin tool set: `calculator`,
|
|
161
|
+
`current_datetime`, `fetch_url`, `http_request` (GET/HEAD only — mutating
|
|
162
|
+
methods raise) and `web_search` (when a key is configured). Mutating tools
|
|
163
|
+
(e.g. `run_python`) are deliberately not registered in deployments.
|
|
164
|
+
|
|
165
|
+
## Development
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
uv run pytest tests/ -q
|
|
169
|
+
uv run ruff check src/ tests/
|
|
170
|
+
uv run mypy src/
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Status: Phase A of the production-mode plan is nearly complete (agent core,
|
|
174
|
+
async/SSE runs, attached hot-reload, the hub, CLIs). Next: run-records to
|
|
175
|
+
Memory, then the CARE control-plane integration (`/deploy` from the TUI).
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "carl-agent-server"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Serve CARL reasoning chains as HTTP agents (FastAPI) — per-agent Swagger, hub with hot-reload from gigaevo Memory"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.12"
|
|
8
|
+
keywords = ["carl", "agent", "fastapi", "reasoning", "deployment"]
|
|
9
|
+
classifiers = [
|
|
10
|
+
"Development Status :: 3 - Alpha",
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Programming Language :: Python :: 3.13",
|
|
16
|
+
"Topic :: Software Development",
|
|
17
|
+
"Typing :: Typed",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"fastapi>=0.115",
|
|
21
|
+
"uvicorn>=0.30",
|
|
22
|
+
"pydantic>=2.0",
|
|
23
|
+
"httpx>=0.27",
|
|
24
|
+
"mmar-carl>=0.3.0",
|
|
25
|
+
"gigaevo-client>=0.3.0",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.urls]
|
|
29
|
+
Homepage = "https://github.com/pyshka501/carl-agent-server"
|
|
30
|
+
Repository = "https://github.com/pyshka501/carl-agent-server"
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
carl-agent = "carl_agent_server.cli:main"
|
|
34
|
+
carl-agent-hub = "carl_agent_server.cli:hub_main"
|
|
35
|
+
|
|
36
|
+
[dependency-groups]
|
|
37
|
+
dev = [
|
|
38
|
+
"pytest>=8.2",
|
|
39
|
+
"pytest-asyncio>=1.0.0",
|
|
40
|
+
"ruff>=0.4",
|
|
41
|
+
"mypy>=1.10",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[build-system]
|
|
45
|
+
requires = ["hatchling"]
|
|
46
|
+
build-backend = "hatchling.build"
|
|
47
|
+
|
|
48
|
+
[tool.hatch.build.targets.wheel]
|
|
49
|
+
packages = ["src/carl_agent_server"]
|
|
50
|
+
|
|
51
|
+
[tool.pytest.ini_options]
|
|
52
|
+
testpaths = ["tests"]
|
|
53
|
+
asyncio_mode = "auto"
|
|
54
|
+
|
|
55
|
+
[tool.ruff]
|
|
56
|
+
line-length = 120
|
|
57
|
+
target-version = "py312"
|
|
58
|
+
|
|
59
|
+
[tool.ruff.lint]
|
|
60
|
+
select = ["E", "F", "I", "N", "W", "UP", "B"]
|
|
61
|
+
ignore = ["E501"]
|
|
62
|
+
|
|
63
|
+
[tool.mypy]
|
|
64
|
+
python_version = "3.12"
|
|
65
|
+
ignore_missing_imports = true
|
|
66
|
+
check_untyped_defs = true
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""carl-agent-server — serve CARL reasoning chains as HTTP agents.
|
|
2
|
+
|
|
3
|
+
One chain = one agent: a FastAPI facade with /invoke, /info, /healthz and its
|
|
4
|
+
OWN /docs (OpenAPI metadata is taken from the chain's Memory entity, so the
|
|
5
|
+
Swagger page reads as that agent's documentation). Agents are served solo
|
|
6
|
+
(`carl-agent serve`) or mounted together in the hub (`/agents/<name>/…`).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .app import build_agent_app
|
|
10
|
+
from .hub import build_hub_app
|
|
11
|
+
from .models import (
|
|
12
|
+
AgentInfo,
|
|
13
|
+
ChatRequest,
|
|
14
|
+
ChatResponse,
|
|
15
|
+
DeploymentInfo,
|
|
16
|
+
DeploymentSpec,
|
|
17
|
+
HumanInputRequest,
|
|
18
|
+
InvokeRequest,
|
|
19
|
+
RunRecord,
|
|
20
|
+
ScheduleConfig,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"AgentInfo",
|
|
27
|
+
"ChatRequest",
|
|
28
|
+
"ChatResponse",
|
|
29
|
+
"DeploymentInfo",
|
|
30
|
+
"DeploymentSpec",
|
|
31
|
+
"HumanInputRequest",
|
|
32
|
+
"InvokeRequest",
|
|
33
|
+
"RunRecord",
|
|
34
|
+
"ScheduleConfig",
|
|
35
|
+
"__version__",
|
|
36
|
+
"build_agent_app",
|
|
37
|
+
"build_hub_app",
|
|
38
|
+
]
|