carl-agent-server 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. carl_agent_server-0.1.0/.github/workflows/ci.yml +41 -0
  2. carl_agent_server-0.1.0/.github/workflows/release.yml +85 -0
  3. carl_agent_server-0.1.0/.gitignore +26 -0
  4. carl_agent_server-0.1.0/LICENSE +21 -0
  5. carl_agent_server-0.1.0/PKG-INFO +201 -0
  6. carl_agent_server-0.1.0/README.md +175 -0
  7. carl_agent_server-0.1.0/pyproject.toml +66 -0
  8. carl_agent_server-0.1.0/src/carl_agent_server/__init__.py +38 -0
  9. carl_agent_server-0.1.0/src/carl_agent_server/agent.py +688 -0
  10. carl_agent_server-0.1.0/src/carl_agent_server/app.py +297 -0
  11. carl_agent_server-0.1.0/src/carl_agent_server/chain_source.py +126 -0
  12. carl_agent_server-0.1.0/src/carl_agent_server/cli.py +95 -0
  13. carl_agent_server-0.1.0/src/carl_agent_server/cost.py +36 -0
  14. carl_agent_server-0.1.0/src/carl_agent_server/hub.py +228 -0
  15. carl_agent_server-0.1.0/src/carl_agent_server/llm.py +39 -0
  16. carl_agent_server-0.1.0/src/carl_agent_server/models.py +254 -0
  17. carl_agent_server-0.1.0/src/carl_agent_server/run_records.py +88 -0
  18. carl_agent_server-0.1.0/src/carl_agent_server/sessions.py +115 -0
  19. carl_agent_server-0.1.0/src/carl_agent_server/timeouts.py +48 -0
  20. carl_agent_server-0.1.0/src/carl_agent_server/tools.py +110 -0
  21. carl_agent_server-0.1.0/tests/__init__.py +0 -0
  22. carl_agent_server-0.1.0/tests/conftest.py +88 -0
  23. carl_agent_server-0.1.0/tests/test_app.py +113 -0
  24. carl_agent_server-0.1.0/tests/test_async_runs.py +94 -0
  25. carl_agent_server-0.1.0/tests/test_attached.py +213 -0
  26. carl_agent_server-0.1.0/tests/test_auth.py +85 -0
  27. carl_agent_server-0.1.0/tests/test_chat.py +123 -0
  28. carl_agent_server-0.1.0/tests/test_cli.py +69 -0
  29. carl_agent_server-0.1.0/tests/test_hub.py +154 -0
  30. carl_agent_server-0.1.0/tests/test_human_input.py +162 -0
  31. carl_agent_server-0.1.0/tests/test_metrics.py +125 -0
  32. carl_agent_server-0.1.0/tests/test_run_records.py +75 -0
  33. carl_agent_server-0.1.0/tests/test_schedule.py +102 -0
  34. carl_agent_server-0.1.0/tests/test_sessions.py +94 -0
  35. carl_agent_server-0.1.0/tests/test_timeouts.py +99 -0
  36. carl_agent_server-0.1.0/tests/test_tools.py +34 -0
  37. carl_agent_server-0.1.0/uv.lock +770 -0
@@ -0,0 +1,41 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ lint-and-test:
13
+ name: Lint + tests (py${{ matrix.python }})
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ python: ["3.12", "3.13"]
19
+ steps:
20
+ - name: Checkout repository
21
+ uses: actions/checkout@v5
22
+
23
+ - name: Install uv
24
+ uses: astral-sh/setup-uv@v6
25
+ with:
26
+ enable-cache: true
27
+
28
+ - name: Set up Python ${{ matrix.python }}
29
+ run: uv python install ${{ matrix.python }}
30
+
31
+ - name: Sync dev dependencies
32
+ run: uv sync --group dev
33
+
34
+ - name: Run ruff
35
+ run: uv run ruff check src/ tests/
36
+
37
+ - name: Run mypy
38
+ run: uv run mypy src/
39
+
40
+ - name: Run pytest
41
+ run: uv run pytest tests/ -q --maxfail=1
@@ -0,0 +1,85 @@
1
+ name: Release to PyPI
2
+
3
+ # Runs only after the CI workflow (lint + tests) finishes successfully on
4
+ # main. If CI was green and the pyproject version isn't on PyPI yet, build
5
+ # and publish `carl-agent-server`. Tests therefore always gate the release.
6
+ on:
7
+ workflow_run:
8
+ workflows: ["CI"]
9
+ types: [completed]
10
+ branches: [main]
11
+
12
+ jobs:
13
+ check-version:
14
+ # Skip unless CI passed on this repository's own code: never for pull_request runs or fork commits, which must not reach the publish token.
15
+ if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event != 'pull_request' && github.event.workflow_run.head_repository.full_name == github.repository }}
16
+ runs-on: ubuntu-latest
17
+ outputs:
18
+ should_publish: ${{ steps.check.outputs.should_publish }}
19
+ version: ${{ steps.check.outputs.version }}
20
+ steps:
21
+ - uses: actions/checkout@v5
22
+ with:
23
+ # Build/inspect the exact commit CI tested, not the branch tip.
24
+ ref: ${{ github.event.workflow_run.head_sha }}
25
+
26
+ - name: Check whether version is already on PyPI
27
+ id: check
28
+ run: |
29
+ version=$(grep -m1 '^version = ' pyproject.toml | sed -E 's/version = "(.*)"/\1/')
30
+ echo "pyproject version: ${version:?could not read version from pyproject.toml}"  # aborts the step if the version is empty
31
+ echo "version=$version" >> "$GITHUB_OUTPUT"
32
+
33
+ status=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/carl-agent-server/$version/json")
34
+ echo "PyPI lookup HTTP status: $status"
35
+ case "$status" in
36
+ 200)
37
+ echo "Version $version already on PyPI — nothing to publish."
38
+ echo "should_publish=false" >> "$GITHUB_OUTPUT"
39
+ ;;
40
+ 404)
41
+ echo "Version $version not on PyPI — will publish."
42
+ echo "should_publish=true" >> "$GITHUB_OUTPUT"
43
+ ;;
44
+ *)
45
+ echo "Unexpected status $status from PyPI; refusing to guess." >&2
46
+ exit 1
47
+ ;;
48
+ esac
49
+
50
+ release:
51
+ needs: check-version
52
+ if: needs.check-version.outputs.should_publish == 'true'
53
+ runs-on: ubuntu-latest
54
+ permissions:
55
+ contents: write # push the v<version> tag
56
+ steps:
57
+ - uses: actions/checkout@v5
58
+ with:
59
+ ref: ${{ github.event.workflow_run.head_sha }}
60
+
61
+ - name: Set up Python
62
+ uses: actions/setup-python@v6
63
+ with:
64
+ python-version: "3.12"
65
+
66
+ - name: Install build tooling
67
+ run: python -m pip install --upgrade build
68
+
69
+ - name: Build sdist + wheel
70
+ run: python -m build
71
+
72
+ - name: Publish to PyPI
73
+ uses: pypa/gh-action-pypi-publish@release/v1
74
+ with:
75
+ password: ${{ secrets.PYPI_API_TOKEN }}
76
+
77
+ - name: Tag the release
78
+ run: |
79
+ version="${{ needs.check-version.outputs.version }}"
80
+ if git ls-remote --exit-code --tags origin "refs/tags/v$version" >/dev/null 2>&1; then
81
+ echo "Tag v$version already exists — skipping."
82
+ else
83
+ git tag "v$version"
84
+ git push origin "v$version"
85
+ fi
@@ -0,0 +1,26 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+
8
+ # Virtual environments
9
+ .venv/
10
+ venv/
11
+
12
+ # Tooling caches
13
+ .pytest_cache/
14
+ .ruff_cache/
15
+ .mypy_cache/
16
+ .coverage
17
+
18
+ # IDE / OS
19
+ .vscode/
20
+ .idea/
21
+ .DS_Store
22
+ __MACOSX/
23
+
24
+ # Local state
25
+ *.log
26
+ agent-hub.json
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 AIRI Institute
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: carl-agent-server
3
+ Version: 0.1.0
4
+ Summary: Serve CARL reasoning chains as HTTP agents (FastAPI) — per-agent Swagger, hub with hot-reload from gigaevo Memory
5
+ Project-URL: Homepage, https://github.com/pyshka501/carl-agent-server
6
+ Project-URL: Repository, https://github.com/pyshka501/carl-agent-server
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: agent,carl,deployment,fastapi,reasoning
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Software Development
17
+ Classifier: Typing :: Typed
18
+ Requires-Python: >=3.12
19
+ Requires-Dist: fastapi>=0.115
20
+ Requires-Dist: gigaevo-client>=0.3.0
21
+ Requires-Dist: httpx>=0.27
22
+ Requires-Dist: mmar-carl>=0.3.0
23
+ Requires-Dist: pydantic>=2.0
24
+ Requires-Dist: uvicorn>=0.30
25
+ Description-Content-Type: text/markdown
26
+
27
+ # carl-agent-server
28
+
29
+ Serve [CARL](https://github.com/Glazkoff/carl) reasoning chains as HTTP
30
+ **agents**. One chain = one agent: a FastAPI facade with `/invoke`, `/info`,
31
+ `/healthz` and its **own Swagger** (`/docs`) — the OpenAPI title, description
32
+ and version come from the chain's metadata, so the docs page reads as *that
33
+ agent's* documentation and users can try the agent right from `/docs`.
34
+
35
+ Part of the MMAR ecosystem: chains are authored in CARE / MAGE, stored &
36
+ versioned in **gigaevo Memory** (channels: `latest` → `stable`), executed by
37
+ [mmar-carl](https://pypi.org/project/mmar-carl/). In *attached* mode an agent
38
+ follows a Memory channel and **hot-reloads on `promote`/`pin`** — promote a
39
+ new version and the running agent picks it up with zero downtime; pin the old
40
+ version to roll back. A hot-reloaded version that fails preflight is rejected
41
+ and the healthy one keeps serving. Updates arrive via SSE push with a polling
42
+ safety net (see [Hot-reload](#hot-reload-attached-mode)).
43
+
44
+ ## Quickstart — the hub (recommended)
45
+
46
+ One lightweight process hosts N agents; each gets its own Swagger at
47
+ `/agents/<name>/docs`. Deploy/undeploy at runtime via the control API:
48
+
49
+ ```bash
50
+ uv sync --group dev
51
+
52
+ AGENT_LLM_API_KEY=sk-... AGENT_LLM_MODEL=openai/gpt-4o \
53
+ uv run carl-agent-hub serve --port 8080
54
+
55
+ # deploy a chain JSON from disk (offline source)
56
+ curl -X POST localhost:8080/deployments \
57
+ -H 'content-type: application/json' \
58
+ -d '{"name": "demo", "chain_file": "./chain.json"}'
59
+
60
+ # deploy a chain entity from Memory, following its stable channel
61
+ curl -X POST localhost:8080/deployments \
62
+ -H 'content-type: application/json' \
63
+ -d '{"name": "weather", "entity_id": "<chain-uuid>", "channel": "stable"}'
64
+
65
+ open http://localhost:8080/agents/demo/docs # try POST /invoke from Swagger
66
+ ```
67
+
68
+ Deployments persist to `--state-file` (default `~/.care/agent-hub.json`) and
69
+ are restored on restart; `--no-persist` disables that.
70
+
71
+ ### Hub control API
72
+
73
+ | Endpoint | Purpose |
74
+ |---|---|
75
+ | `GET /deployments` | list deployments (name, url, version, ready, runs) |
76
+ | `POST /deployments` | deploy (body = deployment spec; 409 duplicate, 422 unloadable) |
77
+ | `GET /deployments/{name}` | one deployment |
78
+ | `POST /deployments/{name}/reload` | re-fetch + preflight + swap now |
79
+ | `DELETE /deployments/{name}` | undeploy (unmounts the agent) |
80
+ | `GET /healthz` | hub liveness |
81
+
82
+ ## Solo mode (one agent = one process)
83
+
84
+ ```bash
85
+ AGENT_LLM_API_KEY=... AGENT_LLM_MODEL=openai/gpt-4o \
86
+ uv run carl-agent serve --chain-file ./chain.json --name demo --port 8001
87
+ # or attached: --entity-id <uuid> --channel stable
88
+ ```
89
+
90
+ ## Agent API (under `/agents/<name>` in the hub, or the root in solo mode)
91
+
92
+ | Endpoint | Purpose |
93
+ |---|---|
94
+ | `POST /invoke` | run the chain (`?mode=sync` default; `?mode=async` → 202 + run_id) |
95
+ | `POST /chat` | converse with the agent (`{message, session_id?}`); the dialogue so far is fed into the chain each turn — the chain is unchanged. Omit `session_id` to start a session (returned in the reply); sessions evict after the idle TTL |
96
+ | `GET /runs/{id}` | run status/result (answer, steps, tokens, time) |
97
+ | `POST /runs/{id}/input` | resume a run paused on a `human_input` step (status `waiting`) — `{value}`. Async-invoke flow: invoke `?mode=async`, poll until `waiting`, then provide input |
98
+ | `GET /runs/{id}/events` | SSE step stream (replays history, ends with `result`) |
99
+ | `DELETE /runs/{id}` | cooperative cancel of a running run |
100
+ | `GET /schedule` | the deployment's auto-invoke schedule + firing stats (D3) |
101
+ | `POST /schedule/trigger` | fire one scheduled run now (manual trigger) |
102
+ | `GET /metrics` | usage + cost: run counts, total tokens, total USD, budget + remaining (D4) |
103
+ | `GET /info` | agent card (name, version, channel, required tools, readiness) |
104
+ | `GET /healthz` / `GET /readyz` | liveness / readiness (with the reason when 503) |
105
+ | `GET /docs` | this agent's own Swagger |
106
+
107
+ ## Environment
108
+
109
+ | Variable | Purpose |
110
+ |---|---|
111
+ | `AGENT_LLM_API_KEY` / `AGENT_LLM_MODEL` / `AGENT_LLM_BASE_URL` | OpenAI-compatible LLM the chains run on |
112
+ | `AGENT_MEMORY_URL` / `AGENT_MEMORY_API_KEY` | gigaevo Memory (attached mode) |
113
+ | `AGENT_WEB_SEARCH_API_KEY` | enables the `web_search` builtin tool (Tavily) |
114
+
115
+ Per-deployment overrides (`llm_model`, `llm_api_key`, `memory_url`, …) exist on
116
+ the deployment spec, but prefer env vars: hub specs persist to the state file
117
+ verbatim, and secrets belong in the environment, not on disk.
118
+
119
+ ## Auth
120
+
121
+ Set a per-agent `api_key` on the deployment (CARE's `/deploy` generates one)
122
+ and `/invoke`, `/chat`, `/runs/*` require it via `X-API-Key: <key>` (or
123
+ `Authorization: Bearer <key>`); `/healthz`, `/readyz`, `/info`, `/docs` stay
124
+ open. Loopback requests (127.0.0.1/::1) skip the check unless
125
+ `auth_allow_localhost=false`. No `api_key` set → auth is off (localhost demo).
126
+ Solo: `carl-agent serve --api-key <key>` (or `AGENT_API_KEY`). The hub's
127
+ state file holds these keys and is written `chmod 600`.
128
+
129
+ ## Hot-reload (attached mode)
130
+
131
+ An attached agent follows its Memory channel through **two mechanisms**:
132
+
133
+ - **SSE watcher** (fast path): a `gigaevo_client` subscription to
134
+ `/v1/events/stream` filtered on the entity — a `promoted`/`pinned` event
135
+ triggers a reload within about a second.
136
+ - **Poll fallback** (safety net): every `poll_fallback_s` (default **60s**,
137
+ `0` disables) the agent compares the channel's current `version_id` to the
138
+ serving one and reloads on drift.
139
+
140
+ The fallback exists because the SSE subscription can die **silently**:
141
+ `gigaevo_client` (≤0.3.0) retries a failing `/v1/events/stream` in a loop
142
+ without ever surfacing the error — e.g. when a stale Memory deployment routes
143
+ that path into the generic `/v1/{entity_type}/{entity_id}` handler (400), the
144
+ watcher looks armed but no event is ever delivered. With the fallback, a
145
+ missed promote is picked up within a minute instead of never; `POST
146
+ /deployments/{name}/reload` remains the immediate manual lever.
147
+
148
+ To check the live events endpoint a deployment is watching:
149
+ `curl -N <memory-url>/v1/events/stream` must hold the connection open and
150
+ print `entity_changed` events on promote — an instant JSON error means the
151
+ Memory deployment is broken/stale and only the poll fallback (and manual
152
+ reload) will move versions.
153
+
154
+ Both paths funnel into one swap-safe reload: fetch → parse → preflight → swap,
155
+ and a failed candidate never evicts the serving chain.
156
+
157
+ ## Timeouts
158
+
159
+ Two layers bound a run. `chain_timeout_s` (default 300s) is the agent's hard
160
+ wall-clock deadline for the whole run. `step_timeout_s` (default 60s) is a
161
+ default per-step timeout injected at load into any step the author left
162
+ unbounded — capped at the chain-level `timeout`, so it tightens
163
+ but never loosens authored intent. Together a single hung step fails fast at
164
+ the step level instead of burning the whole run budget.
165
+
166
+ ## Schedules
167
+
168
+ A deployment can carry a `schedule` (`{interval_s, input, enabled}`) and the
169
+ agent auto-invokes its chain on that cadence — the in-template scheduler,
170
+ lifecycle-bound (starts on activation, stops on shutdown; survives a single
171
+ run's failure, skips ticks while not ready). `GET /schedule` reports it,
172
+ `POST /schedule/trigger` fires one run now. For external cron/batch use
173
+ `care run` instead; an inbound HTTP trigger is just `POST /invoke`.
174
+
175
+ ## Cost & budgets
176
+
177
+ Set per-1k token prices on the deployment (`price_per_1k_input_usd`,
178
+ `price_per_1k_output_usd`) and each run's USD cost is computed from its token
179
+ usage and stamped on the run record; `GET /metrics` reports run counts, total
180
+ tokens and total spend. An optional `budget_usd` cap refuses further runs with
181
+ **402** once spent (needs pricing to take effect; the scheduler skips ticks
182
+ while over budget).
183
+
184
+ ## Tools
185
+
186
+ Deployed agents ship a **read-only** builtin tool set: `calculator`,
187
+ `current_datetime`, `fetch_url`, `http_request` (GET/HEAD only — mutating
188
+ methods raise) and `web_search` (when a key is configured). Mutating tools
189
+ (e.g. `run_python`) are deliberately not registered in deployments.
190
+
191
+ ## Development
192
+
193
+ ```bash
194
+ uv run pytest tests/ -q
195
+ uv run ruff check src/ tests/
196
+ uv run mypy src/
197
+ ```
198
+
199
+ Status: Phase A of the production-mode plan is nearly complete (agent core,
200
+ async/SSE runs, attached hot-reload, the hub, CLIs). Next: run-records to
201
+ Memory, then the CARE control-plane integration (`/deploy` from the TUI).
@@ -0,0 +1,175 @@
1
+ # carl-agent-server
2
+
3
+ Serve [CARL](https://github.com/Glazkoff/carl) reasoning chains as HTTP
4
+ **agents**. One chain = one agent: a FastAPI facade with `/invoke`, `/info`,
5
+ `/healthz` and its **own Swagger** (`/docs`) — the OpenAPI title, description
6
+ and version come from the chain's metadata, so the docs page reads as *that
7
+ agent's* documentation and users can try the agent right from `/docs`.
8
+
9
+ Part of the MMAR ecosystem: chains are authored in CARE / MAGE, stored &
10
+ versioned in **gigaevo Memory** (channels: `latest` → `stable`), executed by
11
+ [mmar-carl](https://pypi.org/project/mmar-carl/). In *attached* mode an agent
12
+ follows a Memory channel and **hot-reloads on `promote`/`pin`** — promote a
13
+ new version and the running agent picks it up with zero downtime; pin the old
14
+ version to roll back. A hot-reloaded version that fails preflight is rejected
15
+ and the healthy one keeps serving. Updates arrive via SSE push with a polling
16
+ safety net (see [Hot-reload](#hot-reload-attached-mode)).
17
+
18
+ ## Quickstart — the hub (recommended)
19
+
20
+ One lightweight process hosts N agents; each gets its own Swagger at
21
+ `/agents/<name>/docs`. Deploy/undeploy at runtime via the control API:
22
+
23
+ ```bash
24
+ uv sync --group dev
25
+
26
+ AGENT_LLM_API_KEY=sk-... AGENT_LLM_MODEL=openai/gpt-4o \
27
+ uv run carl-agent-hub serve --port 8080
28
+
29
+ # deploy a chain JSON from disk (offline source)
30
+ curl -X POST localhost:8080/deployments \
31
+ -H 'content-type: application/json' \
32
+ -d '{"name": "demo", "chain_file": "./chain.json"}'
33
+
34
+ # deploy a chain entity from Memory, following its stable channel
35
+ curl -X POST localhost:8080/deployments \
36
+ -H 'content-type: application/json' \
37
+ -d '{"name": "weather", "entity_id": "<chain-uuid>", "channel": "stable"}'
38
+
39
+ open http://localhost:8080/agents/demo/docs # try POST /invoke from Swagger
40
+ ```
41
+
42
+ Deployments persist to `--state-file` (default `~/.care/agent-hub.json`) and
43
+ are restored on restart; `--no-persist` disables that.
44
+
45
+ ### Hub control API
46
+
47
+ | Endpoint | Purpose |
48
+ |---|---|
49
+ | `GET /deployments` | list deployments (name, url, version, ready, runs) |
50
+ | `POST /deployments` | deploy (body = deployment spec; 409 duplicate, 422 unloadable) |
51
+ | `GET /deployments/{name}` | one deployment |
52
+ | `POST /deployments/{name}/reload` | re-fetch + preflight + swap now |
53
+ | `DELETE /deployments/{name}` | undeploy (unmounts the agent) |
54
+ | `GET /healthz` | hub liveness |
55
+
56
+ ## Solo mode (one agent = one process)
57
+
58
+ ```bash
59
+ AGENT_LLM_API_KEY=... AGENT_LLM_MODEL=openai/gpt-4o \
60
+ uv run carl-agent serve --chain-file ./chain.json --name demo --port 8001
61
+ # or attached: --entity-id <uuid> --channel stable
62
+ ```
63
+
64
+ ## Agent API (under `/agents/<name>` in the hub, or the root in solo mode)
65
+
66
+ | Endpoint | Purpose |
67
+ |---|---|
68
+ | `POST /invoke` | run the chain (`?mode=sync` default; `?mode=async` → 202 + run_id) |
69
+ | `POST /chat` | converse with the agent (`{message, session_id?}`); the dialogue so far is fed into the chain each turn — the chain is unchanged. Omit `session_id` to start a session (returned in the reply); sessions evict after the idle TTL |
70
+ | `GET /runs/{id}` | run status/result (answer, steps, tokens, time) |
71
+ | `POST /runs/{id}/input` | resume a run paused on a `human_input` step (status `waiting`) — `{value}`. Async-invoke flow: invoke `?mode=async`, poll until `waiting`, then provide input |
72
+ | `GET /runs/{id}/events` | SSE step stream (replays history, ends with `result`) |
73
+ | `DELETE /runs/{id}` | cooperative cancel of a running run |
74
+ | `GET /schedule` | the deployment's auto-invoke schedule + firing stats (D3) |
75
+ | `POST /schedule/trigger` | fire one scheduled run now (manual trigger) |
76
+ | `GET /metrics` | usage + cost: run counts, total tokens, total USD, budget + remaining (D4) |
77
+ | `GET /info` | agent card (name, version, channel, required tools, readiness) |
78
+ | `GET /healthz` / `GET /readyz` | liveness / readiness (with the reason when 503) |
79
+ | `GET /docs` | this agent's own Swagger |
80
+
81
+ ## Environment
82
+
83
+ | Variable | Purpose |
84
+ |---|---|
85
+ | `AGENT_LLM_API_KEY` / `AGENT_LLM_MODEL` / `AGENT_LLM_BASE_URL` | OpenAI-compatible LLM the chains run on |
86
+ | `AGENT_MEMORY_URL` / `AGENT_MEMORY_API_KEY` | gigaevo Memory (attached mode) |
87
+ | `AGENT_WEB_SEARCH_API_KEY` | enables the `web_search` builtin tool (Tavily) |
88
+
89
+ Per-deployment overrides (`llm_model`, `llm_api_key`, `memory_url`, …) exist on
90
+ the deployment spec, but prefer env vars: hub specs persist to the state file
91
+ verbatim, and secrets belong in the environment, not on disk.
92
+
93
+ ## Auth
94
+
95
+ Set a per-agent `api_key` on the deployment (CARE's `/deploy` generates one)
96
+ and `/invoke`, `/chat`, `/runs/*` require it via `X-API-Key: <key>` (or
97
+ `Authorization: Bearer <key>`); `/healthz`, `/readyz`, `/info`, `/docs` stay
98
+ open. Loopback requests (127.0.0.1/::1) skip the check unless
99
+ `auth_allow_localhost=false`. No `api_key` set → auth is off (localhost demo).
100
+ Solo: `carl-agent serve --api-key <key>` (or `AGENT_API_KEY`). The hub's
101
+ state file holds these keys and is written `chmod 600`.
102
+
103
+ ## Hot-reload (attached mode)
104
+
105
+ An attached agent follows its Memory channel through **two mechanisms**:
106
+
107
+ - **SSE watcher** (fast path): a `gigaevo_client` subscription to
108
+ `/v1/events/stream` filtered on the entity — a `promoted`/`pinned` event
109
+ triggers a reload within about a second.
110
+ - **Poll fallback** (safety net): every `poll_fallback_s` (default **60s**,
111
+ `0` disables) the agent compares the channel's current `version_id` to the
112
+ serving one and reloads on drift.
113
+
114
+ The fallback exists because the SSE subscription can die **silently**:
115
+ `gigaevo_client` (≤0.3.0) retries a failing `/v1/events/stream` in a loop
116
+ without ever surfacing the error — e.g. when a stale Memory deployment routes
117
+ that path into the generic `/v1/{entity_type}/{entity_id}` handler (400), the
118
+ watcher looks armed but no event is ever delivered. With the fallback, a
119
+ missed promote is picked up within a minute instead of never; `POST
120
+ /deployments/{name}/reload` remains the immediate manual lever.
121
+
122
+ To check the live events endpoint a deployment is watching:
123
+ `curl -N <memory-url>/v1/events/stream` must hold the connection open and
124
+ print `entity_changed` events on promote — an instant JSON error means the
125
+ Memory deployment is broken/stale and only the poll fallback (and manual
126
+ reload) will move versions.
127
+
128
+ Both paths funnel into one swap-safe reload: fetch → parse → preflight → swap,
129
+ and a failed candidate never evicts the serving chain.
130
+
131
+ ## Timeouts
132
+
133
+ Two layers bound a run. `chain_timeout_s` (default 300s) is the agent's hard
134
+ wall-clock deadline for the whole run. `step_timeout_s` (default 60s) is a
135
+ default per-step timeout injected at load into any step the author left
136
+ unbounded — capped at the chain-level `timeout`, so it tightens
137
+ but never loosens authored intent. Together a single hung step fails fast at
138
+ the step level instead of burning the whole run budget.
139
+
140
+ ## Schedules
141
+
142
+ A deployment can carry a `schedule` (`{interval_s, input, enabled}`) and the
143
+ agent auto-invokes its chain on that cadence — the in-template scheduler,
144
+ lifecycle-bound (starts on activation, stops on shutdown; survives a single
145
+ run's failure, skips ticks while not ready). `GET /schedule` reports it,
146
+ `POST /schedule/trigger` fires one run now. For external cron/batch use
147
+ `care run` instead; an inbound HTTP trigger is just `POST /invoke`.
148
+
149
+ ## Cost & budgets
150
+
151
+ Set per-1k token prices on the deployment (`price_per_1k_input_usd`,
152
+ `price_per_1k_output_usd`) and each run's USD cost is computed from its token
153
+ usage and stamped on the run record; `GET /metrics` reports run counts, total
154
+ tokens and total spend. An optional `budget_usd` cap refuses further runs with
155
+ **402** once spent (needs pricing to take effect; the scheduler skips ticks
156
+ while over budget).
157
+
158
+ ## Tools
159
+
160
+ Deployed agents ship a **read-only** builtin tool set: `calculator`,
161
+ `current_datetime`, `fetch_url`, `http_request` (GET/HEAD only — mutating
162
+ methods raise) and `web_search` (when a key is configured). Mutating tools
163
+ (e.g. `run_python`) are deliberately not registered in deployments.
164
+
165
+ ## Development
166
+
167
+ ```bash
168
+ uv run pytest tests/ -q
169
+ uv run ruff check src/ tests/
170
+ uv run mypy src/
171
+ ```
172
+
173
+ Status: Phase A of the production-mode plan is nearly complete (agent core,
174
+ async/SSE runs, attached hot-reload, the hub, CLIs). Next: run-records to
175
+ Memory, then the CARE control-plane integration (`/deploy` from the TUI).
@@ -0,0 +1,66 @@
1
+ [project]
2
+ name = "carl-agent-server"
3
+ version = "0.1.0"
4
+ description = "Serve CARL reasoning chains as HTTP agents (FastAPI) — per-agent Swagger, hub with hot-reload from gigaevo Memory"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.12"
8
+ keywords = ["carl", "agent", "fastapi", "reasoning", "deployment"]
9
+ classifiers = [
10
+ "Development Status :: 3 - Alpha",
11
+ "Intended Audience :: Developers",
12
+ "Programming Language :: Python :: 3",
13
+ "Programming Language :: Python :: 3 :: Only",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Programming Language :: Python :: 3.13",
16
+ "Topic :: Software Development",
17
+ "Typing :: Typed",
18
+ ]
19
+ dependencies = [
20
+ "fastapi>=0.115",
21
+ "uvicorn>=0.30",
22
+ "pydantic>=2.0",
23
+ "httpx>=0.27",
24
+ "mmar-carl>=0.3.0",
25
+ "gigaevo-client>=0.3.0",
26
+ ]
27
+
28
+ [project.urls]
29
+ Homepage = "https://github.com/pyshka501/carl-agent-server"
30
+ Repository = "https://github.com/pyshka501/carl-agent-server"
31
+
32
+ [project.scripts]
33
+ carl-agent = "carl_agent_server.cli:main"
34
+ carl-agent-hub = "carl_agent_server.cli:hub_main"
35
+
36
+ [dependency-groups]
37
+ dev = [
38
+ "pytest>=8.2",
39
+ "pytest-asyncio>=1.0.0",
40
+ "ruff>=0.4",
41
+ "mypy>=1.10",
42
+ ]
43
+
44
+ [build-system]
45
+ requires = ["hatchling"]
46
+ build-backend = "hatchling.build"
47
+
48
+ [tool.hatch.build.targets.wheel]
49
+ packages = ["src/carl_agent_server"]
50
+
51
+ [tool.pytest.ini_options]
52
+ testpaths = ["tests"]
53
+ asyncio_mode = "auto"
54
+
55
+ [tool.ruff]
56
+ line-length = 120
57
+ target-version = "py312"
58
+
59
+ [tool.ruff.lint]
60
+ select = ["E", "F", "I", "N", "W", "UP", "B"]
61
+ ignore = ["E501"]
62
+
63
+ [tool.mypy]
64
+ python_version = "3.12"
65
+ ignore_missing_imports = true
66
+ check_untyped_defs = true
@@ -0,0 +1,38 @@
1
+ """carl-agent-server — serve CARL reasoning chains as HTTP agents.
2
+
3
+ One chain = one agent: a FastAPI facade with /invoke, /info, /healthz and its
4
+ OWN /docs (OpenAPI metadata is taken from the chain's Memory entity, so the
5
+ Swagger page reads as that agent's documentation). Agents are served solo
6
+ (`carl-agent serve`) or mounted together in the hub (`/agents/<name>/…`).
7
+ """
8
+
9
+ from .app import build_agent_app
10
+ from .hub import build_hub_app
11
+ from .models import (
12
+ AgentInfo,
13
+ ChatRequest,
14
+ ChatResponse,
15
+ DeploymentInfo,
16
+ DeploymentSpec,
17
+ HumanInputRequest,
18
+ InvokeRequest,
19
+ RunRecord,
20
+ ScheduleConfig,
21
+ )
22
+
23
+ __version__ = "0.1.0"
24
+
25
+ __all__ = [
26
+ "AgentInfo",
27
+ "ChatRequest",
28
+ "ChatResponse",
29
+ "DeploymentInfo",
30
+ "DeploymentSpec",
31
+ "HumanInputRequest",
32
+ "InvokeRequest",
33
+ "RunRecord",
34
+ "ScheduleConfig",
35
+ "__version__",
36
+ "build_agent_app",
37
+ "build_hub_app",
38
+ ]