openrtc 0.1.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openrtc-0.1.0 → openrtc-0.2.2}/PKG-INFO +67 -1
- {openrtc-0.1.0 → openrtc-0.2.2}/README.md +65 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/changelog.md +24 -0
- openrtc-0.2.2/examples/density_demo.py +163 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/pyproject.toml +1 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/__init__.py +3 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/execution/coroutine.py +110 -13
- openrtc-0.2.2/src/openrtc/execution/file_watcher.py +472 -0
- openrtc-0.2.2/tests/benchmarks/throughput.py +384 -0
- openrtc-0.2.2/tests/execution/test_file_watcher.py +623 -0
- openrtc-0.2.2/tests/execution/test_file_watcher_smoke.py +66 -0
- openrtc-0.2.2/tests/integration/__init__.py +0 -0
- openrtc-0.2.2/tests/integration/test_coroutine_realroom.py +200 -0
- openrtc-0.2.2/tests/test_coroutine_job_context.py +90 -0
- openrtc-0.2.2/tests/test_coroutine_lifecycle.py +231 -0
- openrtc-0.2.2/tests/test_throughput_bench.py +42 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/uv.lock +2 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.coderabbit.yaml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.editorconfig +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.env.example +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/FUNDING.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/dependabot.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/audit.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/bench.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/build.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/canary.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/deploy-docs.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/docs.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/integration.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/lint.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/publish.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.github/workflows/test.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.gitignore +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/.pre-commit-config.yaml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/AGENTS.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/CLAUDE.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/CONTRIBUTING.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/LICENSE +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/Makefile +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/assets/banner.png +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/assets/logo.png +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/codecov.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docker-compose.test.yml +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/.vitepress/config.ts +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/.vitepress/theme/custom.css +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/.vitepress/theme/index.ts +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/api/pool.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/audit-2026-05-02.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/benchmarks/density-v0.1.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/cli.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/concepts/architecture.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/deployment/github-pages.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/design/agent-server-integration.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/design/job-executor-protocol.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/design/proc-pool-surface.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/design/v0.1.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/examples.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/getting-started.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/index.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/package-lock.json +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/package.json +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/public/banner.png +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/public/logo.png +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/public/logo.svg +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/docs/release-v0.1.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/agents/dental.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/agents/restaurant.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/.dockerignore +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/.env.example +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/.gitignore +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/Dockerfile +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/README.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/app.css +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/components/agents-ui/agent-audio-visualizer-wave.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/components/agents-ui/agent-chat-transcript.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/components/agents-ui/agent-session-provider.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/components/demo-call-page.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/root.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/routes/api.token.ts +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/routes/dentist.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/routes/home.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/routes/restaurant.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/routes.ts +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/welcome/logo-dark.svg +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/welcome/logo-light.svg +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/app/welcome/welcome.tsx +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/package-lock.json +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/package.json +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/public/favicon.ico +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/react-router.config.ts +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/tsconfig.json +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/frontend/vite.config.ts +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/examples/main.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/commands.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/dashboard.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/entry.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/livekit.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/params.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/reporter.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/cli/types.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/config.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/discovery.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/pool.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/routing.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/serialization.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/core/turn_handling.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/execution/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/execution/coroutine_server.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/execution/prewarm.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/observability/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/observability/metrics.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/observability/snapshot.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/observability/stream.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/py.typed +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/tui/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/tui/app.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/src/openrtc/types.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/benchmarks/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/benchmarks/density.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/conftest.py +0 -0
- {openrtc-0.1.0/tests/integration → openrtc-0.2.2/tests/execution}/__init__.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/integration/README.md +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/integration/conftest.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/integration/test_concurrent_real_calls.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/integration/test_dev_server_fixture.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_cli.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_cli_optional_extra_integration.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_cli_params.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_config.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_backpressure.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_coverage.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_drain.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_isolation.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_server.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_skeleton.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_coroutine_smoke.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_dashboard.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_discovery.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_isolation_process_parity.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_metrics_stream.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_pool.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_resources.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_routing.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_serialization.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_tui_app.py +0 -0
- {openrtc-0.1.0 → openrtc-0.2.2}/tests/test_turn_handling.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openrtc
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Run multiple LiveKit voice agents in a single shared worker process.
|
|
5
5
|
Project-URL: Homepage, https://github.com/mahimailabs/openrtc
|
|
6
6
|
Project-URL: Repository, https://github.com/mahimailabs/openrtc
|
|
@@ -20,6 +20,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
|
20
20
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
21
|
Requires-Python: <3.14,>=3.11
|
|
22
22
|
Requires-Dist: livekit-agents[openai,silero,turn-detector]~=1.5
|
|
23
|
+
Requires-Dist: watchfiles<2,>=0.21
|
|
23
24
|
Provides-Extra: cli
|
|
24
25
|
Requires-Dist: rich>=13; extra == 'cli'
|
|
25
26
|
Requires-Dist: typer>=0.12; extra == 'cli'
|
|
@@ -265,6 +266,71 @@ footprint. Validate against the §8.4 real-LiveKit integration test
|
|
|
265
266
|
`OPENAI_API_KEY`) before quoting a per-session memory number to your
|
|
266
267
|
operators.
|
|
267
268
|
|
|
269
|
+
### Throughput: steady-state event-loop p99
|
|
270
|
+
|
|
271
|
+
Memory density is only half the question. N sessions share one event loop and
|
|
272
|
+
one GIL, so the other half is whether the loop keeps up.
|
|
273
|
+
`tests/benchmarks/throughput.py` drives N concurrent sessions through the real
|
|
274
|
+
Silero VAD over synthetic 16 kHz PCM at 50 fps (the continuous on-loop CPU cost)
|
|
275
|
+
and measures event-loop p99 latency, separating the startup burst from steady
|
|
276
|
+
state.
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
uv run python tests/benchmarks/throughput.py --sessions 1,10,25,50,100
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Sample sweep (Apple M-series laptop, `vad` workload, steady state):
|
|
283
|
+
|
|
284
|
+
| Sessions | steady-state loop p99 | peak RSS |
|
|
285
|
+
| ---: | ---: | ---: |
|
|
286
|
+
| 1 | 0.9 ms | 160 MB |
|
|
287
|
+
| 10 | 1.3 ms | 160 MB |
|
|
288
|
+
| 25 | 1.2 ms | 160 MB |
|
|
289
|
+
| 50 | 1.1 ms | 160 MB |
|
|
290
|
+
| 100 | 2.8 ms | 160 MB |
|
|
291
|
+
|
|
292
|
+
Steady-state VAD inference stays well under a 100 ms loop-latency budget to 100
|
|
293
|
+
sessions, with flat resident memory (the model loads once). The expensive,
|
|
294
|
+
bursty part is session *startup* (each `session.start()` plus greeting), which
|
|
295
|
+
the benchmark reports as a separate `startup_p99` column and which dominates
|
|
296
|
+
early-life latency. This workload models the continuous VAD path, not the full
|
|
297
|
+
STT/LLM/TTS orchestration, so read it as the on-loop-CPU ceiling rather than a
|
|
298
|
+
full-pipeline guarantee. Run it on your own hardware before quoting a
|
|
299
|
+
sessions-per-worker number.
|
|
300
|
+
|
|
301
|
+
### Prove it on your machine
|
|
302
|
+
|
|
303
|
+
The process column above is estimated. This script measures both models for
|
|
304
|
+
real on your laptop: it spawns one subprocess per session for the
|
|
305
|
+
process-per-session model, runs the same number of sessions as `asyncio`
|
|
306
|
+
tasks in a single process for the coroutine model, then prints the memory
|
|
307
|
+
used each way. No LiveKit server, no API keys, no model download.
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
uv run python examples/density_demo.py # 16 sessions
|
|
311
|
+
uv run python examples/density_demo.py --sessions 32 # the gap widens with N
|
|
312
|
+
uv run python examples/density_demo.py --sessions 50 --load-vad # adds the shared Silero VAD model
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
Sample output (Apple M-series laptop, import-only mode):
|
|
316
|
+
|
|
317
|
+
```text
|
|
318
|
+
Hosting 16 concurrent voice sessions. Measuring resident memory.
|
|
319
|
+
|
|
320
|
+
livekit-agents (process per session): 1861 MB total ( 116.3 MB/session)
|
|
321
|
+
OpenRTC coroutine pool (one process): 195 MB total ( 12.2 MB/session)
|
|
322
|
+
|
|
323
|
+
OpenRTC uses 9.5x less memory for the same 16 sessions.
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
Your numbers vary by machine, and the ratio grows as you raise `--sessions`
|
|
327
|
+
(the coroutine pool pays the import cost once and amortizes it across every
|
|
328
|
+
session). This default mode counts only the `livekit-agents` import cost, so
|
|
329
|
+
it is a conservative lower bound: `--load-vad` adds the shared Silero VAD
|
|
330
|
+
model weights (paid once in the pool, once per process otherwise), and
|
|
331
|
+
`tests/benchmarks/density.py --sessions 50` proves the 50-sessions-under-4-GB
|
|
332
|
+
ceiling. The full script is [examples/density_demo.py](examples/density_demo.py).
|
|
333
|
+
|
|
268
334
|
## Routing
|
|
269
335
|
|
|
270
336
|
One process hosts several agent classes, so each session must resolve to a single registered name. `AgentPool` resolves the agent in this order:
|
|
@@ -234,6 +234,71 @@ footprint. Validate against the §8.4 real-LiveKit integration test
|
|
|
234
234
|
`OPENAI_API_KEY`) before quoting a per-session memory number to your
|
|
235
235
|
operators.
|
|
236
236
|
|
|
237
|
+
### Throughput: steady-state event-loop p99
|
|
238
|
+
|
|
239
|
+
Memory density is only half the question. N sessions share one event loop and
|
|
240
|
+
one GIL, so the other half is whether the loop keeps up.
|
|
241
|
+
`tests/benchmarks/throughput.py` drives N concurrent sessions through the real
|
|
242
|
+
Silero VAD over synthetic 16 kHz PCM at 50 fps (the continuous on-loop CPU cost)
|
|
243
|
+
and measures event-loop p99 latency, separating the startup burst from steady
|
|
244
|
+
state.
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
uv run python tests/benchmarks/throughput.py --sessions 1,10,25,50,100
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
Sample sweep (Apple M-series laptop, `vad` workload, steady state):
|
|
251
|
+
|
|
252
|
+
| Sessions | steady-state loop p99 | peak RSS |
|
|
253
|
+
| ---: | ---: | ---: |
|
|
254
|
+
| 1 | 0.9 ms | 160 MB |
|
|
255
|
+
| 10 | 1.3 ms | 160 MB |
|
|
256
|
+
| 25 | 1.2 ms | 160 MB |
|
|
257
|
+
| 50 | 1.1 ms | 160 MB |
|
|
258
|
+
| 100 | 2.8 ms | 160 MB |
|
|
259
|
+
|
|
260
|
+
Steady-state VAD inference stays well under a 100 ms loop-latency budget to 100
|
|
261
|
+
sessions, with flat resident memory (the model loads once). The expensive,
|
|
262
|
+
bursty part is session *startup* (each `session.start()` plus greeting), which
|
|
263
|
+
the benchmark reports as a separate `startup_p99` column and which dominates
|
|
264
|
+
early-life latency. This workload models the continuous VAD path, not the full
|
|
265
|
+
STT/LLM/TTS orchestration, so read it as the on-loop-CPU ceiling rather than a
|
|
266
|
+
full-pipeline guarantee. Run it on your own hardware before quoting a
|
|
267
|
+
sessions-per-worker number.
|
|
268
|
+
|
|
269
|
+
### Prove it on your machine
|
|
270
|
+
|
|
271
|
+
The process column above is estimated. This script measures both models for
|
|
272
|
+
real on your laptop: it spawns one subprocess per session for the
|
|
273
|
+
process-per-session model, runs the same number of sessions as `asyncio`
|
|
274
|
+
tasks in a single process for the coroutine model, then prints the memory
|
|
275
|
+
used each way. No LiveKit server, no API keys, no model download.
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
uv run python examples/density_demo.py # 16 sessions
|
|
279
|
+
uv run python examples/density_demo.py --sessions 32 # the gap widens with N
|
|
280
|
+
uv run python examples/density_demo.py --sessions 50 --load-vad # adds the shared Silero VAD model
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
Sample output (Apple M-series laptop, import-only mode):
|
|
284
|
+
|
|
285
|
+
```text
|
|
286
|
+
Hosting 16 concurrent voice sessions. Measuring resident memory.
|
|
287
|
+
|
|
288
|
+
livekit-agents (process per session): 1861 MB total ( 116.3 MB/session)
|
|
289
|
+
OpenRTC coroutine pool (one process): 195 MB total ( 12.2 MB/session)
|
|
290
|
+
|
|
291
|
+
OpenRTC uses 9.5x less memory for the same 16 sessions.
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
Your numbers vary by machine, and the ratio grows as you raise `--sessions`
|
|
295
|
+
(the coroutine pool pays the import cost once and amortizes it across every
|
|
296
|
+
session). This default mode counts only the `livekit-agents` import cost, so
|
|
297
|
+
it is a conservative lower bound: `--load-vad` adds the shared Silero VAD
|
|
298
|
+
model weights (paid once in the pool, once per process otherwise), and
|
|
299
|
+
`tests/benchmarks/density.py --sessions 50` proves the 50-sessions-under-4-GB
|
|
300
|
+
ceiling. The full script is [examples/density_demo.py](examples/density_demo.py).
|
|
301
|
+
|
|
237
302
|
## Routing
|
|
238
303
|
|
|
239
304
|
One process hosts several agent classes, so each session must resolve to a single registered name. `AgentPool` resolves the agent in this order:
|
|
@@ -147,6 +147,30 @@ contributor onboarding matches what's in the repo.
|
|
|
147
147
|
|
|
148
148
|
<!-- releases -->
|
|
149
149
|
|
|
150
|
+
## [0.2.1] - 2026-05-06
|
|
151
|
+
|
|
152
|
+
## What's Changed
|
|
153
|
+
* [v0.2.1] File watcher infrastructure for agent code (MAH-80) by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/39
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
**Full Changelog**: https://github.com/mahimailabs/openrtc-runtime/compare/v0.1.0...v0.2.1
|
|
157
|
+
|
|
158
|
+
---
|
|
159
|
+
|
|
160
|
+
## [0.1.0] - 2026-05-06
|
|
161
|
+
|
|
162
|
+
## What's Changed
|
|
163
|
+
* Feat: light websocket by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/30
|
|
164
|
+
* docs: bring docs/ in sync with v0.1 surface by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/35
|
|
165
|
+
* Feat: struc refac by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/36
|
|
166
|
+
* Feat/coroutine pool by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/37
|
|
167
|
+
* Feat/coroutine pool prod by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/38
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
**Full Changelog**: https://github.com/mahimailabs/openrtc-runtime/compare/v0.0.17...v0.1.0
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
150
174
|
## [0.0.17] - 2026-04-03
|
|
151
175
|
|
|
152
176
|
## What's Changed
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Prove the OpenRTC density win, on one laptop, with real numbers.
|
|
2
|
+
|
|
3
|
+
The claim: livekit-agents runs roughly one OS process per session (about
|
|
4
|
+
3 GB each in production). OpenRTC's coroutine pool runs N sessions as
|
|
5
|
+
asyncio tasks inside a single process, so the heavy per-process cost
|
|
6
|
+
(Python interpreter, the livekit-agents import graph, and shared models
|
|
7
|
+
like Silero VAD and the turn detector) is paid ONCE instead of N times.
|
|
8
|
+
|
|
9
|
+
This script measures both models for real:
|
|
10
|
+
|
|
11
|
+
* "process-per-session" (what vanilla livekit-agents does):
|
|
12
|
+
spawn N subprocesses, each imports the agent stack and holds a
|
|
13
|
+
per-session buffer. We sum the resident memory across all of them.
|
|
14
|
+
|
|
15
|
+
* "OpenRTC coroutine pool" (the default isolation mode):
|
|
16
|
+
import the stack ONCE, run N asyncio sessions in this single process,
|
|
17
|
+
each holding the same per-session buffer. We read this process's
|
|
18
|
+
resident memory.
|
|
19
|
+
|
|
20
|
+
Then it prints total memory each way, memory per session, and the ratio.
|
|
21
|
+
No LiveKit server, no network, no model download required.
|
|
22
|
+
|
|
23
|
+
Run it:
|
|
24
|
+
|
|
25
|
+
uv run python examples/density_demo.py # N = 16
|
|
26
|
+
uv run python examples/density_demo.py --sessions 32
|
|
27
|
+
uv run python examples/density_demo.py --sessions 50 --load-vad
|
|
28
|
+
|
|
29
|
+
Use --load-vad to also load the real Silero VAD in every worker (the model
|
|
30
|
+
livekit-agents would load per process and OpenRTC shares). It downloads
|
|
31
|
+
ONNX weights on first run, then makes the gap even wider.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import argparse
|
|
37
|
+
import asyncio
|
|
38
|
+
import contextlib
|
|
39
|
+
import multiprocessing as mp
|
|
40
|
+
import os
|
|
41
|
+
import time
|
|
42
|
+
|
|
43
|
+
import psutil
|
|
44
|
+
|
|
45
|
+
# Stand-in for one session's live audio plus conversation state. The real
|
|
46
|
+
# per-session cost is dominated by the shared-vs-per-process fixed cost, so
|
|
47
|
+
# the exact buffer size is not load-bearing; it just keeps each session honest.
|
|
48
|
+
_SESSION_BUFFER_MB = 5
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _import_stack(load_vad: bool) -> None:
    """Import the agent stack into the current process.

    Importing ``livekit.agents`` and ``openrtc`` is the fixed per-process
    cost this demo compares between the two hosting models. When
    ``load_vad`` is true, the Silero VAD model is loaded as well.
    """
    import livekit.agents  # noqa: F401  (the real wheel, ~150 MB resident)

    import openrtc  # noqa: F401

    if not load_vad:
        return

    # The shared model OpenRTC loads once in prewarm and livekit-agents
    # loads in every worker process. Widens the gap; needs a one-time
    # weights download.
    from livekit.plugins import silero

    silero.VAD.load()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _process_worker(ready: object, stop: object, load_vad: bool) -> None:
    """Run one worker subprocess that stands in for a single session.

    Imports the stack, allocates the per-session buffer, signals ``ready``,
    then blocks on ``stop`` so the allocation stays resident while the
    parent reads this process's memory.

    Args:
        ready: Event-like object; ``set()`` is called once setup is done.
        stop: Event-like object; ``wait()`` blocks until the parent is done.
        load_vad: Forwarded to ``_import_stack``.
    """
    _import_stack(load_vad)
    buffer_bytes = _SESSION_BUFFER_MB * 1024 * 1024
    _buffer = bytearray(buffer_bytes)  # noqa: F841  (kept alive on purpose)
    ready.set()  # type: ignore[attr-defined]
    # Hold the buffer until the parent has taken its measurement.
    stop.wait()  # type: ignore[attr-defined]
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def measure_process_model(sessions: int, load_vad: bool) -> float:
    """Sum resident memory of N independent worker processes (MB).

    Starts ``sessions`` subprocesses running ``_process_worker``, waits for
    each to signal readiness (up to 120 s per worker), reads every worker's
    RSS via ``psutil`` and returns the total in MiB.

    Workers that exit or time out before signalling readiness are reported
    on stderr, because the returned total then undercounts. The workers are
    always released and joined, even if measuring is interrupted.

    Args:
        sessions: Number of worker processes to spawn.
        load_vad: Forwarded to each worker's ``_import_stack`` call.

    Returns:
        Combined resident set size of the measured workers, in MiB.
    """
    import sys

    # "spawn" matches LiveKit's default executor on macOS, so each child pays
    # the full fresh-interpreter import cost, exactly as in production.
    ctx = mp.get_context("spawn")
    stop_event = ctx.Event()
    ready_events = [ctx.Event() for _ in range(sessions)]
    procs = [
        ctx.Process(target=_process_worker, args=(ready, stop_event, load_vad))
        for ready in ready_events
    ]

    started = []
    total_bytes = 0
    try:
        for proc in procs:
            proc.start()
            started.append(proc)

        not_ready = 0
        for proc, ready in zip(procs, ready_events):
            # Poll in short slices so a crashed worker is noticed right away
            # instead of stalling the whole measurement for the full timeout.
            deadline = time.monotonic() + 120
            while not ready.wait(timeout=0.25):
                if not proc.is_alive() or time.monotonic() >= deadline:
                    not_ready += 1
                    break
        if not_ready:
            print(
                f"warning: {not_ready} of {sessions} workers never became "
                "ready; the process-model total is an undercount.",
                file=sys.stderr,
            )

        time.sleep(0.5)  # let resident memory settle
        for proc in procs:
            # A worker may have exited early; skip it rather than fail.
            with contextlib.suppress(psutil.NoSuchProcess):
                total_bytes += psutil.Process(proc.pid).memory_info().rss
    finally:
        # Always release and reap the children so an error or Ctrl-C above
        # cannot leave them blocked on stop.wait() forever.
        stop_event.set()
        for proc in started:
            proc.join()
    return total_bytes / (1024 * 1024)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def measure_coroutine_model(sessions: int, load_vad: bool) -> float:
    """Resident memory of ONE process hosting N asyncio sessions (MB).

    Imports the stack into this process, starts ``sessions`` tasks that each
    hold a per-session buffer, samples this process's RSS after a short
    settle delay, then cancels the tasks.

    Args:
        sessions: Number of concurrent session tasks to create.
        load_vad: Forwarded to ``_import_stack``.

    Returns:
        This process's resident set size in MiB at sampling time.
    """
    _import_stack(load_vad)  # paid once, in this process

    buffer_size = _SESSION_BUFFER_MB * 1024 * 1024

    async def _hold_session() -> None:
        _buffer = bytearray(buffer_size)
        try:
            # Stay alive until the measurement is taken and we get cancelled.
            await asyncio.sleep(3600)
        finally:
            del _buffer

    pending = []
    for _ in range(sessions):
        pending.append(asyncio.create_task(_hold_session()))

    await asyncio.sleep(0.5)  # let all sessions allocate + settle
    this_process = psutil.Process(os.getpid())
    resident_mb = this_process.memory_info().rss / (1024 * 1024)

    for task in pending:
        task.cancel()
    # Collect the cancellations so no task is left pending at loop shutdown.
    await asyncio.gather(*pending, return_exceptions=True)
    return resident_mb
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def main() -> None:
    """Parse CLI arguments, run both measurements and print the comparison.

    Exits with an argparse usage error when ``--sessions`` is below 1, since
    the per-session figures divide by the session count.
    """
    # __doc__ is None under ``python -OO``; fall back to an empty description.
    parser = argparse.ArgumentParser(description=(__doc__ or "").split("\n", 1)[0])
    parser.add_argument(
        "--sessions", type=int, default=16, help="concurrent sessions (default 16)"
    )
    parser.add_argument(
        "--load-vad",
        action="store_true",
        help="also load real Silero VAD in every worker",
    )
    args = parser.parse_args()
    n = args.sessions
    if n < 1:
        parser.error("--sessions must be at least 1")

    print(f"\nHosting {n} concurrent voice sessions. Measuring resident memory.\n")

    # Process model first so this parent process stays light; the coroutine
    # measurement then imports the stack into this same process on purpose.
    process_mb = measure_process_model(n, args.load_vad)
    coroutine_mb = asyncio.run(measure_coroutine_model(n, args.load_vad))

    ratio = process_mb / coroutine_mb if coroutine_mb else float("inf")
    print(
        f" livekit-agents (process per session): {process_mb:8.0f} MB total "
        f"({process_mb / n:6.1f} MB/session)"
    )
    print(
        f" OpenRTC coroutine pool (one process): {coroutine_mb:8.0f} MB total "
        f"({coroutine_mb / n:6.1f} MB/session)"
    )
    print(f"\n OpenRTC uses {ratio:.1f}x less memory for the same {n} sessions.\n")
    print(" Same agent code, both ways. In OpenRTC you flip one argument:")
    print(' AgentPool(isolation="process") # the left column above')
    print(' AgentPool(isolation="coroutine") # the right column (default)\n')
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
if __name__ == "__main__":
|
|
163
|
+
main()
|
|
@@ -4,6 +4,7 @@ from importlib.metadata import PackageNotFoundError, version
|
|
|
4
4
|
|
|
5
5
|
from .core.config import AgentConfig, AgentDiscoveryConfig, agent_config
|
|
6
6
|
from .core.pool import AgentPool
|
|
7
|
+
from .execution.file_watcher import FileChange, FileWatcher
|
|
7
8
|
from .types import ProviderValue
|
|
8
9
|
|
|
9
10
|
try:
|
|
@@ -18,6 +19,8 @@ __all__ = [
|
|
|
18
19
|
"AgentConfig",
|
|
19
20
|
"AgentDiscoveryConfig",
|
|
20
21
|
"AgentPool",
|
|
22
|
+
"FileChange",
|
|
23
|
+
"FileWatcher",
|
|
21
24
|
"ProviderValue",
|
|
22
25
|
"__version__",
|
|
23
26
|
"agent_config",
|
|
@@ -14,6 +14,8 @@ Contracts derived from:
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
+
import contextlib
|
|
18
|
+
import contextvars
|
|
17
19
|
import inspect
|
|
18
20
|
import logging
|
|
19
21
|
import uuid
|
|
@@ -25,7 +27,7 @@ from livekit import rtc
|
|
|
25
27
|
from livekit.agents import JobContext, JobExecutorType, JobProcess, utils
|
|
26
28
|
from livekit.agents.ipc import inference_executor as inference_executor_mod
|
|
27
29
|
from livekit.agents.ipc.job_executor import JobStatus
|
|
28
|
-
from livekit.agents.job import RunningJobInfo
|
|
30
|
+
from livekit.agents.job import RunningJobInfo, _JobContextVar
|
|
29
31
|
|
|
30
32
|
if TYPE_CHECKING:
|
|
31
33
|
from livekit.agents.ipc.job_executor import JobExecutor
|
|
@@ -114,6 +116,7 @@ class CoroutineJobExecutor:
|
|
|
114
116
|
self._session_end_fnc = session_end_fnc
|
|
115
117
|
self._context_factory = context_factory
|
|
116
118
|
self._loop = loop
|
|
119
|
+
self._shutdown_fut: asyncio.Future[str] | None = None
|
|
117
120
|
|
|
118
121
|
@property
|
|
119
122
|
def id(self) -> str:
|
|
@@ -271,22 +274,79 @@ class CoroutineJobExecutor:
|
|
|
271
274
|
self._task = loop.create_task(self._run_entrypoint(ctx))
|
|
272
275
|
|
|
273
276
|
async def _run_entrypoint(self, ctx: JobContext) -> None:
|
|
277
|
+
"""Run the session lifecycle, mirroring upstream ``_run_job_task``.
|
|
278
|
+
|
|
279
|
+
Establishes the job context (so ``get_job_context()`` resolves inside
|
|
280
|
+
the entrypoint and the session), holds the session open until shutdown
|
|
281
|
+
is requested (room disconnect, ``ctx.shutdown()``, or an entrypoint
|
|
282
|
+
crash), then runs the teardown sequence. Every ``JobContext`` hook is
|
|
283
|
+
treated as optional so the executor still runs with the bare stub
|
|
284
|
+
contexts that unit tests and the density benchmark pass directly.
|
|
285
|
+
"""
|
|
274
286
|
assert self._entrypoint_fnc is not None # checked in launch_job
|
|
287
|
+
loop = asyncio.get_running_loop()
|
|
288
|
+
shutdown_fut: asyncio.Future[str] = loop.create_future()
|
|
289
|
+
self._shutdown_fut = shutdown_fut
|
|
290
|
+
|
|
291
|
+
def _request_shutdown(reason: str = "shutdown") -> None:
|
|
292
|
+
if not shutdown_fut.done():
|
|
293
|
+
shutdown_fut.set_result(reason)
|
|
294
|
+
|
|
295
|
+
# Per-job log fields, then the contextvar (the MAH-158 fix).
|
|
296
|
+
_on_setup = getattr(ctx, "_on_setup", None)
|
|
297
|
+
if callable(_on_setup):
|
|
298
|
+
_on_setup()
|
|
299
|
+
token: contextvars.Token[JobContext] | None = None
|
|
300
|
+
with contextlib.suppress(Exception):
|
|
301
|
+
token = _JobContextVar.set(ctx)
|
|
302
|
+
|
|
303
|
+
# Shutdown triggers (all optional for stub contexts): ctx.shutdown()
|
|
304
|
+
# via on_shutdown, and the room "disconnected" event (mirrors
|
|
305
|
+
# job_proc_lazy_main's room-disconnected handler).
|
|
306
|
+
if hasattr(ctx, "_on_shutdown"):
|
|
307
|
+
|
|
308
|
+
def _on_shutdown(reason: str = "") -> None:
|
|
309
|
+
_request_shutdown(reason or "shutdown")
|
|
310
|
+
|
|
311
|
+
ctx._on_shutdown = _on_shutdown
|
|
312
|
+
_room_on = getattr(getattr(ctx, "room", None), "on", None)
|
|
313
|
+
if callable(_room_on):
|
|
314
|
+
_room_on("disconnected", lambda *_a: _request_shutdown("room disconnected"))
|
|
315
|
+
|
|
275
316
|
try:
|
|
276
|
-
|
|
317
|
+
try:
|
|
318
|
+
await self._entrypoint_fnc(ctx)
|
|
319
|
+
except asyncio.CancelledError:
|
|
320
|
+
if self._status is JobStatus.RUNNING:
|
|
321
|
+
self._status = JobStatus.FAILED
|
|
322
|
+
raise
|
|
323
|
+
except Exception:
|
|
324
|
+
if self._status is JobStatus.RUNNING:
|
|
325
|
+
self._status = JobStatus.FAILED
|
|
326
|
+
logger.exception(
|
|
327
|
+
"entrypoint raised in CoroutineJobExecutor",
|
|
328
|
+
extra=self.logging_extra(),
|
|
329
|
+
)
|
|
330
|
+
return
|
|
331
|
+
# Entrypoint returned cleanly. Hold a real job open until the call
|
|
332
|
+
# ends (the MAH-160 fix), then run teardown. A setup-only entrypoint
|
|
333
|
+
# (no live session) or a fake job (simulate_job, which has no live
|
|
334
|
+
# room to disconnect) completes on return instead.
|
|
335
|
+
_is_fake = getattr(ctx, "is_fake_job", None)
|
|
336
|
+
fake_job = bool(_is_fake()) if callable(_is_fake) else False
|
|
337
|
+
if (
|
|
338
|
+
getattr(ctx, "_primary_agent_session", None) is not None
|
|
339
|
+
and not fake_job
|
|
340
|
+
):
|
|
341
|
+
try:
|
|
342
|
+
await shutdown_fut
|
|
343
|
+
except asyncio.CancelledError:
|
|
344
|
+
if self._status is JobStatus.RUNNING:
|
|
345
|
+
self._status = JobStatus.FAILED
|
|
346
|
+
raise
|
|
347
|
+
await self._teardown(ctx, shutdown_fut.result())
|
|
277
348
|
if self._status is JobStatus.RUNNING:
|
|
278
349
|
self._status = JobStatus.SUCCESS
|
|
279
|
-
except asyncio.CancelledError:
|
|
280
|
-
if self._status is JobStatus.RUNNING:
|
|
281
|
-
self._status = JobStatus.FAILED
|
|
282
|
-
raise
|
|
283
|
-
except Exception:
|
|
284
|
-
if self._status is JobStatus.RUNNING:
|
|
285
|
-
self._status = JobStatus.FAILED
|
|
286
|
-
logger.exception(
|
|
287
|
-
"entrypoint raised in CoroutineJobExecutor",
|
|
288
|
-
extra=self.logging_extra(),
|
|
289
|
-
)
|
|
290
350
|
finally:
|
|
291
351
|
if self._session_end_fnc is not None:
|
|
292
352
|
try:
|
|
@@ -296,6 +356,43 @@ class CoroutineJobExecutor:
|
|
|
296
356
|
"session_end_fnc raised in CoroutineJobExecutor",
|
|
297
357
|
extra=self.logging_extra(),
|
|
298
358
|
)
|
|
359
|
+
if token is not None:
|
|
360
|
+
with contextlib.suppress(Exception):
|
|
361
|
+
_JobContextVar.reset(token)
|
|
362
|
+
|
|
363
|
+
async def _teardown(self, ctx: JobContext, reason: str) -> None:
    """Run the post-shutdown lifecycle (mirrors upstream ``_run_job_task``).

    Steps, in order:

    1. Close the primary ``AgentSession`` (``ctx._primary_agent_session``)
       when it exposes a callable ``aclose``.
    2. Run ``ctx._on_session_end``.
    3. Run each callback in ``ctx._shutdown_callbacks`` with ``reason``.
    4. Cancel every task in ``ctx._pending_tasks`` and wait for them.
    5. Run ``ctx._on_cleanup``.

    Every hook is looked up with ``getattr`` and skipped when absent, so
    stub contexts in tests and benchmarks are tolerated. A hook may be a
    plain function or a coroutine function: its result is awaited only when
    it is awaitable. A hook that raises ``Exception`` is logged and does not
    prevent the remaining steps from running (best-effort teardown).

    Args:
        ctx: The job context being torn down.
        reason: Shutdown reason forwarded to each shutdown callback.
    """
    # Function-scope import keeps this method self-contained.
    import inspect

    async def _run_hook(message: str, hook: object, *args: object) -> None:
        # Best-effort: a failing hook is logged, never propagated, so the
        # later teardown steps still run.
        try:
            result = hook(*args)
            # Synchronous stubs return a non-awaitable (usually None);
            # awaiting that would raise a spurious TypeError.
            if inspect.isawaitable(result):
                await result
        except Exception:
            logger.exception(message, extra=self.logging_extra())

    primary = getattr(ctx, "_primary_agent_session", None)
    aclose = getattr(primary, "aclose", None)
    if callable(aclose):
        await _run_hook(
            "primary session aclose raised in CoroutineJobExecutor", aclose
        )

    on_session_end = getattr(ctx, "_on_session_end", None)
    if callable(on_session_end):
        await _run_hook(
            "_on_session_end raised in CoroutineJobExecutor", on_session_end
        )

    # Snapshot the list so a callback that registers/unregisters callbacks
    # cannot disturb the iteration.
    for callback in list(getattr(ctx, "_shutdown_callbacks", None) or []):
        await _run_hook(
            "shutdown callback raised in CoroutineJobExecutor", callback, reason
        )

    pending = list(getattr(ctx, "_pending_tasks", None) or [])
    if pending:
        for task in pending:
            task.cancel()
        # return_exceptions=True: the resulting CancelledError (or any task
        # failure) is collected instead of being raised here.
        await asyncio.gather(*pending, return_exceptions=True)

    on_cleanup = getattr(ctx, "_on_cleanup", None)
    if callable(on_cleanup):
        await _run_hook("_on_cleanup raised in CoroutineJobExecutor", on_cleanup)
|
|
299
396
|
|
|
300
397
|
def logging_extra(self) -> dict[str, Any]:
    """Return the structured-logging fields that identify this executor."""
    fields: dict[str, Any] = {}
    fields["executor_id"] = self._id
    return fields
|