openrtc 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. {openrtc-0.2.1 → openrtc-0.2.3}/PKG-INFO +136 -1
  2. {openrtc-0.2.1 → openrtc-0.2.3}/README.md +135 -0
  3. {openrtc-0.2.1 → openrtc-0.2.3}/docs/changelog.md +33 -8
  4. openrtc-0.2.3/examples/density_demo.py +163 -0
  5. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/execution/coroutine.py +110 -13
  6. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/execution/prewarm.py +18 -0
  7. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/observability/metrics.py +37 -0
  8. openrtc-0.2.3/tests/benchmarks/throughput.py +384 -0
  9. openrtc-0.2.3/tests/integration/test_coroutine_realroom.py +200 -0
  10. openrtc-0.2.3/tests/test_coroutine_job_context.py +90 -0
  11. openrtc-0.2.3/tests/test_coroutine_lifecycle.py +231 -0
  12. openrtc-0.2.3/tests/test_savings_readout.py +76 -0
  13. openrtc-0.2.3/tests/test_throughput_bench.py +42 -0
  14. {openrtc-0.2.1 → openrtc-0.2.3}/.coderabbit.yaml +0 -0
  15. {openrtc-0.2.1 → openrtc-0.2.3}/.editorconfig +0 -0
  16. {openrtc-0.2.1 → openrtc-0.2.3}/.env.example +0 -0
  17. {openrtc-0.2.1 → openrtc-0.2.3}/.github/FUNDING.yml +0 -0
  18. {openrtc-0.2.1 → openrtc-0.2.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  19. {openrtc-0.2.1 → openrtc-0.2.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  20. {openrtc-0.2.1 → openrtc-0.2.3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  21. {openrtc-0.2.1 → openrtc-0.2.3}/.github/dependabot.yml +0 -0
  22. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/audit.yml +0 -0
  23. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/bench.yml +0 -0
  24. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/build.yml +0 -0
  25. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/canary.yml +0 -0
  26. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/deploy-docs.yml +0 -0
  27. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/docs.yml +0 -0
  28. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/integration.yml +0 -0
  29. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/lint.yml +0 -0
  30. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/publish.yml +0 -0
  31. {openrtc-0.2.1 → openrtc-0.2.3}/.github/workflows/test.yml +0 -0
  32. {openrtc-0.2.1 → openrtc-0.2.3}/.gitignore +0 -0
  33. {openrtc-0.2.1 → openrtc-0.2.3}/.pre-commit-config.yaml +0 -0
  34. {openrtc-0.2.1 → openrtc-0.2.3}/AGENTS.md +0 -0
  35. {openrtc-0.2.1 → openrtc-0.2.3}/CLAUDE.md +0 -0
  36. {openrtc-0.2.1 → openrtc-0.2.3}/CONTRIBUTING.md +0 -0
  37. {openrtc-0.2.1 → openrtc-0.2.3}/LICENSE +0 -0
  38. {openrtc-0.2.1 → openrtc-0.2.3}/Makefile +0 -0
  39. {openrtc-0.2.1 → openrtc-0.2.3}/assets/banner.png +0 -0
  40. {openrtc-0.2.1 → openrtc-0.2.3}/assets/logo.png +0 -0
  41. {openrtc-0.2.1 → openrtc-0.2.3}/codecov.yml +0 -0
  42. {openrtc-0.2.1 → openrtc-0.2.3}/docker-compose.test.yml +0 -0
  43. {openrtc-0.2.1 → openrtc-0.2.3}/docs/.vitepress/config.ts +0 -0
  44. {openrtc-0.2.1 → openrtc-0.2.3}/docs/.vitepress/theme/custom.css +0 -0
  45. {openrtc-0.2.1 → openrtc-0.2.3}/docs/.vitepress/theme/index.ts +0 -0
  46. {openrtc-0.2.1 → openrtc-0.2.3}/docs/api/pool.md +0 -0
  47. {openrtc-0.2.1 → openrtc-0.2.3}/docs/audit-2026-05-02.md +0 -0
  48. {openrtc-0.2.1 → openrtc-0.2.3}/docs/benchmarks/density-v0.1.md +0 -0
  49. {openrtc-0.2.1 → openrtc-0.2.3}/docs/cli.md +0 -0
  50. {openrtc-0.2.1 → openrtc-0.2.3}/docs/concepts/architecture.md +0 -0
  51. {openrtc-0.2.1 → openrtc-0.2.3}/docs/deployment/github-pages.md +0 -0
  52. {openrtc-0.2.1 → openrtc-0.2.3}/docs/design/agent-server-integration.md +0 -0
  53. {openrtc-0.2.1 → openrtc-0.2.3}/docs/design/job-executor-protocol.md +0 -0
  54. {openrtc-0.2.1 → openrtc-0.2.3}/docs/design/proc-pool-surface.md +0 -0
  55. {openrtc-0.2.1 → openrtc-0.2.3}/docs/design/v0.1.md +0 -0
  56. {openrtc-0.2.1 → openrtc-0.2.3}/docs/examples.md +0 -0
  57. {openrtc-0.2.1 → openrtc-0.2.3}/docs/getting-started.md +0 -0
  58. {openrtc-0.2.1 → openrtc-0.2.3}/docs/index.md +0 -0
  59. {openrtc-0.2.1 → openrtc-0.2.3}/docs/package-lock.json +0 -0
  60. {openrtc-0.2.1 → openrtc-0.2.3}/docs/package.json +0 -0
  61. {openrtc-0.2.1 → openrtc-0.2.3}/docs/public/banner.png +0 -0
  62. {openrtc-0.2.1 → openrtc-0.2.3}/docs/public/logo.png +0 -0
  63. {openrtc-0.2.1 → openrtc-0.2.3}/docs/public/logo.svg +0 -0
  64. {openrtc-0.2.1 → openrtc-0.2.3}/docs/release-v0.1.md +0 -0
  65. {openrtc-0.2.1 → openrtc-0.2.3}/examples/agents/dental.py +0 -0
  66. {openrtc-0.2.1 → openrtc-0.2.3}/examples/agents/restaurant.py +0 -0
  67. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/.dockerignore +0 -0
  68. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/.env.example +0 -0
  69. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/.gitignore +0 -0
  70. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/Dockerfile +0 -0
  71. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/README.md +0 -0
  72. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/app.css +0 -0
  73. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/components/agents-ui/agent-audio-visualizer-wave.tsx +0 -0
  74. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/components/agents-ui/agent-chat-transcript.tsx +0 -0
  75. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/components/agents-ui/agent-session-provider.tsx +0 -0
  76. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/components/demo-call-page.tsx +0 -0
  77. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/root.tsx +0 -0
  78. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/routes/api.token.ts +0 -0
  79. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/routes/dentist.tsx +0 -0
  80. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/routes/home.tsx +0 -0
  81. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/routes/restaurant.tsx +0 -0
  82. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/routes.ts +0 -0
  83. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/welcome/logo-dark.svg +0 -0
  84. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/welcome/logo-light.svg +0 -0
  85. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/app/welcome/welcome.tsx +0 -0
  86. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/package-lock.json +0 -0
  87. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/package.json +0 -0
  88. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/public/favicon.ico +0 -0
  89. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/react-router.config.ts +0 -0
  90. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/tsconfig.json +0 -0
  91. {openrtc-0.2.1 → openrtc-0.2.3}/examples/frontend/vite.config.ts +0 -0
  92. {openrtc-0.2.1 → openrtc-0.2.3}/examples/main.py +0 -0
  93. {openrtc-0.2.1 → openrtc-0.2.3}/pyproject.toml +0 -0
  94. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/__init__.py +0 -0
  95. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/__init__.py +0 -0
  96. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/commands.py +0 -0
  97. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/dashboard.py +0 -0
  98. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/entry.py +0 -0
  99. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/livekit.py +0 -0
  100. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/params.py +0 -0
  101. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/reporter.py +0 -0
  102. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/cli/types.py +0 -0
  103. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/__init__.py +0 -0
  104. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/config.py +0 -0
  105. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/discovery.py +0 -0
  106. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/pool.py +0 -0
  107. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/routing.py +0 -0
  108. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/serialization.py +0 -0
  109. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/core/turn_handling.py +0 -0
  110. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/execution/__init__.py +0 -0
  111. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/execution/coroutine_server.py +0 -0
  112. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/execution/file_watcher.py +0 -0
  113. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/observability/__init__.py +0 -0
  114. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/observability/snapshot.py +0 -0
  115. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/observability/stream.py +0 -0
  116. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/py.typed +0 -0
  117. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/tui/__init__.py +0 -0
  118. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/tui/app.py +0 -0
  119. {openrtc-0.2.1 → openrtc-0.2.3}/src/openrtc/types.py +0 -0
  120. {openrtc-0.2.1 → openrtc-0.2.3}/tests/benchmarks/__init__.py +0 -0
  121. {openrtc-0.2.1 → openrtc-0.2.3}/tests/benchmarks/density.py +0 -0
  122. {openrtc-0.2.1 → openrtc-0.2.3}/tests/conftest.py +0 -0
  123. {openrtc-0.2.1 → openrtc-0.2.3}/tests/execution/__init__.py +0 -0
  124. {openrtc-0.2.1 → openrtc-0.2.3}/tests/execution/test_file_watcher.py +0 -0
  125. {openrtc-0.2.1 → openrtc-0.2.3}/tests/execution/test_file_watcher_smoke.py +0 -0
  126. {openrtc-0.2.1 → openrtc-0.2.3}/tests/integration/README.md +0 -0
  127. {openrtc-0.2.1 → openrtc-0.2.3}/tests/integration/__init__.py +0 -0
  128. {openrtc-0.2.1 → openrtc-0.2.3}/tests/integration/conftest.py +0 -0
  129. {openrtc-0.2.1 → openrtc-0.2.3}/tests/integration/test_concurrent_real_calls.py +0 -0
  130. {openrtc-0.2.1 → openrtc-0.2.3}/tests/integration/test_dev_server_fixture.py +0 -0
  131. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_cli.py +0 -0
  132. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_cli_optional_extra_integration.py +0 -0
  133. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_cli_params.py +0 -0
  134. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_config.py +0 -0
  135. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_backpressure.py +0 -0
  136. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_coverage.py +0 -0
  137. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_drain.py +0 -0
  138. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_isolation.py +0 -0
  139. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_server.py +0 -0
  140. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_skeleton.py +0 -0
  141. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_coroutine_smoke.py +0 -0
  142. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_dashboard.py +0 -0
  143. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_discovery.py +0 -0
  144. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_isolation_process_parity.py +0 -0
  145. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_metrics_stream.py +0 -0
  146. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_pool.py +0 -0
  147. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_resources.py +0 -0
  148. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_routing.py +0 -0
  149. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_serialization.py +0 -0
  150. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_tui_app.py +0 -0
  151. {openrtc-0.2.1 → openrtc-0.2.3}/tests/test_turn_handling.py +0 -0
  152. {openrtc-0.2.1 → openrtc-0.2.3}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrtc
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Run multiple LiveKit voice agents in a single shared worker process.
5
5
  Project-URL: Homepage, https://github.com/mahimailabs/openrtc
6
6
  Project-URL: Repository, https://github.com/mahimailabs/openrtc
@@ -203,6 +203,76 @@ If a module has no `@agent_config`, the agent name defaults to the filename stem
203
203
 
204
204
  Discovered agents work with `livekit dev` and spawn-based workers on macOS. For `add()`, define agent classes at module scope so worker reload can import them.
205
205
 
206
+ ## Migrating from livekit-agents
207
+
208
+ Already running one or more `livekit-agents` workers? Each is its own process that
209
+ loads the same VAD and turn-detector models. Collapse them into one `AgentPool`
210
+ worker without changing your agents.
211
+
212
+ **Before** (one worker per agent, N processes):
213
+
214
+ ```python
215
+ # restaurant_worker.py (plus a near-identical dental_worker.py, support_worker.py, ...)
216
+ from livekit import agents
217
+ from livekit.agents import Agent, AgentSession
218
+ from livekit.plugins import openai, silero
219
+
220
+
221
+ class RestaurantAgent(Agent):
222
+ def __init__(self) -> None:
223
+ super().__init__(instructions="You help callers book tables.")
224
+
225
+
226
+ async def entrypoint(ctx: agents.JobContext) -> None:
227
+ session = AgentSession(
228
+ stt=openai.STT(), llm=openai.LLM(), tts=openai.TTS(), vad=silero.VAD.load()
229
+ )
230
+ await session.start(agent=RestaurantAgent(), room=ctx.room)
231
+ await ctx.connect()
232
+
233
+
234
+ if __name__ == "__main__":
235
+ agents.cli.run_app(agents.WorkerOptions(entrypoint_fnc=entrypoint))
236
+ ```
237
+
238
+ **After** (one worker, N agents, one shared prewarm):
239
+
240
+ ```python
241
+ # worker.py
242
+ from livekit.agents import Agent
243
+ from livekit.plugins import openai
244
+ from openrtc import AgentPool
245
+
246
+
247
+ class RestaurantAgent(Agent): # unchanged
248
+ def __init__(self) -> None:
249
+ super().__init__(instructions="You help callers book tables.")
250
+
251
+
252
+ class DentalAgent(Agent): # unchanged
253
+ def __init__(self) -> None:
254
+ super().__init__(instructions="You help callers manage appointments.")
255
+
256
+
257
+ pool = AgentPool(default_stt=openai.STT(), default_llm=openai.LLM(), default_tts=openai.TTS())
258
+ pool.add("restaurant", RestaurantAgent)
259
+ pool.add("dental", DentalAgent)
260
+ pool.run()
261
+ ```
262
+
263
+ Your `Agent` subclasses, tools, and provider objects are unchanged. You delete the
264
+ per-worker boilerplate (`entrypoint`, `AgentSession` wiring, `cli.run_app`) and
265
+ register the agents on one pool; OpenRTC owns prewarm, routing, and per-call
266
+ session construction. On the first run the worker logs the win, for example:
267
+
268
+ ```text
269
+ OpenRTC: 2 agents in 1 worker (baseline ~410 MB). 2 separate livekit-agents
270
+ workers would cost ~820 MB; sharing one worker saves ~410 MB of idle baseline
271
+ (assumes equal per-worker baselines).
272
+ ```
273
+
274
+ See [Routing](#routing) for how each incoming call resolves to one registered agent.
275
+
206
276
  ## Memory: before and after
207
277
 
208
278
  Assume an illustrative **~400 MB** idle baseline per worker for the shared stack (VAD, turn detector, and similar). Your measured RSS will differ by provider, model, and OS.
@@ -266,6 +336,71 @@ footprint. Validate against the §8.4 real-LiveKit integration test
266
336
  `OPENAI_API_KEY`) before quoting a per-session memory number to your
267
337
  operators.
268
338
 
339
+ ### Throughput: steady-state event-loop p99
340
+
341
+ Memory density is only half the question. N sessions share one event loop and
342
+ one GIL, so the other half is whether the loop keeps up.
343
+ `tests/benchmarks/throughput.py` drives N concurrent sessions through the real
344
+ Silero VAD over synthetic 16 kHz PCM at 50 fps (the continuous on-loop CPU cost)
345
+ and measures event-loop p99 latency, separating the startup burst from steady
346
+ state.
347
+
348
+ ```bash
349
+ uv run python tests/benchmarks/throughput.py --sessions 1,10,25,50,100
350
+ ```
351
+
352
+ Sample sweep (Apple M-series laptop, `vad` workload, steady state):
353
+
354
+ | Sessions | steady-state loop p99 | peak RSS |
355
+ | ---: | ---: | ---: |
356
+ | 1 | 0.9 ms | 160 MB |
357
+ | 10 | 1.3 ms | 160 MB |
358
+ | 25 | 1.2 ms | 160 MB |
359
+ | 50 | 1.1 ms | 160 MB |
360
+ | 100 | 2.8 ms | 160 MB |
361
+
362
+ Steady-state VAD inference stays well under a 100 ms loop-latency budget up to 100
363
+ sessions, with flat resident memory (the model loads once). The expensive,
364
+ bursty part is session *startup* (each `session.start()` plus greeting), which
365
+ the benchmark reports as a separate `startup_p99` column and which dominates
366
+ early-life latency. This workload models the continuous VAD path, not the full
367
+ STT/LLM/TTS orchestration, so read it as the on-loop-CPU ceiling rather than a
368
+ full-pipeline guarantee. Run it on your own hardware before quoting a
369
+ sessions-per-worker number.
370
+
371
+ ### Prove it on your machine
372
+
373
+ The process column above is estimated. This script measures both models for
374
+ real on your laptop: it spawns one subprocess per session for the
375
+ process-per-session model, runs the same number of sessions as `asyncio`
376
+ tasks in a single process for the coroutine model, then prints the memory
377
+ used each way. No LiveKit server, no API keys, and no model download by default (`--load-vad` fetches the Silero weights once).
378
+
379
+ ```bash
380
+ uv run python examples/density_demo.py # 16 sessions
381
+ uv run python examples/density_demo.py --sessions 32 # the gap widens with N
382
+ uv run python examples/density_demo.py --sessions 50 --load-vad # adds the shared Silero VAD model
383
+ ```
384
+
385
+ Sample output (Apple M-series laptop, import-only mode):
386
+
387
+ ```text
388
+ Hosting 16 concurrent voice sessions. Measuring resident memory.
389
+
390
+ livekit-agents (process per session): 1861 MB total ( 116.3 MB/session)
391
+ OpenRTC coroutine pool (one process): 195 MB total ( 12.2 MB/session)
392
+
393
+ OpenRTC uses 9.5x less memory for the same 16 sessions.
394
+ ```
395
+
396
+ Your numbers vary by machine, and the ratio grows as you raise `--sessions`
397
+ (the coroutine pool pays the import cost once and amortizes it across every
398
+ session). This default mode counts only the `livekit-agents` import cost, so
399
+ it is a conservative lower bound: `--load-vad` adds the shared Silero VAD
400
+ model weights (paid once in the pool, once per process otherwise), and
401
+ `tests/benchmarks/density.py --sessions 50` proves the 50-sessions-under-4-GB
402
+ ceiling. The full script is [examples/density_demo.py](examples/density_demo.py).
403
+
269
404
  ## Routing
270
405
 
271
406
  One process hosts several agent classes, so each session must resolve to a single registered name. `AgentPool` resolves the agent in this order:
@@ -171,6 +171,76 @@ If a module has no `@agent_config`, the agent name defaults to the filename stem
171
171
 
172
172
  Discovered agents work with `livekit dev` and spawn-based workers on macOS. For `add()`, define agent classes at module scope so worker reload can import them.
173
173
 
174
+ ## Migrating from livekit-agents
175
+
176
+ Already running one or more `livekit-agents` workers? Each is its own process that
177
+ loads the same VAD and turn-detector models. Collapse them into one `AgentPool`
178
+ worker without changing your agents.
179
+
180
+ **Before** (one worker per agent, N processes):
181
+
182
+ ```python
183
+ # restaurant_worker.py (plus a near-identical dental_worker.py, support_worker.py, ...)
184
+ from livekit import agents
185
+ from livekit.agents import Agent, AgentSession
186
+ from livekit.plugins import openai, silero
187
+
188
+
189
+ class RestaurantAgent(Agent):
190
+ def __init__(self) -> None:
191
+ super().__init__(instructions="You help callers book tables.")
192
+
193
+
194
+ async def entrypoint(ctx: agents.JobContext) -> None:
195
+ session = AgentSession(
196
+ stt=openai.STT(), llm=openai.LLM(), tts=openai.TTS(), vad=silero.VAD.load()
197
+ )
198
+ await session.start(agent=RestaurantAgent(), room=ctx.room)
199
+ await ctx.connect()
200
+
201
+
202
+ if __name__ == "__main__":
203
+ agents.cli.run_app(agents.WorkerOptions(entrypoint_fnc=entrypoint))
204
+ ```
205
+
206
+ **After** (one worker, N agents, one shared prewarm):
207
+
208
+ ```python
209
+ # worker.py
210
+ from livekit.agents import Agent
211
+ from livekit.plugins import openai
212
+ from openrtc import AgentPool
213
+
214
+
215
+ class RestaurantAgent(Agent): # unchanged
216
+ def __init__(self) -> None:
217
+ super().__init__(instructions="You help callers book tables.")
218
+
219
+
220
+ class DentalAgent(Agent): # unchanged
221
+ def __init__(self) -> None:
222
+ super().__init__(instructions="You help callers manage appointments.")
223
+
224
+
225
+ pool = AgentPool(default_stt=openai.STT(), default_llm=openai.LLM(), default_tts=openai.TTS())
226
+ pool.add("restaurant", RestaurantAgent)
227
+ pool.add("dental", DentalAgent)
228
+ pool.run()
229
+ ```
230
+
231
+ Your `Agent` subclasses, tools, and provider objects are unchanged. You delete the
232
+ per-worker boilerplate (`entrypoint`, `AgentSession` wiring, `cli.run_app`) and
233
+ register the agents on one pool; OpenRTC owns prewarm, routing, and per-call
234
+ session construction. On the first run the worker logs the win, for example:
235
+
236
+ ```text
237
+ OpenRTC: 2 agents in 1 worker (baseline ~410 MB). 2 separate livekit-agents
238
+ workers would cost ~820 MB; sharing one worker saves ~410 MB of idle baseline
239
+ (assumes equal per-worker baselines).
240
+ ```
241
+
242
+ See [Routing](#routing) for how each incoming call resolves to one registered agent.
243
+
174
244
  ## Memory: before and after
175
245
 
176
246
  Assume an illustrative **~400 MB** idle baseline per worker for the shared stack (VAD, turn detector, and similar). Your measured RSS will differ by provider, model, and OS.
@@ -234,6 +304,71 @@ footprint. Validate against the §8.4 real-LiveKit integration test
234
304
  `OPENAI_API_KEY`) before quoting a per-session memory number to your
235
305
  operators.
236
306
 
307
+ ### Throughput: steady-state event-loop p99
308
+
309
+ Memory density is only half the question. N sessions share one event loop and
310
+ one GIL, so the other half is whether the loop keeps up.
311
+ `tests/benchmarks/throughput.py` drives N concurrent sessions through the real
312
+ Silero VAD over synthetic 16 kHz PCM at 50 fps (the continuous on-loop CPU cost)
313
+ and measures event-loop p99 latency, separating the startup burst from steady
314
+ state.
315
+
316
+ ```bash
317
+ uv run python tests/benchmarks/throughput.py --sessions 1,10,25,50,100
318
+ ```
319
+
320
+ Sample sweep (Apple M-series laptop, `vad` workload, steady state):
321
+
322
+ | Sessions | steady-state loop p99 | peak RSS |
323
+ | ---: | ---: | ---: |
324
+ | 1 | 0.9 ms | 160 MB |
325
+ | 10 | 1.3 ms | 160 MB |
326
+ | 25 | 1.2 ms | 160 MB |
327
+ | 50 | 1.1 ms | 160 MB |
328
+ | 100 | 2.8 ms | 160 MB |
329
+
330
+ Steady-state VAD inference stays well under a 100 ms loop-latency budget up to 100
331
+ sessions, with flat resident memory (the model loads once). The expensive,
332
+ bursty part is session *startup* (each `session.start()` plus greeting), which
333
+ the benchmark reports as a separate `startup_p99` column and which dominates
334
+ early-life latency. This workload models the continuous VAD path, not the full
335
+ STT/LLM/TTS orchestration, so read it as the on-loop-CPU ceiling rather than a
336
+ full-pipeline guarantee. Run it on your own hardware before quoting a
337
+ sessions-per-worker number.
338
+
339
+ ### Prove it on your machine
340
+
341
+ The process column above is estimated. This script measures both models for
342
+ real on your laptop: it spawns one subprocess per session for the
343
+ process-per-session model, runs the same number of sessions as `asyncio`
344
+ tasks in a single process for the coroutine model, then prints the memory
345
+ used each way. No LiveKit server, no API keys, and no model download by default (`--load-vad` fetches the Silero weights once).
346
+
347
+ ```bash
348
+ uv run python examples/density_demo.py # 16 sessions
349
+ uv run python examples/density_demo.py --sessions 32 # the gap widens with N
350
+ uv run python examples/density_demo.py --sessions 50 --load-vad # adds the shared Silero VAD model
351
+ ```
352
+
353
+ Sample output (Apple M-series laptop, import-only mode):
354
+
355
+ ```text
356
+ Hosting 16 concurrent voice sessions. Measuring resident memory.
357
+
358
+ livekit-agents (process per session): 1861 MB total ( 116.3 MB/session)
359
+ OpenRTC coroutine pool (one process): 195 MB total ( 12.2 MB/session)
360
+
361
+ OpenRTC uses 9.5x less memory for the same 16 sessions.
362
+ ```
363
+
364
+ Your numbers vary by machine, and the ratio grows as you raise `--sessions`
365
+ (the coroutine pool pays the import cost once and amortizes it across every
366
+ session). This default mode counts only the `livekit-agents` import cost, so
367
+ it is a conservative lower bound: `--load-vad` adds the shared Silero VAD
368
+ model weights (paid once in the pool, once per process otherwise), and
369
+ `tests/benchmarks/density.py --sessions 50` proves the 50-sessions-under-4-GB
370
+ ceiling. The full script is [examples/density_demo.py](examples/density_demo.py).
371
+
237
372
  ## Routing
238
373
 
239
374
  One process hosts several agent classes, so each session must resolve to a single registered name. `AgentPool` resolves the agent in this order:
@@ -147,16 +147,41 @@ contributor onboarding matches what's in the repo.
147
147
 
148
148
  <!-- releases -->
149
149
 
150
+ ## [0.2.2] - 2026-05-30
151
+
152
+ ### Fixed
153
+ - Coroutine mode now establishes the LiveKit job context for the session duration, so `get_job_context()` works inside agents and sessions and shutdown callbacks run (MAH-158).
154
+ - Coroutine sessions are held open until the call ends (room disconnect or `ctx.shutdown()`) instead of being marked SUCCESS when the entrypoint returns, so `max_concurrent_sessions` backpressure and runtime session counts are accurate (MAH-160).
155
+
156
+ ### Added
157
+ - Real-audio throughput benchmark (`tests/benchmarks/throughput.py`) reporting steady-state event-loop p99 vs session count, separating startup from steady state (MAH-163).
158
+ - `examples/density_demo.py`: a no-server demo comparing process-per-session vs coroutine-pool resident memory.
159
+
160
+ ### Changed
161
+ - The coroutine real-room integration test is now a correctness gate (job context plus no-failure); throughput moved to the dedicated benchmark.
162
+
163
+ ---
164
+
165
+ ## [0.2.1] - 2026-05-06
166
+
167
+ ## What's Changed
168
+ * [v0.2.1] File watcher infrastructure for agent code (MAH-80) by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/39
169
+
170
+
171
+ **Full Changelog**: https://github.com/mahimailabs/openrtc-runtime/compare/v0.1.0...v0.2.1
172
+
173
+ ---
174
+
150
175
  ## [0.1.0] - 2026-05-06
151
176
 
152
- ## What's Changed
153
- * Feat: light websocket by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/30
154
- * docs: bring docs/ in sync with v0.1 surface by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/35
155
- * Feat: struc refac by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/36
156
- * Feat/coroutine pool by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/37
157
- * Feat/coroutine pool prod by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/38
158
-
159
-
177
+ ## What's Changed
178
+ * Feat: light websocket by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/30
179
+ * docs: bring docs/ in sync with v0.1 surface by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/35
180
+ * Feat: struc refac by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/36
181
+ * Feat/coroutine pool by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/37
182
+ * Feat/coroutine pool prod by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/38
183
+
184
+
160
185
  **Full Changelog**: https://github.com/mahimailabs/openrtc-runtime/compare/v0.0.17...v0.1.0
161
186
 
162
187
  ---
@@ -0,0 +1,163 @@
1
+ """Prove the OpenRTC density win, on one laptop, with real numbers.
2
+
3
+ The claim: livekit-agents runs roughly one OS process per session (about
4
+ 3 GB each in production). OpenRTC's coroutine pool runs N sessions as
5
+ asyncio tasks inside a single process, so the heavy per-process cost
6
+ (Python interpreter, the livekit-agents import graph, and shared models
7
+ like Silero VAD and the turn detector) is paid ONCE instead of N times.
8
+
9
+ This script measures both models for real:
10
+
11
+ * "process-per-session" (what vanilla livekit-agents does):
12
+ spawn N subprocesses, each imports the agent stack and holds a
13
+ per-session buffer. We sum the resident memory across all of them.
14
+
15
+ * "OpenRTC coroutine pool" (the default isolation mode):
16
+ import the stack ONCE, run N asyncio sessions in this single process,
17
+ each holding the same per-session buffer. We read this process's
18
+ resident memory.
19
+
20
+ Then it prints total memory each way, memory per session, and the ratio.
21
+ No LiveKit server, no network, no model download required.
22
+
23
+ Run it:
24
+
25
+ uv run python examples/density_demo.py # N = 16
26
+ uv run python examples/density_demo.py --sessions 32
27
+ uv run python examples/density_demo.py --sessions 50 --load-vad
28
+
29
+ Use --load-vad to also load the real Silero VAD in every worker (the model
30
+ livekit-agents would load per process and OpenRTC shares). It downloads
31
+ ONNX weights on first run, then makes the gap even wider.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import argparse
37
+ import asyncio
38
+ import contextlib
39
+ import multiprocessing as mp
40
+ import os
41
+ import time
42
+
43
+ import psutil
44
+
45
+ # Stand-in for one session's live audio plus conversation state. The real
46
+ # per-session cost is dominated by the shared-vs-per-process fixed cost, so
47
+ # the exact buffer size is not load-bearing; it just keeps each session honest.
48
+ _SESSION_BUFFER_MB = 5
49
+
50
+
51
+ def _import_stack(load_vad: bool) -> None:
52
+ """Pay the per-process import cost that livekit-agents incurs per session."""
53
+ import livekit.agents # noqa: F401 (the real wheel, ~150 MB resident)
54
+
55
+ import openrtc # noqa: F401
56
+
57
+ if load_vad:
58
+ # The shared model OpenRTC loads once in prewarm and livekit-agents
59
+ # loads in every worker process. Widens the gap; needs a one-time
60
+ # weights download.
61
+ from livekit.plugins import silero
62
+
63
+ silero.VAD.load()
64
+
65
+
66
+ def _process_worker(ready: object, stop: object, load_vad: bool) -> None:
67
+ """One subprocess == one session, the livekit-agents process-per-job model."""
68
+ _import_stack(load_vad)
69
+ _buffer = bytearray(_SESSION_BUFFER_MB * 1024 * 1024) # noqa: F841
70
+ ready.set() # type: ignore[attr-defined]
71
+ stop.wait() # type: ignore[attr-defined] hold the buffer until measured
72
+
73
+
74
+ def measure_process_model(sessions: int, load_vad: bool) -> float:
75
+ """Sum resident memory of N independent worker processes (MB)."""
76
+ # "spawn" matches LiveKit's default executor on macOS, so each child pays
77
+ # the full fresh-interpreter import cost, exactly as in production.
78
+ ctx = mp.get_context("spawn")
79
+ ready_events = [ctx.Event() for _ in range(sessions)]
80
+ stop_event = ctx.Event()
81
+ procs = [
82
+ ctx.Process(
83
+ target=_process_worker, args=(ready_events[i], stop_event, load_vad)
84
+ )
85
+ for i in range(sessions)
86
+ ]
87
+ for p in procs:
88
+ p.start()
89
+ for ev in ready_events:
90
+ ev.wait(timeout=120) # every worker finished importing + allocated
91
+
92
+ time.sleep(0.5) # let resident memory settle
93
+ total_bytes = 0
94
+ for p in procs:
95
+ with contextlib.suppress(
96
+ psutil.NoSuchProcess
97
+ ): # a worker may have exited early
98
+ total_bytes += psutil.Process(p.pid).memory_info().rss
99
+
100
+ stop_event.set()
101
+ for p in procs:
102
+ p.join()
103
+ return total_bytes / (1024 * 1024)
104
+
105
+
106
+ async def measure_coroutine_model(sessions: int, load_vad: bool) -> float:
107
+ """Resident memory of ONE process hosting N asyncio sessions (MB)."""
108
+ _import_stack(load_vad) # paid once, in this process
109
+
110
+ async def _session() -> None:
111
+ _buffer = bytearray(_SESSION_BUFFER_MB * 1024 * 1024)
112
+ try:
113
+ await asyncio.sleep(3600) # stay alive until measured
114
+ finally:
115
+ del _buffer
116
+
117
+ tasks = [asyncio.create_task(_session()) for _ in range(sessions)]
118
+ await asyncio.sleep(0.5) # let all sessions allocate + settle
119
+ rss_mb = psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)
120
+
121
+ for t in tasks:
122
+ t.cancel()
123
+ await asyncio.gather(*tasks, return_exceptions=True)
124
+ return rss_mb
125
+
126
+
127
+ def main() -> None:
128
+ parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0])
129
+ parser.add_argument(
130
+ "--sessions", type=int, default=16, help="concurrent sessions (default 16)"
131
+ )
132
+ parser.add_argument(
133
+ "--load-vad",
134
+ action="store_true",
135
+ help="also load real Silero VAD in every worker",
136
+ )
137
+ args = parser.parse_args()
138
+ n = args.sessions
139
+
140
+ print(f"\nHosting {n} concurrent voice sessions. Measuring resident memory.\n")
141
+
142
+ # Process model first so this parent process stays light; the coroutine
143
+ # measurement then imports the stack into this same process on purpose.
144
+ process_mb = measure_process_model(n, args.load_vad)
145
+ coroutine_mb = asyncio.run(measure_coroutine_model(n, args.load_vad))
146
+
147
+ ratio = process_mb / coroutine_mb if coroutine_mb else float("inf")
148
+ print(
149
+ f" livekit-agents (process per session): {process_mb:8.0f} MB total "
150
+ f"({process_mb / n:6.1f} MB/session)"
151
+ )
152
+ print(
153
+ f" OpenRTC coroutine pool (one process): {coroutine_mb:8.0f} MB total "
154
+ f"({coroutine_mb / n:6.1f} MB/session)"
155
+ )
156
+ print(f"\n OpenRTC uses {ratio:.1f}x less memory for the same {n} sessions.\n")
157
+ print(" Same agent code, both ways. In OpenRTC you flip one argument:")
158
+ print(' AgentPool(isolation="process") # the left column above')
159
+ print(' AgentPool(isolation="coroutine") # the right column (default)\n')
160
+
161
+
162
+ if __name__ == "__main__":
163
+ main()
@@ -14,6 +14,8 @@ Contracts derived from:
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import contextlib
18
+ import contextvars
17
19
  import inspect
18
20
  import logging
19
21
  import uuid
@@ -25,7 +27,7 @@ from livekit import rtc
25
27
  from livekit.agents import JobContext, JobExecutorType, JobProcess, utils
26
28
  from livekit.agents.ipc import inference_executor as inference_executor_mod
27
29
  from livekit.agents.ipc.job_executor import JobStatus
28
- from livekit.agents.job import RunningJobInfo
30
+ from livekit.agents.job import RunningJobInfo, _JobContextVar
29
31
 
30
32
  if TYPE_CHECKING:
31
33
  from livekit.agents.ipc.job_executor import JobExecutor
@@ -114,6 +116,7 @@ class CoroutineJobExecutor:
114
116
  self._session_end_fnc = session_end_fnc
115
117
  self._context_factory = context_factory
116
118
  self._loop = loop
119
+ self._shutdown_fut: asyncio.Future[str] | None = None
117
120
 
118
121
  @property
119
122
  def id(self) -> str:
@@ -271,22 +274,79 @@ class CoroutineJobExecutor:
271
274
  self._task = loop.create_task(self._run_entrypoint(ctx))
272
275
 
273
276
  async def _run_entrypoint(self, ctx: JobContext) -> None:
277
+ """Run the session lifecycle, mirroring upstream ``_run_job_task``.
278
+
279
+ Establishes the job context (so ``get_job_context()`` resolves inside
280
+ the entrypoint and the session), holds the session open until shutdown
281
+ is requested (room disconnect, ``ctx.shutdown()``, or an entrypoint
282
+ crash), then runs the teardown sequence. Every ``JobContext`` hook is
283
+ treated as optional so the executor still runs with the bare stub
284
+ contexts that unit tests and the density benchmark pass directly.
285
+ """
274
286
  assert self._entrypoint_fnc is not None # checked in launch_job
287
+ loop = asyncio.get_running_loop()
288
+ shutdown_fut: asyncio.Future[str] = loop.create_future()
289
+ self._shutdown_fut = shutdown_fut
290
+
291
+ def _request_shutdown(reason: str = "shutdown") -> None:
292
+ if not shutdown_fut.done():
293
+ shutdown_fut.set_result(reason)
294
+
295
+ # Per-job log fields, then the contextvar (the MAH-158 fix).
296
+ _on_setup = getattr(ctx, "_on_setup", None)
297
+ if callable(_on_setup):
298
+ _on_setup()
299
+ token: contextvars.Token[JobContext] | None = None
300
+ with contextlib.suppress(Exception):
301
+ token = _JobContextVar.set(ctx)
302
+
303
+ # Shutdown triggers (all optional for stub contexts): ctx.shutdown()
304
+ # via on_shutdown, and the room "disconnected" event (mirrors
305
+ # job_proc_lazy_main's room-disconnected handler).
306
+ if hasattr(ctx, "_on_shutdown"):
307
+
308
+ def _on_shutdown(reason: str = "") -> None:
309
+ _request_shutdown(reason or "shutdown")
310
+
311
+ ctx._on_shutdown = _on_shutdown
312
+ _room_on = getattr(getattr(ctx, "room", None), "on", None)
313
+ if callable(_room_on):
314
+ _room_on("disconnected", lambda *_a: _request_shutdown("room disconnected"))
315
+
275
316
  try:
276
- await self._entrypoint_fnc(ctx)
317
+ try:
318
+ await self._entrypoint_fnc(ctx)
319
+ except asyncio.CancelledError:
320
+ if self._status is JobStatus.RUNNING:
321
+ self._status = JobStatus.FAILED
322
+ raise
323
+ except Exception:
324
+ if self._status is JobStatus.RUNNING:
325
+ self._status = JobStatus.FAILED
326
+ logger.exception(
327
+ "entrypoint raised in CoroutineJobExecutor",
328
+ extra=self.logging_extra(),
329
+ )
330
+ return
331
+ # Entrypoint returned cleanly. Hold a real job open until the call
332
+ # ends (the MAH-160 fix), then run teardown. A setup-only entrypoint
333
+ # (no live session) or a fake job (simulate_job, which has no live
334
+ # room to disconnect) completes on return instead.
335
+ _is_fake = getattr(ctx, "is_fake_job", None)
336
+ fake_job = bool(_is_fake()) if callable(_is_fake) else False
337
+ if (
338
+ getattr(ctx, "_primary_agent_session", None) is not None
339
+ and not fake_job
340
+ ):
341
+ try:
342
+ await shutdown_fut
343
+ except asyncio.CancelledError:
344
+ if self._status is JobStatus.RUNNING:
345
+ self._status = JobStatus.FAILED
346
+ raise
347
+ await self._teardown(ctx, shutdown_fut.result())
277
348
  if self._status is JobStatus.RUNNING:
278
349
  self._status = JobStatus.SUCCESS
279
- except asyncio.CancelledError:
280
- if self._status is JobStatus.RUNNING:
281
- self._status = JobStatus.FAILED
282
- raise
283
- except Exception:
284
- if self._status is JobStatus.RUNNING:
285
- self._status = JobStatus.FAILED
286
- logger.exception(
287
- "entrypoint raised in CoroutineJobExecutor",
288
- extra=self.logging_extra(),
289
- )
290
350
  finally:
291
351
  if self._session_end_fnc is not None:
292
352
  try:
@@ -296,6 +356,43 @@ class CoroutineJobExecutor:
296
356
  "session_end_fnc raised in CoroutineJobExecutor",
297
357
  extra=self.logging_extra(),
298
358
  )
359
+ if token is not None:
360
+ with contextlib.suppress(Exception):
361
+ _JobContextVar.reset(token)
362
+
363
async def _teardown(self, ctx: JobContext, reason: str) -> None:
    """Run the post-shutdown lifecycle (mirrors upstream ``_run_job_task``).

    Steps, in order: close the primary ``AgentSession``, await
    ``ctx._on_session_end``, await each registered shutdown callback with
    ``reason``, cancel and drain ``ctx._pending_tasks``, then call
    ``ctx._on_cleanup``. Every hook is looked up with ``getattr`` and skipped
    when absent, so stub contexts in tests and benchmarks are tolerated.

    Teardown is best-effort: an ``Exception`` raised by any step is logged
    and the remaining steps still run. ``asyncio.CancelledError`` is not
    caught here and propagates to the caller.

    Args:
        ctx: The job context, or a stub exposing any subset of the hooks.
        reason: Shutdown reason passed to each shutdown callback.
    """
    primary = getattr(ctx, "_primary_agent_session", None)
    aclose = getattr(primary, "aclose", None)
    if callable(aclose):
        try:
            await aclose()
        except Exception:
            # Log instead of swallowing silently, so a failed session close
            # is diagnosable; later steps must still run.
            logger.exception(
                "primary session aclose raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )

    _on_session_end = getattr(ctx, "_on_session_end", None)
    if callable(_on_session_end):
        try:
            await _on_session_end()
        except Exception:
            logger.exception(
                "_on_session_end raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )

    # Iterate over a copy so a callback that mutates the list cannot
    # disturb the loop.
    for callback in list(getattr(ctx, "_shutdown_callbacks", None) or []):
        try:
            await callback(reason)
        except Exception:
            logger.exception(
                "shutdown callback raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )

    pending = list(getattr(ctx, "_pending_tasks", None) or [])
    if pending:
        for task in pending:
            task.cancel()
        # return_exceptions=True: the cancellations (and any task errors)
        # are collected instead of being raised out of teardown.
        await asyncio.gather(*pending, return_exceptions=True)

    _on_cleanup = getattr(ctx, "_on_cleanup", None)
    if callable(_on_cleanup):
        try:
            _on_cleanup()
        except Exception:
            logger.exception(
                "_on_cleanup raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )
299
396
 
300
397
def logging_extra(self) -> dict[str, Any]:
    """Return the ``extra`` mapping attached to this executor's log records."""
    fields: dict[str, Any] = {}
    fields["executor_id"] = self._id
    return fields