openrtc 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. {openrtc-0.2.1 → openrtc-0.2.2}/PKG-INFO +66 -1
  2. {openrtc-0.2.1 → openrtc-0.2.2}/README.md +65 -0
  3. {openrtc-0.2.1 → openrtc-0.2.2}/docs/changelog.md +16 -6
  4. openrtc-0.2.2/examples/density_demo.py +163 -0
  5. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/execution/coroutine.py +110 -13
  6. openrtc-0.2.2/tests/benchmarks/throughput.py +384 -0
  7. openrtc-0.2.2/tests/integration/test_coroutine_realroom.py +200 -0
  8. openrtc-0.2.2/tests/test_coroutine_job_context.py +90 -0
  9. openrtc-0.2.2/tests/test_coroutine_lifecycle.py +231 -0
  10. openrtc-0.2.2/tests/test_throughput_bench.py +42 -0
  11. {openrtc-0.2.1 → openrtc-0.2.2}/.coderabbit.yaml +0 -0
  12. {openrtc-0.2.1 → openrtc-0.2.2}/.editorconfig +0 -0
  13. {openrtc-0.2.1 → openrtc-0.2.2}/.env.example +0 -0
  14. {openrtc-0.2.1 → openrtc-0.2.2}/.github/FUNDING.yml +0 -0
  15. {openrtc-0.2.1 → openrtc-0.2.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  16. {openrtc-0.2.1 → openrtc-0.2.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  17. {openrtc-0.2.1 → openrtc-0.2.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  18. {openrtc-0.2.1 → openrtc-0.2.2}/.github/dependabot.yml +0 -0
  19. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/audit.yml +0 -0
  20. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/bench.yml +0 -0
  21. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/build.yml +0 -0
  22. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/canary.yml +0 -0
  23. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/deploy-docs.yml +0 -0
  24. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/docs.yml +0 -0
  25. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/integration.yml +0 -0
  26. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/lint.yml +0 -0
  27. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/publish.yml +0 -0
  28. {openrtc-0.2.1 → openrtc-0.2.2}/.github/workflows/test.yml +0 -0
  29. {openrtc-0.2.1 → openrtc-0.2.2}/.gitignore +0 -0
  30. {openrtc-0.2.1 → openrtc-0.2.2}/.pre-commit-config.yaml +0 -0
  31. {openrtc-0.2.1 → openrtc-0.2.2}/AGENTS.md +0 -0
  32. {openrtc-0.2.1 → openrtc-0.2.2}/CLAUDE.md +0 -0
  33. {openrtc-0.2.1 → openrtc-0.2.2}/CONTRIBUTING.md +0 -0
  34. {openrtc-0.2.1 → openrtc-0.2.2}/LICENSE +0 -0
  35. {openrtc-0.2.1 → openrtc-0.2.2}/Makefile +0 -0
  36. {openrtc-0.2.1 → openrtc-0.2.2}/assets/banner.png +0 -0
  37. {openrtc-0.2.1 → openrtc-0.2.2}/assets/logo.png +0 -0
  38. {openrtc-0.2.1 → openrtc-0.2.2}/codecov.yml +0 -0
  39. {openrtc-0.2.1 → openrtc-0.2.2}/docker-compose.test.yml +0 -0
  40. {openrtc-0.2.1 → openrtc-0.2.2}/docs/.vitepress/config.ts +0 -0
  41. {openrtc-0.2.1 → openrtc-0.2.2}/docs/.vitepress/theme/custom.css +0 -0
  42. {openrtc-0.2.1 → openrtc-0.2.2}/docs/.vitepress/theme/index.ts +0 -0
  43. {openrtc-0.2.1 → openrtc-0.2.2}/docs/api/pool.md +0 -0
  44. {openrtc-0.2.1 → openrtc-0.2.2}/docs/audit-2026-05-02.md +0 -0
  45. {openrtc-0.2.1 → openrtc-0.2.2}/docs/benchmarks/density-v0.1.md +0 -0
  46. {openrtc-0.2.1 → openrtc-0.2.2}/docs/cli.md +0 -0
  47. {openrtc-0.2.1 → openrtc-0.2.2}/docs/concepts/architecture.md +0 -0
  48. {openrtc-0.2.1 → openrtc-0.2.2}/docs/deployment/github-pages.md +0 -0
  49. {openrtc-0.2.1 → openrtc-0.2.2}/docs/design/agent-server-integration.md +0 -0
  50. {openrtc-0.2.1 → openrtc-0.2.2}/docs/design/job-executor-protocol.md +0 -0
  51. {openrtc-0.2.1 → openrtc-0.2.2}/docs/design/proc-pool-surface.md +0 -0
  52. {openrtc-0.2.1 → openrtc-0.2.2}/docs/design/v0.1.md +0 -0
  53. {openrtc-0.2.1 → openrtc-0.2.2}/docs/examples.md +0 -0
  54. {openrtc-0.2.1 → openrtc-0.2.2}/docs/getting-started.md +0 -0
  55. {openrtc-0.2.1 → openrtc-0.2.2}/docs/index.md +0 -0
  56. {openrtc-0.2.1 → openrtc-0.2.2}/docs/package-lock.json +0 -0
  57. {openrtc-0.2.1 → openrtc-0.2.2}/docs/package.json +0 -0
  58. {openrtc-0.2.1 → openrtc-0.2.2}/docs/public/banner.png +0 -0
  59. {openrtc-0.2.1 → openrtc-0.2.2}/docs/public/logo.png +0 -0
  60. {openrtc-0.2.1 → openrtc-0.2.2}/docs/public/logo.svg +0 -0
  61. {openrtc-0.2.1 → openrtc-0.2.2}/docs/release-v0.1.md +0 -0
  62. {openrtc-0.2.1 → openrtc-0.2.2}/examples/agents/dental.py +0 -0
  63. {openrtc-0.2.1 → openrtc-0.2.2}/examples/agents/restaurant.py +0 -0
  64. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/.dockerignore +0 -0
  65. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/.env.example +0 -0
  66. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/.gitignore +0 -0
  67. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/Dockerfile +0 -0
  68. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/README.md +0 -0
  69. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/app.css +0 -0
  70. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/components/agents-ui/agent-audio-visualizer-wave.tsx +0 -0
  71. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/components/agents-ui/agent-chat-transcript.tsx +0 -0
  72. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/components/agents-ui/agent-session-provider.tsx +0 -0
  73. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/components/demo-call-page.tsx +0 -0
  74. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/root.tsx +0 -0
  75. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/routes/api.token.ts +0 -0
  76. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/routes/dentist.tsx +0 -0
  77. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/routes/home.tsx +0 -0
  78. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/routes/restaurant.tsx +0 -0
  79. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/routes.ts +0 -0
  80. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/welcome/logo-dark.svg +0 -0
  81. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/welcome/logo-light.svg +0 -0
  82. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/app/welcome/welcome.tsx +0 -0
  83. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/package-lock.json +0 -0
  84. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/package.json +0 -0
  85. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/public/favicon.ico +0 -0
  86. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/react-router.config.ts +0 -0
  87. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/tsconfig.json +0 -0
  88. {openrtc-0.2.1 → openrtc-0.2.2}/examples/frontend/vite.config.ts +0 -0
  89. {openrtc-0.2.1 → openrtc-0.2.2}/examples/main.py +0 -0
  90. {openrtc-0.2.1 → openrtc-0.2.2}/pyproject.toml +0 -0
  91. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/__init__.py +0 -0
  92. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/__init__.py +0 -0
  93. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/commands.py +0 -0
  94. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/dashboard.py +0 -0
  95. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/entry.py +0 -0
  96. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/livekit.py +0 -0
  97. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/params.py +0 -0
  98. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/reporter.py +0 -0
  99. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/cli/types.py +0 -0
  100. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/__init__.py +0 -0
  101. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/config.py +0 -0
  102. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/discovery.py +0 -0
  103. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/pool.py +0 -0
  104. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/routing.py +0 -0
  105. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/serialization.py +0 -0
  106. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/core/turn_handling.py +0 -0
  107. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/execution/__init__.py +0 -0
  108. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/execution/coroutine_server.py +0 -0
  109. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/execution/file_watcher.py +0 -0
  110. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/execution/prewarm.py +0 -0
  111. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/observability/__init__.py +0 -0
  112. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/observability/metrics.py +0 -0
  113. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/observability/snapshot.py +0 -0
  114. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/observability/stream.py +0 -0
  115. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/py.typed +0 -0
  116. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/tui/__init__.py +0 -0
  117. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/tui/app.py +0 -0
  118. {openrtc-0.2.1 → openrtc-0.2.2}/src/openrtc/types.py +0 -0
  119. {openrtc-0.2.1 → openrtc-0.2.2}/tests/benchmarks/__init__.py +0 -0
  120. {openrtc-0.2.1 → openrtc-0.2.2}/tests/benchmarks/density.py +0 -0
  121. {openrtc-0.2.1 → openrtc-0.2.2}/tests/conftest.py +0 -0
  122. {openrtc-0.2.1 → openrtc-0.2.2}/tests/execution/__init__.py +0 -0
  123. {openrtc-0.2.1 → openrtc-0.2.2}/tests/execution/test_file_watcher.py +0 -0
  124. {openrtc-0.2.1 → openrtc-0.2.2}/tests/execution/test_file_watcher_smoke.py +0 -0
  125. {openrtc-0.2.1 → openrtc-0.2.2}/tests/integration/README.md +0 -0
  126. {openrtc-0.2.1 → openrtc-0.2.2}/tests/integration/__init__.py +0 -0
  127. {openrtc-0.2.1 → openrtc-0.2.2}/tests/integration/conftest.py +0 -0
  128. {openrtc-0.2.1 → openrtc-0.2.2}/tests/integration/test_concurrent_real_calls.py +0 -0
  129. {openrtc-0.2.1 → openrtc-0.2.2}/tests/integration/test_dev_server_fixture.py +0 -0
  130. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_cli.py +0 -0
  131. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_cli_optional_extra_integration.py +0 -0
  132. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_cli_params.py +0 -0
  133. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_config.py +0 -0
  134. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_backpressure.py +0 -0
  135. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_coverage.py +0 -0
  136. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_drain.py +0 -0
  137. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_isolation.py +0 -0
  138. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_server.py +0 -0
  139. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_skeleton.py +0 -0
  140. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_coroutine_smoke.py +0 -0
  141. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_dashboard.py +0 -0
  142. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_discovery.py +0 -0
  143. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_isolation_process_parity.py +0 -0
  144. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_metrics_stream.py +0 -0
  145. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_pool.py +0 -0
  146. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_resources.py +0 -0
  147. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_routing.py +0 -0
  148. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_serialization.py +0 -0
  149. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_tui_app.py +0 -0
  150. {openrtc-0.2.1 → openrtc-0.2.2}/tests/test_turn_handling.py +0 -0
  151. {openrtc-0.2.1 → openrtc-0.2.2}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openrtc
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Run multiple LiveKit voice agents in a single shared worker process.
5
5
  Project-URL: Homepage, https://github.com/mahimailabs/openrtc
6
6
  Project-URL: Repository, https://github.com/mahimailabs/openrtc
@@ -266,6 +266,71 @@ footprint. Validate against the §8.4 real-LiveKit integration test
266
266
  `OPENAI_API_KEY`) before quoting a per-session memory number to your
267
267
  operators.
268
268
 
269
+ ### Throughput: steady-state event-loop p99
270
+
271
+ Memory density is only half the question. N sessions share one event loop and
272
+ one GIL, so the other half is whether the loop keeps up.
273
+ `tests/benchmarks/throughput.py` drives N concurrent sessions through the real
274
+ Silero VAD over synthetic 16 kHz PCM at 50 fps (the continuous on-loop CPU cost)
275
+ and measures event-loop p99 latency, separating the startup burst from steady
276
+ state.
277
+
278
+ ```bash
279
+ uv run python tests/benchmarks/throughput.py --sessions 1,10,25,50,100
280
+ ```
281
+
282
+ Sample sweep (Apple M-series laptop, `vad` workload, steady state):
283
+
284
+ | Sessions | steady-state loop p99 | peak RSS |
285
+ | ---: | ---: | ---: |
286
+ | 1 | 0.9 ms | 160 MB |
287
+ | 10 | 1.3 ms | 160 MB |
288
+ | 25 | 1.2 ms | 160 MB |
289
+ | 50 | 1.1 ms | 160 MB |
290
+ | 100 | 2.8 ms | 160 MB |
291
+
292
+ Steady-state VAD inference stays well under a 100 ms loop-latency budget up to 100
293
+ sessions, with flat resident memory (the model loads once). The expensive,
294
+ bursty part is session *startup* (each `session.start()` plus greeting), which
295
+ the benchmark reports as a separate `startup_p99` column and which dominates
296
+ early-life latency. This workload models the continuous VAD path, not the full
297
+ STT/LLM/TTS orchestration, so read it as the on-loop-CPU ceiling rather than a
298
+ full-pipeline guarantee. Run it on your own hardware before quoting a
299
+ sessions-per-worker number.
300
+
301
+ ### Prove it on your machine
302
+
303
+ The process column above is estimated. This script measures both models for
304
+ real on your laptop: it spawns one subprocess per session for the
305
+ process-per-session model, runs the same number of sessions as `asyncio`
306
+ tasks in a single process for the coroutine model, then prints the memory
307
+ used each way. No LiveKit server, no API keys, no model download.
308
+
309
+ ```bash
310
+ uv run python examples/density_demo.py # 16 sessions
311
+ uv run python examples/density_demo.py --sessions 32 # the gap widens with N
312
+ uv run python examples/density_demo.py --sessions 50 --load-vad # adds the shared Silero VAD model
313
+ ```
314
+
315
+ Sample output (Apple M-series laptop, import-only mode):
316
+
317
+ ```text
318
+ Hosting 16 concurrent voice sessions. Measuring resident memory.
319
+
320
+ livekit-agents (process per session): 1861 MB total ( 116.3 MB/session)
321
+ OpenRTC coroutine pool (one process): 195 MB total ( 12.2 MB/session)
322
+
323
+ OpenRTC uses 9.5x less memory for the same 16 sessions.
324
+ ```
325
+
326
+ Your numbers vary by machine, and the ratio grows as you raise `--sessions`
327
+ (the coroutine pool pays the import cost once and amortizes it across every
328
+ session). This default mode counts only the `livekit-agents` import cost, so
329
+ it is a conservative lower bound: `--load-vad` adds the shared Silero VAD
330
+ model weights (paid once in the pool, once per process otherwise), and
331
+ `tests/benchmarks/density.py --sessions 50` proves the 50-sessions-under-4-GB
332
+ ceiling. The full script is [examples/density_demo.py](examples/density_demo.py).
333
+
269
334
  ## Routing
270
335
 
271
336
  One process hosts several agent classes, so each session must resolve to a single registered name. `AgentPool` resolves the agent in this order:
@@ -234,6 +234,71 @@ footprint. Validate against the §8.4 real-LiveKit integration test
234
234
  `OPENAI_API_KEY`) before quoting a per-session memory number to your
235
235
  operators.
236
236
 
237
+ ### Throughput: steady-state event-loop p99
238
+
239
+ Memory density is only half the question. N sessions share one event loop and
240
+ one GIL, so the other half is whether the loop keeps up.
241
+ `tests/benchmarks/throughput.py` drives N concurrent sessions through the real
242
+ Silero VAD over synthetic 16 kHz PCM at 50 fps (the continuous on-loop CPU cost)
243
+ and measures event-loop p99 latency, separating the startup burst from steady
244
+ state.
245
+
246
+ ```bash
247
+ uv run python tests/benchmarks/throughput.py --sessions 1,10,25,50,100
248
+ ```
249
+
250
+ Sample sweep (Apple M-series laptop, `vad` workload, steady state):
251
+
252
+ | Sessions | steady-state loop p99 | peak RSS |
253
+ | ---: | ---: | ---: |
254
+ | 1 | 0.9 ms | 160 MB |
255
+ | 10 | 1.3 ms | 160 MB |
256
+ | 25 | 1.2 ms | 160 MB |
257
+ | 50 | 1.1 ms | 160 MB |
258
+ | 100 | 2.8 ms | 160 MB |
259
+
260
+ Steady-state VAD inference stays well under a 100 ms loop-latency budget up to 100
261
+ sessions, with flat resident memory (the model loads once). The expensive,
262
+ bursty part is session *startup* (each `session.start()` plus greeting), which
263
+ the benchmark reports as a separate `startup_p99` column and which dominates
264
+ early-life latency. This workload models the continuous VAD path, not the full
265
+ STT/LLM/TTS orchestration, so read it as the on-loop-CPU ceiling rather than a
266
+ full-pipeline guarantee. Run it on your own hardware before quoting a
267
+ sessions-per-worker number.
268
+
269
+ ### Prove it on your machine
270
+
271
+ The process column above is estimated. This script measures both models for
272
+ real on your laptop: it spawns one subprocess per session for the
273
+ process-per-session model, runs the same number of sessions as `asyncio`
274
+ tasks in a single process for the coroutine model, then prints the memory
275
+ used each way. No LiveKit server, no API keys, no model download.
276
+
277
+ ```bash
278
+ uv run python examples/density_demo.py # 16 sessions
279
+ uv run python examples/density_demo.py --sessions 32 # the gap widens with N
280
+ uv run python examples/density_demo.py --sessions 50 --load-vad # adds the shared Silero VAD model
281
+ ```
282
+
283
+ Sample output (Apple M-series laptop, import-only mode):
284
+
285
+ ```text
286
+ Hosting 16 concurrent voice sessions. Measuring resident memory.
287
+
288
+ livekit-agents (process per session): 1861 MB total ( 116.3 MB/session)
289
+ OpenRTC coroutine pool (one process): 195 MB total ( 12.2 MB/session)
290
+
291
+ OpenRTC uses 9.5x less memory for the same 16 sessions.
292
+ ```
293
+
294
+ Your numbers vary by machine, and the ratio grows as you raise `--sessions`
295
+ (the coroutine pool pays the import cost once and amortizes it across every
296
+ session). This default mode counts only the `livekit-agents` import cost, so
297
+ it is a conservative lower bound: `--load-vad` adds the shared Silero VAD
298
+ model weights (paid once in the pool, once per process otherwise), and
299
+ `tests/benchmarks/density.py --sessions 50` proves the 50-sessions-under-4-GB
300
+ ceiling. The full script is [examples/density_demo.py](examples/density_demo.py).
301
+
237
302
  ## Routing
238
303
 
239
304
  One process hosts several agent classes, so each session must resolve to a single registered name. `AgentPool` resolves the agent in this order:
@@ -147,16 +147,26 @@ contributor onboarding matches what's in the repo.
147
147
 
148
148
  <!-- releases -->
149
149
 
150
- ## [0.1.0] - 2026-05-06
150
+ ## [0.2.1] - 2026-05-06
151
151
 
152
152
  ## What's Changed
153
- * Feat: light websocket by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/30
154
- * docs: bring docs/ in sync with v0.1 surface by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/35
155
- * Feat: struc refac by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/36
156
- * Feat/coroutine pool by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/37
157
- * Feat/coroutine pool prod by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/38
153
+ * [v0.2.1] File watcher infrastructure for agent code (MAH-80) by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/39
158
154
 
159
155
 
156
+ **Full Changelog**: https://github.com/mahimailabs/openrtc-runtime/compare/v0.1.0...v0.2.1
157
+
158
+ ---
159
+
160
+ ## [0.1.0] - 2026-05-06
161
+
162
+ ## What's Changed
163
+ * Feat: light websocket by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/30
164
+ * docs: bring docs/ in sync with v0.1 surface by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/35
165
+ * Feat: struc refac by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/36
166
+ * Feat/coroutine pool by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/37
167
+ * Feat/coroutine pool prod by @mahimairaja in https://github.com/mahimailabs/openrtc-runtime/pull/38
168
+
169
+
160
170
  **Full Changelog**: https://github.com/mahimailabs/openrtc-runtime/compare/v0.0.17...v0.1.0
161
171
 
162
172
  ---
@@ -0,0 +1,163 @@
1
+ """Prove the OpenRTC density win, on one laptop, with real numbers.
2
+
3
+ The claim: livekit-agents runs roughly one OS process per session (about
4
+ 3 GB each in production). OpenRTC's coroutine pool runs N sessions as
5
+ asyncio tasks inside a single process, so the heavy per-process cost
6
+ (Python interpreter, the livekit-agents import graph, and shared models
7
+ like Silero VAD and the turn detector) is paid ONCE instead of N times.
8
+
9
+ This script measures both models for real:
10
+
11
+ * "process-per-session" (what vanilla livekit-agents does):
12
+ spawn N subprocesses, each imports the agent stack and holds a
13
+ per-session buffer. We sum the resident memory across all of them.
14
+
15
+ * "OpenRTC coroutine pool" (the default isolation mode):
16
+ import the stack ONCE, run N asyncio sessions in this single process,
17
+ each holding the same per-session buffer. We read this process's
18
+ resident memory.
19
+
20
+ Then it prints total memory each way, memory per session, and the ratio.
21
+ No LiveKit server, no network, no model download required.
22
+
23
+ Run it:
24
+
25
+ uv run python examples/density_demo.py # N = 16
26
+ uv run python examples/density_demo.py --sessions 32
27
+ uv run python examples/density_demo.py --sessions 50 --load-vad
28
+
29
+ Use --load-vad to also load the real Silero VAD in every worker (the model
30
+ livekit-agents would load per process and OpenRTC shares). It downloads
31
+ ONNX weights on first run, then makes the gap even wider.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import argparse
37
+ import asyncio
38
+ import contextlib
39
+ import multiprocessing as mp
40
+ import os
41
+ import time
42
+
43
+ import psutil
44
+
45
+ # Stand-in for one session's live audio plus conversation state. The real
46
+ # per-session cost is dominated by the shared-vs-per-process fixed cost, so
47
+ # the exact buffer size is not load-bearing; it just keeps each session honest.
48
+ _SESSION_BUFFER_MB = 5
49
+
50
+
51
def _import_stack(load_vad: bool) -> None:
    """Import the agent stack so the calling process carries its resident cost.

    Args:
        load_vad: When true, additionally import the Silero plugin and call
            ``silero.VAD.load()`` after the base imports.
    """
    # Imported only for their memory footprint; the bound names are unused.
    import livekit.agents  # noqa: F401 (the real wheel, ~150 MB resident)

    import openrtc  # noqa: F401

    if not load_vad:
        return

    # Kept at function scope so the default (import-only) measurement never
    # touches the plugin or its model weights.
    from livekit.plugins import silero

    silero.VAD.load()
64
+
65
+
66
def _process_worker(ready: object, stop: object, load_vad: bool) -> None:
    """Body of one child process standing in for a single session.

    Imports the stack, allocates the per-session buffer, signals ``ready``,
    then blocks on ``stop`` so the memory stays resident while the parent
    takes its measurement.

    Args:
        ready: Event-like object; ``set()`` is called once setup is done.
        stop: Event-like object; ``wait()`` is called to park the worker.
        load_vad: Forwarded to ``_import_stack``.
    """
    _import_stack(load_vad)
    buffer_bytes = _SESSION_BUFFER_MB * 1024 * 1024
    # Bound to a local so the allocation lives until the function returns.
    _held = bytearray(buffer_bytes)  # noqa: F841
    ready.set()  # type: ignore[attr-defined]
    stop.wait()  # type: ignore[attr-defined]
72
+
73
+
74
def measure_process_model(sessions: int, load_vad: bool) -> float:
    """Sum resident memory of N independent worker processes (MB).

    Spawns ``sessions`` child processes running ``_process_worker``, waits for
    each to signal readiness, sums their RSS via ``psutil``, then releases and
    joins them.

    Args:
        sessions: Number of worker processes to spawn.
        load_vad: Forwarded to every worker (see ``_import_stack``).

    Returns:
        Total resident set size across the measured workers, in MiB. Workers
        that have already exited contribute nothing to the total.
    """
    # "spawn" matches LiveKit's default executor on macOS, so each child pays
    # the full fresh-interpreter import cost, exactly as in production.
    ctx = mp.get_context("spawn")
    ready_events = [ctx.Event() for _ in range(sessions)]
    stop_event = ctx.Event()
    procs = [
        ctx.Process(target=_process_worker, args=(ready, stop_event, load_vad))
        for ready in ready_events
    ]

    total_bytes = 0
    started = []  # only started processes may be joined
    try:
        for proc in procs:
            proc.start()
            started.append(proc)

        for proc, ready in zip(procs, ready_events):
            # Same 120 s ceiling per worker as before, but polled so that a
            # worker which died during import does not stall the whole run.
            deadline = time.monotonic() + 120
            while not ready.wait(timeout=0.2):
                if not proc.is_alive() or time.monotonic() >= deadline:
                    break

        time.sleep(0.5)  # let resident memory settle
        for proc in procs:
            # A worker may have exited early; skip it rather than fail.
            with contextlib.suppress(psutil.NoSuchProcess):
                total_bytes += psutil.Process(proc.pid).memory_info().rss
    finally:
        # Always release and reap the children, even on error or Ctrl-C,
        # otherwise they stay parked on stop.wait() after the parent is gone.
        stop_event.set()
        for proc in started:
            proc.join()
    return total_bytes / (1024 * 1024)
104
+
105
+
106
async def measure_coroutine_model(sessions: int, load_vad: bool) -> float:
    """Resident memory of ONE process hosting N asyncio sessions (MB).

    Imports the stack once in the current process, starts ``sessions`` tasks
    that each hold a per-session buffer, reads this process's RSS, then
    cancels the tasks.

    Args:
        sessions: Number of concurrent session tasks to create.
        load_vad: Forwarded to ``_import_stack``.

    Returns:
        This process's resident set size in MiB while the tasks are alive.
    """
    _import_stack(load_vad)  # paid once, in this process

    async def _hold_buffer() -> None:
        held = bytearray(_SESSION_BUFFER_MB * 1024 * 1024)
        try:
            # Park until cancelled; the measurement happens in the meantime.
            await asyncio.sleep(3600)
        finally:
            del held

    running = [asyncio.create_task(_hold_buffer()) for _ in range(sessions)]
    await asyncio.sleep(0.5)  # let all sessions allocate + settle

    this_process = psutil.Process(os.getpid())
    resident_mb = this_process.memory_info().rss / (1024 * 1024)

    for task in running:
        task.cancel()
    # Collect the cancellations so no task is left pending.
    await asyncio.gather(*running, return_exceptions=True)
    return resident_mb
125
+
126
+
127
+ def main() -> None:
128
+ parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0])
129
+ parser.add_argument(
130
+ "--sessions", type=int, default=16, help="concurrent sessions (default 16)"
131
+ )
132
+ parser.add_argument(
133
+ "--load-vad",
134
+ action="store_true",
135
+ help="also load real Silero VAD in every worker",
136
+ )
137
+ args = parser.parse_args()
138
+ n = args.sessions
139
+
140
+ print(f"\nHosting {n} concurrent voice sessions. Measuring resident memory.\n")
141
+
142
+ # Process model first so this parent process stays light; the coroutine
143
+ # measurement then imports the stack into this same process on purpose.
144
+ process_mb = measure_process_model(n, args.load_vad)
145
+ coroutine_mb = asyncio.run(measure_coroutine_model(n, args.load_vad))
146
+
147
+ ratio = process_mb / coroutine_mb if coroutine_mb else float("inf")
148
+ print(
149
+ f" livekit-agents (process per session): {process_mb:8.0f} MB total "
150
+ f"({process_mb / n:6.1f} MB/session)"
151
+ )
152
+ print(
153
+ f" OpenRTC coroutine pool (one process): {coroutine_mb:8.0f} MB total "
154
+ f"({coroutine_mb / n:6.1f} MB/session)"
155
+ )
156
+ print(f"\n OpenRTC uses {ratio:.1f}x less memory for the same {n} sessions.\n")
157
+ print(" Same agent code, both ways. In OpenRTC you flip one argument:")
158
+ print(' AgentPool(isolation="process") # the left column above')
159
+ print(' AgentPool(isolation="coroutine") # the right column (default)\n')
160
+
161
+
162
+ if __name__ == "__main__":
163
+ main()
@@ -14,6 +14,8 @@ Contracts derived from:
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import contextlib
18
+ import contextvars
17
19
  import inspect
18
20
  import logging
19
21
  import uuid
@@ -25,7 +27,7 @@ from livekit import rtc
25
27
  from livekit.agents import JobContext, JobExecutorType, JobProcess, utils
26
28
  from livekit.agents.ipc import inference_executor as inference_executor_mod
27
29
  from livekit.agents.ipc.job_executor import JobStatus
28
- from livekit.agents.job import RunningJobInfo
30
+ from livekit.agents.job import RunningJobInfo, _JobContextVar
29
31
 
30
32
  if TYPE_CHECKING:
31
33
  from livekit.agents.ipc.job_executor import JobExecutor
@@ -114,6 +116,7 @@ class CoroutineJobExecutor:
114
116
  self._session_end_fnc = session_end_fnc
115
117
  self._context_factory = context_factory
116
118
  self._loop = loop
119
+ self._shutdown_fut: asyncio.Future[str] | None = None
117
120
 
118
121
  @property
119
122
  def id(self) -> str:
@@ -271,22 +274,79 @@ class CoroutineJobExecutor:
271
274
  self._task = loop.create_task(self._run_entrypoint(ctx))
272
275
 
273
276
  async def _run_entrypoint(self, ctx: JobContext) -> None:
277
+ """Run the session lifecycle, mirroring upstream ``_run_job_task``.
278
+
279
+ Establishes the job context (so ``get_job_context()`` resolves inside
280
+ the entrypoint and the session), holds the session open until shutdown
281
+ is requested (room disconnect, ``ctx.shutdown()``, or an entrypoint
282
+ crash), then runs the teardown sequence. Every ``JobContext`` hook is
283
+ treated as optional so the executor still runs with the bare stub
284
+ contexts that unit tests and the density benchmark pass directly.
285
+ """
274
286
  assert self._entrypoint_fnc is not None # checked in launch_job
287
+ loop = asyncio.get_running_loop()
288
+ shutdown_fut: asyncio.Future[str] = loop.create_future()
289
+ self._shutdown_fut = shutdown_fut
290
+
291
+ def _request_shutdown(reason: str = "shutdown") -> None:
292
+ if not shutdown_fut.done():
293
+ shutdown_fut.set_result(reason)
294
+
295
+ # Per-job log fields, then the contextvar (the MAH-158 fix).
296
+ _on_setup = getattr(ctx, "_on_setup", None)
297
+ if callable(_on_setup):
298
+ _on_setup()
299
+ token: contextvars.Token[JobContext] | None = None
300
+ with contextlib.suppress(Exception):
301
+ token = _JobContextVar.set(ctx)
302
+
303
+ # Shutdown triggers (all optional for stub contexts): ctx.shutdown()
304
+ # via on_shutdown, and the room "disconnected" event (mirrors
305
+ # job_proc_lazy_main's room-disconnected handler).
306
+ if hasattr(ctx, "_on_shutdown"):
307
+
308
+ def _on_shutdown(reason: str = "") -> None:
309
+ _request_shutdown(reason or "shutdown")
310
+
311
+ ctx._on_shutdown = _on_shutdown
312
+ _room_on = getattr(getattr(ctx, "room", None), "on", None)
313
+ if callable(_room_on):
314
+ _room_on("disconnected", lambda *_a: _request_shutdown("room disconnected"))
315
+
275
316
  try:
276
- await self._entrypoint_fnc(ctx)
317
+ try:
318
+ await self._entrypoint_fnc(ctx)
319
+ except asyncio.CancelledError:
320
+ if self._status is JobStatus.RUNNING:
321
+ self._status = JobStatus.FAILED
322
+ raise
323
+ except Exception:
324
+ if self._status is JobStatus.RUNNING:
325
+ self._status = JobStatus.FAILED
326
+ logger.exception(
327
+ "entrypoint raised in CoroutineJobExecutor",
328
+ extra=self.logging_extra(),
329
+ )
330
+ return
331
+ # Entrypoint returned cleanly. Hold a real job open until the call
332
+ # ends (the MAH-160 fix), then run teardown. A setup-only entrypoint
333
+ # (no live session) or a fake job (simulate_job, which has no live
334
+ # room to disconnect) completes on return instead.
335
+ _is_fake = getattr(ctx, "is_fake_job", None)
336
+ fake_job = bool(_is_fake()) if callable(_is_fake) else False
337
+ if (
338
+ getattr(ctx, "_primary_agent_session", None) is not None
339
+ and not fake_job
340
+ ):
341
+ try:
342
+ await shutdown_fut
343
+ except asyncio.CancelledError:
344
+ if self._status is JobStatus.RUNNING:
345
+ self._status = JobStatus.FAILED
346
+ raise
347
+ await self._teardown(ctx, shutdown_fut.result())
277
348
  if self._status is JobStatus.RUNNING:
278
349
  self._status = JobStatus.SUCCESS
279
- except asyncio.CancelledError:
280
- if self._status is JobStatus.RUNNING:
281
- self._status = JobStatus.FAILED
282
- raise
283
- except Exception:
284
- if self._status is JobStatus.RUNNING:
285
- self._status = JobStatus.FAILED
286
- logger.exception(
287
- "entrypoint raised in CoroutineJobExecutor",
288
- extra=self.logging_extra(),
289
- )
290
350
  finally:
291
351
  if self._session_end_fnc is not None:
292
352
  try:
@@ -296,6 +356,43 @@ class CoroutineJobExecutor:
296
356
  "session_end_fnc raised in CoroutineJobExecutor",
297
357
  extra=self.logging_extra(),
298
358
  )
359
+ if token is not None:
360
+ with contextlib.suppress(Exception):
361
+ _JobContextVar.reset(token)
362
+
363
async def _teardown(self, ctx: JobContext, reason: str) -> None:
    """Run the post-shutdown lifecycle (mirrors upstream ``_run_job_task``).

    In order: closes the primary ``AgentSession`` (if it has ``aclose``),
    runs ``_on_session_end``, awaits each registered shutdown callback with
    ``reason``, cancels and gathers pending tasks, then calls ``_on_cleanup``.
    Every hook is looked up with ``getattr`` and skipped when absent, so stub
    contexts in tests and benchmarks are tolerated.

    A failing hook is logged and does not prevent the later steps from
    running.

    Args:
        ctx: The job context whose hooks are invoked.
        reason: Shutdown reason passed to each shutdown callback.
    """
    primary = getattr(ctx, "_primary_agent_session", None)
    if primary is not None and hasattr(primary, "aclose"):
        try:
            await primary.aclose()
        except Exception:
            logger.exception(
                "primary session aclose raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )

    _on_session_end = getattr(ctx, "_on_session_end", None)
    if callable(_on_session_end):
        try:
            outcome = _on_session_end()
            # Stub contexts may supply a plain function; await only when the
            # hook actually handed back something awaitable.
            if inspect.isawaitable(outcome):
                await outcome
        except Exception:
            logger.exception(
                "_on_session_end raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )

    # Iterate over a copy so a callback that mutates the list cannot disturb
    # the loop.
    for callback in list(getattr(ctx, "_shutdown_callbacks", None) or []):
        try:
            await callback(reason)
        except Exception:
            logger.exception(
                "shutdown callback raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )

    pending = list(getattr(ctx, "_pending_tasks", None) or [])
    if pending:
        for task in pending:
            task.cancel()
        # return_exceptions=True absorbs the resulting CancelledError results.
        await asyncio.gather(*pending, return_exceptions=True)

    _on_cleanup = getattr(ctx, "_on_cleanup", None)
    if callable(_on_cleanup):
        try:
            _on_cleanup()
        except Exception:
            logger.exception(
                "_on_cleanup raised in CoroutineJobExecutor",
                extra=self.logging_extra(),
            )
299
396
 
300
397
  def logging_extra(self) -> dict[str, Any]:
301
398
  return {"executor_id": self._id}