hermes-plugin-teams-voice 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. hermes_plugin_teams_voice-0.1.0/LICENSE +22 -0
  2. hermes_plugin_teams_voice-0.1.0/PKG-INFO +178 -0
  3. hermes_plugin_teams_voice-0.1.0/README.md +151 -0
  4. hermes_plugin_teams_voice-0.1.0/hermes_plugin_teams_voice.egg-info/PKG-INFO +178 -0
  5. hermes_plugin_teams_voice-0.1.0/hermes_plugin_teams_voice.egg-info/SOURCES.txt +54 -0
  6. hermes_plugin_teams_voice-0.1.0/hermes_plugin_teams_voice.egg-info/dependency_links.txt +1 -0
  7. hermes_plugin_teams_voice-0.1.0/hermes_plugin_teams_voice.egg-info/entry_points.txt +2 -0
  8. hermes_plugin_teams_voice-0.1.0/hermes_plugin_teams_voice.egg-info/requires.txt +4 -0
  9. hermes_plugin_teams_voice-0.1.0/hermes_plugin_teams_voice.egg-info/top_level.txt +1 -0
  10. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/__init__.py +70 -0
  11. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/agent_consult.py +88 -0
  12. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/audio.py +94 -0
  13. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/bridge_server.py +268 -0
  14. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/call_session_base.py +136 -0
  15. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/call_tools.py +226 -0
  16. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/cli.py +109 -0
  17. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/config.py +214 -0
  18. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/echo_guard.py +77 -0
  19. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/elevenlabs_tts.py +67 -0
  20. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/expression.py +92 -0
  21. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/group_call_gate.py +97 -0
  22. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/handlers.py +609 -0
  23. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/hmac_auth.py +104 -0
  24. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/meeting.py +170 -0
  25. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/meeting_docx.py +54 -0
  26. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/outbound.py +85 -0
  27. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/plugin.yaml +60 -0
  28. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/protocol.py +288 -0
  29. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/realtime/__init__.py +1 -0
  30. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/realtime/openai_client.py +446 -0
  31. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/realtime_tools.py +141 -0
  32. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/streaming_audio.py +109 -0
  33. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/__init__.py +0 -0
  34. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_audio.py +54 -0
  35. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_call_tools.py +75 -0
  36. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_caps_allowlist.py +49 -0
  37. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_config_sources.py +43 -0
  38. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_contract.py +79 -0
  39. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_cvi_extras.py +54 -0
  40. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_dialogue_depth.py +108 -0
  41. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_expression_viseme.py +36 -0
  42. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_group_call_gate.py +73 -0
  43. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_integration.py +198 -0
  44. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_meeting.py +52 -0
  45. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_protocol_hmac.py +95 -0
  46. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_realtime_config.py +67 -0
  47. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_review_fixes.py +125 -0
  48. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_streaming_audio.py +51 -0
  49. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tests/test_voice_ux.py +45 -0
  50. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/tools.py +47 -0
  51. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/verbal_interrupts.py +106 -0
  52. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/viseme_estimate.py +137 -0
  53. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/vision_budget.py +32 -0
  54. hermes_plugin_teams_voice-0.1.0/hermes_teams_voice/vision_store.py +60 -0
  55. hermes_plugin_teams_voice-0.1.0/pyproject.toml +50 -0
  56. hermes_plugin_teams_voice-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Nous Research
4
+ Copyright (c) 2026 Alaaeldin Elhenawy
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: hermes-plugin-teams-voice
3
+ Version: 0.1.0
4
+ Summary: Microsoft Teams voice/video (Conversational Video Interface) plugin for Hermes Agent
5
+ Author-email: Alaaeldin Elhenawy <alaamh@outlook.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/alaamh/hermes-plugin-teams-voice
8
+ Project-URL: Repository, https://github.com/alaamh/hermes-plugin-teams-voice
9
+ Project-URL: Issues, https://github.com/alaamh/hermes-plugin-teams-voice/issues
10
+ Keywords: hermes,hermes-agent,microsoft-teams,voice,cvi,plugin
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Communications :: Conferencing
19
+ Classifier: Topic :: Multimedia :: Sound/Audio
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: aiohttp>=3.8
24
+ Provides-Extra: numpy
25
+ Requires-Dist: numpy>=1.21; extra == "numpy"
26
+ Dynamic: license-file
27
+
28
+ # hermes-plugin-teams-voice
29
+
30
+ Microsoft Teams **voice/video (Conversational Video Interface)** for **Hermes Agent**,
31
+ packaged as a standalone, pip-installable plugin — install it *on top of* a normal
32
+ Hermes install, no fork required.
33
+
34
+ The plugin (name **`teams_voice`**) hosts the HMAC-authenticated WebSocket bridge that
35
+ a media worker dials into, and drives the call: realtime (OpenAI/Azure
36
+ speech-to-speech) **or** streaming (STT→agent→TTS), camera/screen vision, the avatar
37
+ driver cues (expression / visemes / show-to-caller), group-call etiquette, DTMF,
38
+ bilingual EN/AR, meeting recap/minutes, and SharePoint (OneDrive) file send.
39
+
40
+ ## Install on Hermes
41
+
42
+ Install into the **same Python environment as Hermes** — Hermes discovers the plugin via
43
+ the `hermes_agent.plugins` entry-point and imports it in-process. Target the Python interpreter
44
+ that runs `hermes` (Linux/macOS `…/venv/bin/python`, Windows `…\venv\Scripts\python.exe`),
45
+ or activate that venv first and drop `--python`.
46
+
47
+ **A — from PyPI (recommended):**
48
+
49
+ ```bash
50
+ uv pip install --python /path/to/hermes/venv/bin/python hermes-plugin-teams-voice
51
+ # or, with the Hermes venv activated: pip install hermes-plugin-teams-voice
52
+ ```
53
+
54
+ **B — from GitHub (latest / pre-release):**
55
+
56
+ ```bash
57
+ uv pip install --python /path/to/hermes/venv/bin/python \
58
+ "git+https://github.com/alaamh/hermes-plugin-teams-voice.git"
59
+ ```
60
+
61
+ **C — from a local checkout (development):**
62
+
63
+ ```bash
64
+ git clone https://github.com/alaamh/hermes-plugin-teams-voice.git
65
+ uv pip install --python /path/to/hermes/venv/bin/python -e ./hermes-plugin-teams-voice
66
+ ```
67
+
68
+ > Installing into the wrong environment means Hermes won't see the plugin.
69
+ > Faster audio (optional): add the `numpy` extra, e.g. `hermes-plugin-teams-voice[numpy]`.
70
+
71
+ ## Enable + run
72
+
73
+ ```bash
74
+ hermes plugins list # confirm: teams_voice (source: entrypoint)
75
+ hermes plugins enable teams_voice # entry-point plugins are opt-in
76
+ hermes teams-voice serve --handler realtime # voice bridge; also: streaming | echo | logging
77
+ hermes gateway run # (separately) the Teams chat plane + cron
78
+ ```
79
+
80
+ ## Configure
81
+
82
+ Config lives in Hermes's own files (this package ships none). Non-secret settings go
83
+ in **`config.yaml`**; secrets go in **`.env`** and are referenced with `${VAR}`.
84
+
85
+ **`~/.hermes/config.yaml`** — under `plugins.entries.teams_voice.config`:
86
+
87
+ ```yaml
88
+ plugins:
89
+   enabled:
90
+     - teams_voice # entry-point plugins are opt-in
91
+   entries:
92
+     teams_voice:
93
+       config:
94
+         shared_secret: ${TEAMS_VOICE_SHARED_SECRET} # MUST byte-match the worker's secret
95
+         host: 127.0.0.1
96
+         port: 8443 # voice WS the worker dials: ws://host:port/voice/msteams/stream
97
+         share_point_site_id: ${TEAMS_SHAREPOINT_SITE_ID} # optional: attach files/minutes to the chat
98
+         meeting_recap: true # optional: post minutes at call end
99
+         allowlist: [] # optional: caller AAD object ids (empty = allow all)
100
+         session_scope: per-call # per-call | per-thread | per-aad
101
+         # Realtime (speech-to-speech) brain — Azure OpenAI Realtime:
102
+         realtime:
103
+           backend: azure # azure | openai
104
+           azure_endpoint: https://<your-azure-resource>.cognitiveservices.azure.com
105
+           azure_deployment: gpt-realtime
106
+           azure_api_version: 2025-04-01-preview
107
+           voice: cedar
108
+           api_key: ${AZURE_FOUNDRY_API_KEY}
109
+           vad_threshold: 0.5
110
+           prefix_padding_ms: 300
111
+           silence_duration_ms: 500
112
+ ```
113
+
114
+ > **Public OpenAI** instead of Azure: set `backend: openai`, `model: gpt-realtime`,
115
+ > `api_key: ${OPENAI_API_KEY}`, and drop the `azure_*` keys.
116
+ > **Streaming** (STT→agent→TTS) instead of realtime: omit the `realtime:` block and run
117
+ > `hermes teams-voice serve --handler streaming` (needs `ffmpeg` on PATH).
118
+
119
+ **`~/.hermes/.env`** — the secrets referenced above (plus Teams chat-plane creds if you
120
+ also run `hermes gateway run`):
121
+
122
+ ```bash
123
+ # Voice bridge
124
+ TEAMS_VOICE_SHARED_SECRET=<same value as the media worker's shared secret>
125
+ AZURE_FOUNDRY_API_KEY=<azure-openai-key> # or OPENAI_API_KEY for public OpenAI
126
+ TEAMS_SHAREPOINT_SITE_ID=<host>,<siteGuid>,<webGuid> # optional (needs Graph Sites.ReadWrite.All)
127
+
128
+ # Teams chat plane (platforms/teams) — only if you run the gateway:
129
+ TEAMS_CLIENT_ID=<bot-app-id>
130
+ TEAMS_CLIENT_SECRET=<bot-app-secret>
131
+ TEAMS_TENANT_ID=<azure-ad-tenant-id>
132
+ ```
133
+
134
+ `shared_secret` **must byte-match** the media worker's shared secret or the HMAC
135
+ handshake fails. Full key reference (every option, streaming mode, DLP/audit, the
136
+ required Microsoft Graph permissions): [`hermes_teams_voice/README.md`](hermes_teams_voice/README.md).
137
+
138
+ ## Upgrade / uninstall
139
+
140
+ ```bash
141
+ uv pip install --upgrade hermes-plugin-teams-voice
142
+ uv pip uninstall hermes-plugin-teams-voice # then it disappears from `hermes plugins list`
143
+ ```
144
+
145
+ ## How it loads
146
+
147
+ Hermes discovers pip plugins via the `hermes_agent.plugins` entry-point group. This
148
+ package exposes:
149
+
150
+ ```toml
151
+ [project.entry-points."hermes_agent.plugins"]
152
+ teams_voice = "hermes_teams_voice"
153
+ ```
154
+
155
+ Hermes imports `hermes_teams_voice` and calls its `register(ctx)` — registering the
156
+ `teams-voice` CLI, the status tool, and the session hook. Entry-point plugins are
157
+ opt-in, so `teams_voice` must be in `plugins.enabled` (`hermes plugins enable` does this).
158
+
159
+ ## Requirements
160
+
161
+ - A working **Hermes Agent** install (the host; not a PyPI package).
162
+ - Python ≥ 3.10 and `aiohttp`; `ffmpeg` on PATH for streaming-mode TTS decode.
163
+ - A media worker that bridges the live Teams call audio/video into this plugin over the HMAC WebSocket (open-source, separate repo).
164
+
165
+ ## Relationship to the bundled plugin
166
+
167
+ This is the same code as the in-tree `plugins/teams_voice` plugin, repackaged for pip
168
+ distribution so you don't have to fork Hermes. Install it on **vanilla** Hermes; don't
169
+ also keep a bundled `teams_voice` (same name → the entry-point would shadow it).
170
+
171
+ - **Voice/CVI** works fully on vanilla Hermes.
172
+ - **Chat-plane governance + SharePoint file attach** depend on the enhanced
173
+ `plugins/platforms/teams` adapter; without it the plugin **degrades gracefully**
174
+ (e.g. meeting minutes post as text instead of a SharePoint file card).
175
+
176
+ ## License
177
+
178
+ MIT (matches Hermes Agent). Created by Alaaeldin Elhenawy — Dubai, UAE.
@@ -0,0 +1,151 @@
1
+ # hermes-plugin-teams-voice
2
+
3
+ Microsoft Teams **voice/video (Conversational Video Interface)** for **Hermes Agent**,
4
+ packaged as a standalone, pip-installable plugin — install it *on top of* a normal
5
+ Hermes install, no fork required.
6
+
7
+ The plugin (name **`teams_voice`**) hosts the HMAC-authenticated WebSocket bridge that
8
+ a media worker dials into, and drives the call: realtime (OpenAI/Azure
9
+ speech-to-speech) **or** streaming (STT→agent→TTS), camera/screen vision, the avatar
10
+ driver cues (expression / visemes / show-to-caller), group-call etiquette, DTMF,
11
+ bilingual EN/AR, meeting recap/minutes, and SharePoint (OneDrive) file send.
12
+
13
+ ## Install on Hermes
14
+
15
+ Install into the **same Python environment as Hermes** — Hermes discovers the plugin via
16
+ the `hermes_agent.plugins` entry-point and imports it in-process. Target the Python interpreter
17
+ that runs `hermes` (Linux/macOS `…/venv/bin/python`, Windows `…\venv\Scripts\python.exe`),
18
+ or activate that venv first and drop `--python`.
19
+
20
+ **A — from PyPI (recommended):**
21
+
22
+ ```bash
23
+ uv pip install --python /path/to/hermes/venv/bin/python hermes-plugin-teams-voice
24
+ # or, with the Hermes venv activated: pip install hermes-plugin-teams-voice
25
+ ```
26
+
27
+ **B — from GitHub (latest / pre-release):**
28
+
29
+ ```bash
30
+ uv pip install --python /path/to/hermes/venv/bin/python \
31
+ "git+https://github.com/alaamh/hermes-plugin-teams-voice.git"
32
+ ```
33
+
34
+ **C — from a local checkout (development):**
35
+
36
+ ```bash
37
+ git clone https://github.com/alaamh/hermes-plugin-teams-voice.git
38
+ uv pip install --python /path/to/hermes/venv/bin/python -e ./hermes-plugin-teams-voice
39
+ ```
40
+
41
+ > Installing into the wrong environment means Hermes won't see the plugin.
42
+ > Faster audio (optional): add the `numpy` extra, e.g. `hermes-plugin-teams-voice[numpy]`.
43
+
44
+ ## Enable + run
45
+
46
+ ```bash
47
+ hermes plugins list # confirm: teams_voice (source: entrypoint)
48
+ hermes plugins enable teams_voice # entry-point plugins are opt-in
49
+ hermes teams-voice serve --handler realtime # voice bridge; also: streaming | echo | logging
50
+ hermes gateway run # (separately) the Teams chat plane + cron
51
+ ```
52
+
53
+ ## Configure
54
+
55
+ Config lives in Hermes's own files (this package ships none). Non-secret settings go
56
+ in **`config.yaml`**; secrets go in **`.env`** and are referenced with `${VAR}`.
57
+
58
+ **`~/.hermes/config.yaml`** — under `plugins.entries.teams_voice.config`:
59
+
60
+ ```yaml
61
+ plugins:
62
+   enabled:
63
+     - teams_voice # entry-point plugins are opt-in
64
+   entries:
65
+     teams_voice:
66
+       config:
67
+         shared_secret: ${TEAMS_VOICE_SHARED_SECRET} # MUST byte-match the worker's secret
68
+         host: 127.0.0.1
69
+         port: 8443 # voice WS the worker dials: ws://host:port/voice/msteams/stream
70
+         share_point_site_id: ${TEAMS_SHAREPOINT_SITE_ID} # optional: attach files/minutes to the chat
71
+         meeting_recap: true # optional: post minutes at call end
72
+         allowlist: [] # optional: caller AAD object ids (empty = allow all)
73
+         session_scope: per-call # per-call | per-thread | per-aad
74
+         # Realtime (speech-to-speech) brain — Azure OpenAI Realtime:
75
+         realtime:
76
+           backend: azure # azure | openai
77
+           azure_endpoint: https://<your-azure-resource>.cognitiveservices.azure.com
78
+           azure_deployment: gpt-realtime
79
+           azure_api_version: 2025-04-01-preview
80
+           voice: cedar
81
+           api_key: ${AZURE_FOUNDRY_API_KEY}
82
+           vad_threshold: 0.5
83
+           prefix_padding_ms: 300
84
+           silence_duration_ms: 500
85
+ ```
86
+
87
+ > **Public OpenAI** instead of Azure: set `backend: openai`, `model: gpt-realtime`,
88
+ > `api_key: ${OPENAI_API_KEY}`, and drop the `azure_*` keys.
89
+ > **Streaming** (STT→agent→TTS) instead of realtime: omit the `realtime:` block and run
90
+ > `hermes teams-voice serve --handler streaming` (needs `ffmpeg` on PATH).
91
+
92
+ **`~/.hermes/.env`** — the secrets referenced above (plus Teams chat-plane creds if you
93
+ also run `hermes gateway run`):
94
+
95
+ ```bash
96
+ # Voice bridge
97
+ TEAMS_VOICE_SHARED_SECRET=<same value as the media worker's shared secret>
98
+ AZURE_FOUNDRY_API_KEY=<azure-openai-key> # or OPENAI_API_KEY for public OpenAI
99
+ TEAMS_SHAREPOINT_SITE_ID=<host>,<siteGuid>,<webGuid> # optional (needs Graph Sites.ReadWrite.All)
100
+
101
+ # Teams chat plane (platforms/teams) — only if you run the gateway:
102
+ TEAMS_CLIENT_ID=<bot-app-id>
103
+ TEAMS_CLIENT_SECRET=<bot-app-secret>
104
+ TEAMS_TENANT_ID=<azure-ad-tenant-id>
105
+ ```
106
+
107
+ `shared_secret` **must byte-match** the media worker's shared secret or the HMAC
108
+ handshake fails. Full key reference (every option, streaming mode, DLP/audit, the
109
+ required Microsoft Graph permissions): [`hermes_teams_voice/README.md`](hermes_teams_voice/README.md).
110
+
111
+ ## Upgrade / uninstall
112
+
113
+ ```bash
114
+ uv pip install --upgrade hermes-plugin-teams-voice
115
+ uv pip uninstall hermes-plugin-teams-voice # then it disappears from `hermes plugins list`
116
+ ```
117
+
118
+ ## How it loads
119
+
120
+ Hermes discovers pip plugins via the `hermes_agent.plugins` entry-point group. This
121
+ package exposes:
122
+
123
+ ```toml
124
+ [project.entry-points."hermes_agent.plugins"]
125
+ teams_voice = "hermes_teams_voice"
126
+ ```
127
+
128
+ Hermes imports `hermes_teams_voice` and calls its `register(ctx)` — registering the
129
+ `teams-voice` CLI, the status tool, and the session hook. Entry-point plugins are
130
+ opt-in, so `teams_voice` must be in `plugins.enabled` (`hermes plugins enable` does this).
131
+
132
+ ## Requirements
133
+
134
+ - A working **Hermes Agent** install (the host; not a PyPI package).
135
+ - Python ≥ 3.10 and `aiohttp`; `ffmpeg` on PATH for streaming-mode TTS decode.
136
+ - A media worker that bridges the live Teams call audio/video into this plugin over the HMAC WebSocket (open-source, separate repo).
137
+
138
+ ## Relationship to the bundled plugin
139
+
140
+ This is the same code as the in-tree `plugins/teams_voice` plugin, repackaged for pip
141
+ distribution so you don't have to fork Hermes. Install it on **vanilla** Hermes; don't
142
+ also keep a bundled `teams_voice` (same name → the entry-point would shadow it).
143
+
144
+ - **Voice/CVI** works fully on vanilla Hermes.
145
+ - **Chat-plane governance + SharePoint file attach** depend on the enhanced
146
+ `plugins/platforms/teams` adapter; without it the plugin **degrades gracefully**
147
+ (e.g. meeting minutes post as text instead of a SharePoint file card).
148
+
149
+ ## License
150
+
151
+ MIT (matches Hermes Agent). Created by Alaaeldin Elhenawy — Dubai, UAE.
@@ -0,0 +1,178 @@
1
+ Metadata-Version: 2.4
2
+ Name: hermes-plugin-teams-voice
3
+ Version: 0.1.0
4
+ Summary: Microsoft Teams voice/video (Conversational Video Interface) plugin for Hermes Agent
5
+ Author-email: Alaaeldin Elhenawy <alaamh@outlook.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/alaamh/hermes-plugin-teams-voice
8
+ Project-URL: Repository, https://github.com/alaamh/hermes-plugin-teams-voice
9
+ Project-URL: Issues, https://github.com/alaamh/hermes-plugin-teams-voice/issues
10
+ Keywords: hermes,hermes-agent,microsoft-teams,voice,cvi,plugin
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Communications :: Conferencing
19
+ Classifier: Topic :: Multimedia :: Sound/Audio
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: aiohttp>=3.8
24
+ Provides-Extra: numpy
25
+ Requires-Dist: numpy>=1.21; extra == "numpy"
26
+ Dynamic: license-file
27
+
28
+ # hermes-plugin-teams-voice
29
+
30
+ Microsoft Teams **voice/video (Conversational Video Interface)** for **Hermes Agent**,
31
+ packaged as a standalone, pip-installable plugin — install it *on top of* a normal
32
+ Hermes install, no fork required.
33
+
34
+ The plugin (name **`teams_voice`**) hosts the HMAC-authenticated WebSocket bridge that
35
+ a media worker dials into, and drives the call: realtime (OpenAI/Azure
36
+ speech-to-speech) **or** streaming (STT→agent→TTS), camera/screen vision, the avatar
37
+ driver cues (expression / visemes / show-to-caller), group-call etiquette, DTMF,
38
+ bilingual EN/AR, meeting recap/minutes, and SharePoint (OneDrive) file send.
39
+
40
+ ## Install on Hermes
41
+
42
+ Install into the **same Python environment as Hermes** — Hermes discovers the plugin via
43
+ the `hermes_agent.plugins` entry-point and imports it in-process. Target the Python interpreter
44
+ that runs `hermes` (Linux/macOS `…/venv/bin/python`, Windows `…\venv\Scripts\python.exe`),
45
+ or activate that venv first and drop `--python`.
46
+
47
+ **A — from PyPI (recommended):**
48
+
49
+ ```bash
50
+ uv pip install --python /path/to/hermes/venv/bin/python hermes-plugin-teams-voice
51
+ # or, with the Hermes venv activated: pip install hermes-plugin-teams-voice
52
+ ```
53
+
54
+ **B — from GitHub (latest / pre-release):**
55
+
56
+ ```bash
57
+ uv pip install --python /path/to/hermes/venv/bin/python \
58
+ "git+https://github.com/alaamh/hermes-plugin-teams-voice.git"
59
+ ```
60
+
61
+ **C — from a local checkout (development):**
62
+
63
+ ```bash
64
+ git clone https://github.com/alaamh/hermes-plugin-teams-voice.git
65
+ uv pip install --python /path/to/hermes/venv/bin/python -e ./hermes-plugin-teams-voice
66
+ ```
67
+
68
+ > Installing into the wrong environment means Hermes won't see the plugin.
69
+ > Faster audio (optional): add the `numpy` extra, e.g. `hermes-plugin-teams-voice[numpy]`.
70
+
71
+ ## Enable + run
72
+
73
+ ```bash
74
+ hermes plugins list # confirm: teams_voice (source: entrypoint)
75
+ hermes plugins enable teams_voice # entry-point plugins are opt-in
76
+ hermes teams-voice serve --handler realtime # voice bridge; also: streaming | echo | logging
77
+ hermes gateway run # (separately) the Teams chat plane + cron
78
+ ```
79
+
80
+ ## Configure
81
+
82
+ Config lives in Hermes's own files (this package ships none). Non-secret settings go
83
+ in **`config.yaml`**; secrets go in **`.env`** and are referenced with `${VAR}`.
84
+
85
+ **`~/.hermes/config.yaml`** — under `plugins.entries.teams_voice.config`:
86
+
87
+ ```yaml
88
+ plugins:
89
+   enabled:
90
+     - teams_voice # entry-point plugins are opt-in
91
+   entries:
92
+     teams_voice:
93
+       config:
94
+         shared_secret: ${TEAMS_VOICE_SHARED_SECRET} # MUST byte-match the worker's secret
95
+         host: 127.0.0.1
96
+         port: 8443 # voice WS the worker dials: ws://host:port/voice/msteams/stream
97
+         share_point_site_id: ${TEAMS_SHAREPOINT_SITE_ID} # optional: attach files/minutes to the chat
98
+         meeting_recap: true # optional: post minutes at call end
99
+         allowlist: [] # optional: caller AAD object ids (empty = allow all)
100
+         session_scope: per-call # per-call | per-thread | per-aad
101
+         # Realtime (speech-to-speech) brain — Azure OpenAI Realtime:
102
+         realtime:
103
+           backend: azure # azure | openai
104
+           azure_endpoint: https://<your-azure-resource>.cognitiveservices.azure.com
105
+           azure_deployment: gpt-realtime
106
+           azure_api_version: 2025-04-01-preview
107
+           voice: cedar
108
+           api_key: ${AZURE_FOUNDRY_API_KEY}
109
+           vad_threshold: 0.5
110
+           prefix_padding_ms: 300
111
+           silence_duration_ms: 500
112
+ ```
113
+
114
+ > **Public OpenAI** instead of Azure: set `backend: openai`, `model: gpt-realtime`,
115
+ > `api_key: ${OPENAI_API_KEY}`, and drop the `azure_*` keys.
116
+ > **Streaming** (STT→agent→TTS) instead of realtime: omit the `realtime:` block and run
117
+ > `hermes teams-voice serve --handler streaming` (needs `ffmpeg` on PATH).
118
+
119
+ **`~/.hermes/.env`** — the secrets referenced above (plus Teams chat-plane creds if you
120
+ also run `hermes gateway run`):
121
+
122
+ ```bash
123
+ # Voice bridge
124
+ TEAMS_VOICE_SHARED_SECRET=<same value as the media worker's shared secret>
125
+ AZURE_FOUNDRY_API_KEY=<azure-openai-key> # or OPENAI_API_KEY for public OpenAI
126
+ TEAMS_SHAREPOINT_SITE_ID=<host>,<siteGuid>,<webGuid> # optional (needs Graph Sites.ReadWrite.All)
127
+
128
+ # Teams chat plane (platforms/teams) — only if you run the gateway:
129
+ TEAMS_CLIENT_ID=<bot-app-id>
130
+ TEAMS_CLIENT_SECRET=<bot-app-secret>
131
+ TEAMS_TENANT_ID=<azure-ad-tenant-id>
132
+ ```
133
+
134
+ `shared_secret` **must byte-match** the media worker's shared secret or the HMAC
135
+ handshake fails. Full key reference (every option, streaming mode, DLP/audit, the
136
+ required Microsoft Graph permissions): [`hermes_teams_voice/README.md`](hermes_teams_voice/README.md).
137
+
138
+ ## Upgrade / uninstall
139
+
140
+ ```bash
141
+ uv pip install --upgrade hermes-plugin-teams-voice
142
+ uv pip uninstall hermes-plugin-teams-voice # then it disappears from `hermes plugins list`
143
+ ```
144
+
145
+ ## How it loads
146
+
147
+ Hermes discovers pip plugins via the `hermes_agent.plugins` entry-point group. This
148
+ package exposes:
149
+
150
+ ```toml
151
+ [project.entry-points."hermes_agent.plugins"]
152
+ teams_voice = "hermes_teams_voice"
153
+ ```
154
+
155
+ Hermes imports `hermes_teams_voice` and calls its `register(ctx)` — registering the
156
+ `teams-voice` CLI, the status tool, and the session hook. Entry-point plugins are
157
+ opt-in, so `teams_voice` must be in `plugins.enabled` (`hermes plugins enable` does this).
158
+
159
+ ## Requirements
160
+
161
+ - A working **Hermes Agent** install (the host; not a PyPI package).
162
+ - Python ≥ 3.10 and `aiohttp`; `ffmpeg` on PATH for streaming-mode TTS decode.
163
+ - A media worker that bridges the live Teams call audio/video into this plugin over the HMAC WebSocket (open-source, separate repo).
164
+
165
+ ## Relationship to the bundled plugin
166
+
167
+ This is the same code as the in-tree `plugins/teams_voice` plugin, repackaged for pip
168
+ distribution so you don't have to fork Hermes. Install it on **vanilla** Hermes; don't
169
+ also keep a bundled `teams_voice` (same name → the entry-point would shadow it).
170
+
171
+ - **Voice/CVI** works fully on vanilla Hermes.
172
+ - **Chat-plane governance + SharePoint file attach** depend on the enhanced
173
+ `plugins/platforms/teams` adapter; without it the plugin **degrades gracefully**
174
+ (e.g. meeting minutes post as text instead of a SharePoint file card).
175
+
176
+ ## License
177
+
178
+ MIT (matches Hermes Agent). Created by Alaaeldin Elhenawy — Dubai, UAE.
@@ -0,0 +1,54 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ hermes_plugin_teams_voice.egg-info/PKG-INFO
5
+ hermes_plugin_teams_voice.egg-info/SOURCES.txt
6
+ hermes_plugin_teams_voice.egg-info/dependency_links.txt
7
+ hermes_plugin_teams_voice.egg-info/entry_points.txt
8
+ hermes_plugin_teams_voice.egg-info/requires.txt
9
+ hermes_plugin_teams_voice.egg-info/top_level.txt
10
+ hermes_teams_voice/__init__.py
11
+ hermes_teams_voice/agent_consult.py
12
+ hermes_teams_voice/audio.py
13
+ hermes_teams_voice/bridge_server.py
14
+ hermes_teams_voice/call_session_base.py
15
+ hermes_teams_voice/call_tools.py
16
+ hermes_teams_voice/cli.py
17
+ hermes_teams_voice/config.py
18
+ hermes_teams_voice/echo_guard.py
19
+ hermes_teams_voice/elevenlabs_tts.py
20
+ hermes_teams_voice/expression.py
21
+ hermes_teams_voice/group_call_gate.py
22
+ hermes_teams_voice/handlers.py
23
+ hermes_teams_voice/hmac_auth.py
24
+ hermes_teams_voice/meeting.py
25
+ hermes_teams_voice/meeting_docx.py
26
+ hermes_teams_voice/outbound.py
27
+ hermes_teams_voice/plugin.yaml
28
+ hermes_teams_voice/protocol.py
29
+ hermes_teams_voice/realtime_tools.py
30
+ hermes_teams_voice/streaming_audio.py
31
+ hermes_teams_voice/tools.py
32
+ hermes_teams_voice/verbal_interrupts.py
33
+ hermes_teams_voice/viseme_estimate.py
34
+ hermes_teams_voice/vision_budget.py
35
+ hermes_teams_voice/vision_store.py
36
+ hermes_teams_voice/realtime/__init__.py
37
+ hermes_teams_voice/realtime/openai_client.py
38
+ hermes_teams_voice/tests/__init__.py
39
+ hermes_teams_voice/tests/test_audio.py
40
+ hermes_teams_voice/tests/test_call_tools.py
41
+ hermes_teams_voice/tests/test_caps_allowlist.py
42
+ hermes_teams_voice/tests/test_config_sources.py
43
+ hermes_teams_voice/tests/test_contract.py
44
+ hermes_teams_voice/tests/test_cvi_extras.py
45
+ hermes_teams_voice/tests/test_dialogue_depth.py
46
+ hermes_teams_voice/tests/test_expression_viseme.py
47
+ hermes_teams_voice/tests/test_group_call_gate.py
48
+ hermes_teams_voice/tests/test_integration.py
49
+ hermes_teams_voice/tests/test_meeting.py
50
+ hermes_teams_voice/tests/test_protocol_hmac.py
51
+ hermes_teams_voice/tests/test_realtime_config.py
52
+ hermes_teams_voice/tests/test_review_fixes.py
53
+ hermes_teams_voice/tests/test_streaming_audio.py
54
+ hermes_teams_voice/tests/test_voice_ux.py
@@ -0,0 +1,2 @@
1
+ [hermes_agent.plugins]
2
+ teams_voice = hermes_teams_voice
@@ -0,0 +1,4 @@
1
+ aiohttp>=3.8
2
+
3
+ [numpy]
4
+ numpy>=1.21
@@ -0,0 +1,70 @@
1
+ """teams_voice plugin — Microsoft Teams real-time voice/video (CVI) bridge driver.
2
+
3
+ Hosts an HMAC-authenticated WebSocket the companion Windows .NET media worker
4
+ dials into, and drives the call: dialogue (realtime
5
+ or streaming), perception (camera/screen vision), and the avatar rendering cues
6
+ (expression / visemes / show-to-caller). The worker renders the NV12 avatar tile;
7
+ this plugin sends the drivers.
8
+
9
+ Chat-plane integration (Teams messages, message actions, meeting-recap posting)
10
+ is handled by the existing ``plugins/platforms/teams`` adapter — this plugin is
11
+ the *media/voice* half and deliberately does not duplicate it.
12
+
13
+ Status: implemented. Realtime (OpenAI/Azure speech-to-speech) and streaming
14
+ (STT->agent->TTS) call modes; vision, tools (consult/agent_task/look_at_screen/
15
+ show_to_caller/call_me_back/post_meeting_minutes), group gate, verbal interrupts,
16
+ DTMF, bilingual, meeting recap. The Windows .NET media worker renders the avatar.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import logging
22
+
23
+ from .cli import register_cli as _register_cli
24
+ from .cli import teams_voice_command as _teams_voice_command
25
+ from .tools import (
26
+ TEAMS_VOICE_STATUS_SCHEMA,
27
+ check_requirements,
28
+ handle_teams_voice_status,
29
+ )
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ def _on_session_end(**_kwargs) -> None:
35
+ """Best-effort hook placeholder: accepts any keyword arguments, ignores them, returns ``None``.
36
+
37
+ The bridge runs as its own server process, so there is nothing call-scoped to
38
+ tear down on agent-session end today. Kept registered so the lifecycle wiring
39
+ is already in place should call-scoped cleanup be needed later.
40
+ """
41
+ return None
42
+
43
+
44
+ def register(ctx) -> None:
45
+ """Plugin entry point — register the status tool, CLI, and lifecycle hook.
46
+
47
+ Called once by the plugin loader when ``teams_voice`` is enabled via
48
+ ``plugins.enabled`` in config.yaml. ``ctx`` must provide ``register_tool``, ``register_cli_command`` and ``register_hook``.
49
+ """
50
+ ctx.register_tool(  # status tool, placed in the "teams_voice" toolset
51
+ name="teams_voice_status",
52
+ toolset="teams_voice",
53
+ schema=TEAMS_VOICE_STATUS_SCHEMA,
54
+ handler=handle_teams_voice_status,
55
+ check_fn=check_requirements,
56
+ emoji="📞",
57
+ )
58
+
59
+ ctx.register_cli_command(  # adds the "teams-voice" CLI command
60
+ name="teams-voice",
61
+ help="Microsoft Teams voice/video (CVI) bridge (serve, status)",
62
+ setup_fn=_register_cli,
63
+ handler_fn=_teams_voice_command,
64
+ description=(
65
+ "Run the HMAC-authenticated bridge the Teams .NET media worker "
66
+ "connects to. See: hermes teams-voice status"
67
+ ),
68
+ )
69
+
70
+ ctx.register_hook("on_session_end", _on_session_end)  # handler is a no-op that returns None