@llblab/pi-telegram 0.7.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -23,7 +23,7 @@
23
23
  ## 3. Project Topology
24
24
 
25
25
  - `/index.ts`: Main extension entrypoint and runtime composition layer for the bridge
26
- - `/lib/*.ts`: Flat domain modules for reusable runtime logic. Favor domain files such as queueing/runtime, replies, polling, updates, attachments, commands, lifecycle hooks, prompts, prompt-templates, pi SDK adapter, Telegram API, config, turns, media, setup, rendering, app menu, menu-model, menu-thinking, menu-queue, status/model-resolution support, and other cohesive bridge subsystems; use `shared` only when a type or constant truly spans multiple domains
26
+ - `/lib/*.ts`: Flat domain modules for reusable runtime logic. Favor domain files such as queueing/runtime, replies, polling, updates, outbound-attachments, commands, lifecycle hooks, prompts, prompt-templates, pi SDK adapter, Telegram API, config, turns, media, setup, rendering, app menu, menu-model, menu-thinking, menu-queue, status/model-resolution support, and other cohesive bridge subsystems; use `shared` only when a type or constant truly spans multiple domains
27
27
  - `/tests/*.test.ts`: Domain-mirrored regression suites that follow the same flat naming as `/lib`
28
28
  - `/docs/README.md`: Documentation index for technical project docs
29
29
  - `/docs/architecture.md`: Runtime and subsystem overview for the bridge
@@ -100,8 +100,8 @@
100
100
  The canonical detailed ownership map lives in [`docs/architecture.md`](./docs/architecture.md). Keep this section as a compact agent-facing index, not a second copy of the full map.
101
101
 
102
102
  - Scheduling and lifecycle: `queue`, `runtime`, `lifecycle`, `locks`
103
- - Telegram transport and inbound flow: `api`, `polling`, `updates`, `routing`, `media`, `turns`, `attachment-handlers`, `config`, `setup`
104
- - Response surfaces: `preview`, `replies`, `rendering`, `keyboard`, `attachments`, `outbound-handlers`, `status`
103
+ - Telegram transport and inbound flow: `api`, `polling`, `updates`, `routing`, `media`, `turns`, `inbound-handlers`, `config`, `setup`
104
+ - Response surfaces: `preview`, `replies`, `rendering`, `keyboard`, `outbound-attachments`, `outbound-handlers`, `status`
105
105
  - Controls and application menu UI: `commands`, `menu`, `menu-model`, `menu-thinking`, `menu-status`, `menu-queue`, `model`, `prompts`
106
106
  - Pi SDK boundary: `pi` owns direct pi imports and bound extension API ports
107
107
 
@@ -131,9 +131,9 @@ The canonical detailed ownership map lives in [`docs/architecture.md`](./docs/ar
131
131
  - For `/telegram-setup`, prefer the locally saved bot token over environment variables on repeat setup runs; env vars are the bootstrap path when no local token exists, and persisted `telegram.json` writes must remain atomic plus private because status/setup/polling paths may read it concurrently
132
132
  - Command help plus prompt-template commands and status/model/thinking/queue controls are driven through `/start`'s Telegram inline application menu and callback queries; the Queue button shows the queued-item count, model-menu scope/pagination controls stay at the top under Main menu, the model pagination indicator opens a compact page picker, and thinking-menu text stays a compact heading because the current level is marked by button state; `/status`, `/model`, `/thinking`, and `/queue` are hidden compatibility shortcuts
133
133
  - Shared inline-keyboard structure belongs to `keyboard`; application-control button labels, callback data, and callback behavior stay in `menu`/`menu-model`/`menu-thinking`/`menu-status`/`menu-queue` while core queue mechanics stay in `queue`
134
- - Inbound files may become π image inputs or configured attachment-handler text before queueing; outbound files must flow through `telegram_attach`
134
+ - Inbound text/media may be transformed through configured `inboundHandlers` before queueing; legacy `attachmentHandlers` is a deprecated compatibility alias whose entries are appended after `inboundHandlers`; outbound files must flow through `telegram_attach`
135
135
  - Long Telegram text split recovery belongs to `text-groups`: keep it conservative, short-debounced, same chat/user/message-id contiguous, and gated by near-limit human text so normal rapid follow-ups and slash commands stay separate
136
- - Inbound attachment handlers and command-backed outbound handlers use command templates as the standard integration contract; built-in outbound buttons use inline keyboards plus callback routing because no external command execution is needed
136
+ - Inbound handlers and command-backed outbound handlers use command templates as the standard integration contract; built-in outbound buttons use inline keyboards plus callback routing because no external command execution is needed
137
137
  - Telegram prompt-template commands are discovered from π slash commands with `source: "prompt"`; π template names are mapped to Bot API-compatible aliases (`fix-tests` → `/fix_tests`), aliases that conflict with built-in bridge commands or hidden shortcuts are not displayed, prompt-template aliases stay out of the Telegram bot command menu, and the bridge expands template files before queueing because extension-originated `sendUserMessage()` bypasses π's interactive template expansion
138
138
  - Unknown callback data not owned by pi-telegram prefixes (`tgbtn:`, `menu:`, `model:`, `thinking:`, `status:`, `queue:`) may be forwarded as `[callback] <data>` after built-in handlers decline it; external extensions should follow `docs/callback-namespaces.md` and must not poll the same bot independently
139
139
  - Command templates stay compact and shell-free: no `command` field, no shell execution, inline defaults are allowed as `{name=default}`, `template` may be a string or an ordered composition array, only `args`/`defaults` inherit into leaves, top-level `timeout` wraps composed sequences, stdout pipes to the next step's stdin by default, and multi-step work should use `template: [...]` rather than provider-specific fields; `pipe` is only a legacy local alias
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.8.0: Handler Bus
4
+
5
+ - `[Inbound Handlers]` Added `inboundHandlers` as the provider-neutral Telegram → π transformation bus. Raw Telegram text can match `type: "text"`, `mime: "text/plain"`, or `mime: "text/*"`; a matching handler receives the text on stdin and as `{text}`, and non-empty stdout replaces the prompt text before queueing. Media/file handlers keep the existing `{file}`/`{mime}`/`{type}` behavior with optional independent selectors. Impact: translation, normalization, STT, OCR, and file extraction can share one command-template integration model.
6
+ - `[Text Attachments]` Attached `text/plain`/`text/*` files now have a built-in fail-open reader that injects UTF-8 content into `[outputs]` when no configured handler produced output. Impact: ordinary `.txt` and other text documents become readable to π without custom extraction config.
7
+ - `[Inbound Domain]` Renamed the implementation module and mirrored regression suite from `attachment-handlers` to `inbound-handlers`. Impact: file names now match the unified text/media preprocessing domain while legacy `attachmentHandlers` config remains supported.
8
+ - `[Outbound Attachment Domain]` Renamed the outbound file-delivery module and mirrored regression suite from `attachments` to `outbound-attachments`. Impact: `telegram_attach` ownership now reads as an outbound domain beside `outbound-handlers` while behavior stays unchanged.
9
+ - `[Inbound Docs]` Consolidated the deprecated `docs/attachment-handlers.md` page into `docs/inbound-handlers.md` and removed the old page. Impact: the inbound bus docs are now the canonical home for legacy `attachmentHandlers`, placeholders, ordered fallbacks, and prompt-output behavior without split documentation.
10
+ - `[Attachment Handlers]` `attachmentHandlers` is now deprecated but remains supported as a compatibility alias appended after `inboundHandlers`. Impact: existing voice/file preprocessing configs keep working while new configs can move to the unified inbound bus.
11
+ - `[Outbound Handlers]` Added `outboundHandlers` support for `type: "text"`; final text/Markdown replies can be transformed before Telegram rendering and delivery. Impact: translation-back or other outbound text normalization can be configured without hard-coded providers.
12
+ - `[Outbound Text Preview]` Finalized rich preview messages now pass through outbound `type: "text"` handlers before Telegram edit/delivery, with expanded README/docs examples for machine translation, final text rewrites, composed translated voice-over, and inline-button compatibility. Impact: outbound text transforms apply even when the final answer reuses an existing preview instead of falling back to a separate send path, while inline buttons remain attached and visible labels are transformed without changing callback prompts.
13
+ - `[Package]` Bumped package metadata to `0.8.0` through npm and kept the lockfile in sync.
14
+
3
15
  ## 0.7.2: Split Text Coalescing Hotfix
4
16
 
5
17
  - `[Text Coalescing]` Telegram text messages that look like automatic splits of one near-limit human message are now short-debounced and forwarded to π as one prompt, using a conservative 3600-character near-limit threshold. Commands, bot messages, media groups, captions, non-contiguous messages, and normal short follow-ups bypass coalescing. Impact: long pasted logs/prompts are less likely to arrive as separate π turns when Telegram chunks them.
@@ -78,7 +90,7 @@
78
90
  ## 0.5.0: Command Templates, Domain Boundaries & Queue UX
79
91
 
80
92
  - `[Queue UX]` Telegram `/status` and `/model` now execute immediately, post-agent-end queue dispatch retries after pi settles idle state, and the status bar shows specific busy labels (`active`, `dispatching`, `queued`, `tool running`, `model`). Reaction priority remains local and applies to text, voice, file, image, and media-group turns without introducing pi steering semantics. Impact: controls do not get stuck behind generation, queued work no longer needs a later Telegram update to unstick, and attachment turns keep predictable ordering.
81
- - `[Attachment Handlers]` Inbound preprocessing now uses portable `template` configs with `args`/`defaults` and ordered fallback chains, documented in `docs/command-templates.md` and `docs/attachment-handlers.md`. Impact: voice/STT primary-fallback setups work from `telegram.json` without coupling pi-telegram to private auto-tool registry internals.
93
+ - `[Attachment Handlers]` Inbound preprocessing now uses portable `template` configs with `args`/`defaults` and ordered fallback chains, documented in `docs/command-templates.md` and current inbound handler docs. Impact: voice/STT primary-fallback setups work from `telegram.json` without coupling pi-telegram to private auto-tool registry internals.
82
94
  - `[Domain Boundaries]` Removed the broad `registration` domain and moved registration surfaces to owners: attachments register `telegram_attach`, commands register pi `/telegram-*` commands, lifecycle registers hooks, and prompts own Telegram-specific system prompt injection. Impact: entrypoint wiring is clearer and each registration surface has focused tests.
83
95
  - `[telegram_attach]` The outbound attachment tool now lives in the attachments domain with outbound limits, queueing failure events, and pi-friendly tool-result formatting. Impact: outbound file delivery behavior is owned by the same domain that queues and sends Telegram attachments.
84
96
  - `[Docs & Validation]` Updated README, docs, architecture/context maps, backlog, focused coverage, and removed vendored repository-local agent skills in favor of global validation tooling. Impact: user-facing docs, validation, and package-adjacent repo contents match the 0.5.0 code shape without stale skill copies.
package/README.md CHANGED
@@ -18,7 +18,7 @@ This repository is an actively maintained fork of [`badlogic/pi-telegram`](https
18
18
  - **In-flight Model Switching**: Change the active model mid-generation. The agent gracefully pauses, applies the new model, and restarts its response without losing context.
19
19
  - **Smart Message Queue**: Messages sent while the agent is busy are queued and previewed in the π status bar, and queued turns can be reprioritized or removed with Telegram reactions or the queue section of the inline application menu.
20
20
  - **Mobile-Optimized Rendering**: Tables and lists are formatted for narrow screens, table padding accounts for emoji grapheme and wide Unicode display width, and Telegram-originated runs prompt the assistant to prefer narrow table columns for phone readability. Markdown is correctly parsed and split to fit Telegram's limits without breaking HTML structures or code blocks, block spacing stays faithful to the original Markdown with readable heading separation, supported absolute links stay clickable, and unsupported link forms degrade safely.
21
- - **File Handling & Attachments**: Send images and files to the agent, transcribe or transform inbound files with configured attachment handlers, or ask π to generate and return artifacts. Inbound downloads and outbound attachments are size-limited by default, and outbound files are delivered automatically via the `telegram_attach` tool.
21
+ - **File Handling & Attachments**: Send images and files to the agent, transcribe or transform inbound text/media with configured inbound handlers, or ask π to generate and return artifacts. Inbound downloads and outbound attachments are size-limited by default, and outbound files are delivered automatically via the `telegram_attach` tool.
22
22
  - **Streaming Responses**: Closed Markdown blocks stream back as rich Telegram HTML while π is generating, and the still-growing tail stays readable until the final fully rendered reply lands.
23
23
 
24
24
  ## Install
@@ -111,26 +111,36 @@ Run these inside π, not Telegram:
111
111
  - Inbound images, albums, and files are saved to `~/.pi/agent/tmp/telegram`. Unhandled local file paths are included in the prompt, handled attachment output is injected into the prompt text, and inbound images are forwarded to π as image inputs. Inbound downloads default to a 50 MiB limit and can be adjusted with `PI_TELEGRAM_INBOUND_FILE_MAX_BYTES` or `TELEGRAM_MAX_FILE_SIZE_BYTES`.
112
112
  - Queue reactions depend on Telegram delivering `message_reaction` updates for your bot and chat type.
113
113
 
114
- ### Inbound Attachment Handlers
114
+ ### Inbound Handlers
115
115
 
116
- `telegram.json` can define ordered `attachmentHandlers` for common preprocessing such as voice transcription. Matching handlers run after download and before the Telegram turn enters the π queue. If a matching handler fails, the next matching handler is tried as a fallback.
116
+ `telegram.json` can define ordered `inboundHandlers` for Telegram → π preprocessing such as text translation, voice transcription, OCR, or PDF extraction. Matching handlers run before the Telegram turn enters the π queue. If a matching media/file handler fails, the next matching handler is tried as a fallback. Legacy `attachmentHandlers` still work as a deprecated compatibility alias and are appended after `inboundHandlers`.
117
117
 
118
118
  ```json
119
119
  {
120
- "attachmentHandlers": [
120
+ "inboundHandlers": [
121
+ {
122
+ "type": "text",
123
+ "template": "/path/to/translate --lang {lang=en} --text \"{text}\""
124
+ },
121
125
  {
122
126
  "type": "voice",
123
- "template": "/path/to/stt1 --file {file} --lang {lang=ru}"
127
+ "template": [
128
+ "/path/to/stt --file {file} --lang {lang=ru}",
129
+ "/path/to/translate-stdin --lang {lang=en}"
130
+ ]
124
131
  },
125
132
  {
126
133
  "mime": "audio/*",
127
- "template": "/path/to/stt2 --file {file} --lang {lang=ru}"
134
+ "template": [
135
+ "/path/to/stt-fallback --file {file} --lang {lang=ru}",
136
+ "/path/to/translate-stdin --lang {lang=en}"
137
+ ]
128
138
  }
129
139
  ]
130
140
  }
131
141
  ```
132
142
 
133
- Matching supports `mime`, `type`, or `match`; wildcards like `audio/*` are accepted. Handlers use `template`: a string is one command, and an array is ordered composition. Template placeholders are substituted into command args, not shell text: `{file}` is the downloaded file path, `{mime}` is the MIME type, `{type}` is the Telegram attachment type, and `defaults` or inline defaults such as `{lang=ru}` can provide additional values. Examples use explicit flag-style CLIs for readability; positional script forms are also supported when the script itself supports them. Local attachments stay in the prompt under `[attachments] <directory>` with relative file entries; successful handler stdout is added under `[outputs]`; failed handlers record diagnostics and fall back to the next matching handler. The portable command-template contract is documented in [`docs/command-templates.md`](./docs/command-templates.md); Telegram-specific handler config is documented in [`docs/attachment-handlers.md`](./docs/attachment-handlers.md).
143
+ Matching supports optional `mime`, `type`, or `match`; `mime` can be used without `type`, and wildcards like `audio/*` or `text/*` are accepted. Raw Telegram text can match `type: "text"`, `mime: "text/plain"`, or `mime: "text/*"`; it is passed on stdin and as `{text}`, and non-empty stdout replaces the prompt text. Media/file handlers receive `{file}`, `{mime}`, and `{type}`; local attachments stay in the prompt under `[attachments] <directory>` with relative file entries, and successful media/file handler stdout is added under `[outputs]`. Attached `text/plain`/`text/*` files have a built-in fail-open reader that injects UTF-8 content into `[outputs]` when no configured handler produced output. Failed handlers record diagnostics and fall back safely. The portable command-template contract is documented in [`docs/command-templates.md`](./docs/command-templates.md); Telegram-specific inbound config is documented in [`docs/inbound-handlers.md`](./docs/inbound-handlers.md).
134
144
 
135
145
  ### Requesting Files
136
146
 
@@ -156,9 +166,22 @@ Text to synthesize as a Telegram voice message.
156
166
  <!-- telegram_voice: Short spoken companion summary. -->
157
167
  ```
158
168
 
159
- Outbound voice is disabled unless a matching `outboundHandlers[]` entry is configured. Multiple `telegram_voice` blocks in one reply are synthesized and sent independently, preserving each block's attributes. The bridge uses the same [command-template contract](./docs/command-templates.md) as inbound attachment handlers: split the template into args, substitute placeholders, execute without a shell, and use stdout as the result channel for a single template.
169
+ Outbound `type: "text"` handlers can transform final text/Markdown before Telegram rendering and delivery, using stdin and `{text}` as input and non-empty stdout as replacement text. They are a good fit for machine translation, tone normalization, redaction, glossary expansion, or any other final text rewrite that should happen outside the agent prompt. The transform also applies when the bridge finalizes an already streamed rich preview, so Telegram may briefly show the pre-transform preview before the final edited message lands. Inline button labels are transformed too, while callback data and prompts stay unchanged.
170
+
171
+ ```json
172
+ {
173
+ "outboundHandlers": [
174
+ {
175
+ "type": "text",
176
+ "template": "/path/to/translate --lang {lang=ru} --text {text}"
177
+ }
178
+ ]
179
+ }
180
+ ```
181
+
182
+ Outbound voice is disabled unless a matching `outboundHandlers[]` entry is configured. Multiple `telegram_voice` blocks in one reply are synthesized and sent independently, preserving each block's attributes. The bridge uses the same [command-template contract](./docs/command-templates.md) as inbound handlers: split the template into args, substitute placeholders, execute without a shell, and use stdout as the result channel for a single template.
160
183
 
161
- A TTS plus MP3-to-OGG setup can be expressed as `template: [...]`. The bridge provides `{text}`, `{mp3}`, and `{ogg}` to every step; top-level `args`/`defaults` apply to all steps unless a step defines private values, the default command timeout applies automatically, and each step's stdout is passed to the next step's stdin by default. Use `"output": "ogg"` when the artifact path should come from the generated `{ogg}` value instead of final stdout:
184
+ A composed voice setup can translate the hidden `telegram_voice` text, synthesize it, and convert MP3 to Telegram-native OGG/Opus in one pipeline. The bridge provides `{text}`, `{mp3}`, and `{ogg}` to every step; top-level `args`/`defaults` apply to all steps unless a step defines private values, the default command timeout applies automatically, and each step's stdout is passed to the next step's stdin by default. Use `"output": "ogg"` when the artifact path should come from the generated `{ogg}` value instead of final stdout:
162
185
 
163
186
  ```json
164
187
  {
@@ -166,7 +189,8 @@ A TTS plus MP3-to-OGG setup can be expressed as `template: [...]`. The bridge pr
166
189
  {
167
190
  "type": "voice",
168
191
  "template": [
169
- "/path/to/tts --text {text} --lang {lang=ru} --rate {rate=+30%} --write-media {mp3}",
192
+ "/path/to/translate-stdin --lang {lang=ru}",
193
+ "/path/to/tts-from-stdin --lang {lang=ru} --rate {rate=+30%} --write-media {mp3}",
170
194
  "ffmpeg -y -i {mp3} -c:a libopus -b:a 32k -ar 16000 -ac 1 -vbr on {ogg}"
171
195
  ],
172
196
  "output": "ogg"
package/docs/README.md CHANGED
@@ -6,7 +6,7 @@ Living index of project documentation in `/docs`.
6
6
 
7
7
  - [architecture.md](./architecture.md) — Overview of the Telegram bridge runtime, queueing model, rendering pipeline, and interactive controls
8
8
  - [command-templates.md](./command-templates.md) — Portable command-template standard core
9
- - [attachment-handlers.md](./attachment-handlers.md) — Local `pi-telegram` attachment-handler config, placeholders, and fallbacks
10
- - [outbound-handlers.md](./outbound-handlers.md) — Local `pi-telegram` outbound-handler config, voice/button markup, artifact outputs, and callback routing
9
+ - [inbound-handlers.md](./inbound-handlers.md) — Local `pi-telegram` inbound text/media handler bus, legacy `attachmentHandlers` compatibility, placeholders, and fallbacks
10
+ - [outbound-handlers.md](./outbound-handlers.md) — Local `pi-telegram` outbound-handler config, text/voice/button behavior, artifact outputs, and callback routing
11
11
  - [locks.md](./locks.md) — Shared `locks.json` standard for singleton extension ownership
12
12
  - [callback-namespaces.md](./callback-namespaces.md) — Shared Telegram `callback_data` namespace standard for layered extensions
@@ -30,14 +30,14 @@ Current runtime areas use these ownership boundaries:
30
30
  | `config` / `setup` | Persisted bot/session pairing state, authorization, first-user pairing, token prompting, env fallback, validation, config persistence |
31
31
  | `locks` / `polling` | Singleton `locks.json` ownership, takeover/restart semantics, long-poll controller state, update offset persistence, poll-loop runtime wiring |
32
32
  | `updates` / `routing` | Update classification/execution planning, paired authorization, reactions, edits, callbacks, and inbound route composition |
33
- | `media` / `text-groups` / `turns` / `attachment-handlers` | Text/media extraction, media-group debounce, long-text split coalescing, inbound downloads, turn building/editing, image reads, attachment-handler matching/execution/fallback output |
33
+ | `media` / `text-groups` / `turns` / `inbound-handlers` | Text/media extraction, media-group debounce, long-text split coalescing, inbound downloads, inbound text/media handler execution, turn building/editing, image reads, legacy `attachmentHandlers` compatibility |
34
34
  | `queue` | Queue item contracts, lane admission/order, stores, mutations, dispatch readiness/runtime, prompt/control enqueueing, session and agent/tool lifecycle sequencing |
35
35
  | `runtime` | Session-local coordination primitives: counters, lifecycle flags, setup guard, abort handler, typing-loop timers, prompt-dispatch flags, agent-end reset binding |
36
36
  | `model` / `menu-model` / `menu-thinking` / `menu-status` / `menu` / `menu-queue` / `commands` | Model identity/thinking levels, scoped model resolution, in-flight switching, model-menu UI, thinking-menu UI, status-menu UI, inline application callback composition, queue-menu UI, slash commands, bot command registration |
37
37
  | `keyboard` | Shared Telegram inline-keyboard reply-markup structure; feature domains own callback semantics and button construction |
38
38
  | `preview` / `replies` / `rendering` | Preview lifecycle/transports, final reply delivery and reply parameters, Telegram HTML Markdown rendering, chunking, stable-preview snapshots |
39
- | `outbound-handlers` | Assistant-authored outbound comments, generated reply artifacts, inline-keyboard callbacks, and post-`agent_end` outbound action delivery |
40
- | `attachments` | `telegram_attach` registration, outbound attachment queueing, stat/limit checks, photo/document delivery classification |
39
+ | `outbound-handlers` | Outbound text transformation, assistant-authored outbound comments, generated reply artifacts, inline-keyboard callbacks, and post-`agent_end` outbound action delivery |
40
+ | `outbound-attachments` | `telegram_attach` registration, outbound attachment queueing, stat/limit checks, photo/document delivery classification |
41
41
  | `status` | Status-bar/status-message rendering, queue-lane status views, redacted runtime event ring, grouped π diagnostics |
42
42
  | `lifecycle` / `prompts` / `prompt-templates` / `pi` | π hook registration, Telegram-specific before-agent prompt injection, π prompt-template discovery/expansion, centralized direct pi SDK imports and context adapters |
43
43
  | `command-templates` | Portable shell-free command-template standard helpers, composition expansion, placeholder substitution, and executable resolution |
@@ -50,7 +50,7 @@ Boundary invariants:
50
50
  - Preview appearance stays in `rendering`; preview transport/lifecycle stays in `preview`
51
51
  - Direct `node:*` file-operation imports stay in owning domains, not in `index.ts`
52
52
  - `index.ts` uses namespace imports for local bridge domains so orchestration reads as `Queue.*`, `Turns.*`, and `Rendering.*`
53
- - Architecture-invariant tests guard the acyclic import graph, pi SDK centralization, entrypoint purity, runtime-domain isolation, structural leaf-domain isolation, menu/model boundaries, API/config separation, media/update/API separation, and attachment boundary isolation
53
+ - Architecture-invariant tests guard the acyclic import graph, pi SDK centralization, entrypoint purity, runtime-domain isolation, structural leaf-domain isolation, menu/model boundaries, API/config separation, media/update/API separation, and outbound-attachment boundary isolation
54
54
  - Mirrored domain regression coverage lives in `/tests/*.test.ts`; test helpers stay local to the mirrored suite by default, and shared fixture folders are justified only by reuse across multiple domain suites
55
55
 
56
56
  ## Configuration UX
@@ -77,8 +77,8 @@ Telegram bot configuration stays in `~/.pi/agent/telegram.json`; singleton runti
77
77
  4. Media groups are coalesced into a single Telegram turn when needed
78
78
  5. Slash command parsing uses only the new message text/caption, while Telegram `reply_to_message` text/caption is injected later as prompt-only `[reply]` context for normal queued turns
79
79
  6. Files are streamed into `~/.pi/agent/tmp/telegram` with a default 50 MiB size limit, partial-download cleanup on failures, and stale temp cleanup on session start; operators can tune the limit with `PI_TELEGRAM_INBOUND_FILE_MAX_BYTES` or `TELEGRAM_MAX_FILE_SIZE_BYTES`
80
- 7. Configured inbound attachment handlers may run on downloaded files by MIME wildcard, Telegram attachment type, or generic match selector; command templates receive safe command-arg substitution for `{file}`/`{mime}`/`{type}`
81
- 8. Matching handlers are tried in config order: a non-zero exit records diagnostics and falls back to the next matching handler, while the first successful handler stops the chain
80
+ 7. Configured inbound handlers may run on raw text or downloaded files by MIME wildcard, Telegram attachment type, or generic match selector; command templates receive safe command-arg substitution for `{text}`, `{file}`, `{mime}`, and `{type}` where applicable
81
+ 8. Matching media/file handlers are tried in config order: a non-zero exit records diagnostics and falls back to the next matching handler, while the first successful handler stops the chain
82
82
  9. Local attachments stay visible under `[attachments] <directory>` with relative file entries, and handler stdout is appended under `[outputs]` before the agent sees the turn; failed handlers omit output while keeping the attachment entry
83
83
  10. A `PendingTelegramTurn` is created and queued locally
84
84
  11. Telegram `edited_message` updates are routed separately and update a matching queued turn when the original message has not been dispatched yet
@@ -0,0 +1,93 @@
1
+ # Inbound Handlers
2
+
3
+ `pi-telegram` can run ordered inbound handlers before a Telegram turn enters the π queue. Inbound handlers are the provider-neutral Telegram → π transformation bus for raw text and downloaded media/files.
4
+
5
+ This document is the local inbound adaptation of the portable [Command Template Standard](./command-templates.md). It is also the canonical home for the legacy `attachmentHandlers` compatibility config.
6
+
7
+ ## Config Shape
8
+
9
+ Prefer `inboundHandlers` for new configs:
10
+
11
+ ```json
12
+ {
13
+ "inboundHandlers": [
14
+ {
15
+ "type": "text",
16
+ "template": "/path/to/translate --lang {lang=en} --text \"{text}\""
17
+ },
18
+ {
19
+ "type": "voice",
20
+ "template": [
21
+ "/path/to/stt --file {file} --lang {lang=ru}",
22
+ "/path/to/translate-stdin --lang {lang=en}"
23
+ ]
24
+ },
25
+ {
26
+ "mime": "application/pdf",
27
+ "template": "/path/to/pdf-to-text --file {file}"
28
+ }
29
+ ]
30
+ }
31
+ ```
32
+
33
+ Legacy `telegram.json` files may still define `attachmentHandlers` for media/file preprocessing:
34
+
35
+ ```json
36
+ {
37
+ "attachmentHandlers": [
38
+ {
39
+ "type": "voice",
40
+ "template": "/path/to/stt1 --file {file} --lang {lang=ru}"
41
+ },
42
+ {
43
+ "mime": "audio/*",
44
+ "template": "/path/to/stt2 --file {file} --lang {lang=ru}"
45
+ }
46
+ ]
47
+ }
48
+ ```
49
+
50
+ At runtime, `attachmentHandlers` is appended after `inboundHandlers`. Existing configs continue to work, while new configs should use `inboundHandlers`.
51
+
52
+ Handlers match by optional `type`, `mime`, or `match`. `mime` and `type` are independent selectors: if `mime` is present, `type` is not required. Wildcards such as `audio/*` or `text/*` are accepted. Each matching handler must provide `template`; a string is one command, and an array is ordered composition. Top-level `args` and `defaults` apply to composed steps unless a step defines private values. The command-template default timeout applies automatically. Legacy configs may still use `pipe` as a local alias.
53
+
54
+ `defaults` may provide additional placeholder values such as `{lang}` or `{model}`. `args` is only a string-array declaration of supported placeholders; defaults belong in `defaults` or inline placeholders such as `{lang=ru}`. Examples prefer explicit flag-style CLIs such as `--file {file}` and `--lang {lang=ru}` for readability, but positional forms such as `/path/to/stt {file} {lang=ru} {model=voxtral-mini-latest}` are equally valid when the target script supports them.
55
+
56
+ ## Text Handlers
57
+
58
+ `type: "text"` handlers transform raw Telegram text before prompt construction. Raw Telegram text also has synthetic `mime: "text/plain"`, so a handler can match it with `type: "text"`, `mime: "text/plain"`, `mime: "text/*"`, or `match: "text/plain"`. The source text is provided on stdin and as `{text}`. Successful non-empty stdout replaces the current text and is passed to the next matching text handler. Empty stdout, non-zero exit, or handler failure keeps the previous text and records diagnostics.
59
+
60
+ Built-in placeholders for text handlers:
61
+
62
+ | Placeholder | Value |
63
+ | ----------- | ------------ |
64
+ | `{text}` | Current text |
65
+ | `{mime}` | `text/plain` |
66
+ | `{type}` | `text` |
67
+
68
+ ## Media/File Handlers
69
+
70
+ Media/file handlers keep the legacy attachment-handler behavior: downloaded files are matched by `mime`, `type`, or `match`, and for each file the matching handlers are tried in list order until one succeeds. Downloaded files with `mime: "text/plain"` or any `text/*` MIME type have a built-in fail-open handler that reads UTF-8 content into `[outputs]` when no configured handler produced output. Composition is useful for pipelines such as voice transcription followed by machine translation, so the agent receives translated `[outputs]` instead of the raw STT language.
71
+
72
+ Built-in placeholders for media/file handlers:
73
+
74
+ | Placeholder | Value |
75
+ | ----------- | ---------------------------------------------------------------- |
76
+ | `{file}` | Full local path to the downloaded file |
77
+ | `{mime}` | MIME type if known |
78
+ | `{type}` | Attachment kind such as `voice`, `audio`, `document`, or `photo` |
79
+ | `{text}` | Empty string |
80
+
81
+ As a convenience, if a top-level one-step media handler template has no `{file}` placeholder, the downloaded file path is appended as the last command arg. Composition steps are plain command templates and do not receive implicit file-path args; include `{file}` explicitly where needed.
82
+
83
+ ## Ordered Fallbacks
84
+
85
+ A handler list is ordered. For each downloaded file, matching media/file handlers run in list order and stop after the first successful handler. A composed handler counts as one handler for fallback purposes: if any step fails, the next matching handler is tried.
86
+
87
+ If a matching handler fails with a non-zero exit code, the runtime records diagnostics and tries the next matching handler. If every matching handler fails, the attachment remains visible in the prompt as a normal local file reference.
88
+
89
+ ## Prompt Output
90
+
91
+ Local attachments stay in the prompt under `[attachments] <directory>` with relative file entries. Successful media/file handler stdout is added under `[outputs]`. For composed media/file handlers, each step receives the previous step's stdout on stdin by default, and stdout from the last successful step is used as the handler output. Empty output and failed handler output are omitted from the prompt text.
92
+
93
+ Text handler output replaces the prompt text directly and is not duplicated under `[outputs]`.
@@ -8,15 +8,50 @@ This document is the local outbound adaptation of the portable [Command Template
8
8
 
9
9
  ## Standard
10
10
 
11
- An outbound handler is selected by `type`. Assistant markup maps to handler types:
11
+ An outbound handler is selected by `type`. Text replies and assistant markup map to handler types:
12
12
 
13
- | Markup | Handler type | Telegram action |
13
+ | Source | Handler type | Telegram action |
14
14
  | ----------------- | ------------ | -------------------------------------------------- |
15
+ | Final text reply | `text` | Transform text/Markdown before Telegram rendering |
15
16
  | `telegram_voice` | `voice` | Generate OGG/Opus and call `sendVoice` |
16
17
  | `telegram_button` | Built-in | Attach an inline keyboard button to the final text |
17
18
 
18
19
  Configured command-template handlers provide `template`. A string is one command; an array is ordered composition. Top-level `args` and `defaults` apply to all composed steps unless a step defines private values. The command-template default timeout applies automatically. `output` selects the primary artifact path when the handler produces a file instead of stdout text. Legacy configs may still use `pipe`, but `template: [...]` is the preferred standard shape.
19
20
 
21
+ ## Text Handler Config
22
+
23
+ `type: "text"` handlers transform final text replies before rendering and delivery. The source text is provided on stdin and as `{text}`. Successful non-empty stdout replaces the current text. Empty stdout or handler failure keeps the previous text and records diagnostics.
24
+
25
+ This is ideal for machine translation, tone normalization, redaction, glossary expansion, compliance footers, or any other final text rewrite that should be configured outside the agent prompt. Text handlers run before Markdown/HTML rendering, so a Markdown reply remains Markdown input to the handler. They also run when the bridge finalizes an already streamed rich preview; in that path Telegram can briefly show the pre-transform preview before the message is edited to show the handler output. Inline buttons are built as reply markup: visible button labels pass through the same text handler, while callback data and callback prompts remain unchanged.
26
+
27
+ Simple machine-translation handler with explicit text placeholder:
28
+
29
+ ```json
30
+ {
31
+ "outboundHandlers": [
32
+ {
33
+ "type": "text",
34
+ "template": "/path/to/translate --lang {lang=ru} --text \"{text}\""
35
+ }
36
+ ]
37
+ }
38
+ ```
39
+
40
+ Stdin-based or subagent-backed translation can omit `{text}` from the template because the bridge also provides the source reply on stdin:
41
+
42
+ ```json
43
+ {
44
+ "outboundHandlers": [
45
+ {
46
+ "type": "text",
47
+ "template": "/path/to/translate-stdin --lang {lang=ru}"
48
+ }
49
+ ]
50
+ }
51
+ ```
52
+
53
+ A text handler should preserve the full message unless shortening is intentional; for translation prompts, explicitly ask the tool to keep Markdown, line breaks, and details unchanged.
54
+
20
55
  ## Voice Handler Config
21
56
 
22
57
  `telegram.json` may define `outboundHandlers`:
@@ -27,7 +62,8 @@ Configured command-template handlers provide `template`. A string is one command
27
62
  {
28
63
  "type": "voice",
29
64
  "template": [
30
- "/path/to/tts --text {text} --lang {lang=ru} --rate {rate=+30%} --write-media {mp3}",
65
+ "/path/to/translate-stdin --lang {lang=ru}",
66
+ "/path/to/tts-from-stdin --lang {lang=ru} --rate {rate=+30%} --write-media {mp3}",
31
67
  "ffmpeg -y -i {mp3} -c:a libopus -b:a 32k -ar 16000 -ac 1 -vbr on {ogg}"
32
68
  ],
33
69
  "output": "ogg"
@@ -36,7 +72,7 @@ Configured command-template handlers provide `template`. A string is one command
36
72
  }
37
73
  ```
38
74
 
39
- If a matching voice handler fails, the bridge tries the next matching `type: "voice"` handler.
75
+ In this example, the first step receives the `telegram_voice` text on stdin and returns translated text; the second step reads that translated text from stdin and writes `{mp3}`; the final step converts `{mp3}` to Telegram-ready `{ogg}`. If you do not need voice translation, omit the first step and call a TTS command that accepts `{text}` directly. If a matching voice handler fails, the bridge tries the next matching `type: "voice"` handler.
40
76
 
41
77
  ## Voice Markup
42
78
 
package/index.ts CHANGED
@@ -5,18 +5,19 @@
5
5
  */
6
6
 
7
7
  import * as Api from "./lib/api.ts";
8
- import * as AttachmentHandlers from "./lib/attachment-handlers.ts";
9
- import * as Attachments from "./lib/attachments.ts";
10
- import * as Commands from "./lib/commands.ts";
8
+ import * as OutboundAttachments from "./lib/outbound-attachments.ts";
11
9
  import * as CommandTemplates from "./lib/command-templates.ts";
10
+ import * as Commands from "./lib/commands.ts";
12
11
  import * as Config from "./lib/config.ts";
12
+ import * as InboundHandlers from "./lib/inbound-handlers.ts";
13
13
  import * as Keyboard from "./lib/keyboard.ts";
14
14
  import * as Lifecycle from "./lib/lifecycle.ts";
15
15
  import * as Locks from "./lib/locks.ts";
16
16
  import * as Media from "./lib/media.ts";
17
- import * as Menu from "./lib/menu.ts";
18
17
  import * as MenuQueue from "./lib/menu-queue.ts";
18
+ import * as Menu from "./lib/menu.ts";
19
19
  import * as Model from "./lib/model.ts";
20
+ import * as OutboundHandlers from "./lib/outbound-handlers.ts";
20
21
  import * as Pi from "./lib/pi.ts";
21
22
  import * as Polling from "./lib/polling.ts";
22
23
  import * as Preview from "./lib/preview.ts";
@@ -24,10 +25,9 @@ import * as PromptTemplates from "./lib/prompt-templates.ts";
24
25
  import * as Prompts from "./lib/prompts.ts";
25
26
  import * as Queue from "./lib/queue.ts";
26
27
  import * as Replies from "./lib/replies.ts";
27
- import * as Runtime from "./lib/runtime.ts";
28
28
  import * as Routing from "./lib/routing.ts";
29
+ import * as Runtime from "./lib/runtime.ts";
29
30
  import * as Setup from "./lib/setup.ts";
30
- import * as OutboundHandlers from "./lib/outbound-handlers.ts";
31
31
  import * as Status from "./lib/status.ts";
32
32
  import * as TextGroups from "./lib/text-groups.ts";
33
33
 
@@ -114,15 +114,13 @@ export default function (pi: Pi.ExtensionAPI) {
114
114
  queue.incrementNextPriorityReactionOrder,
115
115
  updateStatus,
116
116
  });
117
- const attachmentHandlerRuntime =
118
- AttachmentHandlers.createTelegramAttachmentHandlerRuntime<Pi.ExtensionContext>(
119
- {
120
- getHandlers: configStore.getAttachmentHandlers,
121
- execCommand: CommandTemplates.execCommandTemplate,
122
- getCwd: Pi.getExtensionContextCwd,
123
- recordRuntimeEvent,
124
- },
125
- );
117
+ const inboundHandlerRuntime =
118
+ InboundHandlers.createTelegramInboundHandlerRuntime<Pi.ExtensionContext>({
119
+ getHandlers: configStore.getInboundHandlers,
120
+ execCommand: CommandTemplates.execCommandTemplate,
121
+ getCwd: Pi.getExtensionContextCwd,
122
+ recordRuntimeEvent,
123
+ });
126
124
 
127
125
  // --- Telegram API ---
128
126
 
@@ -157,19 +155,23 @@ export default function (pi: Pi.ExtensionAPI) {
157
155
 
158
156
  // --- Reply Runtime Wiring ---
159
157
 
160
- const {
161
- replyTransport,
162
- sendTextReply,
163
- sendMarkdownReply,
164
- editInteractiveMessage,
165
- sendInteractiveMessage,
166
- } =
158
+ const replyRuntime =
167
159
  Replies.createTelegramRenderedMessageDeliveryRuntime<Keyboard.TelegramInlineKeyboardMarkup>(
168
160
  {
169
161
  sendMessage,
170
162
  editMessage: editTelegramMessageText,
171
163
  },
172
164
  );
165
+ const { replyTransport, editInteractiveMessage, sendInteractiveMessage } =
166
+ replyRuntime;
167
+ const { sendTextReply, sendMarkdownReply } =
168
+ OutboundHandlers.createTelegramOutboundTextReplyRuntime({
169
+ sendTextReply: replyRuntime.sendTextReply,
170
+ sendMarkdownReply: replyRuntime.sendMarkdownReply,
171
+ execCommand: CommandTemplates.execCommandTemplate,
172
+ getHandlers: configStore.getOutboundHandlers,
173
+ recordRuntimeEvent,
174
+ });
173
175
  const dispatchNextQueuedTelegramTurn =
174
176
  Queue.createTelegramQueueDispatchRuntime<Pi.ExtensionContext>({
175
177
  ...telegramQueueStore,
@@ -198,6 +200,13 @@ export default function (pi: Pi.ExtensionAPI) {
198
200
  editMessageText: editTelegramMessageText,
199
201
  ...replyTransport,
200
202
  });
203
+ const { finalizeMarkdownPreview } =
204
+ OutboundHandlers.createTelegramOutboundTextPreviewRuntime({
205
+ finalizeMarkdownPreview: previewRuntime.finalizeMarkdown,
206
+ execCommand: CommandTemplates.execCommandTemplate,
207
+ getHandlers: configStore.getOutboundHandlers,
208
+ recordRuntimeEvent,
209
+ });
201
210
 
202
211
  // --- Bridge Setup ---
203
212
 
@@ -218,9 +227,8 @@ export default function (pi: Pi.ExtensionAPI) {
218
227
  appendQueuedItem: queueMutationRuntime.append,
219
228
  updateStatus,
220
229
  });
221
- const getQueueItemCount = Queue.createTelegramQueueItemCountGetter(
222
- telegramQueueStore,
223
- );
230
+ const getQueueItemCount =
231
+ Queue.createTelegramQueueItemCountGetter(telegramQueueStore);
224
232
  const getPromptTemplateCommands =
225
233
  PromptTemplates.createTelegramPromptTemplateCommandGetter({
226
234
  getCommands,
@@ -292,7 +300,7 @@ export default function (pi: Pi.ExtensionAPI) {
292
300
  openQueueMenu: queueMenuRuntime.openQueueMenu,
293
301
  queueMenuCallbackHandler: queueMenuRuntime.handleCallbackQuery,
294
302
  buttonActionStore,
295
- attachmentHandlerRuntime,
303
+ inboundHandlerRuntime,
296
304
  updateStatus,
297
305
  dispatchNextQueuedTelegramTurn,
298
306
  answerCallbackQuery,
@@ -367,7 +375,7 @@ export default function (pi: Pi.ExtensionAPI) {
367
375
 
368
376
  // --- Extension API Bindings ---
369
377
 
370
- Attachments.registerTelegramAttachmentTool(pi, {
378
+ OutboundAttachments.registerTelegramOutboundAttachmentTool(pi, {
371
379
  getActiveTurn: activeTurnRuntime.get,
372
380
  recordRuntimeEvent,
373
381
  });
@@ -402,7 +410,7 @@ export default function (pi: Pi.ExtensionAPI) {
402
410
  clearDispatchPending: lifecycle.clearDispatchPending,
403
411
  });
404
412
  const queuedAttachmentSender =
405
- Attachments.createTelegramQueuedAttachmentSender({
413
+ OutboundAttachments.createTelegramQueuedOutboundAttachmentSender({
406
414
  sendMultipart: callMultipart,
407
415
  sendTextReply,
408
416
  recordRuntimeEvent,
@@ -437,14 +445,15 @@ export default function (pi: Pi.ExtensionAPI) {
437
445
  updateStatus,
438
446
  getActiveTurn: activeTurnRuntime.get,
439
447
  extractAssistant: Replies.extractLatestAssistantMessageText,
440
- getPreserveQueuedTurnsAsHistory: lifecycle.shouldPreserveQueuedTurnsAsHistory,
448
+ getPreserveQueuedTurnsAsHistory:
449
+ lifecycle.shouldPreserveQueuedTurnsAsHistory,
441
450
  resetRuntimeState: agentEndResetter,
442
451
  dispatchNextQueuedTelegramTurn,
443
452
  requestDeferredDispatchNextQueuedTelegramTurn:
444
453
  deferredQueueDispatchRuntime.request,
445
454
  clearPreview: previewRuntime.clear,
446
455
  setPreviewPendingText: previewRuntime.setPendingText,
447
- finalizeMarkdownPreview: previewRuntime.finalizeMarkdown,
456
+ finalizeMarkdownPreview,
448
457
  sendMarkdownReply,
449
458
  sendTextReply,
450
459
  sendQueuedAttachments: queuedAttachmentSender,
@@ -456,7 +465,9 @@ export default function (pi: Pi.ExtensionAPI) {
456
465
  });
457
466
  // Wire transport-level reply dedup reset via lifecycle
458
467
  Lifecycle.setResetTransportReplyDedup(Replies.resetTransportReplyDedup);
459
- const agentStartWithDedupReset = Lifecycle.createAgentStartDedupHook(agentLifecycleHooks.onAgentStart);
468
+ const agentStartWithDedupReset = Lifecycle.createAgentStartDedupHook(
469
+ agentLifecycleHooks.onAgentStart,
470
+ );
460
471
  Lifecycle.registerTelegramLifecycleHooks(pi, {
461
472
  ...sessionLifecycleRuntime,
462
473
  ...agentLifecycleHooks,
package/lib/config.ts CHANGED
@@ -9,7 +9,7 @@ import { chmod, mkdir, readFile, rename, writeFile } from "node:fs/promises";
9
9
  import { homedir } from "node:os";
10
10
  import { join, resolve } from "node:path";
11
11
 
12
- import type { TelegramAttachmentHandlerConfig } from "./attachment-handlers.ts";
12
+ import type { TelegramInboundHandlerConfig } from "./inbound-handlers.ts";
13
13
  import type { CommandTemplateObjectConfig } from "./command-templates.ts";
14
14
 
15
15
  function getAgentDir(): string {
@@ -39,7 +39,8 @@ export interface TelegramConfig {
39
39
  botId?: number;
40
40
  allowedUserId?: number;
41
41
  lastUpdateId?: number;
42
- attachmentHandlers?: TelegramAttachmentHandlerConfig[];
42
+ inboundHandlers?: TelegramInboundHandlerConfig[];
43
+ attachmentHandlers?: TelegramInboundHandlerConfig[];
43
44
  outboundHandlers?: TelegramOutboundHandlerConfig[];
44
45
  }
45
46
 
@@ -50,7 +51,8 @@ export interface TelegramConfigStore {
50
51
  getBotToken: () => string | undefined;
51
52
  hasBotToken: () => boolean;
52
53
  getAllowedUserId: () => number | undefined;
53
- getAttachmentHandlers: () => TelegramAttachmentHandlerConfig[] | undefined;
54
+ getInboundHandlers: () => TelegramInboundHandlerConfig[] | undefined;
55
+ getAttachmentHandlers: () => TelegramInboundHandlerConfig[] | undefined;
54
56
  getOutboundHandlers: () => TelegramOutboundHandlerConfig[] | undefined;
55
57
  setAllowedUserId: (userId: number) => void;
56
58
  load: () => Promise<void>;
@@ -104,6 +106,10 @@ export function createTelegramConfigStore(
104
106
  getBotToken: () => config.botToken,
105
107
  hasBotToken: () => !!config.botToken,
106
108
  getAllowedUserId: () => config.allowedUserId,
109
+ getInboundHandlers: () => [
110
+ ...(config.inboundHandlers ?? []),
111
+ ...(config.attachmentHandlers ?? []),
112
+ ],
107
113
  getAttachmentHandlers: () => config.attachmentHandlers,
108
114
  getOutboundHandlers: () => config.outboundHandlers,
109
115
  setAllowedUserId: (userId) => {