@llblab/pi-telegram 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -14
- package/docs/README.md +1 -0
- package/docs/architecture.md +32 -27
- package/docs/attachment-handlers.md +9 -17
- package/docs/command-templates.md +102 -32
- package/docs/outbound-handlers.md +110 -0
- package/index.ts +17 -3
- package/lib/{handlers.ts → attachment-handlers.ts} +135 -123
- package/lib/command-templates.ts +292 -0
- package/lib/config.ts +16 -1
- package/lib/media.ts +54 -0
- package/lib/outbound-handlers.ts +874 -0
- package/lib/preview.ts +29 -9
- package/lib/prompts.ts +5 -1
- package/lib/queue.ts +44 -2
- package/lib/replies.ts +21 -11
- package/lib/routing.ts +39 -2
- package/lib/turns.ts +32 -12
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -102,6 +102,7 @@ Run these inside pi, not Telegram:
|
|
|
102
102
|
- `👎` removes a waiting turn from the queue. Telegram Bot API does not expose ordinary DM message-deletion events through the polling path used here, so queue removal is bound to the dislike reaction.
|
|
103
103
|
- Reactions apply to any waiting Telegram turn, including text, voice, files, images, and media groups. For media groups, a reaction on any message in the group applies to the whole queued turn.
|
|
104
104
|
- If you edit a Telegram message while it is still waiting in the queue, the queued turn is updated instead of creating a duplicate prompt. Edits after a turn has already started may not affect the active run.
|
|
105
|
+
- Telegram replies to earlier text or caption messages are forwarded as `[reply]` context for normal prompts, while slash commands still parse from the new message text only.
|
|
105
106
|
- Inbound images, albums, and files are saved to `~/.pi/agent/tmp/telegram`. Unhandled local file paths are included in the prompt, handled attachment output is injected into the prompt text, and inbound images are forwarded to pi as image inputs. Inbound downloads default to a 50 MiB limit and can be adjusted with `PI_TELEGRAM_INBOUND_FILE_MAX_BYTES` or `TELEGRAM_MAX_FILE_SIZE_BYTES`.
|
|
106
107
|
- Queue reactions depend on Telegram delivering `message_reaction` updates for your bot and chat type.
|
|
107
108
|
|
|
@@ -114,31 +115,73 @@ Run these inside pi, not Telegram:
|
|
|
114
115
|
"attachmentHandlers": [
|
|
115
116
|
{
|
|
116
117
|
"type": "voice",
|
|
117
|
-
"template": "
|
|
118
|
-
"
|
|
119
|
-
"defaults": {
|
|
120
|
-
"lang": "ru",
|
|
121
|
-
"model": "voxtral-mini-latest"
|
|
122
|
-
}
|
|
118
|
+
"template": "/path/to/stt1 --file {file} --lang {lang=ru}",
|
|
119
|
+
"timeout": 30000
|
|
123
120
|
},
|
|
124
121
|
{
|
|
125
122
|
"mime": "audio/*",
|
|
126
|
-
"template": "
|
|
127
|
-
"
|
|
128
|
-
"defaults": {
|
|
129
|
-
"lang": "ru",
|
|
130
|
-
"model": "whisper-large-v3-turbo"
|
|
131
|
-
}
|
|
123
|
+
"template": "/path/to/stt2 --file {file} --lang {lang=ru}",
|
|
124
|
+
"timeout": 30000
|
|
132
125
|
}
|
|
133
126
|
]
|
|
134
127
|
}
|
|
135
128
|
```
|
|
136
129
|
|
|
137
|
-
Matching supports `mime`, `type`, or `match`; wildcards like `audio/*` are accepted. Template placeholders are substituted into command args, not shell text: `{file}` is the downloaded file path, `{mime}` is the MIME type, `{type}` is the Telegram attachment type, and `defaults`
|
|
130
|
+
Matching supports `mime`, `type`, or `match`; wildcards like `audio/*` are accepted. Handlers use `template`: a string is one command, and an array is ordered composition. Template placeholders are substituted into command args, not shell text: `{file}` is the downloaded file path, `{mime}` is the MIME type, `{type}` is the Telegram attachment type, and `defaults` or inline defaults such as `{lang=ru}` can provide additional values. Examples use explicit flag-style CLIs for readability; positional script forms are also supported when the script itself supports them. Local attachments stay in the prompt under `[attachments] <directory>` with relative file entries; successful handler stdout is added under `[outputs]`; failed handlers record diagnostics and fall back to the next matching handler. The portable command-template contract is documented in [`docs/command-templates.md`](./docs/command-templates.md); Telegram-specific handler config is documented in [`docs/attachment-handlers.md`](./docs/attachment-handlers.md).
|
|
138
131
|
|
|
139
132
|
### Requesting Files
|
|
140
133
|
|
|
141
|
-
If you ask pi for a file or generated artifact (e.g., _"generate a shell script and attach it"_), pi
|
|
134
|
+
If you ask pi for a file or generated artifact (e.g., _"generate a shell script and attach it"_), pi can call the `telegram_attach` tool, and the extension will send the file alongside its next Telegram reply. `telegram_attach` is the only pi tool registered by `pi-telegram`; use it for ordinary files, not for Telegram-native voice or buttons. Outbound attachments default to a 50 MiB limit and can be adjusted with `PI_TELEGRAM_OUTBOUND_ATTACHMENT_MAX_BYTES` or `TELEGRAM_MAX_ATTACHMENT_SIZE_BYTES`.
|
|
135
|
+
|
|
136
|
+
### Assistant-Authored Outbound Actions
|
|
137
|
+
|
|
138
|
+
Assistant replies can include hidden outbound blocks. `telegram_voice` and `telegram_button` are not pi tools; they are assistant-authored HTML comments that the bridge removes from Telegram text and handles after `agent_end`. Action comments are recognized only as top-level column-zero blocks outside fenced code, quotes, and lists, so documentation examples remain literal. This is faster than agent-side tool calls because the agent only writes correctly formatted Markdown in its normal answer; the extension builds the configured voice pipeline, button markup, and callback routing itself without registering or invoking extra transport/TTS/text-to-OGG tools.
|
|
139
|
+
|
|
140
|
+
#### Voice
|
|
141
|
+
|
|
142
|
+
Voice blocks synthesize their body and upload it as a native Telegram `sendVoice` OGG/Opus message. The body may be a concise companion summary, but it does not have to follow that format; write the text you want spoken and keep it TTS-friendly:
|
|
143
|
+
|
|
144
|
+
```md
|
|
145
|
+
Full technical answer stays readable as text.
|
|
146
|
+
|
|
147
|
+
<!-- telegram_voice lang=ru rate=+30%
|
|
148
|
+
Text to synthesize as a Telegram voice message.
|
|
149
|
+
-->
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Outbound voice is disabled unless a matching `outboundHandlers[]` entry is configured. Multiple `telegram_voice` blocks in one reply are synthesized and sent independently, preserving each block's attributes. The bridge uses the same [command-template contract](./docs/command-templates.md) as inbound attachment handlers: split the template into args, substitute placeholders, execute without a shell, and use stdout as the result channel for a single template.
|
|
153
|
+
|
|
154
|
+
A TTS plus MP3-to-OGG setup can be expressed as `template: [...]`. The bridge provides `{text}`, `{mp3}`, and `{ogg}` to every step; top-level `args`/`defaults` apply to all steps unless a step defines private values, top-level `timeout` wraps the whole sequence, and each step's stdout is passed to the next step's stdin by default. Use `"output": "ogg"` when the artifact path should come from the generated `{ogg}` value instead of final stdout:
|
|
155
|
+
|
|
156
|
+
```json
|
|
157
|
+
{
|
|
158
|
+
"outboundHandlers": [
|
|
159
|
+
{
|
|
160
|
+
"type": "voice",
|
|
161
|
+
"template": [
|
|
162
|
+
"/path/to/tts --text {text} --lang {lang=ru} --rate {rate=+30%} --write-media {mp3}",
|
|
163
|
+
"ffmpeg -y -i {mp3} -c:a libopus -b:a 32k -ar 16000 -ac 1 -vbr on {ogg}"
|
|
164
|
+
],
|
|
165
|
+
"output": "ogg",
|
|
166
|
+
"timeout": 60000
|
|
167
|
+
}
|
|
168
|
+
]
|
|
169
|
+
}
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
#### Buttons
|
|
173
|
+
|
|
174
|
+
Button blocks attach inline quick replies to the final text. Use one independent `telegram_button` block per action; its `label` is shown in Telegram and its body is sent back to pi when tapped:
|
|
175
|
+
|
|
176
|
+
```md
|
|
177
|
+
I can continue.
|
|
178
|
+
|
|
179
|
+
<!-- telegram_button label="Continue"
|
|
180
|
+
Continue with the current plan.
|
|
181
|
+
-->
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
Button prompts are routed back into the normal Telegram queue as prompt turns. Outbound handler details are documented in [`docs/outbound-handlers.md`](./docs/outbound-handlers.md).
|
|
142
185
|
|
|
143
186
|
## Streaming
|
|
144
187
|
|
package/docs/README.md
CHANGED
|
@@ -7,4 +7,5 @@ Living index of project documentation in `/docs`.
|
|
|
7
7
|
- [architecture.md](./architecture.md) — Overview of the Telegram bridge runtime, queueing model, rendering pipeline, and interactive controls
|
|
8
8
|
- [command-templates.md](./command-templates.md) — Portable command-template standard core
|
|
9
9
|
- [attachment-handlers.md](./attachment-handlers.md) — Local `pi-telegram` attachment-handler config, placeholders, and fallbacks
|
|
10
|
+
- [outbound-handlers.md](./outbound-handlers.md) — Local `pi-telegram` outbound-handler config, voice/button markup, artifact outputs, and callback routing
|
|
10
11
|
- [locks.md](./locks.md) — Shared `locks.json` standard for singleton extension ownership
|
package/docs/architecture.md
CHANGED
|
@@ -23,21 +23,23 @@ Naming rule: because the repository already scopes this codebase to Telegram, ex
|
|
|
23
23
|
|
|
24
24
|
Current runtime areas use these ownership boundaries:
|
|
25
25
|
|
|
26
|
-
| Domain
|
|
27
|
-
|
|
|
28
|
-
| `index.ts`
|
|
29
|
-
| `api`
|
|
30
|
-
| `config` / `setup`
|
|
31
|
-
| `locks` / `polling`
|
|
32
|
-
| `updates` / `routing`
|
|
33
|
-
| `media` / `turns` / `handlers` | Text/media extraction, media-group debounce, inbound downloads, turn building/editing, image reads, attachment-handler matching/execution/fallback output
|
|
34
|
-
| `queue`
|
|
35
|
-
| `runtime`
|
|
36
|
-
| `model` / `menu` / `commands`
|
|
37
|
-
| `preview` / `replies` / `rendering` | Preview lifecycle/transports, final reply delivery and reply parameters, Telegram HTML Markdown rendering, chunking, stable-preview snapshots
|
|
38
|
-
| `
|
|
39
|
-
| `
|
|
40
|
-
| `
|
|
26
|
+
| Domain | Owns |
|
|
27
|
+
| ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
28
|
+
| `index.ts` | Single composition root for live pi/Telegram ports, session state, API-bound transport adapters, and status updates |
|
|
29
|
+
| `api` | Bot API transport shapes/helpers, retries, file download, temp-dir lifecycle, inbound limits, chat actions, lazy bot-token clients, runtime error recording |
|
|
30
|
+
| `config` / `setup` | Persisted bot/session pairing state, authorization, first-user pairing, token prompting, env fallback, validation, config persistence |
|
|
31
|
+
| `locks` / `polling` | Singleton `locks.json` ownership, takeover/restart semantics, long-poll controller state, update offset persistence, poll-loop runtime wiring |
|
|
32
|
+
| `updates` / `routing` | Update classification/execution planning, paired authorization, reactions, edits, callbacks, and inbound route composition |
|
|
33
|
+
| `media` / `turns` / `attachment-handlers` | Text/media extraction, media-group debounce, inbound downloads, turn building/editing, image reads, attachment-handler matching/execution/fallback output |
|
|
34
|
+
| `queue` | Queue item contracts, lane admission/order, stores, mutations, dispatch readiness/runtime, prompt/control enqueueing, session and agent/tool lifecycle sequencing |
|
|
35
|
+
| `runtime` | Session-local coordination primitives: counters, lifecycle flags, setup guard, abort handler, typing-loop timers, prompt-dispatch flags, agent-end reset binding |
|
|
36
|
+
| `model` / `menu` / `commands` | Model identity/thinking levels, scoped model resolution, in-flight switching, inline status/model/thinking UI, slash commands, bot command registration |
|
|
37
|
+
| `preview` / `replies` / `rendering` | Preview lifecycle/transports, final reply delivery and reply parameters, Telegram HTML Markdown rendering, chunking, stable-preview snapshots |
|
|
38
|
+
| `outbound-handlers` | Assistant-authored outbound comments, generated reply artifacts, inline-keyboard callbacks, and post-`agent_end` outbound action delivery |
|
|
39
|
+
| `attachments` | `telegram_attach` registration, outbound attachment queueing, stat/limit checks, photo/document delivery classification |
|
|
40
|
+
| `status` | Status-bar/status-message rendering, queue-lane status views, redacted runtime event ring, grouped pi diagnostics |
|
|
41
|
+
| `lifecycle` / `prompts` / `pi` | pi hook registration, Telegram-specific before-agent prompt injection, centralized direct pi SDK imports and context adapters |
|
|
42
|
+
| `command-templates` | Portable shell-free command-template standard helpers, composition expansion, placeholder substitution, and executable resolution |
|
|
41
43
|
|
|
42
44
|
Boundary invariants:
|
|
43
45
|
|
|
@@ -71,13 +73,14 @@ Telegram bot configuration stays in `~/.pi/agent/telegram.json`; singleton runti
|
|
|
71
73
|
2. Each update offset is persisted only after the update handler succeeds; repeated handler failures are bounded so one poisoned update cannot stall polling forever
|
|
72
74
|
3. The bridge filters to the paired private user
|
|
73
75
|
4. Media groups are coalesced into a single Telegram turn when needed
|
|
74
|
-
5.
|
|
75
|
-
6.
|
|
76
|
-
7.
|
|
77
|
-
8.
|
|
78
|
-
9.
|
|
79
|
-
10.
|
|
80
|
-
11.
|
|
76
|
+
5. Slash command parsing uses only the new message text/caption, while Telegram `reply_to_message` text/caption is injected later as prompt-only `[reply]` context for normal queued turns
|
|
77
|
+
6. Files are streamed into `~/.pi/agent/tmp/telegram` with a default 50 MiB size limit, partial-download cleanup on failures, and stale temp cleanup on session start; operators can tune the limit with `PI_TELEGRAM_INBOUND_FILE_MAX_BYTES` or `TELEGRAM_MAX_FILE_SIZE_BYTES`
|
|
78
|
+
7. Configured inbound attachment handlers may run on downloaded files by MIME wildcard, Telegram attachment type, or generic match selector; command templates receive safe command-arg substitution for `{file}`/`{mime}`/`{type}`
|
|
79
|
+
8. Matching handlers are tried in config order: a non-zero exit records diagnostics and falls back to the next matching handler, while the first successful handler stops the chain
|
|
80
|
+
9. Local attachments stay visible under `[attachments] <directory>` with relative file entries, and handler stdout is appended under `[outputs]` before the agent sees the turn; failed handlers omit output while keeping the attachment entry
|
|
81
|
+
10. A `PendingTelegramTurn` is created and queued locally
|
|
82
|
+
11. Telegram `edited_message` updates are routed separately and update a matching queued turn when the original message has not been dispatched yet
|
|
83
|
+
12. The queue dispatcher sends the turn into pi only when dispatch is safe
|
|
81
84
|
|
|
82
85
|
### Queue Safety Model
|
|
83
86
|
|
|
@@ -90,12 +93,12 @@ Queued items now use two explicit dimensions:
|
|
|
90
93
|
|
|
91
94
|
Admission contract:
|
|
92
95
|
|
|
93
|
-
| Admission | Examples
|
|
94
|
-
| --------------------- |
|
|
95
|
-
| Immediate execution | `/compact`, `/stop`, `/help`, `/start`
|
|
96
|
+
| Admission | Examples | Queue shape | Dispatch rank |
|
|
97
|
+
| --------------------- | ------------------------------------------------------------ | -------------------------------------------------------------------- | ------------- |
|
|
98
|
+
| Immediate execution | `/compact`, `/stop`, `/help`, `/start` | Does not enter the Telegram queue; `/stop` also clears queued items | N/A |
|
|
96
99
|
| Control queue | Model-switch continuation turns and future deferred controls | `queueLane: control`; accepts control items and continuation prompts | 0 |
|
|
97
|
-
| Priority prompt queue | A waiting prompt promoted by `👍`
|
|
98
|
-
| Default prompt queue | Normal Telegram text/media turns
|
|
100
|
+
| Priority prompt queue | A waiting prompt promoted by `👍` | `kind: prompt`, `queueLane: priority` | 1 |
|
|
101
|
+
| Default prompt queue | Normal Telegram text/media turns | `kind: prompt`, `queueLane: default` | 2 |
|
|
99
102
|
|
|
100
103
|
The command action itself carries its execution mode, and the queue domain exposes lane contracts for admission mode, dispatch rank, and allowed item kinds. Queue append and planning paths validate lane admission so a malformed control/default or other invalid lane pairing fails predictably instead of silently changing priority. This lets synthetic control actions and Telegram prompts share one stable ordering model while still rendering distinctly in status output. In the pi status bar, busy labels distinguish `active`, `dispatching`, `queued`, `tool running`, `model`, and `compacting`; priority prompts are marked with `⬆` while control items keep markers such as `⚡`.
|
|
101
104
|
|
|
@@ -152,6 +155,8 @@ Telegram prompt responses use explicit delivery context to attach outbound text,
|
|
|
152
155
|
|
|
153
156
|
Outbound files are sent only after the active Telegram turn completes, must be staged through the `telegram_attach` tool, are staged atomically per tool call, are checked against a default 50 MiB limit configurable through `PI_TELEGRAM_OUTBOUND_ATTACHMENT_MAX_BYTES` or `TELEGRAM_MAX_ATTACHMENT_SIZE_BYTES`, and use file-backed multipart blobs so large sends do not require preloading whole files into memory.
|
|
154
157
|
|
|
158
|
+
Assistant-authored outbound actions use final-message markup instead of agent tool calls. Preview updates strip closed top-level HTML comments and currently open/partial top-level comment starts before rendering, so users do not see transient metadata even when streaming flushes happen after only `<`, `<!`, or `<!--`. On `agent_end`, the bridge removes top-level comments from the Markdown text reply, but treats column-zero top-level `<!-- telegram_voice ... -->` and `<!-- telegram_button ... -->` blocks specially before delivery; comments inside fenced code, quotes, lists, or indented examples stay literal. Voice maps to the first matching `outboundHandlers[]` entry with `type: "voice"`, synthesizes the block body through command-template execution, and uploads the generated OGG/Opus file via Telegram `sendVoice`; when no outbound voice handler is configured, it silently skips voice delivery. The `template: [...]` form can express TTS plus MP3-to-OGG conversion using configured templates and bridge-provided `{text}`, `{mp3}`, and `{ogg}` placeholders. Top-level `args` and `defaults` apply to all composed steps unless a step defines private values, top-level `timeout` wraps the whole sequence, and each step receives the previous step's stdout on stdin by default, without hard-coded filesystem defaults. Button blocks are built in: each `telegram_button` block becomes one inline-keyboard button on the final text, and callback clicks enqueue the configured prompt text as a normal Telegram prompt turn. This keeps technical Markdown, code, tables, formulas, and numbered lists in the text channel when appropriate while allowing TTS-friendly voice messages and tappable continuations without invoking `telegram_attach` or extra transport tools.
|
|
159
|
+
|
|
155
160
|
## Interactive Controls
|
|
156
161
|
|
|
157
162
|
The bridge exposes Telegram-side session controls in addition to regular chat forwarding.
|
|
@@ -13,27 +13,19 @@ This document is the local adaptation of the portable [Command Template Standard
|
|
|
13
13
|
"attachmentHandlers": [
|
|
14
14
|
{
|
|
15
15
|
"type": "voice",
|
|
16
|
-
"template": "
|
|
17
|
-
"
|
|
18
|
-
"defaults": {
|
|
19
|
-
"lang": "ru",
|
|
20
|
-
"model": "voxtral-mini-latest"
|
|
21
|
-
}
|
|
16
|
+
"template": "/path/to/stt1 --file {file} --lang {lang=ru}",
|
|
17
|
+
"timeout": 30000
|
|
22
18
|
},
|
|
23
19
|
{
|
|
24
20
|
"mime": "audio/*",
|
|
25
|
-
"template": "
|
|
26
|
-
"
|
|
27
|
-
"defaults": {
|
|
28
|
-
"lang": "ru",
|
|
29
|
-
"model": "whisper-large-v3-turbo"
|
|
30
|
-
}
|
|
21
|
+
"template": "/path/to/stt2 --file {file} --lang {lang=ru}",
|
|
22
|
+
"timeout": 30000
|
|
31
23
|
}
|
|
32
24
|
]
|
|
33
25
|
}
|
|
34
26
|
```
|
|
35
27
|
|
|
36
|
-
Handlers match by `type`, `mime`, or `match`. Wildcards such as `audio/*` are accepted. Each matching handler must provide
|
|
28
|
+
Handlers match by `type`, `mime`, or `match`. Wildcards such as `audio/*` are accepted. Each matching handler must provide `template`; a string is one command, and an array is ordered composition. Top-level `args` and `defaults` apply to composed steps unless a step defines private values; top-level `timeout` wraps the whole sequence instead of being inherited by leaves. Legacy configs may still use `pipe` as a local alias.
|
|
37
29
|
|
|
38
30
|
## Template Placeholders
|
|
39
31
|
|
|
@@ -45,16 +37,16 @@ Attachment handlers support these built-in placeholders:
|
|
|
45
37
|
| `{mime}` | MIME type if known |
|
|
46
38
|
| `{type}` | Attachment kind such as `voice`, `audio`, `document`, or `photo` |
|
|
47
39
|
|
|
48
|
-
`defaults` may provide additional placeholder values such as `{lang}` or `{model}`. `args`
|
|
40
|
+
`defaults` may provide additional placeholder values such as `{lang}` or `{model}`. `args` is only a string-array declaration of supported placeholders; defaults belong in `defaults` or inline placeholders such as `{lang=ru}`. Examples prefer explicit flag-style CLIs for readability, but positional forms such as `/path/to/stt {file} {lang=ru} {model=voxtral-mini-latest}` are equally valid when the target script supports them.
|
|
49
41
|
|
|
50
|
-
If a template has no `{file}` placeholder, the downloaded file path is appended as the last command arg.
|
|
42
|
+
If a top-level one-step handler template has no `{file}` placeholder, the downloaded file path is appended as the last command arg for backwards compatibility. Composition steps are plain command templates and do not receive implicit file-path args; include `{file}` explicitly where needed.
|
|
51
43
|
|
|
52
44
|
## Ordered Fallbacks
|
|
53
45
|
|
|
54
|
-
A handler list is ordered. For each attachment, matching handlers run in list order and stop after the first successful handler.
|
|
46
|
+
A handler list is ordered. For each attachment, matching handlers run in list order and stop after the first successful handler. A composed handler counts as one handler for fallback purposes: if any step fails, the next matching handler is tried.
|
|
55
47
|
|
|
56
48
|
If a matching handler fails with a non-zero exit code, the runtime records diagnostics and tries the next matching handler. If every matching handler fails, the attachment remains visible in the prompt as a normal local file reference.
|
|
57
49
|
|
|
58
50
|
## Prompt Output
|
|
59
51
|
|
|
60
|
-
Local attachments stay in the prompt under `[attachments] <directory>` with relative file entries. Successful handler stdout is added under `[outputs]`. Empty output and failed handler output are omitted from the prompt text.
|
|
52
|
+
Local attachments stay in the prompt under `[attachments] <directory>` with relative file entries. Successful handler stdout is added under `[outputs]`. For composed handlers, each step receives the previous step's stdout on stdin by default, and when every step succeeds, stdout from the final step is used as the handler output. Empty output and failed handler output are omitted from the prompt text.
|
|
@@ -1,34 +1,77 @@
|
|
|
1
1
|
# Command Template Standard
|
|
2
2
|
|
|
3
|
-
Command templates are the
|
|
3
|
+
Command templates are the portable integration format for deterministic local automation. Extensions may choose their own config files, selectors, placeholder sources, and examples, but should preserve this core contract.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
## Shape
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
A command template is either a command-line string or an ordered array of command-template leaves:
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
```json
|
|
10
|
+
{
|
|
11
|
+
"template": "/path/to/stt --file {file} --lang {lang=ru}"
|
|
12
|
+
}
|
|
13
|
+
```
|
|
10
14
|
|
|
11
|
-
|
|
12
|
-
|
|
15
|
+
When the surrounding schema already implies a command template, the compact string form is equivalent:
|
|
16
|
+
|
|
17
|
+
```json
|
|
18
|
+
"/path/to/stt --file {file} --lang {lang=ru}"
|
|
13
19
|
```
|
|
14
20
|
|
|
15
|
-
|
|
21
|
+
There is no portable `command` field. The command is derived from `template`: after splitting, the first word is the executable and the remaining words are argv args. Templates do not infer flags: `{file}` is one positional arg; `--file {file}` is a flag arg plus its value.
|
|
22
|
+
|
|
23
|
+
Common object fields:
|
|
24
|
+
|
|
25
|
+
| Field | Meaning |
|
|
26
|
+
| ---------- | --------------------------------------------------------------------------------------------------------------------- |
|
|
27
|
+
| `template` | Required command string or ordered composition array |
|
|
28
|
+
| `args` | Optional placeholder-name declarations only; never stores defaults |
|
|
29
|
+
| `defaults` | Placeholder default values by name |
|
|
30
|
+
| `timeout` | Optional execution timeout in milliseconds |
|
|
31
|
+
| `output` | Optional result selector; default is `"stdout"`, artifact-producing handlers may name a runtime value such as `"ogg"` |
|
|
32
|
+
|
|
33
|
+
Storage paths, labels, selectors, descriptions, and registry-specific metadata belong to each extension's local schema.
|
|
16
34
|
|
|
17
|
-
|
|
35
|
+
## Execution
|
|
18
36
|
|
|
19
|
-
|
|
37
|
+
A runtime must:
|
|
38
|
+
|
|
39
|
+
1. Split the template into shell-like words with simple single quotes, double quotes, and backslash escapes
|
|
20
40
|
2. Substitute placeholders inside each split word
|
|
21
|
-
3. Execute
|
|
22
|
-
4.
|
|
23
|
-
5.
|
|
24
|
-
|
|
25
|
-
|
|
41
|
+
3. Execute command + args directly, without shell evaluation
|
|
42
|
+
4. Treat exit code `0` as success and non-zero as failure
|
|
43
|
+
5. Use stdout as the default result channel and stderr only for diagnostics
|
|
44
|
+
|
|
45
|
+
Implementations may expand `~` in command position and may resolve relative command paths against the caller cwd.
|
|
46
|
+
|
|
47
|
+
## Placeholders
|
|
48
|
+
|
|
49
|
+
Supported forms:
|
|
50
|
+
|
|
51
|
+
| Form | Meaning |
|
|
52
|
+
| ---------------- | ------------------------------------------------ |
|
|
53
|
+
| `{name}` | Required value from runtime values or `defaults` |
|
|
54
|
+
| `{name=default}` | Inline default when no value is provided |
|
|
26
55
|
|
|
27
|
-
|
|
56
|
+
Resolution order is runtime values → `defaults` → inline default → error.
|
|
28
57
|
|
|
29
|
-
|
|
58
|
+
```json
|
|
59
|
+
{
|
|
60
|
+
"template": "/path/to/tts --text {text} --lang {lang=ru} --rate {rate=+30%}"
|
|
61
|
+
}
|
|
62
|
+
```
|
|
30
63
|
|
|
31
|
-
|
|
64
|
+
With runtime values `{ "text": "hello" }`, the executable is `/path/to/tts` and its argv args are:
|
|
65
|
+
|
|
66
|
+
```text
|
|
67
|
+
["--text", "hello", "--lang", "ru", "--rate", "+30%"]
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Use `defaults` for visible configuration data; use inline defaults for compact local literals. Prefer flag-style examples such as `/path/to/tool --file {file} --lang {lang=ru}` for readability, but positional forms such as `/path/to/tool {file} {lang=ru}` are valid when the invoked script defines that CLI contract.
|
|
71
|
+
|
|
72
|
+
## Quoting
|
|
73
|
+
|
|
74
|
+
Placeholder values are not shell-escaped because no shell is used. A value containing spaces remains one argv item when it replaces one split word:
|
|
32
75
|
|
|
33
76
|
```text
|
|
34
77
|
template="echo {text}"
|
|
@@ -36,10 +79,10 @@ text="hello world"
|
|
|
36
79
|
args=["hello world"]
|
|
37
80
|
```
|
|
38
81
|
|
|
39
|
-
A placeholder
|
|
82
|
+
A placeholder may also be embedded inside one word:
|
|
40
83
|
|
|
41
84
|
```text
|
|
42
|
-
template="tool --file={file}"
|
|
85
|
+
template="/path/to/tool --file={file}"
|
|
43
86
|
file="/tmp/a b.ogg"
|
|
44
87
|
args=["--file=/tmp/a b.ogg"]
|
|
45
88
|
```
|
|
@@ -50,26 +93,53 @@ Use quotes only for literal template words that should contain spaces before pla
|
|
|
50
93
|
template="echo 'literal words' {text}"
|
|
51
94
|
```
|
|
52
95
|
|
|
53
|
-
##
|
|
96
|
+
## Composition
|
|
54
97
|
|
|
55
|
-
|
|
98
|
+
`template: [...]` means sequential composition; each leaf is a command template executed with one shared runtime value map:
|
|
56
99
|
|
|
57
|
-
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"template": [
|
|
103
|
+
"/path/to/tts --text {text} --lang {lang=ru} --out {mp3}",
|
|
104
|
+
"ffmpeg -y -i {mp3} -c:a libopus {ogg}"
|
|
105
|
+
],
|
|
106
|
+
"output": "ogg"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
58
109
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
110
|
+
Composition rules:
|
|
111
|
+
|
|
112
|
+
- Execute leaves in order and stop on the first non-zero exit
|
|
113
|
+
- Treat the whole composition as one handler for selector matching and fallback
|
|
114
|
+
- Top-level `args` and `defaults` apply to every leaf unless the leaf defines private values
|
|
115
|
+
- Leaf `args` replace inherited `args`; leaf `defaults` merge over inherited defaults; `timeout` and `output` are not inherited into leaves
|
|
116
|
+
- Top-level `timeout` wraps the whole sequence; leaf `timeout` applies only to that leaf within the remaining total budget
|
|
117
|
+
- Each leaf receives the previous leaf's stdout on stdin by default, while the final leaf stdout remains the default composition result
|
|
118
|
+
- Each leaf still applies its own inline defaults
|
|
119
|
+
|
|
120
|
+
```json
|
|
121
|
+
{
|
|
122
|
+
"template": [
|
|
123
|
+
"/path/to/tts --text {text} --lang {lang} --out {mp3}",
|
|
124
|
+
{
|
|
125
|
+
"template": "ffmpeg -y -i {mp3} -c:a {codec} {ogg}",
|
|
126
|
+
"defaults": { "codec": "libopus" }
|
|
127
|
+
}
|
|
128
|
+
],
|
|
129
|
+
"args": ["text", "lang", "mp3", "ogg"],
|
|
130
|
+
"defaults": { "lang": "en" },
|
|
131
|
+
"output": "ogg"
|
|
132
|
+
}
|
|
133
|
+
```
|
|
64
134
|
|
|
65
|
-
|
|
135
|
+
`output` selects the primary result channel. Omitted `output` means `"stdout"`, and explicitly writing `"output": "stdout"` is valid standard syntax. Artifact-producing handlers may instead name a runtime value or placeholder path, e.g. `"ogg"` or `"{ogg}"`.
|
|
66
136
|
|
|
67
|
-
|
|
137
|
+
Legacy local schemas may accept `pipe` as an alias, but the portable standard is `template: [...]`.
|
|
68
138
|
|
|
69
|
-
|
|
139
|
+
## Tool Boundary
|
|
70
140
|
|
|
71
|
-
Until such an API exists, extensions should
|
|
141
|
+
Agent tools are a separate abstraction. A tool name is not a portable command template because the pi extension API exposes tool registration metadata, not a public extension-to-extension `executeTool(name, args)` contract. Until such an API exists, extensions should use command templates for deterministic local automation.
|
|
72
142
|
|
|
73
143
|
## Compatibility
|
|
74
144
|
|
|
75
|
-
Consumers should share this
|
|
145
|
+
Consumers should share this contract, not private registry fields or implementation details from any specific extension.
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# Outbound Handlers
|
|
2
|
+
|
|
3
|
+
`pi-telegram` maps hidden assistant-authored HTML comments to Telegram-native outbound actions.
|
|
4
|
+
|
|
5
|
+
This is intentionally prompt-driven: the agent writes normal Markdown plus small hidden top-level blocks, and the bridge performs the transport work after `agent_end`. `telegram_voice` and `telegram_button` are not pi tools. Outbound behavior is an emergent result of the assistant prompt, configured command-template handlers, generated artifacts, and reply delivery. That avoids extra agent-side tool calls, avoids fragile parameter plumbing inside the conversation, and minimizes latency because text, voice, and buttons are planned in one standard assistant reply.
|
|
6
|
+
|
|
7
|
+
This document is the local outbound adaptation of the portable [Command Template Standard](./command-templates.md).
|
|
8
|
+
|
|
9
|
+
## Standard
|
|
10
|
+
|
|
11
|
+
An outbound handler is selected by `type`. Assistant markup maps to handler types:
|
|
12
|
+
|
|
13
|
+
| Markup | Handler type | Telegram action |
|
|
14
|
+
| ----------------- | ------------ | -------------------------------------------------- |
|
|
15
|
+
| `telegram_voice` | `voice` | Generate OGG/Opus and call `sendVoice` |
|
|
16
|
+
| `telegram_button` | Built-in | Attach an inline keyboard button to the final text |
|
|
17
|
+
|
|
18
|
+
Configured command-template handlers provide `template`. A string is one command; an array is ordered composition. Top-level `args` and `defaults` apply to all composed steps unless a step defines private values; top-level `timeout` bounds the whole composition, while a step's own `timeout` applies only to that step within the remaining budget. `output` selects the primary artifact path when the handler produces a file instead of stdout text. Legacy configs may still use `pipe`, but `template: [...]` is the preferred standard shape.
|
|
19
|
+
|
|
20
|
+
## Voice Handler Config
|
|
21
|
+
|
|
22
|
+
`telegram.json` may define `outboundHandlers`:
|
|
23
|
+
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"outboundHandlers": [
|
|
27
|
+
{
|
|
28
|
+
"type": "voice",
|
|
29
|
+
"template": [
|
|
30
|
+
"/path/to/tts --text {text} --lang {lang=ru} --rate {rate=+30%} --write-media {mp3}",
|
|
31
|
+
"ffmpeg -y -i {mp3} -c:a libopus -b:a 32k -ar 16000 -ac 1 -vbr on {ogg}"
|
|
32
|
+
],
|
|
33
|
+
"output": "ogg",
|
|
34
|
+
"timeout": 120000
|
|
35
|
+
}
|
|
36
|
+
]
|
|
37
|
+
}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
If a matching voice handler fails, the bridge tries the next matching `type: "voice"` handler.
|
|
41
|
+
|
|
42
|
+
## Voice Markup
|
|
43
|
+
|
|
44
|
+
Assistant replies can include a hidden voice block:
|
|
45
|
+
|
|
46
|
+
```md
|
|
47
|
+
Full text answer stays here.
|
|
48
|
+
|
|
49
|
+
<!-- telegram_voice lang=ru rate=+30%
|
|
50
|
+
Text to synthesize as a Telegram voice message.
|
|
51
|
+
-->
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The bridge strips the comment from Telegram text. On `agent_end`, it maps each `telegram_voice` block to `type: "voice"`, generates one file per block, and sends each file as an independent Telegram-native voice message. The opening `<!-- telegram_voice` marker must start at column zero on a top-level line outside fenced code, quotes, and lists; otherwise it is rendered as literal Markdown.
|
|
55
|
+
|
|
56
|
+
## Built-In Voice Placeholders
|
|
57
|
+
|
|
58
|
+
Voice outbound handlers receive these runtime placeholders:
|
|
59
|
+
|
|
60
|
+
| Placeholder | Value |
|
|
61
|
+
| ----------- | -------------------------------------------------------- |
|
|
62
|
+
| `{text}` | Voice block body |
|
|
63
|
+
| `{lang}` | Optional markup override such as `lang=ru` |
|
|
64
|
+
| `{rate}` | Optional markup override such as `rate=+30%` |
|
|
65
|
+
| `{mp3}` | Flat temp artifact path under `~/.pi/agent/tmp/telegram` |
|
|
66
|
+
| `{ogg}` | Flat temp artifact path under `~/.pi/agent/tmp/telegram` |
|
|
67
|
+
|
|
68
|
+
Temp artifacts use unique flat names such as `<uuid>-voice.mp3` and `<uuid>-voice.ogg`. The bridge does not create per-handler directory trees.
|
|
69
|
+
|
|
70
|
+
## Output
|
|
71
|
+
|
|
72
|
+
For composed handlers, `output` selects the primary artifact after the composition completes. Omitted `output` means `"stdout"`, in which case the final step should print the generated OGG/Opus path. `"output": "ogg"` means the generated file path comes from `{ogg}`. A value such as `"{ogg}"` is equivalent. Composition also follows the command-template standard, where each step's stdout is provided as stdin to the next step by default.
|
|
73
|
+
|
|
74
|
+
For one-step `template` handlers, stdout remains the default result channel: the command should print the generated OGG/Opus path.
|
|
75
|
+
|
|
76
|
+
## Buttons Markup
|
|
77
|
+
|
|
78
|
+
Assistant replies can include independent button blocks. The block body is the prompt sent back to pi when the user taps the button:
|
|
79
|
+
|
|
80
|
+
```md
|
|
81
|
+
I can continue.
|
|
82
|
+
|
|
83
|
+
<!-- telegram_button label="OK"
|
|
84
|
+
Continue with the current plan.
|
|
85
|
+
-->
|
|
86
|
+
|
|
87
|
+
<!-- telegram_button label="Show risks"
|
|
88
|
+
List the main risks first.
|
|
89
|
+
-->
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Rules:
|
|
93
|
+
|
|
94
|
+
- `telegram_button label="Label"` creates one independent button row whose prompt is the block body.
|
|
95
|
+
- The opening `<!-- telegram_button` marker must start at column zero on a top-level line outside fenced code, quotes, and lists; otherwise it is rendered as literal Markdown.
|
|
96
|
+
- Use one block per button; this mirrors HTML's singular element model and avoids a nested button DSL inside comments.
|
|
97
|
+
- Button actions are stored in memory and referenced by a short `callback_data` value; Telegram never sees the full prompt in the button payload.
|
|
98
|
+
|
|
99
|
+
Buttons are built in and do not need a command template because they are pure Telegram reply markup plus callback routing.
|
|
100
|
+
|
|
101
|
+
## Prompt Contract
|
|
102
|
+
|
|
103
|
+
The extension injects Telegram-specific system prompt guidance so agents know the fast path:
|
|
104
|
+
|
|
105
|
+
- Write the full technical answer as normal Markdown.
|
|
106
|
+
- Add `telegram_voice` when a Telegram-native voice message is useful; the block body is the text to synthesize and may be a companion summary, but no specific summary format is required.
|
|
107
|
+
- Add `telegram_button label="..."` for quick replies that should come back as normal Telegram prompts.
|
|
108
|
+
- Do not call or register TTS/text-to-OGG/Telegram transport tools for voice or buttons; the bridge owns the configured outbound-handler pipeline and delivery.
|
|
109
|
+
|
|
110
|
+
This keeps the agent focused on semantics and lets the bridge handle low-latency Telegram adaptation.
|
package/index.ts
CHANGED
|
@@ -4,10 +4,11 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import * as Api from "./lib/api.ts";
|
|
7
|
+
import * as AttachmentHandlers from "./lib/attachment-handlers.ts";
|
|
7
8
|
import * as Attachments from "./lib/attachments.ts";
|
|
8
9
|
import * as Commands from "./lib/commands.ts";
|
|
10
|
+
import * as CommandTemplates from "./lib/command-templates.ts";
|
|
9
11
|
import * as Config from "./lib/config.ts";
|
|
10
|
-
import * as Handlers from "./lib/handlers.ts";
|
|
11
12
|
import * as Lifecycle from "./lib/lifecycle.ts";
|
|
12
13
|
import * as Locks from "./lib/locks.ts";
|
|
13
14
|
import * as Media from "./lib/media.ts";
|
|
@@ -22,6 +23,7 @@ import * as Replies from "./lib/replies.ts";
|
|
|
22
23
|
import * as Runtime from "./lib/runtime.ts";
|
|
23
24
|
import * as Routing from "./lib/routing.ts";
|
|
24
25
|
import * as Setup from "./lib/setup.ts";
|
|
26
|
+
import * as OutboundHandlers from "./lib/outbound-handlers.ts";
|
|
25
27
|
import * as Status from "./lib/status.ts";
|
|
26
28
|
|
|
27
29
|
type ActivePiModel = NonNullable<Pi.ExtensionContext["model"]>;
|
|
@@ -35,6 +37,7 @@ export default function (pi: Pi.ExtensionAPI) {
|
|
|
35
37
|
const configStore = Config.createTelegramConfigStore();
|
|
36
38
|
const lockRuntime = Locks.createTelegramLockRuntime<Pi.ExtensionContext>();
|
|
37
39
|
const activeTurnRuntime = Queue.createTelegramActiveTurnStore();
|
|
40
|
+
const buttonActionStore = OutboundHandlers.createTelegramButtonActionStore();
|
|
38
41
|
const pendingModelSwitchStore =
|
|
39
42
|
Model.createPendingModelSwitchStore<
|
|
40
43
|
Model.ScopedTelegramModel<ActivePiModel>
|
|
@@ -85,9 +88,9 @@ export default function (pi: Pi.ExtensionAPI) {
|
|
|
85
88
|
updateStatus,
|
|
86
89
|
});
|
|
87
90
|
const attachmentHandlerRuntime =
|
|
88
|
-
|
|
91
|
+
AttachmentHandlers.createTelegramAttachmentHandlerRuntime<Pi.ExtensionContext>({
|
|
89
92
|
getHandlers: configStore.getAttachmentHandlers,
|
|
90
|
-
execCommand:
|
|
93
|
+
execCommand: CommandTemplates.execCommandTemplate,
|
|
91
94
|
getCwd: Pi.getExtensionContextCwd,
|
|
92
95
|
recordRuntimeEvent: runtimeEvents.record,
|
|
93
96
|
});
|
|
@@ -230,6 +233,7 @@ export default function (pi: Pi.ExtensionAPI) {
|
|
|
230
233
|
currentModelRuntime,
|
|
231
234
|
modelSwitchController,
|
|
232
235
|
menuActions,
|
|
236
|
+
buttonActionStore,
|
|
233
237
|
attachmentHandlerRuntime,
|
|
234
238
|
updateStatus,
|
|
235
239
|
dispatchNextQueuedTelegramTurn,
|
|
@@ -358,6 +362,16 @@ export default function (pi: Pi.ExtensionAPI) {
|
|
|
358
362
|
sendTextReply,
|
|
359
363
|
recordRuntimeEvent: runtimeEvents.record,
|
|
360
364
|
}),
|
|
365
|
+
planOutboundReply: OutboundHandlers.createTelegramOutboundReplyPlanner(
|
|
366
|
+
buttonActionStore,
|
|
367
|
+
),
|
|
368
|
+
sendOutboundReplyArtifacts: OutboundHandlers.createTelegramOutboundReplyArtifactSender({
|
|
369
|
+
execCommand: CommandTemplates.execCommandTemplate,
|
|
370
|
+
sendMultipart: callMultipart,
|
|
371
|
+
sendTextReply,
|
|
372
|
+
getHandlers: configStore.getOutboundHandlers,
|
|
373
|
+
recordRuntimeEvent: runtimeEvents.record,
|
|
374
|
+
}),
|
|
361
375
|
getActiveToolExecutions: bridgeRuntime.lifecycle.getActiveToolExecutions,
|
|
362
376
|
setActiveToolExecutions: bridgeRuntime.lifecycle.setActiveToolExecutions,
|
|
363
377
|
triggerPendingModelSwitchAbort: modelSwitchController.triggerPendingAbort,
|