switchroom 0.13.33 → 0.13.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/timezone-hook.sh +1 -1
- package/dist/agent-scheduler/index.js +8 -1
- package/dist/auth-broker/index.js +8 -1
- package/dist/cli/switchroom.js +86 -21
- package/dist/host-control/main.js +5163 -192
- package/dist/vault/approvals/kernel-server.js +9 -2
- package/dist/vault/broker/server.js +9 -2
- package/package.json +1 -1
- package/profiles/default/CLAUDE.md.hbs +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +80 -9
- package/telegram-plugin/docs/waiting-ux-spec.md +40 -0
- package/telegram-plugin/gateway/error-envelope-card.ts +64 -0
- package/telegram-plugin/gateway/gateway.ts +70 -15
- package/telegram-plugin/gateway/unhandled-rejection-policy.ts +46 -1
- package/telegram-plugin/tests/boot-clears-clean-shutdown-marker.test.ts +75 -0
- package/telegram-plugin/tests/error-envelope-unlock-card.test.ts +79 -0
- package/telegram-plugin/tests/silent-end-integration.test.ts +268 -0
- package/telegram-plugin/tests/silent-end.test.ts +105 -0
- package/telegram-plugin/tests/unhandled-rejection-policy.test.ts +51 -6
|
@@ -10948,7 +10948,7 @@ var init_dist = __esm(() => {
|
|
|
10948
10948
|
});
|
|
10949
10949
|
|
|
10950
10950
|
// src/config/schema.ts
|
|
10951
|
-
var CodeRepoEntrySchema, AgentBindMountSchema, ScheduleEntrySchema, AgentSoulSchema, AgentToolsSchema, AgentMemorySchema, HookEntrySchema, AgentHooksSchema, SubagentSchema, SessionSchema, SessionContinuitySchema, TelegramChannelSchema, ChannelsSchema, TIMEZONE_REGEX, ApproverIdSchema, GoogleWorkspaceTierSchema, GoogleWorkspaceConfigSchema, AgentGoogleWorkspaceConfigSchema, ReactionsSchema, ReleaseBlock, NetworkIsolationSchema, profileFields, ProfileSchema, _omitExtends, defaultsFields, AgentDefaultsSchema, AgentSchema, TelegramConfigSchema, MemoryBackendConfigSchema, VaultConfigSchema, QuotaConfigSchema, HostControlConfigSchema, HostdConfigSchema, SwitchroomConfigSchema;
|
|
10951
|
+
var CodeRepoEntrySchema, AgentBindMountSchema, ScheduleEntrySchema, AgentSoulSchema, AgentToolsSchema, AgentMemorySchema, HookEntrySchema, AgentHooksSchema, SubagentSchema, SessionSchema, SessionContinuitySchema, TelegramChannelSchema, ChannelsSchema, TIMEZONE_REGEX, ApproverIdSchema, GoogleWorkspaceTierSchema, GoogleWorkspaceConfigSchema, AgentGoogleWorkspaceConfigSchema, ReactionsSchema, ReleaseBlock, NetworkIsolationSchema, profileFields, ProfileSchema, _omitExtends, defaultsFields, AgentDefaultsSchema, AgentSchema, TelegramConfigSchema, MemoryBackendConfigSchema, VaultConfigSchema, QuotaConfigSchema, AutoReleaseCheckSchema, HostControlConfigSchema, HostdConfigSchema, SwitchroomConfigSchema;
|
|
10952
10952
|
var init_schema = __esm(() => {
|
|
10953
10953
|
init_zod();
|
|
10954
10954
|
CodeRepoEntrySchema = exports_external.object({
|
|
@@ -11294,8 +11294,15 @@ var init_schema = __esm(() => {
|
|
|
11294
11294
|
weekly_budget_usd: exports_external.number().positive().optional().describe("Weekly USD spend budget. If unset, the greeting shows raw usage only."),
|
|
11295
11295
|
monthly_budget_usd: exports_external.number().positive().optional().describe("Monthly USD spend budget. If unset, the greeting shows raw usage only.")
|
|
11296
11296
|
});
|
|
11297
|
+
AutoReleaseCheckSchema = exports_external.object({
|
|
11298
|
+
enabled: exports_external.boolean().default(false).describe("When true, hostd polls the remote release tag every " + "`interval_minutes` and applies + restarts the fleet when a new " + "release is detected. Default false — opt-in."),
|
|
11299
|
+
interval_minutes: exports_external.number().int().min(5).max(1440).default(5).describe("Poll interval in minutes. Floor of 5m matches the agent-config " + "cron rate limit; ceiling of 1440m (24h) is a sanity cap."),
|
|
11300
|
+
apply_on_detect: exports_external.boolean().default(true).describe("When false, hostd logs `release_detected` but does NOT call " + "update_apply / restart all. Useful for dogfooding the detector " + "without rolling the fleet."),
|
|
11301
|
+
image_ref: exports_external.string().default("ghcr.io/switchroom/switchroom-agent:latest").describe("Image reference whose remote digest is compared to the local " + "image digest. Defaults to the agent image's :latest tag, which " + "is the canonical signal that a release has been promoted.")
|
|
11302
|
+
});
|
|
11297
11303
|
HostControlConfigSchema = exports_external.object({
|
|
11298
|
-
enabled: exports_external.boolean().default(true).describe("Whether the host-control daemon is in use. Default: true (since " + "RFC C Phase 2 default-flip — the gateway's /restart, /new, /reset, " + "and /update apply slash-commands all dispatch through hostd, and " + "without it those verbs fail on docker-mode installs because the " + "agent container has no docker binary/socket). " + "When true, the compose generator emits per-agent bind mounts " + "at `~/.switchroom/hostd/<name>/sock` for every admin-flagged " + "agent. Install the daemon with `switchroom hostd install` — " + "it runs as a docker container in its own compose project " + "(`switchroom-hostd`), separate from the agent fleet's compose " + "project so `up -d --remove-orphans` cycles of the fleet " + "can't recreate the daemon mid-RPC. See RFC C §5.1. " + "Set enabled: false only on legacy systemd-mode installs that " + "still rely on the in-container `spawnSwitchroomDetached` " + "shellout (removal is tracked as RFC C Phase 3).")
|
|
11304
|
+
enabled: exports_external.boolean().default(true).describe("Whether the host-control daemon is in use. Default: true (since " + "RFC C Phase 2 default-flip — the gateway's /restart, /new, /reset, " + "and /update apply slash-commands all dispatch through hostd, and " + "without it those verbs fail on docker-mode installs because the " + "agent container has no docker binary/socket). " + "When true, the compose generator emits per-agent bind mounts " + "at `~/.switchroom/hostd/<name>/sock` for every admin-flagged " + "agent. Install the daemon with `switchroom hostd install` — " + "it runs as a docker container in its own compose project " + "(`switchroom-hostd`), separate from the agent fleet's compose " + "project so `up -d --remove-orphans` cycles of the fleet " + "can't recreate the daemon mid-RPC. See RFC C §5.1. " + "Set enabled: false only on legacy systemd-mode installs that " + "still rely on the in-container `spawnSwitchroomDetached` " + "shellout (removal is tracked as RFC C Phase 3)."),
|
|
11305
|
+
auto_release_check: AutoReleaseCheckSchema.default({}).describe("Pull-based release-triggered fleet restart (#1743). hostd polls " + "the remote release tag on a fixed interval and applies + " + "restarts the fleet (graceful) when a new release is detected. " + "Opt-in: default enabled=false.")
|
|
11299
11306
|
});
|
|
11300
11307
|
HostdConfigSchema = exports_external.object({
|
|
11301
11308
|
config_edit_enabled: exports_external.boolean().default(false).describe("Opt-in toggle for the `config_propose_edit` hostd verb (RFC " + "admin-agent-config-edit §3). Default false — the verb returns " + "`E_CONFIG_EDIT_DISABLED` until the operator explicitly flips " + "this to true. When true (and once PR 1c lands the apply path), " + "admin agents can propose unified-diff patches against " + "`/state/config/switchroom.yaml`, gated by an operator approval " + "card in the primary chat. Same trust posture as `update_apply` " + "and `agent_restart`: the human-in-the-loop tap is the security " + "boundary, not the agent's judgement."),
|
|
@@ -10948,7 +10948,7 @@ var init_zod = __esm(() => {
|
|
|
10948
10948
|
});
|
|
10949
10949
|
|
|
10950
10950
|
// src/config/schema.ts
|
|
10951
|
-
var CodeRepoEntrySchema, AgentBindMountSchema, ScheduleEntrySchema, AgentSoulSchema, AgentToolsSchema, AgentMemorySchema, HookEntrySchema, AgentHooksSchema, SubagentSchema, SessionSchema, SessionContinuitySchema, TelegramChannelSchema, ChannelsSchema, TIMEZONE_REGEX, ApproverIdSchema, GoogleWorkspaceTierSchema, GoogleWorkspaceConfigSchema, AgentGoogleWorkspaceConfigSchema, ReactionsSchema, ReleaseBlock, NetworkIsolationSchema, profileFields, ProfileSchema, _omitExtends, defaultsFields, AgentDefaultsSchema, AgentSchema, TelegramConfigSchema, MemoryBackendConfigSchema, VaultConfigSchema, QuotaConfigSchema, HostControlConfigSchema, HostdConfigSchema, SwitchroomConfigSchema;
|
|
10951
|
+
var CodeRepoEntrySchema, AgentBindMountSchema, ScheduleEntrySchema, AgentSoulSchema, AgentToolsSchema, AgentMemorySchema, HookEntrySchema, AgentHooksSchema, SubagentSchema, SessionSchema, SessionContinuitySchema, TelegramChannelSchema, ChannelsSchema, TIMEZONE_REGEX, ApproverIdSchema, GoogleWorkspaceTierSchema, GoogleWorkspaceConfigSchema, AgentGoogleWorkspaceConfigSchema, ReactionsSchema, ReleaseBlock, NetworkIsolationSchema, profileFields, ProfileSchema, _omitExtends, defaultsFields, AgentDefaultsSchema, AgentSchema, TelegramConfigSchema, MemoryBackendConfigSchema, VaultConfigSchema, QuotaConfigSchema, AutoReleaseCheckSchema, HostControlConfigSchema, HostdConfigSchema, SwitchroomConfigSchema;
|
|
10952
10952
|
var init_schema = __esm(() => {
|
|
10953
10953
|
init_zod();
|
|
10954
10954
|
CodeRepoEntrySchema = exports_external.object({
|
|
@@ -11294,8 +11294,15 @@ var init_schema = __esm(() => {
|
|
|
11294
11294
|
weekly_budget_usd: exports_external.number().positive().optional().describe("Weekly USD spend budget. If unset, the greeting shows raw usage only."),
|
|
11295
11295
|
monthly_budget_usd: exports_external.number().positive().optional().describe("Monthly USD spend budget. If unset, the greeting shows raw usage only.")
|
|
11296
11296
|
});
|
|
11297
|
+
AutoReleaseCheckSchema = exports_external.object({
|
|
11298
|
+
enabled: exports_external.boolean().default(false).describe("When true, hostd polls the remote release tag every " + "`interval_minutes` and applies + restarts the fleet when a new " + "release is detected. Default false — opt-in."),
|
|
11299
|
+
interval_minutes: exports_external.number().int().min(5).max(1440).default(5).describe("Poll interval in minutes. Floor of 5m matches the agent-config " + "cron rate limit; ceiling of 1440m (24h) is a sanity cap."),
|
|
11300
|
+
apply_on_detect: exports_external.boolean().default(true).describe("When false, hostd logs `release_detected` but does NOT call " + "update_apply / restart all. Useful for dogfooding the detector " + "without rolling the fleet."),
|
|
11301
|
+
image_ref: exports_external.string().default("ghcr.io/switchroom/switchroom-agent:latest").describe("Image reference whose remote digest is compared to the local " + "image digest. Defaults to the agent image's :latest tag, which " + "is the canonical signal that a release has been promoted.")
|
|
11302
|
+
});
|
|
11297
11303
|
HostControlConfigSchema = exports_external.object({
|
|
11298
|
-
enabled: exports_external.boolean().default(true).describe("Whether the host-control daemon is in use. Default: true (since " + "RFC C Phase 2 default-flip — the gateway's /restart, /new, /reset, " + "and /update apply slash-commands all dispatch through hostd, and " + "without it those verbs fail on docker-mode installs because the " + "agent container has no docker binary/socket). " + "When true, the compose generator emits per-agent bind mounts " + "at `~/.switchroom/hostd/<name>/sock` for every admin-flagged " + "agent. Install the daemon with `switchroom hostd install` — " + "it runs as a docker container in its own compose project " + "(`switchroom-hostd`), separate from the agent fleet's compose " + "project so `up -d --remove-orphans` cycles of the fleet " + "can't recreate the daemon mid-RPC. See RFC C §5.1. " + "Set enabled: false only on legacy systemd-mode installs that " + "still rely on the in-container `spawnSwitchroomDetached` " + "shellout (removal is tracked as RFC C Phase 3).")
|
|
11304
|
+
enabled: exports_external.boolean().default(true).describe("Whether the host-control daemon is in use. Default: true (since " + "RFC C Phase 2 default-flip — the gateway's /restart, /new, /reset, " + "and /update apply slash-commands all dispatch through hostd, and " + "without it those verbs fail on docker-mode installs because the " + "agent container has no docker binary/socket). " + "When true, the compose generator emits per-agent bind mounts " + "at `~/.switchroom/hostd/<name>/sock` for every admin-flagged " + "agent. Install the daemon with `switchroom hostd install` — " + "it runs as a docker container in its own compose project " + "(`switchroom-hostd`), separate from the agent fleet's compose " + "project so `up -d --remove-orphans` cycles of the fleet " + "can't recreate the daemon mid-RPC. See RFC C §5.1. " + "Set enabled: false only on legacy systemd-mode installs that " + "still rely on the in-container `spawnSwitchroomDetached` " + "shellout (removal is tracked as RFC C Phase 3)."),
|
|
11305
|
+
auto_release_check: AutoReleaseCheckSchema.default({}).describe("Pull-based release-triggered fleet restart (#1743). hostd polls " + "the remote release tag on a fixed interval and applies + " + "restarts the fleet (graceful) when a new release is detected. " + "Opt-in: default enabled=false.")
|
|
11299
11306
|
});
|
|
11300
11307
|
HostdConfigSchema = exports_external.object({
|
|
11301
11308
|
config_edit_enabled: exports_external.boolean().default(false).describe("Opt-in toggle for the `config_propose_edit` hostd verb (RFC " + "admin-agent-config-edit §3). Default false — the verb returns " + "`E_CONFIG_EDIT_DISABLED` until the operator explicitly flips " + "this to true. When true (and once PR 1c lands the apply path), " + "admin agents can propose unified-diff patches against " + "`/state/config/switchroom.yaml`, gated by an operator approval " + "card in the primary chat. Same trust posture as `update_apply` " + "and `agent_restart`: the human-in-the-loop tap is the security " + "boundary, not the agent's judgement."),
|
package/package.json
CHANGED
|
@@ -108,7 +108,7 @@ If no sub-agents are configured, do the work yourself.
|
|
|
108
108
|
|
|
109
109
|
By default, every restart starts a **fresh `claude` session** — the in-flight transcript is NOT carried over (`session_continuity.resume_mode: handoff`, the default since switchroom #362). Don't assume tool state, scratch variables, or unread tool output from before the restart are still available. What does survive:
|
|
110
110
|
|
|
111
|
-
- **Handoff briefing** — on a clean shutdown, the Stop hook writes a bounded raw transcript tail of the prior session to `.handoff.md`. On boot, start.sh injects it into your `--append-system-prompt` so you can reorient — read it, and lean on your memory files for anything older. If
|
|
111
|
+
- **Handoff briefing** — on a clean shutdown, the Stop hook writes a bounded raw transcript tail of the prior session to `.handoff.md`. On boot, start.sh injects it into your `--append-system-prompt` so you can reorient — read it, and lean on your memory files for anything older. If `.handoff.md` is missing or stale (fresh agent, or pre-Stop-hook crash), `start.sh` runs `handoff-briefing.sh` to assemble `.handoff-briefing.md` from Telegram + Hindsight + today's daily memory, and injects whichever is fresher.
|
|
112
112
|
- **Hindsight memory** — auto-recall fires on every inbound user message and surfaces relevant memories from past sessions. Long-term facts, decisions, and mental models live here, not in the transcript.
|
|
113
113
|
- **Telegram history** — the gateway's SQLite buffer remembers every inbound/outbound message. Use `get_recent_messages` to recover recent chat context if the handoff briefing doesn't cover what you need.
|
|
114
114
|
- **`SWITCHROOM_PENDING_TURN`** — if your previous session was killed mid-turn (watchdog, SIGTERM, timeout), start.sh exports this env var plus the chat/thread/last-user-message context. Acknowledge the interruption and ask for direction rather than silently resuming.
|
|
@@ -23592,7 +23592,7 @@ var init_dist = __esm(() => {
|
|
|
23592
23592
|
});
|
|
23593
23593
|
|
|
23594
23594
|
// ../src/config/schema.ts
|
|
23595
|
-
var CodeRepoEntrySchema, AgentBindMountSchema, ScheduleEntrySchema, AgentSoulSchema, AgentToolsSchema, AgentMemorySchema, HookEntrySchema, AgentHooksSchema, SubagentSchema, SessionSchema, SessionContinuitySchema, TelegramChannelSchema, ChannelsSchema, TIMEZONE_REGEX, ApproverIdSchema, GoogleWorkspaceTierSchema, GoogleWorkspaceConfigSchema, AgentGoogleWorkspaceConfigSchema, ReactionsSchema, ReleaseBlock, NetworkIsolationSchema, profileFields, ProfileSchema, _omitExtends, defaultsFields, AgentDefaultsSchema, AgentSchema, TelegramConfigSchema, MemoryBackendConfigSchema, VaultConfigSchema, QuotaConfigSchema, HostControlConfigSchema, HostdConfigSchema, SwitchroomConfigSchema;
|
|
23595
|
+
var CodeRepoEntrySchema, AgentBindMountSchema, ScheduleEntrySchema, AgentSoulSchema, AgentToolsSchema, AgentMemorySchema, HookEntrySchema, AgentHooksSchema, SubagentSchema, SessionSchema, SessionContinuitySchema, TelegramChannelSchema, ChannelsSchema, TIMEZONE_REGEX, ApproverIdSchema, GoogleWorkspaceTierSchema, GoogleWorkspaceConfigSchema, AgentGoogleWorkspaceConfigSchema, ReactionsSchema, ReleaseBlock, NetworkIsolationSchema, profileFields, ProfileSchema, _omitExtends, defaultsFields, AgentDefaultsSchema, AgentSchema, TelegramConfigSchema, MemoryBackendConfigSchema, VaultConfigSchema, QuotaConfigSchema, AutoReleaseCheckSchema, HostControlConfigSchema, HostdConfigSchema, SwitchroomConfigSchema;
|
|
23596
23596
|
var init_schema = __esm(() => {
|
|
23597
23597
|
init_zod();
|
|
23598
23598
|
CodeRepoEntrySchema = exports_external.object({
|
|
@@ -23938,8 +23938,15 @@ var init_schema = __esm(() => {
|
|
|
23938
23938
|
weekly_budget_usd: exports_external.number().positive().optional().describe("Weekly USD spend budget. If unset, the greeting shows raw usage only."),
|
|
23939
23939
|
monthly_budget_usd: exports_external.number().positive().optional().describe("Monthly USD spend budget. If unset, the greeting shows raw usage only.")
|
|
23940
23940
|
});
|
|
23941
|
+
AutoReleaseCheckSchema = exports_external.object({
|
|
23942
|
+
enabled: exports_external.boolean().default(false).describe("When true, hostd polls the remote release tag every " + "`interval_minutes` and applies + restarts the fleet when a new " + "release is detected. Default false \u2014 opt-in."),
|
|
23943
|
+
interval_minutes: exports_external.number().int().min(5).max(1440).default(5).describe("Poll interval in minutes. Floor of 5m matches the agent-config " + "cron rate limit; ceiling of 1440m (24h) is a sanity cap."),
|
|
23944
|
+
apply_on_detect: exports_external.boolean().default(true).describe("When false, hostd logs `release_detected` but does NOT call " + "update_apply / restart all. Useful for dogfooding the detector " + "without rolling the fleet."),
|
|
23945
|
+
image_ref: exports_external.string().default("ghcr.io/switchroom/switchroom-agent:latest").describe("Image reference whose remote digest is compared to the local " + "image digest. Defaults to the agent image's :latest tag, which " + "is the canonical signal that a release has been promoted.")
|
|
23946
|
+
});
|
|
23941
23947
|
HostControlConfigSchema = exports_external.object({
|
|
23942
|
-
enabled: exports_external.boolean().default(true).describe("Whether the host-control daemon is in use. Default: true (since " + "RFC C Phase 2 default-flip \u2014 the gateway's /restart, /new, /reset, " + "and /update apply slash-commands all dispatch through hostd, and " + "without it those verbs fail on docker-mode installs because the " + "agent container has no docker binary/socket). " + "When true, the compose generator emits per-agent bind mounts " + "at `~/.switchroom/hostd/<name>/sock` for every admin-flagged " + "agent. Install the daemon with `switchroom hostd install` \u2014 " + "it runs as a docker container in its own compose project " + "(`switchroom-hostd`), separate from the agent fleet's compose " + "project so `up -d --remove-orphans` cycles of the fleet " + "can't recreate the daemon mid-RPC. See RFC C \u00a75.1. " + "Set enabled: false only on legacy systemd-mode installs that " + "still rely on the in-container `spawnSwitchroomDetached` " + "shellout (removal is tracked as RFC C Phase 3).")
|
|
23948
|
+
enabled: exports_external.boolean().default(true).describe("Whether the host-control daemon is in use. Default: true (since " + "RFC C Phase 2 default-flip \u2014 the gateway's /restart, /new, /reset, " + "and /update apply slash-commands all dispatch through hostd, and " + "without it those verbs fail on docker-mode installs because the " + "agent container has no docker binary/socket). " + "When true, the compose generator emits per-agent bind mounts " + "at `~/.switchroom/hostd/<name>/sock` for every admin-flagged " + "agent. Install the daemon with `switchroom hostd install` \u2014 " + "it runs as a docker container in its own compose project " + "(`switchroom-hostd`), separate from the agent fleet's compose " + "project so `up -d --remove-orphans` cycles of the fleet " + "can't recreate the daemon mid-RPC. See RFC C \u00a75.1. " + "Set enabled: false only on legacy systemd-mode installs that " + "still rely on the in-container `spawnSwitchroomDetached` " + "shellout (removal is tracked as RFC C Phase 3)."),
|
|
23949
|
+
auto_release_check: AutoReleaseCheckSchema.default({}).describe("Pull-based release-triggered fleet restart (#1743). hostd polls " + "the remote release tag on a fixed interval and applies + " + "restarts the fleet (graceful) when a new release is detected. " + "Opt-in: default enabled=false.")
|
|
23943
23950
|
});
|
|
23944
23951
|
HostdConfigSchema = exports_external.object({
|
|
23945
23952
|
config_edit_enabled: exports_external.boolean().default(false).describe("Opt-in toggle for the `config_propose_edit` hostd verb (RFC " + "admin-agent-config-edit \u00a73). Default false \u2014 the verb returns " + "`E_CONFIG_EDIT_DISABLED` until the operator explicitly flips " + "this to true. When true (and once PR 1c lands the apply path), " + "admin agents can propose unified-diff patches against " + "`/state/config/switchroom.yaml`, gated by an operator approval " + "card in the primary chat. Same trust posture as `update_apply` " + "and `agent_restart`: the human-in-the-loop tap is the security " + "boundary, not the agent's judgement."),
|
|
@@ -43307,6 +43314,45 @@ var RequestSchema3 = exports_external.discriminatedUnion("op", [
|
|
|
43307
43314
|
ConfigProposeEditRequestSchema
|
|
43308
43315
|
]);
|
|
43309
43316
|
var ResultSchema = exports_external.enum(["started", "completed", "denied", "error"]);
|
|
43317
|
+
var ErrorFixSchema = exports_external.discriminatedUnion("kind", [
|
|
43318
|
+
exports_external.object({
|
|
43319
|
+
kind: exports_external.literal("flip_yaml_flag"),
|
|
43320
|
+
yaml_path: exports_external.string(),
|
|
43321
|
+
to: exports_external.unknown()
|
|
43322
|
+
}),
|
|
43323
|
+
exports_external.object({
|
|
43324
|
+
kind: exports_external.literal("request_vault_grant"),
|
|
43325
|
+
vault_key: exports_external.string()
|
|
43326
|
+
}),
|
|
43327
|
+
exports_external.object({
|
|
43328
|
+
kind: exports_external.literal("operator_action"),
|
|
43329
|
+
subkind: exports_external.enum(["policy_denied", "infra", "out_of_scope"]),
|
|
43330
|
+
operator_steps: exports_external.array(exports_external.string()).min(1).optional()
|
|
43331
|
+
}),
|
|
43332
|
+
exports_external.object({
|
|
43333
|
+
kind: exports_external.literal("retry_after"),
|
|
43334
|
+
retry_at: exports_external.string()
|
|
43335
|
+
}),
|
|
43336
|
+
exports_external.object({
|
|
43337
|
+
kind: exports_external.literal("quota_exceeded"),
|
|
43338
|
+
quota: exports_external.string(),
|
|
43339
|
+
current: exports_external.number(),
|
|
43340
|
+
limit: exports_external.number()
|
|
43341
|
+
}),
|
|
43342
|
+
exports_external.object({
|
|
43343
|
+
kind: exports_external.literal("bad_input"),
|
|
43344
|
+
field: exports_external.string().optional()
|
|
43345
|
+
})
|
|
43346
|
+
]);
|
|
43347
|
+
var ErrorEnvelopeSchema = exports_external.object({
|
|
43348
|
+
v: exports_external.literal(1),
|
|
43349
|
+
code: exports_external.string().regex(/^(E_[A-Z0-9_]+|VAULT-[A-Z-]+)$/),
|
|
43350
|
+
human: exports_external.string().min(1),
|
|
43351
|
+
why: exports_external.string().optional(),
|
|
43352
|
+
fix: ErrorFixSchema.optional(),
|
|
43353
|
+
docs: exports_external.string().url().optional(),
|
|
43354
|
+
request_id: exports_external.string().min(1)
|
|
43355
|
+
});
|
|
43310
43356
|
var ResponseEnvelope = {
|
|
43311
43357
|
v: exports_external.literal(1),
|
|
43312
43358
|
request_id: exports_external.string().min(1).max(128),
|
|
@@ -43316,7 +43362,8 @@ var ResponseEnvelope = {
|
|
|
43316
43362
|
audit_id: exports_external.string().min(1).optional(),
|
|
43317
43363
|
stdout_tail: exports_external.string().optional(),
|
|
43318
43364
|
stderr_tail: exports_external.string().optional(),
|
|
43319
|
-
error: exports_external.string().optional()
|
|
43365
|
+
error: exports_external.string().optional(),
|
|
43366
|
+
error_envelope: ErrorEnvelopeSchema.optional()
|
|
43320
43367
|
};
|
|
43321
43368
|
var ResponseSchema3 = exports_external.object(ResponseEnvelope);
|
|
43322
43369
|
function encodeRequest3(req) {
|
|
@@ -45787,6 +45834,11 @@ function readCleanShutdownMarker(path) {
|
|
|
45787
45834
|
return null;
|
|
45788
45835
|
}
|
|
45789
45836
|
}
|
|
45837
|
+
function clearCleanShutdownMarker(path) {
|
|
45838
|
+
try {
|
|
45839
|
+
unlinkSync8(path);
|
|
45840
|
+
} catch {}
|
|
45841
|
+
}
|
|
45790
45842
|
function shouldSuppressRecoveryBanner(marker, now, maxAgeMs = DEFAULT_MAX_AGE_MS) {
|
|
45791
45843
|
if (marker === null)
|
|
45792
45844
|
return false;
|
|
@@ -48504,10 +48556,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
48504
48556
|
}
|
|
48505
48557
|
|
|
48506
48558
|
// ../src/build-info.ts
|
|
48507
|
-
var VERSION = "0.13.33";
|
|
48508
|
-
var COMMIT_SHA = "
|
|
48509
|
-
var COMMIT_DATE = "2026-05-
|
|
48510
|
-
var LATEST_PR =
|
|
48559
|
+
var VERSION = "0.13.35";
|
|
48560
|
+
var COMMIT_SHA = "c41aabe5";
|
|
48561
|
+
var COMMIT_DATE = "2026-05-25T01:43:28Z";
|
|
48562
|
+
var LATEST_PR = 1765;
|
|
48511
48563
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
48512
48564
|
|
|
48513
48565
|
// gateway/boot-version.ts
|
|
@@ -48555,9 +48607,17 @@ function composeBootVersionString(inputs) {
|
|
|
48555
48607
|
var import_grammy6 = __toESM(require_mod2(), 1);
|
|
48556
48608
|
function classifyRejection(err, opts = {}) {
|
|
48557
48609
|
const isGrammy = opts.isGrammyError != null ? opts.isGrammyError(err) : err instanceof import_grammy6.GrammyError;
|
|
48610
|
+
const isHttp = opts.isHttpError != null ? opts.isHttpError(err) : err instanceof import_grammy6.HttpError;
|
|
48611
|
+
if (isHttp)
|
|
48612
|
+
return "log_only";
|
|
48558
48613
|
if (!isGrammy)
|
|
48559
48614
|
return "shutdown";
|
|
48560
48615
|
const e = err;
|
|
48616
|
+
if (e.error_code === 429)
|
|
48617
|
+
return "log_only";
|
|
48618
|
+
if (typeof e.error_code === "number" && e.error_code >= 500 && e.error_code < 600) {
|
|
48619
|
+
return "log_only";
|
|
48620
|
+
}
|
|
48561
48621
|
if (e.error_code !== 400)
|
|
48562
48622
|
return "shutdown";
|
|
48563
48623
|
const desc = (e.description ?? "").toLowerCase();
|
|
@@ -51122,7 +51182,9 @@ ${url}`;
|
|
|
51122
51182
|
});
|
|
51123
51183
|
noteOutbound(statusKey(chat_id, threadId), Date.now());
|
|
51124
51184
|
noteOutbound2(statusKey(chat_id, threadId), Date.now());
|
|
51125
|
-
|
|
51185
|
+
if (isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
51186
|
+
clearSilentEndState(statusKey(chat_id, threadId));
|
|
51187
|
+
}
|
|
51126
51188
|
if (previewMessageId != null && reply_to != null && replyMode !== "off") {
|
|
51127
51189
|
await deleteStalePreview(previewMessageId);
|
|
51128
51190
|
previewMessageId = null;
|
|
@@ -51445,7 +51507,13 @@ async function executeStreamReply(args) {
|
|
|
51445
51507
|
const sKey = statusKey(streamChatId, streamThreadId);
|
|
51446
51508
|
noteOutbound(sKey, Date.now());
|
|
51447
51509
|
noteOutbound2(sKey, Date.now());
|
|
51448
|
-
|
|
51510
|
+
if (isFinalAnswerReply({
|
|
51511
|
+
text: args.text ?? "",
|
|
51512
|
+
disableNotification: args.disable_notification === true,
|
|
51513
|
+
done: args.done === true
|
|
51514
|
+
})) {
|
|
51515
|
+
clearSilentEndState(sKey);
|
|
51516
|
+
}
|
|
51449
51517
|
}
|
|
51450
51518
|
}
|
|
51451
51519
|
const result = await handleStreamReply({
|
|
@@ -51521,6 +51589,8 @@ async function executeStreamReply(args) {
|
|
|
51521
51589
|
done: args.done === true
|
|
51522
51590
|
})) {
|
|
51523
51591
|
turn.finalAnswerDelivered = true;
|
|
51592
|
+
const streamThreadIdForClear = args.message_thread_id != null ? Number(args.message_thread_id) : undefined;
|
|
51593
|
+
clearSilentEndState(statusKey(streamChatId, streamThreadIdForClear));
|
|
51524
51594
|
}
|
|
51525
51595
|
{
|
|
51526
51596
|
const sChat = args.chat_id;
|
|
@@ -57917,6 +57987,7 @@ var didOneTimeSetup = false;
|
|
|
57917
57987
|
process.stderr.write(`telegram gateway: boot.clean_shutdown_marker_stale age=${ageSec}s signal=${cleanMarker.signal}${reasonTag}
|
|
57918
57988
|
`);
|
|
57919
57989
|
}
|
|
57990
|
+
clearCleanShutdownMarker(GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH);
|
|
57920
57991
|
}
|
|
57921
57992
|
if (marker) {
|
|
57922
57993
|
const ageMs = nowMs2 - marker.ts;
|
|
@@ -247,3 +247,43 @@ For posterity:
|
|
|
247
247
|
- **v2 rewrite** (this PR series, also numbered #553): same Phase 3
|
|
248
248
|
harness, plus three v2 helpers; new spec test file pins the
|
|
249
249
|
three-class contract.
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
## Silent-end contract (#1122 / #1161 / #1664 / #1741 / #1744)
|
|
253
|
+
|
|
254
|
+
The "silent-end" safety net catches turns that end without the agent delivering a final answer via `reply` / `stream_reply`. The hook re-prompts once; on the second consecutive silent end it gives up and sends a fallback message so the turn never just vanishes.
|
|
255
|
+
|
|
256
|
+
### What it is
|
|
257
|
+
|
|
258
|
+
A per-agent state file (`$TELEGRAM_STATE_DIR/silent-end-pending.json`, fallback `~/.claude/channels/telegram/silent-end-pending.json`) acts as a one-bit handshake between the gateway and the Stop hook (`telegram-plugin/hooks/silent-end-interrupt-stop.mjs`). When the file exists, the Stop hook blocks the session stop and injects a re-prompt; when absent, the stop is allowed.
|
|
259
|
+
|
|
260
|
+
### When the file is WRITTEN
|
|
261
|
+
|
|
262
|
+
The gateway writes the file at `turn_end` if and only if `turn.finalAnswerDelivered === false` — i.e. no `reply` or `stream_reply` call during this turn passed the `isFinalAnswerReply` predicate. See `gateway.ts` around L7267 (`recordUndeliveredTurnEnd`). The write is idempotent across re-prompt rounds: the same turnKey inherits the prior `retryCount`; a different turnKey resets to 0.
|
|
263
|
+
|
|
264
|
+
### When the file is CLEARED
|
|
265
|
+
|
|
266
|
+
The gateway calls `clearSilentEndState(turnKey)` on every reply that qualifies as the final answer. Three call sites:
|
|
267
|
+
|
|
268
|
+
1. `executeReply` (gateway.ts L4599-4611) — fires on every `reply` tool call. Clears iff `isFinalAnswerReply` returns true.
|
|
269
|
+
2. `executeStreamReply` first-emit branch (gateway.ts L5172-5195) — fires only on the FIRST emit per stream (gated by `!activeDraftStreams.has(sKey)`). Clears iff that first emit qualifies as final.
|
|
270
|
+
3. `executeStreamReply` final-answer site (gateway.ts L5335-5358, added in #1744 follow-up) — fires on every emit that qualifies as final, regardless of whether it is the first emit. This is the load-bearing site for streams whose first emit was ack-shaped but whose later emit (typically `done=true`) carries the real answer. Without site 3, such streams leak the state file.
|
|
271
|
+
|
|
272
|
+
The clear is fail-silent and turnKey-keyed: a clear for turnKey A does NOT unlink a state file written for turnKey B (see `silent-end.ts clearSilentEndState`). This makes calling it unconditionally on every final-answer emit safe.
|
|
273
|
+
|
|
274
|
+
### The gate predicate
|
|
275
|
+
|
|
276
|
+
A reply qualifies as the final answer when `isFinalAnswerReply` returns true. The predicate (in `final-answer-detect.ts`) is a logical OR of three signals:
|
|
277
|
+
|
|
278
|
+
- `done === true` (stream_reply terminal call), OR
|
|
279
|
+
- `disableNotification === false` (pacing-contract final-answer signal), OR
|
|
280
|
+
- `text.length >= 200` (length backstop for substantive replies mis-marked as interim).
|
|
281
|
+
|
|
282
|
+
A reply with `disable_notification: true`, short text, and no `done` is an interim ack — it never clears the state.
|
|
283
|
+
|
|
284
|
+
### Send sites and tests
|
|
285
|
+
|
|
286
|
+
- `executeReply` (gateway.ts:4340) — single send site, single clear (site 1 above).
|
|
287
|
+
- `executeStreamReply` (gateway.ts:5089) — two clear sites (sites 2 and 3 above) to cover both the first-emit and the later-emit final-answer paths.
|
|
288
|
+
- Unit coverage: `tests/silent-end.test.ts` (`#1741` block) — the executeReply gate as a unit.
|
|
289
|
+
- Integration coverage: `tests/silent-end-integration.test.ts` (#1744) — the stream first-emit-vs-later-emit branching, with the ack-then-final regression case pinned.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Render a one-tap unlock card for hostd error_envelopes that carry a
|
|
3
|
+
* `flip_yaml_flag` fix (#1758 Phase 1).
|
|
4
|
+
*
|
|
5
|
+
* CRITICAL safety: the `yaml_path` MUST be on the
|
|
6
|
+
* `UNLOCK_CARD_YAML_ALLOWLIST` exported from
|
|
7
|
+
* `src/host-control/config-edit-validator.ts`. A malformed or hostile
|
|
8
|
+
* envelope from any backend could otherwise nudge the operator into
|
|
9
|
+
* one-tap-approving an arbitrary flag flip. Non-allowlisted paths fall
|
|
10
|
+
* back to plain-text rendering (the caller surfaces `resp.error` as
|
|
11
|
+
* today).
|
|
12
|
+
*
|
|
13
|
+
* Phase 1 scope: ONLY `flip_yaml_flag`. `request_vault_grant` is
|
|
14
|
+
* explicitly deferred to a later phase (still plain-text rendered).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import type { HostdResponse } from "../../src/host-control/protocol.js";
|
|
18
|
+
import { isAllowlistedYamlPath } from "../../src/host-control/config-edit-validator.js";
|
|
19
|
+
import {
|
|
20
|
+
buildApprovalCard,
|
|
21
|
+
type BuiltApprovalCard,
|
|
22
|
+
} from "./approval-card.js";
|
|
23
|
+
|
|
24
|
+
export type UnlockCardOutcome =
|
|
25
|
+
| { kind: "card"; card: BuiltApprovalCard; yaml_path: string; to: unknown }
|
|
26
|
+
| { kind: "plain-text" };
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Decide whether to render a one-tap unlock card for the given
|
|
30
|
+
* response. Returns `{kind: "plain-text"}` whenever the envelope
|
|
31
|
+
* lacks a `flip_yaml_flag` fix OR the path isn't on the allowlist.
|
|
32
|
+
*
|
|
33
|
+
* `approvalRequestId` is the 32-hex nonce minted by the approval
|
|
34
|
+
* kernel; caller is responsible for binding the card to that nonce
|
|
35
|
+
* and recording the apply-on-tap intent.
|
|
36
|
+
*/
|
|
37
|
+
export function renderErrorEnvelopeCard(
|
|
38
|
+
resp: HostdResponse,
|
|
39
|
+
agentName: string,
|
|
40
|
+
approvalRequestId: string,
|
|
41
|
+
): UnlockCardOutcome {
|
|
42
|
+
const env = resp.error_envelope;
|
|
43
|
+
if (!env || !env.fix) return { kind: "plain-text" };
|
|
44
|
+
if (env.fix.kind !== "flip_yaml_flag") {
|
|
45
|
+
// request_vault_grant is Phase-2 work; everything else has no
|
|
46
|
+
// unlock-card UX. Caller falls back to plain-text rendering.
|
|
47
|
+
return { kind: "plain-text" };
|
|
48
|
+
}
|
|
49
|
+
const { yaml_path, to } = env.fix;
|
|
50
|
+
if (!isAllowlistedYamlPath(yaml_path)) {
|
|
51
|
+
// Defense-in-depth: never render a one-tap card for a path the
|
|
52
|
+
// operator hasn't explicitly opted into.
|
|
53
|
+
return { kind: "plain-text" };
|
|
54
|
+
}
|
|
55
|
+
const card = buildApprovalCard({
|
|
56
|
+
request_id: approvalRequestId,
|
|
57
|
+
agent: agentName,
|
|
58
|
+
scope_humanized: `flip ${yaml_path} → ${JSON.stringify(to)}`,
|
|
59
|
+
why: env.human + (env.why ? ` — ${env.why}` : ""),
|
|
60
|
+
offer_always: false,
|
|
61
|
+
offer_ttl: false,
|
|
62
|
+
});
|
|
63
|
+
return { kind: "card", card, yaml_path, to };
|
|
64
|
+
}
|
|
@@ -319,12 +319,16 @@ import {
|
|
|
319
319
|
import {
|
|
320
320
|
writeCleanShutdownMarker,
|
|
321
321
|
readCleanShutdownMarker,
|
|
322
|
-
//
|
|
323
|
-
// the marker is
|
|
324
|
-
//
|
|
325
|
-
//
|
|
326
|
-
//
|
|
327
|
-
//
|
|
322
|
+
// 2026-05-25 — clearCleanShutdownMarker IS imported and called on every
|
|
323
|
+
// boot after the marker is read. The earlier "intentionally NOT imported"
|
|
324
|
+
// comment was wrong: it assumed every shutdown writes a fresh marker, but
|
|
325
|
+
// unhandledRejection / uncaughtException paths explicitly SKIP the write
|
|
326
|
+
// (gateway.ts:15107 — "crash path"). A marker from a prior graceful
|
|
327
|
+
// shutdown then sits on disk for hours and triggers a misleading stale-
|
|
328
|
+
// marker crash banner on the next boot after an unhandled rejection.
|
|
329
|
+
// Clearing on boot collapses the marker to "describes the immediately
|
|
330
|
+
// preceding shutdown only" semantics.
|
|
331
|
+
clearCleanShutdownMarker,
|
|
328
332
|
shouldSuppressRecoveryBanner,
|
|
329
333
|
resolveShutdownMarker,
|
|
330
334
|
DEFAULT_MAX_AGE_MS as CLEAN_SHUTDOWN_MAX_AGE_MS,
|
|
@@ -4594,12 +4598,21 @@ async function executeReply(args: Record<string, unknown>): Promise<{ content: A
|
|
|
4594
4598
|
// #1122 KPI: a `reply` always produces a fresh user-visible outbound
|
|
4595
4599
|
// message — count it for the outbound-gap / TTFO KPI AND reset the
|
|
4596
4600
|
// silence-poke clock so the next poke is measured from this send.
|
|
4597
|
-
// Also clear any silent-end state file so the Stop hook doesn't fire
|
|
4598
|
-
// a stale block when the session ends (deterministic restore of the
|
|
4599
|
-
// detection PR3 inadvertently removed).
|
|
4600
4601
|
signalTracker.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
4601
4602
|
silencePoke.noteOutbound(statusKey(chat_id, threadId), Date.now())
|
|
4602
|
-
|
|
4603
|
+
// #1741 — only clear silent-end state on a plausibly-final reply.
|
|
4604
|
+
// An interim ack (disable_notification:true, short text, no done)
|
|
4605
|
+
// must NOT clear the state file; otherwise a turn that ends with
|
|
4606
|
+
// ack-only + answer-as-transcript leaves no state for the Stop
|
|
4607
|
+
// hook to act on if `turn_end` never lands (the `turn_duration`
|
|
4608
|
+
// system event is unreliable for trivial-prompt turns — see the
|
|
4609
|
+
// executeReply finalize comments). Final-answer replies still
|
|
4610
|
+
// clear; the main turn-end path also re-writes the state when
|
|
4611
|
+
// finalAnswerDelivered=false, so this is a belt-and-braces gate
|
|
4612
|
+
// for the turn_end-missing case (#1741).
|
|
4613
|
+
if (isFinalAnswerReply({ text: rawText, disableNotification })) {
|
|
4614
|
+
clearSilentEndState(statusKey(chat_id, threadId))
|
|
4615
|
+
}
|
|
4603
4616
|
|
|
4604
4617
|
if (previewMessageId != null && reply_to != null && replyMode !== 'off') {
|
|
4605
4618
|
await deleteStalePreview(previewMessageId)
|
|
@@ -5170,7 +5183,19 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
5170
5183
|
const sKey = statusKey(streamChatId, streamThreadId)
|
|
5171
5184
|
signalTracker.noteOutbound(sKey, Date.now())
|
|
5172
5185
|
silencePoke.noteOutbound(sKey, Date.now())
|
|
5173
|
-
|
|
5186
|
+
// #1741 — see executeReply for the rationale: only a plausibly-
|
|
5187
|
+
// final stream_reply clears the silent-end state. An interim
|
|
5188
|
+
// ack via stream_reply must NOT clear; the Stop hook needs
|
|
5189
|
+
// the state to persist if turn_end fails to land.
|
|
5190
|
+
if (
|
|
5191
|
+
isFinalAnswerReply({
|
|
5192
|
+
text: (args.text as string | undefined) ?? '',
|
|
5193
|
+
disableNotification: args.disable_notification === true,
|
|
5194
|
+
done: args.done === true,
|
|
5195
|
+
})
|
|
5196
|
+
) {
|
|
5197
|
+
clearSilentEndState(sKey)
|
|
5198
|
+
}
|
|
5174
5199
|
}
|
|
5175
5200
|
}
|
|
5176
5201
|
|
|
@@ -5320,6 +5345,20 @@ async function executeStreamReply(args: Record<string, unknown>): Promise<unknow
|
|
|
5320
5345
|
})
|
|
5321
5346
|
) {
|
|
5322
5347
|
turn.finalAnswerDelivered = true
|
|
5348
|
+
// #1744 follow-up — stream_reply edge case. The first-emit gate at
|
|
5349
|
+
// the `!activeDraftStreams.has(sKey)` branch only clears silent-end state on the FIRST emit of a stream.
|
|
5350
|
+
// If a stream's first emit was ack-shaped (disable_notification:true,
|
|
5351
|
+
// short text, no done) it correctly did NOT clear the state. But a
|
|
5352
|
+
// LATER emit in the same stream may flip `done=true` or carry
|
|
5353
|
+
// substantive text — that's the real final answer landing, and the
|
|
5354
|
+
// state file must be cleared here too. clearSilentEndState is
|
|
5355
|
+
// idempotent (no-op when the file is absent or the turnKey doesn't
|
|
5356
|
+
// match), so calling it unconditionally on every final-answer-shaped
|
|
5357
|
+
// emit is safe even if the first-emit path already cleared.
|
|
5358
|
+
const streamThreadIdForClear = args.message_thread_id != null
|
|
5359
|
+
? Number(args.message_thread_id)
|
|
5360
|
+
: undefined
|
|
5361
|
+
clearSilentEndState(statusKey(streamChatId, streamThreadIdForClear))
|
|
5323
5362
|
}
|
|
5324
5363
|
// v0.13.30 follow-up — release the buffer gate on every successful
|
|
5325
5364
|
// stream_reply too. Same rationale as executeReply: short replies
|
|
@@ -15618,10 +15657,26 @@ void (async () => {
|
|
|
15618
15657
|
} else {
|
|
15619
15658
|
process.stderr.write(`telegram gateway: boot.clean_shutdown_marker_stale age=${ageSec}s signal=${cleanMarker.signal}${reasonTag}\n`)
|
|
15620
15659
|
}
|
|
15621
|
-
//
|
|
15622
|
-
//
|
|
15623
|
-
//
|
|
15624
|
-
//
|
|
15660
|
+
// 2026-05-25 — clear the marker after this boot has read it.
|
|
15661
|
+
// Pre-fix the comment here claimed the file was "self-
|
|
15662
|
+
// overwriting, age-gated, harmless to leave on disk" — that's
|
|
15663
|
+
// true ONLY for the cycle where every shutdown writes a fresh
|
|
15664
|
+
// marker. The unhandledRejection / uncaughtException paths
|
|
15665
|
+
// explicitly SKIP writing (gateway.ts:15107 — the "crash path")
|
|
15666
|
+
// so a marker from an earlier graceful shutdown sits on disk
|
|
15667
|
+
// for hours, then on the next boot looks stale-by-age and
|
|
15668
|
+
// fires a misleading agent-crashed banner with detail
|
|
15669
|
+
// `clean-shutdown marker stale age=39976s` (clerk 2026-05-25
|
|
15670
|
+
// 01:11). Clearing now means the marker only ever describes
|
|
15671
|
+
// the IMMEDIATELY PRECEDING shutdown, not "some shutdown in
|
|
15672
|
+
// the past". After this clear: a subsequent crash with no
|
|
15673
|
+
// marker write = no marker file = correctly classified
|
|
15674
|
+
// 'crash' via the sessionMarker fallback (boot-reason.ts:84);
|
|
15675
|
+
// a graceful shutdown writes a fresh marker that the next
|
|
15676
|
+
// boot reads + clears. The historical session-greeting.sh
|
|
15677
|
+
// ownership the old comment referred to is gone since #142
|
|
15678
|
+
// but the GC step was never re-homed — this is it.
|
|
15679
|
+
clearCleanShutdownMarker(GATEWAY_CLEAN_SHUTDOWN_MARKER_PATH)
|
|
15625
15680
|
}
|
|
15626
15681
|
|
|
15627
15682
|
if (marker) {
|
|
@@ -10,13 +10,15 @@
|
|
|
10
10
|
* Pure helper so it can be tested without spinning up the gateway.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import { GrammyError } from 'grammy'
|
|
13
|
+
import { GrammyError, HttpError } from 'grammy'
|
|
14
14
|
|
|
15
15
|
export type RejectionAction = 'shutdown' | 'log_only'
|
|
16
16
|
|
|
17
17
|
export interface RejectionPolicyOptions {
|
|
18
18
|
/** Allow tests to inject error type detection without depending on grammy. */
|
|
19
19
|
isGrammyError?: (err: unknown) => boolean
|
|
20
|
+
/** Allow tests to inject HttpError detection without depending on grammy. */
|
|
21
|
+
isHttpError?: (err: unknown) => boolean
|
|
20
22
|
}
|
|
21
23
|
|
|
22
24
|
/**
|
|
@@ -42,9 +44,52 @@ export function classifyRejection(
|
|
|
42
44
|
? opts.isGrammyError(err)
|
|
43
45
|
: err instanceof GrammyError
|
|
44
46
|
|
|
47
|
+
// Transient network-layer failures: grammy throws an `HttpError` wrapping
|
|
48
|
+
// the underlying fetch failure (ECONNRESET, ETIMEDOUT, fetch failed, DNS
|
|
49
|
+
// failures, etc.). These are the SAME class `retry-api-call.ts:146-162`
|
|
50
|
+
// already retries with exponential backoff — if one leaks past the retry
|
|
51
|
+
// policy (3 attempts exhausted, or a fire-and-forget callsite without
|
|
52
|
+
// robustApiCall wrapping), crashing the gateway turns one bad packet into
|
|
53
|
+
// a crash banner. log_only is the right posture: the request failed, the
|
|
54
|
+
// user-visible UX recovers on the next retry cycle, and a daemon that
|
|
55
|
+
// crashes on network errors isn't always-on.
|
|
56
|
+
//
|
|
57
|
+
// Surfaced 2026-05-25 on clerk via the boot-card sendMessage path: an
|
|
58
|
+
// HttpError leaked past the boot-card's try/catch (the async post-settle
|
|
59
|
+
// probe-loop IIFE at boot-card.ts:616 had no .catch on its outer void),
|
|
60
|
+
// triggering an unhandledRejection → shutdown → user-visible
|
|
61
|
+
// "agent-crashed" banner for what was really just a transient network hiccup.
|
|
62
|
+
const isHttp =
|
|
63
|
+
opts.isHttpError != null
|
|
64
|
+
? opts.isHttpError(err)
|
|
65
|
+
: err instanceof HttpError
|
|
66
|
+
if (isHttp) return 'log_only'
|
|
67
|
+
|
|
45
68
|
if (!isGrammy) return 'shutdown'
|
|
46
69
|
|
|
47
70
|
const e = err as { error_code?: number; description?: string }
|
|
71
|
+
|
|
72
|
+
// 429 (Too Many Requests / flood-wait): Telegram's rate-limit response (a GrammyError).
|
|
73
|
+
// Already handled in retry-api-call.ts:100-108 with the
|
|
74
|
+
// `parameters.retry_after` backoff. If one leaks past — caller exceeded
|
|
75
|
+
// maxRetries=3 of sustained 429s, or didn't wrap in robustApiCall — the
|
|
76
|
+
// right posture is log_only (matches the HttpError rationale above).
|
|
77
|
+
// The bot is rate-limited; crashing makes it worse (boot fires more
|
|
78
|
+
// API calls that hit fresh 429s).
|
|
79
|
+
//
|
|
80
|
+
// Surfaced 2026-05-25 on clerk via a sendMessage that exceeded the 3-
|
|
81
|
+
// attempt retry budget; the rejection bubbled to this handler, triggered
|
|
82
|
+
// shutdown, and posted an "agent-crashed" operator-event banner.
|
|
83
|
+
if (e.error_code === 429) return 'log_only'
|
|
84
|
+
|
|
85
|
+
// 5xx (Bad Gateway / Service Unavailable / Gateway Timeout): Telegram
|
|
86
|
+
// intermittently returns these during their own load events. Same
|
|
87
|
+
// posture as 429 — retry policy already backs off and re-tries; if
|
|
88
|
+
// one leaks past, log — don't crash.
|
|
89
|
+
if (typeof e.error_code === 'number' && e.error_code >= 500 && e.error_code < 600) {
|
|
90
|
+
return 'log_only'
|
|
91
|
+
}
|
|
92
|
+
|
|
48
93
|
if (e.error_code !== 400) return 'shutdown'
|
|
49
94
|
|
|
50
95
|
const desc = (e.description ?? '').toLowerCase()
|