typeclaw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +134 -0
- package/auth.schema.json +63 -0
- package/cron.schema.json +96 -0
- package/package.json +72 -0
- package/scripts/emit-base-dockerfile.ts +5 -0
- package/scripts/generate-schema.ts +34 -0
- package/secrets.schema.json +63 -0
- package/src/agent/auth.ts +119 -0
- package/src/agent/compaction.ts +35 -0
- package/src/agent/git-nudge.ts +95 -0
- package/src/agent/index.ts +451 -0
- package/src/agent/plugin-tools.ts +269 -0
- package/src/agent/reload-tool.ts +71 -0
- package/src/agent/self.ts +45 -0
- package/src/agent/session-origin.ts +288 -0
- package/src/agent/subagents.ts +253 -0
- package/src/agent/system-prompt.ts +68 -0
- package/src/agent/tools/channel-fetch-attachment.ts +118 -0
- package/src/agent/tools/channel-history.ts +119 -0
- package/src/agent/tools/channel-reply.ts +182 -0
- package/src/agent/tools/channel-send.ts +212 -0
- package/src/agent/tools/ddg.ts +218 -0
- package/src/agent/tools/restart.ts +122 -0
- package/src/agent/tools/stream-snapshot.ts +181 -0
- package/src/agent/tools/webfetch/fetch.ts +102 -0
- package/src/agent/tools/webfetch/index.ts +1 -0
- package/src/agent/tools/webfetch/strategies/grep.ts +70 -0
- package/src/agent/tools/webfetch/strategies/jq.ts +31 -0
- package/src/agent/tools/webfetch/strategies/raw.ts +3 -0
- package/src/agent/tools/webfetch/strategies/readability.ts +30 -0
- package/src/agent/tools/webfetch/strategies/selector.ts +41 -0
- package/src/agent/tools/webfetch/strategies/snapshot.ts +135 -0
- package/src/agent/tools/webfetch/tool.ts +281 -0
- package/src/agent/tools/webfetch/types.ts +33 -0
- package/src/agent/tools/websearch.ts +96 -0
- package/src/agent/tools/wikipedia.ts +52 -0
- package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +170 -0
- package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +421 -0
- package/src/bundled-plugins/agent-browser/index.ts +179 -0
- package/src/bundled-plugins/agent-browser/shim-install.ts +158 -0
- package/src/bundled-plugins/agent-browser/shim.ts +152 -0
- package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +113 -0
- package/src/bundled-plugins/guard/index.ts +26 -0
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +98 -0
- package/src/bundled-plugins/guard/policies/skill-authoring.ts +185 -0
- package/src/bundled-plugins/guard/policies/uncommitted-changes.ts +85 -0
- package/src/bundled-plugins/guard/policy.ts +18 -0
- package/src/bundled-plugins/memory/README.md +71 -0
- package/src/bundled-plugins/memory/append-tool.ts +84 -0
- package/src/bundled-plugins/memory/dreaming-state.ts +86 -0
- package/src/bundled-plugins/memory/dreaming.ts +470 -0
- package/src/bundled-plugins/memory/fragment-parser.ts +67 -0
- package/src/bundled-plugins/memory/index.ts +238 -0
- package/src/bundled-plugins/memory/load-memory.ts +122 -0
- package/src/bundled-plugins/memory/memory-logger.ts +257 -0
- package/src/bundled-plugins/memory/secret-detector.ts +49 -0
- package/src/bundled-plugins/memory/watermark.ts +15 -0
- package/src/bundled-plugins/security/index.ts +35 -0
- package/src/bundled-plugins/security/policies/git-exfil.ts +120 -0
- package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +167 -0
- package/src/bundled-plugins/security/policies/prompt-injection.ts +488 -0
- package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +99 -0
- package/src/bundled-plugins/security/policies/secret-exfil-read.ts +127 -0
- package/src/bundled-plugins/security/policies/session-search-secrets.ts +86 -0
- package/src/bundled-plugins/security/policies/ssrf.ts +196 -0
- package/src/bundled-plugins/security/policies/system-prompt-leak.ts +81 -0
- package/src/bundled-plugins/security/policy.ts +9 -0
- package/src/channels/adapters/discord-bot-channel-resolver.ts +77 -0
- package/src/channels/adapters/discord-bot-classify.ts +148 -0
- package/src/channels/adapters/discord-bot.ts +640 -0
- package/src/channels/adapters/kakaotalk-author-resolver.ts +78 -0
- package/src/channels/adapters/kakaotalk-channel-resolver.ts +105 -0
- package/src/channels/adapters/kakaotalk-classify.ts +77 -0
- package/src/channels/adapters/kakaotalk.ts +622 -0
- package/src/channels/adapters/slack-bot-author-resolver.ts +80 -0
- package/src/channels/adapters/slack-bot-channel-resolver.ts +84 -0
- package/src/channels/adapters/slack-bot-classify.ts +213 -0
- package/src/channels/adapters/slack-bot-dedupe.ts +51 -0
- package/src/channels/adapters/slack-bot-time.ts +10 -0
- package/src/channels/adapters/slack-bot.ts +881 -0
- package/src/channels/adapters/telegram-bot-classify.ts +155 -0
- package/src/channels/adapters/telegram-bot-format.ts +309 -0
- package/src/channels/adapters/telegram-bot.ts +604 -0
- package/src/channels/engagement.ts +227 -0
- package/src/channels/index.ts +21 -0
- package/src/channels/manager.ts +292 -0
- package/src/channels/membership-cache.ts +116 -0
- package/src/channels/membership-from-history.ts +53 -0
- package/src/channels/membership.ts +30 -0
- package/src/channels/participants.ts +47 -0
- package/src/channels/persistence.ts +209 -0
- package/src/channels/reloadable.ts +28 -0
- package/src/channels/router.ts +1570 -0
- package/src/channels/schema.ts +273 -0
- package/src/channels/types.ts +160 -0
- package/src/cli/channel.ts +403 -0
- package/src/cli/compose-status.ts +95 -0
- package/src/cli/compose.ts +240 -0
- package/src/cli/hostd.ts +163 -0
- package/src/cli/index.ts +27 -0
- package/src/cli/init.ts +592 -0
- package/src/cli/logs.ts +38 -0
- package/src/cli/reload.ts +68 -0
- package/src/cli/restart.ts +66 -0
- package/src/cli/run.ts +77 -0
- package/src/cli/shell.ts +33 -0
- package/src/cli/start.ts +57 -0
- package/src/cli/status.ts +178 -0
- package/src/cli/stop.ts +31 -0
- package/src/cli/tui.ts +35 -0
- package/src/cli/ui.ts +110 -0
- package/src/commands/index.ts +74 -0
- package/src/compose/discover.ts +43 -0
- package/src/compose/index.ts +25 -0
- package/src/compose/logs.ts +162 -0
- package/src/compose/restart.ts +69 -0
- package/src/compose/start.ts +62 -0
- package/src/compose/status.ts +28 -0
- package/src/compose/stop.ts +43 -0
- package/src/config/config.ts +424 -0
- package/src/config/index.ts +25 -0
- package/src/config/providers.ts +234 -0
- package/src/config/reloadable.ts +47 -0
- package/src/container/index.ts +27 -0
- package/src/container/logs.ts +37 -0
- package/src/container/port.ts +137 -0
- package/src/container/shared.ts +290 -0
- package/src/container/shell.ts +58 -0
- package/src/container/start.ts +670 -0
- package/src/container/status.ts +76 -0
- package/src/container/stop.ts +120 -0
- package/src/container/verify-running.ts +149 -0
- package/src/cron/consumer.ts +138 -0
- package/src/cron/index.ts +54 -0
- package/src/cron/reloadable.ts +64 -0
- package/src/cron/scheduler.ts +200 -0
- package/src/cron/schema.ts +96 -0
- package/src/hostd/client.ts +113 -0
- package/src/hostd/daemon.ts +587 -0
- package/src/hostd/index.ts +25 -0
- package/src/hostd/paths.ts +82 -0
- package/src/hostd/portbroker-manager.ts +101 -0
- package/src/hostd/protocol.ts +48 -0
- package/src/hostd/spawn.ts +224 -0
- package/src/hostd/supervisor.ts +60 -0
- package/src/hostd/tailscale.ts +172 -0
- package/src/hostd/version.ts +115 -0
- package/src/init/dockerfile.ts +327 -0
- package/src/init/ensure-deps.ts +152 -0
- package/src/init/gitignore.ts +46 -0
- package/src/init/hatching.ts +60 -0
- package/src/init/index.ts +786 -0
- package/src/init/kakaotalk-auth.ts +114 -0
- package/src/init/models-dev.ts +130 -0
- package/src/init/oauth-login.ts +74 -0
- package/src/init/packagejson.ts +94 -0
- package/src/init/paths.ts +2 -0
- package/src/init/run-bun-install.ts +20 -0
- package/src/markdown/chunk.ts +299 -0
- package/src/markdown/index.ts +1 -0
- package/src/plugin/context.ts +40 -0
- package/src/plugin/define.ts +35 -0
- package/src/plugin/hooks.ts +204 -0
- package/src/plugin/index.ts +63 -0
- package/src/plugin/loader.ts +111 -0
- package/src/plugin/manager.ts +136 -0
- package/src/plugin/registry.ts +145 -0
- package/src/plugin/skills.ts +62 -0
- package/src/plugin/types.ts +172 -0
- package/src/portbroker/bind-with-forward.ts +102 -0
- package/src/portbroker/container-server.ts +305 -0
- package/src/portbroker/forward-result-bus.ts +36 -0
- package/src/portbroker/hostd-client.ts +443 -0
- package/src/portbroker/index.ts +33 -0
- package/src/portbroker/policy.ts +24 -0
- package/src/portbroker/proc-net-tcp.ts +72 -0
- package/src/portbroker/protocol.ts +39 -0
- package/src/reload/client.ts +59 -0
- package/src/reload/index.ts +3 -0
- package/src/reload/registry.ts +60 -0
- package/src/reload/types.ts +13 -0
- package/src/run/bundled-plugins.ts +24 -0
- package/src/run/channel-session-factory.ts +105 -0
- package/src/run/index.ts +432 -0
- package/src/run/plugin-runtime.ts +43 -0
- package/src/run/schema-with-plugins.ts +14 -0
- package/src/secrets/index.ts +13 -0
- package/src/secrets/migrate.ts +95 -0
- package/src/secrets/schema.ts +75 -0
- package/src/secrets/storage.ts +231 -0
- package/src/server/index.ts +436 -0
- package/src/sessions/index.ts +23 -0
- package/src/shared/index.ts +9 -0
- package/src/shared/local-time.ts +21 -0
- package/src/shared/protocol.ts +25 -0
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +87 -0
- package/src/skills/typeclaw-channel-telegram-bot/SKILL.md +64 -0
- package/src/skills/typeclaw-config/SKILL.md +643 -0
- package/src/skills/typeclaw-cron/SKILL.md +159 -0
- package/src/skills/typeclaw-git/SKILL.md +89 -0
- package/src/skills/typeclaw-memory/SKILL.md +174 -0
- package/src/skills/typeclaw-monorepo/SKILL.md +175 -0
- package/src/skills/typeclaw-plugins/SKILL.md +594 -0
- package/src/skills/typeclaw-skills/SKILL.md +246 -0
- package/src/stream/broker.ts +161 -0
- package/src/stream/index.ts +16 -0
- package/src/stream/types.ts +69 -0
- package/src/tui/client.ts +45 -0
- package/src/tui/format.ts +317 -0
- package/src/tui/index.ts +225 -0
- package/src/tui/theme.ts +41 -0
- package/typeclaw.schema.json +826 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import type { TelegramBotUser, TelegramMessage, TelegramMessageEntity } from 'agent-messenger/telegrambot'
|
|
2
|
+
|
|
3
|
+
import { isAllowed, type ChannelAdapterConfig } from '@/channels/schema'
|
|
4
|
+
import type { InboundMessage } from '@/channels/types'
|
|
5
|
+
|
|
6
|
+
// Reasons an inbound Telegram message can be discarded instead of routed.
export type InboundDropReason =
  | 'self_author'
  | 'no_user'
  | 'empty_text'
  | 'not_in_allow_list'
  | 'pre_connect'
|
|
7
|
+
|
|
8
|
+
// Outcome of classifying one inbound message: either it is routed with a
// fully built payload, or it is dropped with the reason recorded.
export type InboundClassification =
  | { kind: 'route'; payload: InboundMessage }
  | { kind: 'drop'; reason: InboundDropReason }
|
|
11
|
+
|
|
12
|
+
// Single `workspace` key shared by every Telegram chat; used both for the
// allow-list lookup and for the routed payload.
export const TELEGRAM_WORKSPACE = 'telegram'
|
|
13
|
+
|
|
14
|
+
// Telegram has no notion of a team or guild: each chat is addressed by a
// single signed numeric id. Every message is therefore filed under the one
// `TELEGRAM_WORKSPACE` bucket, which gives allow-rules such as `tg:*` and
// `tg:<chat_id>` a stable key to match against. The router treats `private`
// chats (DMs) like any other chat; `isDm` is derived from `chat.type` so
// the engagement layer can apply its DM-specific trigger.
//
// Drop checks run in this order: missing author, message authored by the
// bot itself, empty text, chat not in the allow list, bot identity not yet
// known (`bot === null`). Anything that survives is returned as a `route`
// classification carrying the full payload.
export function classifyInbound(
  event: TelegramMessage,
  config: ChannelAdapterConfig,
  bot: TelegramBotUser | null,
): InboundClassification {
  const drop = (reason: InboundDropReason): InboundClassification => ({ kind: 'drop', reason })

  const sender = event.from
  if (sender === undefined) return drop('no_user')
  if (bot !== null && sender.id === bot.id) return drop('self_author')

  const text = inboundText(event)
  if (text === '') return drop('empty_text')

  const chat = String(event.chat.id)
  if (!isAllowed(config.allow, TELEGRAM_WORKSPACE, chat)) return drop('not_in_allow_list')

  if (bot === null) return drop('pre_connect')

  const botId = bot.id
  const botUsername = bot.username
  const isDm = event.chat.type === 'private'
  const entities = event.entities ?? event.caption_entities ?? []
  const rawText = event.text ?? event.caption ?? ''
  const isBotMention = isDm || mentionsBot(entities, rawText, botId, botUsername)

  // Same semantics as the Discord/Slack adapters: `mentionsOthers` holds
  // only when the message tags at least one user and none of those tags
  // resolve to the bot. A message that tags the bot among others is still
  // (partly) addressed to us, so it must not count as "tagged someone else".
  const userMentions = entities.filter(isUserMentionEntity)
  const botIsTagged = userMentions.some((entity) => isUserMentionForBot(entity, rawText, botId, botUsername))
  const mentionsOthers = userMentions.length > 0 && !botIsTagged

  // A reply targets either one of the bot's own messages or somebody
  // else's; exactly one of the two ids is set when a parent exists.
  let replyToBotMessageId: string | null = null
  let replyToOtherMessageId: string | null = null
  const parent = event.reply_to_message
  if (parent !== undefined) {
    const parentId = String(parent.message_id)
    if (parent.from?.id === botId) {
      replyToBotMessageId = parentId
    } else {
      replyToOtherMessageId = parentId
    }
  }

  const thread = event.message_thread_id === undefined ? null : String(event.message_thread_id)

  const payload: InboundMessage = {
    adapter: 'telegram-bot',
    workspace: TELEGRAM_WORKSPACE,
    chat,
    thread,
    text,
    externalMessageId: String(event.message_id),
    authorId: String(sender.id),
    authorName: formatAuthorName(sender),
    authorIsBot: sender.is_bot === true,
    isBotMention,
    replyToBotMessageId,
    mentionsOthers,
    replyToOtherMessageId,
    isDm,
    // `event.date` is multiplied by 1000 (presumably seconds -> ms; confirm against the Bot API type).
    ts: event.date * 1000,
  }
  return { kind: 'route', payload }
}
|
|
88
|
+
|
|
89
|
+
// Display name for a Telegram user: the username when it is set and
// non-empty, otherwise the first name followed by the last name if any.
function formatAuthorName(user: TelegramBotUser): string {
  const handle = user.username
  if (handle !== undefined && handle !== '') return handle

  const surname = user.last_name ?? ''
  if (surname === '') return user.first_name
  return `${user.first_name} ${surname}`
}
|
|
94
|
+
|
|
95
|
+
// Reports whether any entity in the message addresses the bot directly:
// either a `text_mention` entity whose user id is the bot's id, or an
// `@username` `mention` entity whose text equals the bot's username
// (compared case-insensitively).
//
// Telegram's privacy mode also delivers replies to bot messages and slash
// commands, but neither is detected here: only `mention` / `text_mention`
// entities are inspected. `classifyInbound` reports replies to the bot
// separately through `replyToBotMessageId`.
//
// The per-entity test is delegated to `isUserMentionForBot` so the two
// call sites cannot drift apart.
function mentionsBot(
  entities: readonly TelegramMessageEntity[],
  fullText: string,
  botId: number,
  botUsername: string | undefined,
): boolean {
  return entities.some((entity) => isUserMentionForBot(entity, fullText, botId, botUsername))
}
|
|
114
|
+
|
|
115
|
+
// True for the two entity kinds that tag a user: `mention` (an `@username`
// in the text) and `text_mention` (an entity carrying a user object).
function isUserMentionEntity(entity: TelegramMessageEntity): boolean {
  switch (entity.type) {
    case 'mention':
    case 'text_mention':
      return true
    default:
      return false
  }
}
|
|
118
|
+
|
|
119
|
+
// Decides whether a single entity tags the bot.
//  - `text_mention`: the attached user's id must equal `botId`.
//  - `mention`: the slice of `fullText` covered by the entity must equal
//    `@<botUsername>` ignoring case; never matches when the bot has no
//    username.
//  - any other entity type: never matches.
function isUserMentionForBot(
  entity: TelegramMessageEntity,
  fullText: string,
  botId: number,
  botUsername: string | undefined,
): boolean {
  switch (entity.type) {
    case 'text_mention':
      return entity.user?.id === botId
    case 'mention': {
      if (botUsername === undefined) return false
      const end = entity.offset + entity.length
      const handle = fullText.slice(entity.offset, end).toLowerCase()
      return handle === `@${botUsername.toLowerCase()}`
    }
    default:
      return false
  }
}
|
|
134
|
+
|
|
135
|
+
// Text handed to the router for a message: the text (or caption) followed,
// when the message carries media, by a bracketed one-line media summary.
// Returns '' only when there is neither text/caption nor summarized media.
function inboundText(event: TelegramMessage): string {
  const written = event.text ?? event.caption ?? ''
  const attachments = summarizeMedia(event)
  if (attachments.length === 0) return written

  const note = `[Telegram message with ${attachments.join('; ')}]`
  if (written === '') return note
  return `${written}\n${note}`
}
|
|
142
|
+
|
|
143
|
+
// Builds one short description per attached document / photo, each ending
// in the `file_id`. Returns an empty array when the message has neither.
function summarizeMedia(event: TelegramMessage): string[] {
  const summaries: string[] = []

  const doc = event.document
  if (doc !== undefined) {
    // Fall back to the file id when the document has no file name.
    const label = doc.file_name ?? doc.file_id
    const mimeSuffix = doc.mime_type === undefined ? '' : ` (${doc.mime_type})`
    summaries.push(`document: ${label}${mimeSuffix} file_id=${doc.file_id}`)
  }

  const sizes = event.photo
  if (sizes !== undefined && sizes.length > 0) {
    // The last entry is taken as the largest rendition — assumes the array
    // is ordered by ascending size; TODO confirm against the Bot API.
    const { width, height, file_id } = sizes[sizes.length - 1]!
    summaries.push(`photo: ${width}x${height} file_id=${file_id}`)
  }

  return summaries
}
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
// Convert the model's common-Markdown output to valid Telegram MarkdownV2.
|
|
2
|
+
//
|
|
3
|
+
// Why this exists: the agent writes Markdown the way a human would type
|
|
4
|
+
// it (`**bold**`, `*italic*`, `` `code` ``, fenced blocks, `[text](url)`).
|
|
5
|
+
// Telegram's MarkdownV2 parser is unforgiving — any unescaped special
|
|
6
|
+
// char (`_*[]()~``>#+-=|{}.!\`) outside an entity returns
|
|
7
|
+
// `Bad Request: can't parse entities` and the whole message is rejected.
|
|
8
|
+
// A plain-text send keeps the message intact at the cost of literal
|
|
9
|
+
// `**bold**` artifacts; HTML-mode would still need every `<&>` escaped.
|
|
10
|
+
// MarkdownV2 with smart escaping is the only mode where the agent gets
|
|
11
|
+
// rendered formatting AND raw user text never crashes the parser.
|
|
12
|
+
//
|
|
13
|
+
// Strategy: walk the source, recognize a small fixed set of inline and
|
|
14
|
+
// block constructs, emit MarkdownV2 with the right escape rules per
|
|
15
|
+
// region. Anything we don't recognize (headings, list markers, blockquote
|
|
16
|
+
// arrows, raw special chars) falls through as escaped literal text —
|
|
17
|
+
// MarkdownV2 has no native heading or list rendering anyway, so this
|
|
18
|
+
// matches what Telegram can actually display rather than failing.
|
|
19
|
+
//
|
|
20
|
+
// Telegram entity rules (https://core.telegram.org/bots/api#markdownv2-style):
|
|
21
|
+
// - Outside entities: escape `_ * [ ] ( ) ~ ` > # + - = | { } . !`.
|
|
22
|
+
// Backslash is the escape char, so a literal `\` must be `\\`.
|
|
23
|
+
// - Inside `code` / `pre`: escape `` ` `` and `\` only — every other
|
|
24
|
+
// special char is literal.
|
|
25
|
+
// - Inside link `(url)`: escape `)` and `\` only.
|
|
26
|
+
// - Inside link `[text]`: full outside-entity rules apply.
|
|
27
|
+
|
|
28
|
+
// Characters that must be backslash-escaped in plain MarkdownV2 text:
// \ _ * [ ] ( ) ~ ` > # + = | { } . ! and - (placed last so it is literal).
const SPECIAL_CHARS_OUTSIDE = /[\\_*[\]()~`>#+=|{}.!-]/g
// Inside `code` / `pre` only the backslash and the backtick are escaped.
const SPECIAL_CHARS_CODE = /[\\`]/g
// Inside the `(url)` part of a link only the backslash and `)` are escaped.
const SPECIAL_CHARS_LINK_URL = /[\\)]/g
|
|
31
|
+
|
|
32
|
+
// Escapes `text` for use as plain MarkdownV2 text: every character matched
// by SPECIAL_CHARS_OUTSIDE is prefixed with a backslash.
export function escapeMarkdownV2(text: string): string {
  return text.replace(SPECIAL_CHARS_OUTSIDE, (hit) => '\\' + hit)
}
|
|
35
|
+
|
|
36
|
+
// Escapes the body of a `code` / `pre` entity: only characters matched by
// SPECIAL_CHARS_CODE get a backslash prefix; all others stay as written.
function escapeCodeContent(text: string): string {
  return text.replace(SPECIAL_CHARS_CODE, (hit) => '\\' + hit)
}
|
|
39
|
+
|
|
40
|
+
// Escapes the `(url)` part of an inline link: only characters matched by
// SPECIAL_CHARS_LINK_URL get a backslash prefix.
function escapeLinkUrl(url: string): string {
  return url.replace(SPECIAL_CHARS_LINK_URL, (hit) => '\\' + hit)
}
|
|
43
|
+
|
|
44
|
+
// Public entry point. Takes the agent's raw text (common Markdown) and
// returns a string meant to be sent with `parse_mode: 'MarkdownV2'`.
// Formatting conversion is best-effort; anything that is not part of a
// recognized construct is emitted as escaped literal text.
//
// The input is split on ``` fences first so that fenced code is never
// re-tokenized as inline Markdown (a `*` inside a fence is literal, not
// italic):
//   - text between fences goes through `renderInline`
//   - a terminated fence goes through `renderFence`
//   - an unterminated fence has no closing ```; its three opening backticks
//     are emitted as escaped literals and everything after them is rendered
//     inline, so nothing is swallowed and the loop always terminates.
export function toTelegramMarkdownV2(input: string): string {
  const FENCE = '```'
  const pieces: string[] = []
  let pos = 0

  while (pos < input.length) {
    if (matchesAt(input, pos, FENCE)) {
      const bodyStart = pos + FENCE.length
      const fenceEnd = findFenceEnd(input, bodyStart)
      if (fenceEnd === -1) {
        // Escape the opener here instead of passing it to the inline
        // tokenizer, which would pair the first two backticks into an empty
        // code span and drop them from the visible text.
        pieces.push('\\`\\`\\`', renderInline(input.slice(bodyStart)))
        break
      }
      pieces.push(renderFence(input.slice(bodyStart, fenceEnd)))
      pos = fenceEnd + FENCE.length
      continue
    }

    // Plain segment: runs up to the next fence opener or the end of input.
    const nextFence = input.indexOf(FENCE, pos)
    const segmentEnd = nextFence === -1 ? input.length : nextFence
    pieces.push(renderInline(input.slice(pos, segmentEnd)))
    pos = segmentEnd
  }

  return pieces.join('')
}
|
|
79
|
+
|
|
80
|
+
// True when the substring of `s` starting at `idx`, cut to `needle`'s
// length, is exactly `needle`.
function matchesAt(s: string, idx: number, needle: string): boolean {
  const window = s.slice(idx, idx + needle.length)
  return window === needle
}
|
|
83
|
+
|
|
84
|
+
// Index of the closing ``` for a fence whose body starts at `start`, or -1
// when there is none. The first ``` at or after `start` is taken as the
// close; it is not required to sit on its own line, because the agent often
// writes the closing fence directly after the code.
function findFenceEnd(s: string, start: number): number {
  const closeAt = s.indexOf('```', start)
  return closeAt
}
|
|
90
|
+
|
|
91
|
+
// Renders the text between two ``` markers as a MarkdownV2 `pre` block.
//
// The first line is treated as a language tag when it consists only of
// letters, digits and `_ + - .` after trimming; it is then emitted right
// after the opening fence. A blank first line is dropped so the block does
// not start with an empty line. Any other first line stays part of the
// body. One trailing newline is removed from the body, and the body is
// escaped with the code rules before being wrapped.
function renderFence(inner: string): string {
  const languageTag = /^[A-Za-z0-9_+\-.]+$/
  let lang = ''
  let body = inner

  const firstBreak = inner.indexOf('\n')
  if (firstBreak >= 0) {
    const header = inner.slice(0, firstBreak).trim()
    const rest = inner.slice(firstBreak + 1)
    if (header === '') {
      body = rest
    } else if (languageTag.test(header)) {
      lang = header
      body = rest
    }
  }

  if (body.endsWith('\n')) {
    body = body.slice(0, -1)
  }

  // With an empty `lang` this yields the bare-fence form.
  return '```' + lang + '\n' + escapeCodeContent(body) + '\n```'
}
|
|
113
|
+
|
|
114
|
+
// Inline tokenizer. At each position it tries, in priority order:
//   1. Inline code:    `code`
//   2. Links:          [text](url)
//   3. Bold:           **text** or __text__   -> emitted as *text*
//   4. Strikethrough:  ~~text~~               -> emitted as ~text~
//   5. Spoiler:        ||text||               -> emitted as ||text||
//   6. Italic:         *text* or _text_       -> emitted as _text_
// and otherwise emits the current character as escaped literal text.
//
// Italic is tried LAST because `**` would otherwise be consumed as two
// italic markers. Both bold spellings map to `*` and both italic spellings
// map to `_`: in MarkdownV2 `__` means underline, so emitting only `*` for
// bold and `_` for italic avoids the underline-vs-italic ambiguity.
//
// Constructs with empty content (`****`, an empty code span, ...) are not
// emitted as entities; their marker characters fall through as escaped
// literals so no character of the input disappears.

// A recognized construct: its MarkdownV2 rendering and the index at which
// scanning resumes.
type InlineSpan = { rendered: string; next: number }

// Two-character paired markers. `wordGuard` applies the snake_case
// protection: the opener must not follow a word character and the closer
// must not precede one (`my__var__name` is an identifier, not bold).
const PAIRED_INLINE_MARKERS: ReadonlyArray<{ source: string; target: string; wordGuard: boolean }> = [
  { source: '**', target: '*', wordGuard: false },
  { source: '__', target: '*', wordGuard: true },
  { source: '~~', target: '~', wordGuard: false },
  { source: '||', target: '||', wordGuard: false },
]

// Italic content may not begin or end with whitespace.
const ITALIC_EDGE_WHITESPACE = /^\s|\s$/

function renderInline(text: string): string {
  const out: string[] = []
  let i = 0
  while (i < text.length) {
    const span =
      matchCodeSpan(text, i) ?? matchLinkSpan(text, i) ?? matchPairedSpan(text, i) ?? matchItalicSpan(text, i)
    if (span !== null) {
      out.push(span.rendered)
      i = span.next
      continue
    }
    // Not part of any construct: emit the character, escaped when special.
    out.push(escapeMarkdownV2(text[i]!))
    i++
  }
  return out.join('')
}

// Inline code: a backtick up to the next backtick, with non-empty content.
function matchCodeSpan(text: string, i: number): InlineSpan | null {
  if (text[i] !== '`') return null
  const close = text.indexOf('`', i + 1)
  // `close <= i + 1` covers both "no closing backtick" (-1) and an empty span.
  if (close <= i + 1) return null
  return { rendered: '`' + escapeCodeContent(text.slice(i + 1, close)) + '`', next: close + 1 }
}

// Link: the label is rendered recursively, the URL gets the link-URL escaping.
function matchLinkSpan(text: string, i: number): InlineSpan | null {
  if (text[i] !== '[') return null
  const link = parseLink(text, i)
  if (link === null) return null
  return { rendered: '[' + renderInline(link.label) + '](' + escapeLinkUrl(link.url) + ')', next: link.end }
}

// Bold / strikethrough / spoiler: two-character markers, content may span
// lines, empty content is rejected.
function matchPairedSpan(text: string, i: number): InlineSpan | null {
  for (const { source, target, wordGuard } of PAIRED_INLINE_MARKERS) {
    if (!matchesAt(text, i, source)) continue
    if (wordGuard && isWordChar(text[i - 1])) continue
    const contentStart = i + source.length
    const close = findClose(text, contentStart, source)
    // Covers "not found" (-1) and a closer immediately after the opener.
    if (close <= contentStart) continue
    if (wordGuard && isWordChar(text[close + source.length])) continue
    return {
      rendered: target + renderInline(text.slice(contentStart, close)) + target,
      next: close + source.length,
    }
  }
  return null
}

// Italic: single `*` or `_`, guarded by word boundaries on BOTH sides so
// `a*b*c` and `var_name` stay literal; content must be non-empty, on one
// line, and must not start or end with whitespace.
function matchItalicSpan(text: string, i: number): InlineSpan | null {
  const marker = text[i]
  if (marker !== '*' && marker !== '_') return null
  if (isWordChar(text[i - 1])) return null
  const close = findInlineClose(text, i + 1, marker)
  if (close === -1 || isWordChar(text[close + 1])) return null
  const inner = text.slice(i + 1, close)
  if (inner === '' || ITALIC_EDGE_WHITESPACE.test(inner)) return null
  return { rendered: '_' + renderInline(inner) + '_', next: close + 1 }
}
|
|
231
|
+
|
|
232
|
+
// Locates the closing marker for a paired `**` / `__` / `~~` / `||`.
// Scans forward from `from` and returns the index of the first occurrence
// of `marker`, skipping any character that directly follows a backslash.
// Line breaks do not stop the scan, since the model often emits multi-line
// bold. Returns -1 when no closer exists.
function findClose(text: string, from: number, marker: string): number {
  const lastStart = text.length - marker.length
  let pos = from
  while (pos <= lastStart) {
    if (text[pos] === '\\') {
      // Jump over the backslash and the character it escapes.
      pos += 2
    } else if (matchesAt(text, pos, marker)) {
      return pos
    } else {
      pos += 1
    }
  }
  return -1
}
|
|
248
|
+
|
|
249
|
+
// Single-line variant of findClose, used for single-character italic
// markers. Returns the index of the first unescaped `marker` at or after
// `from`, or -1 when a line break or the end of the text comes first, so a
// stray `*` cannot stretch across paragraphs.
function findInlineClose(text: string, from: number, marker: string): number {
  for (let pos = from; pos < text.length; ) {
    const current = text[pos]
    if (current === '\n') break
    if (current === '\\') {
      // Jump over the backslash and the character it escapes.
      pos += 2
      continue
    }
    if (matchesAt(text, pos, marker)) return pos
    pos += 1
  }
  return -1
}
|
|
264
|
+
|
|
265
|
+
// Parses `[label](url)` where `text[start]` is the opening `[`.
// Returns the raw label, the raw url and the index just past the closing
// `)`, or null when the text at `start` is not a well-formed link.
//
// The label ends at the first unescaped `]`, which must be followed
// directly by `(`; the url ends at the first unescaped `)`. A newline in
// either part disqualifies the match (the model meant a literal bracket).
// An unescaped `(` inside the url also disqualifies it: with URLs such as
// Wikipedia's `Foo_(bar)` the inner `)` would close the link early and
// mangle the output, so the text is left to be escaped as plain characters.
function parseLink(text: string, start: number): { label: string; url: string; end: number } | null {
  const labelStart = start + 1
  const labelEnd = scanLinkPart(text, labelStart, ']', false)
  if (labelEnd === -1 || text[labelEnd + 1] !== '(') return null

  const urlStart = labelEnd + 2
  const urlEnd = scanLinkPart(text, urlStart, ')', true)
  if (urlEnd === -1) return null

  return {
    label: text.slice(labelStart, labelEnd),
    url: text.slice(urlStart, urlEnd),
    end: urlEnd + 1,
  }
}

// Scans from `from` to the first unescaped `terminator` and returns its
// index. Returns -1 when a newline, an unescaped `(` (only if
// `rejectOpenParen`), or the end of the text is reached first.
function scanLinkPart(text: string, from: number, terminator: string, rejectOpenParen: boolean): number {
  let pos = from
  while (pos < text.length) {
    const c = text[pos]!
    if (c === '\\') {
      // Jump over the backslash and the character it escapes.
      pos += 2
      continue
    }
    if (c === terminator) return pos
    if (c === '\n') return -1
    if (rejectOpenParen && c === '(') return -1
    pos++
  }
  return -1
}
|
|
305
|
+
|
|
306
|
+
// True when `ch` contains an ASCII letter, digit or underscore. `undefined`
// (a position outside the string) counts as a non-word character.
function isWordChar(ch: string | undefined): boolean {
  // Without the `u`/`i` flags, `\w` is exactly [A-Za-z0-9_].
  return ch !== undefined && /\w/.test(ch)
}
|