typeclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +134 -0
  3. package/auth.schema.json +63 -0
  4. package/cron.schema.json +96 -0
  5. package/package.json +72 -0
  6. package/scripts/emit-base-dockerfile.ts +5 -0
  7. package/scripts/generate-schema.ts +34 -0
  8. package/secrets.schema.json +63 -0
  9. package/src/agent/auth.ts +119 -0
  10. package/src/agent/compaction.ts +35 -0
  11. package/src/agent/git-nudge.ts +95 -0
  12. package/src/agent/index.ts +451 -0
  13. package/src/agent/plugin-tools.ts +269 -0
  14. package/src/agent/reload-tool.ts +71 -0
  15. package/src/agent/self.ts +45 -0
  16. package/src/agent/session-origin.ts +288 -0
  17. package/src/agent/subagents.ts +253 -0
  18. package/src/agent/system-prompt.ts +68 -0
  19. package/src/agent/tools/channel-fetch-attachment.ts +118 -0
  20. package/src/agent/tools/channel-history.ts +119 -0
  21. package/src/agent/tools/channel-reply.ts +182 -0
  22. package/src/agent/tools/channel-send.ts +212 -0
  23. package/src/agent/tools/ddg.ts +218 -0
  24. package/src/agent/tools/restart.ts +122 -0
  25. package/src/agent/tools/stream-snapshot.ts +181 -0
  26. package/src/agent/tools/webfetch/fetch.ts +102 -0
  27. package/src/agent/tools/webfetch/index.ts +1 -0
  28. package/src/agent/tools/webfetch/strategies/grep.ts +70 -0
  29. package/src/agent/tools/webfetch/strategies/jq.ts +31 -0
  30. package/src/agent/tools/webfetch/strategies/raw.ts +3 -0
  31. package/src/agent/tools/webfetch/strategies/readability.ts +30 -0
  32. package/src/agent/tools/webfetch/strategies/selector.ts +41 -0
  33. package/src/agent/tools/webfetch/strategies/snapshot.ts +135 -0
  34. package/src/agent/tools/webfetch/tool.ts +281 -0
  35. package/src/agent/tools/webfetch/types.ts +33 -0
  36. package/src/agent/tools/websearch.ts +96 -0
  37. package/src/agent/tools/wikipedia.ts +52 -0
  38. package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +170 -0
  39. package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +421 -0
  40. package/src/bundled-plugins/agent-browser/index.ts +179 -0
  41. package/src/bundled-plugins/agent-browser/shim-install.ts +158 -0
  42. package/src/bundled-plugins/agent-browser/shim.ts +152 -0
  43. package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +113 -0
  44. package/src/bundled-plugins/guard/index.ts +26 -0
  45. package/src/bundled-plugins/guard/policies/non-workspace-write.ts +98 -0
  46. package/src/bundled-plugins/guard/policies/skill-authoring.ts +185 -0
  47. package/src/bundled-plugins/guard/policies/uncommitted-changes.ts +85 -0
  48. package/src/bundled-plugins/guard/policy.ts +18 -0
  49. package/src/bundled-plugins/memory/README.md +71 -0
  50. package/src/bundled-plugins/memory/append-tool.ts +84 -0
  51. package/src/bundled-plugins/memory/dreaming-state.ts +86 -0
  52. package/src/bundled-plugins/memory/dreaming.ts +470 -0
  53. package/src/bundled-plugins/memory/fragment-parser.ts +67 -0
  54. package/src/bundled-plugins/memory/index.ts +238 -0
  55. package/src/bundled-plugins/memory/load-memory.ts +122 -0
  56. package/src/bundled-plugins/memory/memory-logger.ts +257 -0
  57. package/src/bundled-plugins/memory/secret-detector.ts +49 -0
  58. package/src/bundled-plugins/memory/watermark.ts +15 -0
  59. package/src/bundled-plugins/security/index.ts +35 -0
  60. package/src/bundled-plugins/security/policies/git-exfil.ts +120 -0
  61. package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +167 -0
  62. package/src/bundled-plugins/security/policies/prompt-injection.ts +488 -0
  63. package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +99 -0
  64. package/src/bundled-plugins/security/policies/secret-exfil-read.ts +127 -0
  65. package/src/bundled-plugins/security/policies/session-search-secrets.ts +86 -0
  66. package/src/bundled-plugins/security/policies/ssrf.ts +196 -0
  67. package/src/bundled-plugins/security/policies/system-prompt-leak.ts +81 -0
  68. package/src/bundled-plugins/security/policy.ts +9 -0
  69. package/src/channels/adapters/discord-bot-channel-resolver.ts +77 -0
  70. package/src/channels/adapters/discord-bot-classify.ts +148 -0
  71. package/src/channels/adapters/discord-bot.ts +640 -0
  72. package/src/channels/adapters/kakaotalk-author-resolver.ts +78 -0
  73. package/src/channels/adapters/kakaotalk-channel-resolver.ts +105 -0
  74. package/src/channels/adapters/kakaotalk-classify.ts +77 -0
  75. package/src/channels/adapters/kakaotalk.ts +622 -0
  76. package/src/channels/adapters/slack-bot-author-resolver.ts +80 -0
  77. package/src/channels/adapters/slack-bot-channel-resolver.ts +84 -0
  78. package/src/channels/adapters/slack-bot-classify.ts +213 -0
  79. package/src/channels/adapters/slack-bot-dedupe.ts +51 -0
  80. package/src/channels/adapters/slack-bot-time.ts +10 -0
  81. package/src/channels/adapters/slack-bot.ts +881 -0
  82. package/src/channels/adapters/telegram-bot-classify.ts +155 -0
  83. package/src/channels/adapters/telegram-bot-format.ts +309 -0
  84. package/src/channels/adapters/telegram-bot.ts +604 -0
  85. package/src/channels/engagement.ts +227 -0
  86. package/src/channels/index.ts +21 -0
  87. package/src/channels/manager.ts +292 -0
  88. package/src/channels/membership-cache.ts +116 -0
  89. package/src/channels/membership-from-history.ts +53 -0
  90. package/src/channels/membership.ts +30 -0
  91. package/src/channels/participants.ts +47 -0
  92. package/src/channels/persistence.ts +209 -0
  93. package/src/channels/reloadable.ts +28 -0
  94. package/src/channels/router.ts +1570 -0
  95. package/src/channels/schema.ts +273 -0
  96. package/src/channels/types.ts +160 -0
  97. package/src/cli/channel.ts +403 -0
  98. package/src/cli/compose-status.ts +95 -0
  99. package/src/cli/compose.ts +240 -0
  100. package/src/cli/hostd.ts +163 -0
  101. package/src/cli/index.ts +27 -0
  102. package/src/cli/init.ts +592 -0
  103. package/src/cli/logs.ts +38 -0
  104. package/src/cli/reload.ts +68 -0
  105. package/src/cli/restart.ts +66 -0
  106. package/src/cli/run.ts +77 -0
  107. package/src/cli/shell.ts +33 -0
  108. package/src/cli/start.ts +57 -0
  109. package/src/cli/status.ts +178 -0
  110. package/src/cli/stop.ts +31 -0
  111. package/src/cli/tui.ts +35 -0
  112. package/src/cli/ui.ts +110 -0
  113. package/src/commands/index.ts +74 -0
  114. package/src/compose/discover.ts +43 -0
  115. package/src/compose/index.ts +25 -0
  116. package/src/compose/logs.ts +162 -0
  117. package/src/compose/restart.ts +69 -0
  118. package/src/compose/start.ts +62 -0
  119. package/src/compose/status.ts +28 -0
  120. package/src/compose/stop.ts +43 -0
  121. package/src/config/config.ts +424 -0
  122. package/src/config/index.ts +25 -0
  123. package/src/config/providers.ts +234 -0
  124. package/src/config/reloadable.ts +47 -0
  125. package/src/container/index.ts +27 -0
  126. package/src/container/logs.ts +37 -0
  127. package/src/container/port.ts +137 -0
  128. package/src/container/shared.ts +290 -0
  129. package/src/container/shell.ts +58 -0
  130. package/src/container/start.ts +670 -0
  131. package/src/container/status.ts +76 -0
  132. package/src/container/stop.ts +120 -0
  133. package/src/container/verify-running.ts +149 -0
  134. package/src/cron/consumer.ts +138 -0
  135. package/src/cron/index.ts +54 -0
  136. package/src/cron/reloadable.ts +64 -0
  137. package/src/cron/scheduler.ts +200 -0
  138. package/src/cron/schema.ts +96 -0
  139. package/src/hostd/client.ts +113 -0
  140. package/src/hostd/daemon.ts +587 -0
  141. package/src/hostd/index.ts +25 -0
  142. package/src/hostd/paths.ts +82 -0
  143. package/src/hostd/portbroker-manager.ts +101 -0
  144. package/src/hostd/protocol.ts +48 -0
  145. package/src/hostd/spawn.ts +224 -0
  146. package/src/hostd/supervisor.ts +60 -0
  147. package/src/hostd/tailscale.ts +172 -0
  148. package/src/hostd/version.ts +115 -0
  149. package/src/init/dockerfile.ts +327 -0
  150. package/src/init/ensure-deps.ts +152 -0
  151. package/src/init/gitignore.ts +46 -0
  152. package/src/init/hatching.ts +60 -0
  153. package/src/init/index.ts +786 -0
  154. package/src/init/kakaotalk-auth.ts +114 -0
  155. package/src/init/models-dev.ts +130 -0
  156. package/src/init/oauth-login.ts +74 -0
  157. package/src/init/packagejson.ts +94 -0
  158. package/src/init/paths.ts +2 -0
  159. package/src/init/run-bun-install.ts +20 -0
  160. package/src/markdown/chunk.ts +299 -0
  161. package/src/markdown/index.ts +1 -0
  162. package/src/plugin/context.ts +40 -0
  163. package/src/plugin/define.ts +35 -0
  164. package/src/plugin/hooks.ts +204 -0
  165. package/src/plugin/index.ts +63 -0
  166. package/src/plugin/loader.ts +111 -0
  167. package/src/plugin/manager.ts +136 -0
  168. package/src/plugin/registry.ts +145 -0
  169. package/src/plugin/skills.ts +62 -0
  170. package/src/plugin/types.ts +172 -0
  171. package/src/portbroker/bind-with-forward.ts +102 -0
  172. package/src/portbroker/container-server.ts +305 -0
  173. package/src/portbroker/forward-result-bus.ts +36 -0
  174. package/src/portbroker/hostd-client.ts +443 -0
  175. package/src/portbroker/index.ts +33 -0
  176. package/src/portbroker/policy.ts +24 -0
  177. package/src/portbroker/proc-net-tcp.ts +72 -0
  178. package/src/portbroker/protocol.ts +39 -0
  179. package/src/reload/client.ts +59 -0
  180. package/src/reload/index.ts +3 -0
  181. package/src/reload/registry.ts +60 -0
  182. package/src/reload/types.ts +13 -0
  183. package/src/run/bundled-plugins.ts +24 -0
  184. package/src/run/channel-session-factory.ts +105 -0
  185. package/src/run/index.ts +432 -0
  186. package/src/run/plugin-runtime.ts +43 -0
  187. package/src/run/schema-with-plugins.ts +14 -0
  188. package/src/secrets/index.ts +13 -0
  189. package/src/secrets/migrate.ts +95 -0
  190. package/src/secrets/schema.ts +75 -0
  191. package/src/secrets/storage.ts +231 -0
  192. package/src/server/index.ts +436 -0
  193. package/src/sessions/index.ts +23 -0
  194. package/src/shared/index.ts +9 -0
  195. package/src/shared/local-time.ts +21 -0
  196. package/src/shared/protocol.ts +25 -0
  197. package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +87 -0
  198. package/src/skills/typeclaw-channel-telegram-bot/SKILL.md +64 -0
  199. package/src/skills/typeclaw-config/SKILL.md +643 -0
  200. package/src/skills/typeclaw-cron/SKILL.md +159 -0
  201. package/src/skills/typeclaw-git/SKILL.md +89 -0
  202. package/src/skills/typeclaw-memory/SKILL.md +174 -0
  203. package/src/skills/typeclaw-monorepo/SKILL.md +175 -0
  204. package/src/skills/typeclaw-plugins/SKILL.md +594 -0
  205. package/src/skills/typeclaw-skills/SKILL.md +246 -0
  206. package/src/stream/broker.ts +161 -0
  207. package/src/stream/index.ts +16 -0
  208. package/src/stream/types.ts +69 -0
  209. package/src/tui/client.ts +45 -0
  210. package/src/tui/format.ts +317 -0
  211. package/src/tui/index.ts +225 -0
  212. package/src/tui/theme.ts +41 -0
  213. package/typeclaw.schema.json +826 -0
@@ -0,0 +1,135 @@
1
+ import * as cheerio from 'cheerio'
2
+ import type { AnyNode, Element } from 'domhandler'
3
+
4
/**
 * Lower-cased tag names that produce a line in the snapshot outline.
 * `walk` tests every element's tag against this set; elements whose tag is
 * absent emit nothing themselves, but their children are still visited.
 */
const SEMANTIC_TAGS = new Set([
  // Landmarks and sectioning
  'header', 'nav', 'main', 'aside', 'footer', 'section', 'article',
  // Headings
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // Forms and controls
  'form', 'input', 'button', 'select', 'textarea', 'label',
  // Links and images
  'a', 'img',
  // Lists
  'ul', 'ol', 'li',
  // Tables
  'table', 'thead', 'tbody', 'tr', 'th', 'td',
])
36
+
37
/**
 * Display name printed for a tag in the snapshot outline.
 * Tags without an entry here (for example `thead` and `tbody`) are printed
 * under their own tag name by `walk`.
 */
const ROLE_FOR_TAG: Record<string, string> = {
  // Headings all share one role
  h1: 'heading', h2: 'heading', h3: 'heading', h4: 'heading', h5: 'heading', h6: 'heading',
  // Interactive elements and media
  a: 'link', button: 'button', input: 'input', select: 'select', textarea: 'textarea', img: 'image', form: 'form',
  // Landmarks and sectioning
  nav: 'navigation', header: 'banner', footer: 'contentinfo', main: 'main', aside: 'complementary',
  section: 'section', article: 'article',
  // Lists
  ul: 'list', ol: 'list', li: 'listitem',
  // Tables
  table: 'table', tr: 'row', th: 'columnheader', td: 'cell',
  // Form labels
  label: 'label',
}
67
+
68
/**
 * Renders an HTML document as an indented outline of its semantic elements.
 *
 * @param html - Markup to parse with cheerio.
 * @returns The outline lines joined with newlines, or a fixed notice when no
 *   line was produced.
 */
export function applySnapshot(html: string): string {
  const $ = cheerio.load(html)
  const bodyElement = $('body').get(0)
  // Start from <body> when there is one; otherwise from the parsed root's children.
  const startNodes: AnyNode[] = bodyElement ? [bodyElement] : ($.root().get(0)?.children ?? [])

  const outline: string[] = []
  startNodes.forEach((startNode) => walk($, startNode, 0, outline))

  if (outline.length === 0) return 'Page contains no semantic structure.'
  return outline.join('\n')
}
78
+
79
/** Type guard: reports whether the node's `type` field is `'tag'`. */
function isElement(node: AnyNode): node is Element {
  const kind: string = node.type
  return kind === 'tag'
}
82
+
83
/**
 * Depth-first traversal that appends one outline line per semantic element.
 *
 * @param $ - Cheerio instance the nodes belong to.
 * @param node - Node to visit; anything that is not a tag element is ignored.
 * @param depth - Indentation level to use if this node emits a line.
 * @param out - Accumulator the outline lines are pushed onto.
 */
function walk($: cheerio.CheerioAPI, node: AnyNode, depth: number, out: string[]): void {
  if (!isElement(node)) return

  const tagName = node.name.toLowerCase()
  const emitsLine = SEMANTIC_TAGS.has(tagName)

  if (emitsLine) {
    const roleName = ROLE_FOR_TAG[tagName] ?? tagName
    const text = labelFor($, node)
    const prefix = `${' '.repeat(depth)}- ${roleName}`
    out.push(text ? `${prefix}: ${text}` : prefix)
  }

  // Only elements that emitted a line push their descendants one level deeper.
  const childDepth = emitsLine ? depth + 1 : depth
  node.children.forEach((child) => walk($, child, childDepth, out))
}
101
+
102
/**
 * Builds the text shown after the role name for one semantic element.
 *
 * - `a`: quoted link text (max 80 chars) followed by the href; when only one
 *   of the two exists, that one alone (text is still capped at 80 chars).
 * - `img`: quoted alt text (max 80 chars) followed by the src in parentheses;
 *   the parentheses are omitted when there is no src, and the bare src is
 *   returned when there is no alt.
 * - `input` / `select` / `textarea`: space-separated `type=`, `name=` and
 *   `placeholder=` pairs for the attributes that are present (`type` falls
 *   back to the tag name).
 * - `button`, `label`, headings, `th`, `td`: whitespace-collapsed text, max 120 chars.
 * - anything else: empty string.
 *
 * @param $ - Cheerio instance the element belongs to.
 * @param element - Element to describe.
 * @returns The label, or `''` when the element has nothing to show.
 */
function labelFor($: cheerio.CheerioAPI, element: Element): string {
  const $el = $(element)
  const tag = element.name.toLowerCase()
  const collapsedText = (): string => $el.text().replace(/\s+/g, ' ').trim()

  if (tag === 'a') {
    // Truncate before branching so text-only links are capped too.
    const text = truncate(collapsedText(), 80)
    const href = $el.attr('href') ?? ''
    if (text && href) return `"${text}" → ${href}`
    return text || href
  }
  if (tag === 'img') {
    const alt = $el.attr('alt') ?? ''
    const src = $el.attr('src') ?? ''
    if (!alt) return src
    const quotedAlt = `"${truncate(alt, 80)}"`
    // Skip the parenthesised part when src is missing instead of printing "()".
    return src ? `${quotedAlt} (${src})` : quotedAlt
  }
  if (tag === 'input' || tag === 'select' || tag === 'textarea') {
    const name = $el.attr('name') ?? ''
    const type = $el.attr('type') ?? tag
    const placeholder = $el.attr('placeholder') ?? ''
    const parts = [
      type ? `type=${type}` : '',
      name ? `name=${name}` : '',
      placeholder ? `placeholder="${truncate(placeholder, 40)}"` : '',
    ]
    return parts.filter(Boolean).join(' ')
  }
  if (tag === 'button' || tag === 'label' || /^h[1-6]$/.test(tag) || tag === 'th' || tag === 'td') {
    return truncate(collapsedText(), 120)
  }
  return ''
}
132
+
133
/**
 * Shortens `text` to at most `max` UTF-16 code units, ending with an ellipsis
 * when anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair, so the
 * result cannot end in a lone high surrogate before the ellipsis.
 *
 * @param text - String to shorten.
 * @param max - Maximum length of the result, ellipsis included.
 * @returns `text` unchanged when it already fits, otherwise the shortened form.
 */
function truncate(text: string, max: number): string {
  if (text.length <= max) return text

  // Reserve one unit for the ellipsis; never let the slice end go negative.
  let end = Math.max(0, max - 1)
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1)
    // A high surrogate as the last kept unit means its pair would be split.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1
  }
  return `${text.slice(0, end)}…`
}
@@ -0,0 +1,281 @@
1
+ import { Type } from '@mariozechner/pi-ai'
2
+ import { defineTool } from '@mariozechner/pi-coding-agent'
3
+
4
+ import { fetchWithLimits, normalizeUrl, parseMimeType, WebfetchError } from './fetch'
5
+ import { applyGrep, GrepError } from './strategies/grep'
6
+ import { applyJq, JqError } from './strategies/jq'
7
+ import { applyRaw } from './strategies/raw'
8
+ import { applyReadability } from './strategies/readability'
9
+ import { applySelector, SelectorError } from './strategies/selector'
10
+ import { applySnapshot } from './strategies/snapshot'
11
+ import {
12
+ type CompactionStrategy,
13
+ DEFAULT_TIMEOUT_SECONDS,
14
+ MAX_TIMEOUT_SECONDS,
15
+ OUTPUT_CAPS,
16
+ type WebfetchDetails,
17
+ } from './types'
18
+
19
/** Strategy names offered through the tool's `strategy` parameter. */
const STRATEGY_VALUES = ['readability', 'jq', 'selector', 'grep', 'snapshot', 'raw'] as const

/**
 * The `webfetch` agent tool.
 *
 * `execute` normalizes the URL, fetches it with a clamped timeout, resolves a
 * compaction strategy (explicit or inferred from the content type), validates
 * the strategy's required argument, runs the strategy and caps the output.
 * Every failure along the way is returned as an error result rather than thrown.
 */
export const webfetchTool = defineTool({
  name: 'webfetch',
  label: 'Web Fetch',
  description: [
    'Fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy. ',
    'Use this when the user references a specific URL or when websearch surfaced a result you need to read in full. ',
    'Strategy guide:\n',
    '- "readability": extract article content as markdown (blogs, docs, news). Default for HTML.\n',
    '- "jq": query JSON APIs (npm registry, GitHub API). Pass `query` (e.g. ".items[].name").\n',
    '- "selector": extract text from elements matching a CSS selector. Pass `selector` (e.g. ".price").\n',
    '- "grep": filter lines by regex with optional `before`/`after` context. Pass `pattern`.\n',
    '- "snapshot": indented semantic tree of the page (forms, headings, links).\n',
    '- "raw": no processing.\n',
    'If `strategy` is omitted, it is inferred from content-type. JSON responses require explicit `strategy: "jq"` (or "raw"). ',
    'No SSRF protection is applied; do not use on untrusted user-supplied URLs without an outer guard.',
  ].join(''),
  parameters: Type.Object({
    url: Type.String({
      description: 'URL to fetch (http:// or https://). Bare hostnames are rewritten to https://.',
    }),
    strategy: Type.Optional(
      Type.Union(
        STRATEGY_VALUES.map((value) => Type.Literal(value)),
        { description: 'How to compact the response. If omitted, auto-detected from content-type.' },
      ),
    ),
    query: Type.Optional(Type.String({ description: 'jq query (required when strategy="jq")' })),
    selector: Type.Optional(Type.String({ description: 'CSS selector (required when strategy="selector")' })),
    pattern: Type.Optional(Type.String({ description: 'Regex pattern (required when strategy="grep")' })),
    before: Type.Optional(Type.Integer({ description: 'grep -B context lines', minimum: 0 })),
    after: Type.Optional(Type.Integer({ description: 'grep -A context lines', minimum: 0 })),
    limit: Type.Optional(Type.Integer({ description: 'grep: max result lines (default 100)', minimum: 1 })),
    offset: Type.Optional(Type.Integer({ description: 'grep: pagination offset', minimum: 0 })),
    timeout: Type.Optional(
      Type.Integer({
        description: `Timeout in seconds (default ${DEFAULT_TIMEOUT_SECONDS}, max ${MAX_TIMEOUT_SECONDS}).`,
        minimum: 1,
        maximum: MAX_TIMEOUT_SECONDS,
      }),
    ),
  }),

  async execute(_toolCallId, params, signal) {
    const startedAt = Date.now()
    const requestedUrl = params.url

    // Step 1: normalize the URL; a rejected URL is reported against the raw input.
    let targetUrl: string
    try {
      targetUrl = normalizeUrl(requestedUrl)
    } catch (error) {
      const reason = error instanceof WebfetchError ? error.message : `Invalid URL: ${error}`
      return errorResult(requestedUrl, reason, { startedAt })
    }

    // Step 2: fetch with the clamped timeout.
    const timeoutSeconds = clampTimeout(params.timeout)
    let response
    try {
      response = await fetchWithLimits(targetUrl, timeoutSeconds, signal)
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      return errorResult(targetUrl, reason, { startedAt, finalUrl: targetUrl })
    }

    // Response facts shared by every error result from here on.
    const responseMeta = {
      startedAt,
      finalUrl: response.finalUrl,
      contentType: response.contentType,
      httpStatus: response.httpStatus,
      bytesIn: response.bytesIn,
    }

    // Step 3: pick the strategy, explicit or inferred from the MIME type.
    const resolved = resolveStrategy(params.strategy, parseMimeType(response.contentType))
    if (resolved.kind === 'error') {
      return errorResult(targetUrl, resolved.message, responseMeta)
    }
    const { strategy, autoDetected } = resolved

    // Step 4: make sure the strategy's required argument was supplied.
    const missingArg = validateStrategyArgs(strategy, params)
    if (missingArg) {
      return errorResult(targetUrl, missingArg, { ...responseMeta, strategy, autoDetected })
    }

    // Step 5: run the strategy; its failures become error results as well.
    let output: string
    try {
      output = await runStrategy(strategy, response.body, response.finalUrl, params)
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      return errorResult(targetUrl, reason, { ...responseMeta, strategy, autoDetected })
    }

    // Step 6: cap the output and report.
    const capped = capOutput(output, strategy)
    const details: WebfetchDetails = {
      url: targetUrl,
      finalUrl: response.finalUrl,
      strategy,
      autoDetected,
      contentType: response.contentType,
      httpStatus: response.httpStatus,
      bytesIn: response.bytesIn,
      bytesOut: byteLength(capped.text),
      truncated: capped.truncated,
      durationMs: Date.now() - startedAt,
    }

    return {
      content: [{ type: 'text' as const, text: capped.text }],
      details,
    }
  },
})
147
+
148
/** Arguments of a `webfetch` call, mirroring the tool's parameter schema. */
type WebfetchParams = {
  /** URL to fetch. */
  url: string
  /** Compaction strategy; inferred from the content type when omitted. */
  strategy?: CompactionStrategy

  /** jq query, required by the `jq` strategy. */
  query?: string
  /** CSS selector, required by the `selector` strategy. */
  selector?: string
  /** Regex pattern, required by the `grep` strategy. */
  pattern?: string

  /** `grep` options forwarded to the strategy as-is. */
  before?: number
  after?: number
  limit?: number
  offset?: number

  /** Timeout in seconds. */
  timeout?: number
}
160
+
161
/**
 * Converts the optional `timeout` argument into a usable number of seconds.
 *
 * @param value - Caller-supplied timeout in seconds, if any.
 * @returns `DEFAULT_TIMEOUT_SECONDS` when `value` is `undefined` or NaN;
 *   otherwise `value` floored and clamped to `[1, MAX_TIMEOUT_SECONDS]`.
 */
function clampTimeout(value: number | undefined): number {
  // NaN would otherwise survive Math.max/Math.min and reach the fetch layer.
  if (value === undefined || Number.isNaN(value)) return DEFAULT_TIMEOUT_SECONDS
  return Math.min(Math.max(1, Math.floor(value)), MAX_TIMEOUT_SECONDS)
}
165
+
166
/** Result of strategy resolution: a strategy to run, or a message explaining why none was chosen. */
type ResolvedStrategy =
  | { kind: 'ok'; strategy: CompactionStrategy; autoDetected: boolean }
  | { kind: 'error'; message: string }

/**
 * Chooses the compaction strategy for a response.
 *
 * @param explicit - Strategy requested by the caller, if any; always wins.
 * @param mime - MIME type of the response.
 * @returns The explicit strategy, or `readability` for HTML/XHTML, or `raw`
 *   for anything else; JSON without an explicit strategy yields an error.
 */
function resolveStrategy(explicit: CompactionStrategy | undefined, mime: string): ResolvedStrategy {
  if (explicit) {
    return { kind: 'ok', strategy: explicit, autoDetected: false }
  }

  const looksLikeHtml = ['text/html', 'application/xhtml+xml'].includes(mime)
  if (looksLikeHtml) {
    return { kind: 'ok', strategy: 'readability', autoDetected: true }
  }

  const looksLikeJson = mime === 'application/json' || mime.endsWith('+json')
  if (looksLikeJson) {
    // JSON is never auto-compacted; the caller has to say how to handle it.
    const message = 'JSON response — pass `strategy: "jq"` with a `query`, or `strategy: "raw"` to get it untransformed.'
    return { kind: 'error', message }
  }

  return { kind: 'ok', strategy: 'raw', autoDetected: true }
}
184
+
185
/**
 * Checks that the argument a strategy depends on was supplied.
 *
 * @param strategy - Strategy about to run.
 * @param params - Arguments of the tool call.
 * @returns An error message when the `jq`, `selector` or `grep` strategy is
 *   missing (or was given an empty) `query`, `selector` or `pattern`;
 *   `null` otherwise.
 */
function validateStrategyArgs(strategy: CompactionStrategy, params: WebfetchParams): string | null {
  switch (strategy) {
    case 'jq':
      return params.query ? null : 'Missing required arg `query` for strategy "jq".'
    case 'selector':
      return params.selector ? null : 'Missing required arg `selector` for strategy "selector".'
    case 'grep':
      return params.pattern ? null : 'Missing required arg `pattern` for strategy "grep".'
    default:
      return null
  }
}
191
+
192
/**
 * Runs one compaction strategy over a fetched body.
 *
 * @param strategy - Strategy to apply.
 * @param body - Response body text.
 * @param url - URL handed to the `readability` strategy.
 * @param params - Tool-call arguments carrying the strategy-specific options.
 * @returns The strategy's output.
 * @throws A plain `Error` carrying the message of a `JqError`, `SelectorError`
 *   or `GrepError` raised by the matching strategy; any other error is rethrown untouched.
 */
async function runStrategy(
  strategy: CompactionStrategy,
  body: string,
  url: string,
  params: WebfetchParams,
): Promise<string> {
  // Re-raises `error` as a plain Error when it is an instance of `known`, otherwise as-is.
  const rethrowPlain = (error: unknown, known: new (...args: never[]) => { message: string }): never => {
    if (error instanceof known) throw new Error(error.message)
    throw error
  }

  switch (strategy) {
    case 'raw':
      return applyRaw(body)
    case 'readability':
      return applyReadability(body, url)
    case 'jq':
      try {
        return await applyJq(body, params.query ?? '')
      } catch (error) {
        return rethrowPlain(error, JqError)
      }
    case 'selector':
      try {
        return applySelector(body, params.selector ?? '')
      } catch (error) {
        return rethrowPlain(error, SelectorError)
      }
    case 'grep': {
      const { before, after, limit, offset } = params
      try {
        return applyGrep(body, { pattern: params.pattern ?? '', before, after, limit, offset })
      } catch (error) {
        return rethrowPlain(error, GrepError)
      }
    }
    case 'snapshot':
      return applySnapshot(body)
  }
}
234
+
235
/**
 * Enforces the per-strategy output cap.
 *
 * @param text - Strategy output.
 * @param strategy - Strategy that produced it; selects the cap from `OUTPUT_CAPS`.
 * @returns The text unchanged when its UTF-8 size fits the cap; otherwise the
 *   leading part followed by a notice stating how much was shown, with
 *   `truncated` set. The notice is appended after the cut, on top of the cap.
 */
function capOutput(text: string, strategy: CompactionStrategy): { text: string; truncated: boolean } {
  const limit = OUTPUT_CAPS[strategy]
  const totalBytes = byteLength(text)
  if (totalBytes <= limit) return { text, truncated: false }

  const kept = sliceByBytes(text, limit)
  const asKb = (bytes: number): string => (bytes / 1024).toFixed(1)
  const notice = `\n\n[Output truncated: shown ${asKb(byteLength(kept))} KB of ${asKb(totalBytes)} KB. Use a more specific strategy or a tighter pattern.]`
  return { text: kept + notice, truncated: true }
}
244
+
245
/**
 * Builds the tool result for a failed `webfetch` call.
 *
 * @param url - URL to report in `details.url`.
 * @param message - Error text; used as the content and as `details.message`.
 * @param partial - Whatever response facts are already known, plus the start
 *   timestamp used to compute `durationMs`. Missing facts fall back to
 *   neutral values (`url`, `'none'`, `false`, `''`, `0`).
 * @returns A single-text-item result whose details carry `error: true`.
 */
function errorResult(
  url: string,
  message: string,
  partial: Partial<WebfetchDetails> & { startedAt: number },
): { content: [{ type: 'text'; text: string }]; details: WebfetchDetails } {
  const { startedAt, ...known } = partial

  return {
    content: [{ type: 'text' as const, text: message }],
    details: {
      url,
      finalUrl: known.finalUrl ?? url,
      strategy: known.strategy ?? 'none',
      autoDetected: known.autoDetected ?? false,
      contentType: known.contentType ?? '',
      httpStatus: known.httpStatus ?? 0,
      bytesIn: known.bytesIn ?? 0,
      // For errors the outgoing payload is the message itself.
      bytesOut: byteLength(message),
      truncated: false,
      durationMs: Date.now() - startedAt,
      error: true,
      message,
    },
  }
}
270
+
271
/** Shared UTF-8 encoder used for all byte measurements in this module. */
const ENCODER = new TextEncoder()

/**
 * Measures a string in UTF-8 bytes.
 *
 * @param text - String to measure.
 * @returns Number of bytes `text` occupies when encoded as UTF-8.
 */
function byteLength(text: string): number {
  return ENCODER.encode(text).byteLength
}

/**
 * Returns the longest prefix of `text` whose UTF-8 encoding fits in `maxBytes`.
 *
 * The cut is moved back to the nearest character boundary, so the result never
 * ends in a U+FFFD replacement character produced by a half-cut multi-byte
 * sequence, and its encoded size never exceeds `maxBytes`.
 *
 * @param text - String to cut.
 * @param maxBytes - Byte budget; values below zero are treated as zero.
 * @returns `text` itself when it already fits, otherwise the fitting prefix.
 */
function sliceByBytes(text: string, maxBytes: number): string {
  const encoded = ENCODER.encode(text)
  if (encoded.byteLength <= maxBytes) return text

  let end = Math.max(0, Math.floor(maxBytes))
  // encoded[end] is the first dropped byte. While it is a continuation byte
  // (0b10xxxxxx) the cut sits inside a character, so step back to its lead byte.
  while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) end--

  return new TextDecoder('utf-8', { fatal: false }).decode(encoded.subarray(0, end))
}
@@ -0,0 +1,33 @@
1
/** Names of the ways a fetched response body can be compacted before it is returned. */
export type CompactionStrategy =
  | 'readability'
  | 'jq'
  | 'selector'
  | 'grep'
  | 'snapshot'
  | 'raw'
2
+
3
/** Structured metadata returned alongside the text content of a `webfetch` call. */
export type WebfetchDetails = {
  /** URL the call was made for. */
  url: string
  /** URL the response was finally served from. */
  finalUrl: string
  /** Strategy that was applied, or `'none'` when none had been chosen. */
  strategy: CompactionStrategy | 'none'
  /** Whether the strategy was inferred rather than requested. */
  autoDetected: boolean

  /** Content type reported for the response. */
  contentType: string
  /** HTTP status of the response. */
  httpStatus: number

  /** Bytes received. */
  bytesIn: number
  /** Bytes of text handed back to the caller. */
  bytesOut: number
  /** Whether the returned text was cut short. */
  truncated: boolean
  /** Elapsed time of the call in milliseconds. */
  durationMs: number

  /** Present and `true` on failed calls. */
  error?: boolean
  /** Error text on failed calls. */
  message?: string
}
17
+
18
// Upper bound on how much text each strategy may hand back to the model.
// Fetched pages are large, so the limits are deliberately tight to protect the
// context window — following the lesson from oh-my-openagent PR #434 that a
// blanket 50k-token cap is too generous for fetched content.
export const OUTPUT_CAPS: Record<CompactionStrategy, number> = {
  raw: 100_000,
  jq: 50_000,
  readability: 200_000,
  selector: 100_000,
  grep: 100_000,
  snapshot: 50_000,
}
29
+
30
/** Largest response size, in bytes (5 MiB). */
export const MAX_RESPONSE_BYTES = 5 * 1024 ** 2

/** Timeout, in seconds, used when a call does not specify one. */
export const DEFAULT_TIMEOUT_SECONDS = 30
/** Longest timeout, in seconds, a call may request. */
export const MAX_TIMEOUT_SECONDS = 120
@@ -0,0 +1,96 @@
1
+ import { Type } from '@mariozechner/pi-ai'
2
+ import { defineTool } from '@mariozechner/pi-coding-agent'
3
+
4
+ import { ddgSearch, DdgCaptchaError, type DdgResult } from './ddg'
5
+ import { wikipediaSearch, type WikipediaResult } from './wikipedia'
6
+
7
/** Number of results returned when the caller gives no `limit`. */
const DEFAULT_LIMIT = 10
/** Largest `limit` a caller may request. */
const MAX_LIMIT = 20

/** Structured metadata returned alongside the text content of a `websearch` call. */
type WebsearchDetails = {
  /** Query that was searched. */
  query: string
  /** Engine that answered, or `'none'` on error results. */
  source: 'web' | 'wikipedia' | 'none'
  /** Number of entries in `results`. */
  count: number
  /** The result entries themselves. */
  results: (DdgResult | WikipediaResult)[]
  /** Present and `true` on failed calls. */
  error?: boolean
  /** Error text on failed calls. */
  message?: string
}
18
+
19
/**
 * The `websearch` agent tool.
 *
 * Trims the query, clamps the limit, runs the search against Wikipedia or
 * DuckDuckGo depending on `source`, and formats the hits. Failures are
 * returned as error results rather than thrown; a DuckDuckGo captcha error is
 * reported with its own message, anything else is prefixed with "Search failed:".
 */
export const websearchTool = defineTool({
  name: 'websearch',
  label: 'Web Search',
  // The follow-up hint points at the package's own `webfetch` tool, whose
  // description in turn names websearch as the source of URLs to read.
  description:
    'Search the public web. Returns a ranked list of {title, url, snippet} entries. Use `source: "wikipedia"` for encyclopedic lookups; otherwise default to general web results from DuckDuckGo. When you need the full contents of a result, pass its URL to the `webfetch` tool.',
  parameters: Type.Object({
    query: Type.String({ description: 'The search query.' }),
    limit: Type.Optional(
      Type.Integer({
        description: `Max number of results to return (1-${MAX_LIMIT}, default ${DEFAULT_LIMIT}).`,
        minimum: 1,
        maximum: MAX_LIMIT,
      }),
    ),
    source: Type.Optional(
      Type.Union([Type.Literal('web'), Type.Literal('wikipedia')], {
        description: 'Which engine to query. Defaults to "web" (DuckDuckGo).',
      }),
    ),
  }),

  async execute(_toolCallId, params, signal) {
    const query = params.query.trim()
    if (!query) {
      return errorResult('Query is empty.')
    }
    const limit = clampLimit(params.limit)
    const source = params.source ?? 'web'

    try {
      const results =
        source === 'wikipedia' ? await wikipediaSearch(query, limit, signal) : await ddgSearch(query, limit, signal)
      return successResult(query, source, results)
    } catch (error) {
      // The captcha error's message is shown to the model without a prefix.
      if (error instanceof DdgCaptchaError) {
        return errorResult(error.message)
      }
      const message = error instanceof Error ? error.message : String(error)
      return errorResult(`Search failed: ${message}`)
    }
  },
})
61
+
62
/**
 * Converts the optional `limit` argument into a usable result count.
 *
 * @param value - Caller-supplied limit, if any.
 * @returns `DEFAULT_LIMIT` when `value` is `undefined` or NaN; otherwise
 *   `value` floored and clamped to `[1, MAX_LIMIT]`.
 */
function clampLimit(value: number | undefined): number {
  // NaN would otherwise survive Math.max/Math.min and be sent to the search backend.
  if (value === undefined || Number.isNaN(value)) return DEFAULT_LIMIT
  return Math.min(Math.max(1, Math.floor(value)), MAX_LIMIT)
}
66
+
67
/**
 * Formats search hits as a tool result.
 *
 * @param query - Query that was searched.
 * @param source - Engine that produced the hits.
 * @param results - Hits in ranked order.
 * @returns A "no results" sentence when `results` is empty; otherwise a
 *   numbered list with each hit's title, URL and (when non-empty) snippet.
 *   `details` carries the raw hits in both cases.
 */
function successResult(query: string, source: 'web' | 'wikipedia', results: DdgResult[] | WikipediaResult[]) {
  const details: WebsearchDetails = { query, source, count: results.length, results }
  const reply = (text: string) => ({ content: [{ type: 'text' as const, text }], details })

  if (results.length === 0) {
    return reply(`No results for "${query}" on ${source}.`)
  }

  const lines = [`Search results for "${query}" (${source}, ${results.length}):`, '']
  results.forEach((hit, position) => {
    lines.push(`${position + 1}. ${hit.title}`, ` ${hit.url}`)
    if (hit.snippet) lines.push(` ${hit.snippet}`)
    // Blank separator after every entry; the last one is trimmed off below.
    lines.push('')
  })

  return reply(lines.join('\n').trimEnd())
}
89
+
90
/**
 * Builds the tool result for a failed `websearch` call.
 *
 * @param message - Error text; used as the content and as `details.message`.
 * @returns A single-text-item result whose details have an empty query, no
 *   source, no results and `error: true`.
 */
function errorResult(message: string) {
  const details: WebsearchDetails = {
    query: '',
    source: 'none',
    count: 0,
    results: [],
    error: true,
    message,
  }
  const content = [{ type: 'text' as const, text: message }]
  return { content, details }
}
@@ -0,0 +1,52 @@
1
+ // Wikipedia OpenSearch API: free, official, no key, no rate limit on the free tier.
2
+ // Returns a JSON tuple [query, titles[], descriptions[], urls[]]. Descriptions are
3
+ // usually empty strings; when one is present it is passed through as the result's `snippet`.
4
+
5
/** Endpoint of the English Wikipedia API that receives the search request. */
const OPENSEARCH_URL = 'https://en.wikipedia.org/w/api.php'

/** One Wikipedia hit, shaped as title / URL / snippet. */
export type WikipediaResult = {
  /** Article title. */
  title: string
  /** Article URL. */
  url: string
  /** Description returned for the article; often the empty string. */
  snippet: string
}
12
+
13
/**
 * Queries the Wikipedia OpenSearch endpoint and returns the parsed hits.
 *
 * @param query - Search text.
 * @param limit - Maximum number of hits to request.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @returns The hits extracted by `parseOpenSearch`.
 * @throws Error when the HTTP response is not OK; the message carries the
 *   status code and status text.
 */
export async function wikipediaSearch(query: string, limit: number, signal?: AbortSignal): Promise<WikipediaResult[]> {
  const queryString = new URLSearchParams({
    action: 'opensearch',
    search: query,
    limit: String(limit),
    format: 'json',
    // Namespace 0 only.
    namespace: '0',
  }).toString()

  const headers = {
    'User-Agent': 'TypeClaw/0.1 (https://github.com/devxoul/typeclaw)',
    Accept: 'application/json',
  }
  const response = await fetch(`${OPENSEARCH_URL}?${queryString}`, { headers, signal })

  if (!response.ok) {
    throw new Error(`Wikipedia HTTP ${response.status} ${response.statusText}`)
  }

  const payload = (await response.json()) as unknown
  return parseOpenSearch(payload)
}
34
+
35
/**
 * Converts an OpenSearch response tuple `[query, titles[], descriptions[], urls[]]`
 * into result objects.
 *
 * The three arrays are read position by position, so a malformed entry only
 * affects its own row: a row whose title or URL is missing, empty or not a
 * string is skipped, and a non-string description becomes an empty snippet.
 * Entries are never shifted onto a neighbouring row.
 *
 * @param json - Decoded response body of unknown shape.
 * @returns The well-formed rows in their original order; `[]` when `json` is
 *   not an array of at least four elements.
 */
export function parseOpenSearch(json: unknown): WikipediaResult[] {
  if (!Array.isArray(json) || json.length < 4) return []

  const titles = asArray(json[1])
  const descriptions = asArray(json[2])
  const urls = asArray(json[3])

  const results: WikipediaResult[] = []
  for (let i = 0; i < titles.length; i++) {
    const title = titles[i]
    const url = urls[i]
    // Validate per row instead of filtering each column up front, which would
    // misalign the columns as soon as one of them held a non-string entry.
    if (typeof title !== 'string' || typeof url !== 'string' || !title || !url) continue
    const description = descriptions[i]
    results.push({ title, url, snippet: typeof description === 'string' ? description : '' })
  }
  return results
}

/** Returns `value` when it is an array, otherwise an empty array; elements are left untouched. */
function asArray(value: unknown): unknown[] {
  return Array.isArray(value) ? value : []
}