typeclaw 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +134 -0
- package/auth.schema.json +63 -0
- package/cron.schema.json +96 -0
- package/package.json +72 -0
- package/scripts/emit-base-dockerfile.ts +5 -0
- package/scripts/generate-schema.ts +34 -0
- package/secrets.schema.json +63 -0
- package/src/agent/auth.ts +119 -0
- package/src/agent/compaction.ts +35 -0
- package/src/agent/git-nudge.ts +95 -0
- package/src/agent/index.ts +451 -0
- package/src/agent/plugin-tools.ts +269 -0
- package/src/agent/reload-tool.ts +71 -0
- package/src/agent/self.ts +45 -0
- package/src/agent/session-origin.ts +288 -0
- package/src/agent/subagents.ts +253 -0
- package/src/agent/system-prompt.ts +68 -0
- package/src/agent/tools/channel-fetch-attachment.ts +118 -0
- package/src/agent/tools/channel-history.ts +119 -0
- package/src/agent/tools/channel-reply.ts +182 -0
- package/src/agent/tools/channel-send.ts +212 -0
- package/src/agent/tools/ddg.ts +218 -0
- package/src/agent/tools/restart.ts +122 -0
- package/src/agent/tools/stream-snapshot.ts +181 -0
- package/src/agent/tools/webfetch/fetch.ts +102 -0
- package/src/agent/tools/webfetch/index.ts +1 -0
- package/src/agent/tools/webfetch/strategies/grep.ts +70 -0
- package/src/agent/tools/webfetch/strategies/jq.ts +31 -0
- package/src/agent/tools/webfetch/strategies/raw.ts +3 -0
- package/src/agent/tools/webfetch/strategies/readability.ts +30 -0
- package/src/agent/tools/webfetch/strategies/selector.ts +41 -0
- package/src/agent/tools/webfetch/strategies/snapshot.ts +135 -0
- package/src/agent/tools/webfetch/tool.ts +281 -0
- package/src/agent/tools/webfetch/types.ts +33 -0
- package/src/agent/tools/websearch.ts +96 -0
- package/src/agent/tools/wikipedia.ts +52 -0
- package/src/bundled-plugins/agent-browser/dashboard-discovery.ts +170 -0
- package/src/bundled-plugins/agent-browser/dashboard-proxy.ts +421 -0
- package/src/bundled-plugins/agent-browser/index.ts +179 -0
- package/src/bundled-plugins/agent-browser/shim-install.ts +158 -0
- package/src/bundled-plugins/agent-browser/shim.ts +152 -0
- package/src/bundled-plugins/agent-browser/skills/agent-browser/SKILL.md +113 -0
- package/src/bundled-plugins/guard/index.ts +26 -0
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +98 -0
- package/src/bundled-plugins/guard/policies/skill-authoring.ts +185 -0
- package/src/bundled-plugins/guard/policies/uncommitted-changes.ts +85 -0
- package/src/bundled-plugins/guard/policy.ts +18 -0
- package/src/bundled-plugins/memory/README.md +71 -0
- package/src/bundled-plugins/memory/append-tool.ts +84 -0
- package/src/bundled-plugins/memory/dreaming-state.ts +86 -0
- package/src/bundled-plugins/memory/dreaming.ts +470 -0
- package/src/bundled-plugins/memory/fragment-parser.ts +67 -0
- package/src/bundled-plugins/memory/index.ts +238 -0
- package/src/bundled-plugins/memory/load-memory.ts +122 -0
- package/src/bundled-plugins/memory/memory-logger.ts +257 -0
- package/src/bundled-plugins/memory/secret-detector.ts +49 -0
- package/src/bundled-plugins/memory/watermark.ts +15 -0
- package/src/bundled-plugins/security/index.ts +35 -0
- package/src/bundled-plugins/security/policies/git-exfil.ts +120 -0
- package/src/bundled-plugins/security/policies/outbound-secret-scan.ts +167 -0
- package/src/bundled-plugins/security/policies/prompt-injection.ts +488 -0
- package/src/bundled-plugins/security/policies/secret-exfil-bash.ts +99 -0
- package/src/bundled-plugins/security/policies/secret-exfil-read.ts +127 -0
- package/src/bundled-plugins/security/policies/session-search-secrets.ts +86 -0
- package/src/bundled-plugins/security/policies/ssrf.ts +196 -0
- package/src/bundled-plugins/security/policies/system-prompt-leak.ts +81 -0
- package/src/bundled-plugins/security/policy.ts +9 -0
- package/src/channels/adapters/discord-bot-channel-resolver.ts +77 -0
- package/src/channels/adapters/discord-bot-classify.ts +148 -0
- package/src/channels/adapters/discord-bot.ts +640 -0
- package/src/channels/adapters/kakaotalk-author-resolver.ts +78 -0
- package/src/channels/adapters/kakaotalk-channel-resolver.ts +105 -0
- package/src/channels/adapters/kakaotalk-classify.ts +77 -0
- package/src/channels/adapters/kakaotalk.ts +622 -0
- package/src/channels/adapters/slack-bot-author-resolver.ts +80 -0
- package/src/channels/adapters/slack-bot-channel-resolver.ts +84 -0
- package/src/channels/adapters/slack-bot-classify.ts +213 -0
- package/src/channels/adapters/slack-bot-dedupe.ts +51 -0
- package/src/channels/adapters/slack-bot-time.ts +10 -0
- package/src/channels/adapters/slack-bot.ts +881 -0
- package/src/channels/adapters/telegram-bot-classify.ts +155 -0
- package/src/channels/adapters/telegram-bot-format.ts +309 -0
- package/src/channels/adapters/telegram-bot.ts +604 -0
- package/src/channels/engagement.ts +227 -0
- package/src/channels/index.ts +21 -0
- package/src/channels/manager.ts +292 -0
- package/src/channels/membership-cache.ts +116 -0
- package/src/channels/membership-from-history.ts +53 -0
- package/src/channels/membership.ts +30 -0
- package/src/channels/participants.ts +47 -0
- package/src/channels/persistence.ts +209 -0
- package/src/channels/reloadable.ts +28 -0
- package/src/channels/router.ts +1570 -0
- package/src/channels/schema.ts +273 -0
- package/src/channels/types.ts +160 -0
- package/src/cli/channel.ts +403 -0
- package/src/cli/compose-status.ts +95 -0
- package/src/cli/compose.ts +240 -0
- package/src/cli/hostd.ts +163 -0
- package/src/cli/index.ts +27 -0
- package/src/cli/init.ts +592 -0
- package/src/cli/logs.ts +38 -0
- package/src/cli/reload.ts +68 -0
- package/src/cli/restart.ts +66 -0
- package/src/cli/run.ts +77 -0
- package/src/cli/shell.ts +33 -0
- package/src/cli/start.ts +57 -0
- package/src/cli/status.ts +178 -0
- package/src/cli/stop.ts +31 -0
- package/src/cli/tui.ts +35 -0
- package/src/cli/ui.ts +110 -0
- package/src/commands/index.ts +74 -0
- package/src/compose/discover.ts +43 -0
- package/src/compose/index.ts +25 -0
- package/src/compose/logs.ts +162 -0
- package/src/compose/restart.ts +69 -0
- package/src/compose/start.ts +62 -0
- package/src/compose/status.ts +28 -0
- package/src/compose/stop.ts +43 -0
- package/src/config/config.ts +424 -0
- package/src/config/index.ts +25 -0
- package/src/config/providers.ts +234 -0
- package/src/config/reloadable.ts +47 -0
- package/src/container/index.ts +27 -0
- package/src/container/logs.ts +37 -0
- package/src/container/port.ts +137 -0
- package/src/container/shared.ts +290 -0
- package/src/container/shell.ts +58 -0
- package/src/container/start.ts +670 -0
- package/src/container/status.ts +76 -0
- package/src/container/stop.ts +120 -0
- package/src/container/verify-running.ts +149 -0
- package/src/cron/consumer.ts +138 -0
- package/src/cron/index.ts +54 -0
- package/src/cron/reloadable.ts +64 -0
- package/src/cron/scheduler.ts +200 -0
- package/src/cron/schema.ts +96 -0
- package/src/hostd/client.ts +113 -0
- package/src/hostd/daemon.ts +587 -0
- package/src/hostd/index.ts +25 -0
- package/src/hostd/paths.ts +82 -0
- package/src/hostd/portbroker-manager.ts +101 -0
- package/src/hostd/protocol.ts +48 -0
- package/src/hostd/spawn.ts +224 -0
- package/src/hostd/supervisor.ts +60 -0
- package/src/hostd/tailscale.ts +172 -0
- package/src/hostd/version.ts +115 -0
- package/src/init/dockerfile.ts +327 -0
- package/src/init/ensure-deps.ts +152 -0
- package/src/init/gitignore.ts +46 -0
- package/src/init/hatching.ts +60 -0
- package/src/init/index.ts +786 -0
- package/src/init/kakaotalk-auth.ts +114 -0
- package/src/init/models-dev.ts +130 -0
- package/src/init/oauth-login.ts +74 -0
- package/src/init/packagejson.ts +94 -0
- package/src/init/paths.ts +2 -0
- package/src/init/run-bun-install.ts +20 -0
- package/src/markdown/chunk.ts +299 -0
- package/src/markdown/index.ts +1 -0
- package/src/plugin/context.ts +40 -0
- package/src/plugin/define.ts +35 -0
- package/src/plugin/hooks.ts +204 -0
- package/src/plugin/index.ts +63 -0
- package/src/plugin/loader.ts +111 -0
- package/src/plugin/manager.ts +136 -0
- package/src/plugin/registry.ts +145 -0
- package/src/plugin/skills.ts +62 -0
- package/src/plugin/types.ts +172 -0
- package/src/portbroker/bind-with-forward.ts +102 -0
- package/src/portbroker/container-server.ts +305 -0
- package/src/portbroker/forward-result-bus.ts +36 -0
- package/src/portbroker/hostd-client.ts +443 -0
- package/src/portbroker/index.ts +33 -0
- package/src/portbroker/policy.ts +24 -0
- package/src/portbroker/proc-net-tcp.ts +72 -0
- package/src/portbroker/protocol.ts +39 -0
- package/src/reload/client.ts +59 -0
- package/src/reload/index.ts +3 -0
- package/src/reload/registry.ts +60 -0
- package/src/reload/types.ts +13 -0
- package/src/run/bundled-plugins.ts +24 -0
- package/src/run/channel-session-factory.ts +105 -0
- package/src/run/index.ts +432 -0
- package/src/run/plugin-runtime.ts +43 -0
- package/src/run/schema-with-plugins.ts +14 -0
- package/src/secrets/index.ts +13 -0
- package/src/secrets/migrate.ts +95 -0
- package/src/secrets/schema.ts +75 -0
- package/src/secrets/storage.ts +231 -0
- package/src/server/index.ts +436 -0
- package/src/sessions/index.ts +23 -0
- package/src/shared/index.ts +9 -0
- package/src/shared/local-time.ts +21 -0
- package/src/shared/protocol.ts +25 -0
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +87 -0
- package/src/skills/typeclaw-channel-telegram-bot/SKILL.md +64 -0
- package/src/skills/typeclaw-config/SKILL.md +643 -0
- package/src/skills/typeclaw-cron/SKILL.md +159 -0
- package/src/skills/typeclaw-git/SKILL.md +89 -0
- package/src/skills/typeclaw-memory/SKILL.md +174 -0
- package/src/skills/typeclaw-monorepo/SKILL.md +175 -0
- package/src/skills/typeclaw-plugins/SKILL.md +594 -0
- package/src/skills/typeclaw-skills/SKILL.md +246 -0
- package/src/stream/broker.ts +161 -0
- package/src/stream/index.ts +16 -0
- package/src/stream/types.ts +69 -0
- package/src/tui/client.ts +45 -0
- package/src/tui/format.ts +317 -0
- package/src/tui/index.ts +225 -0
- package/src/tui/theme.ts +41 -0
- package/typeclaw.schema.json +826 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import * as cheerio from 'cheerio'
|
|
2
|
+
import type { AnyNode, Element } from 'domhandler'
|
|
3
|
+
|
|
4
|
+
/**
 * Lower-case tag names that get their own line in the snapshot output.
 * Elements with any other tag emit nothing themselves; only their children are visited.
 */
const SEMANTIC_TAGS = new Set([
  // Landmarks and sectioning
  'header', 'nav', 'main', 'aside', 'footer', 'section', 'article',
  // Headings
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // Forms and controls
  'form', 'input', 'button', 'select', 'textarea', 'label',
  // Links and media
  'a', 'img',
  // Lists
  'ul', 'ol', 'li',
  // Tables
  'table', 'thead', 'tbody', 'tr', 'th', 'td',
])
|
|
36
|
+
|
|
37
|
+
/**
 * Role name printed for a tag in the snapshot output.
 * Tags without an entry here (e.g. `thead`, `tbody`) are printed under their own tag name.
 */
const ROLE_FOR_TAG: Record<string, string> = {
  // Landmarks and sectioning
  header: 'banner',
  nav: 'navigation',
  main: 'main',
  aside: 'complementary',
  footer: 'contentinfo',
  section: 'section',
  article: 'article',

  // Headings
  h1: 'heading',
  h2: 'heading',
  h3: 'heading',
  h4: 'heading',
  h5: 'heading',
  h6: 'heading',

  // Forms and controls
  form: 'form',
  input: 'input',
  button: 'button',
  select: 'select',
  textarea: 'textarea',
  label: 'label',

  // Links and media
  a: 'link',
  img: 'image',

  // Lists
  ul: 'list',
  ol: 'list',
  li: 'listitem',

  // Tables
  table: 'table',
  tr: 'row',
  th: 'columnheader',
  td: 'cell',
}
|
|
67
|
+
|
|
68
|
+
/**
 * Render an HTML document as an indented outline of its semantic elements.
 *
 * Traversal starts at `<body>` when one is present, otherwise at the children
 * of the parsed document root.
 *
 * @param html - Raw HTML markup.
 * @returns One line per semantic element joined with `\n`, or a fixed
 *   placeholder sentence when no semantic element was found.
 */
export function applySnapshot(html: string): string {
  const $ = cheerio.load(html)
  const outline: string[] = []

  const bodyNode = $('body').get(0)
  let startNodes: AnyNode[]
  if (bodyNode) {
    startNodes = [bodyNode]
  } else {
    startNodes = $.root().get(0)?.children ?? []
  }

  startNodes.forEach((startNode) => {
    walk($, startNode, 0, outline)
  })

  if (outline.length === 0) return 'Page contains no semantic structure.'
  return outline.join('\n')
}
|
|
78
|
+
|
|
79
|
+
/** Type guard: true only for nodes whose `type` is exactly `'tag'`. */
function isElement(node: AnyNode): node is Element {
  const { type } = node
  return type === 'tag'
}
|
|
82
|
+
|
|
83
|
+
/**
 * Depth-first traversal that appends one outline line per semantic element.
 *
 * Non-element nodes are ignored. An element whose tag is in `SEMANTIC_TAGS`
 * emits `- role` (or `- role: label`) at the current depth and its children
 * are visited one level deeper; any other element emits nothing and its
 * children stay at the same depth.
 *
 * @param $ - Cheerio instance the node belongs to (used to compute labels).
 * @param node - Node to visit.
 * @param depth - Current indentation level.
 * @param out - Accumulator the outline lines are pushed onto.
 */
function walk($: cheerio.CheerioAPI, node: AnyNode, depth: number, out: string[]): void {
  if (!isElement(node)) return

  const tagName = node.name.toLowerCase()
  const isSemantic = SEMANTIC_TAGS.has(tagName)

  if (isSemantic) {
    const role = ROLE_FOR_TAG[tagName] ?? tagName
    const label = labelFor($, node)
    const prefix = `${' '.repeat(depth)}- ${role}`
    out.push(label ? `${prefix}: ${label}` : prefix)
  }

  // Only semantic elements introduce a new indentation level.
  const childDepth = isSemantic ? depth + 1 : depth
  node.children.forEach((childNode) => {
    walk($, childNode, childDepth, out)
  })
}
|
|
101
|
+
|
|
102
|
+
/**
 * Build the short human-readable label shown after an element's role.
 *
 * - `a`: `"text" → href`, or whichever of the two is present.
 * - `img`: `"alt" (src)`, `"alt"` when there is no src, or the bare src when there is no alt.
 * - `input` / `select` / `textarea`: space-separated `type=`, `name=`, `placeholder=` parts.
 * - `button`, `label`, `h1`-`h6`, `th`, `td`: whitespace-collapsed text content.
 * - anything else: empty string.
 *
 * Free text is always truncated (80 chars for link text and alt, 40 for
 * placeholders, 120 otherwise); URLs are passed through unchanged.
 *
 * @param $ - Cheerio instance the element belongs to.
 * @param element - Element to describe.
 * @returns The label, or `''` when the element has nothing worth showing.
 */
function labelFor($: cheerio.CheerioAPI, element: Element): string {
  const $el = $(element)
  const tag = element.name.toLowerCase()

  if (tag === 'a') {
    // Truncate up front so the text-only fallback is bounded too.
    const text = truncate($el.text().replace(/\s+/g, ' ').trim(), 80)
    const href = $el.attr('href') ?? ''
    return text && href ? `"${text}" → ${href}` : text || href
  }
  if (tag === 'img') {
    const alt = $el.attr('alt') ?? ''
    const src = $el.attr('src') ?? ''
    if (!alt) return src
    const quotedAlt = `"${truncate(alt, 80)}"`
    // Avoid a dangling " ()" when the image has no src attribute.
    return src ? `${quotedAlt} (${src})` : quotedAlt
  }
  if (tag === 'input' || tag === 'select' || tag === 'textarea') {
    const name = $el.attr('name') ?? ''
    // Controls without an explicit type are reported under their tag name.
    const type = $el.attr('type') ?? tag
    const placeholder = $el.attr('placeholder') ?? ''
    const parts = [
      type ? `type=${type}` : '',
      name ? `name=${name}` : '',
      placeholder ? `placeholder="${truncate(placeholder, 40)}"` : '',
    ]
    return parts.filter(Boolean).join(' ')
  }
  if (tag === 'button' || tag === 'label' || /^h[1-6]$/.test(tag) || tag === 'th' || tag === 'td') {
    return truncate($el.text().replace(/\s+/g, ' ').trim(), 120)
  }
  return ''
}
|
|
132
|
+
|
|
133
|
+
/**
 * Shorten `text` to at most `max` UTF-16 code units, ending with `…` when cut.
 *
 * The cut never lands between the two halves of a surrogate pair, so the
 * result cannot end in a lone high surrogate.
 *
 * @param text - Text to shorten.
 * @param max - Maximum length of the result, ellipsis included.
 * @returns `text` unchanged when it already fits, `''` when `max <= 0`,
 *   otherwise a prefix of `text` followed by `…`.
 */
function truncate(text: string, max: number): string {
  if (text.length <= max) return text
  if (max <= 0) return ''

  let end = max - 1
  if (end > 0) {
    const last = text.charCodeAt(end - 1)
    // A trailing high surrogate means the cut split an astral character; drop it.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1
  }
  return `${text.slice(0, end)}…`
}
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import { Type } from '@mariozechner/pi-ai'
|
|
2
|
+
import { defineTool } from '@mariozechner/pi-coding-agent'
|
|
3
|
+
|
|
4
|
+
import { fetchWithLimits, normalizeUrl, parseMimeType, WebfetchError } from './fetch'
|
|
5
|
+
import { applyGrep, GrepError } from './strategies/grep'
|
|
6
|
+
import { applyJq, JqError } from './strategies/jq'
|
|
7
|
+
import { applyRaw } from './strategies/raw'
|
|
8
|
+
import { applyReadability } from './strategies/readability'
|
|
9
|
+
import { applySelector, SelectorError } from './strategies/selector'
|
|
10
|
+
import { applySnapshot } from './strategies/snapshot'
|
|
11
|
+
import {
|
|
12
|
+
type CompactionStrategy,
|
|
13
|
+
DEFAULT_TIMEOUT_SECONDS,
|
|
14
|
+
MAX_TIMEOUT_SECONDS,
|
|
15
|
+
OUTPUT_CAPS,
|
|
16
|
+
type WebfetchDetails,
|
|
17
|
+
} from './types'
|
|
18
|
+
|
|
19
|
+
/** Strategy names accepted by the `strategy` parameter. */
const STRATEGY_VALUES = ['readability', 'jq', 'selector', 'grep', 'snapshot', 'raw'] as const

/**
 * `webfetch` tool: fetches one URL, picks a compaction strategy (explicit or
 * inferred from the content type), runs it, caps the output and returns it
 * with a `WebfetchDetails` record. Every failure is reported as an error
 * result built by `errorResult`.
 */
export const webfetchTool = defineTool({
  name: 'webfetch',
  label: 'Web Fetch',
  description:
    'Fetch a single HTTP(S) URL and return the body, optionally compacted by a strategy. ' +
    'Use this when the user references a specific URL or when websearch surfaced a result you need to read in full. ' +
    'Strategy guide:\n' +
    '- "readability": extract article content as markdown (blogs, docs, news). Default for HTML.\n' +
    '- "jq": query JSON APIs (npm registry, GitHub API). Pass `query` (e.g. ".items[].name").\n' +
    '- "selector": extract text from elements matching a CSS selector. Pass `selector` (e.g. ".price").\n' +
    '- "grep": filter lines by regex with optional `before`/`after` context. Pass `pattern`.\n' +
    '- "snapshot": indented semantic tree of the page (forms, headings, links).\n' +
    '- "raw": no processing.\n' +
    'If `strategy` is omitted, it is inferred from content-type. JSON responses require explicit `strategy: "jq"` (or "raw"). ' +
    'No SSRF protection is applied; do not use on untrusted user-supplied URLs without an outer guard.',
  parameters: Type.Object({
    url: Type.String({
      description: 'URL to fetch (http:// or https://). Bare hostnames are rewritten to https://.',
    }),
    strategy: Type.Optional(
      Type.Union(
        STRATEGY_VALUES.map((value) => Type.Literal(value)),
        { description: 'How to compact the response. If omitted, auto-detected from content-type.' },
      ),
    ),
    query: Type.Optional(Type.String({ description: 'jq query (required when strategy="jq")' })),
    selector: Type.Optional(Type.String({ description: 'CSS selector (required when strategy="selector")' })),
    pattern: Type.Optional(Type.String({ description: 'Regex pattern (required when strategy="grep")' })),
    before: Type.Optional(Type.Integer({ description: 'grep -B context lines', minimum: 0 })),
    after: Type.Optional(Type.Integer({ description: 'grep -A context lines', minimum: 0 })),
    limit: Type.Optional(Type.Integer({ description: 'grep: max result lines (default 100)', minimum: 1 })),
    offset: Type.Optional(Type.Integer({ description: 'grep: pagination offset', minimum: 0 })),
    timeout: Type.Optional(
      Type.Integer({
        description: `Timeout in seconds (default ${DEFAULT_TIMEOUT_SECONDS}, max ${MAX_TIMEOUT_SECONDS}).`,
        minimum: 1,
        maximum: MAX_TIMEOUT_SECONDS,
      }),
    ),
  }),

  async execute(_toolCallId, params, signal) {
    const startedAt = Date.now()
    const { url: inputUrl } = params

    // 1. Normalize the URL; a failure here is reported against the raw input.
    let normalizedUrl: string
    try {
      normalizedUrl = normalizeUrl(inputUrl)
    } catch (error) {
      const reason = error instanceof WebfetchError ? error.message : `Invalid URL: ${error}`
      return errorResult(inputUrl, reason, { startedAt })
    }

    // 2. Fetch with the clamped timeout.
    const timeout = clampTimeout(params.timeout)
    let response
    try {
      response = await fetchWithLimits(normalizedUrl, timeout, signal)
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      return errorResult(normalizedUrl, reason, { startedAt, finalUrl: normalizedUrl })
    }

    // From here on every error result carries the same response metadata.
    const { finalUrl, contentType, httpStatus, bytesIn } = response
    const fail = (reason: string, extra: Partial<WebfetchDetails> = {}) =>
      errorResult(normalizedUrl, reason, { startedAt, finalUrl, contentType, httpStatus, bytesIn, ...extra })

    // 3. Choose the strategy (explicit, or inferred from the MIME type).
    const resolved = resolveStrategy(params.strategy, parseMimeType(contentType))
    if (resolved.kind === 'error') return fail(resolved.message)
    const { strategy, autoDetected } = resolved

    // 4. Check the strategy-specific required argument.
    const missingArg = validateStrategyArgs(strategy, params)
    if (missingArg) return fail(missingArg, { strategy, autoDetected })

    // 5. Run the strategy on the body.
    let output: string
    try {
      output = await runStrategy(strategy, response.body, finalUrl, params)
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error)
      return fail(reason, { strategy, autoDetected })
    }

    // 6. Cap the output and report.
    const capped = capOutput(output, strategy)
    const details: WebfetchDetails = {
      url: normalizedUrl,
      finalUrl,
      strategy,
      autoDetected,
      contentType,
      httpStatus,
      bytesIn,
      bytesOut: byteLength(capped.text),
      truncated: capped.truncated,
      durationMs: Date.now() - startedAt,
    }

    return {
      content: [{ type: 'text' as const, text: capped.text }],
      details,
    }
  },
})
|
|
147
|
+
|
|
148
|
+
/** Arguments of the `webfetch` tool as seen by the helper functions in this file. */
type WebfetchParams = {
  /** URL to fetch. */
  url: string
  /** Explicit compaction strategy; inferred from the content type when omitted. */
  strategy?: CompactionStrategy

  // Strategy-specific required arguments
  /** jq query (strategy "jq"). */
  query?: string
  /** CSS selector (strategy "selector"). */
  selector?: string
  /** Regex pattern (strategy "grep"). */
  pattern?: string

  // grep tuning
  before?: number
  after?: number
  limit?: number
  offset?: number

  /** Timeout in seconds. */
  timeout?: number
}
|
|
160
|
+
|
|
161
|
+
/**
 * Turn the optional `timeout` argument into a usable number of seconds.
 *
 * @param value - Requested timeout in seconds, if any.
 * @returns `DEFAULT_TIMEOUT_SECONDS` when `value` is `undefined` or NaN,
 *   otherwise `value` floored and clamped to `[1, MAX_TIMEOUT_SECONDS]`.
 */
function clampTimeout(value: number | undefined): number {
  // NaN would otherwise survive Math.floor/Math.max/Math.min and reach the fetch layer.
  if (value === undefined || Number.isNaN(value)) return DEFAULT_TIMEOUT_SECONDS
  return Math.min(Math.max(1, Math.floor(value)), MAX_TIMEOUT_SECONDS)
}
|
|
165
|
+
|
|
166
|
+
/** Outcome of strategy selection: either a strategy to run or an error message for the caller. */
type ResolvedStrategy =
  | {
      kind: 'ok'
      strategy: CompactionStrategy
      /** True when the strategy was inferred rather than passed explicitly. */
      autoDetected: boolean
    }
  | {
      kind: 'error'
      message: string
    }
|
|
169
|
+
|
|
170
|
+
/**
 * Decide which compaction strategy to run.
 *
 * An explicit strategy always wins. Otherwise HTML/XHTML maps to
 * `readability`, JSON (including `+json` suffixed types) is rejected with a
 * hint, and every other MIME type falls back to `raw`.
 *
 * @param explicit - Strategy requested by the caller, if any.
 * @param mime - MIME type of the response.
 */
function resolveStrategy(explicit: CompactionStrategy | undefined, mime: string): ResolvedStrategy {
  if (explicit) return { kind: 'ok', strategy: explicit, autoDetected: false }

  const isHtml = mime === 'text/html' || mime === 'application/xhtml+xml'
  const isJson = mime === 'application/json' || mime.endsWith('+json')

  if (!isHtml && isJson) {
    return {
      kind: 'error',
      message: 'JSON response — pass `strategy: "jq"` with a `query`, or `strategy: "raw"` to get it untransformed.',
    }
  }

  const inferred: CompactionStrategy = isHtml ? 'readability' : 'raw'
  return { kind: 'ok', strategy: inferred, autoDetected: true }
}
|
|
184
|
+
|
|
185
|
+
/**
 * Check that the argument a strategy depends on was supplied and is non-empty.
 *
 * @returns An error message naming the missing argument, or `null` when the
 *   strategy has everything it needs.
 */
function validateStrategyArgs(strategy: CompactionStrategy, params: WebfetchParams): string | null {
  switch (strategy) {
    case 'jq':
      return params.query ? null : 'Missing required arg `query` for strategy "jq".'
    case 'selector':
      return params.selector ? null : 'Missing required arg `selector` for strategy "selector".'
    case 'grep':
      return params.pattern ? null : 'Missing required arg `pattern` for strategy "grep".'
    default:
      // readability, snapshot and raw take no extra arguments.
      return null
  }
}
|
|
191
|
+
|
|
192
|
+
/**
 * Run one compaction strategy over a fetched body.
 *
 * The strategy-specific error classes (`JqError`, `SelectorError`,
 * `GrepError`) are re-thrown as plain `Error`s carrying the same message;
 * anything else propagates unchanged.
 *
 * @param strategy - Strategy to run.
 * @param body - Response body text.
 * @param url - Final URL of the response (passed to the readability strategy).
 * @param params - Tool arguments; supplies `query`, `selector`, `pattern` and the grep options.
 * @returns The strategy's text output.
 * @throws Error when the strategy fails or `strategy` is not a known value.
 */
async function runStrategy(
  strategy: CompactionStrategy,
  body: string,
  url: string,
  params: WebfetchParams,
): Promise<string> {
  switch (strategy) {
    case 'raw':
      return applyRaw(body)
    case 'readability':
      return applyReadability(body, url)
    case 'jq':
      try {
        return await applyJq(body, params.query ?? '')
      } catch (error) {
        if (error instanceof JqError) throw new Error(error.message)
        throw error
      }
    case 'selector':
      try {
        return applySelector(body, params.selector ?? '')
      } catch (error) {
        if (error instanceof SelectorError) throw new Error(error.message)
        throw error
      }
    case 'grep':
      try {
        return applyGrep(body, {
          pattern: params.pattern ?? '',
          before: params.before,
          after: params.after,
          limit: params.limit,
          offset: params.offset,
        })
      } catch (error) {
        if (error instanceof GrepError) throw new Error(error.message)
        throw error
      }
    case 'snapshot':
      return applySnapshot(body)
    default: {
      // Exhaustiveness guard: fails to compile if a strategy is added without a
      // case, and fails loudly at runtime instead of resolving to `undefined`.
      const unknownStrategy: never = strategy
      throw new Error(`Unknown strategy: ${String(unknownStrategy)}`)
    }
  }
}
|
|
234
|
+
|
|
235
|
+
/**
 * Enforce the per-strategy output cap from `OUTPUT_CAPS`, measured in UTF-8 bytes.
 *
 * @param text - Strategy output.
 * @param strategy - Strategy that produced it; selects the cap.
 * @returns The text unchanged when it fits; otherwise the leading portion
 *   followed by a footer stating how much was shown. The footer is appended
 *   after the cut, so the returned text is longer than the cap.
 */
function capOutput(text: string, strategy: CompactionStrategy): { text: string; truncated: boolean } {
  const limit = OUTPUT_CAPS[strategy]
  const totalBytes = byteLength(text)
  if (totalBytes <= limit) {
    return { text, truncated: false }
  }

  const shown = sliceByBytes(text, limit)
  const toKb = (bytes: number): string => (bytes / 1024).toFixed(1)
  const notice = `\n\n[Output truncated: shown ${toKb(byteLength(shown))} KB of ${toKb(totalBytes)} KB. Use a more specific strategy or a tighter pattern.]`

  return { text: shown + notice, truncated: true }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Build the tool result for a failed fetch.
 *
 * @param url - URL the failure is reported against.
 * @param message - Error text; used as the visible content and stored in `details.message`.
 * @param partial - Metadata known at the time of failure, plus the `startedAt`
 *   timestamp used to compute `durationMs`. Missing fields get neutral
 *   defaults (`finalUrl` falls back to `url`, `strategy` to `'none'`).
 * @returns A single-text-item result whose details have `error: true`.
 */
function errorResult(
  url: string,
  message: string,
  partial: Partial<WebfetchDetails> & { startedAt: number },
): { content: [{ type: 'text'; text: string }]; details: WebfetchDetails } {
  const details: WebfetchDetails = {
    url,
    finalUrl: partial.finalUrl ?? url,
    strategy: partial.strategy ?? 'none',
    autoDetected: partial.autoDetected ?? false,
    contentType: partial.contentType ?? '',
    httpStatus: partial.httpStatus ?? 0,
    bytesIn: partial.bytesIn ?? 0,
    // The message is the whole output, so it is also what counts as bytes out.
    bytesOut: byteLength(message),
    truncated: false,
    durationMs: Date.now() - partial.startedAt,
    error: true,
    message,
  }

  return { content: [{ type: 'text' as const, text: message }], details }
}
|
|
270
|
+
|
|
271
|
+
/** Shared UTF-8 encoder used for all byte measurements in this file. */
const ENCODER = new TextEncoder()

/** Size of `text` in bytes when encoded as UTF-8. */
function byteLength(text: string): number {
  return ENCODER.encode(text).byteLength
}

/**
 * Return the longest prefix of `text` whose UTF-8 encoding fits in `maxBytes`.
 *
 * The cut is moved back to a character boundary, so the result never ends in
 * a U+FFFD replacement character and never exceeds `maxBytes` once re-encoded.
 *
 * @param text - Text to cut.
 * @param maxBytes - Byte budget.
 * @returns `text` itself when it already fits, otherwise a prefix of it.
 */
function sliceByBytes(text: string, maxBytes: number): string {
  const encoded = ENCODER.encode(text)
  if (encoded.byteLength <= maxBytes) return text

  let end = Math.max(0, Math.floor(maxBytes))
  // encoded[end] is the first excluded byte. While it is a continuation byte
  // (0b10xxxxxx) the cut sits inside a multi-byte character, so step back to
  // that character's lead byte.
  while (end > 0 && ((encoded[end] ?? 0) & 0xc0) === 0x80) {
    end -= 1
  }
  return new TextDecoder('utf-8', { fatal: false }).decode(encoded.subarray(0, end))
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/** Names of the strategies that can post-process a fetched body. */
export type CompactionStrategy =
  | 'readability'
  | 'jq'
  | 'selector'
  | 'grep'
  | 'snapshot'
  | 'raw'

/** Metadata record returned alongside every webfetch result, successful or not. */
export type WebfetchDetails = {
  // Request
  url: string
  finalUrl: string

  // Strategy selection; 'none' is used when no strategy was chosen
  strategy: CompactionStrategy | 'none'
  autoDetected: boolean

  // Response
  contentType: string
  httpStatus: number
  bytesIn: number

  // Output
  bytesOut: number
  truncated: boolean
  durationMs: number

  // Present only on failures
  error?: boolean
  message?: string
}
|
|
17
|
+
|
|
18
|
+
// Upper bound on the output size of each strategy. Fetched pages tend to be
// very large, so the caps are deliberately tight to keep the model's context
// lean. Follows the lesson from oh-my-openagent PR #434 that the default
// 50k-token cap is too generous for fetched content.
export const OUTPUT_CAPS: Record<CompactionStrategy, number> = {
  readability: 200_000,
  raw: 100_000,
  selector: 100_000,
  grep: 100_000,
  jq: 50_000,
  snapshot: 50_000,
}
|
|
29
|
+
|
|
30
|
+
/** Response size limit: 5 MiB. */
export const MAX_RESPONSE_BYTES = 5 * 1024 ** 2

/** Timeout used when the caller does not pass one, in seconds. */
export const DEFAULT_TIMEOUT_SECONDS = 30

/** Largest timeout a caller may request, in seconds. */
export const MAX_TIMEOUT_SECONDS = 120
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { Type } from '@mariozechner/pi-ai'
|
|
2
|
+
import { defineTool } from '@mariozechner/pi-coding-agent'
|
|
3
|
+
|
|
4
|
+
import { ddgSearch, DdgCaptchaError, type DdgResult } from './ddg'
|
|
5
|
+
import { wikipediaSearch, type WikipediaResult } from './wikipedia'
|
|
6
|
+
|
|
7
|
+
/** Number of results returned when the caller gives no `limit`. */
const DEFAULT_LIMIT = 10
/** Largest `limit` a caller may request. */
const MAX_LIMIT = 20

/** Metadata record returned alongside every websearch result, successful or not. */
type WebsearchDetails = {
  query: string
  /** Engine that produced the results; 'none' on error results. */
  source: 'web' | 'wikipedia' | 'none'
  count: number
  results: (DdgResult | WikipediaResult)[]

  // Present only on failures
  error?: boolean
  message?: string
}
|
|
18
|
+
|
|
19
|
+
/**
 * `websearch` tool: runs a query against DuckDuckGo (default) or Wikipedia and
 * returns the hits as a numbered text list plus a `WebsearchDetails` record.
 * An empty query and any search failure are reported as error results.
 */
export const websearchTool = defineTool({
  name: 'websearch',
  label: 'Web Search',
  description:
    'Search the public web. Returns a ranked list of {title, url, snippet} entries. Use `source: "wikipedia"` for encyclopedic lookups; otherwise default to general web results from DuckDuckGo. Pair this with the `read` tool by visiting URLs you find with `bash` (curl) when you need full page contents.',
  parameters: Type.Object({
    query: Type.String({ description: 'The search query.' }),
    limit: Type.Optional(
      Type.Integer({
        description: `Max number of results to return (1-${MAX_LIMIT}, default ${DEFAULT_LIMIT}).`,
        minimum: 1,
        maximum: MAX_LIMIT,
      }),
    ),
    source: Type.Optional(
      Type.Union([Type.Literal('web'), Type.Literal('wikipedia')], {
        description: 'Which engine to query. Defaults to "web" (DuckDuckGo).',
      }),
    ),
  }),

  async execute(_toolCallId, params, signal) {
    const query = params.query.trim()
    if (query.length === 0) return errorResult('Query is empty.')

    const limit = clampLimit(params.limit)
    const source = params.source ?? 'web'

    try {
      if (source === 'wikipedia') {
        return successResult(query, source, await wikipediaSearch(query, limit, signal))
      }
      return successResult(query, source, await ddgSearch(query, limit, signal))
    } catch (error) {
      // The captcha error message is passed through without the generic prefix.
      if (error instanceof DdgCaptchaError) return errorResult(error.message)

      const reason = error instanceof Error ? error.message : String(error)
      return errorResult(`Search failed: ${reason}`)
    }
  },
})
|
|
61
|
+
|
|
62
|
+
/**
 * Turn the optional `limit` argument into a usable result count.
 *
 * @param value - Requested number of results, if any.
 * @returns `DEFAULT_LIMIT` when `value` is `undefined` or NaN, otherwise
 *   `value` floored and clamped to `[1, MAX_LIMIT]`.
 */
function clampLimit(value: number | undefined): number {
  // NaN would otherwise survive Math.floor/Math.max/Math.min and be sent to the search backend.
  if (value === undefined || Number.isNaN(value)) return DEFAULT_LIMIT
  return Math.min(Math.max(1, Math.floor(value)), MAX_LIMIT)
}
|
|
66
|
+
|
|
67
|
+
/**
 * Build the tool result for a completed search.
 *
 * @param query - Query that was searched.
 * @param source - Engine that was queried.
 * @param results - Hits to render; may be empty.
 * @returns A "No results…" message when `results` is empty, otherwise a
 *   header line followed by one numbered entry per hit (title, URL and, when
 *   non-empty, snippet). `details` carries the raw results either way.
 */
function successResult(query: string, source: 'web' | 'wikipedia', results: DdgResult[] | WikipediaResult[]) {
  const details: WebsearchDetails = { query, source, count: results.length, results }

  let text: string
  if (results.length === 0) {
    text = `No results for "${query}" on ${source}.`
  } else {
    const rows: string[] = [`Search results for "${query}" (${source}, ${results.length}):`, '']
    for (const [position, hit] of results.entries()) {
      rows.push(`${position + 1}. ${hit.title}`, ` ${hit.url}`)
      if (hit.snippet) rows.push(` ${hit.snippet}`)
      // Blank separator after each entry; the final one is trimmed below.
      rows.push('')
    }
    text = rows.join('\n').trimEnd()
  }

  return {
    content: [{ type: 'text' as const, text }],
    details,
  }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Build the tool result for a failed search.
 *
 * @param message - Error text; used as the visible content and stored in `details.message`.
 * @returns A single-text-item result whose details have an empty query, source
 *   `'none'`, no results and `error: true`.
 */
function errorResult(message: string) {
  const details: WebsearchDetails = {
    query: '',
    source: 'none',
    count: 0,
    results: [],
    error: true,
    message,
  }
  return { content: [{ type: 'text' as const, text: message }], details }
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// Wikipedia OpenSearch API: free, official, no key, no rate limit on the free tier.
// Returns a JSON tuple [query, titles[], descriptions[], urls[]]. Descriptions are
// usually empty strings, so the `snippet` field built from them is often blank.
|
|
4
|
+
|
|
5
|
+
/** Endpoint of the English Wikipedia API. */
const OPENSEARCH_URL = 'https://en.wikipedia.org/w/api.php'

/** One Wikipedia search hit. */
export type WikipediaResult = {
  /** Article title. */
  title: string
  /** Article URL. */
  url: string
  /** Description returned by the API; may be empty. */
  snippet: string
}
|
|
12
|
+
|
|
13
|
+
/**
 * Query the Wikipedia OpenSearch endpoint.
 *
 * @param query - Search text.
 * @param limit - Maximum number of hits to request.
 * @param signal - Optional abort signal forwarded to `fetch`.
 * @returns The hits parsed by `parseOpenSearch`.
 * @throws Error when the HTTP response status is not OK.
 */
export async function wikipediaSearch(query: string, limit: number, signal?: AbortSignal): Promise<WikipediaResult[]> {
  const search = new URLSearchParams({
    action: 'opensearch',
    search: query,
    limit: String(limit),
    format: 'json',
    // Namespace 0 is requested for every search.
    namespace: '0',
  })
  const endpoint = `${OPENSEARCH_URL}?${search.toString()}`
  const headers = {
    'User-Agent': 'TypeClaw/0.1 (https://github.com/devxoul/typeclaw)',
    Accept: 'application/json',
  }

  const response = await fetch(endpoint, { headers, signal })
  if (!response.ok) {
    throw new Error(`Wikipedia HTTP ${response.status} ${response.statusText}`)
  }

  const payload = (await response.json()) as unknown
  return parseOpenSearch(payload)
}
|
|
34
|
+
|
|
35
|
+
/**
 * Convert an OpenSearch response tuple `[query, titles[], descriptions[], urls[]]`
 * into result objects.
 *
 * The three arrays are read by position and each entry is type-checked in
 * place. Nothing is filtered out beforehand, because removing an entry from
 * one array would shift the rest and pair titles with the wrong URL or
 * description.
 *
 * @param json - Decoded JSON body of the response.
 * @returns One result per index that has both a non-empty string title and a
 *   non-empty string URL; `snippet` is the description at the same index, or
 *   `''` when it is missing or not a string. Returns `[]` when `json` is not
 *   an array of at least four elements.
 */
export function parseOpenSearch(json: unknown): WikipediaResult[] {
  if (!Array.isArray(json) || json.length < 4) return []

  const titles = asArray(json[1])
  const descriptions = asArray(json[2])
  const urls = asArray(json[3])

  const results: WikipediaResult[] = []
  for (let i = 0; i < titles.length; i++) {
    const title = stringAt(titles, i)
    const url = stringAt(urls, i)
    if (!title || !url) continue
    results.push({ title, url, snippet: stringAt(descriptions, i) })
  }
  return results
}

/** Return `value` when it is an array, otherwise an empty array. */
function asArray(value: unknown): readonly unknown[] {
  return Array.isArray(value) ? value : []
}

/** Return the string at `index`, or `''` when the slot is missing or not a string. */
function stringAt(values: readonly unknown[], index: number): string {
  const item = values[index]
  return typeof item === 'string' ? item : ''
}
|