@oyasmi/pipiclaw 0.5.8 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -3
- package/dist/agent/channel-runner.d.ts +5 -0
- package/dist/agent/channel-runner.js +59 -15
- package/dist/agent/prompt-builder.js +6 -0
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -1
- package/dist/memory/consolidation.js +11 -2
- package/dist/memory/session.js +2 -2
- package/dist/memory/sidecar-worker.d.ts +1 -0
- package/dist/memory/sidecar-worker.js +56 -1
- package/dist/paths.d.ts +2 -0
- package/dist/paths.js +2 -0
- package/dist/runtime/bootstrap.d.ts +2 -1
- package/dist/runtime/bootstrap.js +74 -23
- package/dist/runtime/delivery.js +56 -5
- package/dist/runtime/dingtalk.d.ts +2 -0
- package/dist/runtime/dingtalk.js +14 -7
- package/dist/runtime/events.d.ts +3 -0
- package/dist/runtime/events.js +30 -5
- package/dist/security/command-guard.js +4 -0
- package/dist/security/config.d.ts +6 -0
- package/dist/security/config.js +57 -6
- package/dist/security/network.d.ts +28 -0
- package/dist/security/network.js +246 -0
- package/dist/security/path-guard.js +4 -0
- package/dist/security/platform.d.ts +1 -0
- package/dist/security/platform.js +3 -0
- package/dist/security/types.d.ts +16 -1
- package/dist/settings.d.ts +4 -1
- package/dist/settings.js +31 -6
- package/dist/shared/config-diagnostics.d.ts +7 -0
- package/dist/shared/config-diagnostics.js +3 -0
- package/dist/subagents/discovery.d.ts +1 -1
- package/dist/subagents/discovery.js +1 -1
- package/dist/subagents/tool.d.ts +2 -0
- package/dist/subagents/tool.js +24 -2
- package/dist/tools/config.d.ts +37 -0
- package/dist/tools/config.js +170 -0
- package/dist/tools/index.d.ts +3 -0
- package/dist/tools/index.js +23 -1
- package/dist/tools/web-fetch.d.ts +17 -0
- package/dist/tools/web-fetch.js +29 -0
- package/dist/tools/web-search.d.ts +16 -0
- package/dist/tools/web-search.js +29 -0
- package/dist/web/client.d.ts +41 -0
- package/dist/web/client.js +193 -0
- package/dist/web/config.d.ts +19 -0
- package/dist/web/config.js +35 -0
- package/dist/web/extract.d.ts +7 -0
- package/dist/web/extract.js +122 -0
- package/dist/web/fetch.d.ts +23 -0
- package/dist/web/fetch.js +150 -0
- package/dist/web/format.d.ts +21 -0
- package/dist/web/format.js +38 -0
- package/dist/web/search-providers.d.ts +15 -0
- package/dist/web/search-providers.js +199 -0
- package/dist/web/search.d.ts +19 -0
- package/dist/web/search.js +52 -0
- package/package.json +9 -2
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { APP_HOME_DIR, TOOLS_CONFIG_PATH } from "../paths.js";
|
|
4
|
+
import { isRecord } from "../shared/type-guards.js";
|
|
5
|
+
const WEB_SEARCH_PROVIDERS = ["brave", "tavily", "jina", "searxng", "duckduckgo"];
|
|
6
|
+
/** Defaults for the web search tool (provider, credentials, result count, timeout). */
const defaultWebSearchConfig = {
    provider: "brave",
    apiKey: "",
    baseUrl: "",
    maxResults: 5,
    timeoutMs: 30_000,
};
/** Defaults for the web fetch tool (size limits, timeout, extraction behaviour). */
const defaultWebFetchConfig = {
    maxChars: 50_000,
    timeoutMs: 30_000,
    maxImageBytes: 10 * 1024 * 1024, // 10 MiB
    maxResponseBytes: 5 * 1024 * 1024, // 5 MiB
    preferJina: false,
    enableJinaFallback: false,
    defaultExtractMode: "markdown",
};
/**
 * Configuration used when tools.json is missing or when individual values in it
 * are absent or invalid. Web tools are disabled by default.
 */
export const DEFAULT_TOOLS_CONFIG = {
    tools: {
        web: {
            enable: false,
            proxy: null,
            search: defaultWebSearchConfig,
            fetch: defaultWebFetchConfig,
        },
    },
};
|
|
30
|
+
/**
 * Validates a numeric setting and floors it to an integer.
 *
 * Despite the name, out-of-range input is not pinned to the nearest bound:
 * any value that is not a finite number, or whose floor lies outside
 * [minimum, maximum], is replaced by `fallback`.
 *
 * @param {unknown} value Raw value to validate.
 * @param {*} fallback Returned unchanged whenever `value` is rejected.
 * @param {number} minimum Smallest accepted integer (inclusive).
 * @param {number} [maximum] Largest accepted integer (inclusive); no upper bound when undefined.
 * @returns The floored value, or `fallback`.
 */
function clampInteger(value, fallback, minimum, maximum) {
    const isUsableNumber = typeof value === "number" && Number.isFinite(value);
    if (!isUsableNumber) {
        return fallback;
    }
    const floored = Math.floor(value);
    const belowMinimum = floored < minimum;
    const aboveMaximum = maximum !== undefined && floored > maximum;
    return belowMinimum || aboveMaximum ? fallback : floored;
}
|
|
43
|
+
/**
 * Returns `value` with surrounding whitespace removed when it is a string;
 * any other type yields `fallback` (an empty string by default).
 */
function asTrimmedString(value, fallback = "") {
    if (typeof value !== "string") {
        return fallback;
    }
    return value.trim();
}
|
|
46
|
+
/**
 * Normalizes the optional proxy setting.
 *
 * @param {unknown} value Raw `proxy` value from the configuration file.
 * @returns {string | null} The trimmed proxy string, or null when the value is
 *   missing, not a string, or blank.
 */
function asOptionalProxy(value) {
    const candidate = typeof value === "string" ? value.trim() : "";
    return candidate === "" ? null : candidate;
}
|
|
56
|
+
/**
 * Appends a warning-level diagnostic about one configuration field.
 *
 * @param {Array<object>} diagnostics Collector that receives the new entry (mutated).
 * @param {string} configPath Path of the configuration file being validated.
 * @param {string} field Dotted name of the offending field; prefixed to the message.
 * @param {string} message Explanation of what was wrong and what is used instead.
 */
function pushInvalidValueDiagnostic(diagnostics, configPath, field, message) {
    const entry = {
        source: "tools",
        path: configPath,
        severity: "warning",
        message: `${field}: ${message}`,
    };
    diagnostics.push(entry);
}
|
|
64
|
+
/**
 * Builds a complete tools configuration from parsed JSON, substituting the
 * value from DEFAULT_TOOLS_CONFIG for anything missing or invalid.
 *
 * Every value that is present but unusable produces a warning diagnostic.
 * That covers out-of-range values as well as values of the wrong type
 * (for example `provider: 123`, `enable: "true"`, or `search: []`), so a typo
 * in tools.json is never silently replaced by a default.
 *
 * @param {unknown} source Parsed contents of the configuration file.
 * @param {string} configPath Path of the file, recorded on each diagnostic.
 * @param {Array<object>} diagnostics Collector that receives warnings (mutated).
 * @returns The merged configuration. When `source` is not an object the shared
 *   DEFAULT_TOOLS_CONFIG object itself is returned, so callers must not mutate it.
 */
function mergeToolsConfig(source, configPath, diagnostics) {
    const warn = (field, message) => pushInvalidValueDiagnostic(diagnostics, configPath, field, message);
    if (!isRecord(source)) {
        warn("root", "expected a JSON object; using defaults");
        return DEFAULT_TOOLS_CONFIG;
    }
    const defaults = DEFAULT_TOOLS_CONFIG.tools.web;
    // Reads a nested section, warning when it is present but not an object.
    const readSection = (parent, key, field) => {
        const candidate = parent[key];
        if (isRecord(candidate)) {
            return candidate;
        }
        if (candidate !== undefined) {
            warn(field, "expected a JSON object; using defaults");
        }
        return {};
    };
    // Validates an integer once; `undefined` as the fallback marks a rejected value.
    const readInteger = (raw, field, fallback, expectation, minimum, maximum) => {
        const accepted = clampInteger(raw, undefined, minimum, maximum);
        if (accepted !== undefined) {
            return accepted;
        }
        if (raw !== undefined) {
            warn(field, `${expectation}; using default`);
        }
        return fallback;
    };
    const readBoolean = (raw, field, fallback) => {
        if (typeof raw === "boolean") {
            return raw;
        }
        if (raw !== undefined) {
            warn(field, `expected a boolean; using ${fallback}`);
        }
        return fallback;
    };
    // null is tolerated as "unset"; the raw value is never echoed (it may be a secret).
    const readString = (raw, field) => {
        if (raw !== undefined && raw !== null && typeof raw !== "string") {
            warn(field, "expected a string; using an empty value");
        }
        return asTrimmedString(raw);
    };
    const tools = readSection(source, "tools", "tools");
    const web = readSection(tools, "web", "tools.web");
    const search = readSection(web, "search", "tools.web.search");
    const fetch = readSection(web, "fetch", "tools.web.fetch");

    let provider = defaults.search.provider;
    if (search.provider !== undefined) {
        if (typeof search.provider !== "string") {
            warn("tools.web.search.provider", `expected a string; using ${defaults.search.provider}`);
        }
        else {
            const requested = search.provider.trim().toLowerCase();
            if (WEB_SEARCH_PROVIDERS.includes(requested)) {
                provider = requested;
            }
            else {
                warn("tools.web.search.provider", `unknown provider "${String(search.provider)}"; using ${defaults.search.provider}`);
            }
        }
    }
    if (web.proxy !== undefined && web.proxy !== null && typeof web.proxy !== "string") {
        warn("tools.web.proxy", "expected a string or null; using null");
    }
    const maxResults = readInteger(search.maxResults, "tools.web.search.maxResults", defaults.search.maxResults, "expected an integer between 1 and 10", 1, 10);
    const searchTimeoutMs = readInteger(search.timeoutMs, "tools.web.search.timeoutMs", defaults.search.timeoutMs, "expected a positive integer", 1, undefined);
    const maxChars = readInteger(fetch.maxChars, "tools.web.fetch.maxChars", defaults.fetch.maxChars, "expected an integer >= 100", 100, undefined);
    const fetchTimeoutMs = readInteger(fetch.timeoutMs, "tools.web.fetch.timeoutMs", defaults.fetch.timeoutMs, "expected a positive integer", 1, undefined);
    const maxImageBytes = readInteger(fetch.maxImageBytes, "tools.web.fetch.maxImageBytes", defaults.fetch.maxImageBytes, "expected a positive integer", 1, undefined);
    const maxResponseBytes = readInteger(fetch.maxResponseBytes, "tools.web.fetch.maxResponseBytes", defaults.fetch.maxResponseBytes, "expected a positive integer", 1, undefined);

    let defaultExtractMode = defaults.fetch.defaultExtractMode;
    if (fetch.defaultExtractMode !== undefined) {
        const requested = typeof fetch.defaultExtractMode === "string" ? fetch.defaultExtractMode.trim() : undefined;
        if (requested === "text" || requested === "markdown") {
            defaultExtractMode = requested;
        }
        else {
            warn("tools.web.fetch.defaultExtractMode", `expected "markdown" or "text"; using ${defaults.fetch.defaultExtractMode}`);
        }
    }
    return {
        tools: {
            web: {
                enable: readBoolean(web.enable, "tools.web.enable", defaults.enable),
                proxy: asOptionalProxy(web.proxy),
                search: {
                    provider,
                    apiKey: readString(search.apiKey, "tools.web.search.apiKey"),
                    baseUrl: readString(search.baseUrl, "tools.web.search.baseUrl"),
                    maxResults,
                    timeoutMs: searchTimeoutMs,
                },
                fetch: {
                    maxChars,
                    timeoutMs: fetchTimeoutMs,
                    maxImageBytes,
                    maxResponseBytes,
                    preferJina: readBoolean(fetch.preferJina, "tools.web.fetch.preferJina", defaults.fetch.preferJina),
                    enableJinaFallback: readBoolean(fetch.enableJinaFallback, "tools.web.fetch.enableJinaFallback", defaults.fetch.enableJinaFallback),
                    defaultExtractMode,
                },
            },
        },
    };
}
|
|
138
|
+
/**
 * Resolves the location of tools.json.
 *
 * @param {string} [appHomeDir] Application home directory; defaults to APP_HOME_DIR.
 * @returns {string} TOOLS_CONFIG_PATH for the default home directory, otherwise
 *   `tools.json` inside the given directory.
 */
export function getToolsConfigPath(appHomeDir = APP_HOME_DIR) {
    if (appHomeDir !== APP_HOME_DIR) {
        return join(appHomeDir, "tools.json");
    }
    return TOOLS_CONFIG_PATH;
}
|
|
141
|
+
/**
 * Loads tools.json and reports every problem found while reading it.
 *
 * - A missing file yields the defaults with no diagnostics.
 * - A readable file is merged over the defaults; invalid values become warnings.
 * - Any exception while reading, parsing or merging yields the defaults plus a
 *   single error diagnostic carrying the exception message.
 *
 * @param {string} [appHomeDir] Application home directory; defaults to APP_HOME_DIR.
 * @returns {{ config: object, diagnostics: Array<object> }}
 */
export function loadToolsConfigWithDiagnostics(appHomeDir = APP_HOME_DIR) {
    const configPath = getToolsConfigPath(appHomeDir);
    if (existsSync(configPath) === false) {
        return { config: DEFAULT_TOOLS_CONFIG, diagnostics: [] };
    }
    try {
        const parsed = JSON.parse(readFileSync(configPath, "utf-8"));
        const diagnostics = [];
        const config = mergeToolsConfig(parsed, configPath, diagnostics);
        return { config, diagnostics };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        const failure = {
            source: "tools",
            path: configPath,
            severity: "error",
            message,
        };
        return { config: DEFAULT_TOOLS_CONFIG, diagnostics: [failure] };
    }
}
|
|
168
|
+
/**
 * Convenience wrapper around loadToolsConfigWithDiagnostics that discards the
 * diagnostics and returns only the resolved configuration.
 *
 * @param {string} [appHomeDir] Application home directory; defaults to APP_HOME_DIR.
 */
export function loadToolsConfig(appHomeDir = APP_HOME_DIR) {
    const { config } = loadToolsConfigWithDiagnostics(appHomeDir);
    return config;
}
|
package/dist/tools/index.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ import type { Executor, SandboxConfig } from "../sandbox.js";
|
|
|
4
4
|
import type { SecurityConfig, SecurityRuntimeContext } from "../security/types.js";
|
|
5
5
|
import type { PipiclawMemoryRecallSettings } from "../settings.js";
|
|
6
6
|
import type { SubAgentDiscoveryResult } from "../subagents/discovery.js";
|
|
7
|
+
import type { PipiclawToolsConfig } from "./config.js";
|
|
7
8
|
export interface CreatePipiclawToolsOptions {
|
|
8
9
|
executor: Executor;
|
|
9
10
|
getCurrentModel: () => Model<Api>;
|
|
@@ -16,6 +17,8 @@ export interface CreatePipiclawToolsOptions {
|
|
|
16
17
|
sandboxConfig: SandboxConfig;
|
|
17
18
|
getSubAgentDiscovery: () => SubAgentDiscoveryResult;
|
|
18
19
|
getMemoryRecallSettings: () => PipiclawMemoryRecallSettings;
|
|
20
|
+
securityConfig?: SecurityConfig;
|
|
21
|
+
toolsConfig?: PipiclawToolsConfig;
|
|
19
22
|
}
|
|
20
23
|
export interface CreatePipiclawBaseToolsOptions {
|
|
21
24
|
securityConfig?: SecurityConfig;
|
package/dist/tools/index.js
CHANGED
|
@@ -2,8 +2,11 @@ import { APP_HOME_DIR } from "../paths.js";
|
|
|
2
2
|
import { loadSecurityConfig } from "../security/config.js";
|
|
3
3
|
import { createSubAgentTool } from "../subagents/tool.js";
|
|
4
4
|
import { createBashTool } from "./bash.js";
|
|
5
|
+
import { loadToolsConfig } from "./config.js";
|
|
5
6
|
import { createEditTool } from "./edit.js";
|
|
6
7
|
import { createReadTool } from "./read.js";
|
|
8
|
+
import { createWebFetchTool } from "./web-fetch.js";
|
|
9
|
+
import { createWebSearchTool } from "./web-search.js";
|
|
7
10
|
import { createWriteTool } from "./write.js";
|
|
8
11
|
export function createPipiclawBaseTools(executor, options = {}) {
|
|
9
12
|
const hasSecurityOptions = options.securityConfig || options.securityContext || options.channelId;
|
|
@@ -22,7 +25,8 @@ export function createPipiclawBaseTools(executor, options = {}) {
|
|
|
22
25
|
];
|
|
23
26
|
}
|
|
24
27
|
export function createPipiclawTools(options) {
|
|
25
|
-
const securityConfig = loadSecurityConfig(APP_HOME_DIR);
|
|
28
|
+
const securityConfig = options.securityConfig ?? loadSecurityConfig(APP_HOME_DIR);
|
|
29
|
+
const toolsConfig = options.toolsConfig ?? loadToolsConfig(APP_HOME_DIR);
|
|
26
30
|
const securityContext = {
|
|
27
31
|
workspaceDir: options.workspaceDir,
|
|
28
32
|
workspacePath: options.workspacePath,
|
|
@@ -33,8 +37,25 @@ export function createPipiclawTools(options) {
|
|
|
33
37
|
securityContext,
|
|
34
38
|
channelId: options.channelId,
|
|
35
39
|
});
|
|
40
|
+
const webTools = toolsConfig.tools.web.enable === false
|
|
41
|
+
? []
|
|
42
|
+
: [
|
|
43
|
+
createWebSearchTool({
|
|
44
|
+
webConfig: toolsConfig.tools.web,
|
|
45
|
+
securityConfig,
|
|
46
|
+
workspaceDir: options.workspaceDir,
|
|
47
|
+
channelId: options.channelId,
|
|
48
|
+
}),
|
|
49
|
+
createWebFetchTool({
|
|
50
|
+
webConfig: toolsConfig.tools.web,
|
|
51
|
+
securityConfig,
|
|
52
|
+
workspaceDir: options.workspaceDir,
|
|
53
|
+
channelId: options.channelId,
|
|
54
|
+
}),
|
|
55
|
+
];
|
|
36
56
|
return [
|
|
37
57
|
...baseTools,
|
|
58
|
+
...webTools,
|
|
38
59
|
createSubAgentTool({
|
|
39
60
|
executor: options.executor,
|
|
40
61
|
getCurrentModel: options.getCurrentModel,
|
|
@@ -45,6 +66,7 @@ export function createPipiclawTools(options) {
|
|
|
45
66
|
getSubAgentDiscovery: options.getSubAgentDiscovery,
|
|
46
67
|
getMemoryRecallSettings: options.getMemoryRecallSettings,
|
|
47
68
|
securityConfig,
|
|
69
|
+
webConfig: toolsConfig.tools.web,
|
|
48
70
|
runtimeContext: {
|
|
49
71
|
workspacePath: options.workspacePath,
|
|
50
72
|
channelId: options.channelId,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
|
2
|
+
import type { SecurityConfig } from "../security/types.js";
|
|
3
|
+
import type { PipiclawWebToolsConfig } from "./config.js";
|
|
4
|
+
declare const webFetchSchema: import("@sinclair/typebox").TObject<{
|
|
5
|
+
label: import("@sinclair/typebox").TString;
|
|
6
|
+
url: import("@sinclair/typebox").TString;
|
|
7
|
+
extractMode: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TUnion<[import("@sinclair/typebox").TLiteral<"markdown">, import("@sinclair/typebox").TLiteral<"text">]>>;
|
|
8
|
+
maxChars: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
9
|
+
}>;
|
|
10
|
+
export interface WebFetchToolOptions {
|
|
11
|
+
webConfig: PipiclawWebToolsConfig;
|
|
12
|
+
securityConfig: SecurityConfig;
|
|
13
|
+
workspaceDir: string;
|
|
14
|
+
channelId?: string;
|
|
15
|
+
}
|
|
16
|
+
export declare function createWebFetchTool(options: WebFetchToolOptions): AgentTool<typeof webFetchSchema>;
|
|
17
|
+
export {};
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
import { resolveWebFetchRequest } from "../web/config.js";
|
|
3
|
+
import { runWebFetch } from "../web/fetch.js";
|
|
4
|
+
const webFetchSchema = Type.Object({
|
|
5
|
+
label: Type.String({ description: "Brief description of what you're fetching and why (shown to user)" }),
|
|
6
|
+
url: Type.String({ description: "HTTP or HTTPS URL to fetch" }),
|
|
7
|
+
extractMode: Type.Optional(Type.Union([Type.Literal("markdown"), Type.Literal("text")], {
|
|
8
|
+
description: "Preferred text extraction format for HTML pages",
|
|
9
|
+
})),
|
|
10
|
+
maxChars: Type.Optional(Type.Number({ description: "Maximum extracted text characters to return" })),
|
|
11
|
+
});
|
|
12
|
+
/**
 * Creates the `web_fetch` agent tool.
 *
 * On each call the tool resolves the request against `options.webConfig.fetch`
 * and hands it to runWebFetch, returning that result unchanged. `options` is
 * read at call time, not when the tool is created.
 *
 * @param options Web configuration, security configuration, workspace directory
 *   and optional channel id forwarded to runWebFetch.
 */
export function createWebFetchTool(options) {
    async function execute(_toolCallId, args, signal) {
        const { url, extractMode, maxChars } = args;
        const request = resolveWebFetchRequest(options.webConfig.fetch, url, extractMode, maxChars);
        const runtime = {
            webConfig: options.webConfig,
            securityConfig: options.securityConfig,
            workspaceDir: options.workspaceDir,
            channelId: options.channelId,
        };
        return await runWebFetch(runtime, request, signal);
    }
    return {
        name: "web_fetch",
        label: "web_fetch",
        description: "Fetch a public URL and extract readable content. Returns text for HTML/JSON/text pages and image content blocks for images.",
        parameters: webFetchSchema,
        execute,
    };
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
|
2
|
+
import type { SecurityConfig } from "../security/types.js";
|
|
3
|
+
import type { PipiclawWebToolsConfig } from "./config.js";
|
|
4
|
+
declare const webSearchSchema: import("@sinclair/typebox").TObject<{
|
|
5
|
+
label: import("@sinclair/typebox").TString;
|
|
6
|
+
query: import("@sinclair/typebox").TString;
|
|
7
|
+
count: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TNumber>;
|
|
8
|
+
}>;
|
|
9
|
+
export interface WebSearchToolOptions {
|
|
10
|
+
webConfig: PipiclawWebToolsConfig;
|
|
11
|
+
securityConfig: SecurityConfig;
|
|
12
|
+
workspaceDir: string;
|
|
13
|
+
channelId?: string;
|
|
14
|
+
}
|
|
15
|
+
export declare function createWebSearchTool(options: WebSearchToolOptions): AgentTool<typeof webSearchSchema>;
|
|
16
|
+
export {};
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { Type } from "@sinclair/typebox";
|
|
2
|
+
import { resolveWebSearchRequest } from "../web/config.js";
|
|
3
|
+
import { runWebSearch } from "../web/search.js";
|
|
4
|
+
const webSearchSchema = Type.Object({
|
|
5
|
+
label: Type.String({ description: "Brief description of what you're searching for and why (shown to user)" }),
|
|
6
|
+
query: Type.String({ description: "Search query" }),
|
|
7
|
+
count: Type.Optional(Type.Number({ description: "Maximum number of results to return (1-10)" })),
|
|
8
|
+
});
|
|
9
|
+
/**
 * Creates the `web_search` agent tool.
 *
 * On each call the tool resolves the query and result count against
 * `options.webConfig.search`, runs the search via runWebSearch, and wraps the
 * returned text in a single text content block alongside the result details.
 * `options` is read at call time, not when the tool is created.
 *
 * @param options Web configuration, security configuration, workspace directory
 *   and optional channel id forwarded to runWebSearch.
 */
export function createWebSearchTool(options) {
    async function execute(_toolCallId, args, signal) {
        const { query, count } = args;
        const request = resolveWebSearchRequest(options.webConfig.search, query, count);
        const runtime = {
            webConfig: options.webConfig,
            securityConfig: options.securityConfig,
            workspaceDir: options.workspaceDir,
            channelId: options.channelId,
        };
        const { content, details } = await runWebSearch(runtime, request.query, request.count, signal);
        return {
            content: [{ type: "text", text: content }],
            details,
        };
    }
    return {
        name: "web_search",
        label: "web_search",
        description: "Search the public web and return titles, URLs, and snippets from the configured provider.",
        parameters: webSearchSchema,
        execute,
    };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { Buffer } from "node:buffer";
|
|
2
|
+
import type { SecurityConfig } from "../security/types.js";
|
|
3
|
+
import type { PipiclawWebToolsConfig } from "../tools/config.js";
|
|
4
|
+
export declare const WEB_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Pipiclaw/0.5";
|
|
5
|
+
export interface WebRuntimeContext {
|
|
6
|
+
webConfig: PipiclawWebToolsConfig;
|
|
7
|
+
securityConfig: SecurityConfig;
|
|
8
|
+
workspaceDir: string;
|
|
9
|
+
channelId?: string;
|
|
10
|
+
}
|
|
11
|
+
/** Result of a WebHttpClient request after redirects have been followed. */
export interface WebHttpResponse {
|
|
12
|
+
/** HTTP status code of the final response; non-2xx statuses are returned, not thrown. */
status: number;
|
|
13
|
+
/** URL that produced this response, i.e. the last URL reached after following redirects. */
finalUrl: string;
|
|
14
|
+
/** Response headers with lower-cased names; multi-valued headers are joined with ", ". */
headers: Record<string, string>;
|
|
15
|
+
/** Raw, undecoded response body. */
body: Buffer;
|
|
16
|
+
}
|
|
17
|
+
/** Options accepted by WebHttpClient.request, requestJson and requestText. */
export interface WebHttpRequestOptions {
|
|
18
|
+
/** HTTP method; the client uses "GET" when omitted. */
method?: "GET" | "POST";
|
|
19
|
+
/** Target URL. It is parsed with `new URL` when `params` is supplied, so it must be absolute in that case. */
url: string;
|
|
20
|
+
/** Extra request headers; these override the client's default User-Agent and Accept headers. */
headers?: Record<string, string>;
|
|
21
|
+
/** Query parameters set on the URL; entries whose value is undefined are skipped. */
params?: Record<string, string | number | boolean | undefined>;
|
|
22
|
+
/** Request body handed to the HTTP library; dropped when a redirect downgrades the request to GET. */
data?: unknown;
|
|
23
|
+
/** Per-request timeout in milliseconds. */
timeoutMs: number;
|
|
24
|
+
/** Optional signal used to cancel the request. */
signal?: AbortSignal;
|
|
25
|
+
/** Maximum redirects to follow; defaults to the security config's networkGuard.maxRedirects. */
maxRedirects?: number;
|
|
26
|
+
/** Upper bound on the response size in bytes; unlimited when omitted. */
maxResponseBytes?: number;
|
|
27
|
+
}
|
|
28
|
+
export declare class WebHttpClient {
|
|
29
|
+
private readonly context;
|
|
30
|
+
constructor(context: WebRuntimeContext);
|
|
31
|
+
request(options: WebHttpRequestOptions): Promise<WebHttpResponse>;
|
|
32
|
+
requestJson<T>(options: WebHttpRequestOptions): Promise<{
|
|
33
|
+
response: WebHttpResponse;
|
|
34
|
+
data: T;
|
|
35
|
+
}>;
|
|
36
|
+
requestText(options: WebHttpRequestOptions): Promise<{
|
|
37
|
+
response: WebHttpResponse;
|
|
38
|
+
text: string;
|
|
39
|
+
}>;
|
|
40
|
+
}
|
|
41
|
+
export declare function createWebHttpClient(context: WebRuntimeContext): WebHttpClient;
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import { Buffer } from "node:buffer";
|
|
2
|
+
import axios from "axios";
|
|
3
|
+
import { HttpProxyAgent } from "http-proxy-agent";
|
|
4
|
+
import { HttpsProxyAgent } from "https-proxy-agent";
|
|
5
|
+
import { getProxyForUrl } from "proxy-from-env";
|
|
6
|
+
import { SocksProxyAgent } from "socks-proxy-agent";
|
|
7
|
+
import { logSecurityEvent } from "../security/logger.js";
|
|
8
|
+
import { NetworkGuardError, validateNetworkTarget, validateRedirectTarget } from "../security/network.js";
|
|
9
|
+
export const WEB_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Pipiclaw/0.5";
|
|
10
|
+
const agentCache = new Map();
|
|
11
|
+
/**
 * Converts a header collection into a plain object of lower-cased names
 * mapped to string values.
 *
 * Strings are kept as-is, arrays are joined with ", ", other non-nullish values
 * are stringified, and null/undefined values are dropped. Anything that is not
 * an object yields an empty result.
 *
 * @param {unknown} headers Header collection as provided by the HTTP library.
 * @returns {Record<string, string>}
 */
function normalizeHeaders(headers) {
    const normalized = {};
    if (!headers || typeof headers !== "object") {
        return normalized;
    }
    for (const name of Object.keys(headers)) {
        const raw = headers[name];
        if (raw === undefined || raw === null) {
            continue;
        }
        let flattened;
        if (typeof raw === "string") {
            flattened = raw;
        }
        else if (Array.isArray(raw)) {
            flattened = raw.join(", ");
        }
        else {
            flattened = String(raw);
        }
        normalized[name.toLowerCase()] = flattened;
    }
    return normalized;
}
|
|
29
|
+
/**
 * Adds query parameters to a URL.
 *
 * @param {string} url Absolute URL; returned untouched when `params` is falsy.
 * @param {Record<string, string | number | boolean | undefined>} [params]
 *   Parameters to set. Existing parameters with the same name are replaced and
 *   entries whose value is undefined are skipped.
 * @returns {string} The serialized URL.
 */
function buildUrlWithParams(url, params) {
    if (!params) {
        return url;
    }
    const target = new URL(url);
    Object.entries(params)
        .filter(([, value]) => value !== undefined)
        .forEach(([key, value]) => {
            target.searchParams.set(key, String(value));
        });
    return target.href;
}
|
|
42
|
+
/**
 * Returns the proxy agent to use for a request, or undefined for a direct
 * connection.
 *
 * The explicitly configured proxy wins; otherwise the proxy environment
 * variables are consulted through `getProxyForUrl`. Agents are cached per
 * request protocol and proxy URL so repeated requests share one agent.
 *
 * @param {string} requestUrl Absolute URL about to be requested.
 * @param {string | null | undefined} explicitProxy Proxy URL from configuration, if any.
 * @returns The agent to pass as httpAgent/httpsAgent, or undefined when no proxy applies.
 * @throws {TypeError} When the proxy URL cannot be parsed. The message names
 *   the proxy setting as the culprit but does not echo the URL, which may
 *   embed credentials.
 */
function getProxyAgent(requestUrl, explicitProxy) {
    const configuredProxy = explicitProxy?.trim();
    const proxyUrl = configuredProxy || getProxyForUrl(requestUrl);
    if (!proxyUrl) {
        return undefined;
    }
    const requestProtocol = new URL(requestUrl).protocol;
    let proxyProtocol;
    try {
        proxyProtocol = new URL(proxyUrl).protocol;
    }
    catch {
        // Without this, a scheme-less value such as "127.0.0.1:7890" surfaces as a
        // bare "Invalid URL" that looks like a problem with the request URL.
        const origin = configuredProxy ? "the configured web proxy" : "the proxy environment variables";
        throw new TypeError(`Invalid proxy URL from ${origin}: expected an absolute URL such as http://host:port or socks5://host:port`);
    }
    const cacheKey = `${requestProtocol}|${proxyUrl}`;
    const cached = agentCache.get(cacheKey);
    if (cached) {
        return cached;
    }
    let agent;
    if (proxyProtocol.startsWith("socks")) {
        agent = new SocksProxyAgent(proxyUrl);
    }
    else if (requestProtocol === "https:") {
        agent = new HttpsProxyAgent(proxyUrl);
    }
    else {
        agent = new HttpProxyAgent(proxyUrl);
    }
    agentCache.set(cacheKey, agent);
    return agent;
}
|
|
67
|
+
/**
 * Records a security event for a request rejected by the network guard.
 *
 * @param context Web runtime context supplying the workspace directory,
 *   security configuration and channel id.
 * @param error The guard error; its url, stage, resolved host/address and
 *   category are copied to the event and its message becomes the reason.
 */
function logBlockedRequest(context, error) {
    const { workspaceDir, securityConfig, channelId } = context;
    const { url, stage, resolvedHost, resolvedAddress, category, message } = error;
    const event = {
        type: "network",
        tool: "web",
        channelId,
        url,
        stage,
        resolvedHost,
        resolvedAddress,
        category,
        reason: message,
    };
    logSecurityEvent(workspaceDir, securityConfig, event);
}
|
|
80
|
+
/**
 * Decodes a response body as UTF-8 text. Decoding is lenient: malformed byte
 * sequences become U+FFFD replacement characters instead of throwing.
 */
function decodeBody(body) {
    const lenientUtf8 = new TextDecoder("utf-8", { fatal: false });
    return lenientUtf8.decode(body);
}
|
|
83
|
+
/** Reports whether an HTTP status is one of the redirects the client follows (301, 302, 303, 307, 308). */
function isRedirectStatus(status) {
    switch (status) {
        case 301:
        case 302:
        case 303:
        case 307:
        case 308:
            return true;
        default:
            return false;
    }
}
|
|
86
|
+
/** Standard credential-bearing request headers (lower-cased). */
const CREDENTIAL_HEADER_NAMES = new Set(["authorization", "cookie", "proxy-authorization"]);
/** Matches custom headers that conventionally carry API keys or tokens (tested against lower-cased names). */
const CREDENTIAL_HEADER_PATTERN = /api[-_]?key|token|secret/;
/**
 * Computes the caller-supplied headers that may be sent to a redirect target.
 *
 * - When the redirect leaves the current origin (scheme, host or port differ),
 *   credential headers are removed so secrets are not disclosed to another site.
 * - When the redirect turns the request into a body-less GET, `Content-*`
 *   headers describing the discarded body are removed.
 *
 * @param {Record<string, string> | undefined} headers Headers used for the current hop.
 * @param {string} fromUrl Absolute URL of the hop that answered with a redirect.
 * @param {string} toUrl Absolute URL of the redirect target.
 * @param {boolean} bodyDropped Whether the request body is being discarded.
 * @returns The same object when nothing has to change, otherwise a filtered copy.
 */
function headersForRedirect(headers, fromUrl, toUrl, bodyDropped) {
    if (!headers) {
        return headers;
    }
    const crossOrigin = new URL(fromUrl).origin !== new URL(toUrl).origin;
    if (!crossOrigin && !bodyDropped) {
        return headers;
    }
    const kept = {};
    for (const [name, value] of Object.entries(headers)) {
        const lower = name.toLowerCase();
        const isCredential = CREDENTIAL_HEADER_NAMES.has(lower) || CREDENTIAL_HEADER_PATTERN.test(lower);
        if (crossOrigin && isCredential) {
            continue;
        }
        if (bodyDropped && lower.startsWith("content-")) {
            continue;
        }
        kept[name] = value;
    }
    return kept;
}
/**
 * HTTP client for the web tools. Every hop, including each redirect target, is
 * validated by the network guard before it is requested, and redirects are
 * followed manually so that validation cannot be bypassed.
 */
export class WebHttpClient {
    /**
     * @param context Runtime context providing the web configuration (proxy),
     *   security configuration, workspace directory and channel id.
     */
    constructor(context) {
        this.context = context;
    }
    /**
     * Performs a request and follows redirects up to the configured limit.
     *
     * Any HTTP status is returned rather than thrown. 303 responses, and
     * 301/302 responses to a POST, switch the request to a body-less GET.
     * Credential headers supplied by the caller are not forwarded when a
     * redirect leaves the current origin.
     *
     * @param options Request options (see WebHttpRequestOptions).
     * @returns The final response with normalized headers and the raw body.
     * @throws NetworkGuardError when a target is rejected (also logged as a security event).
     * @throws Error when the redirect limit or `maxResponseBytes` is exceeded,
     *   or when the underlying HTTP request fails.
     */
    async request(options) {
        const maxRedirects = options.maxRedirects ?? this.context.securityConfig.networkGuard.maxRedirects;
        let currentUrl = buildUrlWithParams(options.url, options.params);
        let method = options.method ?? "GET";
        let data = options.data;
        let requestHeaders = options.headers;
        for (let redirectCount = 0; redirectCount <= maxRedirects; redirectCount++) {
            try {
                if (redirectCount === 0) {
                    await validateNetworkTarget(currentUrl, { config: this.context.securityConfig });
                }
                else {
                    await validateRedirectTarget(currentUrl, { config: this.context.securityConfig });
                }
            }
            catch (error) {
                if (error instanceof NetworkGuardError) {
                    logBlockedRequest(this.context, error);
                }
                throw error;
            }
            const agent = getProxyAgent(currentUrl, this.context.webConfig.proxy);
            let response;
            try {
                response = await axios.request({
                    method,
                    url: currentUrl,
                    data,
                    headers: {
                        "User-Agent": WEB_USER_AGENT,
                        Accept: "*/*",
                        ...requestHeaders,
                    },
                    responseType: "arraybuffer",
                    // Never throw on status: callers inspect it themselves.
                    validateStatus: () => true,
                    timeout: options.timeoutMs,
                    signal: options.signal,
                    // Redirects are followed by this loop so each hop can be validated first.
                    maxRedirects: 0,
                    maxContentLength: options.maxResponseBytes ?? Number.POSITIVE_INFINITY,
                    // Proxying is handled through the agents, not the library's own proxy option.
                    proxy: false,
                    httpAgent: agent,
                    httpsAgent: agent,
                });
            }
            catch (error) {
                if (options.maxResponseBytes &&
                    typeof error?.message === "string" &&
                    error.message.includes("maxContentLength")) {
                    throw new Error(`Response exceeds maxResponseBytes (${options.maxResponseBytes} bytes)`);
                }
                throw error;
            }
            const responseHeaders = normalizeHeaders(response.headers);
            if (isRedirectStatus(response.status) && responseHeaders.location) {
                if (redirectCount === maxRedirects) {
                    throw new Error(`Too many redirects while fetching ${options.url}`);
                }
                const nextUrl = new URL(responseHeaders.location, currentUrl).toString();
                const dropBody = response.status === 303 ||
                    ((response.status === 301 || response.status === 302) && method === "POST");
                requestHeaders = headersForRedirect(requestHeaders, currentUrl, nextUrl, dropBody);
                if (dropBody) {
                    method = "GET";
                    data = undefined;
                }
                currentUrl = nextUrl;
                continue;
            }
            const body = Buffer.isBuffer(response.data) ? response.data : Buffer.from(response.data);
            return {
                status: response.status,
                finalUrl: currentUrl,
                headers: responseHeaders,
                body,
            };
        }
        throw new Error(`Too many redirects while fetching ${options.url}`);
    }
    /**
     * Performs a request that prefers JSON (`Accept: application/json` unless
     * the caller overrides it) and parses the body.
     *
     * @returns The response together with the parsed JSON value.
     * @throws Error when the body is not valid JSON.
     */
    async requestJson(options) {
        const response = await this.request({
            ...options,
            headers: {
                Accept: "application/json",
                ...options.headers,
            },
        });
        const text = decodeBody(response.body);
        try {
            return {
                response,
                data: JSON.parse(text),
            };
        }
        catch (error) {
            throw new Error(`Expected JSON response from ${options.url}, got invalid JSON: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    /**
     * Performs a request and decodes the body as UTF-8 text.
     *
     * @returns The response together with the decoded text.
     */
    async requestText(options) {
        const response = await this.request(options);
        return {
            response,
            text: decodeBody(response.body),
        };
    }
}
/** Creates a WebHttpClient bound to the given runtime context. */
export function createWebHttpClient(context) {
    return new WebHttpClient(context);
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { PipiclawWebFetchConfig, PipiclawWebSearchConfig, PipiclawWebToolsConfig } from "../tools/config.js";
|
|
2
|
+
export interface ResolvedWebSearchRequest {
|
|
3
|
+
query: string;
|
|
4
|
+
count: number;
|
|
5
|
+
timeoutMs: number;
|
|
6
|
+
}
|
|
7
|
+
export interface ResolvedWebFetchRequest {
|
|
8
|
+
url: string;
|
|
9
|
+
extractMode: "markdown" | "text";
|
|
10
|
+
maxChars: number;
|
|
11
|
+
timeoutMs: number;
|
|
12
|
+
maxImageBytes: number;
|
|
13
|
+
maxResponseBytes: number;
|
|
14
|
+
preferJina: boolean;
|
|
15
|
+
enableJinaFallback: boolean;
|
|
16
|
+
}
|
|
17
|
+
export declare function resolveWebSearchRequest(config: PipiclawWebSearchConfig, query: string, count?: number): ResolvedWebSearchRequest;
|
|
18
|
+
export declare function resolveWebFetchRequest(config: PipiclawWebFetchConfig, url: string, extractMode?: "markdown" | "text", maxChars?: number): ResolvedWebFetchRequest;
|
|
19
|
+
export declare function isWebToolsEnabled(config: PipiclawWebToolsConfig): boolean;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
 * Combines a tool call's search arguments with the configured defaults.
 *
 * @param config Search configuration supplying `maxResults` and `timeoutMs`.
 * @param {string} query Search query; surrounding whitespace is removed.
 * @param {number} [count] Requested result count. Values that are not a finite
 *   number or fall outside 1-10 are replaced by `config.maxResults`.
 * @returns {{ query: string, count: number, timeoutMs: number }}
 */
export function resolveWebSearchRequest(config, query, count) {
    const trimmedQuery = query.trim();
    const resolvedCount = clamp(count, config.maxResults, 1, 10);
    const { timeoutMs } = config;
    return { query: trimmedQuery, count: resolvedCount, timeoutMs };
}
|
|
8
|
+
/**
 * Combines a tool call's fetch arguments with the configured defaults.
 *
 * @param config Fetch configuration supplying defaults and limits.
 * @param {string} url URL to fetch; surrounding whitespace is removed.
 * @param {"markdown" | "text"} [extractMode] Requested extraction mode; any
 *   other value selects `config.defaultExtractMode`.
 * @param {number} [maxChars] Requested character limit. Values that are not a
 *   finite number or are below 100 are replaced by `config.maxChars`.
 * @returns The resolved request, including the limits copied from `config`.
 */
export function resolveWebFetchRequest(config, url, extractMode, maxChars) {
    const hasExplicitMode = extractMode === "text" || extractMode === "markdown";
    return {
        url: url.trim(),
        extractMode: hasExplicitMode ? extractMode : config.defaultExtractMode,
        maxChars: clamp(maxChars, config.maxChars, 100),
        timeoutMs: config.timeoutMs,
        maxImageBytes: config.maxImageBytes,
        maxResponseBytes: config.maxResponseBytes,
        preferJina: config.preferJina,
        enableJinaFallback: config.enableJinaFallback,
    };
}
|
|
20
|
+
/**
 * Reports whether the web tools are enabled. Only an explicit `enable: false`
 * disables them; a missing or non-boolean value counts as enabled.
 */
export function isWebToolsEnabled(config) {
    const explicitlyDisabled = config.enable === false;
    return !explicitlyDisabled;
}
|
|
23
|
+
/**
 * Validates a numeric argument and floors it to an integer.
 *
 * Out-of-range input is not pinned to the nearest bound: a value that is not a
 * finite number, or whose floor lies outside [minimum, maximum], is replaced by
 * `fallback`.
 *
 * @param {unknown} value Raw value supplied by the caller.
 * @param {number} fallback Returned whenever `value` is rejected.
 * @param {number} minimum Smallest accepted integer (inclusive).
 * @param {number} [maximum] Largest accepted integer (inclusive); no upper bound when undefined.
 */
function clamp(value, fallback, minimum, maximum) {
    if (typeof value === "number" && Number.isFinite(value)) {
        const whole = Math.floor(value);
        const meetsLowerBound = !(whole < minimum);
        const meetsUpperBound = maximum === undefined || !(whole > maximum);
        if (meetsLowerBound && meetsUpperBound) {
            return whole;
        }
    }
    return fallback;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export declare function htmlToText(html: string): string;
|
|
2
|
+
export declare function htmlToMarkdown(html: string): string;
|
|
3
|
+
export declare function extractReadableContent(html: string, url: string, extractMode: "markdown" | "text"): {
|
|
4
|
+
title: string;
|
|
5
|
+
content: string;
|
|
6
|
+
extractor: string;
|
|
7
|
+
};
|