prepia 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +312 -0
- package/bin/prepia.mjs +119 -0
- package/package.json +53 -0
- package/skill/SKILL.md +148 -0
- package/skill/config.json +29 -0
- package/src/analytics/dashboard.mjs +84 -0
- package/src/analytics/tracker.mjs +131 -0
- package/src/api/middleware.mjs +219 -0
- package/src/api/routes.mjs +142 -0
- package/src/api/server.mjs +150 -0
- package/src/cache/disk-store.mjs +199 -0
- package/src/cache/manager.mjs +142 -0
- package/src/cache/memory-store.mjs +205 -0
- package/src/chain/dag.mjs +209 -0
- package/src/chain/executor.mjs +103 -0
- package/src/chain/scheduler.mjs +89 -0
- package/src/client/adapters.mjs +483 -0
- package/src/client/connector.mjs +391 -0
- package/src/client/index.mjs +483 -0
- package/src/client/websocket.mjs +353 -0
- package/src/core/context-packager.mjs +169 -0
- package/src/core/engine.mjs +338 -0
- package/src/core/event-bus.mjs +84 -0
- package/src/core/prepimshot.mjs +120 -0
- package/src/core/task-decomposer.mjs +158 -0
- package/src/edge/lite.mjs +90 -0
- package/src/guard/checker.mjs +123 -0
- package/src/guard/fact-checker.mjs +105 -0
- package/src/guard/hallucination.mjs +108 -0
- package/src/index.mjs +67 -0
- package/src/models/local-model.mjs +171 -0
- package/src/models/provider.mjs +192 -0
- package/src/models/router.mjs +156 -0
- package/src/morph/optimizer.mjs +142 -0
- package/src/network/p2p.mjs +146 -0
- package/src/persona/detector.mjs +118 -0
- package/src/plugins/loader.mjs +120 -0
- package/src/plugins/registry.mjs +164 -0
- package/src/plugins/sandbox.mjs +79 -0
- package/src/rate/limiter.mjs +145 -0
- package/src/rate/shield.mjs +150 -0
- package/src/script/executor.mjs +164 -0
- package/src/script/parser.mjs +134 -0
- package/src/security/privacy.mjs +108 -0
- package/src/security/sanitizer.mjs +133 -0
- package/src/shadow/daemon.mjs +128 -0
- package/src/stream/handler.mjs +204 -0
- package/src/tools/calculator.mjs +312 -0
- package/src/tools/file-ops.mjs +138 -0
- package/src/tools/http-client.mjs +127 -0
- package/src/tools/orchestrator.mjs +205 -0
- package/src/tools/web-scraper.mjs +159 -0
- package/src/tools/web-search.mjs +129 -0
- package/src/vault/knowledge-base.mjs +207 -0
- package/src/vault/pattern-learner.mjs +192 -0
- package/workflows/analyze.json +32 -0
- package/workflows/automate.json +32 -0
- package/workflows/research.json +37 -0
- package/workflows/summarize.json +32 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview File system operations tool.
|
|
3
|
+
* @module tools/file-ops
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { promises as fs } from 'node:fs';
|
|
7
|
+
import path from 'node:path';
|
|
8
|
+
|
|
9
|
+
/**
 * Load a file from disk and return its decoded contents.
 * @param {string} filepath - Location of the file to read
 * @param {Object} [options]
 * @param {string} [options.encoding='utf-8'] - Encoding handed to fs.readFile
 * @returns {Promise<string>} Resolves with the file contents
 */
export async function readFile(filepath, options = {}) {
  // Only an absent (undefined) encoding falls back to UTF-8.
  const encoding = options.encoding === undefined ? 'utf-8' : options.encoding;
  const contents = await fs.readFile(filepath, encoding);
  return contents;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Write text to a file as UTF-8, optionally creating missing parent folders first.
 * @param {string} filepath - Destination file path
 * @param {string} content - Text to store
 * @param {Object} [options]
 * @param {boolean} [options.createDirs=true] - When truthy, the parent directory
 *   chain is created (recursively) before writing
 * @returns {Promise<void>}
 */
export async function writeFile(filepath, content, options = {}) {
  // Only an absent (undefined) flag falls back to the default of true.
  const shouldCreateDirs = options.createDirs === undefined ? true : options.createDirs;

  if (shouldCreateDirs) {
    const parentDir = path.dirname(filepath);
    await fs.mkdir(parentDir, { recursive: true });
  }

  await fs.writeFile(filepath, content, 'utf-8');
}
|
|
36
|
+
|
|
37
|
+
/**
 * Add text to the end of a file, encoded as UTF-8.
 * @param {string} filepath - File to append to
 * @param {string} content - Text to add
 * @returns {Promise<void>}
 */
export async function appendFile(filepath, content) {
  const encoding = 'utf-8';
  await fs.appendFile(filepath, content, encoding);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Report whether a path can be accessed.
 * Any failure from fs.access is reported as `false`.
 * @param {string} filepath - Path to probe
 * @returns {Promise<boolean>} true when fs.access succeeds, false otherwise
 */
export async function exists(filepath) {
  let reachable = true;
  try {
    await fs.access(filepath);
  } catch {
    reachable = false;
  }
  return reachable;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Summarise filesystem metadata for a path.
 * @param {string} filepath - Path to inspect
 * @returns {Promise<Object>} Plain object with `size`, `isFile`, `isDirectory`,
 *   and ISO-8601 strings `created` (from birthtime) and `modified` (from mtime)
 */
export async function stat(filepath) {
  const info = await fs.stat(filepath);
  const toIso = (date) => date.toISOString();

  const summary = {
    size: info.size,
    isFile: info.isFile(),
    isDirectory: info.isDirectory(),
    created: toIso(info.birthtime),
    modified: toIso(info.mtime),
  };
  return summary;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Enumerate the entries of a directory.
 *
 * Without `recursive`, the bare entry names returned by fs.readdir are passed
 * through. With `recursive`, every entry (files and directories alike) is
 * reported as a path joined onto `dirpath`, in depth-first pre-order: a
 * directory appears immediately before its own contents.
 *
 * @param {string} dirpath - Directory to list
 * @param {Object} [options]
 * @param {boolean} [options.recursive=false] - Descend into sub-directories
 * @returns {Promise<string[]>}
 */
export async function listDir(dirpath, options = {}) {
  const recursive = options.recursive === undefined ? false : options.recursive;
  if (recursive) {
    return collectEntries(dirpath);
  }
  return fs.readdir(dirpath);

  // Gather `dir`'s entries followed by the entries of each sub-directory.
  async function collectEntries(dir) {
    const found = [];
    const dirents = await fs.readdir(dir, { withFileTypes: true });
    for (const dirent of dirents) {
      const entryPath = path.join(dir, dirent.name);
      found.push(entryPath);
      if (!dirent.isDirectory()) {
        continue;
      }
      const nested = await collectEntries(entryPath);
      for (const nestedPath of nested) {
        found.push(nestedPath);
      }
    }
    return found;
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Remove a file, reporting success as a boolean instead of throwing.
 * @param {string} filepath - File to unlink
 * @returns {Promise<boolean>} true if fs.unlink succeeded, false on any failure
 */
export async function deleteFile(filepath) {
  let removed = true;
  try {
    await fs.unlink(filepath);
  } catch {
    // Best-effort: every unlink failure is reported as "not deleted".
    removed = false;
  }
  return removed;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Ensure a directory exists, creating any missing ancestors as well.
 * @param {string} dirpath - Directory to create
 * @returns {Promise<void>}
 */
export async function mkdir(dirpath) {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(dirpath, mkdirOptions);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Duplicate a file, creating the destination's parent directories if needed.
 * @param {string} src - File to copy from
 * @param {string} dest - File to copy to
 * @returns {Promise<void>}
 */
export async function copyFile(src, dest) {
  const destDir = path.dirname(dest);
  await fs.mkdir(destDir, { recursive: true });
  await fs.copyFile(src, dest);
}
|
|
137
|
+
|
|
138
|
+
// Default export: every file operation bundled into a single object.
export default {
  readFile,
  writeFile,
  appendFile,
  exists,
  stat,
  listDir,
  deleteFile,
  mkdir,
  copyFile,
};
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview HTTP client with retries, timeouts, and error handling.
|
|
3
|
+
* Uses Node 22 global fetch.
|
|
4
|
+
* @module tools/http-client
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Make an HTTP request with retries and timeout.
 *
 * Each attempt gets its own AbortController; the timeout covers the whole
 * attempt (connection, headers and reading the body). Attempts that throw
 * (network error, abort) are retried with exponential backoff:
 * `retryDelay * 2^attempt`. HTTP error statuses do NOT throw — they are
 * returned with `ok: false`.
 *
 * The response body is always read as text and then parsed as JSON when
 * possible; otherwise the raw text is returned.
 *
 * @param {string} url - Request URL
 * @param {Object} [options]
 * @param {string} [options.method='GET'] - HTTP method
 * @param {Object} [options.headers] - Request headers
 * @param {*} [options.body] - Request body; non-strings are JSON-stringified.
 *   Ignored for GET/HEAD and for falsy values.
 * @param {number} [options.timeout=30000] - Per-attempt timeout in ms
 * @param {number} [options.retries=3] - Max retries (total attempts = retries + 1)
 * @param {number} [options.retryDelay=1000] - Base delay between retries in ms
 * @returns {Promise<Object>} `{ status, statusText, headers, body, ok }`
 * @throws {Error} When every attempt fails; the last underlying error is
 *   available as `error.cause`.
 */
export async function request(url, options = {}) {
  const {
    method = 'GET',
    headers = {},
    body,
    timeout = 30000,
    retries = 3,
    retryDelay = 1000,
  } = options;

  // fetch() normalises the common method names case-insensitively, so the body guard must too.
  const upperMethod = String(method).toUpperCase();
  const allowsBody = upperMethod !== 'GET' && upperMethod !== 'HEAD';

  // Header names are case-insensitive; a literal key lookup would miss e.g. "CONTENT-TYPE"
  // and produce a duplicated header once fetch merges the names.
  const hasContentType = Object.keys(headers ?? {}).some(
    (key) => key.toLowerCase() === 'content-type',
  );

  let lastError;
  for (let attempt = 0; attempt <= retries; attempt++) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeout);

    try {
      const fetchOptions = {
        method,
        headers: { 'User-Agent': 'Prepia/1.0', ...headers },
        signal: controller.signal,
      };

      if (body && allowsBody) {
        fetchOptions.body = typeof body === 'string' ? body : JSON.stringify(body);
        if (!hasContentType) {
          fetchOptions.headers['Content-Type'] = 'application/json';
        }
      }

      const response = await fetch(url, fetchOptions);
      const responseBody = await response.text();

      let parsedBody;
      try {
        parsedBody = JSON.parse(responseBody);
      } catch {
        // Not JSON: hand back the raw text.
        parsedBody = responseBody;
      }

      return {
        status: response.status,
        statusText: response.statusText,
        headers: Object.fromEntries(response.headers.entries()),
        body: parsedBody,
        ok: response.ok,
      };
    } catch (err) {
      lastError = err;
    } finally {
      // Always release the timer — on success, on failure and on abort — so a failed
      // attempt does not leave a live timer behind.
      clearTimeout(timer);
    }

    if (attempt < retries) {
      await sleep(retryDelay * Math.pow(2, attempt));
    }
  }

  throw new Error(
    `HTTP request failed after ${retries + 1} attempts: ${lastError?.message}`,
    { cause: lastError },
  );
}
|
|
75
|
+
|
|
76
|
+
/**
 * Issue a GET request through `request`.
 * Any `method` supplied in `options` is overridden with 'GET'.
 * @param {string} url
 * @param {Object} [options] - Forwarded to `request`
 * @returns {Promise<Object>}
 */
export function get(url, options = {}) {
  const getOptions = { ...options, method: 'GET' };
  return request(url, getOptions);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Issue a POST request through `request`.
 * The `body` argument and the 'POST' method take precedence over any
 * `body`/`method` present in `options`.
 * @param {string} url
 * @param {*} body
 * @param {Object} [options] - Forwarded to `request`
 * @returns {Promise<Object>}
 */
export function post(url, body, options = {}) {
  const postOptions = { ...options, method: 'POST', body };
  return request(url, postOptions);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Issue a PUT request through `request`.
 * The `body` argument and the 'PUT' method take precedence over any
 * `body`/`method` present in `options`.
 * @param {string} url
 * @param {*} body
 * @param {Object} [options] - Forwarded to `request`
 * @returns {Promise<Object>}
 */
export function put(url, body, options = {}) {
  const putOptions = { ...options, method: 'PUT', body };
  return request(url, putOptions);
}
|
|
107
|
+
|
|
108
|
+
/**
 * Issue a DELETE request through `request`.
 * Any `method` supplied in `options` is overridden with 'DELETE'.
 * @param {string} url
 * @param {Object} [options] - Forwarded to `request`
 * @returns {Promise<Object>}
 */
export function del(url, options = {}) {
  const deleteOptions = { ...options, method: 'DELETE' };
  return request(url, deleteOptions);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Pause for the given number of milliseconds.
 * @param {number} ms - Delay passed to setTimeout
 * @returns {Promise<void>} Resolves (with no value) once the timer fires
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
126
|
+
|
|
127
|
+
// Default export: the generic `request` plus the per-method helpers.
export default {
  request,
  get,
  post,
  put,
  del,
};
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Tool routing and execution orchestrator.
|
|
3
|
+
* @module tools/orchestrator
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { EventEmitter } from 'node:events';
|
|
7
|
+
import * as webSearch from './web-search.mjs';
|
|
8
|
+
import * as webScraper from './web-scraper.mjs';
|
|
9
|
+
import * as fileOps from './file-ops.mjs';
|
|
10
|
+
import { evaluate } from './calculator.mjs';
|
|
11
|
+
import * as httpClient from './http-client.mjs';
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* @typedef {Object} ToolCall
|
|
15
|
+
* @property {string} tool - Tool name
|
|
16
|
+
* @property {Object} params - Tool parameters
|
|
17
|
+
* @property {string} [id] - Call ID for tracking
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* @typedef {Object} ToolResult
|
|
22
|
+
* @property {string} id - Call ID
|
|
23
|
+
* @property {string} tool - Tool name
|
|
24
|
+
* @property {*} result - Tool result
|
|
25
|
+
* @property {string|null} error - Error message if failed
|
|
26
|
+
* @property {number} duration - Execution time in ms
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
/**
 * Built-in tool registry, keyed by tool name.
 * Each entry carries a `name`, a human-readable `description`, and an async
 * `execute(params)` that forwards to the matching tool module. Where a tool
 * accepts options, the whole `params` object is passed through as options.
 */
const BUILTIN_TOOLS = {
  'web-search': {
    name: 'web-search',
    description: 'Search the web for information',
    async execute(params) {
      return webSearch.search(params.query, params);
    },
  },
  'web-scrape': {
    name: 'web-scrape',
    description: 'Extract content from a web page',
    async execute(params) {
      return webScraper.scrape(params.url, params);
    },
  },
  'file-read': {
    name: 'file-read',
    description: 'Read file contents',
    async execute(params) {
      return fileOps.readFile(params.path);
    },
  },
  'file-write': {
    name: 'file-write',
    description: 'Write content to a file',
    async execute(params) {
      return fileOps.writeFile(params.path, params.content);
    },
  },
  'calculator': {
    name: 'calculator',
    description: 'Evaluate math expressions',
    async execute(params) {
      return evaluate(params.expression);
    },
  },
  'http-get': {
    name: 'http-get',
    description: 'Make an HTTP GET request',
    async execute(params) {
      return httpClient.get(params.url, params);
    },
  },
  'http-post': {
    name: 'http-post',
    description: 'Make an HTTP POST request',
    async execute(params) {
      return httpClient.post(params.url, params.body, params);
    },
  },
};
|
|
67
|
+
|
|
68
|
+
/**
 * Registry and runner for tools.
 *
 * Holds a name → tool map (seeded from BUILTIN_TOOLS), executes tool calls
 * while emitting lifecycle events, and offers a simple keyword-based router.
 *
 * Events emitted by `executeOne`:
 *  - `tool:start`    `{ id, tool }`
 *  - `tool:complete` `{ id, tool, duration }`
 *  - `tool:error`    `{ id, tool, error, duration }`
 */
export class Orchestrator extends EventEmitter {
  /**
   * Create a new Orchestrator.
   * @param {Object} [options]
   * @param {Object} [options.tools] - Additional tools to register, keyed by name;
   *   an entry with the same name as a built-in replaces it
   * @param {number} [options.maxConcurrency=5] - Max parallel tool executions
   */
  constructor(options = {}) {
    super();
    this._tools = new Map(Object.entries(BUILTIN_TOOLS));
    if (options.tools) {
      for (const [name, tool] of Object.entries(options.tools)) {
        this._tools.set(name, tool);
      }
    }
    this._maxConcurrency = options.maxConcurrency ?? 5;
  }

  /**
   * Register a tool (replacing any existing tool with the same name).
   * @param {string} name - Tool name
   * @param {Object} tool - Tool definition
   * @param {string} tool.description - Tool description
   * @param {Function} tool.execute - Async execution function
   */
  register(name, tool) {
    this._tools.set(name, tool);
  }

  /**
   * Unregister a tool.
   * @param {string} name
   * @returns {boolean} true if a tool with that name existed and was removed
   */
  unregister(name) {
    return this._tools.delete(name);
  }

  /**
   * Get all registered tool names, in registration order.
   * @returns {string[]}
   */
  listTools() {
    return Array.from(this._tools.keys());
  }

  /**
   * Get tool definitions for LLM context.
   * @returns {Object[]} One `{ name, description, parameters }` per tool;
   *   `parameters` defaults to `{}` when the tool declares none
   */
  getToolDefinitions() {
    return Array.from(this._tools.entries()).map(([name, tool]) => ({
      name,
      description: tool.description,
      parameters: tool.parameters || {},
    }));
  }

  /**
   * Execute a single tool call.
   *
   * Tool failures are never thrown: they come back as a result whose `error`
   * is always a string (non-Error throwables are stringified) and whose
   * `result` is null.
   *
   * @param {ToolCall} call
   * @returns {Promise<ToolResult>}
   */
  async executeOne(call) {
    // Tolerate a missing call object; it is reported as an unknown tool below.
    const { tool: toolName, params, id: callId } = call ?? {};
    const tool = this._tools.get(toolName);
    const id = callId || `call_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    const start = Date.now();

    if (!tool) {
      return { id, tool: toolName, result: null, error: `Unknown tool: ${toolName}`, duration: 0 };
    }

    try {
      this.emit('tool:start', { id, tool: toolName });
      const result = await tool.execute(params || {});
      const duration = Date.now() - start;
      this.emit('tool:complete', { id, tool: toolName, duration });
      return { id, tool: toolName, result, error: null, duration };
    } catch (err) {
      const duration = Date.now() - start;
      // `throw 'text'` or `throw {}` has no usable .message; without this the result
      // would carry `error: undefined` and look like a success.
      const message = err instanceof Error ? err.message : String(err);
      this.emit('tool:error', { id, tool: toolName, error: message, duration });
      return { id, tool: toolName, result: null, error: message, duration };
    }
  }

  /**
   * Execute multiple tool calls in parallel with concurrency limit.
   * Results are returned in the same order as `calls`.
   * @param {ToolCall[]} calls
   * @returns {Promise<ToolResult[]>}
   */
  async executeMany(calls) {
    const tasks = [];
    const inFlight = new Set();

    for (const call of calls) {
      // finally (not then) so a rejected task also leaves the in-flight set.
      const task = this.executeOne(call).finally(() => {
        inFlight.delete(task);
      });
      inFlight.add(task);
      tasks.push(task);

      if (inFlight.size >= this._maxConcurrency) {
        // Wait for a free slot. A rejection is deliberately not raised here: it
        // resurfaces from Promise.all below, where every task has a handler attached.
        await Promise.race(inFlight).catch(() => {});
      }
    }

    return Promise.all(tasks);
  }

  /**
   * Route a task to the appropriate tool based on intent.
   * Matching is by case-insensitive keyword; several tools may match at once.
   * @param {string} intent - Task intent
   * @param {Object} context - Task context (query, url, expression, path)
   * @returns {ToolCall[]}
   */
  route(intent, context = {}) {
    const calls = [];
    const lower = intent.toLowerCase();

    if (lower.includes('search') || lower.includes('find') || lower.includes('look up')) {
      calls.push({ tool: 'web-search', params: { query: context.query || intent } });
    }
    if (lower.includes('scrape') || lower.includes('extract') || lower.includes('read page')) {
      calls.push({ tool: 'web-scrape', params: { url: context.url } });
    }
    if (lower.includes('calculate') || lower.includes('compute') || lower.includes('math')) {
      calls.push({ tool: 'calculator', params: { expression: context.expression } });
    }
    if (lower.includes('read file') || lower.includes('load file')) {
      calls.push({ tool: 'file-read', params: { path: context.path } });
    }

    return calls;
  }
}
|
|
204
|
+
|
|
205
|
+
export default Orchestrator;
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Web content extraction - fetch and extract readable content from URLs.
|
|
3
|
+
* @module tools/web-scraper
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Fetch and extract readable content from a URL.
 *
 * The response body is always treated as HTML text. Redirects are followed
 * and a non-2xx status is treated as a failure.
 *
 * @param {string} url - URL to scrape
 * @param {Object} [options]
 * @param {number} [options.timeout=15000] - Request timeout in ms
 * @param {number} [options.maxLength=50000] - Max length of the returned `text`
 * @returns {Promise<Object>} `{ url, title, description, text, links, headings }`
 * @throws {Error} 'URL must be a non-empty string' for an invalid `url`;
 *   otherwise `Failed to scrape <url>: <reason>` with the underlying error
 *   preserved as `error.cause`.
 */
export async function scrape(url, options = {}) {
  if (!url || typeof url !== 'string') {
    throw new Error('URL must be a non-empty string');
  }

  const { timeout = 15000, maxLength = 50000 } = options ?? {};

  try {
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (compatible; Prepia/1.0)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      },
      signal: AbortSignal.timeout(timeout),
      redirect: 'follow',
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    const contentType = response.headers.get('content-type') || '';
    const html = await response.text();

    const extracted = extractContent(html, contentType);
    const title = extractTitle(html);
    const description = extractMeta(html, 'description');

    return {
      url,
      title,
      description,
      text: extracted.text.substring(0, maxLength),
      links: extracted.links,
      headings: extracted.headings,
    };
  } catch (err) {
    // Keep the original error (stack, name, code) reachable through `cause`.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to scrape ${url}: ${reason}`, { cause: err });
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Decode the handful of HTML entities this scraper understands.
 * `&amp;` is decoded last so that e.g. `&amp;lt;` becomes the text `&lt;`
 * rather than being decoded twice into `<`.
 * @param {string} str
 * @returns {string}
 */
function decodeBasicEntities(str) {
  return str
    .replace(/&nbsp;|\u00a0/gi, ' ')
    .replace(/&lt;/gi, '<')
    .replace(/&gt;/gi, '>')
    .replace(/&quot;/gi, '"')
    .replace(/&#0*39;|&apos;/gi, "'")
    .replace(/&amp;/gi, '&');
}

/**
 * Extract readable content from HTML using regular expressions.
 *
 * Script, style, nav, footer, header, aside and form elements and comments
 * are dropped first. Links and headings are collected from what remains; the
 * text comes from the first <article>/<main> element when present, otherwise
 * from the whole cleaned document.
 *
 * @param {string} html
 * @param {string} contentType - Accepted for interface compatibility; not used
 * @returns {Object} `{ text, links, headings }` where `links` is at most 50
 *   `{ href, text }` pairs and `headings` is a list of `{ level, text }`
 */
function extractContent(html, contentType) {
  // Remove scripts, styles, nav, footer, header, aside, forms and comments
  const cleaned = html
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, '')
    .replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, '')
    .replace(/<header[^>]*>[\s\S]*?<\/header>/gi, '')
    .replace(/<aside[^>]*>[\s\S]*?<\/aside>/gi, '')
    .replace(/<form[^>]*>[\s\S]*?<\/form>/gi, '')
    .replace(/<!--[\s\S]*?-->/g, '');

  const stripTags = (fragment) => decodeBasicEntities(fragment.replace(/<[^>]*>/g, '')).trim();

  // Extract links. `\b` keeps <abbr>/<article> from matching as <a>; tag and attribute
  // names are matched case-insensitively and both quote styles are accepted.
  const links = [];
  const linkPattern = /<a\b[^>]*?\shref\s*=\s*(?:"([^"]*)"|'([^']*)')[^>]*>([\s\S]*?)<\/a>/gi;
  for (const match of cleaned.matchAll(linkPattern)) {
    const href = match[1] ?? match[2];
    const text = stripTags(match[3]);
    const isFragment = href.startsWith('#');
    const isScript = /^\s*javascript:/i.test(href);
    if (href && text && !isFragment && !isScript) {
      links.push({ href, text });
    }
  }

  // Extract headings
  const headings = [];
  const headingPattern = /<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1>/gi;
  for (const match of cleaned.matchAll(headingPattern)) {
    headings.push({
      level: Number.parseInt(match[1], 10),
      text: stripTags(match[2]),
    });
  }

  // Extract main content - try article/main first, fall back to the whole document
  let mainContent = cleaned;
  const articleMatch = cleaned.match(/<(article|main)\b[^>]*>([\s\S]*?)<\/\1>/i);
  if (articleMatch) {
    mainContent = articleMatch[2];
  }

  // Turn block boundaries into line breaks, strip remaining tags, then decode entities.
  // Entities are decoded AFTER tag stripping so that an escaped "&lt;b&gt;" survives as text.
  const withBreaks = mainContent
    .replace(/<br\b[^>]*>/gi, '\n')
    .replace(/<\/p>/gi, '\n\n')
    .replace(/<\/div>/gi, '\n')
    .replace(/<\/li>/gi, '\n')
    .replace(/<\/tr>/gi, '\n')
    .replace(/<\/h[1-6]>/gi, '\n') // keep a heading from fusing with the text after it
    .replace(/<[^>]*>/g, '');

  const text = decodeBasicEntities(withBreaks)
    .replace(/\n{3,}/g, '\n\n')
    .replace(/[ \t]+/g, ' ')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line.length > 0)
    .join('\n')
    .trim();

  return { text, links: links.slice(0, 50), headings };
}
|
|
126
|
+
|
|
127
|
+
/**
 * Extract the page title.
 * The first <title> element is used; nested tags are removed and runs of
 * whitespace (including line breaks inside the element) collapse to a single
 * space.
 * @param {string} html
 * @returns {string} The title text, or '' when there is no <title> element
 */
function extractTitle(html) {
  // [\s\S] rather than `.` so a title that spans several lines still matches.
  const match = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
  if (!match) {
    return '';
  }
  return match[1]
    .replace(/<[^>]*>/g, '')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
136
|
+
|
|
137
|
+
/**
 * Extract a meta tag content.
 *
 * Scans <meta> tags in document order and returns the `content` of the first
 * one whose `name` or `property` attribute equals `name` (case-insensitive).
 * Attribute order inside the tag does not matter, and a value may contain the
 * other kind of quote (e.g. content="It's here").
 *
 * Limitation: a `>` inside an attribute value ends the tag early.
 *
 * @param {string} html
 * @param {string} name - Value of the name/property attribute to look for
 * @returns {string} Trimmed content, or '' when no matching tag has content
 */
function extractMeta(html, name) {
  // Compare as plain strings: `name` is data, not a pattern, so "a.b" must not match "axb".
  const wanted = String(name).toLowerCase();
  const tagPattern = /<meta\b[^>]*>/gi;
  // attribute = "double quoted" | 'single quoted' — each value ends at its own quote type.
  const attrPattern = /([^\s"'=<>/]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;

  for (const [tag] of html.matchAll(tagPattern)) {
    let matchesName = false;
    let content;

    for (const [, rawAttr, doubleQuoted, singleQuoted] of tag.matchAll(attrPattern)) {
      const attr = rawAttr.toLowerCase();
      const value = doubleQuoted ?? singleQuoted;
      if (attr === 'content') {
        if (content === undefined) {
          content = value;
        }
      } else if ((attr === 'name' || attr === 'property') && value.toLowerCase() === wanted) {
        matchesName = true;
      }
    }

    if (matchesName && content !== undefined) {
      return content.trim();
    }
  }
  return '';
}
|
|
148
|
+
|
|
149
|
+
/**
 * Convenience wrapper: scrape a URL with default options and return only the
 * extracted text.
 * @param {string} url
 * @returns {Promise<string>} The `text` field of the scrape result
 */
export async function getText(url) {
  const { text } = await scrape(url);
  return text;
}
|
|
158
|
+
|
|
159
|
+
// Default export: the public scraping helpers bundled into a single object.
export default {
  scrape,
  getText,
};
|