opc-agent 4.1.0 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +20 -20
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +14 -14
  3. package/.github/PULL_REQUEST_TEMPLATE.md +13 -13
  4. package/CHANGELOG.md +48 -48
  5. package/CONTRIBUTING.md +36 -36
  6. package/README.zh-CN.md +497 -497
  7. package/dist/channels/wechat.js +6 -6
  8. package/dist/deploy/index.js +56 -56
  9. package/dist/studio/server.js +30 -1
  10. package/dist/studio-ui/index.html +230 -10
  11. package/dist/ui/components.js +105 -105
  12. package/examples/README.md +22 -22
  13. package/examples/basic-agent.ts +90 -90
  14. package/examples/brain-integration.ts +71 -71
  15. package/examples/multi-channel.ts +74 -74
  16. package/fix-sidebar.mjs +188 -188
  17. package/install.ps1 +154 -154
  18. package/install.sh +164 -164
  19. package/package.json +1 -1
  20. package/scripts/install.ps1 +31 -31
  21. package/scripts/install.sh +40 -40
  22. package/serve-studio.js +13 -13
  23. package/serve-test.js +25 -25
  24. package/src/channels/dingtalk.ts +46 -46
  25. package/src/channels/email.ts +351 -351
  26. package/src/channels/feishu.ts +349 -349
  27. package/src/channels/googlechat.ts +42 -42
  28. package/src/channels/imessage.ts +31 -31
  29. package/src/channels/irc.ts +82 -82
  30. package/src/channels/line.ts +32 -32
  31. package/src/channels/matrix.ts +33 -33
  32. package/src/channels/mattermost.ts +57 -57
  33. package/src/channels/msteams.ts +32 -32
  34. package/src/channels/nostr.ts +32 -32
  35. package/src/channels/qq.ts +33 -33
  36. package/src/channels/signal.ts +32 -32
  37. package/src/channels/sms.ts +33 -33
  38. package/src/channels/telegram.ts +616 -616
  39. package/src/channels/twitch.ts +65 -65
  40. package/src/channels/voice-call.ts +100 -100
  41. package/src/channels/websocket.ts +399 -399
  42. package/src/channels/wechat.ts +329 -329
  43. package/src/channels/whatsapp.ts +32 -32
  44. package/src/cli/chat.ts +99 -99
  45. package/src/cli/setup.ts +314 -314
  46. package/src/core/agent.ts +476 -476
  47. package/src/core/api-server.ts +277 -277
  48. package/src/core/audio.ts +98 -98
  49. package/src/core/collaboration.ts +275 -275
  50. package/src/core/context-discovery.ts +85 -85
  51. package/src/core/context-refs.ts +140 -140
  52. package/src/core/gateway.ts +106 -106
  53. package/src/core/heartbeat.ts +51 -51
  54. package/src/core/hooks.ts +105 -105
  55. package/src/core/ide-bridge.ts +133 -133
  56. package/src/core/node-network.ts +86 -86
  57. package/src/core/profiles.ts +122 -122
  58. package/src/core/scheduler.ts +187 -187
  59. package/src/core/session-manager.ts +137 -137
  60. package/src/core/subagent.ts +98 -98
  61. package/src/core/vision.ts +180 -180
  62. package/src/core/workflow-graph.ts +365 -365
  63. package/src/daemon.ts +96 -96
  64. package/src/deploy/index.ts +255 -255
  65. package/src/doctor.ts +156 -156
  66. package/src/eval/index.ts +211 -211
  67. package/src/eval/suites/basic.json +16 -16
  68. package/src/eval/suites/memory.json +12 -12
  69. package/src/eval/suites/safety.json +14 -14
  70. package/src/hub/brain-seed.ts +54 -54
  71. package/src/hub/client.ts +60 -60
  72. package/src/mcp/servers/calculator-mcp.ts +65 -65
  73. package/src/mcp/servers/crypto-mcp.ts +73 -73
  74. package/src/mcp/servers/database-mcp.ts +72 -72
  75. package/src/mcp/servers/datetime-mcp.ts +69 -69
  76. package/src/mcp/servers/filesystem.ts +66 -66
  77. package/src/mcp/servers/github-mcp.ts +58 -58
  78. package/src/mcp/servers/index.ts +63 -63
  79. package/src/mcp/servers/json-mcp.ts +102 -102
  80. package/src/mcp/servers/memory-mcp.ts +56 -56
  81. package/src/mcp/servers/regex-mcp.ts +53 -53
  82. package/src/mcp/servers/web-mcp.ts +49 -49
  83. package/src/memory/context-compressor.ts +189 -189
  84. package/src/memory/seed-loader.ts +212 -212
  85. package/src/memory/user-profiler.ts +215 -215
  86. package/src/plugins/content-filter.ts +23 -23
  87. package/src/plugins/logger.ts +18 -18
  88. package/src/plugins/rate-limiter.ts +38 -38
  89. package/src/protocols/a2a/client.ts +132 -132
  90. package/src/protocols/a2a/index.ts +8 -8
  91. package/src/protocols/a2a/server.ts +333 -333
  92. package/src/protocols/a2a/types.ts +88 -88
  93. package/src/protocols/a2a/utils.ts +50 -50
  94. package/src/protocols/agui/client.ts +83 -83
  95. package/src/protocols/agui/index.ts +4 -4
  96. package/src/protocols/agui/server.ts +218 -218
  97. package/src/protocols/agui/types.ts +153 -153
  98. package/src/protocols/index.ts +2 -2
  99. package/src/protocols/mcp/agent-tools.ts +134 -134
  100. package/src/protocols/mcp/index.ts +8 -8
  101. package/src/protocols/mcp/server.ts +262 -262
  102. package/src/protocols/mcp/types.ts +69 -69
  103. package/src/providers/index.ts +632 -632
  104. package/src/publish/index.ts +376 -376
  105. package/src/scheduler/cron-engine.ts +191 -191
  106. package/src/scheduler/index.ts +2 -2
  107. package/src/schema/oad.ts +217 -217
  108. package/src/security/approval.ts +131 -131
  109. package/src/security/approvals.ts +143 -143
  110. package/src/security/elevated.ts +105 -105
  111. package/src/security/guardrails.ts +248 -248
  112. package/src/security/index.ts +9 -9
  113. package/src/security/keys.ts +87 -87
  114. package/src/security/secrets.ts +129 -129
  115. package/src/skills/builtin/index.ts +408 -408
  116. package/src/skills/marketplace.ts +113 -113
  117. package/src/skills/types.ts +42 -42
  118. package/src/studio/server.ts +31 -1
  119. package/src/studio/templates-data.ts +178 -178
  120. package/src/studio-ui/index.html +230 -10
  121. package/src/telemetry/index.ts +324 -324
  122. package/src/tools/builtin/browser.ts +299 -299
  123. package/src/tools/builtin/datetime.ts +41 -41
  124. package/src/tools/builtin/file.ts +107 -107
  125. package/src/tools/builtin/home-assistant.ts +116 -116
  126. package/src/tools/builtin/rl-tools.ts +243 -243
  127. package/src/tools/builtin/shell.ts +43 -43
  128. package/src/tools/builtin/vision.ts +64 -64
  129. package/src/tools/builtin/web-search.ts +126 -126
  130. package/src/tools/builtin/web.ts +35 -35
  131. package/src/tools/document-processor.ts +213 -213
  132. package/src/tools/image-generator.ts +150 -150
  133. package/src/tools/integrations/calendar.ts +73 -73
  134. package/src/tools/integrations/code-exec.ts +39 -39
  135. package/src/tools/integrations/csv-analyzer.ts +92 -92
  136. package/src/tools/integrations/database.ts +44 -44
  137. package/src/tools/integrations/email-send.ts +76 -76
  138. package/src/tools/integrations/git-tool.ts +42 -42
  139. package/src/tools/integrations/github-tool.ts +76 -76
  140. package/src/tools/integrations/image-gen.ts +56 -56
  141. package/src/tools/integrations/index.ts +92 -92
  142. package/src/tools/integrations/jira.ts +83 -83
  143. package/src/tools/integrations/notion.ts +71 -71
  144. package/src/tools/integrations/npm-tool.ts +48 -48
  145. package/src/tools/integrations/pdf-reader.ts +58 -58
  146. package/src/tools/integrations/slack.ts +65 -65
  147. package/src/tools/integrations/summarizer.ts +49 -49
  148. package/src/tools/integrations/translator.ts +48 -48
  149. package/src/tools/integrations/trello.ts +60 -60
  150. package/src/tools/integrations/vector-search.ts +42 -42
  151. package/src/tools/integrations/web-scraper.ts +47 -47
  152. package/src/tools/integrations/web-search.ts +58 -58
  153. package/src/tools/integrations/webhook.ts +38 -38
  154. package/src/tools/mcp-client.ts +131 -131
  155. package/src/tools/web-scraper.ts +179 -179
  156. package/src/tools/web-search.ts +180 -180
  157. package/src/ui/components.ts +127 -127
  158. package/srv-out.txt +1 -1
  159. package/templates/ecommerce-assistant/README.md +45 -45
  160. package/templates/ecommerce-assistant/oad.yaml +47 -47
  161. package/templates/tech-support/README.md +43 -43
  162. package/templates/tech-support/oad.yaml +45 -45
  163. package/test-agent/Dockerfile +9 -9
  164. package/test-agent/README.md +50 -50
  165. package/test-agent/agent.yaml +23 -23
  166. package/test-agent/docker-compose.yml +11 -11
  167. package/test-agent/oad.yaml +31 -31
  168. package/test-agent/package-lock.json +1492 -1492
  169. package/test-agent/package.json +17 -17
  170. package/test-agent/src/index.ts +24 -24
  171. package/test-agent/src/skills/echo.ts +15 -15
  172. package/test-agent/tsconfig.json +24 -24
  173. package/test-full.js +43 -43
  174. package/test-sidebar.js +22 -22
  175. package/test-studio3.js +75 -75
  176. package/test-studio4.js +41 -41
  177. package/tests/a2a-protocol.test.ts +285 -285
  178. package/tests/agui-protocol.test.ts +246 -246
  179. package/tests/api-server.test.ts +148 -148
  180. package/tests/approvals.test.ts +89 -89
  181. package/tests/audio.test.ts +40 -40
  182. package/tests/brain-seed-extended.test.ts +490 -490
  183. package/tests/brain-seed.test.ts +239 -239
  184. package/tests/browser.test.ts +179 -179
  185. package/tests/channels/discord.test.ts +79 -79
  186. package/tests/channels/email.test.ts +148 -148
  187. package/tests/channels/feishu.test.ts +123 -123
  188. package/tests/channels/telegram.test.ts +129 -129
  189. package/tests/channels/websocket.test.ts +53 -53
  190. package/tests/channels/wechat.test.ts +170 -170
  191. package/tests/channels-extra.test.ts +45 -45
  192. package/tests/chat-cli.test.ts +160 -160
  193. package/tests/cli.test.ts +46 -46
  194. package/tests/context-compressor.test.ts +172 -172
  195. package/tests/context-refs.test.ts +121 -121
  196. package/tests/cron-engine.test.ts +101 -101
  197. package/tests/daemon.test.ts +135 -135
  198. package/tests/deepbrain-wire.test.ts +234 -234
  199. package/tests/deploy-and-dag.test.ts +196 -196
  200. package/tests/doctor.test.ts +38 -38
  201. package/tests/document-processor.test.ts +69 -69
  202. package/tests/e2e-nocode.test.ts +442 -442
  203. package/tests/elevated.test.ts +69 -69
  204. package/tests/eval.test.ts +173 -173
  205. package/tests/gateway.test.ts +63 -63
  206. package/tests/guardrails.test.ts +177 -177
  207. package/tests/home-assistant.test.ts +40 -40
  208. package/tests/hooks.test.ts +79 -79
  209. package/tests/ide-bridge.test.ts +38 -38
  210. package/tests/image-generator.test.ts +84 -84
  211. package/tests/init-role.test.ts +124 -124
  212. package/tests/integrations.test.ts +249 -249
  213. package/tests/mcp-client.test.ts +92 -92
  214. package/tests/mcp-server.test.ts +178 -178
  215. package/tests/mcp-servers.test.ts +260 -260
  216. package/tests/node-network.test.ts +74 -74
  217. package/tests/plugin-a2a-enhanced.test.ts +230 -230
  218. package/tests/profiles.test.ts +61 -61
  219. package/tests/publish.test.ts +231 -231
  220. package/tests/rl-tools.test.ts +93 -93
  221. package/tests/sandbox-manager.test.ts +46 -46
  222. package/tests/scheduler.test.ts +200 -200
  223. package/tests/secrets.test.ts +107 -107
  224. package/tests/security-enhanced.test.ts +233 -233
  225. package/tests/settings-api.test.ts +148 -148
  226. package/tests/setup.test.ts +73 -73
  227. package/tests/subagent.test.ts +193 -193
  228. package/tests/telegram-discord.test.ts +60 -60
  229. package/tests/telemetry.test.ts +186 -186
  230. package/tests/user-profiler.test.ts +169 -169
  231. package/tests/v090-features.test.ts +254 -254
  232. package/tests/vision.test.ts +61 -61
  233. package/tests/voice-call.test.ts +47 -47
  234. package/tests/voice-enhanced.test.ts +169 -169
  235. package/tests/voice-interaction.test.ts +38 -38
  236. package/tests/web-search.test.ts +155 -155
  237. package/tests/workflow-graph.test.ts +279 -279
  238. package/tutorial/customer-service-agent/README.md +612 -612
  239. package/tutorial/customer-service-agent/SOUL.md +26 -26
  240. package/tutorial/customer-service-agent/agent.yaml +63 -63
  241. package/tutorial/customer-service-agent/package.json +19 -19
  242. package/tutorial/customer-service-agent/src/index.ts +69 -69
  243. package/tutorial/customer-service-agent/src/skills/faq.ts +27 -27
  244. package/tutorial/customer-service-agent/src/skills/ticket.ts +22 -22
  245. package/tutorial/customer-service-agent/tsconfig.json +14 -14
@@ -1,131 +1,131 @@
1
- import { spawn, type ChildProcess } from 'child_process';
2
- import type { MCPToolDefinition, MCPToolResult } from './mcp';
3
-
4
- export interface MCPServerConfig {
5
- name: string;
6
- command: string;
7
- args?: string[];
8
- env?: Record<string, string>;
9
- }
10
-
11
- export class MCPClient {
12
- private process: ChildProcess | null = null;
13
- private config: MCPServerConfig | null = null;
14
- private nextId = 1;
15
- private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void }>();
16
- private buffer = '';
17
- private connected = false;
18
-
19
- async connect(config: MCPServerConfig): Promise<void> {
20
- this.config = config;
21
- this.process = spawn(config.command, config.args ?? [], {
22
- stdio: ['pipe', 'pipe', 'pipe'],
23
- env: { ...process.env, ...config.env },
24
- });
25
-
26
- this.process.stdout!.on('data', (data: Buffer) => {
27
- this.buffer += data.toString();
28
- this.processBuffer();
29
- });
30
-
31
- this.process.on('error', (err) => {
32
- for (const [, p] of this.pending) p.reject(err);
33
- this.pending.clear();
34
- });
35
-
36
- this.process.on('exit', () => {
37
- this.connected = false;
38
- for (const [, p] of this.pending) p.reject(new Error('MCP server exited'));
39
- this.pending.clear();
40
- });
41
-
42
- // Send initialize
43
- await this.sendRequest('initialize', {
44
- protocolVersion: '2024-11-05',
45
- capabilities: {},
46
- clientInfo: { name: 'opc-agent', version: '0.7.0' },
47
- });
48
-
49
- // Send initialized notification
50
- this.sendNotification('notifications/initialized', {});
51
- this.connected = true;
52
- }
53
-
54
- private processBuffer(): void {
55
- const lines = this.buffer.split('\n');
56
- this.buffer = lines.pop() ?? '';
57
- for (const line of lines) {
58
- const trimmed = line.trim();
59
- if (!trimmed) continue;
60
- try {
61
- const msg = JSON.parse(trimmed);
62
- if (msg.id !== undefined && this.pending.has(msg.id)) {
63
- const p = this.pending.get(msg.id)!;
64
- this.pending.delete(msg.id);
65
- if (msg.error) {
66
- p.reject(new Error(msg.error.message || JSON.stringify(msg.error)));
67
- } else {
68
- p.resolve(msg.result);
69
- }
70
- }
71
- } catch { /* skip non-JSON lines */ }
72
- }
73
- }
74
-
75
- private sendRequest(method: string, params?: Record<string, unknown>): Promise<any> {
76
- return new Promise((resolve, reject) => {
77
- if (!this.process?.stdin?.writable) {
78
- reject(new Error('MCP server not connected'));
79
- return;
80
- }
81
- const id = this.nextId++;
82
- this.pending.set(id, { resolve, reject });
83
- const msg = JSON.stringify({ jsonrpc: '2.0', method, params: params ?? {}, id });
84
- this.process.stdin.write(msg + '\n');
85
-
86
- // Timeout after 30s
87
- setTimeout(() => {
88
- if (this.pending.has(id)) {
89
- this.pending.delete(id);
90
- reject(new Error(`MCP request timed out: ${method}`));
91
- }
92
- }, 30000);
93
- });
94
- }
95
-
96
- private sendNotification(method: string, params: Record<string, unknown>): void {
97
- if (!this.process?.stdin?.writable) return;
98
- const msg = JSON.stringify({ jsonrpc: '2.0', method, params });
99
- this.process.stdin.write(msg + '\n');
100
- }
101
-
102
- async listTools(): Promise<MCPToolDefinition[]> {
103
- const result = await this.sendRequest('tools/list');
104
- return (result.tools ?? []).map((t: any) => ({
105
- name: t.name,
106
- description: t.description ?? '',
107
- inputSchema: t.inputSchema ?? {},
108
- }));
109
- }
110
-
111
- async callTool(name: string, input: Record<string, unknown>): Promise<MCPToolResult> {
112
- const result = await this.sendRequest('tools/call', { name, arguments: input });
113
- const content = (result.content ?? [])
114
- .map((c: any) => c.text ?? JSON.stringify(c))
115
- .join('\n');
116
- return { content, isError: result.isError ?? false };
117
- }
118
-
119
- async disconnect(): Promise<void> {
120
- if (this.process) {
121
- this.process.kill();
122
- this.process = null;
123
- }
124
- this.connected = false;
125
- this.pending.clear();
126
- }
127
-
128
- isConnected(): boolean {
129
- return this.connected;
130
- }
131
- }
1
+ import { spawn, type ChildProcess } from 'child_process';
2
+ import type { MCPToolDefinition, MCPToolResult } from './mcp';
3
+
4
+ export interface MCPServerConfig {
5
+ name: string;
6
+ command: string;
7
+ args?: string[];
8
+ env?: Record<string, string>;
9
+ }
10
+
11
+ export class MCPClient {
12
+ private process: ChildProcess | null = null;
13
+ private config: MCPServerConfig | null = null;
14
+ private nextId = 1;
15
+ private pending = new Map<number, { resolve: (v: any) => void; reject: (e: Error) => void }>();
16
+ private buffer = '';
17
+ private connected = false;
18
+
19
+ async connect(config: MCPServerConfig): Promise<void> {
20
+ this.config = config;
21
+ this.process = spawn(config.command, config.args ?? [], {
22
+ stdio: ['pipe', 'pipe', 'pipe'],
23
+ env: { ...process.env, ...config.env },
24
+ });
25
+
26
+ this.process.stdout!.on('data', (data: Buffer) => {
27
+ this.buffer += data.toString();
28
+ this.processBuffer();
29
+ });
30
+
31
+ this.process.on('error', (err) => {
32
+ for (const [, p] of this.pending) p.reject(err);
33
+ this.pending.clear();
34
+ });
35
+
36
+ this.process.on('exit', () => {
37
+ this.connected = false;
38
+ for (const [, p] of this.pending) p.reject(new Error('MCP server exited'));
39
+ this.pending.clear();
40
+ });
41
+
42
+ // Send initialize
43
+ await this.sendRequest('initialize', {
44
+ protocolVersion: '2024-11-05',
45
+ capabilities: {},
46
+ clientInfo: { name: 'opc-agent', version: '0.7.0' },
47
+ });
48
+
49
+ // Send initialized notification
50
+ this.sendNotification('notifications/initialized', {});
51
+ this.connected = true;
52
+ }
53
+
54
+ private processBuffer(): void {
55
+ const lines = this.buffer.split('\n');
56
+ this.buffer = lines.pop() ?? '';
57
+ for (const line of lines) {
58
+ const trimmed = line.trim();
59
+ if (!trimmed) continue;
60
+ try {
61
+ const msg = JSON.parse(trimmed);
62
+ if (msg.id !== undefined && this.pending.has(msg.id)) {
63
+ const p = this.pending.get(msg.id)!;
64
+ this.pending.delete(msg.id);
65
+ if (msg.error) {
66
+ p.reject(new Error(msg.error.message || JSON.stringify(msg.error)));
67
+ } else {
68
+ p.resolve(msg.result);
69
+ }
70
+ }
71
+ } catch { /* skip non-JSON lines */ }
72
+ }
73
+ }
74
+
75
+ private sendRequest(method: string, params?: Record<string, unknown>): Promise<any> {
76
+ return new Promise((resolve, reject) => {
77
+ if (!this.process?.stdin?.writable) {
78
+ reject(new Error('MCP server not connected'));
79
+ return;
80
+ }
81
+ const id = this.nextId++;
82
+ this.pending.set(id, { resolve, reject });
83
+ const msg = JSON.stringify({ jsonrpc: '2.0', method, params: params ?? {}, id });
84
+ this.process.stdin.write(msg + '\n');
85
+
86
+ // Timeout after 30s
87
+ setTimeout(() => {
88
+ if (this.pending.has(id)) {
89
+ this.pending.delete(id);
90
+ reject(new Error(`MCP request timed out: ${method}`));
91
+ }
92
+ }, 30000);
93
+ });
94
+ }
95
+
96
+ private sendNotification(method: string, params: Record<string, unknown>): void {
97
+ if (!this.process?.stdin?.writable) return;
98
+ const msg = JSON.stringify({ jsonrpc: '2.0', method, params });
99
+ this.process.stdin.write(msg + '\n');
100
+ }
101
+
102
+ async listTools(): Promise<MCPToolDefinition[]> {
103
+ const result = await this.sendRequest('tools/list');
104
+ return (result.tools ?? []).map((t: any) => ({
105
+ name: t.name,
106
+ description: t.description ?? '',
107
+ inputSchema: t.inputSchema ?? {},
108
+ }));
109
+ }
110
+
111
+ async callTool(name: string, input: Record<string, unknown>): Promise<MCPToolResult> {
112
+ const result = await this.sendRequest('tools/call', { name, arguments: input });
113
+ const content = (result.content ?? [])
114
+ .map((c: any) => c.text ?? JSON.stringify(c))
115
+ .join('\n');
116
+ return { content, isError: result.isError ?? false };
117
+ }
118
+
119
+ async disconnect(): Promise<void> {
120
+ if (this.process) {
121
+ this.process.kill();
122
+ this.process = null;
123
+ }
124
+ this.connected = false;
125
+ this.pending.clear();
126
+ }
127
+
128
+ isConnected(): boolean {
129
+ return this.connected;
130
+ }
131
+ }
@@ -1,179 +1,179 @@
1
- /**
2
- * Web Scraper - v0.10.0
3
- * Fetch URL content and extract readable text in markdown format.
4
- * Uses a simple readability-style extraction (no external dependencies).
5
- */
6
-
7
- export interface ScrapedContent {
8
- title: string;
9
- content: string; // markdown
10
- url: string;
11
- wordCount: number;
12
- }
13
-
14
- const MAX_CONTENT_LENGTH = 5000;
15
-
16
- /**
17
- * Fetch a URL and extract readable content as markdown.
18
- */
19
- export async function scrapeUrl(url: string, maxLength = MAX_CONTENT_LENGTH): Promise<ScrapedContent> {
20
- const response = await fetch(url, {
21
- headers: {
22
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
23
- Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
24
- },
25
- signal: AbortSignal.timeout(15000),
26
- redirect: 'follow',
27
- });
28
-
29
- const contentType = response.headers.get('content-type') || '';
30
- const text = await response.text();
31
-
32
- // If not HTML, return raw text
33
- if (!contentType.includes('html')) {
34
- const truncated = text.slice(0, maxLength);
35
- return {
36
- title: url,
37
- content: truncated,
38
- url,
39
- wordCount: truncated.split(/\s+/).length,
40
- };
41
- }
42
-
43
- return extractReadableContent(text, url, maxLength);
44
- }
45
-
46
- /**
47
- * Extract readable content from HTML using simple heuristics.
48
- */
49
- export function extractReadableContent(html: string, url: string, maxLength = MAX_CONTENT_LENGTH): ScrapedContent {
50
- // Extract title
51
- const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
52
- const title = titleMatch ? decodeEntities(titleMatch[1]).trim() : url;
53
-
54
- // Remove non-content elements
55
- let content = html;
56
-
57
- // Remove script, style, nav, header, footer, aside, iframe
58
- const removePatterns = [
59
- /<script[\s\S]*?<\/script>/gi,
60
- /<style[\s\S]*?<\/style>/gi,
61
- /<nav[\s\S]*?<\/nav>/gi,
62
- /<footer[\s\S]*?<\/footer>/gi,
63
- /<aside[\s\S]*?<\/aside>/gi,
64
- /<iframe[\s\S]*?<\/iframe>/gi,
65
- /<noscript[\s\S]*?<\/noscript>/gi,
66
- /<!--[\s\S]*?-->/g,
67
- ];
68
-
69
- for (const pattern of removePatterns) {
70
- content = content.replace(pattern, '');
71
- }
72
-
73
- // Try to find main content area
74
- const mainContent = findMainContent(content);
75
- content = mainContent || content;
76
-
77
- // Convert to markdown-ish text
78
- content = htmlToMarkdown(content);
79
-
80
- // Clean up whitespace
81
- content = content
82
- .replace(/\n{3,}/g, '\n\n')
83
- .replace(/[ \t]+/g, ' ')
84
- .trim();
85
-
86
- // Truncate
87
- if (content.length > maxLength) {
88
- content = content.slice(0, maxLength) + '\n\n...[truncated]';
89
- }
90
-
91
- return {
92
- title,
93
- content,
94
- url,
95
- wordCount: content.split(/\s+/).filter(Boolean).length,
96
- };
97
- }
98
-
99
- /**
100
- * Try to find the main content area of the page.
101
- */
102
- function findMainContent(html: string): string | null {
103
- // Try common content selectors
104
- const patterns = [
105
- /<article[^>]*>([\s\S]*?)<\/article>/i,
106
- /<main[^>]*>([\s\S]*?)<\/main>/i,
107
- /<div[^>]*class="[^"]*(?:content|article|post|entry|main)[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
108
- /<div[^>]*id="[^"]*(?:content|article|post|entry|main)[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
109
- ];
110
-
111
- for (const pattern of patterns) {
112
- const match = html.match(pattern);
113
- if (match && match[1] && match[1].length > 200) {
114
- return match[1];
115
- }
116
- }
117
-
118
- // Fallback: find body content
119
- const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
120
- return bodyMatch ? bodyMatch[1] : null;
121
- }
122
-
123
- /**
124
- * Simple HTML to Markdown conversion.
125
- */
126
- function htmlToMarkdown(html: string): string {
127
- let md = html;
128
-
129
- // Headers
130
- md = md.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, '\n# $1\n');
131
- md = md.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, '\n## $1\n');
132
- md = md.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, '\n### $1\n');
133
- md = md.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, '\n#### $1\n');
134
- md = md.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, '\n##### $1\n');
135
- md = md.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, '\n###### $1\n');
136
-
137
- // Paragraphs and line breaks
138
- md = md.replace(/<p[^>]*>/gi, '\n');
139
- md = md.replace(/<\/p>/gi, '\n');
140
- md = md.replace(/<br\s*\/?>/gi, '\n');
141
-
142
- // Links
143
- md = md.replace(/<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
144
-
145
- // Bold and italic
146
- md = md.replace(/<(?:strong|b)[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, '**$1**');
147
- md = md.replace(/<(?:em|i)[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, '*$1*');
148
-
149
- // Code
150
- md = md.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
151
- md = md.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, '\n```\n$1\n```\n');
152
-
153
- // Lists
154
- md = md.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
155
-
156
- // Blockquote
157
- md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, '\n> $1\n');
158
-
159
- // Remove remaining HTML tags
160
- md = md.replace(/<[^>]+>/g, '');
161
-
162
- // Decode entities
163
- md = decodeEntities(md);
164
-
165
- return md;
166
- }
167
-
168
- function decodeEntities(text: string): string {
169
- return text
170
- .replace(/&amp;/g, '&')
171
- .replace(/&lt;/g, '<')
172
- .replace(/&gt;/g, '>')
173
- .replace(/&quot;/g, '"')
174
- .replace(/&#x27;/g, "'")
175
- .replace(/&#39;/g, "'")
176
- .replace(/&nbsp;/g, ' ')
177
- .replace(/&#(\d+);/g, (_, n) => String.fromCharCode(parseInt(n)))
178
- .replace(/&#x([0-9a-fA-F]+);/g, (_, n) => String.fromCharCode(parseInt(n, 16)));
179
- }
1
+ /**
2
+ * Web Scraper - v0.10.0
3
+ * Fetch URL content and extract readable text in markdown format.
4
+ * Uses a simple readability-style extraction (no external dependencies).
5
+ */
6
+
7
+ export interface ScrapedContent {
8
+ title: string;
9
+ content: string; // markdown
10
+ url: string;
11
+ wordCount: number;
12
+ }
13
+
14
+ const MAX_CONTENT_LENGTH = 5000;
15
+
16
+ /**
17
+ * Fetch a URL and extract readable content as markdown.
18
+ */
19
+ export async function scrapeUrl(url: string, maxLength = MAX_CONTENT_LENGTH): Promise<ScrapedContent> {
20
+ const response = await fetch(url, {
21
+ headers: {
22
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
23
+ Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
24
+ },
25
+ signal: AbortSignal.timeout(15000),
26
+ redirect: 'follow',
27
+ });
28
+
29
+ const contentType = response.headers.get('content-type') || '';
30
+ const text = await response.text();
31
+
32
+ // If not HTML, return raw text
33
+ if (!contentType.includes('html')) {
34
+ const truncated = text.slice(0, maxLength);
35
+ return {
36
+ title: url,
37
+ content: truncated,
38
+ url,
39
+ wordCount: truncated.split(/\s+/).length,
40
+ };
41
+ }
42
+
43
+ return extractReadableContent(text, url, maxLength);
44
+ }
45
+
46
+ /**
47
+ * Extract readable content from HTML using simple heuristics.
48
+ */
49
+ export function extractReadableContent(html: string, url: string, maxLength = MAX_CONTENT_LENGTH): ScrapedContent {
50
+ // Extract title
51
+ const titleMatch = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
52
+ const title = titleMatch ? decodeEntities(titleMatch[1]).trim() : url;
53
+
54
+ // Remove non-content elements
55
+ let content = html;
56
+
57
+ // Remove script, style, nav, header, footer, aside, iframe
58
+ const removePatterns = [
59
+ /<script[\s\S]*?<\/script>/gi,
60
+ /<style[\s\S]*?<\/style>/gi,
61
+ /<nav[\s\S]*?<\/nav>/gi,
62
+ /<footer[\s\S]*?<\/footer>/gi,
63
+ /<aside[\s\S]*?<\/aside>/gi,
64
+ /<iframe[\s\S]*?<\/iframe>/gi,
65
+ /<noscript[\s\S]*?<\/noscript>/gi,
66
+ /<!--[\s\S]*?-->/g,
67
+ ];
68
+
69
+ for (const pattern of removePatterns) {
70
+ content = content.replace(pattern, '');
71
+ }
72
+
73
+ // Try to find main content area
74
+ const mainContent = findMainContent(content);
75
+ content = mainContent || content;
76
+
77
+ // Convert to markdown-ish text
78
+ content = htmlToMarkdown(content);
79
+
80
+ // Clean up whitespace
81
+ content = content
82
+ .replace(/\n{3,}/g, '\n\n')
83
+ .replace(/[ \t]+/g, ' ')
84
+ .trim();
85
+
86
+ // Truncate
87
+ if (content.length > maxLength) {
88
+ content = content.slice(0, maxLength) + '\n\n...[truncated]';
89
+ }
90
+
91
+ return {
92
+ title,
93
+ content,
94
+ url,
95
+ wordCount: content.split(/\s+/).filter(Boolean).length,
96
+ };
97
+ }
98
+
99
+ /**
100
+ * Try to find the main content area of the page.
101
+ */
102
+ function findMainContent(html: string): string | null {
103
+ // Try common content selectors
104
+ const patterns = [
105
+ /<article[^>]*>([\s\S]*?)<\/article>/i,
106
+ /<main[^>]*>([\s\S]*?)<\/main>/i,
107
+ /<div[^>]*class="[^"]*(?:content|article|post|entry|main)[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
108
+ /<div[^>]*id="[^"]*(?:content|article|post|entry|main)[^"]*"[^>]*>([\s\S]*?)<\/div>/i,
109
+ ];
110
+
111
+ for (const pattern of patterns) {
112
+ const match = html.match(pattern);
113
+ if (match && match[1] && match[1].length > 200) {
114
+ return match[1];
115
+ }
116
+ }
117
+
118
+ // Fallback: find body content
119
+ const bodyMatch = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
120
+ return bodyMatch ? bodyMatch[1] : null;
121
+ }
122
+
123
+ /**
124
+ * Simple HTML to Markdown conversion.
125
+ */
126
+ function htmlToMarkdown(html: string): string {
127
+ let md = html;
128
+
129
+ // Headers
130
+ md = md.replace(/<h1[^>]*>([\s\S]*?)<\/h1>/gi, '\n# $1\n');
131
+ md = md.replace(/<h2[^>]*>([\s\S]*?)<\/h2>/gi, '\n## $1\n');
132
+ md = md.replace(/<h3[^>]*>([\s\S]*?)<\/h3>/gi, '\n### $1\n');
133
+ md = md.replace(/<h4[^>]*>([\s\S]*?)<\/h4>/gi, '\n#### $1\n');
134
+ md = md.replace(/<h5[^>]*>([\s\S]*?)<\/h5>/gi, '\n##### $1\n');
135
+ md = md.replace(/<h6[^>]*>([\s\S]*?)<\/h6>/gi, '\n###### $1\n');
136
+
137
+ // Paragraphs and line breaks
138
+ md = md.replace(/<p[^>]*>/gi, '\n');
139
+ md = md.replace(/<\/p>/gi, '\n');
140
+ md = md.replace(/<br\s*\/?>/gi, '\n');
141
+
142
+ // Links
143
+ md = md.replace(/<a[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
144
+
145
+ // Bold and italic
146
+ md = md.replace(/<(?:strong|b)[^>]*>([\s\S]*?)<\/(?:strong|b)>/gi, '**$1**');
147
+ md = md.replace(/<(?:em|i)[^>]*>([\s\S]*?)<\/(?:em|i)>/gi, '*$1*');
148
+
149
+ // Code
150
+ md = md.replace(/<code[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
151
+ md = md.replace(/<pre[^>]*>([\s\S]*?)<\/pre>/gi, '\n```\n$1\n```\n');
152
+
153
+ // Lists
154
+ md = md.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
155
+
156
+ // Blockquote
157
+ md = md.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi, '\n> $1\n');
158
+
159
+ // Remove remaining HTML tags
160
+ md = md.replace(/<[^>]+>/g, '');
161
+
162
+ // Decode entities
163
+ md = decodeEntities(md);
164
+
165
+ return md;
166
+ }
167
+
168
+ function decodeEntities(text: string): string {
169
+ return text
170
+ .replace(/&amp;/g, '&')
171
+ .replace(/&lt;/g, '<')
172
+ .replace(/&gt;/g, '>')
173
+ .replace(/&quot;/g, '"')
174
+ .replace(/&#x27;/g, "'")
175
+ .replace(/&#39;/g, "'")
176
+ .replace(/&nbsp;/g, ' ')
177
+ .replace(/&#(\d+);/g, (_, n) => String.fromCharCode(parseInt(n)))
178
+ .replace(/&#x([0-9a-fA-F]+);/g, (_, n) => String.fromCharCode(parseInt(n, 16)));
179
+ }