natureco-cli 2.23.30 → 2.23.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/natureco.js +178 -167
- package/package.json +1 -1
- package/src/commands/acp.js +39 -0
- package/src/commands/admin-rpc.js +83 -0
- package/src/commands/agent.js +214 -23
- package/src/commands/agents.js +114 -30
- package/src/commands/approvals.js +172 -11
- package/src/commands/ask.js +1 -1
- package/src/commands/browser.js +815 -0
- package/src/commands/capability.js +195 -22
- package/src/commands/channels.js +422 -267
- package/src/commands/chat.js +5 -8
- package/src/commands/clawbot.js +19 -0
- package/src/commands/code.js +3 -2
- package/src/commands/commitments.js +125 -9
- package/src/commands/completion.js +40 -32
- package/src/commands/config.js +228 -30
- package/src/commands/configure.js +84 -67
- package/src/commands/cron.js +239 -19
- package/src/commands/daemon.js +34 -4
- package/src/commands/dashboard.js +47 -374
- package/src/commands/devices.js +53 -26
- package/src/commands/directory.js +146 -14
- package/src/commands/dns.js +148 -10
- package/src/commands/docs.js +119 -26
- package/src/commands/doctor.js +143 -492
- package/src/commands/exec-policy.js +57 -48
- package/src/commands/gateway.js +492 -249
- package/src/commands/health.js +141 -11
- package/src/commands/help.js +24 -25
- package/src/commands/hooks.js +141 -87
- package/src/commands/infer.js +1442 -41
- package/src/commands/logs.js +122 -99
- package/src/commands/mcp.js +121 -309
- package/src/commands/memory.js +128 -0
- package/src/commands/message.js +720 -140
- package/src/commands/models.js +39 -1
- package/src/commands/node.js +77 -77
- package/src/commands/nodes.js +278 -22
- package/src/commands/onboard.js +115 -56
- package/src/commands/pairing.js +108 -107
- package/src/commands/path.js +206 -0
- package/src/commands/plugins.js +35 -1
- package/src/commands/proxy.js +159 -8
- package/src/commands/qr.js +55 -13
- package/src/commands/reset.js +101 -94
- package/src/commands/secrets.js +104 -21
- package/src/commands/sessions.js +110 -51
- package/src/commands/setup.js +229 -649
- package/src/commands/skills.js +67 -1
- package/src/commands/status.js +101 -127
- package/src/commands/tasks.js +208 -100
- package/src/commands/terminal.js +130 -12
- package/src/commands/transcripts.js +24 -1
- package/src/commands/tui.js +41 -0
- package/src/commands/uninstall.js +73 -92
- package/src/commands/update.js +146 -91
- package/src/commands/web-fetch.js +34 -0
- package/src/commands/webhooks.js +58 -66
- package/src/commands/wiki.js +783 -0
- package/src/utils/agents-md.js +85 -0
- package/src/utils/api.js +40 -41
- package/src/utils/format.js +144 -0
- package/src/utils/headless.js +2 -1
- package/src/utils/parallel-tools.js +106 -0
- package/src/utils/sub-agent.js +148 -0
- package/src/utils/token-budget.js +304 -0
- package/src/utils/tool-runner.js +7 -5
- package/src/utils/web-fetch.js +107 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const os = require('os');
|
|
4
|
+
|
|
5
|
+
const BUDGET_FILE = path.join(os.homedir(), '.natureco', 'token-budget.json');
|
|
6
|
+
const USAGE_FILE = path.join(os.homedir(), '.natureco', 'token-usage.json');
|
|
7
|
+
|
|
8
|
+
/**
 * Built-in budget presets, keyed by preset name.
 *
 * Fields read by functions in this module:
 *   - maxContextTokens, reservedTokens  -> smartTrim (token estimate)
 *   - tailTurns                         -> trimMessages / smartTrim (tailTurns * 2 messages)
 *   - conversationInContext             -> trimMessages / smartTrim (trim threshold)
 *   - toolMaxLines, toolMaxChars        -> capToolOutput
 *   - mcpDescMaxChars                   -> capMcpDesc
 *   - systemPromptMaxChars              -> trimSystemPrompt
 *   - memoryMaxFacts                    -> trimMemory
 *   - projectMemoryMaxChars             -> trimProjectMemory
 *   - fileContentMaxChars               -> trimFileContent
 *
 * The remaining fields (preserveRecentTokens, toolMaxBytes, memoryMaxChars,
 * conversationOnDisk, autoCompact, compactModel) are not read by any function
 * in this module; presumably other modules consume them.
 */
const PRESETS = {
  // Smallest limits of the three presets.
  efficient: {
    label: 'Efficient — minimum token use',
    maxContextTokens: 8192,
    preserveRecentTokens: 2048,
    tailTurns: 4,
    toolMaxLines: 30,
    toolMaxChars: 800,
    toolMaxBytes: 4096,
    mcpDescMaxChars: 60,
    systemPromptMaxChars: 300,
    memoryMaxFacts: 3,
    memoryMaxChars: 500,
    projectMemoryMaxChars: 1000,
    fileContentMaxChars: 1000,
    conversationOnDisk: 6,
    conversationInContext: 8,
    autoCompact: true,
    compactModel: null,
    reservedTokens: 1024,
  },

  // Used by load() whenever no usable preset is configured.
  balanced: {
    label: 'Balanced — good quality with reasonable cost',
    maxContextTokens: 16384,
    preserveRecentTokens: 4096,
    tailTurns: 8,
    toolMaxLines: 60,
    toolMaxChars: 1500,
    toolMaxBytes: 8192,
    mcpDescMaxChars: 100,
    systemPromptMaxChars: 500,
    memoryMaxFacts: 5,
    memoryMaxChars: 1000,
    projectMemoryMaxChars: 2000,
    fileContentMaxChars: 2000,
    conversationOnDisk: 10,
    conversationInContext: 12,
    autoCompact: true,
    compactModel: null,
    reservedTokens: 2048,
  },

  // Largest limits of the three presets; the only one with autoCompact off.
  quality: {
    label: 'Quality — maximum context for best results',
    maxContextTokens: 65536,
    preserveRecentTokens: 8192,
    tailTurns: 20,
    toolMaxLines: 200,
    toolMaxChars: 5000,
    toolMaxBytes: 32768,
    mcpDescMaxChars: 200,
    systemPromptMaxChars: 2000,
    memoryMaxFacts: 10,
    memoryMaxChars: 3000,
    projectMemoryMaxChars: 5000,
    fileContentMaxChars: 5000,
    conversationOnDisk: 20,
    conversationInContext: 25,
    autoCompact: false,
    compactModel: null,
    reservedTokens: 4096,
  },
};
|
|
70
|
+
|
|
71
|
+
let _cached = null;
|
|
72
|
+
|
|
73
|
+
/**
 * Return the effective token budget, reading BUDGET_FILE at most once.
 *
 * The result is the selected preset's defaults overlaid with every key stored
 * in BUDGET_FILE, plus a `preset` field. It is memoized in `_cached` until
 * save() clears it, so callers share one object.
 *
 * A missing, unreadable or malformed file yields the plain `balanced` preset.
 *
 * @returns {object} The merged budget object (shared, not a copy).
 */
function load() {
  if (_cached) return _cached;

  let overrides = {};
  try {
    if (fs.existsSync(BUDGET_FILE)) {
      const parsed = JSON.parse(fs.readFileSync(BUDGET_FILE, 'utf8'));
      // Only a plain JSON object can carry overrides; ignore arrays/scalars/null.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        overrides = parsed;
      }
    }
  } catch {
    // Deliberate best effort: a broken budget file must not break the CLI.
    overrides = {};
  }

  const preset = overrides.preset || 'balanced';
  // Own-property lookup: names such as "constructor" or "toString" would
  // otherwise resolve to Object.prototype members and produce a budget
  // without any limits.
  const defaults = Object.prototype.hasOwnProperty.call(PRESETS, preset)
    ? PRESETS[preset]
    : PRESETS.balanced;

  _cached = { ...defaults, ...overrides, preset };
  return _cached;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Persist a budget object to BUDGET_FILE as pretty-printed JSON.
 *
 * Creates the parent directory when it does not exist and then drops the
 * memoized budget so the next load() re-reads the file. Filesystem errors
 * propagate to the caller.
 *
 * @param {object} budget - Budget to serialize.
 * @returns {void}
 */
function save(budget) {
  const targetDir = path.dirname(BUDGET_FILE);
  const dirMissing = !fs.existsSync(targetDir);
  if (dirMissing) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  const payload = JSON.stringify(budget, null, 2);
  fs.writeFileSync(BUDGET_FILE, payload);

  // Invalidate the cache only after the write succeeded.
  _cached = null;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Switch the stored budget to one of the built-in presets.
 *
 * Every preset field overwrites the current value, so individual overrides
 * stored earlier are reset for those fields. The updated budget is written
 * through save().
 *
 * @param {string} name - Key of PRESETS to activate.
 * @returns {boolean} true when the preset exists and was saved, false for an
 *   unknown name (nothing is written in that case).
 */
function setPreset(name) {
  // Own-property check: a plain truthiness test on PRESETS[name] would accept
  // inherited keys such as "constructor" and persist them as the preset name.
  if (!Object.prototype.hasOwnProperty.call(PRESETS, name)) return false;

  const budget = load();
  Object.assign(budget, PRESETS[name], { preset: name });
  save(budget);
  return true;
}
|
|
105
|
+
|
|
106
|
+
/**
 * List the built-in presets in a compact form.
 *
 * @returns {Array<{key: string, label: string, maxContextTokens: number}>}
 *   One entry per PRESETS key, in declaration order.
 */
function getPresets() {
  const summaries = [];
  for (const key of Object.keys(PRESETS)) {
    const { label, maxContextTokens } = PRESETS[key];
    summaries.push({ key, label, maxContextTokens });
  }
  return summaries;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Shorten a tool result to the active budget's line and character limits.
 *
 * Non-string values are serialized with JSON.stringify first. The text is cut
 * to `toolMaxLines` lines, then to `toolMaxChars` characters; each cut appends
 * a short marker saying how much was dropped.
 *
 * @param {*} output - Tool result; falsy values are returned unchanged.
 * @returns {*} The (possibly truncated) string, or `output` itself when falsy.
 */
function capToolOutput(output) {
  const budget = load();
  if (!output) return output;

  let str;
  if (typeof output === 'string') {
    str = output;
  } else {
    try {
      str = JSON.stringify(output);
    } catch {
      // Circular structures and BigInt values make JSON.stringify throw.
      str = undefined;
    }
    // JSON.stringify returns undefined for functions and symbols.
    if (typeof str !== 'string') str = String(output);
  }

  // Remember the untruncated size so the final marker reports the real total,
  // not the size left after the line cut.
  const totalChars = str.length;

  const lines = str.split('\n');
  if (lines.length > budget.toolMaxLines) {
    const dropped = lines.length - budget.toolMaxLines;
    str = `${lines.slice(0, budget.toolMaxLines).join('\n')}\n... (${dropped} more lines)`;
  }
  if (str.length > budget.toolMaxChars) {
    str = `${str.slice(0, budget.toolMaxChars)}... (truncated, ${totalChars} total chars)`;
  }
  return str;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Cut an MCP tool description to the budget's `mcpDescMaxChars`.
 *
 * @param {string} [desc] - Description text; falsy values become ''.
 * @returns {string} At most `mcpDescMaxChars` leading characters.
 */
function capMcpDesc(desc) {
  const { mcpDescMaxChars } = load();
  const text = desc || '';
  return text.slice(0, mcpDescMaxChars);
}
|
|
132
|
+
|
|
133
|
+
/**
 * Drop old conversation messages once the history exceeds the budget.
 *
 * When there are more than `conversationInContext` non-system messages, the
 * result is: all system messages, one synthetic system notice stating how
 * many messages were omitted, then the last `tailTurns * 2` non-system
 * messages. Otherwise the input array is returned unchanged.
 *
 * @param {Array<{role: string, content?: string}>} messages
 * @returns {Array} The trimmed list, or `messages` itself when nothing is cut.
 */
function trimMessages(messages) {
  const budget = load();
  if (!messages || messages.length === 0) return messages;

  const systemMsgs = [];
  const nonSystem = [];
  for (const message of messages) {
    (message.role === 'system' ? systemMsgs : nonSystem).push(message);
  }

  if (nonSystem.length <= budget.conversationInContext) return messages;

  const tail = nonSystem.slice(-budget.tailTurns * 2);

  // The tail window can be wider than the threshold (e.g. tailTurns 8 gives
  // 16 messages against conversationInContext 12). In that case nothing would
  // be dropped, so do not inject a misleading "0 messages omitted" notice.
  if (tail.length >= nonSystem.length) return messages;

  const compacted = {
    role: 'system',
    content: `[Previous conversation compressed: ${nonSystem.length - tail.length} messages omitted. Key context retained below.]`
  };

  return [...systemMsgs, compacted, ...tail];
}
|
|
150
|
+
|
|
151
|
+
/**
 * Keep only the highest-ranked memory facts.
 *
 * Each entry is shallow-copied with an added `_score` field equal to
 * `score + relevance` (missing values count as 0), sorted by `_score`
 * descending, and cut to `memoryMaxFacts` entries.
 *
 * @param {Array<object>} memories - Memory records; falsy/empty is returned as is.
 * @returns {Array<object>} Copies of the top entries, each carrying `_score`.
 */
function trimMemory(memories) {
  const { memoryMaxFacts } = load();
  if (!memories || memories.length === 0) return memories;

  const ranked = memories.map((memory) => {
    const combined = (memory.score || 0) + (memory.relevance || 0);
    return { ...memory, _score: combined };
  });
  ranked.sort((left, right) => right._score - left._score);

  return ranked.slice(0, memoryMaxFacts);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Cut a system prompt to the budget's `systemPromptMaxChars`.
 *
 * @param {string} prompt - Prompt text; falsy values are returned unchanged.
 * @returns {string} The leading `systemPromptMaxChars` characters.
 */
function trimSystemPrompt(prompt) {
  const limit = load().systemPromptMaxChars;
  return prompt ? prompt.slice(0, limit) : prompt;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Cut project-memory text to the budget's `projectMemoryMaxChars`.
 *
 * @param {string} content - Text to shorten; falsy values are returned unchanged.
 * @returns {string} The leading `projectMemoryMaxChars` characters.
 */
function trimProjectMemory(content) {
  const limit = load().projectMemoryMaxChars;
  return content ? content.slice(0, limit) : content;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Cut file content to the budget's `fileContentMaxChars`.
 *
 * @param {string} content - Text to shorten; falsy values are returned unchanged.
 * @returns {string} The leading `fileContentMaxChars` characters.
 */
function trimFileContent(content) {
  const limit = load().fileContentMaxChars;
  return content ? content.slice(0, limit) : content;
}
|
|
178
|
+
|
|
179
|
+
/**
 * Score a message by importance (higher = keep first).
 *
 * Role contribution: system +100, user +30, tool -20, assistant +10 only when
 * it carries `tool_calls`. Content length contribution: +5 above 500
 * characters, -5 below 20 characters (missing content counts as length 0).
 *
 * @param {{role: string, content?: string, tool_calls?: *}} msg
 * @returns {number} The combined score.
 */
function importanceScore(msg) {
  let total = 0;

  switch (msg.role) {
    case 'system':
      total += 100;
      break;
    case 'tool':
      total -= 20;
      break;
    case 'user':
      total += 30;
      break;
    case 'assistant':
      if (msg.tool_calls) total += 10;
      break;
    default:
      break;
  }

  const length = (msg.content || '').length;
  if (length > 500) {
    total += 5;
  } else if (length < 20) {
    total -= 5;
  }

  return total;
}
|
|
191
|
+
|
|
192
|
+
/**
 * Smart trim: keep system messages plus the highest-scoring older messages
 * that fit the budget, and always keep the last `tailTurns * 2` messages.
 *
 * Older messages are ranked by importanceScore() plus a recency bonus of up
 * to 20 points. They are then admitted in rank order while their combined
 * character count fits the estimated remaining budget (4 characters per
 * token). The three best-ranked messages are admitted even when they do not
 * fit. Surviving messages keep their original relative order; system
 * messages are moved to the front.
 *
 * @param {Array<{role: string, content?: string}>} messages
 * @returns {Array} The trimmed list, or `messages` itself when the history is
 *   within `conversationInContext`.
 */
function smartTrim(messages) {
  const budget = load();
  if (!messages || messages.length === 0) return messages;

  const systemMsgs = messages.filter(m => m.role === 'system');
  const nonSystem = messages.filter(m => m.role !== 'system');

  if (nonSystem.length <= budget.conversationInContext) return messages;

  // Split into the protected tail and the older candidates.
  const tailCount = budget.tailTurns * 2;
  const tailStart = Math.max(0, nonSystem.length - tailCount);
  const tail = nonSystem.slice(tailStart);
  const candidates = nonSystem.slice(0, tailStart);

  // Score candidates with position bonus (recent = higher).
  const scored = candidates.map((m, i) => ({
    msg: m,
    score: importanceScore(m) + (i / candidates.length) * 20,
  })).sort((a, b) => b.score - a.score);

  // Budget calculation: rough estimate of tokens from characters.
  const estimateTokens = list => list.reduce((sum, m) => sum + (m.content || '').length / 4, 0);
  const availableTokens = budget.maxContextTokens - budget.reservedTokens
    - estimateTokens(systemMsgs) - estimateTokens(tail);
  const maxCandidatesChars = Math.max(0, availableTokens * 4);

  // A Set makes the final membership test O(1) per message; the previous
  // array-based `includes` lookups made the reorder step quadratic.
  const retained = new Set(tail);
  let keptChars = 0;
  let keptCount = 0;
  for (const item of scored) {
    const charLen = (item.msg.content || '').length;
    if (keptChars + charLen <= maxCandidatesChars || keptCount < 3) {
      retained.add(item.msg);
      keptChars += charLen;
      keptCount += 1;
    }
  }

  // Restore original order by filtering the untouched sequence.
  const originalOrder = nonSystem.filter(m => retained.has(m));

  return [...systemMsgs, ...originalOrder];
}
|
|
236
|
+
|
|
237
|
+
// ── Token usage tracking ──────────────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
/**
 * Add one call's token counts to the per-session totals in USAGE_FILE.
 *
 * The record for `sessionId` is created on first use and accumulates `input`,
 * `output`, `total` and `count`, with `firstSeen` / `lastSeen` timestamps from
 * Date.now(). This is best effort: any filesystem or parse error is swallowed
 * so usage tracking can never break the calling command.
 *
 * @param {string} sessionId - Key of the session record.
 * @param {{input?: number, output?: number}} tokens - Counts for this call.
 * @returns {void}
 */
function trackUsage(sessionId, tokens) {
  try {
    // Null-prototype map: a session id such as "__proto__" or "constructor"
    // becomes an ordinary key instead of touching Object.prototype.
    const usage = Object.create(null);
    if (fs.existsSync(USAGE_FILE)) {
      const parsed = JSON.parse(fs.readFileSync(USAGE_FILE, 'utf8'));
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        Object.assign(usage, parsed);
      }
    }

    const existing = usage[sessionId];
    const u = existing !== null && typeof existing === 'object'
      ? existing
      : { input: 0, output: 0, total: 0, count: 0, firstSeen: Date.now() };
    usage[sessionId] = u;

    // Coerce to numbers so string counts add up instead of concatenating;
    // missing or non-numeric values contribute 0.
    const input = Number(tokens && tokens.input);
    const output = Number(tokens && tokens.output);
    u.input = (Number(u.input) || 0) + (Number.isFinite(input) ? input : 0);
    u.output = (Number(u.output) || 0) + (Number.isFinite(output) ? output : 0);
    u.total = u.input + u.output;
    u.count = (Number(u.count) || 0) + 1;
    u.lastSeen = Date.now();

    const dir = path.dirname(USAGE_FILE);
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    fs.writeFileSync(USAGE_FILE, JSON.stringify(usage, null, 2));
  } catch {
    // Intentionally ignored: usage tracking is non-essential.
  }
}
|
|
259
|
+
|
|
260
|
+
/**
 * Read recorded token usage from USAGE_FILE.
 *
 * @param {string} [sessionId] - When truthy, return only that session's
 *   record; otherwise return the whole parsed file.
 * @returns {object|null} The record or full usage map; null when the file is
 *   missing, unreadable, malformed, or has no record for `sessionId`.
 */
function getUsage(sessionId) {
  try {
    if (!fs.existsSync(USAGE_FILE)) return null;
    const usage = JSON.parse(fs.readFileSync(USAGE_FILE, 'utf8'));
    if (!sessionId) return usage;
    if (usage === null || typeof usage !== 'object') return null;
    // Own-property lookup so ids like "constructor" do not return members
    // inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(usage, sessionId)) return null;
    return usage[sessionId] || null;
  } catch {
    // Unreadable or malformed usage data is reported as "no data".
    return null;
  }
}
|
|
268
|
+
|
|
269
|
+
/**
 * Return the usage records of every session.
 *
 * @returns {object|null} Whatever getUsage() yields when no session id is given.
 */
function getAllUsage() {
  const everySession = getUsage(null);
  return everySession;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Render a usage record as a short comma-separated summary.
 *
 * Fields appear in the order total, in, out, calls; a field whose value is
 * falsy (0 or missing) is left out.
 *
 * @param {{total?: number, input?: number, output?: number, count?: number}} u
 * @returns {string} e.g. "15 total, 10 in, 5 out, 2 calls", or 'No data' when
 *   `u` is falsy.
 */
function formatUsage(u) {
  if (!u) return 'No data';

  const fields = [
    [u.total, 'total'],
    [u.input, 'in'],
    [u.output, 'out'],
    [u.count, 'calls'],
  ];

  return fields
    .filter(([value]) => value)
    .map(([value, unit]) => `${value} ${unit}`)
    .join(', ');
}
|
|
282
|
+
|
|
283
|
+
module.exports = {
|
|
284
|
+
load,
|
|
285
|
+
save,
|
|
286
|
+
setPreset,
|
|
287
|
+
getPresets,
|
|
288
|
+
capToolOutput,
|
|
289
|
+
capMcpDesc,
|
|
290
|
+
trimMessages,
|
|
291
|
+
trimMemory,
|
|
292
|
+
trimSystemPrompt,
|
|
293
|
+
trimProjectMemory,
|
|
294
|
+
trimFileContent,
|
|
295
|
+
trackUsage,
|
|
296
|
+
getUsage,
|
|
297
|
+
getAllUsage,
|
|
298
|
+
formatUsage,
|
|
299
|
+
PRESETS,
|
|
300
|
+
BUDGET_FILE,
|
|
301
|
+
USAGE_FILE,
|
|
302
|
+
importanceScore,
|
|
303
|
+
smartTrim,
|
|
304
|
+
};
|
package/src/utils/tool-runner.js
CHANGED
|
@@ -75,11 +75,13 @@ function loadTools() {
|
|
|
75
75
|
|
|
76
76
|
/**
 * Build the tool definitions from the tools returned by loadTools().
 *
 * The tool named 'canvas' is left out. Every other tool is reduced to its
 * `name`, `description` and `inputSchema`.
 *
 * @returns {Array<{name: *, description: *, inputSchema: *}>}
 */
function getToolDefinitions() {
  const definitions = [];
  for (const tool of Object.values(loadTools())) {
    if (tool.name === 'canvas') continue;
    const { name, description, inputSchema } = tool;
    definitions.push({ name, description, inputSchema });
  }
  return definitions;
}
|
|
84
86
|
|
|
85
87
|
// ── Execute a single tool ─────────────────────────────────────────────────────
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
const https = require('https');
|
|
2
|
+
const http = require('http');
|
|
3
|
+
const { URL } = require('url');
|
|
4
|
+
|
|
5
|
+
const MAX_CONTENT_LENGTH = 10000;
|
|
6
|
+
|
|
7
|
+
/**
 * Fetch a web page and return its title and text content.
 *
 * The returned promise never rejects. Every failure (invalid URL, unsupported
 * protocol, HTTP error status, network error, timeout, too many redirects) is
 * reported as a short message in `content`.
 *
 * @param {string} urlString - Absolute http(s) URL to fetch.
 * @param {number} [maxRedirects=5] - How many redirects may still be followed;
 *   prevents an endless loop when servers redirect in a cycle.
 * @returns {Promise<{title: string, content: string, url: string, fetchedAt: string}>}
 *   `content` is cut to MAX_CONTENT_LENGTH characters. After a redirect the
 *   result describes the final URL.
 */
function fetchAsMarkdown(urlString, maxRedirects = 5) {
  return new Promise((resolve) => {
    const result = {
      title: '',
      content: '',
      url: urlString,
      fetchedAt: new Date().toISOString(),
    };

    // Settle exactly once. Destroying a request on timeout also emits 'error'
    // ("socket hang up"); without this guard that late event would overwrite
    // the message on the shared, already-resolved result object.
    let settled = false;
    const finish = (value) => {
      if (settled) return;
      settled = true;
      resolve(value);
    };
    const fail = (message) => {
      if (settled) return;
      result.content = message;
      finish(result);
    };

    let parsedUrl;
    try {
      parsedUrl = new URL(urlString);
    } catch {
      return fail('Invalid URL');
    }

    // http.get throws synchronously for other schemes, which would reject the
    // promise instead of reporting the problem in `content`.
    if (parsedUrl.protocol !== 'https:' && parsedUrl.protocol !== 'http:') {
      return fail(`Unsupported protocol: ${parsedUrl.protocol}`);
    }
    const mod = parsedUrl.protocol === 'https:' ? https : http;

    let req;
    try {
      req = mod.get(urlString, { timeout: 15000 }, (res) => {
        const status = res.statusCode;

        if (status >= 300 && status < 400 && res.headers.location) {
          res.resume(); // discard the body so the socket is released
          if (maxRedirects <= 0) return fail('Too many redirects');
          let redirectUrl;
          try {
            redirectUrl = new URL(res.headers.location, urlString).href;
          } catch {
            return fail('Invalid redirect URL');
          }
          return finish(fetchAsMarkdown(redirectUrl, maxRedirects - 1));
        }

        if (status < 200 || status >= 300) {
          res.resume();
          return fail(`HTTP ${status}`);
        }

        const chunks = [];
        res.on('data', (chunk) => chunks.push(chunk));
        res.on('error', (err) => fail(`Fetch error: ${err.message}`));
        res.on('end', () => {
          if (settled) return;
          const raw = Buffer.concat(chunks).toString('utf8');
          const converted = htmlToMarkdown(raw);
          result.title = converted.title;
          result.content = converted.content.slice(0, MAX_CONTENT_LENGTH);
          finish(result);
        });
      });
    } catch (err) {
      return fail(`Fetch error: ${err.message}`);
    }

    req.on('error', (err) => fail(`Fetch error: ${err.message}`));

    req.on('timeout', () => {
      // Record the outcome first, then tear the request down.
      fail('Request timed out');
      req.destroy();
    });
  });
}
|
|
60
|
+
|
|
61
|
+
// Named entities understood by decodeHtmlEntities; any other named entity is
// left exactly as written.
const HTML_NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

/**
 * Decode numeric (decimal and hex) and a few named HTML entities.
 *
 * Works in a single pass so that already-escaped text such as `&amp;lt;` is
 * decoded once (to `&lt;`) and not twice.
 *
 * @param {string} text
 * @returns {string}
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
    if (body[0] !== '#') {
      return HTML_NAMED_ENTITIES.has(body) ? HTML_NAMED_ENTITIES.get(body) : entity;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // fromCodePoint handles characters above U+FFFF and throws past U+10FFFF,
    // so out-of-range references are kept verbatim.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}

/**
 * Reduce an HTML document to its title and readable plain text.
 *
 * Script, style, nav and footer elements are removed. The first match of
 * <article>, <main>, a <div> whose class contains "content", or <body> is
 * used as the content root (the whole input when none matches). Line breaks
 * come from <br>, </p>, </h1>-</h6>, </li> and </tr>; all other tags become
 * spaces, entities are decoded, and whitespace is normalized to single spaces
 * with at most one blank line between blocks.
 *
 * @param {string} html - Raw HTML source.
 * @returns {{title: string, content: string}}
 */
function htmlToMarkdown(html) {
  // Extract title
  const titleMatch = html.match(/<title[^>]*>([^<]*)<\/title>/i);
  const title = titleMatch ? decodeHtmlEntities(titleMatch[1]).trim() : '';

  // Strip scripts, styles and page chrome
  const cleaned = html
    .replace(/<script[\s\S]*?<\/script>/gi, ' ')
    .replace(/<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<nav[\s\S]*?<\/nav>/gi, ' ')
    .replace(/<footer[\s\S]*?<\/footer>/gi, ' ');

  // Try to extract main content
  const main =
    cleaned.match(/<article[\s\S]*?<\/article>/i) ||
    cleaned.match(/<main[\s\S]*?<\/main>/i) ||
    cleaned.match(/<div[^>]*class="[^"]*content[^"]*"[\s\S]*?<\/div>/i) ||
    cleaned.match(/<body[\s\S]*?<\/body>/i);
  const bodyContent = main ? main[0] : cleaned;

  const withBreaks = bodyContent
    // Source-level whitespace carries no meaning in HTML; flatten it first so
    // the only newlines left are the ones inserted for block boundaries.
    .replace(/\s+/g, ' ')
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/p>/gi, '\n\n')
    .replace(/<\/h[1-6]>/gi, '\n\n')
    .replace(/<\/li>/gi, '\n')
    .replace(/<\/tr>/gi, '\n')
    .replace(/<\/td>/gi, ' ')
    .replace(/<[^>]+>/g, ' ');

  // Decode entities only after tags are gone, so escaped markup such as
  // "&lt;b&gt;" survives as literal text.
  const text = decodeHtmlEntities(withBreaks)
    .replace(/[^\S\n]+/g, ' ') // collapse spaces/tabs but keep newlines
    .replace(/ *\n */g, '\n') // no padding around line breaks
    .replace(/\n{3,}/g, '\n\n') // at most one blank line
    .trim();

  return { title, content: text };
}
|
|
106
|
+
|
|
107
|
+
module.exports = { fetchAsMarkdown };
|