okfy-ai 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -3
- package/dist/{chunk-6AP7LVJG.js → chunk-QE5W5AJS.js} +311 -105
- package/dist/cli.js +6 -6
- package/dist/index.d.ts +8 -0
- package/dist/index.js +1 -1
- package/docs/mcp-clients.md +1 -1
- package/examples/README.md +3 -3
- package/examples/bundles/okfy-docs/concepts/index.md +3 -13
- package/examples/bundles/okfy-docs/guides/index.md +3 -13
- package/examples/bundles/okfy-docs/home.md +22 -0
- package/examples/bundles/okfy-docs/index.md +8 -22
- package/examples/bundles/okfy-docs/okfy-example.json +2 -2
- package/examples/bundles/okfy-docs/reference/index.md +2 -12
- package/examples/bundles/stripe-checkout-small/home.md +21 -0
- package/examples/bundles/stripe-checkout-small/index.md +4 -19
- package/examples/bundles/stripe-checkout-small/okfy-example.json +1 -2
- package/examples/local-markdown/okfy-example.json +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# okfy-ai
|
|
2
2
|
|
|
3
|
-
Turn docs into agent-readable Open Knowledge Format bundles, then serve them to Claude, Codex, Cursor, or any MCP client.
|
|
3
|
+
Turn docs into agent-readable Open Knowledge Format v0.1-conformant bundles, then serve them to Claude, Codex, Cursor, or any MCP client.
|
|
4
4
|
|
|
5
5
|
## Use With Agents
|
|
6
6
|
|
|
@@ -146,14 +146,18 @@ docs site or Markdown folder
|
|
|
146
146
|
-> source-backed agent answers
|
|
147
147
|
```
|
|
148
148
|
|
|
149
|
-
Each source page or Markdown file becomes one OKF concept in v0.1.
|
|
149
|
+
Each non-reserved source page or Markdown file becomes one OKF concept in v0.1. `index.md` and `log.md` are reserved files, not concepts, and generated indexes are plain Markdown. Concept counts, search, graph links, types, tags, and `read_concept` exclude reserved files.
|
|
150
|
+
|
|
151
|
+
Validation errors are limited to OKF conformance: malformed or missing concept frontmatter, missing `type`, or invalid reserved-file structure. Broken internal links and missing indexes are warnings.
|
|
150
152
|
|
|
151
153
|
## Security Defaults
|
|
152
154
|
|
|
153
155
|
- Crawls respect `robots.txt` by default.
|
|
154
156
|
- Crawls stay same-origin by default.
|
|
155
157
|
- Page count, depth, response size, and concurrency are capped.
|
|
156
|
-
- Private network targets are rejected by default for URL crawls.
|
|
158
|
+
- Private network URL literals and redirects to private targets are rejected by default for URL crawls.
|
|
159
|
+
- Hostnames that resolve to a private address during the preflight DNS lookup are rejected before the fetch. The fetch performs its own DNS resolution and is not pinned to the preflight IP address.
|
|
160
|
+
- `--force` refuses unsafe output directories such as `.`, `/`, the home dir, repo root, input path, input parent, and symlink output dirs unless an explicit dangerous override is provided.
|
|
157
161
|
- HTML and Markdown are treated as text. Scripts are not executed.
|
|
158
162
|
- MCP tools are read-only in v0.1.
|
|
159
163
|
|
|
@@ -136,9 +136,24 @@ function descriptionFromMarkdown(markdown) {
|
|
|
136
136
|
|
|
137
137
|
// src/writer.ts
|
|
138
138
|
import fs from "fs/promises";
|
|
139
|
+
import os from "os";
|
|
140
|
+
import path3 from "path";
|
|
141
|
+
|
|
142
|
+
// src/okf.ts
|
|
139
143
|
import path2 from "path";
|
|
144
|
+
var RESERVED_FILENAMES = /* @__PURE__ */ new Set(["index.md", "log.md"]);
|
|
145
|
+
/**
 * Rewrites a path that uses the platform separator so it uses forward slashes.
 *
 * @param {string} input - Path using `path2.sep` between segments.
 * @returns {string} The same segments joined with "/".
 */
function toOkfPath(input) {
  const segments = input.split(path2.sep);
  return segments.join("/");
}
|
|
148
|
+
/**
 * Tells whether a path's file name is one of the reserved names in
 * RESERVED_FILENAMES. The comparison is case-insensitive and looks only at
 * the final path segment.
 *
 * @param {string} input - Path in platform or forward-slash form.
 * @returns {boolean} true when the basename is reserved.
 */
function isReservedOkfPath(input) {
  const normalized = toOkfPath(input);
  const filename = path2.posix.basename(normalized).toLowerCase();
  return RESERVED_FILENAMES.has(filename);
}
|
|
151
|
+
/**
 * Tells whether a path names a Markdown file that is not a reserved file.
 *
 * @param {string} input - Path to classify.
 * @returns {boolean} true for `.md` paths (any letter case) that are not reserved.
 */
function isConceptMarkdownPath(input) {
  if (!input.toLowerCase().endsWith(".md")) return false;
  return !isReservedOkfPath(input);
}
|
|
140
154
|
|
|
141
155
|
// src/util/url.ts
|
|
156
|
+
import dns from "dns/promises";
|
|
142
157
|
import net from "net";
|
|
143
158
|
var TRACKING_PARAMS = [/^utm_/i, /^fbclid$/i, /^gclid$/i, /^mc_/i];
|
|
144
159
|
function canonicalizeUrl(input, base) {
|
|
@@ -167,20 +182,63 @@ function isHttpUrl(input) {
|
|
|
167
182
|
return false;
|
|
168
183
|
}
|
|
169
184
|
}
|
|
185
|
+
/**
 * Reports whether an IPv4 address belongs to a range that must not be
 * crawled by default.
 *
 * @param {number[]} parts - Address octets. Only the first two are inspected;
 *   a missing octet is treated as 0.
 * @returns {boolean} true when the address is in a non-public range.
 */
function isPrivateIpv4Parts(parts) {
  const [a = 0, b = 0] = parts;
  // 0.0.0.0/8 ("this network"), 10.0.0.0/8 (private), 127.0.0.0/8 (loopback).
  if (a === 0 || a === 10 || a === 127) return true;
  // 100.64.0.0/10 shared address space (carrier-grade NAT).
  if (a === 100 && b >= 64 && b <= 127) return true;
  // 169.254.0.0/16 link-local.
  if (a === 169 && b === 254) return true;
  // 172.16.0.0/12 private.
  if (a === 172 && b >= 16 && b <= 31) return true;
  // 192.168.0.0/16 private.
  if (a === 192 && b === 168) return true;
  // 198.18.0.0/15 benchmarking range: not globally reachable, often routed internally.
  if (a === 198 && (b === 18 || b === 19)) return true;
  // 224.0.0.0/4 multicast and 240.0.0.0/4 reserved (includes broadcast).
  return a >= 224;
}
|
|
189
|
+
/**
 * Extracts the embedded IPv4 octets from an IPv4-mapped IPv6 literal.
 *
 * Two spellings are recognized, each prefixed by `::ffff:` or
 * `0:0:0:0:0:ffff:`: a dotted-quad tail (`::ffff:127.0.0.1`) and a tail of
 * two hexadecimal groups (`::ffff:7f00:1`).
 *
 * @param {string} host - IPv6 literal without surrounding brackets.
 * @returns {number[] | undefined} The four octets, or undefined when the
 *   host is not an IPv4-mapped address.
 */
function mappedIpv4PartsFromIpv6(host) {
  const dottedMatch = host.match(/^(?:::|0:0:0:0:0:)ffff:(\d{1,3}(?:\.\d{1,3}){3})$/i);
  const dotted = dottedMatch?.[1];
  if (dotted) {
    const octets = dotted.split(".").map(Number);
    const allValid = octets.length === 4 && octets.every((octet) => Number.isInteger(octet) && octet >= 0 && octet <= 255);
    if (allValid) return octets;
    // An out-of-range dotted tail falls through; the hex pattern below cannot match it.
  }
  const hexMatch = host.match(/^(?:::|0:0:0:0:0:)ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i);
  if (!hexMatch) return void 0;
  const words = [hexMatch[1], hexMatch[2]].map((group) => Number.parseInt(group ?? "", 16));
  const outOfRange = words.some((word) => !Number.isInteger(word) || word < 0 || word > 65535);
  if (outOfRange) return void 0;
  const [high, low] = words;
  // Each 16-bit group carries two octets: high byte first, then low byte.
  return [high >> 8, high & 255, low >> 8, low & 255];
}
|
|
170
204
|
/**
 * Reports whether a URL's host is a literal that points at a private,
 * loopback, or link-local target. No DNS lookup is performed here; only the
 * host text of the URL is inspected.
 *
 * @param {string} input - Absolute URL. `new URL` throws on invalid input.
 * @returns {boolean} true when the host literal is a private target.
 */
function isPrivateNetworkUrl(input) {
  const url = new URL(input);
  // Strip IPv6 brackets, and the root-label dot so "localhost." is treated like "localhost".
  const host = url.hostname.toLowerCase().replace(/^\[/, "").replace(/\]$/, "").replace(/\.$/, "");
  if (host === "localhost" || host.endsWith(".localhost")) return true;
  // Unspecified address, loopback, and the whole fe80::/10 link-local range (fe80 through febf).
  if (host === "::" || host === "::1" || /^fe[89ab][0-9a-f]:/.test(host)) return true;
  const ipKind = net.isIP(host);
  if (ipKind === 4) {
    return isPrivateIpv4Parts(host.split(".").map(Number));
  }
  if (ipKind === 6) {
    // IPv4-mapped literals are judged by the IPv4 address they embed.
    const mappedIpv4Parts = mappedIpv4PartsFromIpv6(host);
    if (mappedIpv4Parts) return isPrivateIpv4Parts(mappedIpv4Parts);
    // Prefix test for unique local addresses (fc00::/7).
    return host.startsWith("fc") || host.startsWith("fd");
  }
  return false;
}
|
|
221
|
+
/**
 * Reports whether a URL targets a private network, either as a literal or
 * through any address its hostname resolves to.
 *
 * A failed DNS lookup yields false.
 *
 * @param {string} input - Absolute URL to check.
 * @returns {Promise<boolean>} true when the literal or any resolved address is private.
 */
async function resolvesToPrivateNetwork(input) {
  if (isPrivateNetworkUrl(input)) return true;
  const { hostname, protocol } = new URL(input);
  const host = hostname.toLowerCase().replace(/^\[/, "").replace(/\]$/, "");
  // IP literals were already judged by the literal check above.
  if (net.isIP(host)) return false;
  let addresses;
  try {
    addresses = await dns.lookup(host, { all: true, verbatim: true });
  } catch {
    return false;
  }
  for (const { address } of addresses) {
    // Re-wrap IPv6 results in brackets so they form a valid URL host.
    const literal = address.includes(":") ? `[${address}]` : address;
    if (isPrivateNetworkUrl(`${protocol}//${literal}`)) return true;
  }
  return false;
}
|
|
237
|
+
/**
 * Rejects URLs that target a private network.
 *
 * @param {string} input - Absolute URL to check.
 * @returns {Promise<void>} Resolves when the target is not private.
 * @throws {Error} When resolvesToPrivateNetwork reports a private target.
 */
async function assertPublicNetworkUrl(input) {
  const isPrivate = await resolvesToPrivateNetwork(input);
  if (!isPrivate) return;
  throw new Error("Private network crawl target rejected. Use --allow-private-network for trusted local fixtures.");
}
|
|
184
242
|
|
|
185
243
|
// src/writer.ts
|
|
186
244
|
function yamlScalar(value) {
|
|
@@ -216,12 +274,12 @@ function assignOutputPaths(docs) {
|
|
|
216
274
|
const used = /* @__PURE__ */ new Set();
|
|
217
275
|
const result = /* @__PURE__ */ new Map();
|
|
218
276
|
for (const doc of docs) {
|
|
219
|
-
const base = doc.resource ? urlToOutputPath(doc.resource) : ensureMarkdownPath(doc.sourcePath ?? doc.sourceId);
|
|
277
|
+
const base = safeConceptOutputPath(doc.resource ? urlToOutputPath(doc.resource) : ensureMarkdownPath(doc.sourcePath ?? doc.sourceId));
|
|
220
278
|
let candidate = base;
|
|
221
279
|
let index = 2;
|
|
222
280
|
while (used.has(candidate)) {
|
|
223
|
-
const parsed =
|
|
224
|
-
candidate =
|
|
281
|
+
const parsed = path3.posix.parse(base);
|
|
282
|
+
candidate = path3.posix.join(parsed.dir, `${parsed.name}-${index}${parsed.ext}`);
|
|
225
283
|
index += 1;
|
|
226
284
|
}
|
|
227
285
|
used.add(candidate);
|
|
@@ -230,6 +288,12 @@ function assignOutputPaths(docs) {
|
|
|
230
288
|
}
|
|
231
289
|
return result;
|
|
232
290
|
}
|
|
291
|
+
/**
 * Moves a concept's output path off a reserved file name.
 *
 * Non-reserved paths are returned untouched. A reserved name whose stem is
 * `log` becomes `change-log.md`; any other reserved name becomes
 * `overview.md` inside a subdirectory or `home.md` when the path has no
 * directory part.
 *
 * @param {string} candidate - Forward-slash output path.
 * @returns {string} A path whose basename is not reserved.
 */
function safeConceptOutputPath(candidate) {
  if (!isReservedOkfPath(candidate)) return candidate;
  const { dir, name } = path3.posix.parse(candidate);
  let replacement;
  if (name.toLowerCase() === "log") {
    replacement = "change-log";
  } else if (dir) {
    replacement = "overview";
  } else {
    replacement = "home";
  }
  return path3.posix.join(dir, `${replacement}.md`);
}
|
|
233
297
|
function rewriteLinks(doc, sourceToOutput) {
|
|
234
298
|
return doc.markdown.replace(/\[([^\]]*)\]\(([^)\s]+)([^)]*)\)/g, (full, text, href, suffix) => {
|
|
235
299
|
if (/^(https?:)?\/\//.test(href)) {
|
|
@@ -254,7 +318,7 @@ function rewriteLinks(doc, sourceToOutput) {
|
|
|
254
318
|
}
|
|
255
319
|
}
|
|
256
320
|
if (!href.startsWith("#") && doc.sourcePath) {
|
|
257
|
-
const abs = toPosixPath(
|
|
321
|
+
const abs = toPosixPath(path3.posix.normalize(path3.posix.join(path3.posix.dirname(doc.sourcePath), href)));
|
|
258
322
|
const noHash = abs.split("#")[0] ?? abs;
|
|
259
323
|
const target = sourceToOutput.get(noHash);
|
|
260
324
|
if (target && doc.outputPath) return `[${text}](${relativeMarkdownLink(doc.outputPath, target)}${suffix})`;
|
|
@@ -262,11 +326,64 @@ function rewriteLinks(doc, sourceToOutput) {
|
|
|
262
326
|
return full;
|
|
263
327
|
});
|
|
264
328
|
}
|
|
265
|
-
async function
|
|
329
|
+
/**
 * Checks whether something exists at a path, without following a final symlink
 * (uses lstat).
 *
 * @param {string} target - Path to probe.
 * @returns {Promise<boolean>} false only for ENOENT; true when lstat succeeds.
 * @throws Any lstat error other than ENOENT is propagated.
 */
async function pathExists(target) {
  return fs.lstat(target).then(
    () => true,
    (error) => {
      if (error?.code === "ENOENT") return false;
      throw error;
    }
  );
}
|
|
338
|
+
/**
 * Resolves a path to its canonical form for safety comparisons, even when
 * the path (or several trailing segments of it) does not exist yet.
 *
 * The nearest existing ancestor is canonicalized with realpath and the
 * missing segments are appended unchanged. Previously only the immediate
 * parent was tried, so a target such as `out/nested/bundle` with no `out`
 * directory failed with ENOENT before anything was written.
 *
 * @param {string} target - Path to resolve; relative paths resolve against the cwd.
 * @returns {Promise<string>} Canonical absolute path.
 * @throws Any realpath error other than ENOENT is propagated.
 */
async function resolveForSafety(target) {
  const resolved = path3.resolve(target);
  const missing = [];
  let current = resolved;
  while (true) {
    try {
      const real = await fs.realpath(current);
      return missing.length === 0 ? real : path3.join(real, ...missing);
    } catch (error) {
      if (error?.code !== "ENOENT") throw error;
    }
    const parent = path3.dirname(current);
    // Reached the filesystem root without finding an existing ancestor.
    if (parent === current) return resolved;
    missing.unshift(path3.basename(current));
    current = parent;
  }
}
|
|
345
|
+
/**
 * Walks upward from a directory looking for one that contains a `.git` entry.
 *
 * @param {string} start - Directory to start from.
 * @returns {Promise<string | undefined>} Canonical path of the first
 *   directory containing `.git`, or undefined when the filesystem root is
 *   reached without finding one.
 */
async function findRepoRoot(start) {
  let dir = path3.resolve(start);
  let previous;
  do {
    if (await pathExists(path3.join(dir, ".git"))) return fs.realpath(dir);
    previous = dir;
    dir = path3.dirname(dir);
    // dirname of the root is the root itself, which ends the walk.
  } while (dir !== previous);
  return void 0;
}
|
|
354
|
+
/**
 * Refuses `--force` output directories whose recursive deletion would
 * destroy something important.
 *
 * Protected locations are the filesystem root, the home directory, the
 * current working directory, the enclosing repository root (if any), and,
 * when `options.inputPath` is set, the input path and its parent. The output
 * directory is rejected when it IS one of these or when it CONTAINS one of
 * them, since a recursive delete removes everything underneath it. An
 * existing output path that is a symlink is rejected as well.
 *
 * @param {string} outDir - Requested output directory.
 * @param {object} options - Reads `dangerouslyAllowUnsafeOutput` (skips all
 *   checks when truthy) and `inputPath`.
 * @returns {Promise<void>} Resolves when the directory is acceptable.
 * @throws {Error} When the directory is empty-named, a symlink, or unsafe.
 */
async function assertSafeForceOutDir(outDir, options) {
  if (options.dangerouslyAllowUnsafeOutput) return;
  if (outDir.trim() === "") throw new Error("Unsafe output directory for --force: empty path.");
  const rawResolved = path3.resolve(outDir);
  const existing = await pathExists(rawResolved);
  if (existing) {
    const stat = await fs.lstat(rawResolved);
    if (stat.isSymbolicLink()) {
      throw new Error(`Unsafe output directory for --force: refusing symlink ${outDir}.`);
    }
  }
  const realOutDir = await resolveForSafety(outDir);
  const forbidden = /* @__PURE__ */ new Map([
    [path3.parse(realOutDir).root, "filesystem root"],
    [await fs.realpath(os.homedir()), "home directory"],
    [await fs.realpath(process.cwd()), "current working directory"]
  ]);
  const repoRoot = await findRepoRoot(process.cwd());
  if (repoRoot) forbidden.set(repoRoot, "repository root");
  if (options.inputPath) {
    const inputReal = await resolveForSafety(options.inputPath);
    forbidden.set(inputReal, "input path");
    forbidden.set(path3.dirname(inputReal), "parent of input path");
  }
  const reason = forbidden.get(realOutDir);
  if (reason) throw new Error(`Unsafe output directory for --force: refusing to delete ${reason} (${realOutDir}).`);
  // Deleting an ancestor of a protected path deletes the protected path too.
  for (const [protectedPath, label] of forbidden) {
    const rel = path3.relative(realOutDir, protectedPath);
    const isInside = rel !== "" && rel !== ".." && !rel.startsWith(`..${path3.sep}`) && !path3.isAbsolute(rel);
    if (isInside) {
      throw new Error(`Unsafe output directory for --force: refusing to delete ${realOutDir} because it contains the ${label} (${protectedPath}).`);
    }
  }
}
|
|
381
|
+
async function ensureCleanOutDir(outDir, options) {
|
|
382
|
+
if (options.force) await assertSafeForceOutDir(outDir, options);
|
|
266
383
|
try {
|
|
267
384
|
const entries = await fs.readdir(outDir);
|
|
268
385
|
if (entries.length > 0) {
|
|
269
|
-
if (!force) throw new Error(`Output directory is not empty: ${outDir}. Use --force to overwrite.`);
|
|
386
|
+
if (!options.force) throw new Error(`Output directory is not empty: ${outDir}. Use --force to overwrite.`);
|
|
270
387
|
await fs.rm(outDir, { recursive: true, force: true });
|
|
271
388
|
}
|
|
272
389
|
} catch (error) {
|
|
@@ -274,70 +391,57 @@ async function ensureCleanOutDir(outDir, force) {
|
|
|
274
391
|
}
|
|
275
392
|
await fs.mkdir(outDir, { recursive: true });
|
|
276
393
|
}
|
|
394
|
+
/**
 * Picks a display title for a concept.
 *
 * @param {string} relPath - Forward-slash path of the concept file.
 * @param {string} [fallback] - Preferred title; used whenever it is truthy.
 * @returns {string} `fallback` when truthy, otherwise the file name without `.md`.
 */
function titleForPath(relPath, fallback) {
  const stem = path3.posix.basename(relPath, ".md");
  if (fallback) return fallback;
  return stem;
}
|
|
398
|
+
/**
 * Computes the link target to use from an index in `fromDir` to `toPath`.
 *
 * @param {string} fromDir - Directory of the linking file; "." means the bundle root.
 * @param {string} toPath - Forward-slash path of the link target.
 * @returns {string} `toPath` unchanged for the root, otherwise the POSIX
 *   relative path from `fromDir`.
 */
function markdownLink(fromDir, toPath) {
  return fromDir === "." ? toPath : path3.posix.relative(fromDir, toPath);
}
|
|
402
|
+
/**
 * Builds the heading text for a generated index.
 *
 * For the root (".") the title comes from `options.title`, then
 * `options.sourceName`, then the literal "OKF Bundle". For any other
 * directory, its last path segment is split on hyphens, underscores and
 * whitespace, and each word gets an upper-cased first character.
 *
 * @param {string} dir - Directory the index describes.
 * @param {object} options - Reads `title` and `sourceName`.
 * @returns {string} Heading text.
 */
function indexTitle(dir, options) {
  if (dir === ".") return options.title ?? options.sourceName ?? "OKF Bundle";
  const words = path3.posix.basename(dir).split(/[-_\s]+/).filter(Boolean);
  const capitalized = [];
  for (const word of words) {
    capitalized.push(word.slice(0, 1).toUpperCase() + word.slice(1));
  }
  return capitalized.join(" ");
}
|
|
407
|
+
/**
 * Writes a plain-Markdown `index.md` (no frontmatter) for one directory.
 *
 * The root index lists every concept; a directory index lists only concepts
 * located directly in that directory. Entries are sorted by `relPath` and
 * rendered as `* [title](link) - description`.
 *
 * @param {string} outDir - Bundle output directory on disk.
 * @param {string} dir - Forward-slash directory inside the bundle, "." for the root.
 * @param {Array<{relPath: string, title: string, description: string}>} concepts
 * @param {object} options - Passed to indexTitle for the heading.
 * @returns {Promise<string>} Bundle-relative path of the index that was written.
 */
async function writePlainIndex(outDir, dir, concepts, options) {
  const isRoot = dir === ".";
  const indexPath = isRoot ? "index.md" : path3.posix.join(dir, "index.md");
  const members = isRoot ? concepts : concepts.filter((concept) => path3.posix.dirname(concept.relPath) === dir);
  // Copy before sorting so the caller's array keeps its order.
  const ordered = [...members].sort((a, b) => a.relPath.localeCompare(b.relPath));
  const lines = [`# ${indexTitle(dir, options)}`, ""];
  for (const concept of ordered) {
    lines.push(`* [${concept.title}](${markdownLink(dir, concept.relPath)}) - ${concept.description}`);
  }
  const destination = path3.join(outDir, indexPath);
  await fs.mkdir(path3.dirname(destination), { recursive: true });
  await fs.writeFile(destination, `${lines.join("\n").trimEnd()}\n`, "utf8");
  return indexPath;
}
|
|
277
420
|
async function writeOkfBundle(docs, options) {
|
|
278
421
|
if (docs.length === 0) throw new Error("No documents to write.");
|
|
279
|
-
await ensureCleanOutDir(options.outDir, options
|
|
422
|
+
await ensureCleanOutDir(options.outDir, options);
|
|
280
423
|
const timestamp = options.timestamp ?? (/* @__PURE__ */ new Date()).toISOString();
|
|
281
424
|
const sourceToOutput = assignOutputPaths(docs);
|
|
282
425
|
const written = [];
|
|
426
|
+
const concepts = [];
|
|
283
427
|
for (const doc of docs) {
|
|
284
428
|
const relPath = doc.outputPath ?? "index.md";
|
|
285
|
-
const absolute =
|
|
286
|
-
await fs.mkdir(
|
|
429
|
+
const absolute = path3.join(options.outDir, relPath);
|
|
430
|
+
await fs.mkdir(path3.dirname(absolute), { recursive: true });
|
|
287
431
|
const body = withTitle(doc.title, rewriteLinks(doc, sourceToOutput));
|
|
288
432
|
await fs.writeFile(absolute, `${frontmatter(doc, timestamp)}${body}
|
|
289
433
|
`, "utf8");
|
|
290
434
|
written.push(relPath);
|
|
435
|
+
concepts.push({
|
|
436
|
+
relPath,
|
|
437
|
+
title: titleForPath(relPath, doc.title),
|
|
438
|
+
description: descriptionFromMarkdown(doc.markdown)
|
|
439
|
+
});
|
|
291
440
|
}
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
const list = written.sort().map((file) => `- [${file.replace(/\.md$/, "")}](./${file})`).join("\n");
|
|
295
|
-
const indexDoc = [
|
|
296
|
-
"---",
|
|
297
|
-
'type: "Bundle Index"',
|
|
298
|
-
`title: ${yamlScalar(title)}`,
|
|
299
|
-
`description: ${yamlScalar(`Index for ${title}.`)}`,
|
|
300
|
-
`resource: ${yamlScalar(options.sourceName ?? title)}`,
|
|
301
|
-
"tags:",
|
|
302
|
-
' - "index"',
|
|
303
|
-
`timestamp: ${yamlScalar(timestamp)}`,
|
|
304
|
-
"---",
|
|
305
|
-
"",
|
|
306
|
-
`# ${title}`,
|
|
307
|
-
"",
|
|
308
|
-
list,
|
|
309
|
-
""
|
|
310
|
-
].join("\n");
|
|
311
|
-
await fs.writeFile(path2.join(options.outDir, "index.md"), indexDoc, "utf8");
|
|
312
|
-
written.unshift("index.md");
|
|
313
|
-
}
|
|
314
|
-
const dirs = [...new Set(written.map((file) => path2.posix.dirname(file)).filter((dir) => dir !== "."))].sort();
|
|
441
|
+
written.push(await writePlainIndex(options.outDir, ".", concepts, options));
|
|
442
|
+
const dirs = [...new Set(concepts.map((concept) => path3.posix.dirname(concept.relPath)).filter((dir) => dir !== "."))].sort();
|
|
315
443
|
for (const dir of dirs) {
|
|
316
|
-
|
|
317
|
-
if (written.includes(indexPath)) continue;
|
|
318
|
-
const children = written.filter((file) => path2.posix.dirname(file) === dir && path2.posix.basename(file) !== "index.md").sort();
|
|
319
|
-
if (children.length === 0) continue;
|
|
320
|
-
const title = `${dir.split("/").map((segment) => segment.slice(0, 1).toUpperCase() + segment.slice(1)).join(" / ")} Index`;
|
|
321
|
-
const list = children.map((file) => `- [${path2.posix.basename(file, ".md")}](./${path2.posix.basename(file)})`).join("\n");
|
|
322
|
-
const folderIndex = [
|
|
323
|
-
"---",
|
|
324
|
-
'type: "Folder Index"',
|
|
325
|
-
`title: ${yamlScalar(title)}`,
|
|
326
|
-
`description: ${yamlScalar(`Index for ${dir}.`)}`,
|
|
327
|
-
`resource: ${yamlScalar(options.sourceName ?? dir)}`,
|
|
328
|
-
"tags:",
|
|
329
|
-
' - "index"',
|
|
330
|
-
`timestamp: ${yamlScalar(timestamp)}`,
|
|
331
|
-
"---",
|
|
332
|
-
"",
|
|
333
|
-
`# ${title}`,
|
|
334
|
-
"",
|
|
335
|
-
list,
|
|
336
|
-
""
|
|
337
|
-
].join("\n");
|
|
338
|
-
await fs.mkdir(path2.join(options.outDir, dir), { recursive: true });
|
|
339
|
-
await fs.writeFile(path2.join(options.outDir, indexPath), folderIndex, "utf8");
|
|
340
|
-
written.push(indexPath);
|
|
444
|
+
written.push(await writePlainIndex(options.outDir, dir, concepts, options));
|
|
341
445
|
}
|
|
342
446
|
return written.sort();
|
|
343
447
|
}
|
|
@@ -370,18 +474,40 @@ function matchesAnyPattern(value, patterns) {
|
|
|
370
474
|
// src/crawler.ts
|
|
371
475
|
var USER_AGENT = "okfy/0.1 (+https://github.com/0dust/OKFy)";
|
|
372
476
|
var MAX_RESPONSE_BYTES = 5 * 1024 * 1024;
|
|
373
|
-
|
|
477
|
+
/**
 * Tells whether an HTTP status is one that a client follows via `Location`.
 *
 * Only 301, 302, 303, 307 and 308 qualify. Other 3xx codes such as
 * 304 Not Modified are ordinary responses and normally carry no `Location`
 * header, so treating them as redirects made the caller fail with
 * "Redirect missing location".
 *
 * @param {number} status - HTTP status code.
 * @returns {boolean} true for a followable redirect status.
 */
function isRedirect(status) {
  return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;
}
|
|
480
|
+
/**
 * Recognizes errors raised by the crawler's own security checks, identified
 * by their message text.
 *
 * @param {unknown} error - Caught value.
 * @returns {boolean} true when `error` is an Error whose message contains
 *   one of the two rejection phrases.
 */
function isSecurityRejection(error) {
  if (!(error instanceof Error)) return false;
  const markers = ["Private network crawl target rejected", "Cross-origin redirect rejected"];
  return markers.some((marker) => error.message.includes(marker));
}
|
|
484
|
+
/**
 * Fetches a URL while following redirects manually, so that every hop is
 * checked before it is requested.
 *
 * Before each request the current URL is checked against the
 * private-network rule (unless `options.allowPrivateNetwork`) and, when
 * `options.sameOriginSeed` is set, against the same-origin rule. The loop
 * issues at most 11 requests (the initial one plus up to 10 redirects).
 *
 * @param {string} url - Initial URL.
 * @param {object} options - Reads `allowPrivateNetwork` and `sameOriginSeed`.
 * @param {AbortSignal} signal - Abort signal passed to every request.
 * @returns {Promise<Response>} The first non-redirect response.
 * @throws {Error} On a rejected hop, a redirect without `Location`, or too many redirects.
 */
async function fetchWithRedirects(url, options, signal) {
  let current = url;
  for (let redirectCount = 0; redirectCount <= 10; redirectCount += 1) {
    if (!options.allowPrivateNetwork) await assertPublicNetworkUrl(current);
    if (options.sameOriginSeed && !sameOrigin(current, options.sameOriginSeed)) {
      throw new Error(`Cross-origin redirect rejected: ${current}`);
    }
    const response = await fetch(current, {
      signal,
      headers: { "user-agent": USER_AGENT, accept: "text/html,text/markdown,text/plain,*/*" },
      redirect: "manual"
    });
    if (!isRedirect(response.status)) return response;
    const location = response.headers.get("location");
    // The redirect body is never read; cancel it so the connection is released
    // promptly. Best-effort cleanup: a failure here must not mask the redirect result.
    await response.body?.cancel().catch(() => {});
    if (!location) throw new Error(`Redirect missing location for ${current}`);
    current = canonicalizeUrl(location, current);
  }
  throw new Error(`Too many redirects for ${url}`);
}
|
|
503
|
+
async function fetchText(url, options = {}) {
|
|
374
504
|
const controller = new AbortController();
|
|
375
505
|
const timeout = setTimeout(() => controller.abort(), 15e3);
|
|
376
506
|
try {
|
|
377
507
|
let lastError;
|
|
378
508
|
for (let attempt = 0; attempt < 3; attempt += 1) {
|
|
379
509
|
try {
|
|
380
|
-
const response = await
|
|
381
|
-
signal: controller.signal,
|
|
382
|
-
headers: { "user-agent": USER_AGENT, accept: "text/html,text/markdown,text/plain,*/*" },
|
|
383
|
-
redirect: "follow"
|
|
384
|
-
});
|
|
510
|
+
const response = await fetchWithRedirects(url, options, controller.signal);
|
|
385
511
|
if (!response.ok) {
|
|
386
512
|
if ((response.status >= 500 || response.status === 429) && attempt < 2) {
|
|
387
513
|
await new Promise((resolve) => setTimeout(resolve, 250 * 2 ** attempt));
|
|
@@ -396,6 +522,7 @@ async function fetchText(url) {
|
|
|
396
522
|
return { text, contentType: response.headers.get("content-type") ?? "" };
|
|
397
523
|
} catch (error) {
|
|
398
524
|
lastError = error;
|
|
525
|
+
if (isSecurityRejection(error)) throw error;
|
|
399
526
|
if (attempt < 2) await new Promise((resolve) => setTimeout(resolve, 250 * 2 ** attempt));
|
|
400
527
|
}
|
|
401
528
|
}
|
|
@@ -408,8 +535,8 @@ async function loadRobots(seedUrl, enabled) {
|
|
|
408
535
|
if (!enabled) return void 0;
|
|
409
536
|
const origin = new URL(seedUrl).origin;
|
|
410
537
|
try {
|
|
411
|
-
const
|
|
412
|
-
const text =
|
|
538
|
+
const fetched = await fetchText(`${origin}/robots.txt`, { sameOriginSeed: seedUrl });
|
|
539
|
+
const text = fetched.text;
|
|
413
540
|
return robotsParser(`${origin}/robots.txt`, text);
|
|
414
541
|
} catch {
|
|
415
542
|
return robotsParser(`${origin}/robots.txt`, "");
|
|
@@ -444,6 +571,7 @@ async function crawlWebsite(options) {
|
|
|
444
571
|
if (!options.allowPrivateNetwork && isPrivateNetworkUrl(seed)) {
|
|
445
572
|
throw new Error("Private network crawl target rejected. Use --allow-private-network for trusted local fixtures.");
|
|
446
573
|
}
|
|
574
|
+
if (!options.allowPrivateNetwork) await assertPublicNetworkUrl(seed);
|
|
447
575
|
const maxPages = options.maxPages ?? 100;
|
|
448
576
|
const maxDepth = options.maxDepth ?? 4;
|
|
449
577
|
const robots = await loadRobots(seed, options.respectRobots ?? true);
|
|
@@ -471,7 +599,10 @@ async function crawlWebsite(options) {
|
|
|
471
599
|
planned.push(item.url);
|
|
472
600
|
options.onProgress?.({ type: "fetch", url: item.url, fetched: documents.length, queued: queue.length, maxPages });
|
|
473
601
|
try {
|
|
474
|
-
const fetched = await fetchText(item.url
|
|
602
|
+
const fetched = await fetchText(item.url, {
|
|
603
|
+
allowPrivateNetwork: options.allowPrivateNetwork,
|
|
604
|
+
sameOriginSeed: options.sameOrigin ?? true ? seed : void 0
|
|
605
|
+
});
|
|
475
606
|
const contentType = contentTypeFromHeader(fetched.contentType);
|
|
476
607
|
if (!contentType) {
|
|
477
608
|
skipped += 1;
|
|
@@ -492,7 +623,7 @@ async function crawlWebsite(options) {
|
|
|
492
623
|
for (const link of links) {
|
|
493
624
|
try {
|
|
494
625
|
const next = canonicalizeUrl(link.href, item.url);
|
|
495
|
-
if (!queued.has(next) && shouldVisit(next, seed, options, robots) && queued.size < maxPages * 4) {
|
|
626
|
+
if (!queued.has(next) && shouldVisit(next, seed, options, robots) && (options.allowPrivateNetwork || !await resolvesToPrivateNetwork(next)) && queued.size < maxPages * 4) {
|
|
496
627
|
queued.add(next);
|
|
497
628
|
queue.push({ url: next, depth: item.depth + 1 });
|
|
498
629
|
discovered += 1;
|
|
@@ -510,7 +641,8 @@ async function crawlWebsite(options) {
|
|
|
510
641
|
discovered,
|
|
511
642
|
maxPages
|
|
512
643
|
});
|
|
513
|
-
} catch {
|
|
644
|
+
} catch (error) {
|
|
645
|
+
if (isSecurityRejection(error)) throw error;
|
|
514
646
|
failed += 1;
|
|
515
647
|
options.onProgress?.({ type: "failed", url: item.url, fetched: documents.length, queued: queue.length, maxPages });
|
|
516
648
|
}
|
|
@@ -529,6 +661,7 @@ async function crawlWebsite(options) {
|
|
|
529
661
|
title: options.title,
|
|
530
662
|
sourceName: seed,
|
|
531
663
|
force: options.force,
|
|
664
|
+
dangerouslyAllowUnsafeOutput: options.dangerouslyAllowUnsafeOutput,
|
|
532
665
|
timestamp: options.timestamp
|
|
533
666
|
});
|
|
534
667
|
return { pagesFetched: documents.length, skipped, failed, written, documents };
|
|
@@ -536,9 +669,9 @@ async function crawlWebsite(options) {
|
|
|
536
669
|
|
|
537
670
|
// src/importer.ts
|
|
538
671
|
import fs2 from "fs/promises";
|
|
539
|
-
import
|
|
672
|
+
import path4 from "path";
|
|
540
673
|
function contentTypeFor(file) {
|
|
541
|
-
const ext =
|
|
674
|
+
const ext = path4.extname(file).toLowerCase();
|
|
542
675
|
if (ext === ".md") return "markdown";
|
|
543
676
|
if (ext === ".mdx") return "mdx";
|
|
544
677
|
if (ext === ".html" || ext === ".htm") return "html";
|
|
@@ -551,7 +684,7 @@ async function listFiles(root) {
|
|
|
551
684
|
const files = [];
|
|
552
685
|
async function walk(dir) {
|
|
553
686
|
for (const entry of await fs2.readdir(dir, { withFileTypes: true })) {
|
|
554
|
-
const absolute =
|
|
687
|
+
const absolute = path4.join(dir, entry.name);
|
|
555
688
|
if (entry.isDirectory()) {
|
|
556
689
|
if (![".git", "node_modules", "dist"].includes(entry.name)) await walk(absolute);
|
|
557
690
|
} else if (entry.isFile()) {
|
|
@@ -563,11 +696,11 @@ async function listFiles(root) {
|
|
|
563
696
|
return files.sort();
|
|
564
697
|
}
|
|
565
698
|
async function importLocal(options) {
|
|
566
|
-
const root =
|
|
699
|
+
const root = path4.resolve(options.inputPath);
|
|
567
700
|
const files = await listFiles(root);
|
|
568
701
|
const docs = [];
|
|
569
702
|
for (const file of files) {
|
|
570
|
-
const rel =
|
|
703
|
+
const rel = path4.relative(root, file).split(path4.sep).join("/");
|
|
571
704
|
if (options.include?.length && !matchesAnyPattern(rel, options.include)) continue;
|
|
572
705
|
if (matchesAnyPattern(rel, options.exclude)) continue;
|
|
573
706
|
const contentType = contentTypeFor(file);
|
|
@@ -587,21 +720,25 @@ async function importLocal(options) {
|
|
|
587
720
|
title: options.sourceName,
|
|
588
721
|
sourceName: options.sourceName ?? options.inputPath,
|
|
589
722
|
force: options.force,
|
|
723
|
+
inputPath: root,
|
|
724
|
+
dangerouslyAllowUnsafeOutput: options.dangerouslyAllowUnsafeOutput,
|
|
590
725
|
timestamp: options.timestamp
|
|
591
726
|
});
|
|
592
727
|
return { written, documents: docs };
|
|
593
728
|
}
|
|
594
729
|
|
|
595
730
|
// src/graph.ts
|
|
596
|
-
import
|
|
731
|
+
import path5 from "path";
|
|
597
732
|
function extractInternalLinks(concept) {
|
|
598
733
|
const links = /* @__PURE__ */ new Set();
|
|
599
734
|
for (const match of concept.body.matchAll(/\[[^\]]*]\(([^)\s]+)(?:\s+"[^"]*")?\)/g)) {
|
|
600
735
|
const href = match[1] ?? "";
|
|
601
|
-
if (/^(https?:)?\/\//.test(href) || href.startsWith("mailto:") || href.startsWith("#")) continue;
|
|
602
736
|
const noHash = href.split("#")[0] ?? href;
|
|
603
737
|
if (!noHash) continue;
|
|
604
|
-
|
|
738
|
+
if (/^(https?:)?\/\//i.test(noHash) || /^mailto:/i.test(noHash)) continue;
|
|
739
|
+
if (/^[a-z][a-z0-9+.-]*:/i.test(noHash)) continue;
|
|
740
|
+
const resolved = noHash.startsWith("/") ? path5.posix.normalize(noHash.slice(1)) : path5.posix.normalize(path5.posix.join(path5.posix.dirname(concept.path), noHash));
|
|
741
|
+
if (!resolved || resolved === ".") continue;
|
|
605
742
|
links.add(stripMdExtension(resolved));
|
|
606
743
|
}
|
|
607
744
|
return [...links].sort();
|
|
@@ -627,13 +764,13 @@ function buildGraph(conceptsByAnyKey) {
|
|
|
627
764
|
|
|
628
765
|
// src/reader.ts
|
|
629
766
|
import fs3 from "fs/promises";
|
|
630
|
-
import
|
|
767
|
+
import path6 from "path";
|
|
631
768
|
import matter from "gray-matter";
|
|
632
769
|
async function listMarkdownFiles(dir) {
|
|
633
770
|
const result = [];
|
|
634
771
|
async function walk(current) {
|
|
635
772
|
for (const entry of await fs3.readdir(current, { withFileTypes: true })) {
|
|
636
|
-
const absolute =
|
|
773
|
+
const absolute = path6.join(current, entry.name);
|
|
637
774
|
if (entry.isDirectory()) await walk(absolute);
|
|
638
775
|
else if (entry.isFile() && entry.name.endsWith(".md")) result.push(absolute);
|
|
639
776
|
}
|
|
@@ -648,7 +785,8 @@ function stringArray(value) {
|
|
|
648
785
|
async function readConceptFile(bundleDir, absolutePath) {
|
|
649
786
|
const raw = await fs3.readFile(absolutePath, "utf8");
|
|
650
787
|
const parsed = matter(raw);
|
|
651
|
-
const relPath = toPosixPath(
|
|
788
|
+
const relPath = toPosixPath(path6.relative(bundleDir, absolutePath));
|
|
789
|
+
if (isReservedOkfPath(relPath)) throw new Error(`Reserved OKF file is not a concept: ${relPath}`);
|
|
652
790
|
const id = stripMdExtension(relPath);
|
|
653
791
|
const frontmatter2 = parsed.data;
|
|
654
792
|
return {
|
|
@@ -667,6 +805,8 @@ async function readBundle(bundleDir) {
|
|
|
667
805
|
const files = await listMarkdownFiles(bundleDir);
|
|
668
806
|
const concepts = /* @__PURE__ */ new Map();
|
|
669
807
|
for (const file of files) {
|
|
808
|
+
const relPath = toPosixPath(path6.relative(bundleDir, file));
|
|
809
|
+
if (!isConceptMarkdownPath(relPath)) continue;
|
|
670
810
|
const concept = await readConceptFile(bundleDir, file);
|
|
671
811
|
concepts.set(concept.id, concept);
|
|
672
812
|
concepts.set(concept.path, concept);
|
|
@@ -730,13 +870,13 @@ var BundleSearch = class _BundleSearch {
|
|
|
730
870
|
|
|
731
871
|
// src/validate.ts
|
|
732
872
|
import fs4 from "fs/promises";
|
|
733
|
-
import
|
|
873
|
+
import path7 from "path";
|
|
734
874
|
import matter2 from "gray-matter";
|
|
735
875
|
async function listMarkdownFiles2(dir) {
|
|
736
876
|
const result = [];
|
|
737
877
|
async function walk(current) {
|
|
738
878
|
for (const entry of await fs4.readdir(current, { withFileTypes: true })) {
|
|
739
|
-
const absolute =
|
|
879
|
+
const absolute = path7.join(current, entry.name);
|
|
740
880
|
if (entry.isDirectory()) await walk(absolute);
|
|
741
881
|
else if (entry.isFile() && entry.name.endsWith(".md")) result.push(absolute);
|
|
742
882
|
}
|
|
@@ -747,6 +887,59 @@ async function listMarkdownFiles2(dir) {
|
|
|
747
887
|
function issue(severity, code, message, file) {
|
|
748
888
|
return { severity, code, message, path: file };
|
|
749
889
|
}
|
|
890
|
+
/**
 * Returns the first line that is non-empty after trimming.
 *
 * @param {string} content - Text split on LF or CRLF.
 * @returns {string} The trimmed line, or "" when every line is blank.
 */
function firstContentLine(content) {
  for (const line of content.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed) return trimmed;
  }
  return "";
}
|
|
893
|
+
/**
 * Parses a Markdown document with gray-matter and keeps only the two fields
 * the validator uses.
 *
 * @param {string} raw - Full file contents.
 * @returns {{data: object, content: string}} Frontmatter data and the body after it.
 */
function parseFrontmatter(raw) {
  const { data, content } = matter2(raw);
  return { data, content };
}
|
|
897
|
+
/**
 * Validates the structure of a reserved `index.md` and appends any problems
 * to `issues`.
 *
 * Frontmatter (a file starting with `---`) is allowed only in the
 * bundle-root `index.md`, and there it may hold nothing but a string
 * `okf_version`. After any frontmatter, the first non-blank line must start
 * with "# ".
 *
 * @param {string} raw - File contents.
 * @param {string} rel - Bundle-relative path, used in reported issues.
 * @param {Array} issues - Collector that receives error issues.
 */
function validateIndexFile(raw, rel, issues) {
  const report = (code, message) => {
    issues.push(issue("error", code, message, rel));
  };
  let body = raw;
  if (raw.startsWith("---")) {
    if (rel !== "index.md") {
      report("reserved_index_frontmatter", "Only bundle-root index.md may contain okf_version frontmatter.");
      return;
    }
    let parsed;
    try {
      parsed = parseFrontmatter(raw);
    } catch (error) {
      report("malformed_frontmatter", error?.message ?? "Malformed YAML frontmatter.");
      return;
    }
    const keys = Object.keys(parsed.data);
    const onlyStringVersion = keys.length === 1 && keys[0] === "okf_version" && typeof parsed.data.okf_version === "string";
    if (!onlyStringVersion) {
      report("reserved_index_frontmatter", "Root index.md frontmatter may contain only string okf_version.");
    }
    // The heading check continues on the body even after a frontmatter error.
    body = parsed.content;
  }
  const heading = firstContentLine(body);
  if (!heading.startsWith("# ")) {
    report("invalid_index_structure", "index.md must be a markdown directory listing headed by a section title.");
  }
}
|
|
922
|
+
function validateLogFile(raw, rel, issues) {
|
|
923
|
+
if (raw.startsWith("---")) {
|
|
924
|
+
issues.push(issue("error", "reserved_log_frontmatter", "log.md must not contain YAML frontmatter.", rel));
|
|
925
|
+
return;
|
|
926
|
+
}
|
|
927
|
+
const firstLine = firstContentLine(raw);
|
|
928
|
+
if (!firstLine.startsWith("# ")) {
|
|
929
|
+
issues.push(issue("error", "invalid_log_structure", "log.md must be a markdown update log headed by a title.", rel));
|
|
930
|
+
}
|
|
931
|
+
for (const line of raw.split(/\r?\n/)) {
|
|
932
|
+
const heading = line.match(/^##\s+(.+)$/);
|
|
933
|
+
if (heading && !/^\d{4}-\d{2}-\d{2}\b/.test(heading[1] ?? "")) {
|
|
934
|
+
issues.push(issue("error", "invalid_log_date", "log.md date headings must use YYYY-MM-DD.", rel));
|
|
935
|
+
}
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
function validateReservedFile(raw, rel, issues) {
|
|
939
|
+
const name = path7.posix.basename(rel).toLowerCase();
|
|
940
|
+
if (name === "index.md") validateIndexFile(raw, rel, issues);
|
|
941
|
+
if (name === "log.md") validateLogFile(raw, rel, issues);
|
|
942
|
+
}
|
|
750
943
|
async function validateBundle(bundleDir) {
|
|
751
944
|
const issues = [];
|
|
752
945
|
let files = [];
|
|
@@ -756,13 +949,22 @@ async function validateBundle(bundleDir) {
|
|
|
756
949
|
return {
|
|
757
950
|
valid: false,
|
|
758
951
|
issues: [issue("error", "bundle_unreadable", error?.message ?? "Bundle cannot be read.")],
|
|
759
|
-
conceptCount: 0
|
|
952
|
+
conceptCount: 0,
|
|
953
|
+
reservedFileCount: 0,
|
|
954
|
+
warningCount: 0
|
|
760
955
|
};
|
|
761
956
|
}
|
|
762
|
-
const
|
|
957
|
+
const conceptFiles = files.filter((file) => isConceptMarkdownPath(path7.relative(bundleDir, file).split(path7.sep).join("/")));
|
|
958
|
+
const reservedFiles = files.filter((file) => isReservedOkfPath(path7.relative(bundleDir, file).split(path7.sep).join("/")));
|
|
959
|
+
for (const file of reservedFiles) {
|
|
960
|
+
const rel = path7.relative(bundleDir, file).split(path7.sep).join("/");
|
|
961
|
+
const raw = await fs4.readFile(file, "utf8");
|
|
962
|
+
validateReservedFile(raw, rel, issues);
|
|
963
|
+
}
|
|
763
964
|
for (const file of files) {
|
|
764
|
-
const rel =
|
|
765
|
-
if (
|
|
965
|
+
const rel = path7.relative(bundleDir, file).split(path7.sep).join("/");
|
|
966
|
+
if (!isConceptMarkdownPath(rel)) continue;
|
|
967
|
+
if (rel.includes("..") || path7.isAbsolute(rel)) {
|
|
766
968
|
issues.push(issue("error", "unsafe_path", "Concept path is unsafe.", rel));
|
|
767
969
|
}
|
|
768
970
|
const raw = await fs4.readFile(file, "utf8");
|
|
@@ -783,38 +985,32 @@ async function validateBundle(bundleDir) {
|
|
|
783
985
|
}
|
|
784
986
|
for (const key of ["title", "description", "resource", "timestamp"]) {
|
|
785
987
|
if (data[key] !== void 0 && typeof data[key] !== "string") {
|
|
786
|
-
issues.push(issue("
|
|
988
|
+
issues.push(issue("warning", "bad_field_shape", `${key} should be a string when present.`, rel));
|
|
787
989
|
}
|
|
788
990
|
}
|
|
789
991
|
if (data.tags !== void 0 && (!Array.isArray(data.tags) || data.tags.some((tag) => typeof tag !== "string"))) {
|
|
790
|
-
issues.push(issue("
|
|
791
|
-
}
|
|
792
|
-
if (parsed.content.trim().length === 0) {
|
|
793
|
-
issues.push(issue("error", "empty_concept", "Concept body must not be empty.", rel));
|
|
992
|
+
issues.push(issue("warning", "bad_field_shape", "tags should be an array of strings when present.", rel));
|
|
794
993
|
}
|
|
795
|
-
const id = rel.replace(/\.md$/i, "");
|
|
796
|
-
if (seenIds.has(id)) issues.push(issue("error", "duplicate_concept_id", `Duplicate concept id: ${id}`, rel));
|
|
797
|
-
seenIds.add(id);
|
|
798
994
|
}
|
|
799
995
|
const concepts = await readBundle(bundleDir).catch(() => /* @__PURE__ */ new Map());
|
|
800
996
|
const canonicalIds = new Set([...concepts.values()].map((concept) => concept.id));
|
|
801
997
|
for (const concept of new Map([...concepts.values()].map((concept2) => [concept2.id, concept2])).values()) {
|
|
802
998
|
for (const target of extractInternalLinks(concept)) {
|
|
803
999
|
if (!canonicalIds.has(target)) {
|
|
804
|
-
issues.push(issue("
|
|
1000
|
+
issues.push(issue("warning", "broken_internal_link", `Broken internal link to ${target}.`, concept.path));
|
|
805
1001
|
}
|
|
806
1002
|
}
|
|
807
1003
|
}
|
|
808
|
-
const dirs = new Set(
|
|
1004
|
+
const dirs = new Set(conceptFiles.map((file) => path7.dirname(file)));
|
|
809
1005
|
for (const dir of dirs) {
|
|
810
|
-
const index =
|
|
1006
|
+
const index = path7.join(dir, "index.md");
|
|
811
1007
|
if (!files.includes(index)) {
|
|
812
1008
|
issues.push(
|
|
813
1009
|
issue(
|
|
814
1010
|
"warning",
|
|
815
1011
|
"missing_folder_index",
|
|
816
1012
|
"Folder has concepts but no index.md.",
|
|
817
|
-
|
|
1013
|
+
path7.relative(bundleDir, dir).split(path7.sep).join("/") || "."
|
|
818
1014
|
)
|
|
819
1015
|
);
|
|
820
1016
|
}
|
|
@@ -822,7 +1018,9 @@ async function validateBundle(bundleDir) {
|
|
|
822
1018
|
return {
|
|
823
1019
|
valid: !issues.some((item) => item.severity === "error"),
|
|
824
1020
|
issues,
|
|
825
|
-
conceptCount:
|
|
1021
|
+
conceptCount: conceptFiles.length,
|
|
1022
|
+
reservedFileCount: reservedFiles.length,
|
|
1023
|
+
warningCount: issues.filter((item) => item.severity === "warning").length
|
|
826
1024
|
};
|
|
827
1025
|
}
|
|
828
1026
|
async function inspectBundle(bundleDir) {
|
|
@@ -848,8 +1046,10 @@ async function inspectBundle(bundleDir) {
|
|
|
848
1046
|
const linkCount = [...graph.outbound.values()].reduce((sum, links) => sum + links.length, 0);
|
|
849
1047
|
const validation = await validateBundle(bundleDir);
|
|
850
1048
|
return {
|
|
851
|
-
title:
|
|
1049
|
+
title: path7.basename(bundleDir),
|
|
852
1050
|
conceptCount: concepts.length,
|
|
1051
|
+
reservedFileCount: validation.reservedFileCount,
|
|
1052
|
+
warningCount: validation.warningCount,
|
|
853
1053
|
typeDistribution,
|
|
854
1054
|
tagDistribution,
|
|
855
1055
|
linkCount,
|
|
@@ -991,7 +1191,13 @@ async function createMcpServer(options) {
|
|
|
991
1191
|
}
|
|
992
1192
|
if (request.params.name === "bundle_summary") {
|
|
993
1193
|
const [stats, validation] = await Promise.all([inspectBundle(options.bundleDir), validateBundle(options.bundleDir)]);
|
|
994
|
-
return json({
|
|
1194
|
+
return json({
|
|
1195
|
+
...stats,
|
|
1196
|
+
reservedFileCount: validation.reservedFileCount,
|
|
1197
|
+
warningCount: validation.warningCount,
|
|
1198
|
+
validationStatus: validation.valid ? "valid" : "invalid",
|
|
1199
|
+
validationIssues: validation.issues
|
|
1200
|
+
});
|
|
995
1201
|
}
|
|
996
1202
|
return json({ error: { code: "unknown_tool", message: `Unknown tool: ${request.params.name}` } });
|
|
997
1203
|
} catch (error) {
|
package/dist/cli.js
CHANGED
|
@@ -5,7 +5,7 @@ import {
|
|
|
5
5
|
inspectBundle,
|
|
6
6
|
serveMcpStdio,
|
|
7
7
|
validateBundle
|
|
8
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-QE5W5AJS.js";
|
|
9
9
|
|
|
10
10
|
// src/cli.ts
|
|
11
11
|
import fs from "fs";
|
|
@@ -92,7 +92,7 @@ function printCrawlProgress(event) {
|
|
|
92
92
|
}
|
|
93
93
|
}
|
|
94
94
|
program.name("okfy").description("Turn docs into agent memory with Open Knowledge Format and MCP.").version(readPackageVersion());
|
|
95
|
-
program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("--out <dir>", "Output OKF bundle directory").option("--max-pages <n>", "Maximum pages", (value) => Number(value), 100).option("--max-depth <n>", "Maximum crawl depth", (value) => Number(value), 4).option("--include <pattern>", "Include glob or regex", collect, []).option("--exclude <pattern>", "Exclude glob or regex", collect, []).option("--same-origin", "Stay on same origin", true).option("--no-same-origin", "Allow cross-origin links").option("--respect-robots", "Respect robots.txt", true).option("--no-respect-robots", "Ignore robots.txt").option("--concurrency <n>", "Fetch concurrency", (value) => Number(value), 4).option("--title <name>", "Bundle title").option("--force", "Overwrite output directory", false).option("--dry-run", "List pages that would be crawled", false).option("--allow-private-network", "Allow localhost/private IP crawl targets", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (url, options) => {
|
|
95
|
+
program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("--out <dir>", "Output OKF bundle directory").option("--max-pages <n>", "Maximum pages", (value) => Number(value), 100).option("--max-depth <n>", "Maximum crawl depth", (value) => Number(value), 4).option("--include <pattern>", "Include glob or regex", collect, []).option("--exclude <pattern>", "Exclude glob or regex", collect, []).option("--same-origin", "Stay on same origin", true).option("--no-same-origin", "Allow cross-origin links").option("--respect-robots", "Respect robots.txt", true).option("--no-respect-robots", "Ignore robots.txt").option("--concurrency <n>", "Fetch concurrency", (value) => Number(value), 4).option("--title <name>", "Bundle title").option("--force", "Overwrite output directory", false).option("--dry-run", "List pages that would be crawled", false).option("--allow-private-network", "Allow localhost/private IP crawl targets", false).option("--dangerously-allow-unsafe-output", "Dangerously allow --force to delete otherwise unsafe output paths", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (url, options) => {
|
|
96
96
|
try {
|
|
97
97
|
const result = await crawlWebsite({
|
|
98
98
|
seedUrl: url,
|
|
@@ -109,7 +109,7 @@ program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("
|
|
|
109
109
|
console.log("okfy crawl");
|
|
110
110
|
console.log(`Seed: ${url}`);
|
|
111
111
|
console.log(`Pages: ${result.pagesFetched} fetched, ${result.skipped} skipped, ${result.failed} failed`);
|
|
112
|
-
console.log(`Concepts: ${result.
|
|
112
|
+
console.log(`Concepts: ${result.documents.length} written`);
|
|
113
113
|
console.log(`Output: ${options.out}`);
|
|
114
114
|
console.log("\nNext:");
|
|
115
115
|
console.log(` okfy validate ${options.out}`);
|
|
@@ -119,7 +119,7 @@ program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("
|
|
|
119
119
|
process.exitCode = 1;
|
|
120
120
|
}
|
|
121
121
|
});
|
|
122
|
-
program.command("import").argument("<path>", "Local docs folder or file").requiredOption("--out <dir>", "Output OKF bundle directory").option("--source-name <name>", "Source name").option("--include <glob>", "Include glob", collect, []).option("--exclude <glob>", "Exclude glob", collect, []).option("--force", "Overwrite output directory", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (input, options) => {
|
|
122
|
+
program.command("import").argument("<path>", "Local docs folder or file").requiredOption("--out <dir>", "Output OKF bundle directory").option("--source-name <name>", "Source name").option("--include <glob>", "Include glob", collect, []).option("--exclude <glob>", "Exclude glob", collect, []).option("--force", "Overwrite output directory", false).option("--dangerously-allow-unsafe-output", "Dangerously allow --force to delete otherwise unsafe output paths", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (input, options) => {
|
|
123
123
|
try {
|
|
124
124
|
printStatus(`okfy import: reading ${input}`);
|
|
125
125
|
printStatus(`okfy import: writing bundle to ${options.out}`);
|
|
@@ -131,9 +131,9 @@ program.command("import").argument("<path>", "Local docs folder or file").requir
|
|
|
131
131
|
});
|
|
132
132
|
console.log("okfy import");
|
|
133
133
|
console.log(`Source: ${input}`);
|
|
134
|
-
console.log(`Concepts: ${result.
|
|
134
|
+
console.log(`Concepts: ${result.documents.length} written`);
|
|
135
135
|
console.log(`Output: ${options.out}`);
|
|
136
|
-
printStatus(`okfy import: done, wrote ${result.
|
|
136
|
+
printStatus(`okfy import: done, wrote ${result.documents.length} concepts`);
|
|
137
137
|
} catch (error) {
|
|
138
138
|
console.error(pc.red(error?.message ?? "Import failed."));
|
|
139
139
|
process.exitCode = 1;
|
package/dist/index.d.ts
CHANGED
|
@@ -54,10 +54,14 @@ type ValidationReport = {
|
|
|
54
54
|
valid: boolean;
|
|
55
55
|
issues: ValidationIssue[];
|
|
56
56
|
conceptCount: number;
|
|
57
|
+
reservedFileCount: number;
|
|
58
|
+
warningCount: number;
|
|
57
59
|
};
|
|
58
60
|
type BundleStats = {
|
|
59
61
|
title: string;
|
|
60
62
|
conceptCount: number;
|
|
63
|
+
reservedFileCount: number;
|
|
64
|
+
warningCount: number;
|
|
61
65
|
typeDistribution: Record<string, number>;
|
|
62
66
|
tagDistribution: Record<string, number>;
|
|
63
67
|
linkCount: number;
|
|
@@ -85,6 +89,7 @@ type CrawlOptions = {
|
|
|
85
89
|
force?: boolean;
|
|
86
90
|
dryRun?: boolean;
|
|
87
91
|
allowPrivateNetwork?: boolean;
|
|
92
|
+
dangerouslyAllowUnsafeOutput?: boolean;
|
|
88
93
|
timestamp?: string;
|
|
89
94
|
onProgress?: (event: CrawlProgressEvent) => void;
|
|
90
95
|
};
|
|
@@ -143,6 +148,7 @@ type ImportOptions = {
|
|
|
143
148
|
include?: string[];
|
|
144
149
|
exclude?: string[];
|
|
145
150
|
force?: boolean;
|
|
151
|
+
dangerouslyAllowUnsafeOutput?: boolean;
|
|
146
152
|
timestamp?: string;
|
|
147
153
|
};
|
|
148
154
|
declare function importLocal(options: ImportOptions): Promise<{
|
|
@@ -208,6 +214,8 @@ type WriteBundleOptions = {
|
|
|
208
214
|
title?: string;
|
|
209
215
|
sourceName?: string;
|
|
210
216
|
force?: boolean;
|
|
217
|
+
inputPath?: string;
|
|
218
|
+
dangerouslyAllowUnsafeOutput?: boolean;
|
|
211
219
|
timestamp?: string;
|
|
212
220
|
};
|
|
213
221
|
declare function writeOkfBundle(docs: NormalizedDocument[], options: WriteBundleOptions): Promise<string[]>;
|
package/dist/index.js
CHANGED
package/docs/mcp-clients.md
CHANGED
package/examples/README.md
CHANGED
|
@@ -13,7 +13,7 @@ pnpm okfy import examples/local-markdown --out examples/bundles/okfy-docs --sour
|
|
|
13
13
|
Expected concept count:
|
|
14
14
|
|
|
15
15
|
```text
|
|
16
|
-
|
|
16
|
+
6
|
|
17
17
|
```
|
|
18
18
|
|
|
19
19
|
Expected validation status:
|
|
@@ -35,7 +35,7 @@ Purpose: small curated Stripe Checkout sample for launch demos when live crawlin
|
|
|
35
35
|
Source command:
|
|
36
36
|
|
|
37
37
|
```bash
|
|
38
|
-
pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name "Stripe Checkout
|
|
38
|
+
pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name "Stripe Checkout" --force --stable-timestamps
|
|
39
39
|
```
|
|
40
40
|
|
|
41
41
|
Expected concept count:
|
|
@@ -69,7 +69,7 @@ okfy import ./examples/local-markdown --out ./tmp/okfy-docs --force --stable-tim
|
|
|
69
69
|
Expected concept count:
|
|
70
70
|
|
|
71
71
|
```text
|
|
72
|
-
|
|
72
|
+
6
|
|
73
73
|
```
|
|
74
74
|
|
|
75
75
|
Expected validation status:
|
|
@@ -1,14 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
type: "Folder Index"
|
|
3
|
-
title: "Concepts Index"
|
|
4
|
-
description: "Index for concepts."
|
|
5
|
-
resource: "okfy docs"
|
|
6
|
-
tags:
|
|
7
|
-
- "index"
|
|
8
|
-
timestamp: "2026-06-14T00:00:00.000Z"
|
|
9
|
-
---
|
|
1
|
+
# Concepts
|
|
10
2
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
- [okf-bundle](./okf-bundle.md)
|
|
14
|
-
- [progressive-disclosure](./progressive-disclosure.md)
|
|
3
|
+
* [OKF Bundle Structure](okf-bundle.md) - An Open Knowledge Format bundle is a directory of Markdown files with YAML frontmatter. Minimum valid concept: Useful generated fields include title, description, resource, tags, a
|
|
4
|
+
* [Progressive Disclosure](progressive-disclosure.md) - Progressive disclosure means an agent starts with small previews and only loads full concept content when needed. For okfy, the default pattern is: This keeps prompt context smalle
|
|
@@ -1,14 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
type: "Folder Index"
|
|
3
|
-
title: "Guides Index"
|
|
4
|
-
description: "Index for guides."
|
|
5
|
-
resource: "okfy docs"
|
|
6
|
-
tags:
|
|
7
|
-
- "index"
|
|
8
|
-
timestamp: "2026-06-14T00:00:00.000Z"
|
|
9
|
-
---
|
|
1
|
+
# Guides
|
|
10
2
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
- [import-local-markdown](./import-local-markdown.md)
|
|
14
|
-
- [serve-over-mcp](./serve-over-mcp.md)
|
|
3
|
+
* [Import Local Markdown](import-local-markdown.md) - Use okfy import when docs already live in a local project checkout, wiki export, Obsidian vault, or staticsite source folder. Expected result: The importer preserves headings, code
|
|
4
|
+
* [Serve Over MCP](serve-over-mcp.md) - After generating an OKF bundle, serve it over stdio MCP: Agents should not read the whole bundle first. The efficient flow is: Use searchconcepts for discovery, readconcept for gro
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: "API Reference"
|
|
3
|
+
title: "okfy Local Markdown Fixture"
|
|
4
|
+
description: "This fixture models a small docs folder that can be imported into OKF without network access. Start with Import Local Markdown, then read Serve Over MCP. Key topics: OKF bundle str"
|
|
5
|
+
resource: "index.md"
|
|
6
|
+
tags:
|
|
7
|
+
- "okfy"
|
|
8
|
+
- "local"
|
|
9
|
+
- "fixture"
|
|
10
|
+
timestamp: "2026-06-14T00:00:00.000Z"
|
|
11
|
+
---
|
|
12
|
+
# okfy Local Markdown Fixture
|
|
13
|
+
|
|
14
|
+
This fixture models a small docs folder that can be imported into OKF without network access.
|
|
15
|
+
|
|
16
|
+
Start with [Import Local Markdown](./guides/import-local-markdown.md), then read [Serve Over MCP](./guides/serve-over-mcp.md).
|
|
17
|
+
|
|
18
|
+
Key topics:
|
|
19
|
+
|
|
20
|
+
- [OKF bundle structure](./concepts/okf-bundle.md)
|
|
21
|
+
- [Progressive disclosure](./concepts/progressive-disclosure.md)
|
|
22
|
+
- [MCP tools](./reference/mcp-tools.md)
|
|
@@ -1,22 +1,8 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
- "fixture"
|
|
10
|
-
timestamp: "2026-06-14T00:00:00.000Z"
|
|
11
|
-
---
|
|
12
|
-
# okfy Local Markdown Fixture
|
|
13
|
-
|
|
14
|
-
This fixture models a small docs folder that can be imported into OKF without network access.
|
|
15
|
-
|
|
16
|
-
Start with [Import Local Markdown](./guides/import-local-markdown.md), then read [Serve Over MCP](./guides/serve-over-mcp.md).
|
|
17
|
-
|
|
18
|
-
Key topics:
|
|
19
|
-
|
|
20
|
-
- [OKF bundle structure](./concepts/okf-bundle.md)
|
|
21
|
-
- [Progressive disclosure](./concepts/progressive-disclosure.md)
|
|
22
|
-
- [MCP tools](./reference/mcp-tools.md)
|
|
1
|
+
# okfy docs
|
|
2
|
+
|
|
3
|
+
* [OKF Bundle Structure](concepts/okf-bundle.md) - An Open Knowledge Format bundle is a directory of Markdown files with YAML frontmatter. Minimum valid concept: Useful generated fields include title, description, resource, tags, a
|
|
4
|
+
* [Progressive Disclosure](concepts/progressive-disclosure.md) - Progressive disclosure means an agent starts with small previews and only loads full concept content when needed. For okfy, the default pattern is: This keeps prompt context smalle
|
|
5
|
+
* [Import Local Markdown](guides/import-local-markdown.md) - Use okfy import when docs already live in a local project checkout, wiki export, Obsidian vault, or staticsite source folder. Expected result: The importer preserves headings, code
|
|
6
|
+
* [Serve Over MCP](guides/serve-over-mcp.md) - After generating an OKF bundle, serve it over stdio MCP: Agents should not read the whole bundle first. The efficient flow is: Use searchconcepts for discovery, readconcept for gro
|
|
7
|
+
* [okfy Local Markdown Fixture](home.md) - This fixture models a small docs folder that can be imported into OKF without network access. Start with Import Local Markdown, then read Serve Over MCP. Key topics: OKF bundle str
|
|
8
|
+
* [MCP Tools](reference/mcp-tools.md) - okfy exposes these readonly MCP tools: | Tool | Purpose | | | | | searchconcepts | Find concept previews by query, type, or tags. | | readconcept | Read one concept body, frontmatt
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"sourceCommand": "pnpm okfy import examples/local-markdown --out examples/bundles/okfy-docs --source-name \"okfy docs\" --force --stable-timestamps",
|
|
3
|
-
"expectedConceptCount":
|
|
3
|
+
"expectedConceptCount": 6,
|
|
4
4
|
"expectedValidationStatus": "valid",
|
|
5
5
|
"suggestedAgentQuestions": [
|
|
6
|
-
"Search
|
|
6
|
+
"Search for crawler security defaults, read the relevant concepts, and cite the source resource.",
|
|
7
7
|
"Read the MCP setup concept and explain the stdio config.",
|
|
8
8
|
"Find importer concepts and list supported input formats."
|
|
9
9
|
]
|
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
type: "Folder Index"
|
|
3
|
-
title: "Reference Index"
|
|
4
|
-
description: "Index for reference."
|
|
5
|
-
resource: "okfy docs"
|
|
6
|
-
tags:
|
|
7
|
-
- "index"
|
|
8
|
-
timestamp: "2026-06-14T00:00:00.000Z"
|
|
9
|
-
---
|
|
1
|
+
# Reference
|
|
10
2
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
- [mcp-tools](./mcp-tools.md)
|
|
3
|
+
* [MCP Tools](mcp-tools.md) - okfy exposes these readonly MCP tools: | Tool | Purpose | | | | | searchconcepts | Find concept previews by query, type, or tags. | | readconcept | Read one concept body, frontmatt
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
type: "API Reference"
|
|
3
|
+
title: "Stripe Checkout"
|
|
4
|
+
description: "Checkout is a prebuilt payment form for accepting cards and other payment methods. This saved fixture mirrors the launch demo shape without requiring network access. Checkout quick"
|
|
5
|
+
resource: "index.html"
|
|
6
|
+
tags:
|
|
7
|
+
- "stripe"
|
|
8
|
+
- "checkout"
|
|
9
|
+
timestamp: "2026-06-14T00:00:00.000Z"
|
|
10
|
+
---
|
|
11
|
+
# Stripe Checkout
|
|
12
|
+
|
|
13
|
+
Checkout is a prebuilt payment form for accepting cards and other payment methods.
|
|
14
|
+
|
|
15
|
+
This saved fixture mirrors the launch demo shape without requiring network access.
|
|
16
|
+
|
|
17
|
+
- [Checkout quickstart](./quickstart.md)
|
|
18
|
+
- [Checkout Sessions API](./sessions.md)
|
|
19
|
+
- [Checkout webhooks](./webhooks.md)
|
|
20
|
+
|
|
21
|
+
Original source: [https://docs.stripe.com/checkout](https://docs.stripe.com/checkout)
|
|
@@ -1,21 +1,6 @@
|
|
|
1
|
-
---
|
|
2
|
-
type: "API Reference"
|
|
3
|
-
title: "Stripe Checkout"
|
|
4
|
-
description: "Checkout is a prebuilt payment form for accepting cards and other payment methods. This saved fixture mirrors the launch demo shape without requiring network access. Checkout quick"
|
|
5
|
-
resource: "index.html"
|
|
6
|
-
tags:
|
|
7
|
-
- "stripe"
|
|
8
|
-
- "checkout"
|
|
9
|
-
timestamp: "2026-06-14T00:00:00.000Z"
|
|
10
|
-
---
|
|
11
1
|
# Stripe Checkout
|
|
12
2
|
|
|
13
|
-
Checkout is a prebuilt payment form for accepting cards and other payment methods.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
- [Checkout quickstart](./quickstart.md)
|
|
18
|
-
- [Checkout Sessions API](./sessions.md)
|
|
19
|
-
- [Checkout webhooks](./webhooks.md)
|
|
20
|
-
|
|
21
|
-
Original source: [https://docs.stripe.com/checkout](https://docs.stripe.com/checkout)
|
|
3
|
+
* [Stripe Checkout](home.md) - Checkout is a prebuilt payment form for accepting cards and other payment methods. This saved fixture mirrors the launch demo shape without requiring network access. Checkout quick
|
|
4
|
+
* [Checkout quickstart](quickstart.md) - Create a server endpoint that creates a Checkout Session, then redirect the customer to the session URL. See Checkout Sessions API and Checkout webhooks. Original source: https://d
|
|
5
|
+
* [Checkout Sessions API](sessions.md) - A Checkout Session represents a customer's session as they pay for onetime purchases or subscriptions. Important parameters include mode, lineitems, successurl, and cancelurl. Star
|
|
6
|
+
* [Checkout webhooks](webhooks.md) - Listen for checkout.session.completed before fulfilling orders or granting access. Webhook handlers should verify signatures and handle retries idempotently. Related: Checkout Sess
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
{
|
|
2
|
-
"sourceCommand": "pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name \"Stripe Checkout
|
|
2
|
+
"sourceCommand": "pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name \"Stripe Checkout\" --force --stable-timestamps",
|
|
3
3
|
"expectedConceptCount": 4,
|
|
4
4
|
"expectedValidationStatus": "valid",
|
|
5
|
-
"sampleNote": "Curated saved-HTML sample used when live crawling is flaky; source URLs are preserved in concept bodies and resource metadata points to the fixture source.",
|
|
6
5
|
"suggestedAgentQuestions": [
|
|
7
6
|
"Search for Checkout Sessions, read the strongest match, and explain required server parameters.",
|
|
8
7
|
"Find webhook-related concepts and summarize fulfillment safety notes.",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"sourceCommand": "okfy import ./examples/local-markdown --out ./tmp/okfy-docs --force --stable-timestamps",
|
|
3
|
-
"expectedConceptCount":
|
|
3
|
+
"expectedConceptCount": 6,
|
|
4
4
|
"expectedValidationStatus": "valid",
|
|
5
5
|
"suggestedAgentQuestions": [
|
|
6
6
|
"Search for import workflow concepts, read the best match, and explain how to convert a local Markdown folder into OKF.",
|