okfy-ai 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # okfy-ai
2
2
 
3
- Turn docs into agent-readable Open Knowledge Format bundles, then serve them to Claude, Codex, Cursor, or any MCP client.
3
+ Turn docs into agent-readable Open Knowledge Format v0.1-conformant bundles, then serve them to Claude, Codex, Cursor, or any MCP client.
4
4
 
5
5
  ## Use With Agents
6
6
 
@@ -146,14 +146,18 @@ docs site or Markdown folder
146
146
  -> source-backed agent answers
147
147
  ```
148
148
 
149
- Each source page or Markdown file becomes one OKF concept in v0.1. Generated bundles are plain files, so they can be opened, reviewed, diffed, committed, and served locally.
149
+ Each non-reserved source page or Markdown file becomes one OKF concept in v0.1. `index.md` and `log.md` are reserved files, not concepts, and generated indexes are plain Markdown. Concept counts, search, graph links, types, tags, and `read_concept` exclude reserved files.
150
+
151
+ Validation errors are limited to OKF conformance: malformed or missing concept frontmatter, missing `type`, or invalid reserved-file structure. Broken internal links and missing indexes are warnings.
150
152
 
151
153
  ## Security Defaults
152
154
 
153
155
  - Crawls respect `robots.txt` by default.
154
156
  - Crawls stay same-origin by default.
155
157
  - Page count, depth, response size, and concurrency are capped.
156
- - Private network targets are rejected by default for URL crawls.
158
+ - Private network URL literals and redirects to private targets are rejected by default for URL crawls.
159
+ - Preflight DNS-resolved private targets are rejected before fetch; fetch-time DNS is not IP-pinned.
160
+ - `--force` refuses unsafe output directories such as `.`, `/`, the home dir, repo root, input path, input parent, and symlink output dirs unless an explicit dangerous override is provided.
157
161
  - HTML and Markdown are treated as text. Scripts are not executed.
158
162
  - MCP tools are read-only in v0.1.
159
163
 
@@ -136,9 +136,24 @@ function descriptionFromMarkdown(markdown) {
136
136
 
137
137
  // src/writer.ts
138
138
  import fs from "fs/promises";
139
+ import os from "os";
140
+ import path3 from "path";
141
+
142
+ // src/okf.ts
139
143
  import path2 from "path";
144
+ var RESERVED_FILENAMES = /* @__PURE__ */ new Set(["index.md", "log.md"]);
145
+ function toOkfPath(input) {
146
+ return input.split(path2.sep).join("/");
147
+ }
148
+ function isReservedOkfPath(input) {
149
+ return RESERVED_FILENAMES.has(path2.posix.basename(toOkfPath(input)).toLowerCase());
150
+ }
151
+ function isConceptMarkdownPath(input) {
152
+ return input.toLowerCase().endsWith(".md") && !isReservedOkfPath(input);
153
+ }
140
154
 
141
155
  // src/util/url.ts
156
+ import dns from "dns/promises";
142
157
  import net from "net";
143
158
  var TRACKING_PARAMS = [/^utm_/i, /^fbclid$/i, /^gclid$/i, /^mc_/i];
144
159
  function canonicalizeUrl(input, base) {
@@ -167,20 +182,63 @@ function isHttpUrl(input) {
167
182
  return false;
168
183
  }
169
184
  }
185
+ function isPrivateIpv4Parts(parts) {
186
+ const [a = 0, b = 0] = parts;
187
+ return a === 0 || a === 10 || a === 127 || a === 100 && b >= 64 && b <= 127 || a === 172 && b >= 16 && b <= 31 || a === 192 && b === 168 || a === 169 && b === 254 || a >= 224;
188
+ }
189
+ function mappedIpv4PartsFromIpv6(host) {
190
+ const dotted = host.match(/^(?:::|0:0:0:0:0:)ffff:(\d{1,3}(?:\.\d{1,3}){3})$/i)?.[1];
191
+ if (dotted) {
192
+ const parts = dotted.split(".").map(Number);
193
+ if (parts.length === 4 && parts.every((part) => Number.isInteger(part) && part >= 0 && part <= 255)) return parts;
194
+ }
195
+ const hex = host.match(/^(?:::|0:0:0:0:0:)ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i);
196
+ if (!hex) return void 0;
197
+ const high = Number.parseInt(hex[1] ?? "", 16);
198
+ const low = Number.parseInt(hex[2] ?? "", 16);
199
+ if (!Number.isInteger(high) || !Number.isInteger(low) || high < 0 || high > 65535 || low < 0 || low > 65535) {
200
+ return void 0;
201
+ }
202
+ return [high >> 8, high & 255, low >> 8, low & 255];
203
+ }
170
204
  function isPrivateNetworkUrl(input) {
171
205
  const url = new URL(input);
172
- const host = url.hostname.toLowerCase();
206
+ const host = url.hostname.toLowerCase().replace(/^\[/, "").replace(/\]$/, "");
173
207
  if (host === "localhost" || host.endsWith(".localhost")) return true;
174
- if (host === "::1" || host.startsWith("fe80:")) return true;
208
+ if (host === "::" || host === "::1" || host.startsWith("fe80:")) return true;
175
209
  const ipKind = net.isIP(host);
176
210
  if (ipKind === 4) {
177
211
  const parts = host.split(".").map(Number);
178
- const [a = 0, b = 0] = parts;
179
- return a === 10 || a === 127 || a === 172 && b >= 16 && b <= 31 || a === 192 && b === 168 || a === 169 && b === 254;
212
+ return isPrivateIpv4Parts(parts);
213
+ }
214
+ if (ipKind === 6) {
215
+ const mappedIpv4Parts = mappedIpv4PartsFromIpv6(host);
216
+ if (mappedIpv4Parts) return isPrivateIpv4Parts(mappedIpv4Parts);
217
+ return host === "::" || host === "::1" || host.startsWith("fc") || host.startsWith("fd") || host.startsWith("fe80:");
180
218
  }
181
- if (ipKind === 6) return host === "::1" || host.startsWith("fc") || host.startsWith("fd");
182
219
  return false;
183
220
  }
221
+ async function resolvesToPrivateNetwork(input) {
222
+ if (isPrivateNetworkUrl(input)) return true;
223
+ const url = new URL(input);
224
+ const host = url.hostname.toLowerCase().replace(/^\[/, "").replace(/\]$/, "");
225
+ if (net.isIP(host)) return false;
226
+ let records;
227
+ try {
228
+ records = await dns.lookup(host, { all: true, verbatim: true });
229
+ } catch {
230
+ return false;
231
+ }
232
+ return records.some((record) => {
233
+ const host2 = record.address.includes(":") ? `[${record.address}]` : record.address;
234
+ return isPrivateNetworkUrl(`${url.protocol}//${host2}`);
235
+ });
236
+ }
237
+ async function assertPublicNetworkUrl(input) {
238
+ if (await resolvesToPrivateNetwork(input)) {
239
+ throw new Error("Private network crawl target rejected. Use --allow-private-network for trusted local fixtures.");
240
+ }
241
+ }
184
242
 
185
243
  // src/writer.ts
186
244
  function yamlScalar(value) {
@@ -216,12 +274,12 @@ function assignOutputPaths(docs) {
216
274
  const used = /* @__PURE__ */ new Set();
217
275
  const result = /* @__PURE__ */ new Map();
218
276
  for (const doc of docs) {
219
- const base = doc.resource ? urlToOutputPath(doc.resource) : ensureMarkdownPath(doc.sourcePath ?? doc.sourceId);
277
+ const base = safeConceptOutputPath(doc.resource ? urlToOutputPath(doc.resource) : ensureMarkdownPath(doc.sourcePath ?? doc.sourceId));
220
278
  let candidate = base;
221
279
  let index = 2;
222
280
  while (used.has(candidate)) {
223
- const parsed = path2.posix.parse(base);
224
- candidate = path2.posix.join(parsed.dir, `${parsed.name}-${index}${parsed.ext}`);
281
+ const parsed = path3.posix.parse(base);
282
+ candidate = path3.posix.join(parsed.dir, `${parsed.name}-${index}${parsed.ext}`);
225
283
  index += 1;
226
284
  }
227
285
  used.add(candidate);
@@ -230,6 +288,12 @@ function assignOutputPaths(docs) {
230
288
  }
231
289
  return result;
232
290
  }
291
+ function safeConceptOutputPath(candidate) {
292
+ if (!isReservedOkfPath(candidate)) return candidate;
293
+ const parsed = path3.posix.parse(candidate);
294
+ const safeName = parsed.name.toLowerCase() === "log" ? "change-log" : parsed.dir ? "overview" : "home";
295
+ return path3.posix.join(parsed.dir, `${safeName}.md`);
296
+ }
233
297
  function rewriteLinks(doc, sourceToOutput) {
234
298
  return doc.markdown.replace(/\[([^\]]*)\]\(([^)\s]+)([^)]*)\)/g, (full, text, href, suffix) => {
235
299
  if (/^(https?:)?\/\//.test(href)) {
@@ -254,7 +318,7 @@ function rewriteLinks(doc, sourceToOutput) {
254
318
  }
255
319
  }
256
320
  if (!href.startsWith("#") && doc.sourcePath) {
257
- const abs = toPosixPath(path2.posix.normalize(path2.posix.join(path2.posix.dirname(doc.sourcePath), href)));
321
+ const abs = toPosixPath(path3.posix.normalize(path3.posix.join(path3.posix.dirname(doc.sourcePath), href)));
258
322
  const noHash = abs.split("#")[0] ?? abs;
259
323
  const target = sourceToOutput.get(noHash);
260
324
  if (target && doc.outputPath) return `[${text}](${relativeMarkdownLink(doc.outputPath, target)}${suffix})`;
@@ -262,11 +326,64 @@ function rewriteLinks(doc, sourceToOutput) {
262
326
  return full;
263
327
  });
264
328
  }
265
- async function ensureCleanOutDir(outDir, force) {
329
+ async function pathExists(target) {
330
+ try {
331
+ await fs.lstat(target);
332
+ return true;
333
+ } catch (error) {
334
+ if (error?.code === "ENOENT") return false;
335
+ throw error;
336
+ }
337
+ }
338
+ async function resolveForSafety(target) {
339
+ const resolved = path3.resolve(target);
340
+ if (await pathExists(resolved)) return fs.realpath(resolved);
341
+ const parent = path3.dirname(resolved);
342
+ const realParent = await fs.realpath(parent);
343
+ return path3.join(realParent, path3.basename(resolved));
344
+ }
345
+ async function findRepoRoot(start) {
346
+ let current = path3.resolve(start);
347
+ while (true) {
348
+ if (await pathExists(path3.join(current, ".git"))) return fs.realpath(current);
349
+ const parent = path3.dirname(current);
350
+ if (parent === current) return void 0;
351
+ current = parent;
352
+ }
353
+ }
354
+ async function assertSafeForceOutDir(outDir, options) {
355
+ if (options.dangerouslyAllowUnsafeOutput) return;
356
+ if (outDir.trim() === "") throw new Error("Unsafe output directory for --force: empty path.");
357
+ const rawResolved = path3.resolve(outDir);
358
+ const existing = await pathExists(rawResolved);
359
+ if (existing) {
360
+ const stat = await fs.lstat(rawResolved);
361
+ if (stat.isSymbolicLink()) {
362
+ throw new Error(`Unsafe output directory for --force: refusing symlink ${outDir}.`);
363
+ }
364
+ }
365
+ const realOutDir = await resolveForSafety(outDir);
366
+ const forbidden = /* @__PURE__ */ new Map([
367
+ [path3.parse(realOutDir).root, "filesystem root"],
368
+ [await fs.realpath(os.homedir()), "home directory"],
369
+ [await fs.realpath(process.cwd()), "current working directory"]
370
+ ]);
371
+ const repoRoot = await findRepoRoot(process.cwd());
372
+ if (repoRoot) forbidden.set(repoRoot, "repository root");
373
+ if (options.inputPath) {
374
+ const inputReal = await resolveForSafety(options.inputPath);
375
+ forbidden.set(inputReal, "input path");
376
+ forbidden.set(path3.dirname(inputReal), "parent of input path");
377
+ }
378
+ const reason = forbidden.get(realOutDir);
379
+ if (reason) throw new Error(`Unsafe output directory for --force: refusing to delete ${reason} (${realOutDir}).`);
380
+ }
381
+ async function ensureCleanOutDir(outDir, options) {
382
+ if (options.force) await assertSafeForceOutDir(outDir, options);
266
383
  try {
267
384
  const entries = await fs.readdir(outDir);
268
385
  if (entries.length > 0) {
269
- if (!force) throw new Error(`Output directory is not empty: ${outDir}. Use --force to overwrite.`);
386
+ if (!options.force) throw new Error(`Output directory is not empty: ${outDir}. Use --force to overwrite.`);
270
387
  await fs.rm(outDir, { recursive: true, force: true });
271
388
  }
272
389
  } catch (error) {
@@ -274,70 +391,57 @@ async function ensureCleanOutDir(outDir, force) {
274
391
  }
275
392
  await fs.mkdir(outDir, { recursive: true });
276
393
  }
394
+ function titleForPath(relPath, fallback) {
395
+ const basename = path3.posix.basename(relPath, ".md");
396
+ return fallback || basename;
397
+ }
398
+ function markdownLink(fromDir, toPath) {
399
+ if (fromDir === ".") return toPath;
400
+ return path3.posix.relative(fromDir, toPath);
401
+ }
402
+ function indexTitle(dir, options) {
403
+ if (dir === ".") return options.title ?? options.sourceName ?? "OKF Bundle";
404
+ const leaf = path3.posix.basename(dir);
405
+ return leaf.split(/[-_\s]+/).filter(Boolean).map((word) => word.slice(0, 1).toUpperCase() + word.slice(1)).join(" ");
406
+ }
407
+ async function writePlainIndex(outDir, dir, concepts, options) {
408
+ const indexPath = dir === "." ? "index.md" : path3.posix.join(dir, "index.md");
409
+ const entries = (dir === "." ? concepts : concepts.filter((concept) => path3.posix.dirname(concept.relPath) === dir)).slice().sort((a, b) => a.relPath.localeCompare(b.relPath));
410
+ const lines = [
411
+ `# ${indexTitle(dir, options)}`,
412
+ "",
413
+ ...entries.map((concept) => `* [${concept.title}](${markdownLink(dir, concept.relPath)}) - ${concept.description}`)
414
+ ];
415
+ await fs.mkdir(path3.dirname(path3.join(outDir, indexPath)), { recursive: true });
416
+ await fs.writeFile(path3.join(outDir, indexPath), `${lines.join("\n").trimEnd()}
417
+ `, "utf8");
418
+ return indexPath;
419
+ }
277
420
  async function writeOkfBundle(docs, options) {
278
421
  if (docs.length === 0) throw new Error("No documents to write.");
279
- await ensureCleanOutDir(options.outDir, options.force);
422
+ await ensureCleanOutDir(options.outDir, options);
280
423
  const timestamp = options.timestamp ?? (/* @__PURE__ */ new Date()).toISOString();
281
424
  const sourceToOutput = assignOutputPaths(docs);
282
425
  const written = [];
426
+ const concepts = [];
283
427
  for (const doc of docs) {
284
428
  const relPath = doc.outputPath ?? "index.md";
285
- const absolute = path2.join(options.outDir, relPath);
286
- await fs.mkdir(path2.dirname(absolute), { recursive: true });
429
+ const absolute = path3.join(options.outDir, relPath);
430
+ await fs.mkdir(path3.dirname(absolute), { recursive: true });
287
431
  const body = withTitle(doc.title, rewriteLinks(doc, sourceToOutput));
288
432
  await fs.writeFile(absolute, `${frontmatter(doc, timestamp)}${body}
289
433
  `, "utf8");
290
434
  written.push(relPath);
435
+ concepts.push({
436
+ relPath,
437
+ title: titleForPath(relPath, doc.title),
438
+ description: descriptionFromMarkdown(doc.markdown)
439
+ });
291
440
  }
292
- if (!written.includes("index.md")) {
293
- const title = options.title ?? options.sourceName ?? "OKF Bundle";
294
- const list = written.sort().map((file) => `- [${file.replace(/\.md$/, "")}](./${file})`).join("\n");
295
- const indexDoc = [
296
- "---",
297
- 'type: "Bundle Index"',
298
- `title: ${yamlScalar(title)}`,
299
- `description: ${yamlScalar(`Index for ${title}.`)}`,
300
- `resource: ${yamlScalar(options.sourceName ?? title)}`,
301
- "tags:",
302
- ' - "index"',
303
- `timestamp: ${yamlScalar(timestamp)}`,
304
- "---",
305
- "",
306
- `# ${title}`,
307
- "",
308
- list,
309
- ""
310
- ].join("\n");
311
- await fs.writeFile(path2.join(options.outDir, "index.md"), indexDoc, "utf8");
312
- written.unshift("index.md");
313
- }
314
- const dirs = [...new Set(written.map((file) => path2.posix.dirname(file)).filter((dir) => dir !== "."))].sort();
441
+ written.push(await writePlainIndex(options.outDir, ".", concepts, options));
442
+ const dirs = [...new Set(concepts.map((concept) => path3.posix.dirname(concept.relPath)).filter((dir) => dir !== "."))].sort();
315
443
  for (const dir of dirs) {
316
- const indexPath = path2.posix.join(dir, "index.md");
317
- if (written.includes(indexPath)) continue;
318
- const children = written.filter((file) => path2.posix.dirname(file) === dir && path2.posix.basename(file) !== "index.md").sort();
319
- if (children.length === 0) continue;
320
- const title = `${dir.split("/").map((segment) => segment.slice(0, 1).toUpperCase() + segment.slice(1)).join(" / ")} Index`;
321
- const list = children.map((file) => `- [${path2.posix.basename(file, ".md")}](./${path2.posix.basename(file)})`).join("\n");
322
- const folderIndex = [
323
- "---",
324
- 'type: "Folder Index"',
325
- `title: ${yamlScalar(title)}`,
326
- `description: ${yamlScalar(`Index for ${dir}.`)}`,
327
- `resource: ${yamlScalar(options.sourceName ?? dir)}`,
328
- "tags:",
329
- ' - "index"',
330
- `timestamp: ${yamlScalar(timestamp)}`,
331
- "---",
332
- "",
333
- `# ${title}`,
334
- "",
335
- list,
336
- ""
337
- ].join("\n");
338
- await fs.mkdir(path2.join(options.outDir, dir), { recursive: true });
339
- await fs.writeFile(path2.join(options.outDir, indexPath), folderIndex, "utf8");
340
- written.push(indexPath);
444
+ written.push(await writePlainIndex(options.outDir, dir, concepts, options));
341
445
  }
342
446
  return written.sort();
343
447
  }
@@ -370,18 +474,40 @@ function matchesAnyPattern(value, patterns) {
370
474
  // src/crawler.ts
371
475
  var USER_AGENT = "okfy/0.1 (+https://github.com/0dust/OKFy)";
372
476
  var MAX_RESPONSE_BYTES = 5 * 1024 * 1024;
373
- async function fetchText(url) {
477
+ function isRedirect(status) {
478
+ return status >= 300 && status < 400;
479
+ }
480
+ function isSecurityRejection(error) {
481
+ const message = error instanceof Error ? error.message : "";
482
+ return message.includes("Private network crawl target rejected") || message.includes("Cross-origin redirect rejected");
483
+ }
484
+ async function fetchWithRedirects(url, options, signal) {
485
+ let current = url;
486
+ for (let redirectCount = 0; redirectCount <= 10; redirectCount += 1) {
487
+ if (!options.allowPrivateNetwork) await assertPublicNetworkUrl(current);
488
+ if (options.sameOriginSeed && !sameOrigin(current, options.sameOriginSeed)) {
489
+ throw new Error(`Cross-origin redirect rejected: ${current}`);
490
+ }
491
+ const response = await fetch(current, {
492
+ signal,
493
+ headers: { "user-agent": USER_AGENT, accept: "text/html,text/markdown,text/plain,*/*" },
494
+ redirect: "manual"
495
+ });
496
+ if (!isRedirect(response.status)) return response;
497
+ const location = response.headers.get("location");
498
+ if (!location) throw new Error(`Redirect missing location for ${current}`);
499
+ current = canonicalizeUrl(location, current);
500
+ }
501
+ throw new Error(`Too many redirects for ${url}`);
502
+ }
503
+ async function fetchText(url, options = {}) {
374
504
  const controller = new AbortController();
375
505
  const timeout = setTimeout(() => controller.abort(), 15e3);
376
506
  try {
377
507
  let lastError;
378
508
  for (let attempt = 0; attempt < 3; attempt += 1) {
379
509
  try {
380
- const response = await fetch(url, {
381
- signal: controller.signal,
382
- headers: { "user-agent": USER_AGENT, accept: "text/html,text/markdown,text/plain,*/*" },
383
- redirect: "follow"
384
- });
510
+ const response = await fetchWithRedirects(url, options, controller.signal);
385
511
  if (!response.ok) {
386
512
  if ((response.status >= 500 || response.status === 429) && attempt < 2) {
387
513
  await new Promise((resolve) => setTimeout(resolve, 250 * 2 ** attempt));
@@ -396,6 +522,7 @@ async function fetchText(url) {
396
522
  return { text, contentType: response.headers.get("content-type") ?? "" };
397
523
  } catch (error) {
398
524
  lastError = error;
525
+ if (isSecurityRejection(error)) throw error;
399
526
  if (attempt < 2) await new Promise((resolve) => setTimeout(resolve, 250 * 2 ** attempt));
400
527
  }
401
528
  }
@@ -408,8 +535,8 @@ async function loadRobots(seedUrl, enabled) {
408
535
  if (!enabled) return void 0;
409
536
  const origin = new URL(seedUrl).origin;
410
537
  try {
411
- const response = await fetch(`${origin}/robots.txt`, { headers: { "user-agent": USER_AGENT } });
412
- const text = response.ok ? await response.text() : "";
538
+ const fetched = await fetchText(`${origin}/robots.txt`, { sameOriginSeed: seedUrl });
539
+ const text = fetched.text;
413
540
  return robotsParser(`${origin}/robots.txt`, text);
414
541
  } catch {
415
542
  return robotsParser(`${origin}/robots.txt`, "");
@@ -444,6 +571,7 @@ async function crawlWebsite(options) {
444
571
  if (!options.allowPrivateNetwork && isPrivateNetworkUrl(seed)) {
445
572
  throw new Error("Private network crawl target rejected. Use --allow-private-network for trusted local fixtures.");
446
573
  }
574
+ if (!options.allowPrivateNetwork) await assertPublicNetworkUrl(seed);
447
575
  const maxPages = options.maxPages ?? 100;
448
576
  const maxDepth = options.maxDepth ?? 4;
449
577
  const robots = await loadRobots(seed, options.respectRobots ?? true);
@@ -471,7 +599,10 @@ async function crawlWebsite(options) {
471
599
  planned.push(item.url);
472
600
  options.onProgress?.({ type: "fetch", url: item.url, fetched: documents.length, queued: queue.length, maxPages });
473
601
  try {
474
- const fetched = await fetchText(item.url);
602
+ const fetched = await fetchText(item.url, {
603
+ allowPrivateNetwork: options.allowPrivateNetwork,
604
+ sameOriginSeed: options.sameOrigin ?? true ? seed : void 0
605
+ });
475
606
  const contentType = contentTypeFromHeader(fetched.contentType);
476
607
  if (!contentType) {
477
608
  skipped += 1;
@@ -492,7 +623,7 @@ async function crawlWebsite(options) {
492
623
  for (const link of links) {
493
624
  try {
494
625
  const next = canonicalizeUrl(link.href, item.url);
495
- if (!queued.has(next) && shouldVisit(next, seed, options, robots) && queued.size < maxPages * 4) {
626
+ if (!queued.has(next) && shouldVisit(next, seed, options, robots) && (options.allowPrivateNetwork || !await resolvesToPrivateNetwork(next)) && queued.size < maxPages * 4) {
496
627
  queued.add(next);
497
628
  queue.push({ url: next, depth: item.depth + 1 });
498
629
  discovered += 1;
@@ -510,7 +641,8 @@ async function crawlWebsite(options) {
510
641
  discovered,
511
642
  maxPages
512
643
  });
513
- } catch {
644
+ } catch (error) {
645
+ if (isSecurityRejection(error)) throw error;
514
646
  failed += 1;
515
647
  options.onProgress?.({ type: "failed", url: item.url, fetched: documents.length, queued: queue.length, maxPages });
516
648
  }
@@ -529,6 +661,7 @@ async function crawlWebsite(options) {
529
661
  title: options.title,
530
662
  sourceName: seed,
531
663
  force: options.force,
664
+ dangerouslyAllowUnsafeOutput: options.dangerouslyAllowUnsafeOutput,
532
665
  timestamp: options.timestamp
533
666
  });
534
667
  return { pagesFetched: documents.length, skipped, failed, written, documents };
@@ -536,9 +669,9 @@ async function crawlWebsite(options) {
536
669
 
537
670
  // src/importer.ts
538
671
  import fs2 from "fs/promises";
539
- import path3 from "path";
672
+ import path4 from "path";
540
673
  function contentTypeFor(file) {
541
- const ext = path3.extname(file).toLowerCase();
674
+ const ext = path4.extname(file).toLowerCase();
542
675
  if (ext === ".md") return "markdown";
543
676
  if (ext === ".mdx") return "mdx";
544
677
  if (ext === ".html" || ext === ".htm") return "html";
@@ -551,7 +684,7 @@ async function listFiles(root) {
551
684
  const files = [];
552
685
  async function walk(dir) {
553
686
  for (const entry of await fs2.readdir(dir, { withFileTypes: true })) {
554
- const absolute = path3.join(dir, entry.name);
687
+ const absolute = path4.join(dir, entry.name);
555
688
  if (entry.isDirectory()) {
556
689
  if (![".git", "node_modules", "dist"].includes(entry.name)) await walk(absolute);
557
690
  } else if (entry.isFile()) {
@@ -563,11 +696,11 @@ async function listFiles(root) {
563
696
  return files.sort();
564
697
  }
565
698
  async function importLocal(options) {
566
- const root = path3.resolve(options.inputPath);
699
+ const root = path4.resolve(options.inputPath);
567
700
  const files = await listFiles(root);
568
701
  const docs = [];
569
702
  for (const file of files) {
570
- const rel = path3.relative(root, file).split(path3.sep).join("/");
703
+ const rel = path4.relative(root, file).split(path4.sep).join("/");
571
704
  if (options.include?.length && !matchesAnyPattern(rel, options.include)) continue;
572
705
  if (matchesAnyPattern(rel, options.exclude)) continue;
573
706
  const contentType = contentTypeFor(file);
@@ -587,21 +720,25 @@ async function importLocal(options) {
587
720
  title: options.sourceName,
588
721
  sourceName: options.sourceName ?? options.inputPath,
589
722
  force: options.force,
723
+ inputPath: root,
724
+ dangerouslyAllowUnsafeOutput: options.dangerouslyAllowUnsafeOutput,
590
725
  timestamp: options.timestamp
591
726
  });
592
727
  return { written, documents: docs };
593
728
  }
594
729
 
595
730
  // src/graph.ts
596
- import path4 from "path";
731
+ import path5 from "path";
597
732
  function extractInternalLinks(concept) {
598
733
  const links = /* @__PURE__ */ new Set();
599
734
  for (const match of concept.body.matchAll(/\[[^\]]*]\(([^)\s]+)(?:\s+"[^"]*")?\)/g)) {
600
735
  const href = match[1] ?? "";
601
- if (/^(https?:)?\/\//.test(href) || href.startsWith("mailto:") || href.startsWith("#")) continue;
602
736
  const noHash = href.split("#")[0] ?? href;
603
737
  if (!noHash) continue;
604
- const resolved = path4.posix.normalize(path4.posix.join(path4.posix.dirname(concept.path), noHash));
738
+ if (/^(https?:)?\/\//i.test(noHash) || /^mailto:/i.test(noHash)) continue;
739
+ if (/^[a-z][a-z0-9+.-]*:/i.test(noHash)) continue;
740
+ const resolved = noHash.startsWith("/") ? path5.posix.normalize(noHash.slice(1)) : path5.posix.normalize(path5.posix.join(path5.posix.dirname(concept.path), noHash));
741
+ if (!resolved || resolved === ".") continue;
605
742
  links.add(stripMdExtension(resolved));
606
743
  }
607
744
  return [...links].sort();
@@ -627,13 +764,13 @@ function buildGraph(conceptsByAnyKey) {
627
764
 
628
765
  // src/reader.ts
629
766
  import fs3 from "fs/promises";
630
- import path5 from "path";
767
+ import path6 from "path";
631
768
  import matter from "gray-matter";
632
769
  async function listMarkdownFiles(dir) {
633
770
  const result = [];
634
771
  async function walk(current) {
635
772
  for (const entry of await fs3.readdir(current, { withFileTypes: true })) {
636
- const absolute = path5.join(current, entry.name);
773
+ const absolute = path6.join(current, entry.name);
637
774
  if (entry.isDirectory()) await walk(absolute);
638
775
  else if (entry.isFile() && entry.name.endsWith(".md")) result.push(absolute);
639
776
  }
@@ -648,7 +785,8 @@ function stringArray(value) {
648
785
  async function readConceptFile(bundleDir, absolutePath) {
649
786
  const raw = await fs3.readFile(absolutePath, "utf8");
650
787
  const parsed = matter(raw);
651
- const relPath = toPosixPath(path5.relative(bundleDir, absolutePath));
788
+ const relPath = toPosixPath(path6.relative(bundleDir, absolutePath));
789
+ if (isReservedOkfPath(relPath)) throw new Error(`Reserved OKF file is not a concept: ${relPath}`);
652
790
  const id = stripMdExtension(relPath);
653
791
  const frontmatter2 = parsed.data;
654
792
  return {
@@ -667,6 +805,8 @@ async function readBundle(bundleDir) {
667
805
  const files = await listMarkdownFiles(bundleDir);
668
806
  const concepts = /* @__PURE__ */ new Map();
669
807
  for (const file of files) {
808
+ const relPath = toPosixPath(path6.relative(bundleDir, file));
809
+ if (!isConceptMarkdownPath(relPath)) continue;
670
810
  const concept = await readConceptFile(bundleDir, file);
671
811
  concepts.set(concept.id, concept);
672
812
  concepts.set(concept.path, concept);
@@ -730,13 +870,13 @@ var BundleSearch = class _BundleSearch {
730
870
 
731
871
  // src/validate.ts
732
872
  import fs4 from "fs/promises";
733
- import path6 from "path";
873
+ import path7 from "path";
734
874
  import matter2 from "gray-matter";
735
875
  async function listMarkdownFiles2(dir) {
736
876
  const result = [];
737
877
  async function walk(current) {
738
878
  for (const entry of await fs4.readdir(current, { withFileTypes: true })) {
739
- const absolute = path6.join(current, entry.name);
879
+ const absolute = path7.join(current, entry.name);
740
880
  if (entry.isDirectory()) await walk(absolute);
741
881
  else if (entry.isFile() && entry.name.endsWith(".md")) result.push(absolute);
742
882
  }
@@ -747,6 +887,59 @@ async function listMarkdownFiles2(dir) {
747
887
  function issue(severity, code, message, file) {
748
888
  return { severity, code, message, path: file };
749
889
  }
890
+ function firstContentLine(content) {
891
+ return content.split(/\r?\n/).map((line) => line.trim()).find(Boolean) ?? "";
892
+ }
893
+ function parseFrontmatter(raw) {
894
+ const parsed = matter2(raw);
895
+ return { data: parsed.data, content: parsed.content };
896
+ }
897
+ function validateIndexFile(raw, rel, issues) {
898
+ let body = raw;
899
+ if (raw.startsWith("---")) {
900
+ if (rel !== "index.md") {
901
+ issues.push(issue("error", "reserved_index_frontmatter", "Only bundle-root index.md may contain okf_version frontmatter.", rel));
902
+ return;
903
+ }
904
+ let parsed;
905
+ try {
906
+ parsed = parseFrontmatter(raw);
907
+ } catch (error) {
908
+ issues.push(issue("error", "malformed_frontmatter", error?.message ?? "Malformed YAML frontmatter.", rel));
909
+ return;
910
+ }
911
+ const keys = Object.keys(parsed.data);
912
+ if (keys.length !== 1 || keys[0] !== "okf_version" || typeof parsed.data.okf_version !== "string") {
913
+ issues.push(issue("error", "reserved_index_frontmatter", "Root index.md frontmatter may contain only string okf_version.", rel));
914
+ }
915
+ body = parsed.content;
916
+ }
917
+ const firstLine = firstContentLine(body);
918
+ if (!firstLine.startsWith("# ")) {
919
+ issues.push(issue("error", "invalid_index_structure", "index.md must be a markdown directory listing headed by a section title.", rel));
920
+ }
921
+ }
922
+ function validateLogFile(raw, rel, issues) {
923
+ if (raw.startsWith("---")) {
924
+ issues.push(issue("error", "reserved_log_frontmatter", "log.md must not contain YAML frontmatter.", rel));
925
+ return;
926
+ }
927
+ const firstLine = firstContentLine(raw);
928
+ if (!firstLine.startsWith("# ")) {
929
+ issues.push(issue("error", "invalid_log_structure", "log.md must be a markdown update log headed by a title.", rel));
930
+ }
931
+ for (const line of raw.split(/\r?\n/)) {
932
+ const heading = line.match(/^##\s+(.+)$/);
933
+ if (heading && !/^\d{4}-\d{2}-\d{2}\b/.test(heading[1] ?? "")) {
934
+ issues.push(issue("error", "invalid_log_date", "log.md date headings must use YYYY-MM-DD.", rel));
935
+ }
936
+ }
937
+ }
938
+ function validateReservedFile(raw, rel, issues) {
939
+ const name = path7.posix.basename(rel).toLowerCase();
940
+ if (name === "index.md") validateIndexFile(raw, rel, issues);
941
+ if (name === "log.md") validateLogFile(raw, rel, issues);
942
+ }
750
943
  async function validateBundle(bundleDir) {
751
944
  const issues = [];
752
945
  let files = [];
@@ -756,13 +949,22 @@ async function validateBundle(bundleDir) {
756
949
  return {
757
950
  valid: false,
758
951
  issues: [issue("error", "bundle_unreadable", error?.message ?? "Bundle cannot be read.")],
759
- conceptCount: 0
952
+ conceptCount: 0,
953
+ reservedFileCount: 0,
954
+ warningCount: 0
760
955
  };
761
956
  }
762
- const seenIds = /* @__PURE__ */ new Set();
957
+ const conceptFiles = files.filter((file) => isConceptMarkdownPath(path7.relative(bundleDir, file).split(path7.sep).join("/")));
958
+ const reservedFiles = files.filter((file) => isReservedOkfPath(path7.relative(bundleDir, file).split(path7.sep).join("/")));
959
+ for (const file of reservedFiles) {
960
+ const rel = path7.relative(bundleDir, file).split(path7.sep).join("/");
961
+ const raw = await fs4.readFile(file, "utf8");
962
+ validateReservedFile(raw, rel, issues);
963
+ }
763
964
  for (const file of files) {
764
- const rel = path6.relative(bundleDir, file).split(path6.sep).join("/");
765
- if (rel.includes("..") || path6.isAbsolute(rel)) {
965
+ const rel = path7.relative(bundleDir, file).split(path7.sep).join("/");
966
+ if (!isConceptMarkdownPath(rel)) continue;
967
+ if (rel.includes("..") || path7.isAbsolute(rel)) {
766
968
  issues.push(issue("error", "unsafe_path", "Concept path is unsafe.", rel));
767
969
  }
768
970
  const raw = await fs4.readFile(file, "utf8");
@@ -783,38 +985,32 @@ async function validateBundle(bundleDir) {
783
985
  }
784
986
  for (const key of ["title", "description", "resource", "timestamp"]) {
785
987
  if (data[key] !== void 0 && typeof data[key] !== "string") {
786
- issues.push(issue("error", "bad_field_shape", `${key} must be a string when present.`, rel));
988
+ issues.push(issue("warning", "bad_field_shape", `${key} should be a string when present.`, rel));
787
989
  }
788
990
  }
789
991
  if (data.tags !== void 0 && (!Array.isArray(data.tags) || data.tags.some((tag) => typeof tag !== "string"))) {
790
- issues.push(issue("error", "bad_field_shape", "tags must be an array of strings when present.", rel));
791
- }
792
- if (parsed.content.trim().length === 0) {
793
- issues.push(issue("error", "empty_concept", "Concept body must not be empty.", rel));
992
+ issues.push(issue("warning", "bad_field_shape", "tags should be an array of strings when present.", rel));
794
993
  }
795
- const id = rel.replace(/\.md$/i, "");
796
- if (seenIds.has(id)) issues.push(issue("error", "duplicate_concept_id", `Duplicate concept id: ${id}`, rel));
797
- seenIds.add(id);
798
994
  }
799
995
  const concepts = await readBundle(bundleDir).catch(() => /* @__PURE__ */ new Map());
800
996
  const canonicalIds = new Set([...concepts.values()].map((concept) => concept.id));
801
997
  for (const concept of new Map([...concepts.values()].map((concept2) => [concept2.id, concept2])).values()) {
802
998
  for (const target of extractInternalLinks(concept)) {
803
999
  if (!canonicalIds.has(target)) {
804
- issues.push(issue("error", "broken_internal_link", `Broken internal link to ${target}.`, concept.path));
1000
+ issues.push(issue("warning", "broken_internal_link", `Broken internal link to ${target}.`, concept.path));
805
1001
  }
806
1002
  }
807
1003
  }
808
- const dirs = new Set(files.map((file) => path6.dirname(file)));
1004
+ const dirs = new Set(conceptFiles.map((file) => path7.dirname(file)));
809
1005
  for (const dir of dirs) {
810
- const index = path6.join(dir, "index.md");
1006
+ const index = path7.join(dir, "index.md");
811
1007
  if (!files.includes(index)) {
812
1008
  issues.push(
813
1009
  issue(
814
1010
  "warning",
815
1011
  "missing_folder_index",
816
1012
  "Folder has concepts but no index.md.",
817
- path6.relative(bundleDir, dir).split(path6.sep).join("/") || "."
1013
+ path7.relative(bundleDir, dir).split(path7.sep).join("/") || "."
818
1014
  )
819
1015
  );
820
1016
  }
@@ -822,7 +1018,9 @@ async function validateBundle(bundleDir) {
822
1018
  return {
823
1019
  valid: !issues.some((item) => item.severity === "error"),
824
1020
  issues,
825
- conceptCount: files.length
1021
+ conceptCount: conceptFiles.length,
1022
+ reservedFileCount: reservedFiles.length,
1023
+ warningCount: issues.filter((item) => item.severity === "warning").length
826
1024
  };
827
1025
  }
828
1026
  async function inspectBundle(bundleDir) {
@@ -848,8 +1046,10 @@ async function inspectBundle(bundleDir) {
848
1046
  const linkCount = [...graph.outbound.values()].reduce((sum, links) => sum + links.length, 0);
849
1047
  const validation = await validateBundle(bundleDir);
850
1048
  return {
851
- title: concepts.find((concept) => concept.id === "index")?.title ?? path6.basename(bundleDir),
1049
+ title: path7.basename(bundleDir),
852
1050
  conceptCount: concepts.length,
1051
+ reservedFileCount: validation.reservedFileCount,
1052
+ warningCount: validation.warningCount,
853
1053
  typeDistribution,
854
1054
  tagDistribution,
855
1055
  linkCount,
@@ -991,7 +1191,13 @@ async function createMcpServer(options) {
991
1191
  }
992
1192
  if (request.params.name === "bundle_summary") {
993
1193
  const [stats, validation] = await Promise.all([inspectBundle(options.bundleDir), validateBundle(options.bundleDir)]);
994
- return json({ ...stats, validationStatus: validation.valid ? "valid" : "invalid", validationIssues: validation.issues });
1194
+ return json({
1195
+ ...stats,
1196
+ reservedFileCount: validation.reservedFileCount,
1197
+ warningCount: validation.warningCount,
1198
+ validationStatus: validation.valid ? "valid" : "invalid",
1199
+ validationIssues: validation.issues
1200
+ });
995
1201
  }
996
1202
  return json({ error: { code: "unknown_tool", message: `Unknown tool: ${request.params.name}` } });
997
1203
  } catch (error) {
package/dist/cli.js CHANGED
@@ -5,7 +5,7 @@ import {
5
5
  inspectBundle,
6
6
  serveMcpStdio,
7
7
  validateBundle
8
- } from "./chunk-6AP7LVJG.js";
8
+ } from "./chunk-QE5W5AJS.js";
9
9
 
10
10
  // src/cli.ts
11
11
  import fs from "fs";
@@ -92,7 +92,7 @@ function printCrawlProgress(event) {
92
92
  }
93
93
  }
94
94
  program.name("okfy").description("Turn docs into agent memory with Open Knowledge Format and MCP.").version(readPackageVersion());
95
- program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("--out <dir>", "Output OKF bundle directory").option("--max-pages <n>", "Maximum pages", (value) => Number(value), 100).option("--max-depth <n>", "Maximum crawl depth", (value) => Number(value), 4).option("--include <pattern>", "Include glob or regex", collect, []).option("--exclude <pattern>", "Exclude glob or regex", collect, []).option("--same-origin", "Stay on same origin", true).option("--no-same-origin", "Allow cross-origin links").option("--respect-robots", "Respect robots.txt", true).option("--no-respect-robots", "Ignore robots.txt").option("--concurrency <n>", "Fetch concurrency", (value) => Number(value), 4).option("--title <name>", "Bundle title").option("--force", "Overwrite output directory", false).option("--dry-run", "List pages that would be crawled", false).option("--allow-private-network", "Allow localhost/private IP crawl targets", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (url, options) => {
95
+ program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("--out <dir>", "Output OKF bundle directory").option("--max-pages <n>", "Maximum pages", (value) => Number(value), 100).option("--max-depth <n>", "Maximum crawl depth", (value) => Number(value), 4).option("--include <pattern>", "Include glob or regex", collect, []).option("--exclude <pattern>", "Exclude glob or regex", collect, []).option("--same-origin", "Stay on same origin", true).option("--no-same-origin", "Allow cross-origin links").option("--respect-robots", "Respect robots.txt", true).option("--no-respect-robots", "Ignore robots.txt").option("--concurrency <n>", "Fetch concurrency", (value) => Number(value), 4).option("--title <name>", "Bundle title").option("--force", "Overwrite output directory", false).option("--dry-run", "List pages that would be crawled", false).option("--allow-private-network", "Allow localhost/private IP crawl targets", false).option("--dangerously-allow-unsafe-output", "Dangerously allow --force to delete otherwise unsafe output paths", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (url, options) => {
96
96
  try {
97
97
  const result = await crawlWebsite({
98
98
  seedUrl: url,
@@ -109,7 +109,7 @@ program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("
109
109
  console.log("okfy crawl");
110
110
  console.log(`Seed: ${url}`);
111
111
  console.log(`Pages: ${result.pagesFetched} fetched, ${result.skipped} skipped, ${result.failed} failed`);
112
- console.log(`Concepts: ${result.written.length} written`);
112
+ console.log(`Concepts: ${result.documents.length} written`);
113
113
  console.log(`Output: ${options.out}`);
114
114
  console.log("\nNext:");
115
115
  console.log(` okfy validate ${options.out}`);
@@ -119,7 +119,7 @@ program.command("crawl").argument("<url>", "Docs URL to crawl").requiredOption("
119
119
  process.exitCode = 1;
120
120
  }
121
121
  });
122
- program.command("import").argument("<path>", "Local docs folder or file").requiredOption("--out <dir>", "Output OKF bundle directory").option("--source-name <name>", "Source name").option("--include <glob>", "Include glob", collect, []).option("--exclude <glob>", "Exclude glob", collect, []).option("--force", "Overwrite output directory", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (input, options) => {
122
+ program.command("import").argument("<path>", "Local docs folder or file").requiredOption("--out <dir>", "Output OKF bundle directory").option("--source-name <name>", "Source name").option("--include <glob>", "Include glob", collect, []).option("--exclude <glob>", "Exclude glob", collect, []).option("--force", "Overwrite output directory", false).option("--dangerously-allow-unsafe-output", "Dangerously allow --force to delete otherwise unsafe output paths", false).option("--stable-timestamps", "Use a deterministic timestamp in generated frontmatter", false).action(async (input, options) => {
123
123
  try {
124
124
  printStatus(`okfy import: reading ${input}`);
125
125
  printStatus(`okfy import: writing bundle to ${options.out}`);
@@ -131,9 +131,9 @@ program.command("import").argument("<path>", "Local docs folder or file").requir
131
131
  });
132
132
  console.log("okfy import");
133
133
  console.log(`Source: ${input}`);
134
- console.log(`Concepts: ${result.written.length} written`);
134
+ console.log(`Concepts: ${result.documents.length} written`);
135
135
  console.log(`Output: ${options.out}`);
136
- printStatus(`okfy import: done, wrote ${result.written.length} concepts`);
136
+ printStatus(`okfy import: done, wrote ${result.documents.length} concepts`);
137
137
  } catch (error) {
138
138
  console.error(pc.red(error?.message ?? "Import failed."));
139
139
  process.exitCode = 1;
package/dist/index.d.ts CHANGED
@@ -54,10 +54,14 @@ type ValidationReport = {
54
54
  valid: boolean;
55
55
  issues: ValidationIssue[];
56
56
  conceptCount: number;
57
+ reservedFileCount: number;
58
+ warningCount: number;
57
59
  };
58
60
  type BundleStats = {
59
61
  title: string;
60
62
  conceptCount: number;
63
+ reservedFileCount: number;
64
+ warningCount: number;
61
65
  typeDistribution: Record<string, number>;
62
66
  tagDistribution: Record<string, number>;
63
67
  linkCount: number;
@@ -85,6 +89,7 @@ type CrawlOptions = {
85
89
  force?: boolean;
86
90
  dryRun?: boolean;
87
91
  allowPrivateNetwork?: boolean;
92
+ dangerouslyAllowUnsafeOutput?: boolean;
88
93
  timestamp?: string;
89
94
  onProgress?: (event: CrawlProgressEvent) => void;
90
95
  };
@@ -143,6 +148,7 @@ type ImportOptions = {
143
148
  include?: string[];
144
149
  exclude?: string[];
145
150
  force?: boolean;
151
+ dangerouslyAllowUnsafeOutput?: boolean;
146
152
  timestamp?: string;
147
153
  };
148
154
  declare function importLocal(options: ImportOptions): Promise<{
@@ -208,6 +214,8 @@ type WriteBundleOptions = {
208
214
  title?: string;
209
215
  sourceName?: string;
210
216
  force?: boolean;
217
+ inputPath?: string;
218
+ dangerouslyAllowUnsafeOutput?: boolean;
211
219
  timestamp?: string;
212
220
  };
213
221
  declare function writeOkfBundle(docs: NormalizedDocument[], options: WriteBundleOptions): Promise<string[]>;
package/dist/index.js CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  serveMcpStdio,
18
18
  validateBundle,
19
19
  writeOkfBundle
20
- } from "./chunk-6AP7LVJG.js";
20
+ } from "./chunk-QE5W5AJS.js";
21
21
  export {
22
22
  BundleSearch,
23
23
  buildGraph,
@@ -25,7 +25,7 @@ okfy inspect ./tmp/okfy-docs
25
25
  Expected output:
26
26
 
27
27
  ```text
28
- Concepts: 9
28
+ Concepts: 6
29
29
  Validation: valid
30
30
  Broken links: 0
31
31
  ```
@@ -13,7 +13,7 @@ pnpm okfy import examples/local-markdown --out examples/bundles/okfy-docs --sour
13
13
  Expected concept count:
14
14
 
15
15
  ```text
16
- 9
16
+ 6
17
17
  ```
18
18
 
19
19
  Expected validation status:
@@ -35,7 +35,7 @@ Purpose: small curated Stripe Checkout sample for launch demos when live crawlin
35
35
  Source command:
36
36
 
37
37
  ```bash
38
- pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name "Stripe Checkout sample" --force --stable-timestamps
38
+ pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name "Stripe Checkout" --force --stable-timestamps
39
39
  ```
40
40
 
41
41
  Expected concept count:
@@ -69,7 +69,7 @@ okfy import ./examples/local-markdown --out ./tmp/okfy-docs --force --stable-tim
69
69
  Expected concept count:
70
70
 
71
71
  ```text
72
- 9
72
+ 6
73
73
  ```
74
74
 
75
75
  Expected validation status:
@@ -1,14 +1,4 @@
1
- ---
2
- type: "Folder Index"
3
- title: "Concepts Index"
4
- description: "Index for concepts."
5
- resource: "okfy docs"
6
- tags:
7
- - "index"
8
- timestamp: "2026-06-14T00:00:00.000Z"
9
- ---
1
+ # Concepts
10
2
 
11
- # Concepts Index
12
-
13
- - [okf-bundle](./okf-bundle.md)
14
- - [progressive-disclosure](./progressive-disclosure.md)
3
+ * [OKF Bundle Structure](okf-bundle.md) - An Open Knowledge Format bundle is a directory of Markdown files with YAML frontmatter. Minimum valid concept: Useful generated fields include title, description, resource, tags, a
4
+ * [Progressive Disclosure](progressive-disclosure.md) - Progressive disclosure means an agent starts with small previews and only loads full concept content when needed. For okfy, the default pattern is: This keeps prompt context smalle
@@ -1,14 +1,4 @@
1
- ---
2
- type: "Folder Index"
3
- title: "Guides Index"
4
- description: "Index for guides."
5
- resource: "okfy docs"
6
- tags:
7
- - "index"
8
- timestamp: "2026-06-14T00:00:00.000Z"
9
- ---
1
+ # Guides
10
2
 
11
- # Guides Index
12
-
13
- - [import-local-markdown](./import-local-markdown.md)
14
- - [serve-over-mcp](./serve-over-mcp.md)
3
+ * [Import Local Markdown](import-local-markdown.md) - Use okfy import when docs already live in a local project checkout, wiki export, Obsidian vault, or staticsite source folder. Expected result: The importer preserves headings, code
4
+ * [Serve Over MCP](serve-over-mcp.md) - After generating an OKF bundle, serve it over stdio MCP: Agents should not read the whole bundle first. The efficient flow is: Use searchconcepts for discovery, readconcept for gro
@@ -0,0 +1,22 @@
1
+ ---
2
+ type: "API Reference"
3
+ title: "okfy Local Markdown Fixture"
4
+ description: "This fixture models a small docs folder that can be imported into OKF without network access. Start with Import Local Markdown, then read Serve Over MCP. Key topics: OKF bundle str"
5
+ resource: "index.md"
6
+ tags:
7
+ - "okfy"
8
+ - "local"
9
+ - "fixture"
10
+ timestamp: "2026-06-14T00:00:00.000Z"
11
+ ---
12
+ # okfy Local Markdown Fixture
13
+
14
+ This fixture models a small docs folder that can be imported into OKF without network access.
15
+
16
+ Start with [Import Local Markdown](./guides/import-local-markdown.md), then read [Serve Over MCP](./guides/serve-over-mcp.md).
17
+
18
+ Key topics:
19
+
20
+ - [OKF bundle structure](./concepts/okf-bundle.md)
21
+ - [Progressive disclosure](./concepts/progressive-disclosure.md)
22
+ - [MCP tools](./reference/mcp-tools.md)
@@ -1,22 +1,8 @@
1
- ---
2
- type: "API Reference"
3
- title: "okfy Local Markdown Fixture"
4
- description: "This fixture models a small docs folder that can be imported into OKF without network access. Start with Import Local Markdown, then read Serve Over MCP. Key topics: OKF bundle str"
5
- resource: "index.md"
6
- tags:
7
- - "okfy"
8
- - "local"
9
- - "fixture"
10
- timestamp: "2026-06-14T00:00:00.000Z"
11
- ---
12
- # okfy Local Markdown Fixture
13
-
14
- This fixture models a small docs folder that can be imported into OKF without network access.
15
-
16
- Start with [Import Local Markdown](./guides/import-local-markdown.md), then read [Serve Over MCP](./guides/serve-over-mcp.md).
17
-
18
- Key topics:
19
-
20
- - [OKF bundle structure](./concepts/okf-bundle.md)
21
- - [Progressive disclosure](./concepts/progressive-disclosure.md)
22
- - [MCP tools](./reference/mcp-tools.md)
1
+ # okfy docs
2
+
3
+ * [OKF Bundle Structure](concepts/okf-bundle.md) - An Open Knowledge Format bundle is a directory of Markdown files with YAML frontmatter. Minimum valid concept: Useful generated fields include title, description, resource, tags, a
4
+ * [Progressive Disclosure](concepts/progressive-disclosure.md) - Progressive disclosure means an agent starts with small previews and only loads full concept content when needed. For okfy, the default pattern is: This keeps prompt context smalle
5
+ * [Import Local Markdown](guides/import-local-markdown.md) - Use okfy import when docs already live in a local project checkout, wiki export, Obsidian vault, or staticsite source folder. Expected result: The importer preserves headings, code
6
+ * [Serve Over MCP](guides/serve-over-mcp.md) - After generating an OKF bundle, serve it over stdio MCP: Agents should not read the whole bundle first. The efficient flow is: Use searchconcepts for discovery, readconcept for gro
7
+ * [okfy Local Markdown Fixture](home.md) - This fixture models a small docs folder that can be imported into OKF without network access. Start with Import Local Markdown, then read Serve Over MCP. Key topics: OKF bundle str
8
+ * [MCP Tools](reference/mcp-tools.md) - okfy exposes these readonly MCP tools: | Tool | Purpose | | | | | searchconcepts | Find concept previews by query, type, or tags. | | readconcept | Read one concept body, frontmatt
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "sourceCommand": "pnpm okfy import examples/local-markdown --out examples/bundles/okfy-docs --source-name \"okfy docs\" --force --stable-timestamps",
3
- "expectedConceptCount": 9,
3
+ "expectedConceptCount": 6,
4
4
  "expectedValidationStatus": "valid",
5
5
  "suggestedAgentQuestions": [
6
- "Search okfy docs for crawler security defaults, then cite source concepts.",
6
+ "Search for crawler security defaults, read the relevant concepts, and cite the source resource.",
7
7
  "Read the MCP setup concept and explain the stdio config.",
8
8
  "Find importer concepts and list supported input formats."
9
9
  ]
@@ -1,13 +1,3 @@
1
- ---
2
- type: "Folder Index"
3
- title: "Reference Index"
4
- description: "Index for reference."
5
- resource: "okfy docs"
6
- tags:
7
- - "index"
8
- timestamp: "2026-06-14T00:00:00.000Z"
9
- ---
1
+ # Reference
10
2
 
11
- # Reference Index
12
-
13
- - [mcp-tools](./mcp-tools.md)
3
+ * [MCP Tools](mcp-tools.md) - okfy exposes these readonly MCP tools: | Tool | Purpose | | | | | searchconcepts | Find concept previews by query, type, or tags. | | readconcept | Read one concept body, frontmatt
@@ -0,0 +1,21 @@
1
+ ---
2
+ type: "API Reference"
3
+ title: "Stripe Checkout"
4
+ description: "Checkout is a prebuilt payment form for accepting cards and other payment methods. This saved fixture mirrors the launch demo shape without requiring network access. Checkout quick"
5
+ resource: "index.html"
6
+ tags:
7
+ - "stripe"
8
+ - "checkout"
9
+ timestamp: "2026-06-14T00:00:00.000Z"
10
+ ---
11
+ # Stripe Checkout
12
+
13
+ Checkout is a prebuilt payment form for accepting cards and other payment methods.
14
+
15
+ This saved fixture mirrors the launch demo shape without requiring network access.
16
+
17
+ - [Checkout quickstart](./quickstart.md)
18
+ - [Checkout Sessions API](./sessions.md)
19
+ - [Checkout webhooks](./webhooks.md)
20
+
21
+ Original source: [https://docs.stripe.com/checkout](https://docs.stripe.com/checkout)
@@ -1,21 +1,6 @@
1
- ---
2
- type: "API Reference"
3
- title: "Stripe Checkout"
4
- description: "Checkout is a prebuilt payment form for accepting cards and other payment methods. This saved fixture mirrors the launch demo shape without requiring network access. Checkout quick"
5
- resource: "index.html"
6
- tags:
7
- - "stripe"
8
- - "checkout"
9
- timestamp: "2026-06-14T00:00:00.000Z"
10
- ---
11
1
  # Stripe Checkout
12
2
 
13
- Checkout is a prebuilt payment form for accepting cards and other payment methods.
14
-
15
- This saved fixture mirrors the launch demo shape without requiring network access.
16
-
17
- - [Checkout quickstart](./quickstart.md)
18
- - [Checkout Sessions API](./sessions.md)
19
- - [Checkout webhooks](./webhooks.md)
20
-
21
- Original source: [https://docs.stripe.com/checkout](https://docs.stripe.com/checkout)
3
+ * [Stripe Checkout](home.md) - Checkout is a prebuilt payment form for accepting cards and other payment methods. This saved fixture mirrors the launch demo shape without requiring network access. Checkout quick
4
+ * [Checkout quickstart](quickstart.md) - Create a server endpoint that creates a Checkout Session, then redirect the customer to the session URL. See Checkout Sessions API and Checkout webhooks. Original source: https://d
5
+ * [Checkout Sessions API](sessions.md) - A Checkout Session represents a customer's session as they pay for onetime purchases or subscriptions. Important parameters include mode, lineitems, successurl, and cancelurl. Star
6
+ * [Checkout webhooks](webhooks.md) - Listen for checkout.session.completed before fulfilling orders or granting access. Webhook handlers should verify signatures and handle retries idempotently. Related: Checkout Sess
@@ -1,8 +1,7 @@
1
1
  {
2
- "sourceCommand": "pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name \"Stripe Checkout sample\" --force --stable-timestamps",
2
+ "sourceCommand": "pnpm okfy import test-fixtures/stripe-checkout-html --out examples/bundles/stripe-checkout-small --source-name \"Stripe Checkout\" --force --stable-timestamps",
3
3
  "expectedConceptCount": 4,
4
4
  "expectedValidationStatus": "valid",
5
- "sampleNote": "Curated saved-HTML sample used when live crawling is flaky; source URLs are preserved in concept bodies and resource metadata points to the fixture source.",
6
5
  "suggestedAgentQuestions": [
7
6
  "Search for Checkout Sessions, read the strongest match, and explain required server parameters.",
8
7
  "Find webhook-related concepts and summarize fulfillment safety notes.",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "sourceCommand": "okfy import ./examples/local-markdown --out ./tmp/okfy-docs --force --stable-timestamps",
3
- "expectedConceptCount": 9,
3
+ "expectedConceptCount": 6,
4
4
  "expectedValidationStatus": "valid",
5
5
  "suggestedAgentQuestions": [
6
6
  "Search for import workflow concepts, read the best match, and explain how to convert a local Markdown folder into OKF.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "okfy-ai",
3
- "version": "0.1.3",
3
+ "version": "0.1.4",
4
4
  "description": "Convert docs into Open Knowledge Format bundles and serve them to MCP agents.",
5
5
  "type": "module",
6
6
  "bin": {