@astrofoundry/grimoire 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +148 -0
  2. package/dist/apikey.d.ts +5 -0
  3. package/dist/apikey.d.ts.map +1 -0
  4. package/dist/apikey.js +85 -0
  5. package/dist/apikey.js.map +1 -0
  6. package/dist/chunker.d.ts +7 -0
  7. package/dist/chunker.d.ts.map +1 -0
  8. package/dist/chunker.js +153 -0
  9. package/dist/chunker.js.map +1 -0
  10. package/dist/cli.d.ts +3 -0
  11. package/dist/cli.d.ts.map +1 -0
  12. package/dist/cli.js +496 -0
  13. package/dist/cli.js.map +1 -0
  14. package/dist/config.d.ts +18 -0
  15. package/dist/config.d.ts.map +1 -0
  16. package/dist/config.js +76 -0
  17. package/dist/config.js.map +1 -0
  18. package/dist/consumer-config.d.ts +11 -0
  19. package/dist/consumer-config.d.ts.map +1 -0
  20. package/dist/consumer-config.js +58 -0
  21. package/dist/consumer-config.js.map +1 -0
  22. package/dist/consumer.d.ts +8 -0
  23. package/dist/consumer.d.ts.map +1 -0
  24. package/dist/consumer.js +71 -0
  25. package/dist/consumer.js.map +1 -0
  26. package/dist/converter.d.ts +12 -0
  27. package/dist/converter.d.ts.map +1 -0
  28. package/dist/converter.js +95 -0
  29. package/dist/converter.js.map +1 -0
  30. package/dist/embedder.d.ts +3 -0
  31. package/dist/embedder.d.ts.map +1 -0
  32. package/dist/embedder.js +38 -0
  33. package/dist/embedder.js.map +1 -0
  34. package/dist/format.d.ts +5 -0
  35. package/dist/format.d.ts.map +1 -0
  36. package/dist/format.js +6 -0
  37. package/dist/format.js.map +1 -0
  38. package/dist/reranker.d.ts +6 -0
  39. package/dist/reranker.d.ts.map +1 -0
  40. package/dist/reranker.js +21 -0
  41. package/dist/reranker.js.map +1 -0
  42. package/dist/scraper.d.ts +9 -0
  43. package/dist/scraper.d.ts.map +1 -0
  44. package/dist/scraper.js +77 -0
  45. package/dist/scraper.js.map +1 -0
  46. package/dist/search.d.ts +8 -0
  47. package/dist/search.d.ts.map +1 -0
  48. package/dist/search.js +43 -0
  49. package/dist/search.js.map +1 -0
  50. package/dist/store.d.ts +11 -0
  51. package/dist/store.d.ts.map +1 -0
  52. package/dist/store.js +102 -0
  53. package/dist/store.js.map +1 -0
  54. package/dist/types.d.ts +25 -0
  55. package/dist/types.d.ts.map +1 -0
  56. package/dist/types.js +2 -0
  57. package/dist/types.js.map +1 -0
  58. package/package.json +47 -0
package/README.md ADDED
@@ -0,0 +1,148 @@
1
+ # grimoire
2
+
3
+ Documentation RAG System — scrape docs, embed, search with reranking.
4
+
5
+ ## Consumer Setup
6
+
7
+ ```bash
8
+ npm install -g @astrofoundry/grimoire
9
+ grimoire init
10
+ # Enter API URL and API key (provided by admin)
11
+ grimoire search "how to query firestore"
12
+ ```
13
+
14
+ ## Admin Setup
15
+
16
+ ```bash
17
+ pnpm install
18
+ pnpm build
19
+ pnpm link --global
20
+ ```
21
+
22
+ ### Firebase / GCP
23
+
24
+ ```bash
25
+ # Authenticate for Firestore access (grimoire-docs project)
26
+ gcloud auth application-default login --project=grimoire-docs
27
+ ```
28
+
29
+ ### Vector indexes (one-time, before first search)
30
+
31
+ ```bash
32
+ gcloud firestore indexes composite create \
33
+ --collection-group=grimoire_chunks \
34
+ --query-scope=COLLECTION \
35
+ --field-config='field-path=embedding,vector-config={"dimension":"768","flat":{}}' \
36
+ --database="(default)" \
37
+ --project=grimoire-docs
38
+
39
+ gcloud firestore indexes composite create \
40
+ --collection-group=grimoire_chunks \
41
+ --query-scope=COLLECTION \
42
+ --field-config='field-path=source,order=ASCENDING' \
43
+ --field-config='field-path=embedding,vector-config={"dimension":"768","flat":{}}' \
44
+ --database="(default)" \
45
+ --project=grimoire-docs
46
+ ```
47
+
48
+ ## Commands
49
+
50
+ ```bash
51
+ # Add a documentation source (interactive)
52
+ grimoire add <name> --url <start_url>
53
+
54
+ # Refresh a source (scrape → convert → chunk → embed → store)
55
+ grimoire refresh <source>
56
+
57
+ # Full refresh (purge all data, re-scrape everything)
58
+ grimoire refresh <source> --full
59
+
60
+ # Re-run from cached HTML (skip scraping)
61
+ grimoire refresh <source> --from-raw
62
+
63
+ # Re-store from cached embeddings (skip scraping + embedding)
64
+ grimoire refresh <source> --from-store
65
+
66
+ # Override concurrency (default: 10)
67
+ grimoire refresh <source> --concurrency 20
68
+
69
+ # Refresh all sources
70
+ grimoire refresh --all
71
+
72
+ # Search across all sources
73
+ grimoire search "<query>"
74
+
75
+ # Search within a specific source
76
+ grimoire search "<query>" --source <name>
77
+
78
+ # List all configured sources
79
+ grimoire list
80
+
81
+ # Show statistics
82
+ grimoire stats
83
+
84
+ # Export source as JSON
85
+ grimoire export <source>
86
+
87
+ # API key management (admin only)
88
+ grimoire apikey create <name>
89
+ grimoire apikey list
90
+ grimoire apikey revoke <name>
91
+ ```
92
+
93
+ ## Configuration
94
+
95
+ Sources are defined in `config/sources.yaml`. Each source needs site-specific cleanup config.
96
+
97
+ ```yaml
98
+ sources:
99
+ my-source:
100
+ name: My Docs # Display name
101
+ start_url: https://example.com/docs
102
+ nav_selector: nav # CSS selector for navigation element
103
+ content_selector: article # CSS selector for main content
104
+ include_patterns: # URL patterns to include
105
+ - /docs
106
+ exclude_patterns: # URL patterns to exclude (optional)
107
+ - /docs/legacy
108
+ remove_selectors: # CSS selectors to strip from content (site-specific)
109
+ - footer
110
+ - nav
111
+ - .sidebar
112
+ remove_text_patterns: # Regex patterns to strip from markdown (site-specific)
113
+ - "^Cookie notice.*$"
114
+ concurrency: 10 # Parallel browser tabs (default: 10)
115
+ rate_limit_ms: 1000 # Delay between requests (optional)
116
+ ```
117
+
118
+ The converter strips only `style`, `script`, `noscript`, `iframe`, and `svg` by default. All other cleanup (nav, footer, banners, site-specific UI elements) must be configured per source via `remove_selectors` and `remove_text_patterns`.
119
+
120
+ See `config/sources.yaml` for the Firebase Firestore example with full cleanup config.
121
+
122
+ ## Environment Variables
123
+
124
+ Set these in a `.env` file at the project root (auto-loaded by the CLI):
125
+
126
+ ```bash
127
+ GOOGLE_CLOUD_PROJECT=grimoire-docs # Firebase/GCP project ID
128
+ GEMINI_API_KEY=... # Google Gemini API key
129
+ RERANKER_URL=... # llama-cpp reranker endpoint
130
+ ```
131
+
132
+ ## Releasing
133
+
134
+ ```bash
135
+ pnpm release:patch # bump, commit, tag, push → GH Actions deploys functions + publishes npm
136
+ pnpm release:minor
137
+ pnpm release:major
138
+ ```
139
+
140
+ ## Development
141
+
142
+ ```bash
143
+ pnpm test # Run tests
144
+ pnpm lint # ESLint
145
+ pnpm check # Typecheck + lint + test
146
+ pnpm build # Compile TypeScript
147
+ pnpm build:watch # Watch mode
148
+ ```
@@ -0,0 +1,5 @@
1
+ export declare function createApiKey(name: string): Promise<string>;
2
+ export declare function listApiKeys(): Promise<void>;
3
+ export declare function revokeApiKey(name: string): Promise<void>;
4
+ export declare function cmdApiKey(): Promise<void>;
5
+ //# sourceMappingURL=apikey.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"apikey.d.ts","sourceRoot":"","sources":["../src/apikey.ts"],"names":[],"mappings":"AA2BA,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAYhE;AAED,wBAAsB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC,CAiBjD;AAED,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAY9D;AAED,wBAAsB,SAAS,IAAI,OAAO,CAAC,IAAI,CAAC,CAyB/C"}
package/dist/apikey.js ADDED
@@ -0,0 +1,85 @@
1
+ import { randomBytes, createHash } from "node:crypto";
2
+ import { getFirestore, } from "firebase-admin/firestore";
3
+ import { initializeApp, applicationDefault } from "firebase-admin/app";
4
+ import { bold } from "./format.js";
5
// Lazily created Firebase Admin handles, cached at module level.
let app;
let db;
/**
 * Returns the cached Firestore client, creating it on first use.
 *
 * The Firebase app is initialised with `applicationDefault()` credentials only when no
 * app handle has been stored yet; the resulting Firestore instance is kept in `db`.
 */
function getDb() {
    if (db) {
        return db;
    }
    if (app === undefined || app === null) {
        app = initializeApp({ credential: applicationDefault() });
    }
    db = getFirestore(app);
    return db;
}
14
/**
 * Computes the SHA-256 digest of `key` and returns it hex-encoded.
 */
function hashKey(key) {
    const hasher = createHash("sha256");
    hasher.update(key);
    return hasher.digest("hex");
}
17
/**
 * Returns a reference to the `grimoire_api_keys` Firestore collection.
 */
function apiKeysCol() {
    const firestore = getDb();
    return firestore.collection("grimoire_api_keys");
}
20
/**
 * Generates a new API key and stores a record for it.
 *
 * The key is `grim_` followed by 32 random bytes in base64url. The document id is the
 * SHA-256 hex digest of the key (via `hashKey`); the raw key itself is not written.
 *
 * @param name Label stored on the key record.
 * @returns The raw key string.
 */
export async function createApiKey(name) {
    const secret = randomBytes(32).toString("base64url");
    const raw = `grim_${secret}`;
    const digest = hashKey(raw);
    const keyDoc = apiKeysCol().doc(digest);
    await keyDoc.set({
        name,
        created_at: new Date().toISOString(),
        last_used_at: null,
        active: true,
    });
    return raw;
}
31
/**
 * Prints every document in the API-key collection to stdout: name, active/revoked
 * status, creation timestamp and last-used timestamp ("never" when unset).
 * Prints a short notice instead when the collection is empty.
 */
export async function listApiKeys() {
    const keys = await apiKeysCol().get();
    if (keys.empty) {
        console.log("No API keys found.");
        return;
    }
    console.log("\nAPI Keys:\n");
    keys.docs.forEach((entry) => {
        const data = entry.data();
        let status = "revoked";
        if (data.active) {
            status = "active";
        }
        const lastUsed = data.last_used_at ?? "never";
        console.log(` ${bold(data.name)} (${status})`);
        console.log(` Created: ${data.created_at}`);
        console.log(` Last used: ${lastUsed}`);
    });
}
47
/**
 * Marks every active key record whose `name` field equals `name` as inactive.
 *
 * @param name Label of the key(s) to revoke.
 * @throws {Error} When no active key with that name exists.
 */
export async function revokeApiKey(name) {
    const activeByName = apiKeysCol()
        .where("name", "==", name)
        .where("active", "==", true);
    const matches = await activeByName.get();
    if (matches.empty) {
        throw new Error(`No active API key found with name "${name}".`);
    }
    // Updates are awaited one at a time, in snapshot order.
    for (const match of matches.docs) {
        await match.ref.update({ active: false });
    }
    console.log(`API key "${name}" revoked.`);
}
57
/**
 * Entry point for the `apikey` subcommands.
 *
 * Reads the subcommand from `process.argv[3]` and its argument from `process.argv[4]`,
 * then dispatches to create / list / revoke. Prints a usage line and exits with status 1
 * when the subcommand is unknown or a required name is missing.
 */
export async function cmdApiKey() {
    const [subcommand, arg] = process.argv.slice(3);
    switch (subcommand) {
        case "create": {
            if (!arg) {
                console.error("Usage: grimoire apikey create <name>");
                process.exit(1);
            }
            const key = await createApiKey(arg);
            console.log(`\nAPI key created for "${arg}":\n`);
            console.log(` ${key}\n`);
            console.log("Save this key — it will not be shown again.");
            break;
        }
        case "list":
            await listApiKeys();
            break;
        case "revoke":
            if (!arg) {
                console.error("Usage: grimoire apikey revoke <name>");
                process.exit(1);
            }
            await revokeApiKey(arg);
            break;
        default:
            console.error("Usage: grimoire apikey <create|list|revoke> [name]");
            process.exit(1);
    }
}
85
+ //# sourceMappingURL=apikey.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"apikey.js","sourceRoot":"","sources":["../src/apikey.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACtD,OAAO,EACL,YAAY,GAEb,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAY,MAAM,oBAAoB,CAAC;AACjF,OAAO,EAAE,IAAI,EAAE,MAAM,aAAa,CAAC;AAEnC,IAAI,GAAoB,CAAC;AACzB,IAAI,EAAyB,CAAC;AAE9B,SAAS,KAAK;IACZ,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,GAAG,GAAG,GAAG,IAAI,aAAa,CAAC,EAAE,UAAU,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC;QACjE,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,OAAO,CAAC,GAAW;IAC1B,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,UAAU;IACjB,OAAO,KAAK,EAAE,CAAC,UAAU,CAAC,mBAAmB,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC7C,MAAM,GAAG,GAAG,QAAQ,WAAW,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;IAC5D,MAAM,IAAI,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;IAE1B,MAAM,UAAU,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC;QAC/B,IAAI;QACJ,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACpC,YAAY,EAAE,IAAI;QAClB,MAAM,EAAE,IAAI;KACb,CAAC,CAAC;IAEH,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,MAAM,QAAQ,GAAG,MAAM,UAAU,EAAE,CAAC,GAAG,EAAE,CAAC;IAE1C,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAC7B,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QACxB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC;QAClD,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,IAAI,OAAO,CAAC;QAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,CAAC,UAAU,EAAE,CAAC,CAAC;QAC/C,OAAO,CAAC,GAAG,CAAC,kBAAkB,QAAQ,EAAE,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC7C,MAAM,QAAQ,GAAG,MAAM,UAAU,EAAE,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;IAEhG,IAAI,QAAQ,CAAC,K
AAK,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,IAAI,CAAC,CAAC;IAClE,CAAC;IAED,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChC,MAAM,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1C,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,YAAY,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE5B,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;QAC5B,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;YACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,0BAA0B,GAAG,MAAM,CAAC,CAAC;QACjD,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;QAC1B,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;IAC7D,CAAC;SAAM,IAAI,UAAU,KAAK,MAAM,EAAE,CAAC;QACjC,MAAM,WAAW,EAAE,CAAC;IACtB,CAAC;SAAM,IAAI,UAAU,KAAK,QAAQ,EAAE,CAAC;QACnC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;YACtD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QACD,MAAM,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,oDAAoD,CAAC,CAAC;QACpE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC"}
@@ -0,0 +1,7 @@
1
+ import type { Chunk } from "./types.js";
2
+ export type { Chunk };
3
+ export declare function estimateTokens(text: string): number;
4
+ export declare function slugifyHeading(heading: string): string;
5
+ export declare function buildChunkId(source: string, url: string, headingSlug: string, index?: number): string;
6
+ export declare function chunkMarkdown(markdown: string, source: string, url: string, title: string): Chunk[];
7
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAExC,YAAY,EAAE,KAAK,EAAE,CAAC;AAItB,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAOtD;AAED,wBAAgB,YAAY,CAC1B,MAAM,EAAE,MAAM,EACd,GAAG,EAAE,MAAM,EACX,WAAW,EAAE,MAAM,EACnB,KAAK,CAAC,EAAE,MAAM,GACb,MAAM,CAIR;AA+FD,wBAAgB,aAAa,CAC3B,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,GACZ,KAAK,EAAE,CAkET"}
@@ -0,0 +1,153 @@
1
+ import { slugifyUrl } from "./scraper.js";
2
// Token budget used when deciding whether a section must be split.
const MAX_TOKENS = 500;
/**
 * Rough token estimate: one token per four characters of `text`, rounded up.
 */
export function estimateTokens(text) {
    const charCount = text.length;
    return Math.ceil(charCount / 4);
}
6
/**
 * Turns a heading into a slug: lowercase, only `a-z`, `0-9` and hyphens, whitespace runs
 * replaced by a single hyphen, and no leading or trailing hyphen.
 */
export function slugifyHeading(heading) {
    const lowered = heading.toLowerCase();
    // Drop everything except ASCII letters, digits, whitespace and hyphens.
    const filtered = lowered.replace(/[^a-z0-9\s-]/g, "");
    const hyphenated = filtered.replace(/\s+/g, "-");
    const collapsed = hyphenated.replace(/-+/g, "-");
    return collapsed.replace(/^-|-$/g, "");
}
14
/**
 * Builds a chunk id of the form `<source>::<url slug>::<heading slug>`, with `-<index>`
 * appended when `index` is provided. The URL slug comes from `slugifyUrl`.
 */
export function buildChunkId(source, url, headingSlug, index) {
    const pageSlug = slugifyUrl(url);
    if (index === undefined) {
        return `${source}::${pageSlug}::${headingSlug}`;
    }
    return `${source}::${pageSlug}::${headingSlug}-${index}`;
}
19
/**
 * Splits markdown into sections, one per ATX heading (`#` through `######`).
 *
 * Lines before the first heading form a heading-less section (level 0, empty heading).
 * Each section records its heading text, its level, the chain of enclosing headings
 * (`headingPath`) and the body lines that follow the heading.
 *
 * Lines inside fenced code blocks (``` or ~~~) are always kept as body text, so a
 * `# comment` inside a shell or Python snippet does not start a new section. An
 * unterminated fence runs to the end of the document.
 *
 * @param {string} markdown Markdown text to split.
 * @returns {{level: number, heading: string, headingPath: string[], lines: string[]}[]}
 *          Sections in document order; sections with neither heading nor lines are omitted.
 */
function parseHeadingSections(markdown) {
    const sections = [];
    const headingStack = [];
    const levelStack = [];
    // Marker (e.g. "```") of the fence we are currently inside, or null outside any fence.
    let openFence = null;
    let currentSection = {
        level: 0,
        heading: "",
        headingPath: [],
        lines: [],
    };
    const flushSection = () => {
        if (currentSection.lines.length > 0 || currentSection.heading !== "") {
            sections.push(currentSection);
        }
    };
    for (const line of markdown.split("\n")) {
        const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
        if (fenceMatch) {
            const marker = fenceMatch[1];
            const rest = line.slice(fenceMatch[0].length);
            if (openFence === null) {
                openFence = marker;
            }
            else if (marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                rest.trim() === "") {
                // A closing fence uses the same character, is at least as long as the
                // opener, and carries no info string.
                openFence = null;
            }
            currentSection.lines.push(line);
            continue;
        }
        const headingMatch = openFence === null ? line.match(/^(#{1,6})\s+(.+)$/) : null;
        if (!headingMatch) {
            currentSection.lines.push(line);
            continue;
        }
        flushSection();
        const level = headingMatch[1].length;
        const heading = headingMatch[2].trim();
        // Pop headings at the same or deeper level so the stack holds only ancestors.
        while (levelStack.length > 0 && levelStack[levelStack.length - 1] >= level) {
            levelStack.pop();
            headingStack.pop();
        }
        headingStack.push(heading);
        levelStack.push(level);
        currentSection = {
            level,
            heading,
            headingPath: [...headingStack],
            lines: [],
        };
    }
    flushSection();
    return sections;
}
60
/**
 * Packs the paragraphs of `text` (separated by one or more blank lines) into parts whose
 * estimated token count stays within `maxTokens` where possible.
 *
 * Paragraphs are never split: a single paragraph larger than `maxTokens` becomes a part
 * on its own. Paragraphs within a part are re-joined with one blank line.
 *
 * @returns {string[]} The parts, in document order.
 */
function splitAtParagraphBoundaries(text, maxTokens) {
    const parts = [];
    let pending = [];
    let pendingTokens = 0;
    text.split(/\n\n+/).forEach((para) => {
        const cost = estimateTokens(para);
        const overflows = pendingTokens + cost > maxTokens;
        if (overflows && pending.length > 0) {
            // Close the part in progress before starting a new one with this paragraph.
            parts.push(pending.join("\n\n"));
            pending = [];
            pendingTokens = 0;
        }
        pending.push(para);
        pendingTokens += cost;
    });
    if (pending.length > 0) {
        parts.push(pending.join("\n\n"));
    }
    return parts;
}
82
/**
 * Removes a leading front-matter block from `markdown`.
 *
 * Front matter is recognised only when the text starts with a `---` line and a later line
 * consists solely of `---`. Requiring the closing delimiter on its own line stops a `---`
 * that appears inside a metadata value (for example `title: a---b`) from ending the block
 * early and leaking metadata into the content.
 *
 * @param {string} markdown Markdown text, optionally starting with front matter.
 * @returns {string} The text after the front matter, trimmed; the input unchanged when no
 *          complete front-matter block is present.
 */
function stripFrontmatter(markdown) {
    const frontmatter = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/.exec(markdown);
    if (frontmatter === null) {
        return markdown;
    }
    return markdown.slice(frontmatter[0].length).trim();
}
91
/**
 * Converts a markdown document into chunks, one or more per heading section.
 *
 * Front matter is stripped first. Each non-empty section becomes a single chunk when its
 * estimated size is at most `MAX_TOKENS`; otherwise it is split at paragraph boundaries
 * and each part becomes its own chunk. Chunk ids are unique within the returned array.
 *
 * @param markdown Markdown text of the page.
 * @param source   Source identifier copied onto every chunk and used in the id.
 * @param url      Page URL copied onto every chunk and used in the id.
 * @param title    Page title copied onto every chunk.
 * @returns Chunks in document order.
 */
export function chunkMarkdown(markdown, source, url, title) {
    const sections = parseHeadingSections(stripFrontmatter(markdown));
    const chunks = [];
    const usedIds = new Set();
    // Reserves an id for `baseSlug`; on a clash, appends the first free counter (1, 2, ...).
    const uniqueId = (baseSlug) => {
        let candidate = buildChunkId(source, url, baseSlug);
        for (let counter = 1; usedIds.has(candidate); counter++) {
            candidate = buildChunkId(source, url, baseSlug, counter);
        }
        usedIds.add(candidate);
        return candidate;
    };
    for (const section of sections) {
        const body = section.lines.join("\n").trim();
        let content = body;
        if (section.heading) {
            // Re-attach the heading line so the chunk text carries its own title.
            content = `${"#".repeat(section.level)} ${section.heading}\n\n` + body;
        }
        if (!content.trim()) {
            continue;
        }
        // Sections that precede the first heading are slugged as "intro".
        const headingSlug = section.heading
            ? slugifyHeading(section.heading)
            : "intro";
        const tokens = estimateTokens(content);
        if (tokens <= MAX_TOKENS) {
            chunks.push({
                id: uniqueId(headingSlug),
                source,
                url,
                title,
                heading_path: section.headingPath,
                content,
                token_count: tokens,
            });
            continue;
        }
        const parts = splitAtParagraphBoundaries(content, MAX_TOKENS);
        parts.forEach((part, i) => {
            const partContent = part.trim();
            if (!partContent) {
                return;
            }
            // The part index goes into the slug only when the section was actually split.
            const partSlug = parts.length > 1 ? `${headingSlug}-${i}` : headingSlug;
            chunks.push({
                id: uniqueId(partSlug),
                source,
                url,
                title,
                heading_path: section.headingPath,
                content: partContent,
                token_count: estimateTokens(partContent),
            });
        });
    }
    return chunks;
}
153
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../src/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAK1C,MAAM,UAAU,GAAG,GAAG,CAAC;AAEvB,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,OAAO,OAAO;SACX,WAAW,EAAE;SACb,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;SAC5B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;SACnB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,MAAc,EACd,GAAW,EACX,WAAmB,EACnB,KAAc;IAEd,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAChC,MAAM,IAAI,GAAG,GAAG,MAAM,KAAK,OAAO,KAAK,WAAW,EAAE,CAAC;IACrD,OAAO,KAAK,KAAK,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AACzD,CAAC;AASD,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAqB,EAAE,CAAC;IACtC,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,IAAI,cAAc,GAAmB;QACnC,KAAK,EAAE,CAAC;QACR,OAAO,EAAE,EAAE;QACX,WAAW,EAAE,EAAE;QACf,KAAK,EAAE,EAAE;KACV,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAErD,IAAI,YAAY,EAAE,CAAC;YACjB,IAAI,cAAc,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,cAAc,CAAC,OAAO,KAAK,EAAE,EAAE,CAAC;gBACrE,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAChC,CAAC;YAED,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACrC,MAAM,OAAO,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAEvC,OAAO,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC;gBAC3E,UAAU,CAAC,GAAG,EAAE,CAAC;gBACjB,YAAY,CAAC,GAAG,EAAE,CAAC;YACrB,CAAC;YAED,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC3B,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAEvB,cAAc,GAAG;gBACf,KAAK;gBACL,OAAO;gBACP,WAAW,EAAE,CAAC,GAAG,YAAY,CAAC;gBAC9B,KAAK,EAAE,EAAE;aACV,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,IAAI,cAAc,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,cAAc,CAAC,OAAO,KAAK,EAAE,EAAE,CAAC;QACrE,QAAQ,CAAC
,IAAI,CAAC,cAAc,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,0BAA0B,CAAC,IAAY,EAAE,SAAiB;IACjE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAa,EAAE,CAAC;IAC3B,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QAExC,IAAI,aAAa,GAAG,UAAU,GAAG,SAAS,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YACjC,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC;YACjB,aAAa,GAAG,UAAU,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,aAAa,IAAI,UAAU,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,gBAAgB,CAAC,QAAgB;IACxC,IAAI,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC5C,IAAI,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;YACpB,OAAO,QAAQ,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7C,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,QAAgB,EAChB,MAAc,EACd,GAAW,EACX,KAAa;IAEb,MAAM,QAAQ,GAAG,gBAAgB,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,QAAQ,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAChD,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAElC,SAAS,QAAQ,CAAC,QAAgB;QAChC,IAAI,EAAE,GAAG,YAAY,CAAC,MAAM,EAAE,GAAG,EAAE,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAChB,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,IAAI,OAAO,GAAG,CAAC,CAAC;QAChB,OAAO,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,EAAE,CAAC;YACjE,OAAO,EAAE,CAAC;QACZ,CAAC;QACD,EAAE,GAAG,YAAY,CAAC,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;QAClD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO;YACjC,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,CAA
C,IAAI,OAAO,CAAC,OAAO,MAAM;YACvD,CAAC,CAAC,EAAE,CAAC;QACP,MAAM,OAAO,GAAG,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAE9D,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE;YAAE,SAAS;QAE9B,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO;YACjC,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC;YACjC,CAAC,CAAC,OAAO,CAAC;QAEZ,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QAEvC,IAAI,MAAM,IAAI,UAAU,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,QAAQ,CAAC,WAAW,CAAC;gBACzB,MAAM;gBACN,GAAG;gBACH,KAAK;gBACL,YAAY,EAAE,OAAO,CAAC,WAAW;gBACjC,OAAO;gBACP,WAAW,EAAE,MAAM;aACpB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,MAAM,KAAK,GAAG,0BAA0B,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;YAC9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBACpC,IAAI,CAAC,WAAW;oBAAE,SAAS;gBAE3B,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,WAAW,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC;gBACxE,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,QAAQ,CAAC,QAAQ,CAAC;oBACtB,MAAM;oBACN,GAAG;oBACH,KAAK;oBACL,YAAY,EAAE,OAAO,CAAC,WAAW;oBACjC,OAAO,EAAE,WAAW;oBACpB,WAAW,EAAE,cAAc,CAAC,WAAW,CAAC;iBACzC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=cli.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}