@astrofoundry/grimoire 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/admin-HA6FNUV4.js +1516 -0
  2. package/dist/admin-HA6FNUV4.js.map +7 -0
  3. package/dist/chunk-BRS6X3AE.js +12 -0
  4. package/dist/chunk-BRS6X3AE.js.map +7 -0
  5. package/dist/cli.js +255 -713
  6. package/dist/cli.js.map +7 -1
  7. package/package.json +11 -12
  8. package/dist/apikey.d.ts +0 -5
  9. package/dist/apikey.d.ts.map +0 -1
  10. package/dist/apikey.js +0 -84
  11. package/dist/apikey.js.map +0 -1
  12. package/dist/chunker.d.ts +0 -7
  13. package/dist/chunker.d.ts.map +0 -1
  14. package/dist/chunker.js +0 -158
  15. package/dist/chunker.js.map +0 -1
  16. package/dist/cli.d.ts +0 -3
  17. package/dist/cli.d.ts.map +0 -1
  18. package/dist/config.d.ts +0 -23
  19. package/dist/config.d.ts.map +0 -1
  20. package/dist/config.js +0 -89
  21. package/dist/config.js.map +0 -1
  22. package/dist/consumer-config.d.ts +0 -11
  23. package/dist/consumer-config.d.ts.map +0 -1
  24. package/dist/consumer-config.js +0 -60
  25. package/dist/consumer-config.js.map +0 -1
  26. package/dist/consumer.d.ts +0 -11
  27. package/dist/consumer.d.ts.map +0 -1
  28. package/dist/consumer.js +0 -84
  29. package/dist/consumer.js.map +0 -1
  30. package/dist/converter.d.ts +0 -12
  31. package/dist/converter.d.ts.map +0 -1
  32. package/dist/converter.js +0 -95
  33. package/dist/converter.js.map +0 -1
  34. package/dist/embedder.d.ts +0 -9
  35. package/dist/embedder.d.ts.map +0 -1
  36. package/dist/embedder.js +0 -108
  37. package/dist/embedder.js.map +0 -1
  38. package/dist/format.d.ts +0 -5
  39. package/dist/format.d.ts.map +0 -1
  40. package/dist/format.js +0 -6
  41. package/dist/format.js.map +0 -1
  42. package/dist/llms-ingest.d.ts +0 -3
  43. package/dist/llms-ingest.d.ts.map +0 -1
  44. package/dist/llms-ingest.js +0 -85
  45. package/dist/llms-ingest.js.map +0 -1
  46. package/dist/reranker.d.ts +0 -6
  47. package/dist/reranker.d.ts.map +0 -1
  48. package/dist/reranker.js +0 -21
  49. package/dist/reranker.js.map +0 -1
  50. package/dist/scraper.d.ts +0 -9
  51. package/dist/scraper.d.ts.map +0 -1
  52. package/dist/scraper.js +0 -98
  53. package/dist/scraper.js.map +0 -1
  54. package/dist/search.d.ts +0 -8
  55. package/dist/search.d.ts.map +0 -1
  56. package/dist/search.js +0 -43
  57. package/dist/search.js.map +0 -1
  58. package/dist/store.d.ts +0 -15
  59. package/dist/store.d.ts.map +0 -1
  60. package/dist/store.js +0 -149
  61. package/dist/store.js.map +0 -1
  62. package/dist/types.d.ts +0 -26
  63. package/dist/types.d.ts.map +0 -1
  64. package/dist/types.js +0 -2
  65. package/dist/types.js.map +0 -1
package/dist/cli.js CHANGED
@@ -1,639 +1,189 @@
1
1
  #!/usr/bin/env node
2
+ import {
3
+ bold,
4
+ cyan,
5
+ yellow
6
+ } from "./chunk-BRS6X3AE.js";
7
+
8
+ // src/cli.ts
2
9
  import { parseArgs } from "node:util";
3
- import { readFile, writeFile, readdir, rm, mkdir } from "node:fs/promises";
4
10
  import { readFileSync, existsSync } from "node:fs";
5
- import { join, resolve } from "node:path";
11
+ import { join as join2, resolve } from "node:path";
12
+
13
+ // src/consumer-config.ts
14
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
15
+ import { join } from "node:path";
16
+ import { homedir } from "node:os";
6
17
  import { createInterface } from "node:readline";
7
- import { stringify } from "yaml";
8
- import { bold, cyan, yellow } from "./format.js";
9
- import { detectConsumerMode, resolveConsumerConfig, cmdInit } from "./consumer-config.js";
10
- import { cmdConsumerSearch, cmdConsumerList, cmdConsumerStats } from "./consumer.js";
11
- import { loadConfig } from "./config.js";
12
- import { scrapeSource, createBrowser, slugifyUrl } from "./scraper.js";
13
- import { convertSource } from "./converter.js";
14
- import { chunkMarkdown } from "./chunker.js";
15
- import { embedTexts } from "./embedder.js";
16
- import { storeChunks, purgeSource, updateSourceMeta, getAllSourcesMeta, getSourceMeta, deleteSourceMeta, deleteChunksByIds, getSourceChunkIds } from "./store.js";
17
- import { search } from "./search.js";
18
- import { cmdApiKey } from "./apikey.js";
19
- import { ingestLlmsFull } from "./llms-ingest.js";
20
- const PROJECT_ROOT = resolve(import.meta.dirname, "..");
21
- const CONFIG_PATH = join(PROJECT_ROOT, "config", "sources.yaml");
22
- const DATA_DIR = join(PROJECT_ROOT, "data");
23
- const envPath = join(PROJECT_ROOT, ".env");
24
- if (existsSync(envPath)) {
25
- for (const line of readFileSync(envPath, "utf-8").split("\n")) {
26
- const trimmed = line.trim();
27
- if (!trimmed || trimmed.startsWith("#"))
28
- continue;
29
- const eqIndex = trimmed.indexOf("=");
30
- if (eqIndex === -1)
31
- continue;
32
- const key = trimmed.slice(0, eqIndex);
33
- const value = trimmed.slice(eqIndex + 1);
34
- if (!process.env[key]) {
35
- process.env[key] = value;
36
- }
37
- }
38
- }
39
- function prompt(rl, question) {
40
- return new Promise((resolve) => rl.question(question, resolve));
18
+ var CONFIG_DIR = join(homedir(), ".grimoire");
19
+ var CONFIG_FILE = join(CONFIG_DIR, "config.json");
20
+ async function loadConsumerConfig() {
21
+ const raw = await readFile(CONFIG_FILE, "utf-8").catch(() => null);
22
+ if (!raw) return null;
23
+ const data = JSON.parse(raw);
24
+ if (typeof data.apiUrl === "string" && typeof data.apiKey === "string") {
25
+ return { apiUrl: data.apiUrl, apiKey: data.apiKey };
26
+ }
27
+ return null;
41
28
  }
42
- async function cmdAdd() {
43
- const args = parseArgs({
44
- args: process.argv.slice(3),
45
- options: {
46
- url: { type: "string" },
47
- },
48
- allowPositionals: true,
49
- });
50
- const name = args.positionals[0];
51
- const url = args.values.url;
52
- if (!name || !url) {
53
- console.error("Usage: grimoire add <name> --url <start_url>");
54
- process.exit(1);
55
- }
56
- console.log("Scanning page...\n");
57
- const browser = await createBrowser();
58
- const context = await browser.newContext();
59
- const page = await context.newPage();
60
- try {
61
- await page.goto(url, { waitUntil: "domcontentloaded" });
62
- const navCandidates = await page.evaluate(() => {
63
- const selectors = ["nav", "[role='navigation']"];
64
- const results = [];
65
- const seen = new Set();
66
- for (const sel of selectors) {
67
- for (const el of document.querySelectorAll(sel)) {
68
- if (seen.has(el))
69
- continue;
70
- seen.add(el);
71
- const links = el.querySelectorAll("a[href]");
72
- const label = el.getAttribute("aria-label") ||
73
- el.getAttribute("class") ||
74
- el.tagName.toLowerCase();
75
- results.push({
76
- selector: sel,
77
- label,
78
- linkCount: links.length,
79
- });
80
- }
81
- }
82
- return results.sort((a, b) => b.linkCount - a.linkCount);
83
- });
84
- if (navCandidates.length === 0) {
85
- console.error("No navigation elements found on this page.");
86
- process.exit(1);
87
- }
88
- console.log("Navigation candidates:");
89
- for (let i = 0; i < navCandidates.length; i++) {
90
- const c = navCandidates[i];
91
- console.log(` [${i + 1}] ${c.selector} (${c.label}) — ${c.linkCount} links`);
92
- }
93
- const rl = createInterface({ input: process.stdin, output: process.stdout });
94
- const navChoice = await prompt(rl, "\nSelect navigation: ");
95
- const navIndex = parseInt(navChoice, 10) - 1;
96
- if (isNaN(navIndex) || navIndex < 0 || navIndex >= navCandidates.length) {
97
- console.error("Invalid selection.");
98
- rl.close();
99
- process.exit(1);
100
- }
101
- const selectedNav = navCandidates[navIndex];
102
- const parsedUrl = new URL(url);
103
- const defaultPattern = parsedUrl.pathname.replace(/\/$/, "");
104
- const allLinks = await page.$$eval(`${selectedNav.selector} a[href]`, (links, pattern) => {
105
- return [...new Set(links
106
- .map((a) => a.href)
107
- .filter((h) => h.startsWith("http") && !h.includes("?hl=") && !h.endsWith("#") && h.includes(pattern)))];
108
- }, defaultPattern);
109
- console.log(`\nFound ${allLinks.length} links matching ${defaultPattern}`);
110
- const patternInput = await prompt(rl, `Include pattern [default: ${defaultPattern}]: `);
111
- const includePattern = patternInput.trim() || defaultPattern;
112
- const excludeInput = await prompt(rl, "Exclude patterns (comma-separated, optional): ");
113
- const excludePatterns = excludeInput.trim()
114
- ? excludeInput.split(",").map((p) => p.trim())
115
- : undefined;
116
- rl.close();
117
- const contentSelector = await page.evaluate(() => {
118
- if (document.querySelector("article"))
119
- return "article";
120
- if (document.querySelector("main"))
121
- return "main";
122
- return "body";
123
- });
124
- const removeSelectors = await page.evaluate(() => {
125
- const candidates = [
126
- { selector: "nav", label: "nav" },
127
- { selector: "footer", label: "footer" },
128
- { selector: "[role='complementary']", label: "[role='complementary']" },
129
- { selector: "[role='banner']", label: "[role='banner']" },
130
- { selector: ".breadcrumbs, .breadcrumb", label: ".breadcrumbs" },
131
- { selector: ".pagination-nav, .pagination", label: ".pagination-nav" },
132
- ];
133
- return candidates
134
- .filter((c) => document.querySelector(c.selector) !== null)
135
- .map((c) => c.label);
136
- });
137
- if (removeSelectors.length > 0) {
138
- console.log(`\nDetected removable elements: ${removeSelectors.join(", ")}`);
139
- }
140
- const parsedUrlForSitemap = new URL(url);
141
- let sitemapUrl;
142
- try {
143
- const sitemapCheck = await page.goto(`${parsedUrlForSitemap.origin}/sitemap.xml`, { waitUntil: "domcontentloaded", timeout: 10000 });
144
- if (sitemapCheck && sitemapCheck.status() === 200) {
145
- const body = await page.textContent("body");
146
- if (body && (body.includes("<urlset") || body.includes("<sitemapindex"))) {
147
- sitemapUrl = `${parsedUrlForSitemap.origin}/sitemap.xml`;
148
- console.log(`\nSitemap found: ${sitemapUrl}`);
149
- }
150
- }
151
- }
152
- catch {
153
- // No sitemap available
154
- }
155
- const source = {
156
- name: name.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()),
157
- start_url: url,
158
- ...(sitemapUrl ? { sitemap_url: sitemapUrl } : {}),
159
- nav_selector: selectedNav.selector,
160
- content_selector: contentSelector,
161
- include_patterns: [includePattern],
162
- ...(excludePatterns ? { exclude_patterns: excludePatterns } : {}),
163
- ...(removeSelectors.length > 0 ? { remove_selectors: removeSelectors } : {}),
164
- };
165
- let existingContent = "";
166
- try {
167
- existingContent = await readFile(CONFIG_PATH, "utf-8");
168
- }
169
- catch {
170
- existingContent = "sources:\n";
171
- }
172
- const newEntry = stringify({ [name]: source }, { indent: 2 });
173
- const indented = newEntry
174
- .split("\n")
175
- .map((line) => (line.trim() ? ` ${line}` : ""))
176
- .join("\n");
177
- await writeFile(CONFIG_PATH, existingContent.trimEnd() + "\n" + indented, "utf-8");
178
- console.log(`\nSource "${name}" added to config/sources.yaml`);
179
- console.log(`Run "grimoire refresh ${name}" to start scraping.`);
180
- }
181
- finally {
182
- await browser.close();
183
- }
29
+ async function saveConsumerConfig(config) {
30
+ await mkdir(CONFIG_DIR, { recursive: true });
31
+ await writeFile(CONFIG_FILE, JSON.stringify(config, null, 2) + "\n", "utf-8");
184
32
  }
185
- async function loadEmbeddingsCache(cachePath) {
186
- try {
187
- const data = await readFile(cachePath, "utf-8");
188
- return JSON.parse(data);
189
- }
190
- catch {
191
- return null;
192
- }
33
+ async function resolveConsumerConfig() {
34
+ const envUrl = process.env.GRIMOIRE_API_URL;
35
+ const envKey = process.env.GRIMOIRE_API_KEY;
36
+ if (envUrl && envKey) {
37
+ return { apiUrl: envUrl, apiKey: envKey };
38
+ }
39
+ const fileConfig = await loadConsumerConfig();
40
+ if (fileConfig) return fileConfig;
41
+ throw new Error("Grimoire is not configured. Run 'grimoire init' to set up.");
193
42
  }
194
- async function embedWithCheckpoint(texts, rawDir, embeddingsCachePath) {
195
- await mkdir(rawDir, { recursive: true });
196
- const partialCache = await loadEmbeddingsCache(embeddingsCachePath);
197
- const resumeFrom = partialCache && partialCache.length > 0 && partialCache.length < texts.length ? partialCache : undefined;
198
- return embedTexts(texts, {
199
- onProgress: (done, total) => {
200
- console.log(` [${done}/${total}] embedded`);
201
- },
202
- onCheckpoint: async (current) => {
203
- await writeFile(embeddingsCachePath, JSON.stringify(current), "utf-8");
204
- },
205
- resumeFrom,
206
- });
43
+ async function detectConsumerMode() {
44
+ if (process.env.GOOGLE_APPLICATION_CREDENTIALS) return false;
45
+ if (process.env.GRIMOIRE_API_URL) return true;
46
+ const config = await loadConsumerConfig();
47
+ return config !== null;
207
48
  }
208
- async function storeWithStrategy(sourceName, allChunks, embeddings, urlCount, version, diff) {
209
- if (diff) {
210
- console.log(" Computing diff...");
211
- const existingIds = await getSourceChunkIds(sourceName);
212
- const newIds = new Set(allChunks.map((c) => c.id));
213
- const toDelete = [...existingIds].filter((id) => !newIds.has(id));
214
- console.log(` Diff: ${toDelete.length} to delete, ${allChunks.length} to upsert (${existingIds.size} existing)`);
215
- if (toDelete.length > 0) {
216
- console.log(" Deleting removed chunks...");
217
- await deleteChunksByIds(toDelete, (cur, total) => {
218
- console.log(` [${cur}/${total}] deleted`);
219
- });
220
- }
221
- console.log(" Upserting chunks...");
222
- await storeChunks(allChunks, embeddings, (cur, total) => {
223
- console.log(` [${cur}/${total}] stored`);
224
- });
225
- }
226
- else {
227
- console.log(" Purging old chunks...");
228
- await purgeSource(sourceName);
229
- console.log(" Storing in Firestore...");
230
- await storeChunks(allChunks, embeddings, (cur, total) => {
231
- console.log(` [${cur}/${total}] stored`);
232
- });
233
- }
234
- await updateSourceMeta(sourceName, allChunks.length, urlCount, version);
235
- console.log(` Done. ${allChunks.length} chunks stored for "${sourceName}".`);
49
+ async function cmdInit() {
50
+ const rl = createInterface({ input: process.stdin, output: process.stdout });
51
+ const ask = (q) => new Promise((resolve2) => rl.question(q, resolve2));
52
+ const existing = await loadConsumerConfig();
53
+ const apiUrl = await ask(`API URL${existing ? ` [${existing.apiUrl}]` : ""}: `);
54
+ const apiKey = await ask(`API Key${existing ? " [****]" : ""}: `);
55
+ const config = {
56
+ apiUrl: apiUrl.trim() || existing?.apiUrl || "",
57
+ apiKey: apiKey.trim() || existing?.apiKey || ""
58
+ };
59
+ rl.close();
60
+ if (!config.apiUrl || !config.apiKey) {
61
+ throw new Error("Both API URL and API Key are required.");
62
+ }
63
+ await saveConsumerConfig(config);
64
+ console.log(`
65
+ Saved to ${CONFIG_FILE}`);
236
66
  }
237
- async function cmdRefresh() {
238
- const args = parseArgs({
239
- args: process.argv.slice(3),
240
- options: {
241
- full: { type: "boolean", default: false },
242
- all: { type: "boolean", default: false },
243
- diff: { type: "boolean", default: false },
244
- concurrency: { type: "string" },
245
- limit: { type: "string" },
246
- "from-html": { type: "boolean", default: false },
247
- "from-markdown": { type: "boolean", default: false },
248
- "from-embeddings": { type: "boolean", default: false },
249
- "skip-store": { type: "boolean", default: false },
250
- },
251
- allowPositionals: true,
67
+
68
+ // src/consumer.ts
69
+ async function apiRequest(config, path, options) {
70
+ const url = `${config.apiUrl.replace(/\/$/, "")}${path}`;
71
+ let response;
72
+ try {
73
+ response = await fetch(url, {
74
+ ...options,
75
+ headers: {
76
+ "Content-Type": "application/json",
77
+ "x-api-key": config.apiKey,
78
+ ...options?.headers
79
+ }
252
80
  });
253
- const config = await loadConfig(CONFIG_PATH);
254
- const sourcesToRefresh = args.values.all
255
- ? Object.keys(config.sources)
256
- : [args.positionals[0]];
257
- if (!args.values.all && !sourcesToRefresh[0]) {
258
- console.error("Usage: grimoire refresh <source> [--full] [--from-html] [--from-markdown] [--from-embeddings] [--skip-store] [--limit <n>] [--concurrency <n>]");
259
- process.exit(1);
260
- }
261
- const concurrencyOverride = args.values.concurrency ? parseInt(args.values.concurrency, 10) : undefined;
262
- const urlLimit = args.values.limit ? parseInt(args.values.limit, 10) : undefined;
263
- for (const sourceName of sourcesToRefresh) {
264
- const source = config.sources[sourceName];
265
- if (!source) {
266
- console.error(`Source "${sourceName}" not found in config.`);
267
- process.exit(1);
268
- }
269
- if (concurrencyOverride) {
270
- source.concurrency = concurrencyOverride;
271
- }
272
- const rawDir = join(DATA_DIR, "raw", sourceName);
273
- const mdDir = join(DATA_DIR, "markdown", sourceName);
274
- const embeddingsCachePath = join(rawDir, "embeddings.json");
275
- console.log(`\nRefreshing "${sourceName}"...`);
276
- if (args.values.full) {
277
- console.log(" Purging existing chunks...");
278
- const deleted = await purgeSource(sourceName);
279
- console.log(` Deleted ${deleted} chunks.`);
280
- await rm(rawDir, { recursive: true, force: true });
281
- await rm(mdDir, { recursive: true, force: true });
282
- }
283
- let urls;
284
- if (args.values["from-embeddings"]) {
285
- console.log(" Loading cached embeddings...");
286
- const cached = await loadEmbeddingsCache(embeddingsCachePath);
287
- if (!cached) {
288
- console.error(" No cached embeddings found. Run without --from-embeddings first.");
289
- process.exit(1);
290
- }
291
- const mdFiles = await readdir(mdDir);
292
- const allPages = [];
293
- for (const f of mdFiles.filter((f) => f.endsWith(".md"))) {
294
- const content = await readFile(join(mdDir, f), "utf-8");
295
- const urlMatch = content.match(/^url: "(.+)"$/m);
296
- const titleMatch = content.match(/^title: "(.+)"$/m);
297
- allPages.push({
298
- markdown: content,
299
- url: urlMatch?.[1] ?? "",
300
- title: titleMatch?.[1] ?? "Untitled",
301
- });
302
- }
303
- console.log(" Chunking...");
304
- const allChunks = allPages.flatMap((p) => chunkMarkdown(p.markdown, sourceName, p.url, p.title));
305
- console.log(` Created ${allChunks.length} chunks.`);
306
- if (cached.length !== allChunks.length) {
307
- console.error(` Embeddings cache (${cached.length}) doesn't match chunk count (${allChunks.length}). Re-embed with --from-html.`);
308
- process.exit(1);
309
- }
310
- if (args.values["skip-store"]) {
311
- console.log(` Done. ${allChunks.length} chunks ready (skipped Firestore).`);
312
- continue;
313
- }
314
- await storeWithStrategy(sourceName, allChunks, cached, allPages.length, source.version, args.values.diff);
315
- continue;
316
- }
317
- if (args.values["from-markdown"]) {
318
- console.log(" Reading cached markdown...");
319
- const mdFiles = await readdir(mdDir).catch(() => []);
320
- const markdownFiles = mdFiles.filter((f) => f.endsWith(".md"));
321
- if (markdownFiles.length === 0) {
322
- console.error(" No cached markdown found. Run with --from-html first.");
323
- process.exit(1);
324
- }
325
- const pages = [];
326
- for (const f of markdownFiles) {
327
- const content = await readFile(join(mdDir, f), "utf-8");
328
- const urlMatch = content.match(/^url: "(.+)"$/m);
329
- const titleMatch = content.match(/^title: "(.+)"$/m);
330
- pages.push({
331
- markdown: content,
332
- url: urlMatch?.[1] ?? "",
333
- title: titleMatch?.[1] ?? "Untitled",
334
- });
335
- }
336
- console.log(` Found ${pages.length} cached pages.`);
337
- console.log(" Chunking...");
338
- const allChunks = pages.flatMap((p) => chunkMarkdown(p.markdown, sourceName, p.url, p.title));
339
- console.log(` Created ${allChunks.length} chunks.`);
340
- console.log(" Embedding chunks...");
341
- const texts = allChunks.map((c) => c.content);
342
- const embeddings = await embedWithCheckpoint(texts, rawDir, embeddingsCachePath);
343
- if (args.values["skip-store"]) {
344
- console.log(` Done. ${allChunks.length} chunks ready (skipped Firestore).`);
345
- continue;
346
- }
347
- await storeWithStrategy(sourceName, allChunks, embeddings, pages.length, source.version, args.values.diff);
348
- continue;
349
- }
350
- let pages;
351
- if (source.llms_full_url && !args.values["from-html"]) {
352
- console.log(` Fetching llms-full.txt from ${source.llms_full_url}...`);
353
- pages = await ingestLlmsFull(source.llms_full_url, sourceName, source.start_url, DATA_DIR, (cur, total) => {
354
- console.log(` [${cur}/${total}] pages processed`);
355
- });
356
- console.log(` Extracted ${pages.length} pages.`);
357
- }
358
- else {
359
- if (args.values["from-html"]) {
360
- console.log(" Reading URLs from cached HTML...");
361
- const urlsJsonPath = join(rawDir, "urls.json");
362
- try {
363
- urls = JSON.parse(await readFile(urlsJsonPath, "utf-8"));
364
- }
365
- catch {
366
- const rawFiles = await readdir(rawDir);
367
- const htmlFiles = rawFiles.filter((f) => f.endsWith(".html"));
368
- urls = [];
369
- for (const f of htmlFiles) {
370
- const fileSlug = f.replace(/\.html$/, "");
371
- const htmlPath = join(rawDir, f);
372
- const html = await readFile(htmlPath, "utf-8");
373
- const match = html.match(/<link[^>]+rel="canonical"[^>]+href="([^"]+)"/);
374
- if (match && slugifyUrl(match[1]) === fileSlug) {
375
- urls.push(match[1]);
376
- continue;
377
- }
378
- const ogMatch = html.match(/<meta[^>]+property="og:url"[^>]+content="([^"]+)"/);
379
- if (ogMatch && slugifyUrl(ogMatch[1]) === fileSlug) {
380
- urls.push(ogMatch[1]);
381
- continue;
382
- }
383
- urls.push(`https://recovered/${fileSlug}`);
384
- }
385
- }
386
- console.log(` Found ${urls.length} cached pages.`);
387
- }
388
- else {
389
- console.log(" Scraping URLs...");
390
- urls = await scrapeSource(source, sourceName, DATA_DIR, (cur, total, url) => {
391
- console.log(` [${cur}/${total}] ${url}`);
392
- });
393
- console.log(` Found ${urls.length} pages.`);
394
- }
395
- if (urlLimit && urls.length > urlLimit) {
396
- urls = urls.slice(0, urlLimit);
397
- console.log(` Limited to ${urlLimit} pages.`);
398
- }
399
- console.log(" Converting to markdown...");
400
- pages = await convertSource(sourceName, urls, source.content_selector, source.remove_selectors, source.remove_text_patterns, DATA_DIR, source.concurrency, (cur, total) => {
401
- if (cur % 10 === 0 || cur === total)
402
- console.log(` [${cur}/${total}] converted`);
403
- });
404
- }
405
- console.log(" Chunking...");
406
- const allChunks = pages.flatMap((p) => chunkMarkdown(p.markdown, sourceName, p.url, p.title));
407
- console.log(` Created ${allChunks.length} chunks.`);
408
- console.log(" Embedding chunks...");
409
- const texts = allChunks.map((c) => c.content);
410
- const embeddings = await embedWithCheckpoint(texts, rawDir, embeddingsCachePath);
411
- if (args.values["skip-store"]) {
412
- console.log(` Done. ${allChunks.length} chunks ready (skipped Firestore).`);
413
- continue;
414
- }
415
- await storeWithStrategy(sourceName, allChunks, embeddings, pages.length, source.version, args.values.diff);
416
- }
81
+ } catch {
82
+ throw new Error(`Cannot reach Grimoire API at ${config.apiUrl}. Check your GRIMOIRE_API_URL.`);
83
+ }
84
+ if (response.status === 401 || response.status === 403) {
85
+ throw new Error("Invalid API key. Check your GRIMOIRE_API_KEY or run 'grimoire init'.");
86
+ }
87
+ if (!response.ok) {
88
+ throw new Error(`API error: ${response.status} ${response.statusText}`);
89
+ }
90
+ return response.json();
417
91
  }
418
- async function cmdSearch() {
419
- const args = parseArgs({
420
- args: process.argv.slice(3),
421
- options: {
422
- source: { type: "string" },
423
- top: { type: "string" },
424
- compact: { type: "boolean", default: false },
425
- },
426
- allowPositionals: true,
427
- });
428
- const query = args.positionals.join(" ");
429
- if (!query) {
430
- console.error("Usage: grimoire search \"<query>\" [--source <name>] [--top <n>] [--compact]");
431
- process.exit(1);
432
- }
433
- const topN = args.values.top ? parseInt(args.values.top, 10) : undefined;
434
- const results = await search(query, { source: args.values.source, topN });
435
- if (results.length === 0) {
436
- console.log("No results found.");
437
- return;
438
- }
439
- if (args.values.compact) {
440
- for (const r of results) {
441
- console.log(`${r.relevance_score.toFixed(4)} | ${r.source} | ${r.title} | ${r.heading_path.join(" > ")} | ${r.url}`);
442
- }
443
- return;
444
- }
445
- for (let i = 0; i < results.length; i++) {
446
- const r = results[i];
447
- console.log(`\n${bold(`[${i + 1}] ${r.title}`)} (${r.relevance_score.toFixed(4)})`);
448
- console.log(` ${cyan(r.url)}`);
449
- console.log(` ${yellow(r.heading_path.join(" > "))}`);
450
- console.log(` ${r.content.replace(/\n/g, " ")}`);
451
- }
92
+ async function cmdConsumerSearch(config, query, options) {
93
+ const data = await apiRequest(config, "/search", {
94
+ method: "POST",
95
+ body: JSON.stringify({ query, source: options.source, topN: options.topN })
96
+ });
97
+ if (data.results.length === 0) {
98
+ console.log("No results found.");
99
+ return;
100
+ }
101
+ if (options.compact) {
102
+ for (const r of data.results) {
103
+ console.log(`${r.relevance_score.toFixed(4)} | ${r.source} | ${r.title} | ${r.heading_path.join(" > ")} | ${r.url}`);
104
+ }
105
+ return;
106
+ }
107
+ for (let i = 0; i < data.results.length; i++) {
108
+ const r = data.results[i];
109
+ console.log(`
110
+ ${bold(`[${i + 1}] ${r.title}`)} (${r.relevance_score.toFixed(4)})`);
111
+ console.log(` ${cyan(r.url)}`);
112
+ console.log(` ${yellow(r.heading_path.join(" > "))}`);
113
+ console.log(` ${r.content.replace(/\n/g, " ")}`);
114
+ }
452
115
  }
453
- async function cmdList() {
454
- const args = parseArgs({
455
- args: process.argv.slice(3),
456
- options: {
457
- names: { type: "boolean", default: false },
458
- },
459
- allowPositionals: true,
460
- });
461
- const metas = await getAllSourcesMeta();
462
- if (metas.length === 0) {
463
- console.log("No sources have been refreshed yet.");
464
- return;
465
- }
466
- if (args.values.names) {
467
- for (const meta of metas) {
468
- console.log(meta.source);
469
- }
470
- return;
471
- }
472
- console.log("\nSources:\n");
473
- for (const meta of metas) {
474
- const ver = meta.version ? ` v${meta.version}` : "";
475
- console.log(` ${bold(meta.source)}${ver}`);
476
- console.log(` ${meta.chunk_count} chunks, ${meta.url_count} URLs, last refreshed ${meta.last_refreshed}`);
477
- }
116
+ async function cmdConsumerList(config, options) {
117
+ const data = await apiRequest(config, "/list");
118
+ if (data.sources.length === 0) {
119
+ console.log("No sources available.");
120
+ return;
121
+ }
122
+ if (options?.names) {
123
+ for (const s of data.sources) {
124
+ console.log(s.source);
125
+ }
126
+ return;
127
+ }
128
+ console.log("\nSources:\n");
129
+ for (const s of data.sources) {
130
+ const ver = s.version ? ` v${s.version}` : "";
131
+ console.log(` ${bold(s.source)}${ver}`);
132
+ console.log(` ${s.chunk_count} chunks, ${s.url_count} URLs, last refreshed ${s.last_refreshed}`);
133
+ }
478
134
  }
479
- async function cmdStats() {
480
- const metas = await getAllSourcesMeta();
481
- if (metas.length === 0) {
482
- console.log("No sources have been refreshed yet.");
483
- return;
484
- }
485
- let totalChunks = 0;
486
- let totalUrls = 0;
487
- console.log("\nSource Statistics:\n");
488
- for (const meta of metas) {
489
- const ver = meta.version ? ` v${meta.version}` : "";
490
- console.log(` ${bold(meta.source)}${ver}`);
491
- console.log(` Chunks: ${meta.chunk_count}`);
492
- console.log(` URLs: ${meta.url_count}`);
493
- console.log(` Last refreshed: ${meta.last_refreshed}`);
494
- totalChunks += meta.chunk_count;
495
- totalUrls += meta.url_count;
496
- }
497
- console.log(`\n Total: ${totalChunks} chunks across ${totalUrls} URLs from ${metas.length} sources`);
135
+ async function cmdConsumerStats(config) {
136
+ const data = await apiRequest(config, "/stats");
137
+ if (data.sources.length === 0) {
138
+ console.log("No sources have been refreshed yet.");
139
+ return;
140
+ }
141
+ console.log("\nSource Statistics:\n");
142
+ for (const s of data.sources) {
143
+ const ver = s.version ? ` v${s.version}` : "";
144
+ console.log(` ${bold(s.source)}${ver}`);
145
+ console.log(` Chunks: ${s.chunk_count}`);
146
+ console.log(` URLs: ${s.url_count}`);
147
+ console.log(` Last refreshed: ${s.last_refreshed}`);
148
+ }
149
+ console.log(`
150
+ Total: ${data.totalChunks} chunks across ${data.totalUrls} URLs from ${data.sources.length} sources`);
498
151
  }
499
- async function cmdExport() {
500
- const args = parseArgs({
501
- args: process.argv.slice(3),
502
- options: {
503
- format: { type: "string", default: "json" },
504
- },
505
- allowPositionals: true,
506
- });
507
- const sourceName = args.positionals[0];
508
- if (!sourceName) {
509
- console.error("Usage: grimoire export <source> [--format json]");
510
- process.exit(1);
511
- }
512
- const mdDir = join(DATA_DIR, "markdown", sourceName);
513
- let files;
514
- try {
515
- files = await readdir(mdDir);
516
- }
517
- catch {
518
- console.error(`No markdown data found for source "${sourceName}".`);
519
- process.exit(1);
520
- }
521
- const pages = [];
522
- for (const file of files.filter((f) => f.endsWith(".md"))) {
523
- const content = await readFile(join(mdDir, file), "utf-8");
524
- pages.push({ file, content });
525
- }
526
- console.log(JSON.stringify(pages, null, 2));
527
- }
528
- async function cmdDelete() {
529
- const sourceName = process.argv[3];
530
- if (!sourceName) {
531
- console.error("Usage: grimoire delete <source>");
532
- process.exit(1);
533
- }
534
- const meta = await getSourceMeta(sourceName);
535
- if (!meta) {
536
- console.error(`Source "${sourceName}" not found in Firestore.`);
537
- process.exit(1);
538
- }
539
- console.log(`Deleting "${sourceName}" (${meta.chunk_count} chunks)...`);
540
- const deleted = await purgeSource(sourceName);
541
- await deleteSourceMeta(sourceName);
542
- console.log(`Deleted ${deleted} chunks and source metadata for "${sourceName}".`);
543
- }
544
- async function cmdScrapeUrls() {
545
- const args = parseArgs({
546
- args: process.argv.slice(3),
547
- options: {
548
- concurrency: { type: "string" },
549
- },
550
- allowPositionals: true,
551
- });
552
- const sourceName = args.positionals[0];
553
- if (!sourceName) {
554
- console.error("Usage: grimoire scrape-urls <source> [--concurrency <n>]");
555
- process.exit(1);
556
- }
557
- const config = await loadConfig(CONFIG_PATH);
558
- const source = config.sources[sourceName];
559
- if (!source) {
560
- console.error(`Source "${sourceName}" not found in config.`);
561
- process.exit(1);
562
- }
563
- const rawDir = join(DATA_DIR, "raw", sourceName);
564
- const urlsPath = join(rawDir, "urls.json");
565
- let urls;
566
- try {
567
- urls = JSON.parse(await readFile(urlsPath, "utf-8"));
568
- }
569
- catch {
570
- console.error(`No urls.json found for "${sourceName}". Run 'grimoire refresh ${sourceName} --skip-store' first.`);
571
- process.exit(1);
572
- }
573
- const missing = urls.filter((url) => !existsSync(join(rawDir, `${slugifyUrl(url)}.html`)));
574
- console.log(`\nTotal: ${urls.length}, Cached: ${urls.length - missing.length}, Missing: ${missing.length}`);
575
- if (missing.length === 0) {
576
- console.log("Nothing to scrape.");
577
- return;
578
- }
579
- const concurrency = args.values.concurrency ? parseInt(args.values.concurrency, 10) : source.concurrency ?? 20;
580
- const browser = await createBrowser();
581
- const context = await browser.newContext(source.user_agent ? { userAgent: source.user_agent } : {});
582
- let done = 0;
583
- for (let i = 0; i < missing.length; i += concurrency) {
584
- const batch = missing.slice(i, i + concurrency);
585
- await Promise.all(batch.map(async (url) => {
586
- const page = await context.newPage();
587
- try {
588
- await page.goto(url, { waitUntil: source.headed ? "networkidle" : "domcontentloaded", timeout: 30000 });
589
- const html = await page.content();
590
- await writeFile(join(rawDir, `${slugifyUrl(url)}.html`), html, "utf-8");
591
- done++;
592
- if (done % 10 === 0 || done === missing.length)
593
- console.log(` [${done}/${missing.length}]`);
594
- }
595
- catch (e) {
596
- console.error(` FAILED: ${url} - ${e instanceof Error ? e.message : String(e)}`);
597
- }
598
- finally {
599
- await page.close();
600
- }
601
- }));
602
- }
603
- console.log(`Done. Fetched ${done} pages.`);
604
- await browser.close();
152
+
153
+ // src/cli.ts
154
+ var PROJECT_ROOT = resolve(import.meta.dirname, "..");
155
+ var envPath = join2(PROJECT_ROOT, ".env");
156
+ if (existsSync(envPath)) {
157
+ for (const line of readFileSync(envPath, "utf-8").split("\n")) {
158
+ const trimmed = line.trim();
159
+ if (!trimmed || trimmed.startsWith("#")) continue;
160
+ const eqIndex = trimmed.indexOf("=");
161
+ if (eqIndex === -1) continue;
162
+ const key = trimmed.slice(0, eqIndex);
163
+ const value = trimmed.slice(eqIndex + 1);
164
+ if (!process.env[key]) {
165
+ process.env[key] = value;
166
+ }
167
+ }
605
168
  }
169
+ var ADMIN_ONLY_COMMANDS = ["add", "refresh", "delete", "scrape-urls", "export", "apikey"];
606
170
  async function cmdUpdate() {
607
- const { execSync } = await import("node:child_process");
608
- const pkg = JSON.parse(readFileSync(join(PROJECT_ROOT, "package.json"), "utf-8"));
609
- console.log(`Current version: ${pkg.version}`);
610
- console.log("Checking for updates...");
611
- execSync("pnpm add -g @astrofoundry/grimoire@latest", { stdio: "inherit" });
612
- const updated = JSON.parse(readFileSync(join(PROJECT_ROOT, "package.json"), "utf-8"));
613
- if (updated.version === pkg.version) {
614
- console.log("Already on the latest version.");
615
- }
616
- else {
617
- console.log(`Updated to ${updated.version}.`);
618
- }
171
+ const { execSync } = await import("node:child_process");
172
+ const pkg = JSON.parse(readFileSync(join2(PROJECT_ROOT, "package.json"), "utf-8"));
173
+ console.log(`Current version: ${pkg.version}`);
174
+ console.log("Checking for updates...");
175
+ execSync("npm install -g @astrofoundry/grimoire@latest", { stdio: "inherit" });
176
+ const updated = JSON.parse(readFileSync(join2(PROJECT_ROOT, "package.json"), "utf-8"));
177
+ if (updated.version === pkg.version) {
178
+ console.log("Already on the latest version.");
179
+ } else {
180
+ console.log(`Updated to ${updated.version}.`);
181
+ }
619
182
  }
620
- const ADMIN_COMMANDS = {
621
- add: cmdAdd,
622
- refresh: cmdRefresh,
623
- delete: cmdDelete,
624
- "scrape-urls": cmdScrapeUrls,
625
- update: cmdUpdate,
626
- search: cmdSearch,
627
- list: cmdList,
628
- stats: cmdStats,
629
- export: cmdExport,
630
- apikey: cmdApiKey,
631
- };
632
- const ADMIN_ONLY_COMMANDS = ["add", "refresh", "delete", "scrape-urls", "export", "apikey"];
633
183
  function showHelp(isConsumer) {
634
- if (isConsumer) {
635
- console.log(`
636
- grimoire Documentation RAG
184
+ if (isConsumer) {
185
+ console.log(`
186
+ grimoire \u2014 Documentation RAG
637
187
 
638
188
  USAGE
639
189
  grimoire search "<query>" [--source <name>] [--top <n>] [--compact]
@@ -660,7 +210,7 @@ FLAGS
660
210
  heading path, and content snippet.
661
211
 
662
212
  RELEVANCE SCORES
663
- Range 0–1 (higher = better). >0.85 strong match, 0.6–0.85 relevant,
213
+ Range 0\u20131 (higher = better). >0.85 strong match, 0.6\u20130.85 relevant,
664
214
  <0.6 usually too weak to cite. "No results found." + exit 0 = clean miss.
665
215
 
666
216
  MANAGEMENT
@@ -674,10 +224,9 @@ ENVIRONMENT
674
224
  GRIMOIRE_API_URL API endpoint URL
675
225
  GRIMOIRE_API_KEY API key
676
226
  `);
677
- }
678
- else {
679
- console.log(`
680
- grimoire — Documentation RAG System (admin)
227
+ } else {
228
+ console.log(`
229
+ grimoire \u2014 Documentation RAG System (admin)
681
230
 
682
231
  Commands:
683
232
  add <name> --url <url> Add a new documentation source
@@ -701,103 +250,96 @@ Commands:
701
250
  apikey list List API keys
702
251
  apikey delete <name> Delete an API key
703
252
  `);
704
- }
253
+ }
705
254
  }
706
255
  async function main() {
707
- const command = process.argv[2];
708
- const isConsumer = await detectConsumerMode();
709
- if (command === "--version" || command === "-v") {
710
- const pkg = JSON.parse(readFileSync(join(PROJECT_ROOT, "package.json"), "utf-8"));
711
- console.log(pkg.version);
712
- process.exit(0);
713
- }
714
- if (!command || command === "--help" || command === "-h") {
715
- showHelp(isConsumer);
716
- process.exit(0);
717
- }
718
- if (command === "update") {
719
- await cmdUpdate();
720
- return;
721
- }
722
- if (isConsumer) {
723
- if (command === "init") {
724
- await cmdInit();
725
- return;
726
- }
727
- if (ADMIN_ONLY_COMMANDS.includes(command)) {
728
- console.error(`The '${command}' command is only available in admin mode.`);
729
- process.exit(1);
730
- }
731
- const config = await resolveConsumerConfig().catch(() => {
732
- console.error("Grimoire is not configured yet. Run 'grimoire init' to set up your API connection.");
733
- process.exit(1);
734
- });
735
- if (command === "search") {
736
- const args = parseArgs({
737
- args: process.argv.slice(3),
738
- options: {
739
- source: { type: "string" },
740
- top: { type: "string" },
741
- compact: { type: "boolean", default: false },
742
- },
743
- allowPositionals: true,
744
- });
745
- const query = args.positionals[0];
746
- if (!query) {
747
- console.error("Usage: grimoire search \"<query>\" [--source <name>] [--top <n>] [--compact]");
748
- process.exit(1);
749
- }
750
- const topN = args.values.top ? parseInt(args.values.top, 10) : undefined;
751
- await cmdConsumerSearch(config, query, { source: args.values.source, topN, compact: args.values.compact });
752
- }
753
- else if (command === "list") {
754
- const args = parseArgs({
755
- args: process.argv.slice(3),
756
- options: { names: { type: "boolean", default: false } },
757
- allowPositionals: true,
758
- });
759
- await cmdConsumerList(config, { names: args.values.names });
760
- }
761
- else if (command === "stats") {
762
- await cmdConsumerStats(config);
763
- }
764
- else {
765
- console.error(`Unknown command: ${command}. Run "grimoire --help" for usage.`);
766
- process.exit(1);
767
- }
768
- return;
769
- }
256
+ const command = process.argv[2];
257
+ if (command === "--version" || command === "-v") {
258
+ const pkg = JSON.parse(readFileSync(join2(PROJECT_ROOT, "package.json"), "utf-8"));
259
+ console.log(pkg.version);
260
+ process.exit(0);
261
+ }
262
+ const isConsumer = await detectConsumerMode();
263
+ if (!command || command === "--help" || command === "-h") {
264
+ showHelp(isConsumer);
265
+ process.exit(0);
266
+ }
267
+ if (command === "update") {
268
+ await cmdUpdate();
269
+ return;
270
+ }
271
+ if (isConsumer) {
770
272
  if (command === "init") {
771
- await cmdInit();
772
- return;
273
+ await cmdInit();
274
+ return;
773
275
  }
774
- if (!existsSync(CONFIG_PATH)) {
775
- console.error("Grimoire is not configured. Run 'grimoire init' to set up your API key.");
776
- process.exit(1);
276
+ if (ADMIN_ONLY_COMMANDS.includes(command)) {
277
+ console.error(`The '${command}' command is only available in admin mode.`);
278
+ process.exit(1);
777
279
  }
778
- const handler = ADMIN_COMMANDS[command];
779
- if (!handler) {
780
- console.error(`Unknown command: ${command}. Run "grimoire --help" for usage.`);
280
+ const config = await resolveConsumerConfig().catch(() => {
281
+ console.error("Grimoire is not configured yet. Run 'grimoire init' to set up your API connection.");
282
+ process.exit(1);
283
+ });
284
+ if (command === "search") {
285
+ const args = parseArgs({
286
+ args: process.argv.slice(3),
287
+ options: {
288
+ source: { type: "string" },
289
+ top: { type: "string" },
290
+ compact: { type: "boolean", default: false }
291
+ },
292
+ allowPositionals: true
293
+ });
294
+ const query = args.positionals[0];
295
+ if (!query) {
296
+ console.error('Usage: grimoire search "<query>" [--source <name>] [--top <n>] [--compact]');
781
297
  process.exit(1);
782
- }
783
- await handler();
298
+ }
299
+ const topN = args.values.top ? parseInt(args.values.top, 10) : void 0;
300
+ await cmdConsumerSearch(config, query, { source: args.values.source, topN, compact: args.values.compact });
301
+ } else if (command === "list") {
302
+ const args = parseArgs({
303
+ args: process.argv.slice(3),
304
+ options: { names: { type: "boolean", default: false } },
305
+ allowPositionals: true
306
+ });
307
+ await cmdConsumerList(config, { names: args.values.names });
308
+ } else if (command === "stats") {
309
+ await cmdConsumerStats(config);
310
+ } else {
311
+ console.error(`Unknown command: ${command}. Run "grimoire --help" for usage.`);
312
+ process.exit(1);
313
+ }
314
+ return;
315
+ }
316
+ if (command === "init") {
317
+ await cmdInit();
318
+ return;
319
+ }
320
+ const { ADMIN_COMMANDS } = await import("./admin-HA6FNUV4.js");
321
+ const handler = ADMIN_COMMANDS[command];
322
+ if (!handler) {
323
+ console.error(`Unknown command: ${command}. Run "grimoire --help" for usage.`);
324
+ process.exit(1);
325
+ }
326
+ await handler();
784
327
  }
785
- const GCP_AUTH_PATTERNS = [
786
- "Unable to detect a Project Id",
787
- "Could not load the default credentials",
788
- "invalid_grant",
789
- "invalid_rapt",
790
- "UNAUTHENTICATED",
791
- "Getting metadata from plugin failed",
328
+ var GCP_AUTH_PATTERNS = [
329
+ "Unable to detect a Project Id",
330
+ "Could not load the default credentials",
331
+ "invalid_grant",
332
+ "invalid_rapt",
333
+ "UNAUTHENTICATED",
334
+ "Getting metadata from plugin failed"
792
335
  ];
793
336
  main().catch((err) => {
794
- const msg = err.message ?? String(err);
795
- if (GCP_AUTH_PATTERNS.some((p) => msg.includes(p))) {
796
- console.error("Google Cloud authentication failed. Re-authenticate with:\n\n gcloud auth application-default login\n");
797
- }
798
- else {
799
- console.error(`Error: ${msg}`);
800
- }
801
- process.exit(1);
337
+ const msg = err.message ?? String(err);
338
+ if (GCP_AUTH_PATTERNS.some((p) => msg.includes(p))) {
339
+ console.error("Google Cloud authentication failed. Re-authenticate with:\n\n gcloud auth application-default login\n");
340
+ } else {
341
+ console.error(`Error: ${msg}`);
342
+ }
343
+ process.exit(1);
802
344
  });
803
- //# sourceMappingURL=cli.js.map
345
+ //# sourceMappingURL=cli.js.map