@astrofoundry/grimoire 3.12.3 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin-HA6FNUV4.js +1516 -0
- package/dist/admin-HA6FNUV4.js.map +7 -0
- package/dist/chunk-BRS6X3AE.js +12 -0
- package/dist/chunk-BRS6X3AE.js.map +7 -0
- package/dist/cli.js +291 -722
- package/dist/cli.js.map +7 -1
- package/package.json +11 -12
- package/dist/apikey.d.ts +0 -5
- package/dist/apikey.d.ts.map +0 -1
- package/dist/apikey.js +0 -84
- package/dist/apikey.js.map +0 -1
- package/dist/chunker.d.ts +0 -7
- package/dist/chunker.d.ts.map +0 -1
- package/dist/chunker.js +0 -158
- package/dist/chunker.js.map +0 -1
- package/dist/cli.d.ts +0 -3
- package/dist/cli.d.ts.map +0 -1
- package/dist/config.d.ts +0 -23
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -89
- package/dist/config.js.map +0 -1
- package/dist/consumer-config.d.ts +0 -11
- package/dist/consumer-config.d.ts.map +0 -1
- package/dist/consumer-config.js +0 -60
- package/dist/consumer-config.js.map +0 -1
- package/dist/consumer.d.ts +0 -11
- package/dist/consumer.d.ts.map +0 -1
- package/dist/consumer.js +0 -84
- package/dist/consumer.js.map +0 -1
- package/dist/converter.d.ts +0 -12
- package/dist/converter.d.ts.map +0 -1
- package/dist/converter.js +0 -95
- package/dist/converter.js.map +0 -1
- package/dist/embedder.d.ts +0 -9
- package/dist/embedder.d.ts.map +0 -1
- package/dist/embedder.js +0 -108
- package/dist/embedder.js.map +0 -1
- package/dist/format.d.ts +0 -5
- package/dist/format.d.ts.map +0 -1
- package/dist/format.js +0 -6
- package/dist/format.js.map +0 -1
- package/dist/llms-ingest.d.ts +0 -3
- package/dist/llms-ingest.d.ts.map +0 -1
- package/dist/llms-ingest.js +0 -85
- package/dist/llms-ingest.js.map +0 -1
- package/dist/reranker.d.ts +0 -6
- package/dist/reranker.d.ts.map +0 -1
- package/dist/reranker.js +0 -21
- package/dist/reranker.js.map +0 -1
- package/dist/scraper.d.ts +0 -9
- package/dist/scraper.d.ts.map +0 -1
- package/dist/scraper.js +0 -98
- package/dist/scraper.js.map +0 -1
- package/dist/search.d.ts +0 -8
- package/dist/search.d.ts.map +0 -1
- package/dist/search.js +0 -43
- package/dist/search.js.map +0 -1
- package/dist/store.d.ts +0 -15
- package/dist/store.d.ts.map +0 -1
- package/dist/store.js +0 -149
- package/dist/store.js.map +0 -1
- package/dist/types.d.ts +0 -26
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -2
- package/dist/types.js.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -1,656 +1,232 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
bold,
|
|
4
|
+
cyan,
|
|
5
|
+
yellow
|
|
6
|
+
} from "./chunk-BRS6X3AE.js";
|
|
7
|
+
|
|
8
|
+
// src/cli.ts
|
|
2
9
|
import { parseArgs } from "node:util";
|
|
3
|
-
import { readFile, writeFile, readdir, rm, mkdir } from "node:fs/promises";
|
|
4
10
|
import { readFileSync, existsSync } from "node:fs";
|
|
5
|
-
import { join, resolve } from "node:path";
|
|
11
|
+
import { join as join2, resolve } from "node:path";
|
|
12
|
+
|
|
13
|
+
// src/consumer-config.ts
|
|
14
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
15
|
+
import { join } from "node:path";
|
|
16
|
+
import { homedir } from "node:os";
|
|
6
17
|
import { createInterface } from "node:readline";
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
import { search } from "./search.js";
|
|
18
|
-
import { cmdApiKey } from "./apikey.js";
|
|
19
|
-
import { ingestLlmsFull } from "./llms-ingest.js";
|
|
20
|
-
const PROJECT_ROOT = resolve(import.meta.dirname, "..");
|
|
21
|
-
const CONFIG_PATH = join(PROJECT_ROOT, "config", "sources.yaml");
|
|
22
|
-
const DATA_DIR = join(PROJECT_ROOT, "data");
|
|
23
|
-
const envPath = join(PROJECT_ROOT, ".env");
|
|
24
|
-
if (existsSync(envPath)) {
|
|
25
|
-
for (const line of readFileSync(envPath, "utf-8").split("\n")) {
|
|
26
|
-
const trimmed = line.trim();
|
|
27
|
-
if (!trimmed || trimmed.startsWith("#"))
|
|
28
|
-
continue;
|
|
29
|
-
const eqIndex = trimmed.indexOf("=");
|
|
30
|
-
if (eqIndex === -1)
|
|
31
|
-
continue;
|
|
32
|
-
const key = trimmed.slice(0, eqIndex);
|
|
33
|
-
const value = trimmed.slice(eqIndex + 1);
|
|
34
|
-
if (!process.env[key]) {
|
|
35
|
-
process.env[key] = value;
|
|
36
|
-
}
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
function prompt(rl, question) {
|
|
40
|
-
return new Promise((resolve) => rl.question(question, resolve));
|
|
41
|
-
}
|
|
42
|
-
async function cmdAdd() {
|
|
43
|
-
const args = parseArgs({
|
|
44
|
-
args: process.argv.slice(3),
|
|
45
|
-
options: {
|
|
46
|
-
url: { type: "string" },
|
|
47
|
-
},
|
|
48
|
-
allowPositionals: true,
|
|
49
|
-
});
|
|
50
|
-
const name = args.positionals[0];
|
|
51
|
-
const url = args.values.url;
|
|
52
|
-
if (!name || !url) {
|
|
53
|
-
console.error("Usage: grimoire add <name> --url <start_url>");
|
|
54
|
-
process.exit(1);
|
|
55
|
-
}
|
|
56
|
-
console.log("Scanning page...\n");
|
|
57
|
-
const browser = await createBrowser();
|
|
58
|
-
const context = await browser.newContext();
|
|
59
|
-
const page = await context.newPage();
|
|
60
|
-
try {
|
|
61
|
-
await page.goto(url, { waitUntil: "domcontentloaded" });
|
|
62
|
-
const navCandidates = await page.evaluate(() => {
|
|
63
|
-
const selectors = ["nav", "[role='navigation']"];
|
|
64
|
-
const results = [];
|
|
65
|
-
const seen = new Set();
|
|
66
|
-
for (const sel of selectors) {
|
|
67
|
-
for (const el of document.querySelectorAll(sel)) {
|
|
68
|
-
if (seen.has(el))
|
|
69
|
-
continue;
|
|
70
|
-
seen.add(el);
|
|
71
|
-
const links = el.querySelectorAll("a[href]");
|
|
72
|
-
const label = el.getAttribute("aria-label") ||
|
|
73
|
-
el.getAttribute("class") ||
|
|
74
|
-
el.tagName.toLowerCase();
|
|
75
|
-
results.push({
|
|
76
|
-
selector: sel,
|
|
77
|
-
label,
|
|
78
|
-
linkCount: links.length,
|
|
79
|
-
});
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
return results.sort((a, b) => b.linkCount - a.linkCount);
|
|
83
|
-
});
|
|
84
|
-
if (navCandidates.length === 0) {
|
|
85
|
-
console.error("No navigation elements found on this page.");
|
|
86
|
-
process.exit(1);
|
|
87
|
-
}
|
|
88
|
-
console.log("Navigation candidates:");
|
|
89
|
-
for (let i = 0; i < navCandidates.length; i++) {
|
|
90
|
-
const c = navCandidates[i];
|
|
91
|
-
console.log(` [${i + 1}] ${c.selector} (${c.label}) — ${c.linkCount} links`);
|
|
92
|
-
}
|
|
93
|
-
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
94
|
-
const navChoice = await prompt(rl, "\nSelect navigation: ");
|
|
95
|
-
const navIndex = parseInt(navChoice, 10) - 1;
|
|
96
|
-
if (isNaN(navIndex) || navIndex < 0 || navIndex >= navCandidates.length) {
|
|
97
|
-
console.error("Invalid selection.");
|
|
98
|
-
rl.close();
|
|
99
|
-
process.exit(1);
|
|
100
|
-
}
|
|
101
|
-
const selectedNav = navCandidates[navIndex];
|
|
102
|
-
const parsedUrl = new URL(url);
|
|
103
|
-
const defaultPattern = parsedUrl.pathname.replace(/\/$/, "");
|
|
104
|
-
const allLinks = await page.$$eval(`${selectedNav.selector} a[href]`, (links, pattern) => {
|
|
105
|
-
return [...new Set(links
|
|
106
|
-
.map((a) => a.href)
|
|
107
|
-
.filter((h) => h.startsWith("http") && !h.includes("?hl=") && !h.endsWith("#") && h.includes(pattern)))];
|
|
108
|
-
}, defaultPattern);
|
|
109
|
-
console.log(`\nFound ${allLinks.length} links matching ${defaultPattern}`);
|
|
110
|
-
const patternInput = await prompt(rl, `Include pattern [default: ${defaultPattern}]: `);
|
|
111
|
-
const includePattern = patternInput.trim() || defaultPattern;
|
|
112
|
-
const excludeInput = await prompt(rl, "Exclude patterns (comma-separated, optional): ");
|
|
113
|
-
const excludePatterns = excludeInput.trim()
|
|
114
|
-
? excludeInput.split(",").map((p) => p.trim())
|
|
115
|
-
: undefined;
|
|
116
|
-
rl.close();
|
|
117
|
-
const contentSelector = await page.evaluate(() => {
|
|
118
|
-
if (document.querySelector("article"))
|
|
119
|
-
return "article";
|
|
120
|
-
if (document.querySelector("main"))
|
|
121
|
-
return "main";
|
|
122
|
-
return "body";
|
|
123
|
-
});
|
|
124
|
-
const removeSelectors = await page.evaluate(() => {
|
|
125
|
-
const candidates = [
|
|
126
|
-
{ selector: "nav", label: "nav" },
|
|
127
|
-
{ selector: "footer", label: "footer" },
|
|
128
|
-
{ selector: "[role='complementary']", label: "[role='complementary']" },
|
|
129
|
-
{ selector: "[role='banner']", label: "[role='banner']" },
|
|
130
|
-
{ selector: ".breadcrumbs, .breadcrumb", label: ".breadcrumbs" },
|
|
131
|
-
{ selector: ".pagination-nav, .pagination", label: ".pagination-nav" },
|
|
132
|
-
];
|
|
133
|
-
return candidates
|
|
134
|
-
.filter((c) => document.querySelector(c.selector) !== null)
|
|
135
|
-
.map((c) => c.label);
|
|
136
|
-
});
|
|
137
|
-
if (removeSelectors.length > 0) {
|
|
138
|
-
console.log(`\nDetected removable elements: ${removeSelectors.join(", ")}`);
|
|
139
|
-
}
|
|
140
|
-
const parsedUrlForSitemap = new URL(url);
|
|
141
|
-
let sitemapUrl;
|
|
142
|
-
try {
|
|
143
|
-
const sitemapCheck = await page.goto(`${parsedUrlForSitemap.origin}/sitemap.xml`, { waitUntil: "domcontentloaded", timeout: 10000 });
|
|
144
|
-
if (sitemapCheck && sitemapCheck.status() === 200) {
|
|
145
|
-
const body = await page.textContent("body");
|
|
146
|
-
if (body && (body.includes("<urlset") || body.includes("<sitemapindex"))) {
|
|
147
|
-
sitemapUrl = `${parsedUrlForSitemap.origin}/sitemap.xml`;
|
|
148
|
-
console.log(`\nSitemap found: ${sitemapUrl}`);
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
catch {
|
|
153
|
-
// No sitemap available
|
|
154
|
-
}
|
|
155
|
-
const source = {
|
|
156
|
-
name: name.replace(/-/g, " ").replace(/\b\w/g, (c) => c.toUpperCase()),
|
|
157
|
-
start_url: url,
|
|
158
|
-
...(sitemapUrl ? { sitemap_url: sitemapUrl } : {}),
|
|
159
|
-
nav_selector: selectedNav.selector,
|
|
160
|
-
content_selector: contentSelector,
|
|
161
|
-
include_patterns: [includePattern],
|
|
162
|
-
...(excludePatterns ? { exclude_patterns: excludePatterns } : {}),
|
|
163
|
-
...(removeSelectors.length > 0 ? { remove_selectors: removeSelectors } : {}),
|
|
164
|
-
};
|
|
165
|
-
let existingContent = "";
|
|
166
|
-
try {
|
|
167
|
-
existingContent = await readFile(CONFIG_PATH, "utf-8");
|
|
168
|
-
}
|
|
169
|
-
catch {
|
|
170
|
-
existingContent = "sources:\n";
|
|
171
|
-
}
|
|
172
|
-
const newEntry = stringify({ [name]: source }, { indent: 2 });
|
|
173
|
-
const indented = newEntry
|
|
174
|
-
.split("\n")
|
|
175
|
-
.map((line) => (line.trim() ? ` ${line}` : ""))
|
|
176
|
-
.join("\n");
|
|
177
|
-
await writeFile(CONFIG_PATH, existingContent.trimEnd() + "\n" + indented, "utf-8");
|
|
178
|
-
console.log(`\nSource "${name}" added to config/sources.yaml`);
|
|
179
|
-
console.log(`Run "grimoire refresh ${name}" to start scraping.`);
|
|
180
|
-
}
|
|
181
|
-
finally {
|
|
182
|
-
await browser.close();
|
|
183
|
-
}
|
|
18
|
+
var CONFIG_DIR = join(homedir(), ".grimoire");
|
|
19
|
+
var CONFIG_FILE = join(CONFIG_DIR, "config.json");
|
|
20
|
+
async function loadConsumerConfig() {
|
|
21
|
+
const raw = await readFile(CONFIG_FILE, "utf-8").catch(() => null);
|
|
22
|
+
if (!raw) return null;
|
|
23
|
+
const data = JSON.parse(raw);
|
|
24
|
+
if (typeof data.apiUrl === "string" && typeof data.apiKey === "string") {
|
|
25
|
+
return { apiUrl: data.apiUrl, apiKey: data.apiKey };
|
|
26
|
+
}
|
|
27
|
+
return null;
|
|
184
28
|
}
|
|
185
|
-
async function
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
return JSON.parse(data);
|
|
189
|
-
}
|
|
190
|
-
catch {
|
|
191
|
-
return null;
|
|
192
|
-
}
|
|
29
|
+
async function saveConsumerConfig(config) {
|
|
30
|
+
await mkdir(CONFIG_DIR, { recursive: true });
|
|
31
|
+
await writeFile(CONFIG_FILE, JSON.stringify(config, null, 2) + "\n", "utf-8");
|
|
193
32
|
}
|
|
194
|
-
async function
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
return
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
await writeFile(embeddingsCachePath, JSON.stringify(current), "utf-8");
|
|
204
|
-
},
|
|
205
|
-
resumeFrom,
|
|
206
|
-
});
|
|
33
|
+
async function resolveConsumerConfig() {
|
|
34
|
+
const envUrl = process.env.GRIMOIRE_API_URL;
|
|
35
|
+
const envKey = process.env.GRIMOIRE_API_KEY;
|
|
36
|
+
if (envUrl && envKey) {
|
|
37
|
+
return { apiUrl: envUrl, apiKey: envKey };
|
|
38
|
+
}
|
|
39
|
+
const fileConfig = await loadConsumerConfig();
|
|
40
|
+
if (fileConfig) return fileConfig;
|
|
41
|
+
throw new Error("Grimoire is not configured. Run 'grimoire init' to set up.");
|
|
207
42
|
}
|
|
208
|
-
async function
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
const toDelete = [...existingIds].filter((id) => !newIds.has(id));
|
|
214
|
-
console.log(` Diff: ${toDelete.length} to delete, ${allChunks.length} to upsert (${existingIds.size} existing)`);
|
|
215
|
-
if (toDelete.length > 0) {
|
|
216
|
-
console.log(" Deleting removed chunks...");
|
|
217
|
-
await deleteChunksByIds(toDelete, (cur, total) => {
|
|
218
|
-
console.log(` [${cur}/${total}] deleted`);
|
|
219
|
-
});
|
|
220
|
-
}
|
|
221
|
-
console.log(" Upserting chunks...");
|
|
222
|
-
await storeChunks(allChunks, embeddings, (cur, total) => {
|
|
223
|
-
console.log(` [${cur}/${total}] stored`);
|
|
224
|
-
});
|
|
225
|
-
}
|
|
226
|
-
else {
|
|
227
|
-
console.log(" Purging old chunks...");
|
|
228
|
-
await purgeSource(sourceName);
|
|
229
|
-
console.log(" Storing in Firestore...");
|
|
230
|
-
await storeChunks(allChunks, embeddings, (cur, total) => {
|
|
231
|
-
console.log(` [${cur}/${total}] stored`);
|
|
232
|
-
});
|
|
233
|
-
}
|
|
234
|
-
await updateSourceMeta(sourceName, allChunks.length, urlCount, version);
|
|
235
|
-
console.log(` Done. ${allChunks.length} chunks stored for "${sourceName}".`);
|
|
236
|
-
}
|
|
237
|
-
async function cmdRefresh() {
|
|
238
|
-
const args = parseArgs({
|
|
239
|
-
args: process.argv.slice(3),
|
|
240
|
-
options: {
|
|
241
|
-
full: { type: "boolean", default: false },
|
|
242
|
-
all: { type: "boolean", default: false },
|
|
243
|
-
diff: { type: "boolean", default: false },
|
|
244
|
-
concurrency: { type: "string" },
|
|
245
|
-
limit: { type: "string" },
|
|
246
|
-
"from-html": { type: "boolean", default: false },
|
|
247
|
-
"from-markdown": { type: "boolean", default: false },
|
|
248
|
-
"from-embeddings": { type: "boolean", default: false },
|
|
249
|
-
"skip-store": { type: "boolean", default: false },
|
|
250
|
-
},
|
|
251
|
-
allowPositionals: true,
|
|
252
|
-
});
|
|
253
|
-
const config = await loadConfig(CONFIG_PATH);
|
|
254
|
-
const sourcesToRefresh = args.values.all
|
|
255
|
-
? Object.keys(config.sources)
|
|
256
|
-
: [args.positionals[0]];
|
|
257
|
-
if (!args.values.all && !sourcesToRefresh[0]) {
|
|
258
|
-
console.error("Usage: grimoire refresh <source> [--full] [--from-html] [--from-markdown] [--from-embeddings] [--skip-store] [--limit <n>] [--concurrency <n>]");
|
|
259
|
-
process.exit(1);
|
|
260
|
-
}
|
|
261
|
-
const concurrencyOverride = args.values.concurrency ? parseInt(args.values.concurrency, 10) : undefined;
|
|
262
|
-
const urlLimit = args.values.limit ? parseInt(args.values.limit, 10) : undefined;
|
|
263
|
-
for (const sourceName of sourcesToRefresh) {
|
|
264
|
-
const source = config.sources[sourceName];
|
|
265
|
-
if (!source) {
|
|
266
|
-
console.error(`Source "${sourceName}" not found in config.`);
|
|
267
|
-
process.exit(1);
|
|
268
|
-
}
|
|
269
|
-
if (concurrencyOverride) {
|
|
270
|
-
source.concurrency = concurrencyOverride;
|
|
271
|
-
}
|
|
272
|
-
const rawDir = join(DATA_DIR, "raw", sourceName);
|
|
273
|
-
const mdDir = join(DATA_DIR, "markdown", sourceName);
|
|
274
|
-
const embeddingsCachePath = join(rawDir, "embeddings.json");
|
|
275
|
-
console.log(`\nRefreshing "${sourceName}"...`);
|
|
276
|
-
if (args.values.full) {
|
|
277
|
-
console.log(" Purging existing chunks...");
|
|
278
|
-
const deleted = await purgeSource(sourceName);
|
|
279
|
-
console.log(` Deleted ${deleted} chunks.`);
|
|
280
|
-
await rm(rawDir, { recursive: true, force: true });
|
|
281
|
-
await rm(mdDir, { recursive: true, force: true });
|
|
282
|
-
}
|
|
283
|
-
let urls;
|
|
284
|
-
if (args.values["from-embeddings"]) {
|
|
285
|
-
console.log(" Loading cached embeddings...");
|
|
286
|
-
const cached = await loadEmbeddingsCache(embeddingsCachePath);
|
|
287
|
-
if (!cached) {
|
|
288
|
-
console.error(" No cached embeddings found. Run without --from-embeddings first.");
|
|
289
|
-
process.exit(1);
|
|
290
|
-
}
|
|
291
|
-
const mdFiles = await readdir(mdDir);
|
|
292
|
-
const allPages = [];
|
|
293
|
-
for (const f of mdFiles.filter((f) => f.endsWith(".md"))) {
|
|
294
|
-
const content = await readFile(join(mdDir, f), "utf-8");
|
|
295
|
-
const urlMatch = content.match(/^url: "(.+)"$/m);
|
|
296
|
-
const titleMatch = content.match(/^title: "(.+)"$/m);
|
|
297
|
-
allPages.push({
|
|
298
|
-
markdown: content,
|
|
299
|
-
url: urlMatch?.[1] ?? "",
|
|
300
|
-
title: titleMatch?.[1] ?? "Untitled",
|
|
301
|
-
});
|
|
302
|
-
}
|
|
303
|
-
console.log(" Chunking...");
|
|
304
|
-
const allChunks = allPages.flatMap((p) => chunkMarkdown(p.markdown, sourceName, p.url, p.title));
|
|
305
|
-
console.log(` Created ${allChunks.length} chunks.`);
|
|
306
|
-
if (cached.length !== allChunks.length) {
|
|
307
|
-
console.error(` Embeddings cache (${cached.length}) doesn't match chunk count (${allChunks.length}). Re-embed with --from-html.`);
|
|
308
|
-
process.exit(1);
|
|
309
|
-
}
|
|
310
|
-
if (args.values["skip-store"]) {
|
|
311
|
-
console.log(` Done. ${allChunks.length} chunks ready (skipped Firestore).`);
|
|
312
|
-
continue;
|
|
313
|
-
}
|
|
314
|
-
await storeWithStrategy(sourceName, allChunks, cached, allPages.length, source.version, args.values.diff);
|
|
315
|
-
continue;
|
|
316
|
-
}
|
|
317
|
-
if (args.values["from-markdown"]) {
|
|
318
|
-
console.log(" Reading cached markdown...");
|
|
319
|
-
const mdFiles = await readdir(mdDir).catch(() => []);
|
|
320
|
-
const markdownFiles = mdFiles.filter((f) => f.endsWith(".md"));
|
|
321
|
-
if (markdownFiles.length === 0) {
|
|
322
|
-
console.error(" No cached markdown found. Run with --from-html first.");
|
|
323
|
-
process.exit(1);
|
|
324
|
-
}
|
|
325
|
-
const pages = [];
|
|
326
|
-
for (const f of markdownFiles) {
|
|
327
|
-
const content = await readFile(join(mdDir, f), "utf-8");
|
|
328
|
-
const urlMatch = content.match(/^url: "(.+)"$/m);
|
|
329
|
-
const titleMatch = content.match(/^title: "(.+)"$/m);
|
|
330
|
-
pages.push({
|
|
331
|
-
markdown: content,
|
|
332
|
-
url: urlMatch?.[1] ?? "",
|
|
333
|
-
title: titleMatch?.[1] ?? "Untitled",
|
|
334
|
-
});
|
|
335
|
-
}
|
|
336
|
-
console.log(` Found ${pages.length} cached pages.`);
|
|
337
|
-
console.log(" Chunking...");
|
|
338
|
-
const allChunks = pages.flatMap((p) => chunkMarkdown(p.markdown, sourceName, p.url, p.title));
|
|
339
|
-
console.log(` Created ${allChunks.length} chunks.`);
|
|
340
|
-
console.log(" Embedding chunks...");
|
|
341
|
-
const texts = allChunks.map((c) => c.content);
|
|
342
|
-
const embeddings = await embedWithCheckpoint(texts, rawDir, embeddingsCachePath);
|
|
343
|
-
if (args.values["skip-store"]) {
|
|
344
|
-
console.log(` Done. ${allChunks.length} chunks ready (skipped Firestore).`);
|
|
345
|
-
continue;
|
|
346
|
-
}
|
|
347
|
-
await storeWithStrategy(sourceName, allChunks, embeddings, pages.length, source.version, args.values.diff);
|
|
348
|
-
continue;
|
|
349
|
-
}
|
|
350
|
-
let pages;
|
|
351
|
-
if (source.llms_full_url && !args.values["from-html"]) {
|
|
352
|
-
console.log(` Fetching llms-full.txt from ${source.llms_full_url}...`);
|
|
353
|
-
pages = await ingestLlmsFull(source.llms_full_url, sourceName, source.start_url, DATA_DIR, (cur, total) => {
|
|
354
|
-
console.log(` [${cur}/${total}] pages processed`);
|
|
355
|
-
});
|
|
356
|
-
console.log(` Extracted ${pages.length} pages.`);
|
|
357
|
-
}
|
|
358
|
-
else {
|
|
359
|
-
if (args.values["from-html"]) {
|
|
360
|
-
console.log(" Reading URLs from cached HTML...");
|
|
361
|
-
const urlsJsonPath = join(rawDir, "urls.json");
|
|
362
|
-
try {
|
|
363
|
-
urls = JSON.parse(await readFile(urlsJsonPath, "utf-8"));
|
|
364
|
-
}
|
|
365
|
-
catch {
|
|
366
|
-
const rawFiles = await readdir(rawDir);
|
|
367
|
-
const htmlFiles = rawFiles.filter((f) => f.endsWith(".html"));
|
|
368
|
-
urls = [];
|
|
369
|
-
for (const f of htmlFiles) {
|
|
370
|
-
const fileSlug = f.replace(/\.html$/, "");
|
|
371
|
-
const htmlPath = join(rawDir, f);
|
|
372
|
-
const html = await readFile(htmlPath, "utf-8");
|
|
373
|
-
const match = html.match(/<link[^>]+rel="canonical"[^>]+href="([^"]+)"/);
|
|
374
|
-
if (match && slugifyUrl(match[1]) === fileSlug) {
|
|
375
|
-
urls.push(match[1]);
|
|
376
|
-
continue;
|
|
377
|
-
}
|
|
378
|
-
const ogMatch = html.match(/<meta[^>]+property="og:url"[^>]+content="([^"]+)"/);
|
|
379
|
-
if (ogMatch && slugifyUrl(ogMatch[1]) === fileSlug) {
|
|
380
|
-
urls.push(ogMatch[1]);
|
|
381
|
-
continue;
|
|
382
|
-
}
|
|
383
|
-
urls.push(`https://recovered/${fileSlug}`);
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
|
-
console.log(` Found ${urls.length} cached pages.`);
|
|
387
|
-
}
|
|
388
|
-
else {
|
|
389
|
-
console.log(" Scraping URLs...");
|
|
390
|
-
urls = await scrapeSource(source, sourceName, DATA_DIR, (cur, total, url) => {
|
|
391
|
-
console.log(` [${cur}/${total}] ${url}`);
|
|
392
|
-
});
|
|
393
|
-
console.log(` Found ${urls.length} pages.`);
|
|
394
|
-
}
|
|
395
|
-
if (urlLimit && urls.length > urlLimit) {
|
|
396
|
-
urls = urls.slice(0, urlLimit);
|
|
397
|
-
console.log(` Limited to ${urlLimit} pages.`);
|
|
398
|
-
}
|
|
399
|
-
console.log(" Converting to markdown...");
|
|
400
|
-
pages = await convertSource(sourceName, urls, source.content_selector, source.remove_selectors, source.remove_text_patterns, DATA_DIR, source.concurrency, (cur, total) => {
|
|
401
|
-
if (cur % 10 === 0 || cur === total)
|
|
402
|
-
console.log(` [${cur}/${total}] converted`);
|
|
403
|
-
});
|
|
404
|
-
}
|
|
405
|
-
console.log(" Chunking...");
|
|
406
|
-
const allChunks = pages.flatMap((p) => chunkMarkdown(p.markdown, sourceName, p.url, p.title));
|
|
407
|
-
console.log(` Created ${allChunks.length} chunks.`);
|
|
408
|
-
console.log(" Embedding chunks...");
|
|
409
|
-
const texts = allChunks.map((c) => c.content);
|
|
410
|
-
const embeddings = await embedWithCheckpoint(texts, rawDir, embeddingsCachePath);
|
|
411
|
-
if (args.values["skip-store"]) {
|
|
412
|
-
console.log(` Done. ${allChunks.length} chunks ready (skipped Firestore).`);
|
|
413
|
-
continue;
|
|
414
|
-
}
|
|
415
|
-
await storeWithStrategy(sourceName, allChunks, embeddings, pages.length, source.version, args.values.diff);
|
|
416
|
-
}
|
|
43
|
+
async function detectConsumerMode() {
|
|
44
|
+
if (process.env.GOOGLE_APPLICATION_CREDENTIALS) return false;
|
|
45
|
+
if (process.env.GRIMOIRE_API_URL) return true;
|
|
46
|
+
const config = await loadConsumerConfig();
|
|
47
|
+
return config !== null;
|
|
417
48
|
}
|
|
418
|
-
async function
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
if (results.length === 0) {
|
|
436
|
-
console.log("No results found.");
|
|
437
|
-
return;
|
|
438
|
-
}
|
|
439
|
-
if (args.values.compact) {
|
|
440
|
-
for (const r of results) {
|
|
441
|
-
console.log(`${r.relevance_score.toFixed(4)} | ${r.source} | ${r.title} | ${r.heading_path.join(" > ")} | ${r.url}`);
|
|
442
|
-
}
|
|
443
|
-
return;
|
|
444
|
-
}
|
|
445
|
-
for (let i = 0; i < results.length; i++) {
|
|
446
|
-
const r = results[i];
|
|
447
|
-
console.log(`\n${bold(`[${i + 1}] ${r.title}`)} (${r.relevance_score.toFixed(4)})`);
|
|
448
|
-
console.log(` ${cyan(r.url)}`);
|
|
449
|
-
console.log(` ${yellow(r.heading_path.join(" > "))}`);
|
|
450
|
-
console.log(` ${r.content.replace(/\n/g, " ")}`);
|
|
451
|
-
}
|
|
49
|
+
async function cmdInit() {
|
|
50
|
+
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
51
|
+
const ask = (q) => new Promise((resolve2) => rl.question(q, resolve2));
|
|
52
|
+
const existing = await loadConsumerConfig();
|
|
53
|
+
const apiUrl = await ask(`API URL${existing ? ` [${existing.apiUrl}]` : ""}: `);
|
|
54
|
+
const apiKey = await ask(`API Key${existing ? " [****]" : ""}: `);
|
|
55
|
+
const config = {
|
|
56
|
+
apiUrl: apiUrl.trim() || existing?.apiUrl || "",
|
|
57
|
+
apiKey: apiKey.trim() || existing?.apiKey || ""
|
|
58
|
+
};
|
|
59
|
+
rl.close();
|
|
60
|
+
if (!config.apiUrl || !config.apiKey) {
|
|
61
|
+
throw new Error("Both API URL and API Key are required.");
|
|
62
|
+
}
|
|
63
|
+
await saveConsumerConfig(config);
|
|
64
|
+
console.log(`
|
|
65
|
+
Saved to ${CONFIG_FILE}`);
|
|
452
66
|
}
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
67
|
+
|
|
68
|
+
// src/consumer.ts
|
|
69
|
+
async function apiRequest(config, path, options) {
|
|
70
|
+
const url = `${config.apiUrl.replace(/\/$/, "")}${path}`;
|
|
71
|
+
let response;
|
|
72
|
+
try {
|
|
73
|
+
response = await fetch(url, {
|
|
74
|
+
...options,
|
|
75
|
+
headers: {
|
|
76
|
+
"Content-Type": "application/json",
|
|
77
|
+
"x-api-key": config.apiKey,
|
|
78
|
+
...options?.headers
|
|
79
|
+
}
|
|
460
80
|
});
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
}
|
|
472
|
-
console.log("\nSources:\n");
|
|
473
|
-
for (const meta of metas) {
|
|
474
|
-
const ver = meta.version ? ` v${meta.version}` : "";
|
|
475
|
-
console.log(` ${bold(meta.source)}${ver}`);
|
|
476
|
-
console.log(` ${meta.chunk_count} chunks, ${meta.url_count} URLs, last refreshed ${meta.last_refreshed}`);
|
|
477
|
-
}
|
|
81
|
+
} catch {
|
|
82
|
+
throw new Error(`Cannot reach Grimoire API at ${config.apiUrl}. Check your GRIMOIRE_API_URL.`);
|
|
83
|
+
}
|
|
84
|
+
if (response.status === 401 || response.status === 403) {
|
|
85
|
+
throw new Error("Invalid API key. Check your GRIMOIRE_API_KEY or run 'grimoire init'.");
|
|
86
|
+
}
|
|
87
|
+
if (!response.ok) {
|
|
88
|
+
throw new Error(`API error: ${response.status} ${response.statusText}`);
|
|
89
|
+
}
|
|
90
|
+
return response.json();
|
|
478
91
|
}
|
|
479
|
-
async function
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
92
|
+
async function cmdConsumerSearch(config, query, options) {
|
|
93
|
+
const data = await apiRequest(config, "/search", {
|
|
94
|
+
method: "POST",
|
|
95
|
+
body: JSON.stringify({ query, source: options.source, topN: options.topN })
|
|
96
|
+
});
|
|
97
|
+
if (data.results.length === 0) {
|
|
98
|
+
console.log("No results found.");
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
if (options.compact) {
|
|
102
|
+
for (const r of data.results) {
|
|
103
|
+
console.log(`${r.relevance_score.toFixed(4)} | ${r.source} | ${r.title} | ${r.heading_path.join(" > ")} | ${r.url}`);
|
|
104
|
+
}
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
for (let i = 0; i < data.results.length; i++) {
|
|
108
|
+
const r = data.results[i];
|
|
109
|
+
console.log(`
|
|
110
|
+
${bold(`[${i + 1}] ${r.title}`)} (${r.relevance_score.toFixed(4)})`);
|
|
111
|
+
console.log(` ${cyan(r.url)}`);
|
|
112
|
+
console.log(` ${yellow(r.heading_path.join(" > "))}`);
|
|
113
|
+
console.log(` ${r.content.replace(/\n/g, " ")}`);
|
|
114
|
+
}
|
|
498
115
|
}
|
|
499
|
-
async function
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
catch {
|
|
518
|
-
console.error(`No markdown data found for source "${sourceName}".`);
|
|
519
|
-
process.exit(1);
|
|
520
|
-
}
|
|
521
|
-
const pages = [];
|
|
522
|
-
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
523
|
-
const content = await readFile(join(mdDir, file), "utf-8");
|
|
524
|
-
pages.push({ file, content });
|
|
525
|
-
}
|
|
526
|
-
console.log(JSON.stringify(pages, null, 2));
|
|
116
|
+
async function cmdConsumerList(config, options) {
|
|
117
|
+
const data = await apiRequest(config, "/list");
|
|
118
|
+
if (data.sources.length === 0) {
|
|
119
|
+
console.log("No sources available.");
|
|
120
|
+
return;
|
|
121
|
+
}
|
|
122
|
+
if (options?.names) {
|
|
123
|
+
for (const s of data.sources) {
|
|
124
|
+
console.log(s.source);
|
|
125
|
+
}
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
console.log("\nSources:\n");
|
|
129
|
+
for (const s of data.sources) {
|
|
130
|
+
const ver = s.version ? ` v${s.version}` : "";
|
|
131
|
+
console.log(` ${bold(s.source)}${ver}`);
|
|
132
|
+
console.log(` ${s.chunk_count} chunks, ${s.url_count} URLs, last refreshed ${s.last_refreshed}`);
|
|
133
|
+
}
|
|
527
134
|
}
|
|
528
|
-
async function
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
}
|
|
539
|
-
console.log(`
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
135
|
+
async function cmdConsumerStats(config) {
|
|
136
|
+
const data = await apiRequest(config, "/stats");
|
|
137
|
+
if (data.sources.length === 0) {
|
|
138
|
+
console.log("No sources have been refreshed yet.");
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
console.log("\nSource Statistics:\n");
|
|
142
|
+
for (const s of data.sources) {
|
|
143
|
+
const ver = s.version ? ` v${s.version}` : "";
|
|
144
|
+
console.log(` ${bold(s.source)}${ver}`);
|
|
145
|
+
console.log(` Chunks: ${s.chunk_count}`);
|
|
146
|
+
console.log(` URLs: ${s.url_count}`);
|
|
147
|
+
console.log(` Last refreshed: ${s.last_refreshed}`);
|
|
148
|
+
}
|
|
149
|
+
console.log(`
|
|
150
|
+
Total: ${data.totalChunks} chunks across ${data.totalUrls} URLs from ${data.sources.length} sources`);
|
|
543
151
|
}
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
const
|
|
553
|
-
if (
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
console.error(`Source "${sourceName}" not found in config.`);
|
|
561
|
-
process.exit(1);
|
|
562
|
-
}
|
|
563
|
-
const rawDir = join(DATA_DIR, "raw", sourceName);
|
|
564
|
-
const urlsPath = join(rawDir, "urls.json");
|
|
565
|
-
let urls;
|
|
566
|
-
try {
|
|
567
|
-
urls = JSON.parse(await readFile(urlsPath, "utf-8"));
|
|
568
|
-
}
|
|
569
|
-
catch {
|
|
570
|
-
console.error(`No urls.json found for "${sourceName}". Run 'grimoire refresh ${sourceName} --skip-store' first.`);
|
|
571
|
-
process.exit(1);
|
|
572
|
-
}
|
|
573
|
-
const missing = urls.filter((url) => !existsSync(join(rawDir, `${slugifyUrl(url)}.html`)));
|
|
574
|
-
console.log(`\nTotal: ${urls.length}, Cached: ${urls.length - missing.length}, Missing: ${missing.length}`);
|
|
575
|
-
if (missing.length === 0) {
|
|
576
|
-
console.log("Nothing to scrape.");
|
|
577
|
-
return;
|
|
578
|
-
}
|
|
579
|
-
const concurrency = args.values.concurrency ? parseInt(args.values.concurrency, 10) : source.concurrency ?? 20;
|
|
580
|
-
const browser = await createBrowser();
|
|
581
|
-
const context = await browser.newContext(source.user_agent ? { userAgent: source.user_agent } : {});
|
|
582
|
-
let done = 0;
|
|
583
|
-
for (let i = 0; i < missing.length; i += concurrency) {
|
|
584
|
-
const batch = missing.slice(i, i + concurrency);
|
|
585
|
-
await Promise.all(batch.map(async (url) => {
|
|
586
|
-
const page = await context.newPage();
|
|
587
|
-
try {
|
|
588
|
-
await page.goto(url, { waitUntil: source.headed ? "networkidle" : "domcontentloaded", timeout: 30000 });
|
|
589
|
-
const html = await page.content();
|
|
590
|
-
await writeFile(join(rawDir, `${slugifyUrl(url)}.html`), html, "utf-8");
|
|
591
|
-
done++;
|
|
592
|
-
if (done % 10 === 0 || done === missing.length)
|
|
593
|
-
console.log(` [${done}/${missing.length}]`);
|
|
594
|
-
}
|
|
595
|
-
catch (e) {
|
|
596
|
-
console.error(` FAILED: ${url} - ${e instanceof Error ? e.message : String(e)}`);
|
|
597
|
-
}
|
|
598
|
-
finally {
|
|
599
|
-
await page.close();
|
|
600
|
-
}
|
|
601
|
-
}));
|
|
602
|
-
}
|
|
603
|
-
console.log(`Done. Fetched ${done} pages.`);
|
|
604
|
-
await browser.close();
|
|
152
|
+
|
|
153
|
+
// src/cli.ts
|
|
154
|
+
var PROJECT_ROOT = resolve(import.meta.dirname, "..");
|
|
155
|
+
var envPath = join2(PROJECT_ROOT, ".env");
|
|
156
|
+
if (existsSync(envPath)) {
|
|
157
|
+
for (const line of readFileSync(envPath, "utf-8").split("\n")) {
|
|
158
|
+
const trimmed = line.trim();
|
|
159
|
+
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
160
|
+
const eqIndex = trimmed.indexOf("=");
|
|
161
|
+
if (eqIndex === -1) continue;
|
|
162
|
+
const key = trimmed.slice(0, eqIndex);
|
|
163
|
+
const value = trimmed.slice(eqIndex + 1);
|
|
164
|
+
if (!process.env[key]) {
|
|
165
|
+
process.env[key] = value;
|
|
166
|
+
}
|
|
167
|
+
}
|
|
605
168
|
}
|
|
169
|
+
var ADMIN_ONLY_COMMANDS = ["add", "refresh", "delete", "scrape-urls", "export", "apikey"];
|
|
606
170
|
/**
 * Self-update command.
 *
 * Prints the version recorded in this package's package.json, runs a global
 * `npm install` of the latest published release (npm output is streamed to
 * the terminal), then re-reads package.json and reports whether the version
 * changed. A failing npm invocation makes `execSync` throw, which propagates
 * to the caller.
 *
 * @returns {Promise<void>}
 */
async function cmdUpdate() {
  // Loaded lazily so child_process is only pulled in when this command runs.
  const childProcess = await import("node:child_process");
  const manifestPath = join2(PROJECT_ROOT, "package.json");
  const installedVersion = () => JSON.parse(readFileSync(manifestPath, "utf-8")).version;

  const before = installedVersion();
  console.log(`Current version: ${before}`);
  console.log("Checking for updates...");
  childProcess.execSync("npm install -g @astrofoundry/grimoire@latest", { stdio: "inherit" });

  // Read the manifest again: the install may have replaced it on disk.
  const after = installedVersion();
  if (after !== before) {
    console.log(`Updated to ${after}.`);
    return;
  }
  console.log("Already on the latest version.");
}
|
|
620
|
-
const ADMIN_COMMANDS = {
|
|
621
|
-
add: cmdAdd,
|
|
622
|
-
refresh: cmdRefresh,
|
|
623
|
-
delete: cmdDelete,
|
|
624
|
-
"scrape-urls": cmdScrapeUrls,
|
|
625
|
-
update: cmdUpdate,
|
|
626
|
-
search: cmdSearch,
|
|
627
|
-
list: cmdList,
|
|
628
|
-
stats: cmdStats,
|
|
629
|
-
export: cmdExport,
|
|
630
|
-
apikey: cmdApiKey,
|
|
631
|
-
};
|
|
632
|
-
const ADMIN_ONLY_COMMANDS = ["add", "refresh", "delete", "scrape-urls", "export", "apikey"];
|
|
633
183
|
function showHelp(isConsumer) {
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
grimoire
|
|
184
|
+
if (isConsumer) {
|
|
185
|
+
console.log(`
|
|
186
|
+
grimoire \u2014 Documentation RAG
|
|
637
187
|
|
|
638
|
-
|
|
639
|
-
search "<query>" [--source <
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
188
|
+
USAGE
|
|
189
|
+
grimoire search "<query>" [--source <name>] [--top <n>] [--compact]
|
|
190
|
+
|
|
191
|
+
QUERY TIPS
|
|
192
|
+
- Use the library's own terminology ("Firestore batched writes"
|
|
193
|
+
not "how do I do multi-write in firestore")
|
|
194
|
+
- Scope with --source when you know the area
|
|
195
|
+
- Rephrase if first search misses
|
|
196
|
+
- Prefer --compact for scanning; omit for full snippets
|
|
197
|
+
- Cite the URL in your answer when precision matters
|
|
198
|
+
|
|
199
|
+
EXAMPLES
|
|
200
|
+
grimoire search "Firestore batched writes" --source firebase-firestore --compact
|
|
201
|
+
grimoire search "react server components" --top 3
|
|
202
|
+
grimoire list --names
|
|
203
|
+
|
|
204
|
+
FLAGS
|
|
205
|
+
--source <name> Scope to one indexed source. Run \`grimoire list --names\`
|
|
206
|
+
for the full list (sources are added regularly).
|
|
207
|
+
--top <n> Max results. Default: 5.
|
|
208
|
+
--compact One line per result: score | source | title | heading | url
|
|
209
|
+
Default (non-compact): multi-line block with title, URL,
|
|
210
|
+
heading path, and content snippet.
|
|
645
211
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
212
|
+
RELEVANCE SCORES
|
|
213
|
+
Range 0\u20131 (higher = better). >0.85 strong match, 0.6\u20130.85 relevant,
|
|
214
|
+
<0.6 usually too weak to cite. "No results found." + exit 0 = clean miss.
|
|
215
|
+
|
|
216
|
+
MANAGEMENT
|
|
217
|
+
grimoire list [--names] Show indexed sources
|
|
218
|
+
grimoire stats Index statistics
|
|
219
|
+
grimoire init Configure API connection (first-time setup)
|
|
220
|
+
grimoire update Update grimoire itself
|
|
221
|
+
grimoire --version Print CLI version
|
|
222
|
+
|
|
223
|
+
ENVIRONMENT
|
|
224
|
+
GRIMOIRE_API_URL API endpoint URL
|
|
225
|
+
GRIMOIRE_API_KEY API key
|
|
649
226
|
`);
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
grimoire — Documentation RAG System (admin)
|
|
227
|
+
} else {
|
|
228
|
+
console.log(`
|
|
229
|
+
grimoire \u2014 Documentation RAG System (admin)
|
|
654
230
|
|
|
655
231
|
Commands:
|
|
656
232
|
add <name> --url <url> Add a new documentation source
|
|
@@ -674,103 +250,96 @@ Commands:
|
|
|
674
250
|
apikey list List API keys
|
|
675
251
|
apikey delete <name> Delete an API key
|
|
676
252
|
`);
|
|
677
|
-
|
|
253
|
+
}
|
|
678
254
|
}
|
|
679
255
|
/**
 * CLI entry point: reads the command from `process.argv[2]` and dispatches it.
 *
 * Handling order:
 *  1. `--version` / `-v` print the version from package.json and exit, before
 *     any mode detection.
 *  2. No command, `--help` or `-h` print the help for the detected mode and exit.
 *  3. `update` and `init` run the same way in both modes.
 *  4. Consumer mode: admin-only commands are rejected, then `search`, `list`
 *     and `stats` run against the resolved consumer config.
 *  5. Otherwise the handler is looked up in the lazily imported admin chunk.
 *
 * Unknown commands and invalid arguments print a message to stderr and exit
 * with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const command = process.argv[2];
  if (command === "--version" || command === "-v") {
    const pkg = JSON.parse(readFileSync(join2(PROJECT_ROOT, "package.json"), "utf-8"));
    console.log(pkg.version);
    process.exit(0);
  }
  const isConsumer = await detectConsumerMode();
  if (!command || command === "--help" || command === "-h") {
    showHelp(isConsumer);
    process.exit(0);
  }
  if (command === "update") {
    await cmdUpdate();
    return;
  }
  // `init` behaves identically in consumer and admin mode, so handle it once
  // before any mode-specific setup.
  if (command === "init") {
    await cmdInit();
    return;
  }
  if (isConsumer) {
    if (ADMIN_ONLY_COMMANDS.includes(command)) {
      console.error(`The '${command}' command is only available in admin mode.`);
      process.exit(1);
    }
    const config = await resolveConsumerConfig().catch(() => {
      console.error("Grimoire is not configured yet. Run 'grimoire init' to set up your API connection.");
      process.exit(1);
    });
    if (command === "search") {
      const args = parseArgs({
        args: process.argv.slice(3),
        options: {
          source: { type: "string" },
          top: { type: "string" },
          compact: { type: "boolean", default: false }
        },
        allowPositionals: true
      });
      const query = args.positionals[0];
      if (!query) {
        console.error('Usage: grimoire search "<query>" [--source <name>] [--top <n>] [--compact]');
        process.exit(1);
      }
      // An omitted (or empty) --top leaves topN undefined; otherwise it must
      // parse to a positive integer instead of silently passing NaN along.
      let topN;
      if (args.values.top) {
        topN = Number.parseInt(args.values.top, 10);
        if (Number.isNaN(topN) || topN < 1) {
          console.error(`Invalid --top value: ${args.values.top}. Expected a positive integer.`);
          process.exit(1);
        }
      }
      await cmdConsumerSearch(config, query, { source: args.values.source, topN, compact: args.values.compact });
    } else if (command === "list") {
      const args = parseArgs({
        args: process.argv.slice(3),
        options: { names: { type: "boolean", default: false } },
        allowPositionals: true
      });
      await cmdConsumerList(config, { names: args.values.names });
    } else if (command === "stats") {
      await cmdConsumerStats(config);
    } else {
      console.error(`Unknown command: ${command}. Run "grimoire --help" for usage.`);
      process.exit(1);
    }
    return;
  }
  // The admin implementation lives in a separate chunk that is only loaded
  // when an admin command is actually dispatched.
  const { ADMIN_COMMANDS } = await import("./admin-HA6FNUV4.js");
  // Own-property lookup only: a plain `ADMIN_COMMANDS[command]` would also
  // resolve inherited names such as "constructor" or "toString".
  const handler = Object.hasOwn(ADMIN_COMMANDS, command) ? ADMIN_COMMANDS[command] : void 0;
  if (typeof handler !== "function") {
    console.error(`Unknown command: ${command}. Run "grimoire --help" for usage.`);
    process.exit(1);
  }
  await handler();
}
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
328
|
+
// Substrings of error messages that are reported to the user as a Google
// Cloud authentication failure instead of being printed verbatim.
var GCP_AUTH_PATTERNS = [
  "Unable to detect a Project Id",
  "Could not load the default credentials",
  "invalid_grant",
  "invalid_rapt",
  "UNAUTHENTICATED",
  "Getting metadata from plugin failed"
];
// Run the CLI. Any rejection from main() is reported on stderr and the
// process exits with status 1.
main().catch((err) => {
  // A rejection value can be anything (null, undefined, a non-Error object, a
  // non-string `message`), so read it defensively and coerce to a string
  // before matching; otherwise this handler itself could throw.
  const msg = String(err?.message ?? err);
  if (GCP_AUTH_PATTERNS.some((p) => msg.includes(p))) {
    console.error("Google Cloud authentication failed. Re-authenticate with:\n\n  gcloud auth application-default login\n");
  } else {
    console.error(`Error: ${msg}`);
  }
  process.exit(1);
});
|
|
776
|
-
//# sourceMappingURL=cli.js.map
|
|
345
|
+
//# sourceMappingURL=cli.js.map
|