@astrofoundry/grimoire 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -0
- package/dist/apikey.d.ts +5 -0
- package/dist/apikey.d.ts.map +1 -0
- package/dist/apikey.js +85 -0
- package/dist/apikey.js.map +1 -0
- package/dist/chunker.d.ts +7 -0
- package/dist/chunker.d.ts.map +1 -0
- package/dist/chunker.js +153 -0
- package/dist/chunker.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +496 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +18 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +76 -0
- package/dist/config.js.map +1 -0
- package/dist/consumer-config.d.ts +11 -0
- package/dist/consumer-config.d.ts.map +1 -0
- package/dist/consumer-config.js +58 -0
- package/dist/consumer-config.js.map +1 -0
- package/dist/consumer.d.ts +8 -0
- package/dist/consumer.d.ts.map +1 -0
- package/dist/consumer.js +71 -0
- package/dist/consumer.js.map +1 -0
- package/dist/converter.d.ts +12 -0
- package/dist/converter.d.ts.map +1 -0
- package/dist/converter.js +95 -0
- package/dist/converter.js.map +1 -0
- package/dist/embedder.d.ts +3 -0
- package/dist/embedder.d.ts.map +1 -0
- package/dist/embedder.js +38 -0
- package/dist/embedder.js.map +1 -0
- package/dist/format.d.ts +5 -0
- package/dist/format.d.ts.map +1 -0
- package/dist/format.js +6 -0
- package/dist/format.js.map +1 -0
- package/dist/reranker.d.ts +6 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/reranker.js +21 -0
- package/dist/reranker.js.map +1 -0
- package/dist/scraper.d.ts +9 -0
- package/dist/scraper.d.ts.map +1 -0
- package/dist/scraper.js +77 -0
- package/dist/scraper.js.map +1 -0
- package/dist/search.d.ts +8 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +43 -0
- package/dist/search.js.map +1 -0
- package/dist/store.d.ts +11 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +102 -0
- package/dist/store.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +47 -0
package/dist/store.js
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import { initializeApp, applicationDefault, getApps } from "firebase-admin/app";
|
|
2
|
+
import { getFirestore, FieldValue, } from "firebase-admin/firestore";
|
|
3
|
+
// Number of write operations grouped into a single Firestore batch commit
// by storeChunks and purgeSource.
const BATCH_SIZE = 500;
|
|
4
|
+
// Cached Firestore handle; stays undefined until getDb() assigns it on first use.
let db;
|
|
5
|
+
/**
 * Returns the shared Firestore handle, creating it on first use.
 *
 * If no firebase-admin app is registered yet, a default app is initialized
 * with application-default credentials before the Firestore instance is
 * obtained. The instance is cached in the module-level `db` variable, so
 * later calls return it directly.
 *
 * @returns the cached Firestore instance.
 */
function getDb() {
    if (db) {
        return db;
    }
    const appAlreadyRegistered = getApps().length !== 0;
    if (!appAlreadyRegistered) {
        initializeApp({ credential: applicationDefault() });
    }
    db = getFirestore();
    return db;
}
|
|
14
|
+
/** Returns a reference to the "grimoire_chunks" collection. */
function chunksCol() {
    const firestore = getDb();
    return firestore.collection("grimoire_chunks");
}
|
|
17
|
+
/** Returns a reference to the "grimoire_sources" collection. */
function sourcesCol() {
    const firestore = getDb();
    return firestore.collection("grimoire_sources");
}
|
|
20
|
+
/**
 * Writes chunks together with their embeddings into the chunks collection.
 *
 * Each document is keyed by `chunk.id` and written with `batch.set`, so an
 * existing document with the same id is replaced. Writes are committed
 * sequentially in groups of at most BATCH_SIZE, and `onProgress` is invoked
 * after every successful commit.
 *
 * @param {Array<object>} chunks - Chunk records providing `id`, `source`,
 *   `url`, `title`, `heading_path`, `content` and `token_count`.
 * @param {Array<number[]>} embeddings - `embeddings[k]` is the vector stored
 *   with `chunks[k]`.
 * @param {(done: number, total: number) => void} [onProgress] - Optional
 *   callback receiving the number of chunks written so far and the total.
 * @returns {Promise<void>}
 * @throws {Error} If fewer embeddings than chunks are supplied.
 */
export async function storeChunks(chunks, embeddings, onProgress) {
    // Fail fast, before any batch is committed: a missing embedding would
    // otherwise only surface part-way through, after earlier batches may
    // already have been written. Surplus embeddings are still tolerated.
    if (embeddings.length < chunks.length) {
        throw new Error(`storeChunks: received ${embeddings.length} embeddings for ${chunks.length} chunks`);
    }
    const database = getDb();
    const col = chunksCol();
    for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
        const batch = database.batch();
        const slice = chunks.slice(i, i + BATCH_SIZE);
        const embSlice = embeddings.slice(i, i + BATCH_SIZE);
        for (let j = 0; j < slice.length; j++) {
            const chunk = slice[j];
            batch.set(col.doc(chunk.id), {
                source: chunk.source,
                url: chunk.url,
                title: chunk.title,
                heading_path: chunk.heading_path,
                content: chunk.content,
                token_count: chunk.token_count,
                embedded_at: new Date().toISOString(),
                embedding: FieldValue.vector(embSlice[j]),
            });
        }
        // Batches are committed one at a time so progress reports stay ordered.
        await batch.commit();
        onProgress?.(Math.min(i + BATCH_SIZE, chunks.length), chunks.length);
    }
}
|
|
44
|
+
/**
 * Deletes every chunk document whose `source` field equals `sourceName`.
 *
 * Matching documents are deleted in batches of at most BATCH_SIZE; all batch
 * commits are started before any is awaited and then awaited together.
 *
 * @param {string} sourceName - Value of the `source` field to match.
 * @returns {Promise<number>} Number of documents queued for deletion
 *   (0 when nothing matched).
 */
export async function purgeSource(sourceName) {
    const database = getDb();
    const col = chunksCol();
    // Only the document references are needed for deletion. An empty select()
    // projection avoids downloading each document's fields (including the
    // embedding vector) just to read `doc.ref`.
    const snapshot = await col.where("source", "==", sourceName).select().get();
    if (snapshot.empty) {
        return 0;
    }
    const docs = snapshot.docs;
    const commits = [];
    for (let start = 0; start < docs.length; start += BATCH_SIZE) {
        const batch = database.batch();
        for (const doc of docs.slice(start, start + BATCH_SIZE)) {
            batch.delete(doc.ref);
        }
        commits.push(batch.commit());
    }
    await Promise.all(commits);
    return docs.length;
}
|
|
67
|
+
/**
 * Writes the metadata record for a source, replacing any existing record.
 *
 * The record is stored in the sources collection under the document id
 * `sourceName`, with `last_refreshed` set to the current time as an ISO
 * string.
 *
 * @param {string} sourceName - Source name; also used as the document id.
 * @param {number} chunkCount - Stored as `chunk_count`.
 * @param {number} urlCount - Stored as `url_count`.
 * @returns {Promise<void>}
 */
export async function updateSourceMeta(sourceName, chunkCount, urlCount) {
    const metaRef = sourcesCol().doc(sourceName);
    const record = {
        source: sourceName,
        last_refreshed: new Date().toISOString(),
        chunk_count: chunkCount,
        url_count: urlCount,
    };
    await metaRef.set(record);
}
|
|
75
|
+
/**
 * Loads the metadata record stored for a single source.
 *
 * @param {string} sourceName - Document id in the sources collection.
 * @returns {Promise<object|null>} The document data, or `null` when no
 *   document exists for `sourceName`.
 */
export async function getSourceMeta(sourceName) {
    const metaSnap = await sourcesCol().doc(sourceName).get();
    return metaSnap.exists ? metaSnap.data() : null;
}
|
|
81
|
+
/**
 * Loads the metadata records of all sources.
 *
 * @returns {Promise<object[]>} The data of every document in the sources
 *   collection, in the order the query returned them.
 */
export async function getAllSourcesMeta() {
    const { docs } = await sourcesCol().get();
    const metas = [];
    for (const entry of docs) {
        metas.push(entry.data());
    }
    return metas;
}
|
|
85
|
+
/**
 * Runs a nearest-neighbour query over the `embedding` field of the chunks
 * collection using cosine distance.
 *
 * @param {number[]} queryEmbedding - Query vector, wrapped with
 *   `FieldValue.vector` before being sent.
 * @param {number} limit - Maximum number of results, passed through to
 *   `findNearest`.
 * @param {string} [source] - When truthy, restricts the search to chunks
 *   whose `source` field equals this value.
 * @returns {Promise<Array<{id: string, data: object}>>} One entry per
 *   returned document, holding its id and its data.
 */
export async function vectorSearch(queryEmbedding, limit, source) {
    const base = chunksCol();
    const scoped = source ? base.where("source", "==", source) : base;
    const nearest = scoped.findNearest("embedding", FieldValue.vector(queryEmbedding), {
        limit,
        distanceMeasure: "COSINE",
    });
    const { docs } = await nearest.get();
    return docs.map((hit) => {
        return { id: hit.id, data: hit.data() };
    });
}
|
|
102
|
+
//# sourceMappingURL=store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"store.js","sourceRoot":"","sources":["../src/store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAChF,OAAO,EACL,YAAY,EACZ,UAAU,GAEX,MAAM,0BAA0B,CAAC;AAGlC,MAAM,UAAU,GAAG,GAAG,CAAC;AAEvB,IAAI,EAAyB,CAAC;AAE9B,SAAS,KAAK;IACZ,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,IAAI,OAAO,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,aAAa,CAAC,EAAE,UAAU,EAAE,kBAAkB,EAAE,EAAE,CAAC,CAAC;QACtD,CAAC;QACD,EAAE,GAAG,YAAY,EAAE,CAAC;IACtB,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,SAAS;IAChB,OAAO,KAAK,EAAE,CAAC,UAAU,CAAC,iBAAiB,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,UAAU;IACjB,OAAO,KAAK,EAAE,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC;AAChD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,MAAe,EACf,UAAsB,EACtB,UAAqD;IAErD,MAAM,QAAQ,GAAG,KAAK,EAAE,CAAC;IACzB,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;IAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QACnD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;QAC9C,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;QAErD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACvB,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE;gBAC3B,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACrC,SAAS,EAAE,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;aAC1C,CAAC,CAAC;QACL,CAAC;QAED,MAAM,KAAK,CAAC,MAAM,EAAE,CAAC;QACrB,UAAU,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IACvE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,UAAkB;IAClD,MAAM,QAAQ,GAAG,KAAK,EAAE,CAAC;IACzB,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,UAAU,CAAC,CAAC,GAAG,EAAE,CAAC;IACnE,
IAAI,QAAQ,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC;IAE7B,MAAM,OAAO,GAA+C,EAAE,CAAC;IAC/D,IAAI,KAAK,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC;IAC7B,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,MAAM,GAAG,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC;QAChC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACtB,KAAK,EAAE,CAAC;QACR,IAAI,KAAK,GAAG,UAAU,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;YAC7B,KAAK,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,IAAI,KAAK,GAAG,UAAU,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAC3B,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,UAAkB,EAClB,UAAkB,EAClB,QAAgB;IAEhB,MAAM,UAAU,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC;QACrC,MAAM,EAAE,UAAU;QAClB,cAAc,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACxC,WAAW,EAAE,UAAU;QACvB,SAAS,EAAE,QAAQ;KACpB,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,UAAkB;IACpD,MAAM,GAAG,GAAG,MAAM,UAAU,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,GAAG,EAAE,CAAC;IACrD,IAAI,CAAC,GAAG,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAC7B,OAAO,GAAG,CAAC,IAAI,EAAgB,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,iBAAiB;IACrC,MAAM,QAAQ,GAAG,MAAM,UAAU,EAAE,CAAC,GAAG,EAAE,CAAC;IAC1C,OAAO,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,EAAgB,CAAC,CAAC;AAC9D,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,cAAwB,EACxB,KAAa,EACb,MAAe;IAEf,MAAM,GAAG,GAAG,SAAS,EAAE,CAAC;IAExB,IAAI,KAAK,GAAG,GAA8B,CAAC;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,QAAQ,GAAG,MAAM,KAAK;SACzB,WAAW,CAAC,WAAW,EAAE,UAAU,CAAC,MAAM,CAAC,cAAc,CAAC,EAAE;QAC3D,KAAK;QACL,eAAe,EAAE,QAAQ;KAC1B,CAAC;SACD,GAAG,EAAE,CAAC;IAET,OAAO,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACjC,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE;KACjB,CAAC,CAAC,CAAC;AACN,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * A chunk record as handed to the store for persistence.
 */
export interface Chunk {
    /** Identifier; the store uses it as the Firestore document id. */
    id: string;
    /** Name of the source the chunk belongs to; the store filters on this field. */
    source: string;
    /** NOTE(review): presumably the URL of the page the chunk was taken from; confirm. */
    url: string;
    /** NOTE(review): presumably the title of the originating page; confirm. */
    title: string;
    /** NOTE(review): presumably the headings enclosing the chunk, outermost first; confirm. */
    heading_path: Array<string>;
    /** Text content of the chunk. */
    content: string;
    /** NOTE(review): presumably the token length of `content`; tokenizer not visible here. */
    token_count: number;
}
|
|
10
|
+
/**
 * A single search hit. It has the same descriptive fields as a chunk, without
 * `token_count` and with a relevance score added.
 */
export interface SearchResult {
    id: string;
    source: string;
    url: string;
    title: string;
    heading_path: Array<string>;
    content: string;
    /** NOTE(review): scale and ordering of the score are not visible here; confirm against the search/rerank code. */
    relevance_score: number;
}
|
|
19
|
+
/**
 * Metadata record kept per source in the sources collection.
 */
export interface SourceMeta {
    /** Source name; the store also uses it as the document id. */
    source: string;
    /** Time of the last metadata update, written by the store as an ISO date string. */
    last_refreshed: string;
    /** Chunk count recorded at the last update. */
    chunk_count: number;
    /** URL count recorded at the last update. */
    url_count: number;
}
|
|
25
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,KAAK;IACpB,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,OAAO,EAAE,MAAM,CAAC;IAChB,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/package.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@astrofoundry/grimoire",
|
|
3
|
+
"version": "1.2.2",
|
|
4
|
+
"description": "Documentation RAG System",
|
|
5
|
+
"keywords": [],
|
|
6
|
+
"author": "",
|
|
7
|
+
"license": "ISC",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"bin": {
|
|
10
|
+
"grimoire": "dist/cli.js"
|
|
11
|
+
},
|
|
12
|
+
"devDependencies": {
|
|
13
|
+
"@eslint/js": "^10.0.1",
|
|
14
|
+
"@types/jsdom": "^28.0.1",
|
|
15
|
+
"@types/node": "^25.5.0",
|
|
16
|
+
"@types/turndown": "^5.0.6",
|
|
17
|
+
"eslint": "^10.1.0",
|
|
18
|
+
"globals": "^17.4.0",
|
|
19
|
+
"typescript": "^5.9.3",
|
|
20
|
+
"typescript-eslint": "^8.57.2",
|
|
21
|
+
"vitest": "^4.1.1"
|
|
22
|
+
},
|
|
23
|
+
"files": [
|
|
24
|
+
"dist",
|
|
25
|
+
"README.md"
|
|
26
|
+
],
|
|
27
|
+
"optionalDependencies": {
|
|
28
|
+
"@google/generative-ai": "^0.24.1",
|
|
29
|
+
"cohere-ai": "^7.20.0",
|
|
30
|
+
"firebase-admin": "^13.7.0",
|
|
31
|
+
"jsdom": "^29.0.1",
|
|
32
|
+
"playwright": "^1.58.2",
|
|
33
|
+
"turndown": "^7.2.2",
|
|
34
|
+
"yaml": "^2.8.3"
|
|
35
|
+
},
|
|
36
|
+
"scripts": {
|
|
37
|
+
"test": "vitest run",
|
|
38
|
+
"lint": "eslint .",
|
|
39
|
+
"typecheck": "tsc --noEmit",
|
|
40
|
+
"check": "tsc --noEmit && eslint . && vitest run",
|
|
41
|
+
"build": "tsc",
|
|
42
|
+
"build:watch": "tsc --watch",
|
|
43
|
+
"release:patch": "v=$(pnpm version patch --no-git-tag-version) && git add -A && git commit -m \"Release $v\" && git tag $v && git push && git push origin $v",
|
|
44
|
+
"release:minor": "v=$(pnpm version minor --no-git-tag-version) && git add -A && git commit -m \"Release $v\" && git tag $v && git push && git push origin $v",
|
|
45
|
+
"release:major": "v=$(pnpm version major --no-git-tag-version) && git add -A && git commit -m \"Release $v\" && git tag $v && git push && git push origin $v"
|
|
46
|
+
}
|
|
47
|
+
}
|