@jcode.labs/mimir 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -3
- package/CONTRIBUTING.md +28 -0
- package/README.md +262 -36
- package/SECURITY-HARDENING.md +58 -20
- package/dist/chunking.d.ts.map +1 -1
- package/dist/chunking.js +6 -3
- package/dist/chunking.js.map +1 -1
- package/dist/cli.js +103 -9
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +50 -36
- package/dist/config.js.map +1 -1
- package/dist/defaults.d.ts +11 -0
- package/dist/defaults.d.ts.map +1 -0
- package/dist/defaults.js +31 -0
- package/dist/defaults.js.map +1 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +85 -11
- package/dist/embeddings.js.map +1 -1
- package/dist/files.d.ts +2 -1
- package/dist/files.d.ts.map +1 -1
- package/dist/files.js +39 -2
- package/dist/files.js.map +1 -1
- package/dist/gitignore.d.ts +1 -1
- package/dist/gitignore.d.ts.map +1 -1
- package/dist/gitignore.js +8 -7
- package/dist/gitignore.js.map +1 -1
- package/dist/ingest.d.ts.map +1 -1
- package/dist/ingest.js +2 -1
- package/dist/ingest.js.map +1 -1
- package/dist/init.d.ts.map +1 -1
- package/dist/init.js +4 -24
- package/dist/init.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +14 -13
- package/dist/mcp.js.map +1 -1
- package/dist/parsing.d.ts.map +1 -1
- package/dist/parsing.js +138 -0
- package/dist/parsing.js.map +1 -1
- package/dist/query.d.ts.map +1 -1
- package/dist/query.js +14 -22
- package/dist/query.js.map +1 -1
- package/dist/security.js +16 -18
- package/dist/security.js.map +1 -1
- package/dist/skill.d.ts +2 -1
- package/dist/skill.d.ts.map +1 -1
- package/dist/skill.js +24 -9
- package/dist/skill.js.map +1 -1
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +2 -1
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +12 -14
- package/dist/types.d.ts.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/examples/sovereign-rag-demo/.kb/config.json +22 -0
- package/examples/sovereign-rag-demo/.kb/sources.txt +2 -0
- package/examples/sovereign-rag-demo/README.md +80 -0
- package/examples/sovereign-rag-demo/raw/dataset-inventory.csv +5 -0
- package/examples/sovereign-rag-demo/raw/incident-timeline.jsonl +4 -0
- package/examples/sovereign-rag-demo/raw/operations-brief.md +16 -0
- package/examples/sovereign-rag-demo/raw/review-notes.evidence +11 -0
- package/examples/sovereign-rag-demo/raw/security-policy.yaml +14 -0
- package/package.json +24 -25
- package/skills/mimir/SKILL.md +66 -5
- package/skills/mimir-audio-summary/SKILL.md +134 -0
- package/skills/mimir-audio-summary/forge-voice.sh +153 -0
- package/skills/mimir-audio-summary/split-lines.py +13 -0
- package/skills/mimir-audio-summary/xtts-voice.py +46 -0
- package/dist/network.d.ts +0 -4
- package/dist/network.d.ts.map +0 -1
- package/dist/network.js +0 -59
- package/dist/network.js.map +0 -1
package/dist/files.js
CHANGED
|
@@ -3,17 +3,51 @@ import { existsSync } from "node:fs";
|
|
|
3
3
|
import { readFile, stat } from "node:fs/promises";
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import fg from "fast-glob";
|
|
6
|
-
export const
|
|
6
|
+
export const DEFAULT_SUPPORTED_EXTENSIONS = new Set([
|
|
7
|
+
".atom",
|
|
8
|
+
".c",
|
|
9
|
+
".cfg",
|
|
10
|
+
".conf",
|
|
11
|
+
".cpp",
|
|
12
|
+
".cs",
|
|
13
|
+
".css",
|
|
7
14
|
".csv",
|
|
15
|
+
".docx",
|
|
16
|
+
".go",
|
|
17
|
+
".h",
|
|
8
18
|
".htm",
|
|
9
19
|
".html",
|
|
20
|
+
".ini",
|
|
21
|
+
".java",
|
|
22
|
+
".js",
|
|
10
23
|
".json",
|
|
24
|
+
".jsonl",
|
|
25
|
+
".jsx",
|
|
26
|
+
".log",
|
|
11
27
|
".md",
|
|
12
28
|
".mdx",
|
|
29
|
+
".ndjson",
|
|
30
|
+
".odp",
|
|
31
|
+
".ods",
|
|
32
|
+
".odt",
|
|
13
33
|
".pdf",
|
|
34
|
+
".php",
|
|
35
|
+
".pptx",
|
|
36
|
+
".properties",
|
|
37
|
+
".py",
|
|
38
|
+
".rb",
|
|
39
|
+
".rs",
|
|
40
|
+
".rss",
|
|
41
|
+
".rtf",
|
|
42
|
+
".sql",
|
|
14
43
|
".text",
|
|
44
|
+
".toml",
|
|
45
|
+
".ts",
|
|
15
46
|
".tsv",
|
|
47
|
+
".tsx",
|
|
16
48
|
".txt",
|
|
49
|
+
".xml",
|
|
50
|
+
".xlsx",
|
|
17
51
|
".yaml",
|
|
18
52
|
".yml",
|
|
19
53
|
]);
|
|
@@ -34,7 +68,7 @@ export async function listSourceFiles(config) {
|
|
|
34
68
|
});
|
|
35
69
|
for (const absolutePath of entries) {
|
|
36
70
|
const extension = path.extname(absolutePath).toLowerCase();
|
|
37
|
-
if (!
|
|
71
|
+
if (!supportedExtensions(config).has(extension)) {
|
|
38
72
|
continue;
|
|
39
73
|
}
|
|
40
74
|
const info = await stat(absolutePath);
|
|
@@ -52,6 +86,9 @@ export async function listSourceFiles(config) {
|
|
|
52
86
|
}
|
|
53
87
|
return [...files.values()].sort((a, b) => a.relativePath.localeCompare(b.relativePath));
|
|
54
88
|
}
|
|
89
|
+
export function supportedExtensions(config) {
|
|
90
|
+
return new Set([...DEFAULT_SUPPORTED_EXTENSIONS, ...config.includeExtensions]);
|
|
91
|
+
}
|
|
55
92
|
async function sourceRoots(config) {
|
|
56
93
|
const roots = [config.rawDir];
|
|
57
94
|
if (!existsSync(config.sourcesFile)) {
|
package/dist/files.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"files.js","sourceRoot":"","sources":["../src/files.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,MAAM,WAAW,CAAA;AAG1B,MAAM,CAAC,MAAM,
|
|
1
|
+
{"version":3,"file":"files.js","sourceRoot":"","sources":["../src/files.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,MAAM,WAAW,CAAA;AAG1B,MAAM,CAAC,MAAM,4BAA4B,GAAG,IAAI,GAAG,CAAC;IAClD,OAAO;IACP,IAAI;IACJ,MAAM;IACN,OAAO;IACP,MAAM;IACN,KAAK;IACL,MAAM;IACN,MAAM;IACN,OAAO;IACP,KAAK;IACL,IAAI;IACJ,MAAM;IACN,OAAO;IACP,MAAM;IACN,OAAO;IACP,KAAK;IACL,OAAO;IACP,QAAQ;IACR,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;IACN,SAAS;IACT,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,aAAa;IACb,KAAK;IACL,KAAK;IACL,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,MAAM;CACP,CAAC,CAAA;AAEF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,MAAc;IAClD,MAAM,KAAK,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,CAAA;IACvC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAsB,CAAA;IAE3C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACtB,SAAQ;QACV,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE;YAC/B,GAAG,EAAE,IAAI;YACT,QAAQ,EAAE,IAAI;YACd,SAAS,EAAE,IAAI;YACf,GAAG,EAAE,KAAK;YACV,mBAAmB,EAAE,KAAK;YAC1B,MAAM,EAAE,CAAC,YAAY,EAAE,oBAAoB,EAAE,WAAW,EAAE,cAAc,CAAC;SAC1E,CAAC,CAAA;QAEF,KAAK,MAAM,YAAY,IAAI,OAAO,EAAE,CAAC;YACnC,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC,WAAW,EAAE,CAAA;YAC1D,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAChD,SAAQ;YACV,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,CAAA;YACrC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,YAAY,CAAC,CAAA;YAC3C,KAAK,CAAC,GAAG,CAAC,YAAY,EAAE;gBACtB,YAAY;gBACZ,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,YAAY,CAAC;gBAC7D,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,YAAY,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC;gBACxE,SAAS;gBACT,KAAK,EAAE,IAAI,CAAC,IAAI;gBAChB,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,QAAQ,EAAE,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC5D,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;I
AED,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAA;AACzF,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,MAAc;IAChD,OAAO,IAAI,GAAG,CAAC,CAAC,GAAG,4BAA4B,EAAE,GAAG,MAAM,CAAC,iBAAiB,CAAC,CAAC,CAAA;AAChF,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,MAAc;IACvC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;IAC7B,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC;QACpC,OAAO,KAAK,CAAA;IACd,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAAA;IAC1D,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;QAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxC,SAAQ;QACV,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAA;IAC5F,CAAC;IAED,OAAO,KAAK,CAAA;AACd,CAAC"}
|
package/dist/gitignore.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export declare const MIMIR_GITIGNORE_ENTRIES:
|
|
1
|
+
export declare const MIMIR_GITIGNORE_ENTRIES: string[];
|
|
2
2
|
export declare function ensureMimirGitignore(cwd?: string): Promise<boolean>;
|
|
3
3
|
//# sourceMappingURL=gitignore.d.ts.map
|
package/dist/gitignore.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gitignore.d.ts","sourceRoot":"","sources":["../src/gitignore.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"gitignore.d.ts","sourceRoot":"","sources":["../src/gitignore.ts"],"names":[],"mappings":"AAUA,eAAO,MAAM,uBAAuB,UAQnC,CAAA;AAED,wBAAsB,oBAAoB,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyBhF"}
|
package/dist/gitignore.js
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import { readFile, writeFile } from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import { KB_GITIGNORE_ENTRY, MIMIR_GITIGNORE_ENTRY, PRIVATE_DIR, PRIVATE_GITIGNORE_ENTRY, } from "./defaults.js";
|
|
4
5
|
export const MIMIR_GITIGNORE_ENTRIES = [
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
KB_GITIGNORE_ENTRY,
|
|
7
|
+
MIMIR_GITIGNORE_ENTRY,
|
|
8
|
+
PRIVATE_GITIGNORE_ENTRY,
|
|
9
|
+
`!${PRIVATE_DIR}/`,
|
|
10
|
+
`!${PRIVATE_DIR}/README.md`,
|
|
11
|
+
`!${PRIVATE_DIR}/**/`,
|
|
12
|
+
`!${PRIVATE_DIR}/**/.gitkeep`,
|
|
12
13
|
];
|
|
13
14
|
export async function ensureMimirGitignore(cwd = process.cwd()) {
|
|
14
15
|
const root = path.resolve(cwd);
|
package/dist/gitignore.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gitignore.js","sourceRoot":"","sources":["../src/gitignore.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACtD,OAAO,IAAI,MAAM,WAAW,CAAA;
|
|
1
|
+
{"version":3,"file":"gitignore.js","sourceRoot":"","sources":["../src/gitignore.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACtD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EACL,kBAAkB,EAClB,qBAAqB,EACrB,WAAW,EACX,uBAAuB,GACxB,MAAM,eAAe,CAAA;AAEtB,MAAM,CAAC,MAAM,uBAAuB,GAAG;IACrC,kBAAkB;IAClB,qBAAqB;IACrB,uBAAuB;IACvB,IAAI,WAAW,GAAG;IAClB,IAAI,WAAW,YAAY;IAC3B,IAAI,WAAW,MAAM;IACrB,IAAI,WAAW,cAAc;CAC9B,CAAA;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAC5D,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC9B,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,YAAY,CAAC,CAAA;IACnD,MAAM,OAAO,GAAG,UAAU,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACtF,MAAM,YAAY,GAAG,IAAI,GAAG,CAC1B,OAAO;SACJ,KAAK,CAAC,OAAO,CAAC;SACd,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,OAAO,CAAC,CACnB,CAAA;IACD,MAAM,cAAc,GAAG,uBAAuB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAA;IAE1F,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,KAAK,CAAA;IACd,CAAC;IAED,MAAM,cAAc,GAAG,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,eAAe,CAAC,CAAA;IACvF,MAAM,KAAK,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,EAAE,GAAG,cAAc,CAAC;SACtE,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,SAAS,CAAC;SACpC,IAAI,CAAC,IAAI,CAAC,CAAA;IACb,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,EAAE,CAAA;IAChC,MAAM,IAAI,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,IAAI,CAAA;IAEzD,MAAM,SAAS,CAAC,aAAa,EAAE,IAAI,EAAE,MAAM,CAAC,CAAA;IAC5C,OAAO,IAAI,CAAA;AACb,CAAC"}
|
package/dist/ingest.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EACV,WAAW,EACX,aAAa,EACb,YAAY,EAIb,MAAM,YAAY,CAAA;
|
|
1
|
+
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAQA,OAAO,KAAK,EACV,WAAW,EACX,aAAa,EACb,YAAY,EAIb,MAAM,YAAY,CAAA;AAKnB,wBAAsB,MAAM,CAAC,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,CAAC,CA4D/E;AAED,wBAAsB,KAAK,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,WAAW,CAAC,CAoCrE"}
|
package/dist/ingest.js
CHANGED
|
@@ -7,6 +7,7 @@ import { parseFile } from "./parsing.js";
|
|
|
7
7
|
import { redactText, totalRedactions } from "./redaction.js";
|
|
8
8
|
import { openRowsTable, writeRows } from "./store.js";
|
|
9
9
|
const EMBED_BATCH_SIZE = 32;
|
|
10
|
+
const MAX_AUDIT_ROWS = 100_000;
|
|
10
11
|
export async function ingest(options = {}) {
|
|
11
12
|
const config = await loadConfig(String(options.cwd ?? process.cwd()));
|
|
12
13
|
const files = await listSourceFiles(config);
|
|
@@ -72,7 +73,7 @@ export async function audit(cwd = process.cwd()) {
|
|
|
72
73
|
totalChunks: 0,
|
|
73
74
|
};
|
|
74
75
|
}
|
|
75
|
-
const rows = (await table.query().limit(
|
|
76
|
+
const rows = (await table.query().limit(MAX_AUDIT_ROWS).toArray());
|
|
76
77
|
const counts = new Map();
|
|
77
78
|
for (const row of rows) {
|
|
78
79
|
counts.set(row.relativePath, (counts.get(row.relativePath) ?? 0) + 1);
|
package/dist/ingest.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAC5D,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAUrD,MAAM,gBAAgB,GAAG,EAAE,CAAA;
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAC5D,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAUrD,MAAM,gBAAgB,GAAG,EAAE,CAAA;AAC3B,MAAM,cAAc,GAAG,OAAO,CAAA;AAE9B,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,UAAyB,EAAE;IACtD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAA;IAC3C,MAAM,SAAS,GAAgB,EAAE,CAAA;IACjC,MAAM,MAAM,GAA2B,EAAE,CAAA;IACzC,MAAM,eAAe,GAAqB,EAAE,CAAA;IAC5C,IAAI,YAAY,GAAG,CAAC,CAAA;IAEpB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAA;YACpC,MAAM,QAAQ,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YAChD,eAAe,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAA;YACxC,MAAM,MAAM,GAAG,aAAa,CAC1B,EAAE,GAAG,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,EAClC,MAAM,CAAC,SAAS,EAChB,MAAM,CAAC,YAAY,CACpB,CAAA;YACD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,YAAY,IAAI,CAAC,CAAA;YACnB,CAAC;YACD,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAA;QAC3B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,IAAI,CAAC,YAAY;gBACvB,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAChE,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAgB,EAAE,CAAA;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,gBAAgB,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,CAAA;QACtD,MAAM,UAAU,GAAG,MAAM,UAAU,CACjC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,EAChC,MAAM,CACP,CAAA;QACD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CA
AC,CAAA;YAChC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,KAAK,CAAC,YAAY,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,CAAA;YAC3F,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,MAAM,EAAE,CAAC,CAAA;QACjC,CAAC;IACH,CAAC;IAED,MAAM,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IAC7B,MAAM,YAAY,CAAC,MAAM,EAAE;QACzB,MAAM,EAAE,QAAQ;QAChB,WAAW,EAAE,IAAI,CAAC,MAAM;QACxB,UAAU,EAAE,eAAe,CAAC,eAAe,CAAC;KAC7C,CAAC,CAAA;IAEF,OAAO;QACL,YAAY,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI;QAC/D,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,YAAY;QACZ,UAAU,EAAE,eAAe,CAAC,eAAe,CAAC;QAC5C,MAAM;KACP,CAAA;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAC7C,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;IACpC,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAA;IAC3C,MAAM,cAAc,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;IAC7D,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAA;IAEzC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO;YACL,YAAY,EAAE,EAAE;YAChB,cAAc;YACd,gBAAgB,EAAE,cAAc;YAChC,YAAY,EAAE,EAAE;YAChB,WAAW,EAAE,CAAC;SACf,CAAA;IACH,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,OAAO,EAAE,CAE/D,CAAA;IACF,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAA;IACxC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IACvE,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,CAAA;IAC5C,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAA;IAEzC,OAAO;QACL,YAAY,EAAE,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;aAChC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;aACtC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAClD,cAAc;QACd,gBAAgB,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACxE,YAAY,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,
GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE;QAC9E,WAAW,EAAE,IAAI,CAAC,MAAM;KACzB,CAAA;AACH,CAAC"}
|
package/dist/init.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"AAMA,wBAAsB,WAAW,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAwCxE"}
|
package/dist/init.js
CHANGED
|
@@ -1,36 +1,16 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import { CONFIG_PATH, DEFAULT_CONFIG, KB_DIR, PRIVATE_DIR } from "./defaults.js";
|
|
4
5
|
import { ensureMimirGitignore } from "./gitignore.js";
|
|
5
|
-
const DEFAULT_CONFIG = {
|
|
6
|
-
rawDir: "private",
|
|
7
|
-
storageDir: ".kb/storage",
|
|
8
|
-
sourcesFile: ".kb/sources.txt",
|
|
9
|
-
accessLogPath: ".kb/access.log",
|
|
10
|
-
tableName: "chunks",
|
|
11
|
-
ollamaHost: "http://localhost:11434",
|
|
12
|
-
networkPolicy: "local-only",
|
|
13
|
-
embedModel: "nomic-embed-text",
|
|
14
|
-
llmModel: "gemma4:latest",
|
|
15
|
-
redaction: {
|
|
16
|
-
enabled: true,
|
|
17
|
-
builtIn: true,
|
|
18
|
-
patterns: [],
|
|
19
|
-
},
|
|
20
|
-
accessLog: true,
|
|
21
|
-
mcpMaxTopK: 10,
|
|
22
|
-
topK: 5,
|
|
23
|
-
chunkSize: 1200,
|
|
24
|
-
chunkOverlap: 150,
|
|
25
|
-
};
|
|
26
6
|
export async function initProject(cwd = process.cwd()) {
|
|
27
7
|
const root = path.resolve(cwd);
|
|
28
|
-
const kbDir = path.join(root,
|
|
29
|
-
const privateDir = path.join(root,
|
|
8
|
+
const kbDir = path.join(root, KB_DIR);
|
|
9
|
+
const privateDir = path.join(root, PRIVATE_DIR);
|
|
30
10
|
const created = [];
|
|
31
11
|
await mkdir(kbDir, { recursive: true });
|
|
32
12
|
await mkdir(privateDir, { recursive: true });
|
|
33
|
-
const configPath = path.join(
|
|
13
|
+
const configPath = path.join(root, CONFIG_PATH);
|
|
34
14
|
if (!existsSync(configPath)) {
|
|
35
15
|
await writeFile(configPath, `${JSON.stringify(DEFAULT_CONFIG, null, 2)}\n`, "utf8");
|
|
36
16
|
created.push(path.relative(root, configPath));
|
package/dist/init.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.js","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"init.js","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAChF,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AAErD,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IACnD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAA;IAC/C,MAAM,OAAO,GAAa,EAAE,CAAA;IAE5B,MAAM,KAAK,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACvC,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE5C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAA;IAC/C,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5B,MAAM,SAAS,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QACnF,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAA;IAC/C,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACnD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,MAAM,SAAS,CACb,WAAW,EACX,8FAA8F,EAC9F,MAAM,CACP,CAAA;QACD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAA;IAChD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,CAAA;IACrD,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5B,MAAM,SAAS,CACb,UAAU,EACV,6FAA6F,EAC7F,MAAM,CACP,CAAA;QACD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAA;IAC/C,CAAC;IAED,IAAI,MAAM,oBAAoB,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;IAC5B,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC"}
|
package/dist/mcp.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"AAeA,wBAAsB,QAAQ,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CA6EjE"}
|
package/dist/mcp.js
CHANGED
|
@@ -7,6 +7,10 @@ import { ask, search } from "./query.js";
|
|
|
7
7
|
import { securityAudit } from "./security.js";
|
|
8
8
|
import { countRows } from "./store.js";
|
|
9
9
|
import { VERSION } from "./version.js";
|
|
10
|
+
const queryToolInputSchema = z.object({
|
|
11
|
+
query: z.string().min(1),
|
|
12
|
+
topK: z.number().int().positive().optional(),
|
|
13
|
+
});
|
|
10
14
|
export async function serveMcp(cwd = process.cwd()) {
|
|
11
15
|
const server = new McpServer({
|
|
12
16
|
name: "mimir",
|
|
@@ -24,11 +28,14 @@ export async function serveMcp(cwd = process.cwd()) {
|
|
|
24
28
|
rawDir: config.rawDir,
|
|
25
29
|
storageDir: config.storageDir,
|
|
26
30
|
sourcesFile: config.sourcesFile,
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
31
|
+
embeddingProvider: config.embeddingProvider,
|
|
32
|
+
embeddingModel: config.embeddingModel,
|
|
33
|
+
embeddingModelPath: config.embeddingModelPath,
|
|
34
|
+
transformersAllowRemoteModels: config.transformersAllowRemoteModels,
|
|
35
|
+
llmGeneration: false,
|
|
30
36
|
redactionEnabled: config.redaction.enabled,
|
|
31
37
|
mcpMaxTopK: config.mcpMaxTopK,
|
|
38
|
+
includeExtensions: config.includeExtensions,
|
|
32
39
|
chunksIndexed,
|
|
33
40
|
};
|
|
34
41
|
return textResult(output);
|
|
@@ -36,18 +43,12 @@ export async function serveMcp(cwd = process.cwd()) {
|
|
|
36
43
|
server.registerTool("mimir_search", {
|
|
37
44
|
title: "Mimir Search",
|
|
38
45
|
description: "Retrieve relevant passages from the local Mimir knowledge base.",
|
|
39
|
-
inputSchema:
|
|
40
|
-
query: z.string().min(1),
|
|
41
|
-
topK: z.number().int().positive().optional(),
|
|
42
|
-
}),
|
|
46
|
+
inputSchema: queryToolInputSchema,
|
|
43
47
|
}, async ({ query, topK }) => textResult(await search(query, await searchOptions(cwd, topK))));
|
|
44
48
|
server.registerTool("mimir_ask", {
|
|
45
49
|
title: "Mimir Ask",
|
|
46
|
-
description: "
|
|
47
|
-
inputSchema:
|
|
48
|
-
query: z.string().min(1),
|
|
49
|
-
topK: z.number().int().positive().optional(),
|
|
50
|
-
}),
|
|
50
|
+
description: "Return cited retrieval context for a question without calling an LLM.",
|
|
51
|
+
inputSchema: queryToolInputSchema,
|
|
51
52
|
}, async ({ query, topK }) => textResult(await ask(query, await searchOptions(cwd, topK))));
|
|
52
53
|
server.registerTool("mimir_audit", {
|
|
53
54
|
title: "Mimir Audit",
|
|
@@ -56,7 +57,7 @@ export async function serveMcp(cwd = process.cwd()) {
|
|
|
56
57
|
}, async () => textResult(await audit(cwd)));
|
|
57
58
|
server.registerTool("mimir_security_audit", {
|
|
58
59
|
title: "Mimir Security Audit",
|
|
59
|
-
description: "Show local privacy,
|
|
60
|
+
description: "Show local privacy, provider, redaction, MCP, and gitignore posture.",
|
|
60
61
|
inputSchema: z.object({}),
|
|
61
62
|
}, async () => textResult(await securityAudit(cwd)));
|
|
62
63
|
await server.connect(new StdioServerTransport());
|
package/dist/mcp.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp.js","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AACvB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,YAAY,CAAA;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAEtC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAChD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO;KACjB,CAAC,CAAA;IAEF,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,0DAA0D;QACvE,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE;QACT,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;QACpC,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG;YACb,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,
|
|
1
|
+
{"version":3,"file":"mcp.js","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AACvB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,YAAY,CAAA;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAEtC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CAC7C,CAAC,CAAA;AAEF,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAChD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO;KACjB,CAAC,CAAA;IAEF,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,0DAA0D;QACvE,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE;QACT,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;QACpC,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG;YACb,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,kBAAkB,EAAE,MAAM,CAAC,kBAAkB;YAC7C,6BAA6B,EAAE,MAAM,CAAC,6BAA6B;YACnE,aAAa,EAAE,KAAK;YACpB,gBAAgB,EAAE,MAAM,CAAC,SAAS,CAAC,OAAO;YAC1C,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,aAAa;SACd,CAAA;QAED,OAAO,UAAU,CAAC,MAAM,CAAC,CAAA;IAC3B,CAAC,CACF,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,iEAAiE;QAC9E,WAAW,EAAE,oBAAoB;KAClC,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,MAAM,MAAM,CAAC,KAAK,EAAE,MAAM,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CAC3F,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,WAAW,EACX;QACE,KAAK,EAAE,WAAW;QAClB,WAAW,EAAE,uEAAuE;QACpF,WAAW,EAAE,oBAAoB;KAClC,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,UAAU,
CAAC,MAAM,GAAG,CAAC,KAAK,EAAE,MAAM,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CACxF,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,aAAa,EACb;QACE,KAAK,EAAE,aAAa;QACpB,WAAW,EAAE,uEAAuE;QACpF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC,CACzC,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,sBAAsB,EACtB;QACE,KAAK,EAAE,sBAAsB;QAC7B,WAAW,EAAE,sEAAsE;QACnF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC,CACjD,CAAA;IAED,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,oBAAoB,EAAE,CAAC,CAAA;AAClD,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;aACrC;SACF;KACF,CAAA;AACH,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,GAAW,EACX,IAAwB;IAExB,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;IACpC,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,CAAC,CAAA;IACpE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;AACnC,CAAC"}
|
package/dist/parsing.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parsing.d.ts","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parsing.d.ts","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAI5D,wBAAsB,SAAS,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,cAAc,CAAC,CA8CzE"}
|
package/dist/parsing.js
CHANGED
|
@@ -1,13 +1,29 @@
|
|
|
1
1
|
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { strFromU8, unzipSync } from "fflate";
|
|
2
3
|
import { htmlToText } from "html-to-text";
|
|
3
4
|
import { extractText, getDocumentProxy } from "unpdf";
|
|
4
5
|
import YAML from "yaml";
|
|
6
|
+
const MAX_OFFICE_XML_ENTRY_BYTES = 25_000_000;
|
|
5
7
|
export async function parseFile(file) {
|
|
6
8
|
let text;
|
|
7
9
|
switch (file.extension) {
|
|
8
10
|
case ".pdf":
|
|
9
11
|
text = await parsePdf(file.absolutePath);
|
|
10
12
|
break;
|
|
13
|
+
case ".docx":
|
|
14
|
+
text = await parseDocx(file.absolutePath);
|
|
15
|
+
break;
|
|
16
|
+
case ".pptx":
|
|
17
|
+
text = await parsePptx(file.absolutePath);
|
|
18
|
+
break;
|
|
19
|
+
case ".xlsx":
|
|
20
|
+
text = await parseXlsx(file.absolutePath);
|
|
21
|
+
break;
|
|
22
|
+
case ".odt":
|
|
23
|
+
case ".ods":
|
|
24
|
+
case ".odp":
|
|
25
|
+
text = await parseOpenDocument(file.absolutePath);
|
|
26
|
+
break;
|
|
11
27
|
case ".html":
|
|
12
28
|
case ".htm":
|
|
13
29
|
text = htmlToText(await readFile(file.absolutePath, "utf8"), {
|
|
@@ -25,11 +41,133 @@ export async function parseFile(file) {
|
|
|
25
41
|
case ".yml":
|
|
26
42
|
text = YAML.stringify(YAML.parse(await readFile(file.absolutePath, "utf8")));
|
|
27
43
|
break;
|
|
44
|
+
case ".rtf":
|
|
45
|
+
text = stripRtf(await readFile(file.absolutePath, "utf8"));
|
|
46
|
+
break;
|
|
28
47
|
default:
|
|
29
48
|
text = await readFile(file.absolutePath, "utf8");
|
|
30
49
|
}
|
|
31
50
|
return { file, text: normalizeText(text) };
|
|
32
51
|
}
|
|
52
|
+
async function parseDocx(filePath) {
|
|
53
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
54
|
+
return xmlEntriesToText(entries, [
|
|
55
|
+
/^word\/document\.xml$/u,
|
|
56
|
+
/^word\/header\d*\.xml$/u,
|
|
57
|
+
/^word\/footer\d*\.xml$/u,
|
|
58
|
+
/^word\/footnotes\.xml$/u,
|
|
59
|
+
/^word\/endnotes\.xml$/u,
|
|
60
|
+
/^word\/comments\.xml$/u,
|
|
61
|
+
]);
|
|
62
|
+
}
|
|
63
|
+
async function parsePptx(filePath) {
|
|
64
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
65
|
+
return xmlEntriesToText(entries, [
|
|
66
|
+
/^ppt\/slides\/slide\d+\.xml$/u,
|
|
67
|
+
/^ppt\/notesSlides\/notesSlide\d+\.xml$/u,
|
|
68
|
+
]);
|
|
69
|
+
}
|
|
70
|
+
async function parseXlsx(filePath) {
|
|
71
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
72
|
+
const sharedStrings = parseSharedStrings(entries.get("xl/sharedStrings.xml") ?? "");
|
|
73
|
+
const sheets = [...entries.entries()]
|
|
74
|
+
.filter(([name]) => /^xl\/worksheets\/sheet\d+\.xml$/u.test(name))
|
|
75
|
+
.sort(([a], [b]) => a.localeCompare(b));
|
|
76
|
+
const rows = [];
|
|
77
|
+
for (const [name, xml] of sheets) {
|
|
78
|
+
const values = parseSheetValues(xml, sharedStrings);
|
|
79
|
+
if (values.length > 0) {
|
|
80
|
+
rows.push(`# ${name}`, values.join("\n"));
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
return rows.join("\n\n");
|
|
84
|
+
}
|
|
85
|
+
async function parseOpenDocument(filePath) {
|
|
86
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
87
|
+
return xmlEntriesToText(entries, [/^content\.xml$/u, /^meta\.xml$/u]);
|
|
88
|
+
}
|
|
89
|
+
function unzipOfficeFile(buffer) {
|
|
90
|
+
const unzipped = unzipSync(new Uint8Array(buffer), {
|
|
91
|
+
filter: (file) => file.originalSize <= MAX_OFFICE_XML_ENTRY_BYTES,
|
|
92
|
+
});
|
|
93
|
+
const entries = new Map();
|
|
94
|
+
for (const [name, content] of Object.entries(unzipped)) {
|
|
95
|
+
if (name.endsWith(".xml")) {
|
|
96
|
+
entries.set(name, strFromU8(content));
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
return entries;
|
|
100
|
+
}
|
|
101
|
+
function xmlEntriesToText(entries, patterns) {
|
|
102
|
+
const parts = [];
|
|
103
|
+
for (const [name, xml] of [...entries.entries()].sort(([a], [b]) => a.localeCompare(b))) {
|
|
104
|
+
if (patterns.some((pattern) => pattern.test(name))) {
|
|
105
|
+
const text = xmlToText(xml);
|
|
106
|
+
if (text) {
|
|
107
|
+
parts.push(text);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
return parts.join("\n\n");
|
|
112
|
+
}
|
|
113
|
+
function parseSharedStrings(xml) {
|
|
114
|
+
return [...xml.matchAll(/<si\b[\s\S]*?<\/si>/gu)].map(([item]) => xmlToText(item));
|
|
115
|
+
}
|
|
116
|
+
function parseSheetValues(xml, sharedStrings) {
|
|
117
|
+
const rows = [];
|
|
118
|
+
for (const rowMatch of xml.matchAll(/<row\b[\s\S]*?<\/row>/gu)) {
|
|
119
|
+
const rowXml = rowMatch[0];
|
|
120
|
+
const values = [...rowXml.matchAll(/<c\b([^>]*)>([\s\S]*?)<\/c>/gu)]
|
|
121
|
+
.map((cellMatch) => {
|
|
122
|
+
const attrs = cellMatch[1] ?? "";
|
|
123
|
+
const cellXml = cellMatch[2] ?? "";
|
|
124
|
+
const inline = firstMatch(cellXml, /<is\b[\s\S]*?<\/is>/u);
|
|
125
|
+
if (inline) {
|
|
126
|
+
return xmlToText(inline);
|
|
127
|
+
}
|
|
128
|
+
const rawValue = firstMatch(cellXml, /<v>([\s\S]*?)<\/v>/u);
|
|
129
|
+
if (!rawValue) {
|
|
130
|
+
return "";
|
|
131
|
+
}
|
|
132
|
+
if (/\bt="s"/u.test(attrs)) {
|
|
133
|
+
return sharedStrings[Number.parseInt(rawValue, 10)] ?? "";
|
|
134
|
+
}
|
|
135
|
+
return decodeXmlEntities(rawValue);
|
|
136
|
+
})
|
|
137
|
+
.filter(Boolean);
|
|
138
|
+
if (values.length > 0) {
|
|
139
|
+
rows.push(values.join("\t"));
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return rows;
|
|
143
|
+
}
|
|
144
|
+
function firstMatch(input, pattern) {
|
|
145
|
+
const match = input.match(pattern);
|
|
146
|
+
return match?.[1] ?? match?.[0] ?? "";
|
|
147
|
+
}
|
|
148
|
+
function xmlToText(xml) {
|
|
149
|
+
return normalizeText(decodeXmlEntities(xml
|
|
150
|
+
.replace(/<w:tab\/>/gu, " ")
|
|
151
|
+
.replace(/<w:br\/>/gu, "\n")
|
|
152
|
+
.replace(/<\/(?:w:p|a:p|text:p|text:h|table:table-row)>/gu, "\n")
|
|
153
|
+
.replace(/<[^>]+>/gu, " ")
|
|
154
|
+
.replace(/[ \t]{2,}/gu, " ")));
|
|
155
|
+
}
|
|
156
|
+
function stripRtf(input) {
|
|
157
|
+
return input
|
|
158
|
+
.replace(/\\par[d]?/gu, "\n")
|
|
159
|
+
.replace(/\\'[0-9a-fA-F]{2}/gu, " ")
|
|
160
|
+
.replace(/\\[a-zA-Z]+-?\d* ?/gu, " ")
|
|
161
|
+
.replace(/[{}]/gu, " ");
|
|
162
|
+
}
|
|
163
|
+
function decodeXmlEntities(input) {
|
|
164
|
+
return input
|
|
165
|
+
.replace(/&lt;/gu, "<")
|
|
166
|
+
.replace(/&gt;/gu, ">")
|
|
167
|
+
.replace(/&quot;/gu, '"')
|
|
168
|
+
.replace(/&apos;/gu, "'")
|
|
169
|
+
.replace(/&amp;/gu, "&")
|
|
170
|
+
}
|
|
33
171
|
async function parsePdf(filePath) {
|
|
34
172
|
const buffer = await readFile(filePath);
|
|
35
173
|
const pdf = await getDocumentProxy(new Uint8Array(buffer));
|
package/dist/parsing.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parsing.js","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,OAAO,CAAA;AACrD,OAAO,IAAI,MAAM,MAAM,CAAA;AAGvB,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAgB;IAC9C,IAAI,IAAY,CAAA;IAEhB,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,KAAK,MAAM;YACT,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACxC,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,UAAU,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,EAAE;gBAC3D,QAAQ,EAAE,KAAK;gBACf,SAAS,EAAE;oBACT,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE;oBAChD,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE;iBACpC;aACF,CAAC,CAAA;YACF,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAA;YACrF,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC,CAAA;YAC5E,MAAK;QACP;YACE,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IACpD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAA;AAC5C,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,QAAgB;IACtC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAA;IACvC,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAA;IAC1D,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAA;IAC3D,OAAO,MAAM,CAAC,IAAI,CAAA;AACpB,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK;SACT,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC;SAC5B,IAAI,EAAE,CAAA;AACX,CAAC"}
|
|
1
|
+
{"version":3,"file":"parsing.js","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC3C,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,OAAO,CAAA;AACrD,OAAO,IAAI,MAAM,MAAM,CAAA;AAGvB,MAAM,0BAA0B,GAAG,UAAU,CAAA;AAE7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAgB;IAC9C,IAAI,IAAY,CAAA;IAEhB,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,KAAK,MAAM;YACT,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACxC,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACzC,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACzC,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACzC,MAAK;QACP,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM;YACT,IAAI,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACjD,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,UAAU,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,EAAE;gBAC3D,QAAQ,EAAE,KAAK;gBACf,SAAS,EAAE;oBACT,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE;oBAChD,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE;iBACpC;aACF,CAAC,CAAA;YACF,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAA;YACrF,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC,CAAA;YAC5E,MAAK;QACP,KAAK,MAAM;YACT,IAAI,GAAG,QAAQ,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;YAC1D,MAAK;QACP;YACE,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IACpD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAA;AAC5C,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,OAAO,gBAAgB,CAAC,OAAO,EAAE;QAC/B,wBAAwB;QACxB,yBAAyB;QACzB,yBAAyB;QACzB,yBAAyB;QACzB,wBAAwB;QACxB,wBAAwB;KA
CzB,CAAC,CAAA;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,OAAO,gBAAgB,CAAC,OAAO,EAAE;QAC/B,+BAA+B;QAC/B,yCAAyC;KAC1C,CAAC,CAAA;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,MAAM,aAAa,GAAG,kBAAkB,CAAC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAA;IACnF,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;SAClC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,kCAAkC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACjE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAA;IAEzC,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAA;QACnD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC3C,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;AAC1B,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,QAAgB;IAC/C,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,OAAO,gBAAgB,CAAC,OAAO,EAAE,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC,CAAA;AACvE,CAAC;AAED,SAAS,eAAe,CAAC,MAAc;IACrC,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE;QACjD,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,IAAI,0BAA0B;KAClE,CAAC,CAAA;IACF,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAA;IACzC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,OAAO,CAAC,CAAC,CAAA;QACvC,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,SAAS,gBAAgB,CAAC,OAA4B,EAAE,QAAkB;IACxE,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxF,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,
CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,CAAA;YAC3B,IAAI,IAAI,EAAE,CAAC;gBACT,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;AAC3B,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,OAAO,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC,uBAAuB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAA;AACpF,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW,EAAE,aAAuB;IAC5D,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,KAAK,MAAM,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,yBAAyB,CAAC,EAAE,CAAC;QAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAA;QAC1B,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,+BAA+B,CAAC,CAAC;aACjE,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;YACjB,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAChC,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAClC,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAA;YAC1D,IAAI,MAAM,EAAE,CAAC;gBACX,OAAO,SAAS,CAAC,MAAM,CAAC,CAAA;YAC1B,CAAC;YAED,MAAM,QAAQ,GAAG,UAAU,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAA;YAC3D,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,EAAE,CAAA;YACX,CAAC;YAED,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC3B,OAAO,aAAa,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YAC3D,CAAC;YACD,OAAO,iBAAiB,CAAC,QAAQ,CAAC,CAAA;QACpC,CAAC,CAAC;aACD,MAAM,CAAC,OAAO,CAAC,CAAA;QAElB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC9B,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,UAAU,CAAC,KAAa,EAAE,OAAe;IAChD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;IAClC,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;AACvC,CAAC;AAED,SAAS,SAAS,CAAC,GAAW;IAC5B,OAAO,aAAa,CAClB,iBAAiB,CACf,GAAG;SACA,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC;SAC3B,OAAO,CAAC,iDAAiD,EAAE,IAAI,CAAC;SAChE,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAC/B,CACF,CAAA;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,IAAI,CAAC;SAC5B,OAAO,CAAC,qBAAqB,EAAE,GAAG,CAAC;SAC
nC,OAAO,CAAC,sBAAsB,EAAE,GAAG,CAAC;SACpC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAA;AAC3B,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAa;IACtC,OAAO,KAAK;SACT,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAA;AAC5B,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,QAAgB;IACtC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAA;IACvC,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAA;IAC1D,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAA;IAC3D,OAAO,MAAM,CAAC,IAAI,CAAA;AACpB,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK;SACT,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC;SAC5B,IAAI,EAAE,CAAA;AACX,CAAC"}
|
package/dist/query.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAUxE,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CA2BhG;AAED,wBAAsB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,SAAS,CAAC,CAsBxF"}
|
package/dist/query.js
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import { Ollama } from "ollama";
|
|
2
1
|
import { recordAccess } from "./access-log.js";
|
|
3
2
|
import { loadConfig } from "./config.js";
|
|
4
3
|
import { embedText } from "./embeddings.js";
|
|
5
|
-
import { assertNetworkPolicy } from "./network.js";
|
|
6
4
|
import { openRowsTable } from "./store.js";
|
|
7
5
|
export async function search(query, options = {}) {
|
|
8
6
|
const config = await loadConfig(String(options.cwd ?? process.cwd()));
|
|
@@ -39,25 +37,6 @@ export async function ask(query, options = {}) {
|
|
|
39
37
|
sources,
|
|
40
38
|
};
|
|
41
39
|
}
|
|
42
|
-
const context = sources
|
|
43
|
-
.map((source, index) => `[${index + 1}] ${source.relativePath}#${source.chunkIndex}\n${source.text}`)
|
|
44
|
-
.join("\n\n---\n\n");
|
|
45
|
-
assertNetworkPolicy(config);
|
|
46
|
-
const client = new Ollama({ host: config.ollamaHost });
|
|
47
|
-
const response = await client.chat({
|
|
48
|
-
model: config.llmModel,
|
|
49
|
-
messages: [
|
|
50
|
-
{
|
|
51
|
-
role: "system",
|
|
52
|
-
content: "Answer only from the provided context. If the context is insufficient, say what is missing. Cite sources with [1], [2], etc.",
|
|
53
|
-
},
|
|
54
|
-
{
|
|
55
|
-
role: "user",
|
|
56
|
-
content: `Question:\n${query}\n\nContext:\n${context}`,
|
|
57
|
-
},
|
|
58
|
-
],
|
|
59
|
-
stream: false,
|
|
60
|
-
});
|
|
61
40
|
await recordAccess(config, {
|
|
62
41
|
action: "ask",
|
|
63
42
|
query,
|
|
@@ -65,8 +44,21 @@ export async function ask(query, options = {}) {
|
|
|
65
44
|
resultCount: sources.length,
|
|
66
45
|
});
|
|
67
46
|
return {
|
|
68
|
-
answer:
|
|
47
|
+
answer: retrievalOnlyAnswer(sources),
|
|
69
48
|
sources,
|
|
70
49
|
};
|
|
71
50
|
}
|
|
51
|
+
function retrievalOnlyAnswer(sources) {
|
|
52
|
+
const snippets = sources
|
|
53
|
+
.map((source, index) => {
|
|
54
|
+
const text = source.text.replace(/\s+/gu, " ").trim();
|
|
55
|
+
return `[${index + 1}] ${source.relativePath}#${source.chunkIndex}: ${text}`;
|
|
56
|
+
})
|
|
57
|
+
.join("\n\n");
|
|
58
|
+
return [
|
|
59
|
+
"Mimir returns retrieval context only. Use these passages as grounded context for your agent or LLM:",
|
|
60
|
+
"",
|
|
61
|
+
snippets,
|
|
62
|
+
].join("\n");
|
|
63
|
+
}
|
|
72
64
|
//# sourceMappingURL=query.js.map
|
package/dist/query.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"query.js","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"query.js","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAC3C,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAW1C,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;IACrE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,EAAE,CAAA;IACX,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;IAC7C,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK;SACtB,YAAY,CAAC,MAAM,CAAC;SACpB,KAAK,CAAC,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC;SAClC,OAAO,EAAE,CAAgB,CAAA;IAE5B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACjC,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI;KACnE,CAAC,CAAC,CAAA;IACH,MAAM,YAAY,CAAC,MAAM,EAAE;QACzB,MAAM,EAAE,QAAQ;QAChB,KAAK;QACL,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI;QACjC,WAAW,EAAE,OAAO,CAAC,MAAM;KAC5B,CAAC,CAAA;IACF,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,KAAa,EAAE,UAAyB,EAAE;IAClE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;IAE5C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,MAAM,EAAE,2EAA2E;YACnF,OAAO;SACR,CAAA;IACH,CAAC;IAED,MAAM,YAAY,CAAC,MAAM,EAAE;QACzB,MAAM,EAAE,KAAK;QACb,KAAK;QACL,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI;QACjC,WAAW,EAAE,OAAO,CAAC,MAAM;KAC5B,CAAC,CAAA;IAEF,OAAO;QACL,MAAM,EAAE,mBAAmB,CAAC,OAAO,CAAC;QACpC,OAAO;KACR,CAAA;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAuB;IAClD,MAAM,QAAQ,GAAG,OAAO;SACrB,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;QACrB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QA
CrD,OAAO,IAAI,KAAK,GAAG,CAAC,KAAK,MAAM,CAAC,YAAY,IAAI,MAAM,CAAC,UAAU,KAAK,IAAI,EAAE,CAAA;IAC9E,CAAC,CAAC;SACD,IAAI,CAAC,MAAM,CAAC,CAAA;IAEf,OAAO;QACL,qGAAqG;QACrG,EAAE;QACF,QAAQ;KACT,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACd,CAAC"}
|