@jcode.labs/mimir 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/CONTRIBUTING.md +28 -0
- package/README.md +307 -32
- package/SECURITY-HARDENING.md +194 -0
- package/SECURITY.md +21 -0
- package/dist/access-log.d.ts +10 -0
- package/dist/access-log.d.ts.map +1 -0
- package/dist/access-log.js +29 -0
- package/dist/access-log.js.map +1 -0
- package/dist/chunking.d.ts.map +1 -1
- package/dist/chunking.js +6 -3
- package/dist/chunking.js.map +1 -1
- package/dist/cli.js +151 -5
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +83 -20
- package/dist/config.js.map +1 -1
- package/dist/defaults.d.ts +11 -0
- package/dist/defaults.d.ts.map +1 -0
- package/dist/defaults.js +31 -0
- package/dist/defaults.js.map +1 -0
- package/dist/destroy.d.ts +3 -0
- package/dist/destroy.d.ts.map +1 -0
- package/dist/destroy.js +16 -0
- package/dist/destroy.js.map +1 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +85 -9
- package/dist/embeddings.js.map +1 -1
- package/dist/files.d.ts +2 -1
- package/dist/files.d.ts.map +1 -1
- package/dist/files.js +40 -3
- package/dist/files.js.map +1 -1
- package/dist/gitignore.d.ts +1 -1
- package/dist/gitignore.d.ts.map +1 -1
- package/dist/gitignore.js +8 -7
- package/dist/gitignore.js.map +1 -1
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/ingest.d.ts.map +1 -1
- package/dist/ingest.js +14 -2
- package/dist/ingest.js.map +1 -1
- package/dist/init.d.ts.map +1 -1
- package/dist/init.js +4 -15
- package/dist/init.js.map +1 -1
- package/dist/mcp.d.ts.map +1 -1
- package/dist/mcp.js +27 -15
- package/dist/mcp.js.map +1 -1
- package/dist/parsing.d.ts.map +1 -1
- package/dist/parsing.js +138 -0
- package/dist/parsing.js.map +1 -1
- package/dist/query.d.ts.map +1 -1
- package/dist/query.js +28 -20
- package/dist/query.js.map +1 -1
- package/dist/redaction.d.ts +7 -0
- package/dist/redaction.d.ts.map +1 -0
- package/dist/redaction.js +63 -0
- package/dist/redaction.js.map +1 -0
- package/dist/security.d.ts +3 -0
- package/dist/security.d.ts.map +1 -0
- package/dist/security.js +84 -0
- package/dist/security.js.map +1 -0
- package/dist/skill.d.ts +2 -1
- package/dist/skill.d.ts.map +1 -1
- package/dist/skill.js +24 -9
- package/dist/skill.js.map +1 -1
- package/dist/store.d.ts.map +1 -1
- package/dist/store.js +2 -1
- package/dist/store.js.map +1 -1
- package/dist/types.d.ts +68 -3
- package/dist/types.d.ts.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/examples/sovereign-rag-demo/.kb/config.json +22 -0
- package/examples/sovereign-rag-demo/.kb/sources.txt +2 -0
- package/examples/sovereign-rag-demo/README.md +80 -0
- package/examples/sovereign-rag-demo/raw/dataset-inventory.csv +5 -0
- package/examples/sovereign-rag-demo/raw/incident-timeline.jsonl +4 -0
- package/examples/sovereign-rag-demo/raw/operations-brief.md +16 -0
- package/examples/sovereign-rag-demo/raw/review-notes.evidence +11 -0
- package/examples/sovereign-rag-demo/raw/security-policy.yaml +14 -0
- package/package.json +28 -25
- package/skills/mimir/SKILL.md +77 -6
- package/skills/mimir-audio-summary/SKILL.md +134 -0
- package/skills/mimir-audio-summary/forge-voice.sh +153 -0
- package/skills/mimir-audio-summary/split-lines.py +13 -0
- package/skills/mimir-audio-summary/xtts-voice.py +46 -0
package/dist/ingest.js
CHANGED
|
@@ -1,20 +1,26 @@
|
|
|
1
|
+
import { recordAccess } from "./access-log.js";
|
|
1
2
|
import { chunkDocument } from "./chunking.js";
|
|
2
3
|
import { loadConfig } from "./config.js";
|
|
3
4
|
import { embedTexts } from "./embeddings.js";
|
|
4
5
|
import { listSourceFiles } from "./files.js";
|
|
5
6
|
import { parseFile } from "./parsing.js";
|
|
7
|
+
import { redactText, totalRedactions } from "./redaction.js";
|
|
6
8
|
import { openRowsTable, writeRows } from "./store.js";
|
|
7
9
|
const EMBED_BATCH_SIZE = 32;
|
|
10
|
+
const MAX_AUDIT_ROWS = 100_000;
|
|
8
11
|
export async function ingest(options = {}) {
|
|
9
12
|
const config = await loadConfig(String(options.cwd ?? process.cwd()));
|
|
10
13
|
const files = await listSourceFiles(config);
|
|
11
14
|
const allChunks = [];
|
|
12
15
|
const errors = [];
|
|
16
|
+
const redactionCounts = [];
|
|
13
17
|
let skippedFiles = 0;
|
|
14
18
|
for (const file of files) {
|
|
15
19
|
try {
|
|
16
20
|
const parsed = await parseFile(file);
|
|
17
|
-
const
|
|
21
|
+
const redacted = redactText(parsed.text, config);
|
|
22
|
+
redactionCounts.push(...redacted.counts);
|
|
23
|
+
const chunks = chunkDocument({ ...parsed, text: redacted.text }, config.chunkSize, config.chunkOverlap);
|
|
18
24
|
if (chunks.length === 0) {
|
|
19
25
|
skippedFiles += 1;
|
|
20
26
|
}
|
|
@@ -40,10 +46,16 @@ export async function ingest(options = {}) {
|
|
|
40
46
|
}
|
|
41
47
|
}
|
|
42
48
|
await writeRows(rows, config);
|
|
49
|
+
await recordAccess(config, {
|
|
50
|
+
action: "ingest",
|
|
51
|
+
resultCount: rows.length,
|
|
52
|
+
redactions: totalRedactions(redactionCounts),
|
|
53
|
+
});
|
|
43
54
|
return {
|
|
44
55
|
indexedFiles: new Set(rows.map((row) => row.relativePath)).size,
|
|
45
56
|
chunks: rows.length,
|
|
46
57
|
skippedFiles,
|
|
58
|
+
redactions: totalRedactions(redactionCounts),
|
|
47
59
|
errors,
|
|
48
60
|
};
|
|
49
61
|
}
|
|
@@ -61,7 +73,7 @@ export async function audit(cwd = process.cwd()) {
|
|
|
61
73
|
totalChunks: 0,
|
|
62
74
|
};
|
|
63
75
|
}
|
|
64
|
-
const rows = (await table.query().limit(
|
|
76
|
+
const rows = (await table.query().limit(MAX_AUDIT_ROWS).toArray());
|
|
65
77
|
const counts = new Map();
|
|
66
78
|
for (const row of rows) {
|
|
67
79
|
counts.set(row.relativePath, (counts.get(row.relativePath) ?? 0) + 1);
|
package/dist/ingest.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAA;AACxC,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAC5C,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAA;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAA;AACxC,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAA;AAC5D,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AAUrD,MAAM,gBAAgB,GAAG,EAAE,CAAA;AAC3B,MAAM,cAAc,GAAG,OAAO,CAAA;AAE9B,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,UAAyB,EAAE;IACtD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAA;IAC3C,MAAM,SAAS,GAAgB,EAAE,CAAA;IACjC,MAAM,MAAM,GAA2B,EAAE,CAAA;IACzC,MAAM,eAAe,GAAqB,EAAE,CAAA;IAC5C,IAAI,YAAY,GAAG,CAAC,CAAA;IAEpB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAA;YACpC,MAAM,QAAQ,GAAG,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YAChD,eAAe,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAA;YACxC,MAAM,MAAM,GAAG,aAAa,CAC1B,EAAE,GAAG,MAAM,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,EAClC,MAAM,CAAC,SAAS,EAChB,MAAM,CAAC,YAAY,CACpB,CAAA;YACD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,YAAY,IAAI,CAAC,CAAA;YACnB,CAAC;YACD,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAA;QAC3B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,IAAI,CAAC,YAAY;gBACvB,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAChE,CAAC,CAAA;QACJ,CAAC;IACH,CAAC;IAED,MAAM,IAAI,GAAgB,EAAE,CAAA;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,gBAAgB,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,CAAA;QACtD,MAAM,UAAU,GAAG,MAAM,UAAU,CACjC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,EAChC,MAAM,CACP,CAAA;QACD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YAC7C,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CA
AC,CAAA;YAChC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,KAAK,CAAC,YAAY,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC,CAAA;YAC3F,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,MAAM,EAAE,CAAC,CAAA;QACjC,CAAC;IACH,CAAC;IAED,MAAM,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IAC7B,MAAM,YAAY,CAAC,MAAM,EAAE;QACzB,MAAM,EAAE,QAAQ;QAChB,WAAW,EAAE,IAAI,CAAC,MAAM;QACxB,UAAU,EAAE,eAAe,CAAC,eAAe,CAAC;KAC7C,CAAC,CAAA;IAEF,OAAO;QACL,YAAY,EAAE,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI;QAC/D,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,YAAY;QACZ,UAAU,EAAE,eAAe,CAAC,eAAe,CAAC;QAC5C,MAAM;KACP,CAAA;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAC7C,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;IACpC,MAAM,KAAK,GAAG,MAAM,eAAe,CAAC,MAAM,CAAC,CAAA;IAC3C,MAAM,cAAc,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;IAC7D,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAA;IAEzC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO;YACL,YAAY,EAAE,EAAE;YAChB,cAAc;YACd,gBAAgB,EAAE,cAAc;YAChC,YAAY,EAAE,EAAE;YAChB,WAAW,EAAE,CAAC;SACf,CAAA;IACH,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,OAAO,EAAE,CAE/D,CAAA;IACF,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAA;IACxC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;IACvE,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,CAAA;IAC5C,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAA;IAEzC,OAAO;QACL,YAAY,EAAE,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;aAChC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;aACtC,GAAG,CAAC,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAClD,cAAc;QACd,gBAAgB,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACxE,YAAY,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,
GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE;QAC9E,WAAW,EAAE,IAAI,CAAC,MAAM;KACzB,CAAA;AACH,CAAC"}
|
package/dist/init.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"AAMA,wBAAsB,WAAW,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAwCxE"}
|
package/dist/init.js
CHANGED
|
@@ -1,27 +1,16 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import { CONFIG_PATH, DEFAULT_CONFIG, KB_DIR, PRIVATE_DIR } from "./defaults.js";
|
|
4
5
|
import { ensureMimirGitignore } from "./gitignore.js";
|
|
5
|
-
const DEFAULT_CONFIG = {
|
|
6
|
-
rawDir: "private",
|
|
7
|
-
storageDir: ".kb/storage",
|
|
8
|
-
sourcesFile: ".kb/sources.txt",
|
|
9
|
-
tableName: "chunks",
|
|
10
|
-
ollamaHost: "http://localhost:11434",
|
|
11
|
-
embedModel: "nomic-embed-text",
|
|
12
|
-
llmModel: "gemma4:latest",
|
|
13
|
-
topK: 5,
|
|
14
|
-
chunkSize: 1200,
|
|
15
|
-
chunkOverlap: 150,
|
|
16
|
-
};
|
|
17
6
|
export async function initProject(cwd = process.cwd()) {
|
|
18
7
|
const root = path.resolve(cwd);
|
|
19
|
-
const kbDir = path.join(root,
|
|
20
|
-
const privateDir = path.join(root,
|
|
8
|
+
const kbDir = path.join(root, KB_DIR);
|
|
9
|
+
const privateDir = path.join(root, PRIVATE_DIR);
|
|
21
10
|
const created = [];
|
|
22
11
|
await mkdir(kbDir, { recursive: true });
|
|
23
12
|
await mkdir(privateDir, { recursive: true });
|
|
24
|
-
const configPath = path.join(
|
|
13
|
+
const configPath = path.join(root, CONFIG_PATH);
|
|
25
14
|
if (!existsSync(configPath)) {
|
|
26
15
|
await writeFile(configPath, `${JSON.stringify(DEFAULT_CONFIG, null, 2)}\n`, "utf8");
|
|
27
16
|
created.push(path.relative(root, configPath));
|
package/dist/init.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"init.js","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"init.js","sourceRoot":"","sources":["../src/init.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,IAAI,MAAM,WAAW,CAAA;AAC5B,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAChF,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAA;AAErD,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IACnD,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACrC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAA;IAC/C,MAAM,OAAO,GAAa,EAAE,CAAA;IAE5B,MAAM,KAAK,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACvC,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IAE5C,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,WAAW,CAAC,CAAA;IAC/C,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5B,MAAM,SAAS,CAAC,UAAU,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QACnF,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAA;IAC/C,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACnD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,MAAM,SAAS,CACb,WAAW,EACX,8FAA8F,EAC9F,MAAM,CACP,CAAA;QACD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAA;IAChD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,CAAC,CAAA;IACrD,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5B,MAAM,SAAS,CACb,UAAU,EACV,6FAA6F,EAC7F,MAAM,CACP,CAAA;QACD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAA;IAC/C,CAAC;IAED,IAAI,MAAM,oBAAoB,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;IAC5B,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC"}
|
package/dist/mcp.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"AAeA,wBAAsB,QAAQ,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,CA6EjE"}
|
package/dist/mcp.js
CHANGED
|
@@ -4,8 +4,13 @@ import { z } from "zod";
|
|
|
4
4
|
import { loadConfig } from "./config.js";
|
|
5
5
|
import { audit } from "./ingest.js";
|
|
6
6
|
import { ask, search } from "./query.js";
|
|
7
|
+
import { securityAudit } from "./security.js";
|
|
7
8
|
import { countRows } from "./store.js";
|
|
8
9
|
import { VERSION } from "./version.js";
|
|
10
|
+
const queryToolInputSchema = z.object({
|
|
11
|
+
query: z.string().min(1),
|
|
12
|
+
topK: z.number().int().positive().optional(),
|
|
13
|
+
});
|
|
9
14
|
export async function serveMcp(cwd = process.cwd()) {
|
|
10
15
|
const server = new McpServer({
|
|
11
16
|
name: "mimir",
|
|
@@ -23,8 +28,14 @@ export async function serveMcp(cwd = process.cwd()) {
|
|
|
23
28
|
rawDir: config.rawDir,
|
|
24
29
|
storageDir: config.storageDir,
|
|
25
30
|
sourcesFile: config.sourcesFile,
|
|
26
|
-
|
|
27
|
-
|
|
31
|
+
embeddingProvider: config.embeddingProvider,
|
|
32
|
+
embeddingModel: config.embeddingModel,
|
|
33
|
+
embeddingModelPath: config.embeddingModelPath,
|
|
34
|
+
transformersAllowRemoteModels: config.transformersAllowRemoteModels,
|
|
35
|
+
llmGeneration: false,
|
|
36
|
+
redactionEnabled: config.redaction.enabled,
|
|
37
|
+
mcpMaxTopK: config.mcpMaxTopK,
|
|
38
|
+
includeExtensions: config.includeExtensions,
|
|
28
39
|
chunksIndexed,
|
|
29
40
|
};
|
|
30
41
|
return textResult(output);
|
|
@@ -32,24 +43,23 @@ export async function serveMcp(cwd = process.cwd()) {
|
|
|
32
43
|
server.registerTool("mimir_search", {
|
|
33
44
|
title: "Mimir Search",
|
|
34
45
|
description: "Retrieve relevant passages from the local Mimir knowledge base.",
|
|
35
|
-
inputSchema:
|
|
36
|
-
|
|
37
|
-
topK: z.number().int().positive().optional(),
|
|
38
|
-
}),
|
|
39
|
-
}, async ({ query, topK }) => textResult(await search(query, searchOptions(cwd, topK))));
|
|
46
|
+
inputSchema: queryToolInputSchema,
|
|
47
|
+
}, async ({ query, topK }) => textResult(await search(query, await searchOptions(cwd, topK))));
|
|
40
48
|
server.registerTool("mimir_ask", {
|
|
41
49
|
title: "Mimir Ask",
|
|
42
|
-
description: "
|
|
43
|
-
inputSchema:
|
|
44
|
-
|
|
45
|
-
topK: z.number().int().positive().optional(),
|
|
46
|
-
}),
|
|
47
|
-
}, async ({ query, topK }) => textResult(await ask(query, searchOptions(cwd, topK))));
|
|
50
|
+
description: "Return cited retrieval context for a question without calling an LLM.",
|
|
51
|
+
inputSchema: queryToolInputSchema,
|
|
52
|
+
}, async ({ query, topK }) => textResult(await ask(query, await searchOptions(cwd, topK))));
|
|
48
53
|
server.registerTool("mimir_audit", {
|
|
49
54
|
title: "Mimir Audit",
|
|
50
55
|
description: "Compare supported source files on disk with the current vector index.",
|
|
51
56
|
inputSchema: z.object({}),
|
|
52
57
|
}, async () => textResult(await audit(cwd)));
|
|
58
|
+
server.registerTool("mimir_security_audit", {
|
|
59
|
+
title: "Mimir Security Audit",
|
|
60
|
+
description: "Show local privacy, provider, redaction, MCP, and gitignore posture.",
|
|
61
|
+
inputSchema: z.object({}),
|
|
62
|
+
}, async () => textResult(await securityAudit(cwd)));
|
|
53
63
|
await server.connect(new StdioServerTransport());
|
|
54
64
|
}
|
|
55
65
|
function textResult(value) {
|
|
@@ -62,7 +72,9 @@ function textResult(value) {
|
|
|
62
72
|
],
|
|
63
73
|
};
|
|
64
74
|
}
|
|
65
|
-
function searchOptions(cwd, topK) {
|
|
66
|
-
|
|
75
|
+
async function searchOptions(cwd, topK) {
|
|
76
|
+
const config = await loadConfig(cwd);
|
|
77
|
+
const boundedTopK = Math.min(topK ?? config.topK, config.mcpMaxTopK);
|
|
78
|
+
return { cwd, topK: boundedTopK };
|
|
67
79
|
}
|
|
68
80
|
//# sourceMappingURL=mcp.js.map
|
package/dist/mcp.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp.js","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AACvB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,YAAY,CAAA;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAEtC,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAChD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO;KACjB,CAAC,CAAA;IAEF,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,0DAA0D;QACvE,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE;QACT,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;QACpC,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG;YACb,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,
|
|
1
|
+
{"version":3,"file":"mcp.js","sourceRoot":"","sources":["../src/mcp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAA;AACnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAA;AAChF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAA;AACvB,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAA;AACnC,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,YAAY,CAAA;AACxC,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAA;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAEtC,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;IACpC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CAC7C,CAAC,CAAA;AAEF,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,EAAE;IAChD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,OAAO;KACjB,CAAC,CAAA;IAEF,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,0DAA0D;QACvE,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE;QACT,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;QACpC,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,CAAA;QAC7C,MAAM,MAAM,GAAG;YACb,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM;YACrB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,kBAAkB,EAAE,MAAM,CAAC,kBAAkB;YAC7C,6BAA6B,EAAE,MAAM,CAAC,6BAA6B;YACnE,aAAa,EAAE,KAAK;YACpB,gBAAgB,EAAE,MAAM,CAAC,SAAS,CAAC,OAAO;YAC1C,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,iBAAiB,EAAE,MAAM,CAAC,iBAAiB;YAC3C,aAAa;SACd,CAAA;QAED,OAAO,UAAU,CAAC,MAAM,CAAC,CAAA;IAC3B,CAAC,CACF,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,cAAc,EACd;QACE,KAAK,EAAE,cAAc;QACrB,WAAW,EAAE,iEAAiE;QAC9E,WAAW,EAAE,oBAAoB;KAClC,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,MAAM,MAAM,CAAC,KAAK,EAAE,MAAM,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CAC3F,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,WAAW,EACX;QACE,KAAK,EAAE,WAAW;QAClB,WAAW,EAAE,uEAAuE;QACpF,WAAW,EAAE,oBAAoB;KAClC,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,UAAU,
CAAC,MAAM,GAAG,CAAC,KAAK,EAAE,MAAM,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CACxF,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,aAAa,EACb;QACE,KAAK,EAAE,aAAa;QACpB,WAAW,EAAE,uEAAuE;QACpF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC,CACzC,CAAA;IAED,MAAM,CAAC,YAAY,CACjB,sBAAsB,EACtB;QACE,KAAK,EAAE,sBAAsB;QAC7B,WAAW,EAAE,sEAAsE;QACnF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;KAC1B,EACD,KAAK,IAAI,EAAE,CAAC,UAAU,CAAC,MAAM,aAAa,CAAC,GAAG,CAAC,CAAC,CACjD,CAAA;IAED,MAAM,MAAM,CAAC,OAAO,CAAC,IAAI,oBAAoB,EAAE,CAAC,CAAA;AAClD,CAAC;AAED,SAAS,UAAU,CAAC,KAAc;IAChC,OAAO;QACL,OAAO,EAAE;YACP;gBACE,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;aACrC;SACF;KACF,CAAA;AACH,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,GAAW,EACX,IAAwB;IAExB,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,GAAG,CAAC,CAAA;IACpC,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,CAAC,CAAA;IACpE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,WAAW,EAAE,CAAA;AACnC,CAAC"}
|
package/dist/parsing.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parsing.d.ts","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parsing.d.ts","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,YAAY,CAAA;AAI5D,wBAAsB,SAAS,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,cAAc,CAAC,CA8CzE"}
|
package/dist/parsing.js
CHANGED
|
@@ -1,13 +1,29 @@
|
|
|
1
1
|
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { strFromU8, unzipSync } from "fflate";
|
|
2
3
|
import { htmlToText } from "html-to-text";
|
|
3
4
|
import { extractText, getDocumentProxy } from "unpdf";
|
|
4
5
|
import YAML from "yaml";
|
|
6
|
+
const MAX_OFFICE_XML_ENTRY_BYTES = 25_000_000;
|
|
5
7
|
export async function parseFile(file) {
|
|
6
8
|
let text;
|
|
7
9
|
switch (file.extension) {
|
|
8
10
|
case ".pdf":
|
|
9
11
|
text = await parsePdf(file.absolutePath);
|
|
10
12
|
break;
|
|
13
|
+
case ".docx":
|
|
14
|
+
text = await parseDocx(file.absolutePath);
|
|
15
|
+
break;
|
|
16
|
+
case ".pptx":
|
|
17
|
+
text = await parsePptx(file.absolutePath);
|
|
18
|
+
break;
|
|
19
|
+
case ".xlsx":
|
|
20
|
+
text = await parseXlsx(file.absolutePath);
|
|
21
|
+
break;
|
|
22
|
+
case ".odt":
|
|
23
|
+
case ".ods":
|
|
24
|
+
case ".odp":
|
|
25
|
+
text = await parseOpenDocument(file.absolutePath);
|
|
26
|
+
break;
|
|
11
27
|
case ".html":
|
|
12
28
|
case ".htm":
|
|
13
29
|
text = htmlToText(await readFile(file.absolutePath, "utf8"), {
|
|
@@ -25,11 +41,133 @@ export async function parseFile(file) {
|
|
|
25
41
|
case ".yml":
|
|
26
42
|
text = YAML.stringify(YAML.parse(await readFile(file.absolutePath, "utf8")));
|
|
27
43
|
break;
|
|
44
|
+
case ".rtf":
|
|
45
|
+
text = stripRtf(await readFile(file.absolutePath, "utf8"));
|
|
46
|
+
break;
|
|
28
47
|
default:
|
|
29
48
|
text = await readFile(file.absolutePath, "utf8");
|
|
30
49
|
}
|
|
31
50
|
return { file, text: normalizeText(text) };
|
|
32
51
|
}
|
|
52
|
+
async function parseDocx(filePath) {
|
|
53
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
54
|
+
return xmlEntriesToText(entries, [
|
|
55
|
+
/^word\/document\.xml$/u,
|
|
56
|
+
/^word\/header\d*\.xml$/u,
|
|
57
|
+
/^word\/footer\d*\.xml$/u,
|
|
58
|
+
/^word\/footnotes\.xml$/u,
|
|
59
|
+
/^word\/endnotes\.xml$/u,
|
|
60
|
+
/^word\/comments\.xml$/u,
|
|
61
|
+
]);
|
|
62
|
+
}
|
|
63
|
+
async function parsePptx(filePath) {
|
|
64
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
65
|
+
return xmlEntriesToText(entries, [
|
|
66
|
+
/^ppt\/slides\/slide\d+\.xml$/u,
|
|
67
|
+
/^ppt\/notesSlides\/notesSlide\d+\.xml$/u,
|
|
68
|
+
]);
|
|
69
|
+
}
|
|
70
|
+
async function parseXlsx(filePath) {
|
|
71
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
72
|
+
const sharedStrings = parseSharedStrings(entries.get("xl/sharedStrings.xml") ?? "");
|
|
73
|
+
const sheets = [...entries.entries()]
|
|
74
|
+
.filter(([name]) => /^xl\/worksheets\/sheet\d+\.xml$/u.test(name))
|
|
75
|
+
.sort(([a], [b]) => a.localeCompare(b));
|
|
76
|
+
const rows = [];
|
|
77
|
+
for (const [name, xml] of sheets) {
|
|
78
|
+
const values = parseSheetValues(xml, sharedStrings);
|
|
79
|
+
if (values.length > 0) {
|
|
80
|
+
rows.push(`# ${name}`, values.join("\n"));
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
return rows.join("\n\n");
|
|
84
|
+
}
|
|
85
|
+
async function parseOpenDocument(filePath) {
|
|
86
|
+
const entries = unzipOfficeFile(await readFile(filePath));
|
|
87
|
+
return xmlEntriesToText(entries, [/^content\.xml$/u, /^meta\.xml$/u]);
|
|
88
|
+
}
|
|
89
|
+
function unzipOfficeFile(buffer) {
|
|
90
|
+
const unzipped = unzipSync(new Uint8Array(buffer), {
|
|
91
|
+
filter: (file) => file.originalSize <= MAX_OFFICE_XML_ENTRY_BYTES,
|
|
92
|
+
});
|
|
93
|
+
const entries = new Map();
|
|
94
|
+
for (const [name, content] of Object.entries(unzipped)) {
|
|
95
|
+
if (name.endsWith(".xml")) {
|
|
96
|
+
entries.set(name, strFromU8(content));
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
return entries;
|
|
100
|
+
}
|
|
101
|
+
function xmlEntriesToText(entries, patterns) {
|
|
102
|
+
const parts = [];
|
|
103
|
+
for (const [name, xml] of [...entries.entries()].sort(([a], [b]) => a.localeCompare(b))) {
|
|
104
|
+
if (patterns.some((pattern) => pattern.test(name))) {
|
|
105
|
+
const text = xmlToText(xml);
|
|
106
|
+
if (text) {
|
|
107
|
+
parts.push(text);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
return parts.join("\n\n");
|
|
112
|
+
}
|
|
113
|
+
function parseSharedStrings(xml) {
|
|
114
|
+
return [...xml.matchAll(/<si\b[\s\S]*?<\/si>/gu)].map(([item]) => xmlToText(item));
|
|
115
|
+
}
|
|
116
|
+
function parseSheetValues(xml, sharedStrings) {
|
|
117
|
+
const rows = [];
|
|
118
|
+
for (const rowMatch of xml.matchAll(/<row\b[\s\S]*?<\/row>/gu)) {
|
|
119
|
+
const rowXml = rowMatch[0];
|
|
120
|
+
const values = [...rowXml.matchAll(/<c\b([^>]*)>([\s\S]*?)<\/c>/gu)]
|
|
121
|
+
.map((cellMatch) => {
|
|
122
|
+
const attrs = cellMatch[1] ?? "";
|
|
123
|
+
const cellXml = cellMatch[2] ?? "";
|
|
124
|
+
const inline = firstMatch(cellXml, /<is\b[\s\S]*?<\/is>/u);
|
|
125
|
+
if (inline) {
|
|
126
|
+
return xmlToText(inline);
|
|
127
|
+
}
|
|
128
|
+
const rawValue = firstMatch(cellXml, /<v>([\s\S]*?)<\/v>/u);
|
|
129
|
+
if (!rawValue) {
|
|
130
|
+
return "";
|
|
131
|
+
}
|
|
132
|
+
if (/\bt="s"/u.test(attrs)) {
|
|
133
|
+
return sharedStrings[Number.parseInt(rawValue, 10)] ?? "";
|
|
134
|
+
}
|
|
135
|
+
return decodeXmlEntities(rawValue);
|
|
136
|
+
})
|
|
137
|
+
.filter(Boolean);
|
|
138
|
+
if (values.length > 0) {
|
|
139
|
+
rows.push(values.join("\t"));
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
return rows;
|
|
143
|
+
}
|
|
144
|
+
function firstMatch(input, pattern) {
|
|
145
|
+
const match = input.match(pattern);
|
|
146
|
+
return match?.[1] ?? match?.[0] ?? "";
|
|
147
|
+
}
|
|
148
|
+
function xmlToText(xml) {
|
|
149
|
+
return normalizeText(decodeXmlEntities(xml
|
|
150
|
+
.replace(/<w:tab\/>/gu, " ")
|
|
151
|
+
.replace(/<w:br\/>/gu, "\n")
|
|
152
|
+
.replace(/<\/(?:w:p|a:p|text:p|text:h|table:table-row)>/gu, "\n")
|
|
153
|
+
.replace(/<[^>]+>/gu, " ")
|
|
154
|
+
.replace(/[ \t]{2,}/gu, " ")));
|
|
155
|
+
}
|
|
156
|
+
function stripRtf(input) {
|
|
157
|
+
return input
|
|
158
|
+
.replace(/\\par[d]?/gu, "\n")
|
|
159
|
+
.replace(/\\'[0-9a-fA-F]{2}/gu, " ")
|
|
160
|
+
.replace(/\\[a-zA-Z]+-?\d* ?/gu, " ")
|
|
161
|
+
.replace(/[{}]/gu, " ");
|
|
162
|
+
}
|
|
163
|
+
function decodeXmlEntities(input) {
|
|
164
|
+
return input
|
|
165
|
+
.replace(/&lt;/gu, "<")
|
|
166
|
+
.replace(/&gt;/gu, ">")
|
|
167
|
+
.replace(/&quot;/gu, '"')
|
|
168
|
+
.replace(/&apos;/gu, "'")
|
|
169
|
+
.replace(/&amp;/gu, "&")
|
|
170
|
+
}
|
|
33
171
|
async function parsePdf(filePath) {
|
|
34
172
|
const buffer = await readFile(filePath);
|
|
35
173
|
const pdf = await getDocumentProxy(new Uint8Array(buffer));
|
package/dist/parsing.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parsing.js","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,OAAO,CAAA;AACrD,OAAO,IAAI,MAAM,MAAM,CAAA;AAGvB,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAgB;IAC9C,IAAI,IAAY,CAAA;IAEhB,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,KAAK,MAAM;YACT,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACxC,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,UAAU,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,EAAE;gBAC3D,QAAQ,EAAE,KAAK;gBACf,SAAS,EAAE;oBACT,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE;oBAChD,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE;iBACpC;aACF,CAAC,CAAA;YACF,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAA;YACrF,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC,CAAA;YAC5E,MAAK;QACP;YACE,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IACpD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAA;AAC5C,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,QAAgB;IACtC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAA;IACvC,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAA;IAC1D,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAA;IAC3D,OAAO,MAAM,CAAC,IAAI,CAAA;AACpB,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK;SACT,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC;SAC5B,IAAI,EAAE,CAAA;AACX,CAAC"}
|
|
1
|
+
{"version":3,"file":"parsing.js","sourceRoot":"","sources":["../src/parsing.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;AAC3C,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAA;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,OAAO,CAAA;AACrD,OAAO,IAAI,MAAM,MAAM,CAAA;AAGvB,MAAM,0BAA0B,GAAG,UAAU,CAAA;AAE7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAgB;IAC9C,IAAI,IAAY,CAAA;IAEhB,QAAQ,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,KAAK,MAAM;YACT,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACxC,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACzC,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACzC,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACzC,MAAK;QACP,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,MAAM;YACT,IAAI,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YACjD,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,UAAU,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,EAAE;gBAC3D,QAAQ,EAAE,KAAK;gBACf,SAAS,EAAE;oBACT,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,EAAE;oBAChD,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE;iBACpC;aACF,CAAC,CAAA;YACF,MAAK;QACP,KAAK,OAAO;YACV,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,CAAA;YACrF,MAAK;QACP,KAAK,OAAO,CAAC;QACb,KAAK,MAAM;YACT,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC,CAAA;YAC5E,MAAK;QACP,KAAK,MAAM;YACT,IAAI,GAAG,QAAQ,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;YAC1D,MAAK;QACP;YACE,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,MAAM,CAAC,CAAA;IACpD,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,CAAC,IAAI,CAAC,EAAE,CAAA;AAC5C,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,OAAO,gBAAgB,CAAC,OAAO,EAAE;QAC/B,wBAAwB;QACxB,yBAAyB;QACzB,yBAAyB;QACzB,yBAAyB;QACzB,wBAAwB;QACxB,wBAAwB;KA
CzB,CAAC,CAAA;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,OAAO,gBAAgB,CAAC,OAAO,EAAE;QAC/B,+BAA+B;QAC/B,yCAAyC;KAC1C,CAAC,CAAA;AACJ,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,MAAM,aAAa,GAAG,kBAAkB,CAAC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,EAAE,CAAC,CAAA;IACnF,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC;SAClC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,kCAAkC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACjE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAA;IAEzC,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,EAAE,aAAa,CAAC,CAAA;QACnD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC3C,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;AAC1B,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,QAAgB;IAC/C,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,OAAO,gBAAgB,CAAC,OAAO,EAAE,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC,CAAA;AACvE,CAAC;AAED,SAAS,eAAe,CAAC,MAAc;IACrC,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE;QACjD,MAAM,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,YAAY,IAAI,0BAA0B;KAClE,CAAC,CAAA;IACF,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAA;IACzC,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvD,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,OAAO,CAAC,CAAC,CAAA;QACvC,CAAC;IACH,CAAC;IACD,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,SAAS,gBAAgB,CAAC,OAA4B,EAAE,QAAkB;IACxE,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxF,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,
CAAC,EAAE,CAAC;YACnD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,CAAA;YAC3B,IAAI,IAAI,EAAE,CAAC;gBACT,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;AAC3B,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,OAAO,CAAC,GAAG,GAAG,CAAC,QAAQ,CAAC,uBAAuB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAA;AACpF,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW,EAAE,aAAuB;IAC5D,MAAM,IAAI,GAAa,EAAE,CAAA;IACzB,KAAK,MAAM,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,yBAAyB,CAAC,EAAE,CAAC;QAC/D,MAAM,MAAM,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAA;QAC1B,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,+BAA+B,CAAC,CAAC;aACjE,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;YACjB,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAChC,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;YAClC,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,EAAE,sBAAsB,CAAC,CAAA;YAC1D,IAAI,MAAM,EAAE,CAAC;gBACX,OAAO,SAAS,CAAC,MAAM,CAAC,CAAA;YAC1B,CAAC;YAED,MAAM,QAAQ,GAAG,UAAU,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAA;YAC3D,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,OAAO,EAAE,CAAA;YACX,CAAC;YAED,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC3B,OAAO,aAAa,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YAC3D,CAAC;YACD,OAAO,iBAAiB,CAAC,QAAQ,CAAC,CAAA;QACpC,CAAC,CAAC;aACD,MAAM,CAAC,OAAO,CAAC,CAAA;QAElB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAA;QAC9B,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,SAAS,UAAU,CAAC,KAAa,EAAE,OAAe;IAChD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;IAClC,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;AACvC,CAAC;AAED,SAAS,SAAS,CAAC,GAAW;IAC5B,OAAO,aAAa,CAClB,iBAAiB,CACf,GAAG;SACA,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC;SAC3B,OAAO,CAAC,iDAAiD,EAAE,IAAI,CAAC;SAChE,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAC/B,CACF,CAAA;AACH,CAAC;AAED,SAAS,QAAQ,CAAC,KAAa;IAC7B,OAAO,KAAK;SACT,OAAO,CAAC,aAAa,EAAE,IAAI,CAAC;SAC5B,OAAO,CAAC,qBAAqB,EAAE,GAAG,CAAC;SAC
nC,OAAO,CAAC,sBAAsB,EAAE,GAAG,CAAC;SACpC,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAA;AAC3B,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAa;IACtC,OAAO,KAAK;SACT,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAA;AAC5B,CAAC;AAED,KAAK,UAAU,QAAQ,CAAC,QAAgB;IACtC,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAA;IACvC,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC,CAAA;IAC1D,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAA;IAC3D,OAAO,MAAM,CAAC,IAAI,CAAA;AACpB,CAAC;AAED,SAAS,aAAa,CAAC,KAAa;IAClC,OAAO,KAAK;SACT,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC;SAC1B,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC;SAC5B,IAAI,EAAE,CAAA;AACX,CAAC"}
|
package/dist/query.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAUxE,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"query.d.ts","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAUxE,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CA2BhG;AAED,wBAAsB,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE,aAAkB,GAAG,OAAO,CAAC,SAAS,CAAC,CAsBxF"}
|
package/dist/query.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { recordAccess } from "./access-log.js";
|
|
2
2
|
import { loadConfig } from "./config.js";
|
|
3
3
|
import { embedText } from "./embeddings.js";
|
|
4
4
|
import { openRowsTable } from "./store.js";
|
|
@@ -13,13 +13,20 @@ export async function search(query, options = {}) {
|
|
|
13
13
|
.vectorSearch(vector)
|
|
14
14
|
.limit(options.topK ?? config.topK)
|
|
15
15
|
.toArray());
|
|
16
|
-
|
|
16
|
+
const results = rows.map((row) => ({
|
|
17
17
|
source: row.source,
|
|
18
18
|
relativePath: row.relativePath,
|
|
19
19
|
chunkIndex: row.chunkIndex,
|
|
20
20
|
text: row.text,
|
|
21
21
|
distance: typeof row._distance === "number" ? row._distance : null,
|
|
22
22
|
}));
|
|
23
|
+
await recordAccess(config, {
|
|
24
|
+
action: "search",
|
|
25
|
+
query,
|
|
26
|
+
topK: options.topK ?? config.topK,
|
|
27
|
+
resultCount: results.length,
|
|
28
|
+
});
|
|
29
|
+
return results;
|
|
23
30
|
}
|
|
24
31
|
export async function ask(query, options = {}) {
|
|
25
32
|
const config = await loadConfig(String(options.cwd ?? process.cwd()));
|
|
@@ -30,27 +37,28 @@ export async function ask(query, options = {}) {
|
|
|
30
37
|
sources,
|
|
31
38
|
};
|
|
32
39
|
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
model: config.llmModel,
|
|
39
|
-
messages: [
|
|
40
|
-
{
|
|
41
|
-
role: "system",
|
|
42
|
-
content: "Answer only from the provided context. If the context is insufficient, say what is missing. Cite sources with [1], [2], etc.",
|
|
43
|
-
},
|
|
44
|
-
{
|
|
45
|
-
role: "user",
|
|
46
|
-
content: `Question:\n${query}\n\nContext:\n${context}`,
|
|
47
|
-
},
|
|
48
|
-
],
|
|
49
|
-
stream: false,
|
|
40
|
+
await recordAccess(config, {
|
|
41
|
+
action: "ask",
|
|
42
|
+
query,
|
|
43
|
+
topK: options.topK ?? config.topK,
|
|
44
|
+
resultCount: sources.length,
|
|
50
45
|
});
|
|
51
46
|
return {
|
|
52
|
-
answer:
|
|
47
|
+
answer: retrievalOnlyAnswer(sources),
|
|
53
48
|
sources,
|
|
54
49
|
};
|
|
55
50
|
}
|
|
51
|
+
function retrievalOnlyAnswer(sources) {
|
|
52
|
+
const snippets = sources
|
|
53
|
+
.map((source, index) => {
|
|
54
|
+
const text = source.text.replace(/\s+/gu, " ").trim();
|
|
55
|
+
return `[${index + 1}] ${source.relativePath}#${source.chunkIndex}: ${text}`;
|
|
56
|
+
})
|
|
57
|
+
.join("\n\n");
|
|
58
|
+
return [
|
|
59
|
+
"Mimir returns retrieval context only. Use these passages as grounded context for your agent or LLM:",
|
|
60
|
+
"",
|
|
61
|
+
snippets,
|
|
62
|
+
].join("\n");
|
|
63
|
+
}
|
|
56
64
|
//# sourceMappingURL=query.js.map
|
package/dist/query.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"query.js","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"query.js","sourceRoot":"","sources":["../src/query.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAC9C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAC3C,OAAO,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAW1C,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,KAAa,EAAE,UAAyB,EAAE;IACrE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,EAAE,CAAA;IACX,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAA;IAC7C,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK;SACtB,YAAY,CAAC,MAAM,CAAC;SACpB,KAAK,CAAC,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,CAAC;SAClC,OAAO,EAAE,CAAgB,CAAA;IAE5B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACjC,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI;KACnE,CAAC,CAAC,CAAA;IACH,MAAM,YAAY,CAAC,MAAM,EAAE;QACzB,MAAM,EAAE,QAAQ;QAChB,KAAK;QACL,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI;QACjC,WAAW,EAAE,OAAO,CAAC,MAAM;KAC5B,CAAC,CAAA;IACF,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,KAAa,EAAE,UAAyB,EAAE;IAClE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,CAAA;IAE5C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,MAAM,EAAE,2EAA2E;YACnF,OAAO;SACR,CAAA;IACH,CAAC;IAED,MAAM,YAAY,CAAC,MAAM,EAAE;QACzB,MAAM,EAAE,KAAK;QACb,KAAK;QACL,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI;QACjC,WAAW,EAAE,OAAO,CAAC,MAAM;KAC5B,CAAC,CAAA;IAEF,OAAO;QACL,MAAM,EAAE,mBAAmB,CAAC,OAAO,CAAC;QACpC,OAAO;KACR,CAAA;AACH,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAuB;IAClD,MAAM,QAAQ,GAAG,OAAO;SACrB,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE;QACrB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;QA
CrD,OAAO,IAAI,KAAK,GAAG,CAAC,KAAK,MAAM,CAAC,YAAY,IAAI,MAAM,CAAC,UAAU,KAAK,IAAI,EAAE,CAAA;IAC9E,CAAC,CAAC;SACD,IAAI,CAAC,MAAM,CAAC,CAAA;IAEf,OAAO;QACL,qGAAqG;QACrG,EAAE;QACF,QAAQ;KACT,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACd,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Config, RedactionCount } from "./types.js";
|
|
2
|
+
export declare function redactText(input: string, config: Config): {
|
|
3
|
+
text: string;
|
|
4
|
+
counts: RedactionCount[];
|
|
5
|
+
};
|
|
6
|
+
export declare function totalRedactions(counts: RedactionCount[]): number;
|
|
7
|
+
//# sourceMappingURL=redaction.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"redaction.d.ts","sourceRoot":"","sources":["../src/redaction.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,cAAc,EAAoB,MAAM,YAAY,CAAA;AAoC1E,wBAAgB,UAAU,CACxB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,GACb;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,cAAc,EAAE,CAAA;CAAE,CAyB5C;AAED,wBAAgB,eAAe,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,MAAM,CAEhE"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
const BUILT_IN_PATTERNS = [
|
|
2
|
+
{
|
|
3
|
+
name: "private_key",
|
|
4
|
+
pattern: "-----BEGIN [A-Z ]*PRIVATE KEY-----[\\s\\S]*?-----END [A-Z ]*PRIVATE KEY-----",
|
|
5
|
+
flags: "g",
|
|
6
|
+
},
|
|
7
|
+
{
|
|
8
|
+
name: "jwt",
|
|
9
|
+
pattern: "\\beyJ[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+\\.[A-Za-z0-9_-]+\\b",
|
|
10
|
+
flags: "g",
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
name: "api_token",
|
|
14
|
+
pattern: "\\b(?:sk|pk|ghp|gho|github_pat|npm)_[A-Za-z0-9_=-]{20,}\\b|\\b[A-Za-z0-9_-]{32,}\\.[A-Za-z0-9_-]{16,}\\.[A-Za-z0-9_-]{16,}\\b",
|
|
15
|
+
flags: "g",
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
name: "email",
|
|
19
|
+
pattern: "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\b",
|
|
20
|
+
flags: "gi",
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
name: "iban",
|
|
24
|
+
pattern: "\\b[A-Z]{2}\\d{2}[A-Z0-9]{11,30}\\b",
|
|
25
|
+
flags: "g",
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: "credit_card",
|
|
29
|
+
pattern: "\\b(?:\\d[ -]*?){13,19}\\b",
|
|
30
|
+
flags: "g",
|
|
31
|
+
},
|
|
32
|
+
];
|
|
33
|
+
export function redactText(input, config) {
|
|
34
|
+
if (!config.redaction.enabled) {
|
|
35
|
+
return { text: input, counts: [] };
|
|
36
|
+
}
|
|
37
|
+
let text = input;
|
|
38
|
+
const counts = [];
|
|
39
|
+
const patterns = [
|
|
40
|
+
...(config.redaction.builtIn ? BUILT_IN_PATTERNS : []),
|
|
41
|
+
...config.redaction.patterns,
|
|
42
|
+
];
|
|
43
|
+
for (const pattern of patterns) {
|
|
44
|
+
const regexp = compilePattern(pattern);
|
|
45
|
+
let count = 0;
|
|
46
|
+
text = text.replace(regexp, () => {
|
|
47
|
+
count += 1;
|
|
48
|
+
return pattern.replacement ?? `[REDACTED_${pattern.name.toUpperCase()}]`;
|
|
49
|
+
});
|
|
50
|
+
if (count > 0) {
|
|
51
|
+
counts.push({ name: pattern.name, count });
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return { text, counts };
|
|
55
|
+
}
|
|
56
|
+
export function totalRedactions(counts) {
|
|
57
|
+
return counts.reduce((total, entry) => total + entry.count, 0);
|
|
58
|
+
}
|
|
59
|
+
function compilePattern(pattern) {
|
|
60
|
+
const flags = pattern.flags?.includes("g") ? pattern.flags : `${pattern.flags ?? ""}g`;
|
|
61
|
+
return new RegExp(pattern.pattern, flags);
|
|
62
|
+
}
|
|
63
|
+
//# sourceMappingURL=redaction.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"redaction.js","sourceRoot":"","sources":["../src/redaction.ts"],"names":[],"mappings":"AAEA,MAAM,iBAAiB,GAAuB;IAC5C;QACE,IAAI,EAAE,aAAa;QACnB,OAAO,EAAE,8EAA8E;QACvF,KAAK,EAAE,GAAG;KACX;IACD;QACE,IAAI,EAAE,KAAK;QACX,OAAO,EAAE,2DAA2D;QACpE,KAAK,EAAE,GAAG;KACX;IACD;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EACL,+HAA+H;QACjI,KAAK,EAAE,GAAG;KACX;IACD;QACE,IAAI,EAAE,OAAO;QACb,OAAO,EAAE,8CAA8C;QACvD,KAAK,EAAE,IAAI;KACZ;IACD;QACE,IAAI,EAAE,MAAM;QACZ,OAAO,EAAE,qCAAqC;QAC9C,KAAK,EAAE,GAAG;KACX;IACD;QACE,IAAI,EAAE,aAAa;QACnB,OAAO,EAAE,4BAA4B;QACrC,KAAK,EAAE,GAAG;KACX;CACF,CAAA;AAED,MAAM,UAAU,UAAU,CACxB,KAAa,EACb,MAAc;IAEd,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;IACpC,CAAC;IAED,IAAI,IAAI,GAAG,KAAK,CAAA;IAChB,MAAM,MAAM,GAAqB,EAAE,CAAA;IACnC,MAAM,QAAQ,GAAG;QACf,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE,CAAC;QACtD,GAAG,MAAM,CAAC,SAAS,CAAC,QAAQ;KAC7B,CAAA;IAED,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,cAAc,CAAC,OAAO,CAAC,CAAA;QACtC,IAAI,KAAK,GAAG,CAAC,CAAA;QACb,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,EAAE;YAC/B,KAAK,IAAI,CAAC,CAAA;YACV,OAAO,OAAO,CAAC,WAAW,IAAI,aAAa,OAAO,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,CAAA;QAC1E,CAAC,CAAC,CAAA;QACF,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,CAAA;QAC5C,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAA;AACzB,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,MAAwB;IACtD,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAA;AAChE,CAAC;AAED,SAAS,cAAc,CAAC,OAAyB;IAC/C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,GAAG,CAAA;IACtF,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,CAAA;AAC3C,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"security.d.ts","sourceRoot":"","sources":["../src/security.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAA;AAErD,wBAAsB,aAAa,CAAC,GAAG,SAAgB,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAqErF"}
|