@oscharko-dev/keiko-local-knowledge 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bounded-document-extraction.d.ts +27 -0
- package/dist/bounded-document-extraction.d.ts.map +1 -0
- package/dist/bounded-document-extraction.js +214 -0
- package/dist/capsule-lifecycle.d.ts +33 -0
- package/dist/capsule-lifecycle.d.ts.map +1 -0
- package/dist/capsule-lifecycle.js +292 -0
- package/dist/capsule-set-lifecycle.d.ts +15 -0
- package/dist/capsule-set-lifecycle.d.ts.map +1 -0
- package/dist/capsule-set-lifecycle.js +158 -0
- package/dist/chunking/chunker-persist.d.ts +36 -0
- package/dist/chunking/chunker-persist.d.ts.map +1 -0
- package/dist/chunking/chunker-persist.js +74 -0
- package/dist/chunking/chunker-runner.d.ts +9 -0
- package/dist/chunking/chunker-runner.d.ts.map +1 -0
- package/dist/chunking/chunker-runner.js +218 -0
- package/dist/chunking/chunker.d.ts +7 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +139 -0
- package/dist/chunking/citation-mapper.d.ts +4 -0
- package/dist/chunking/citation-mapper.d.ts.map +1 -0
- package/dist/chunking/citation-mapper.js +180 -0
- package/dist/chunking/index.d.ts +6 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/token-estimator.d.ts +3 -0
- package/dist/chunking/token-estimator.d.ts.map +1 -0
- package/dist/chunking/token-estimator.js +26 -0
- package/dist/chunking/types.d.ts +49 -0
- package/dist/chunking/types.d.ts.map +1 -0
- package/dist/chunking/types.js +26 -0
- package/dist/composition.d.ts +57 -0
- package/dist/composition.d.ts.map +1 -0
- package/dist/composition.js +310 -0
- package/dist/conversation/citation-attacher.d.ts +8 -0
- package/dist/conversation/citation-attacher.d.ts.map +1 -0
- package/dist/conversation/citation-attacher.js +55 -0
- package/dist/conversation/citation-excerpts.d.ts +4 -0
- package/dist/conversation/citation-excerpts.d.ts.map +1 -0
- package/dist/conversation/citation-excerpts.js +41 -0
- package/dist/conversation/grounded-answer-runner.d.ts +9 -0
- package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
- package/dist/conversation/grounded-answer-runner.js +61 -0
- package/dist/conversation/index.d.ts +5 -0
- package/dist/conversation/index.d.ts.map +1 -0
- package/dist/conversation/index.js +7 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
- package/dist/conversation/model-gateway-answer-generator.js +105 -0
- package/dist/conversation/types.d.ts +35 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +24 -0
- package/dist/discovery/discovery-runner.d.ts +23 -0
- package/dist/discovery/discovery-runner.d.ts.map +1 -0
- package/dist/discovery/discovery-runner.js +109 -0
- package/dist/discovery/extract-progressive.d.ts +17 -0
- package/dist/discovery/extract-progressive.d.ts.map +1 -0
- package/dist/discovery/extract-progressive.js +522 -0
- package/dist/discovery/extract.d.ts +26 -0
- package/dist/discovery/extract.d.ts.map +1 -0
- package/dist/discovery/extract.js +906 -0
- package/dist/discovery/glob.d.ts +10 -0
- package/dist/discovery/glob.d.ts.map +1 -0
- package/dist/discovery/glob.js +72 -0
- package/dist/discovery/index.d.ts +6 -0
- package/dist/discovery/index.d.ts.map +1 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/media-type.d.ts +4 -0
- package/dist/discovery/media-type.d.ts.map +1 -0
- package/dist/discovery/media-type.js +62 -0
- package/dist/discovery/persist.d.ts +63 -0
- package/dist/discovery/persist.d.ts.map +1 -0
- package/dist/discovery/persist.js +345 -0
- package/dist/discovery/test-support.d.ts +16 -0
- package/dist/discovery/test-support.d.ts.map +1 -0
- package/dist/discovery/test-support.js +127 -0
- package/dist/discovery/types.d.ts +63 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +28 -0
- package/dist/discovery/walk.d.ts +12 -0
- package/dist/discovery/walk.d.ts.map +1 -0
- package/dist/discovery/walk.js +302 -0
- package/dist/errors.d.ts +13 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +22 -0
- package/dist/evaluations/dimensions.d.ts +14 -0
- package/dist/evaluations/dimensions.d.ts.map +1 -0
- package/dist/evaluations/dimensions.js +191 -0
- package/dist/evaluations/fixtures.d.ts +18 -0
- package/dist/evaluations/fixtures.d.ts.map +1 -0
- package/dist/evaluations/fixtures.js +858 -0
- package/dist/evaluations/index.d.ts +7 -0
- package/dist/evaluations/index.d.ts.map +1 -0
- package/dist/evaluations/index.js +10 -0
- package/dist/evaluations/report.d.ts +3 -0
- package/dist/evaluations/report.d.ts.map +1 -0
- package/dist/evaluations/report.js +31 -0
- package/dist/evaluations/runner-seed.d.ts +12 -0
- package/dist/evaluations/runner-seed.d.ts.map +1 -0
- package/dist/evaluations/runner-seed.js +175 -0
- package/dist/evaluations/runner.d.ts +8 -0
- package/dist/evaluations/runner.d.ts.map +1 -0
- package/dist/evaluations/runner.js +205 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
- package/dist/evaluations/scripted-embedding-adapter.js +163 -0
- package/dist/evaluations/types.d.ts +116 -0
- package/dist/evaluations/types.d.ts.map +1 -0
- package/dist/evaluations/types.js +27 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/indexing/bounded-indexing.d.ts +41 -0
- package/dist/indexing/bounded-indexing.d.ts.map +1 -0
- package/dist/indexing/bounded-indexing.js +240 -0
- package/dist/indexing/checkpoint-persist.d.ts +8 -0
- package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
- package/dist/indexing/checkpoint-persist.js +135 -0
- package/dist/indexing/checkpoint-resume.d.ts +20 -0
- package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
- package/dist/indexing/checkpoint-resume.js +50 -0
- package/dist/indexing/embedding-batcher.d.ts +3 -0
- package/dist/indexing/embedding-batcher.d.ts.map +1 -0
- package/dist/indexing/embedding-batcher.js +390 -0
- package/dist/indexing/index.d.ts +7 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +11 -0
- package/dist/indexing/job-persist.d.ts +46 -0
- package/dist/indexing/job-persist.d.ts.map +1 -0
- package/dist/indexing/job-persist.js +157 -0
- package/dist/indexing/job-resume.d.ts +4 -0
- package/dist/indexing/job-resume.d.ts.map +1 -0
- package/dist/indexing/job-resume.js +14 -0
- package/dist/indexing/orchestrator.d.ts +3 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -0
- package/dist/indexing/orchestrator.js +1151 -0
- package/dist/indexing/types.d.ts +156 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +30 -0
- package/dist/indexing/vector-persist.d.ts +32 -0
- package/dist/indexing/vector-persist.d.ts.map +1 -0
- package/dist/indexing/vector-persist.js +105 -0
- package/dist/parsers/_internal.d.ts +20 -0
- package/dist/parsers/_internal.d.ts.map +1 -0
- package/dist/parsers/_internal.js +122 -0
- package/dist/parsers/csv-parser.d.ts +3 -0
- package/dist/parsers/csv-parser.d.ts.map +1 -0
- package/dist/parsers/csv-parser.js +202 -0
- package/dist/parsers/docx-parser.d.ts +3 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +390 -0
- package/dist/parsers/html-parser.d.ts +3 -0
- package/dist/parsers/html-parser.d.ts.map +1 -0
- package/dist/parsers/html-parser.js +310 -0
- package/dist/parsers/index.d.ts +15 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +41 -0
- package/dist/parsers/json-parser.d.ts +3 -0
- package/dist/parsers/json-parser.d.ts.map +1 -0
- package/dist/parsers/json-parser.js +192 -0
- package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
- package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
- package/dist/parsers/large-document/capability-discovery.js +76 -0
- package/dist/parsers/large-document/diagnostics.d.ts +3 -0
- package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
- package/dist/parsers/large-document/diagnostics.js +11 -0
- package/dist/parsers/large-document/index.d.ts +15 -0
- package/dist/parsers/large-document/index.d.ts.map +1 -0
- package/dist/parsers/large-document/index.js +10 -0
- package/dist/parsers/large-document/legacy-format.d.ts +5 -0
- package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
- package/dist/parsers/large-document/legacy-format.js +25 -0
- package/dist/parsers/large-document/preflight.d.ts +9 -0
- package/dist/parsers/large-document/preflight.d.ts.map +1 -0
- package/dist/parsers/large-document/preflight.js +43 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-extraction.js +123 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-pdf.js +145 -0
- package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
- package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
- package/dist/parsers/large-document/synthetic-source.js +101 -0
- package/dist/parsers/large-document/window-builder.d.ts +24 -0
- package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
- package/dist/parsers/large-document/window-builder.js +75 -0
- package/dist/parsers/ocr/index.d.ts +4 -0
- package/dist/parsers/ocr/index.d.ts.map +1 -0
- package/dist/parsers/ocr/index.js +4 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
- package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
- package/dist/parsers/ocr/types.d.ts +16 -0
- package/dist/parsers/ocr/types.d.ts.map +1 -0
- package/dist/parsers/ocr/types.js +4 -0
- package/dist/parsers/parser-test-fixtures.d.ts +28 -0
- package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
- package/dist/parsers/parser-test-fixtures.js +139 -0
- package/dist/parsers/pdf-parser.d.ts +43 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +388 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +57 -0
- package/dist/parsers/text-parser.d.ts +3 -0
- package/dist/parsers/text-parser.d.ts.map +1 -0
- package/dist/parsers/text-parser.js +214 -0
- package/dist/parsers/types.d.ts +53 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +21 -0
- package/dist/parsers/unsupported-parser.d.ts +4 -0
- package/dist/parsers/unsupported-parser.d.ts.map +1 -0
- package/dist/parsers/unsupported-parser.js +97 -0
- package/dist/parsers/xlsx-parser.d.ts +3 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +425 -0
- package/dist/privacy/audit-emitter.d.ts +5 -0
- package/dist/privacy/audit-emitter.d.ts.map +1 -0
- package/dist/privacy/audit-emitter.js +93 -0
- package/dist/privacy/diagnostic-redactor.d.ts +2 -0
- package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
- package/dist/privacy/diagnostic-redactor.js +153 -0
- package/dist/privacy/index.d.ts +5 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +6 -0
- package/dist/privacy/retention-applier.d.ts +5 -0
- package/dist/privacy/retention-applier.d.ts.map +1 -0
- package/dist/privacy/retention-applier.js +88 -0
- package/dist/privacy/types.d.ts +98 -0
- package/dist/privacy/types.d.ts.map +1 -0
- package/dist/privacy/types.js +12 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
- package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
- package/dist/qualityIntelligence/index.d.ts +3 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +5 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
- package/dist/qualityIntelligence/qiHandoff.js +82 -0
- package/dist/retrieval/answer-grounding.d.ts +9 -0
- package/dist/retrieval/answer-grounding.d.ts.map +1 -0
- package/dist/retrieval/answer-grounding.js +31 -0
- package/dist/retrieval/context-pack-assembler.d.ts +24 -0
- package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
- package/dist/retrieval/context-pack-assembler.js +50 -0
- package/dist/retrieval/index.d.ts +6 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +9 -0
- package/dist/retrieval/retrieval-runner.d.ts +10 -0
- package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
- package/dist/retrieval/retrieval-runner.js +163 -0
- package/dist/retrieval/scoped-vector-search.d.ts +24 -0
- package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
- package/dist/retrieval/scoped-vector-search.js +864 -0
- package/dist/retrieval/types.d.ts +28 -0
- package/dist/retrieval/types.d.ts.map +1 -0
- package/dist/retrieval/types.js +33 -0
- package/dist/section-path-hash.d.ts +3 -0
- package/dist/section-path-hash.d.ts.map +1 -0
- package/dist/section-path-hash.js +9 -0
- package/dist/source-lifecycle.d.ts +14 -0
- package/dist/source-lifecycle.d.ts.map +1 -0
- package/dist/source-lifecycle.js +155 -0
- package/dist/source-routing-validation.d.ts +11 -0
- package/dist/source-routing-validation.d.ts.map +1 -0
- package/dist/source-routing-validation.js +140 -0
- package/dist/store-content-cipher.d.ts +11 -0
- package/dist/store-content-cipher.d.ts.map +1 -0
- package/dist/store-content-cipher.js +67 -0
- package/dist/store-content-encryption.d.ts +12 -0
- package/dist/store-content-encryption.d.ts.map +1 -0
- package/dist/store-content-encryption.js +275 -0
- package/dist/store-paths.d.ts +6 -0
- package/dist/store-paths.d.ts.map +1 -0
- package/dist/store-paths.js +61 -0
- package/dist/store.d.ts +30 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +219 -0
- package/dist/testing.d.ts +47 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +170 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +43 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
// Test-only helpers for the discovery layer. Not exported from the package barrel —
|
|
2
|
+
// trust-8 (test-support naming) keeps production code from importing this module.
|
|
3
|
+
// Resolves a root-relative path to an absolute one inside the in-memory tree.
// An empty `rel` stands for the root itself; a trailing slash on `root` is never doubled.
function toAbs(root, rel) {
    if (rel === "") {
        return root;
    }
    const separator = root.endsWith("/") ? "" : "/";
    return `${root}${separator}${rel}`;
}
|
|
8
|
+
// Lists the immediate children of `dirAbs` in the in-memory tree.
//
// `files` is the map produced by buildMap (keys are root-relative paths). Each key is
// resolved with toAbs; anything directly under `dirAbs` becomes a file entry, and the first
// path segment of anything deeper becomes a directory entry. Directories are returned
// before files, each de-duplicated by name. No entry is ever reported as a symbolic link.
function entriesByPrefix(root, files, dirAbs) {
    // Mirror toAbs: never double a trailing slash. Previously a directory path such as
    // "/ws/" produced the prefix "/ws//", which matches none of the paths toAbs builds,
    // so listing a root with a trailing slash always came back empty.
    const prefix = dirAbs.endsWith("/") ? dirAbs : `${dirAbs}/`;
    const dirNames = new Set();
    const fileNames = new Set();
    for (const relPath of files.keys()) {
        const full = toAbs(root, relPath);
        if (!full.startsWith(prefix)) {
            continue;
        }
        const rest = full.slice(prefix.length);
        if (rest === "") {
            // The key resolves to the directory itself (e.g. an empty or slash-terminated
            // key); that is not a child and must not surface as an entry with an empty name.
            continue;
        }
        const slash = rest.indexOf("/");
        if (slash === -1) {
            fileNames.add(rest);
        }
        else {
            dirNames.add(rest.slice(0, slash));
        }
    }
    const dirs = [...dirNames].map((name) => ({
        name,
        isDirectory: true,
        isFile: false,
        isSymbolicLink: false,
    }));
    const filesArr = [...fileNames].map((name) => ({
        name,
        isDirectory: false,
        isFile: true,
        isSymbolicLink: false,
    }));
    return [...dirs, ...filesArr];
}
|
|
39
|
+
// Indexes the fixture descriptors by `relativePath`.
//
// String content is encoded as UTF-8 bytes; any other content value is stored as given.
// The optional `realPathOverride`, `hardLinkCount` and `isSymbolicLink` fields are copied
// onto the stored entry only when the descriptor defines them, so absent options stay absent.
function buildMap(files) {
    const utf8 = new TextEncoder();
    const byPath = new Map();
    for (const descriptor of files) {
        const content = typeof descriptor.content === "string"
            ? utf8.encode(descriptor.content)
            : descriptor.content;
        const optional = Object.fromEntries([
            ["realPathOverride", descriptor.realPathOverride],
            ["hardLinkCount", descriptor.hardLinkCount],
            ["isSymbolicLink", descriptor.isSymbolicLink],
        ].filter(([, value]) => value !== undefined));
        byPath.set(descriptor.relativePath, { content, ...optional });
    }
    return byPath;
}
|
|
54
|
+
// Builds the `stat` function of the in-memory filesystem.
//
// A path that `findKey` resolves to a stored file yields a file stat (byte size, symlink
// flag and hard-link count taken from the stored entry). Any other path is reported as a
// zero-size directory when it is the root or when at least one stored file lives beneath
// it; otherwise the returned function throws an `ENOENT` error.
function memoryStat(root, map, findKey) {
    return (absolutePath) => {
        const key = findKey(absolutePath);
        if (key !== undefined) {
            const entry = map.get(key);
            return {
                size: entry?.content.byteLength ?? 0,
                isFile: true,
                isDirectory: false,
                isSymbolicLink: entry?.isSymbolicLink ?? false,
                hardLinkCount: entry?.hardLinkCount,
            };
        }
        // Directories are implicit: they exist only as prefixes of stored file paths.
        const childPrefix = `${absolutePath}/`;
        let isDirectory = absolutePath === root;
        if (!isDirectory) {
            for (const storedKey of map.keys()) {
                if (toAbs(root, storedKey).startsWith(childPrefix)) {
                    isDirectory = true;
                    break;
                }
            }
        }
        if (!isDirectory) {
            throw new Error(`ENOENT: ${absolutePath}`);
        }
        return { size: 0, isFile: false, isDirectory: true, isSymbolicLink: false };
    };
}
|
|
73
|
+
// Creates an in-memory filesystem double rooted at `root` from a list of fixture
// descriptors (see buildMap for the descriptor fields that are honoured).
//
// Lookup is by exact absolute path: a path matches a stored file when toAbs(root, key)
// equals it. Behaviour of the returned methods:
//   - readFileUtf8 throws `ENOENT` synchronously for unknown paths.
//   - readFileBytes / readFileRange return a rejected promise for unknown paths, and
//     otherwise a view of the stored bytes clamped to the file length; counts and offsets
//     are floored and negative values are treated as 0.
//   - realPath returns the entry's `realPathOverride` when set, else the path unchanged.
//   - exists is true only for stored files and for the root itself; directories implied by
//     file paths are not reported as existing, although `stat` does report them.
export function memoryFs(root, files) {
    const map = buildMap(files);
    const findKey = (absolutePath) => Array.from(map.keys()).find((candidate) => toAbs(root, candidate) === absolutePath);
    const missing = (absolutePath) => new Error(`ENOENT: ${absolutePath}`);
    const bytesOf = (key) => map.get(key)?.content ?? new Uint8Array();
    const wholeNonNegative = (value) => Math.max(0, Math.floor(value));
    return {
        readFileUtf8: (absolutePath) => {
            const key = findKey(absolutePath);
            if (key === undefined) {
                throw missing(absolutePath);
            }
            return new TextDecoder("utf-8").decode(bytesOf(key));
        },
        stat: memoryStat(root, map, findKey),
        readDir: (absolutePath) => entriesByPrefix(root, map, absolutePath),
        realPath: (absolutePath) => {
            const key = findKey(absolutePath);
            if (key === undefined) {
                return absolutePath;
            }
            return map.get(key)?.realPathOverride ?? absolutePath;
        },
        exists: (absolutePath) => absolutePath === root || findKey(absolutePath) !== undefined,
        readFileBytes: (absolutePath, maxBytes) => {
            const key = findKey(absolutePath);
            if (key === undefined) {
                return Promise.reject(missing(absolutePath));
            }
            const buf = bytesOf(key);
            const cap = wholeNonNegative(maxBytes);
            return Promise.resolve(buf.subarray(0, Math.min(buf.length, cap)));
        },
        readFileRange: (absolutePath, startByte, length) => {
            const key = findKey(absolutePath);
            if (key === undefined) {
                return Promise.reject(missing(absolutePath));
            }
            const buf = bytesOf(key);
            const start = wholeNonNegative(startByte);
            const cap = wholeNonNegative(length);
            return Promise.resolve(buf.subarray(start, Math.min(buf.length, start + cap)));
        },
    };
}
|
|
116
|
+
// Builds a `folder` scope literal for tests. `recursive` defaults to true when the option
// is null or undefined; `includeGlobs` / `excludeGlobs` appear on the result only when the
// caller supplied them.
export function folderScope(rootPath, options = {}) {
    const suppliedGlobs = Object.fromEntries(["includeGlobs", "excludeGlobs"]
        .filter((name) => options[name] !== undefined)
        .map((name) => [name, options[name]]));
    return {
        kind: "folder",
        rootPath,
        recursive: options.recursive ?? true,
        ...suppliedGlobs,
    };
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import type { DocumentId, DocumentRecord, KnowledgeCapsuleId, KnowledgeSourceId, ParserDiagnostic } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
/** One in-scope file reported by the scope walker (the `file` variant of `WalkYield`). */
export interface DiscoveredFile {
    /** Relative path of the file. NOTE(review): confirm whether this is relative to the scope root or the workspace root. */
    readonly relativePath: string;
    /** Size of the file in bytes. */
    readonly sizeBytes: number;
}
|
|
6
|
+
/** Limits and cancellation for a scope walk. */
export interface DiscoveryOptions {
    /** Depth limit for the walk. Presumably counted in directory levels below the scope root — confirm in walk.ts. */
    readonly maxDepth: number;
    /** File-count limit for the walk. Presumably the maximum number of files yielded — confirm in walk.ts. */
    readonly maxFiles: number;
    /** Optional cancellation signal for the walk. */
    readonly signal?: AbortSignal;
}
|
|
11
|
+
/** Default walk limits; the compiled module sets `maxDepth: 12` and `maxFiles: 5_000` and no `signal`. */
export declare const DEFAULT_DISCOVERY_OPTIONS: DiscoveryOptions;
|
|
12
|
+
/** Closed set of stable identifiers for discovery/extraction failures, so consumers can branch on the code rather than on the free-form message. */
export type DiscoveryErrorCode = "PATH_ESCAPE" | "READ_FAILED" | "OVERSIZED_FILE" | "UNSUPPORTED_FORMAT" | "CANCELLED" | "MALFORMED_INPUT" | "PARSER_TIMEOUT" | "PARSER_FAILED" | "STAT_FAILED" | "INVALID_SCOPE";
|
|
13
|
+
/** A discovery or extraction failure expressed as data rather than a thrown exception. */
export interface DiscoveryError {
    /** Stable identifier to branch on. */
    readonly code: DiscoveryErrorCode;
    /** Free-form description of the failure. */
    readonly message: string;
    /** Path of the file the error concerns; omitted when the error is not tied to a single file. */
    readonly relativePath?: string;
}
|
|
18
|
+
/**
 * Result of extracting one file, discriminated by `kind`. Every variant carries the
 * `DocumentRecord` for the file.
 */
export type ExtractionOutcome = {
    /** The document was persisted. */
    readonly kind: "persisted";
    readonly document: DocumentRecord;
} | {
    /** The document was skipped; the only declared reason is that it is unchanged. */
    readonly kind: "skipped";
    readonly document: DocumentRecord;
    readonly reason: "unchanged";
} | {
    /** Extraction failed; `error` says why. */
    readonly kind: "failed";
    readonly document: DocumentRecord;
    readonly error: DiscoveryError;
};
|
|
30
|
+
/** Everything reported for a single extracted file: where it belongs, how it ended, and any parser diagnostics. */
export interface ExtractionResult {
    /** Capsule the file belongs to. */
    readonly capsuleId: KnowledgeCapsuleId;
    /** Source the file belongs to. */
    readonly sourceId: KnowledgeSourceId;
    /** Relative path of the file. */
    readonly relativePath: string;
    /** Whether the file was persisted, skipped, or failed. */
    readonly outcome: ExtractionOutcome;
    /** Parser diagnostics attached to this file's result. */
    readonly diagnostics: readonly ParserDiagnostic[];
}
|
|
37
|
+
/** Progress event of a discovery/extraction run, discriminated by `kind`. */
export type ExtractionEvent = {
    /** A file was found in scope. */
    readonly kind: "file-discovered";
    readonly relativePath: string;
    readonly sizeBytes: number;
} | {
    /** A file finished extraction; `result` carries its outcome and diagnostics. */
    readonly kind: "file-extracted";
    readonly result: ExtractionResult;
} | {
    /** An error that concerns the scope rather than one extracted file. */
    readonly kind: "scope-error";
    readonly error: DiscoveryError;
} | {
    /** The run was cancelled; `reason` is a free-form explanation. */
    readonly kind: "cancelled";
    readonly reason: string;
} | {
    /** The run finished; the counters summarise it. */
    readonly kind: "completed";
    readonly totalDiscovered: number;
    readonly totalExtracted: number;
    readonly totalSkipped: number;
    readonly totalFailed: number;
};
|
|
57
|
+
/** The `(capsuleId, sourceId, relativePath)` tuple from which `documentIdFor` derives a document id. */
export interface DocumentIdSource {
    readonly capsuleId: KnowledgeCapsuleId;
    readonly sourceId: KnowledgeSourceId;
    readonly relativePath: string;
}
|
|
62
|
+
/**
 * Derives the document id for a file. The compiled implementation is deterministic in
 * `(capsuleId, sourceId, relativePath)`: it percent-escapes `%`, `#` and `:` in each member
 * and joins them as `doc:<capsuleId>:<sourceId>:<relativePath>`.
 */
export declare function documentIdFor(input: DocumentIdSource): DocumentId;
|
|
63
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/discovery/types.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EACV,UAAU,EACV,cAAc,EACd,kBAAkB,EAClB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,+BAA+B,CAAC;AAGvC,MAAM,WAAW,cAAc;IAE7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAE9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAGD,MAAM,WAAW,gBAAgB;IAG/B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAG1B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;IAG1B,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;CAC/B;AAED,eAAO,MAAM,yBAAyB,EAAE,gBAG9B,CAAC;AAOX,MAAM,MAAM,kBAAkB,GAC1B,aAAa,GACb,aAAa,GACb,gBAAgB,GAChB,oBAAoB,GACpB,WAAW,GACX,iBAAiB,GACjB,gBAAgB,GAChB,eAAe,GACf,aAAa,GACb,eAAe,CAAC;AAEpB,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,IAAI,EAAE,kBAAkB,CAAC;IAClC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAGD,MAAM,MAAM,iBAAiB,GAIzB;IAAE,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAAC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAA;CAAE,GAEjE;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IAAC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAAC,QAAQ,CAAC,MAAM,EAAE,WAAW,CAAA;CAAE,GAE7F;IAAE,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IAAC,QAAQ,CAAC,QAAQ,EAAE,cAAc,CAAC;IAAC,QAAQ,CAAC,KAAK,EAAE,cAAc,CAAA;CAAE,CAAC;AAEnG,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAC;IACvC,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAC;IACrC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,OAAO,EAAE,iBAAiB,CAAC;IACpC,QAAQ,CAAC,WAAW,EAAE,SAAS,gBAAgB,EAAE,CAAC;CACnD;AAGD,MAAM,MAAM,eAAe,GACvB;IACE,QAAQ,CAAC,IAAI,EAAE,iBAAiB,CAAC;IACjC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B,GACD;IACE,QAAQ,CAAC,IAAI,EAAE,gBAAgB,CAAC;IAChC,QAAQ,CAAC,MAAM,EAAE,gBAAgB,CAAC;CACnC,GACD;IACE,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;IAC7B,QAAQ,CAAC,KAAK,EAAE,cAAc,CAAC;CAChC,GACD;IACE,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAC3B,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;CACzB,GACD;IACE,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC;IAC3B,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B,CAAC;AAIN,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAC;IACvC,QAAQ,CAAC
,QAAQ,EAAE,iBAAiB,CAAC;IACrC,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CAC/B;AAaD,wBAAgB,aAAa,CAAC,KAAK,EAAE,gBAAgB,GAAG,UAAU,CAKjE"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// Discovery + extraction types for the Local Knowledge Connector (Epic #189, Issue #194).
|
|
2
|
+
// The discovery layer is the bridge between a `KnowledgeSource` and a `KnowledgeStore`:
|
|
3
|
+
// it walks files in scope via the workspace `WorkspaceFs` port, hands each file to the
|
|
4
|
+
// parser registry, and persists the document/page/section/parsed-unit/diagnostic rows.
|
|
5
|
+
// Chunks and vectors are NOT written here — those are #195 (chunking) and #196 (indexing).
|
|
6
|
+
//
|
|
7
|
+
// `DiscoveryError.code` is a closed string union so downstream UI surfaces and the eventual
|
|
8
|
+
// audit ledger entries can branch on stable identifiers rather than free-form messages.
|
|
9
|
+
// Walk limits used when a caller supplies no options: depth 12, 5000 files, no abort signal.
export const DEFAULT_DISCOVERY_OPTIONS = { maxDepth: 12, maxFiles: 5000 };
|
|
13
|
+
// Branded helper so callers (extract.ts) don't need to repeat the cast. The returned id is
|
|
14
|
+
// deterministic in `(capsuleId, sourceId, relativePath)` so a re-walk of the same scope
|
|
15
|
+
// targets the SAME `documents` row — that's the lineage anchor for incremental updates.
|
|
16
|
+
//
|
|
17
|
+
// Delimiter characters are percent-escaped before embedding so that the three tuple members
|
|
18
|
+
// stay injective even when source ids or paths contain `:`, `%`, or `#`. Ordinary paths keep
|
|
19
|
+
// their historical ids; only collision-capable characters are escaped.
|
|
20
|
+
// Percent-escapes the three characters that could make a document id ambiguous: `%`
// (the escape character itself), `#` and `:` (the id delimiter). Every other character is
// left untouched. A single pass over the input is equivalent to escaping `%` first.
function encodeDocumentIdComponent(value) {
    return value.replace(/[%#:]/g, (ch) => (ch === "%" ? "%25" : ch === "#" ? "%23" : "%3A"));
}
|
|
23
|
+
// Produces the document id `doc:<capsuleId>:<sourceId>:<relativePath>`, with each member
// escaped by encodeDocumentIdComponent so the `:` separators stay unambiguous. The ids are
// stringified first; `relativePath` is used as given.
export function documentIdFor(input) {
    const members = [String(input.capsuleId), String(input.sourceId), input.relativePath];
    const encoded = members.map((member) => encodeDocumentIdComponent(member));
    return `doc:${encoded.join(":")}`;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { KnowledgeSourceScope } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { WorkspaceFs } from "@oscharko-dev/keiko-workspace";
|
|
3
|
+
import { type DiscoveredFile, type DiscoveryError, type DiscoveryOptions } from "./types.js";
|
|
4
|
+
/** One item produced by `walkSource`: either an in-scope file or an error reported as data. */
export type WalkYield = {
    /** An in-scope file. */
    readonly kind: "file";
    readonly file: DiscoveredFile;
} | {
    /** A problem reported through the yield channel instead of being thrown. */
    readonly kind: "error";
    readonly error: DiscoveryError;
};
|
|
11
|
+
/**
 * Walks `scope` through the `fs` port and yields a `WalkYield` for each in-scope file or
 * reportable error. Returns a synchronous generator. `options` may be omitted —
 * presumably `DEFAULT_DISCOVERY_OPTIONS` then apply; confirm in walk.ts.
 */
export declare function walkSource(fs: WorkspaceFs, scope: KnowledgeSourceScope, options?: DiscoveryOptions): Generator<WalkYield>;
|
|
12
|
+
//# sourceMappingURL=walk.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"walk.d.ts","sourceRoot":"","sources":["../../src/discovery/walk.ts"],"names":[],"mappings":"AAmBA,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,+BAA+B,CAAC;AAE1E,OAAO,KAAK,EAAE,WAAW,EAAiB,MAAM,+BAA+B,CAAC;AAIhF,OAAO,EAEL,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACtB,MAAM,YAAY,CAAC;AAMpB,MAAM,MAAM,SAAS,GACjB;IAAE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAA;CAAE,GACxD;IAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IAAC,QAAQ,CAAC,KAAK,EAAE,cAAc,CAAA;CAAE,CAAC;AAqT/D,wBAAiB,UAAU,CACzB,EAAE,EAAE,WAAW,EACf,KAAK,EAAE,oBAAoB,EAC3B,OAAO,GAAE,gBAA4C,GACpD,SAAS,CAAC,SAAS,CAAC,CAoBtB"}
|
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
// Scope walker (Epic #189, Issue #194). Given a KnowledgeSourceScope and a WorkspaceFs port,
|
|
2
|
+
// yields each in-scope file as a `DiscoveredFile`. Boundary guarantees, in order of check:
|
|
3
|
+
//
|
|
4
|
+
// 1. The scope's rootPath is rejected when it fails the contract validator's safe-path
|
|
5
|
+
// gate (NUL, `..`, root markers, tilde, Windows drive prefix).
|
|
6
|
+
// 2. Every yielded file's `relativePath` joined to the scope root resolves via
|
|
7
|
+
// `WorkspaceFs.realPath` to a path STILL UNDER the scope root. A symlink whose
|
|
8
|
+
// realpath escapes the root is dropped and reported via `walkSource`'s second yield
|
|
9
|
+
// channel (a `kind: "error"` `WalkYield` record) rather than thrown — that lets the caller log a
|
|
10
|
+
// `PATH_ESCAPE` diagnostic against the file instead of aborting the whole walk.
|
|
11
|
+
// 3. The workspace deny list is enforced on every discovered descendant, including explicit
|
|
12
|
+
// `files` scopes. Hidden/generated-directory opt-in never relaxes the security deny list.
|
|
13
|
+
// 4. Include/exclude globs are applied on the workspace-relative POSIX path; exclude
|
|
14
|
+
// wins on overlap.
|
|
15
|
+
// 5. AbortSignal is checked at every directory boundary.
|
|
16
|
+
//
|
|
17
|
+
// Returns a synchronous generator of `WalkYield` values. The walker is otherwise PURE — no
|
|
18
|
+
// clock reads, no randomness — and the WorkspaceFs port is the only IO surface.
|
|
19
|
+
import { isSafeScopePath } from "@oscharko-dev/keiko-contracts";
|
|
20
|
+
import { isDenied } from "@oscharko-dev/keiko-workspace";
|
|
21
|
+
import { compileGlobList, matchesAny } from "./glob.js";
|
|
22
|
+
import { DEFAULT_DISCOVERY_OPTIONS, } from "./types.js";
|
|
23
|
+
// On Windows, WorkspaceFs.realPath() may return backslash-separated paths
|
|
24
|
+
// (e.g. C:\Users\workspace\file). Normalise both sides to forward slashes so
|
|
25
|
+
// containment checks and relative-path derivation work cross-platform.
|
|
26
|
+
// Converts every backslash in `p` to a forward slash so Windows-style and POSIX-style
// paths can be compared with plain string operations.
function normaliseSep(p) {
    return p.split("\\").join("/");
}
|
|
29
|
+
// Expresses `absolutePath` relative to `absoluteRoot` using forward slashes.
// Returns "" when the two are the same path, the remainder when the path lies under the
// root, and the separator-normalised path unchanged when it does not lie under the root.
function toPosixRelative(absoluteRoot, absolutePath) {
    const rootPosix = normaliseSep(absoluteRoot);
    const pathPosix = normaliseSep(absolutePath);
    if (pathPosix === rootPosix) {
        return "";
    }
    const rootWithSlash = rootPosix.endsWith("/") ? rootPosix : `${rootPosix}/`;
    return pathPosix.startsWith(rootWithSlash)
        ? pathPosix.slice(rootWithSlash.length)
        : pathPosix;
}
|
|
41
|
+
// True when `absolutePath` is the root itself or lies beneath it. The comparison is a
// string-prefix test on separator-normalised paths; the root is extended with a trailing
// slash first so that `/ws-other` is not mistaken for a child of `/ws`.
function isContained(absoluteRoot, absolutePath) {
    const rootPosix = normaliseSep(absoluteRoot);
    const pathPosix = normaliseSep(absolutePath);
    const rootWithSlash = rootPosix.endsWith("/") ? rootPosix : `${rootPosix}/`;
    return pathPosix === rootPosix || pathPosix.startsWith(rootWithSlash);
}
|
|
50
|
+
// Appends `name` to `root` with exactly one slash between them when `root` already ends
// in a slash, and inserts one otherwise.
function joinAbs(root, name) {
    const separator = root.endsWith("/") ? "" : "/";
    return `${root}${separator}${name}`;
}
|
|
56
|
+
// Translates a scope descriptor into the bounds the walker operates on:
// `{ rootPath, recursive, includeGlobs, excludeGlobs }`.
// Every path taken from the scope must pass `isSafeScopePath`; on the first failure an
// `{ code: "INVALID_SCOPE", message }` object is returned instead of bounds.
function deriveScopeBounds(scope) {
    const rejected = (message) => ({ code: "INVALID_SCOPE", message });
    switch (scope.kind) {
        case "folder": {
            if (!isSafeScopePath(scope.rootPath)) {
                return rejected("scope.rootPath failed the safe-path gate");
            }
            return {
                rootPath: scope.rootPath,
                recursive: scope.recursive,
                includeGlobs: compileGlobList(scope.includeGlobs),
                excludeGlobs: compileGlobList(scope.excludeGlobs),
            };
        }
        case "repository": {
            if (!isSafeScopePath(scope.repositoryRoot)) {
                return rejected("scope.repositoryRoot failed the safe-path gate");
            }
            // A repository scope is always walked recursively.
            return {
                rootPath: scope.repositoryRoot,
                recursive: true,
                includeGlobs: compileGlobList(scope.includeGlobs),
                excludeGlobs: compileGlobList(scope.excludeGlobs),
            };
        }
        default: {
            // Any remaining kind is handled as an explicit file list (`scope.files`).
            if (!isSafeScopePath(scope.rootPath)) {
                return rejected("scope.rootPath failed the safe-path gate");
            }
            for (const entry of scope.files) {
                if (!isSafeScopePath(entry)) {
                    return rejected(`scope.files entry failed the safe-path gate: ${entry}`);
                }
            }
            // Explicit entries need no globs and no recursion. The realpath containment
            // check still runs per file when the list is walked.
            return {
                rootPath: scope.rootPath,
                recursive: false,
                includeGlobs: [],
                excludeGlobs: [],
            };
        }
    }
}
|
|
102
|
+
// Builds the error item emitted when the caller cancels the walk. A fresh object is
// created on every call.
function abortYield() {
    const error = { code: "CANCELLED", message: "walk cancelled by caller" };
    return { kind: "error", error };
}
|
|
108
|
+
// Stats a candidate file without ever throwing.
// Returns one of:
//   - the stat result for `realPath` when it is a regular file,
//   - a `{ code: "READ_FAILED", message, relativePath }` object when either the requested
//     path or the resolved path reports a hard-link count greater than one,
//   - `undefined` when the resolved path is not a file or cannot be stat'ed.
function safeStatFile(fs, absolutePath, realPath, relativePath) {
    const ineligible = () => ({
        code: "READ_FAILED",
        message: "selected file is not eligible for extraction",
        relativePath,
    });
    try {
        const requested = fs.stat(absolutePath);
        if (requested.hardLinkCount !== undefined && requested.hardLinkCount > 1) {
            return ineligible();
        }
    }
    catch {
        // Best effort only: some WorkspaceFs fakes can stat just the canonical realPath
        // form, not the requested (possibly mixed-separator) path. Carry on to the
        // resolved path below.
    }
    try {
        const resolved = fs.stat(realPath);
        if (!resolved.isFile) {
            return undefined;
        }
        const multiplyLinked = resolved.hardLinkCount !== undefined && resolved.hardLinkCount > 1;
        return multiplyLinked ? ineligible() : resolved;
    }
    catch {
        return undefined;
    }
}
|
|
141
|
+
// Resolves `absolutePath` through `fs.realPath`, mapping any thrown error to `undefined`
// so callers can treat an unresolvable entry as "skip".
function safeRealPath(fs, absolutePath) {
    let resolved;
    try {
        resolved = fs.realPath(absolutePath);
    }
    catch {
        resolved = undefined;
    }
    return resolved;
}
|
|
149
|
+
// Directory names that are never descended into during a walk. Matching is by exact
// entry name.
const HIDDEN_OR_GENERATED_DIRS = new Set([
    // dot-prefixed directories
    ".git", ".hg", ".svn", ".next", ".turbo",
    // non-hidden directories
    "node_modules", "dist", "build", "coverage", "out",
]);
|
|
161
|
+
// Decides whether `relativePath` passes the scope's glob filters. Exclude patterns are
// consulted first and take precedence; only a non-excluded path is tested against the
// include patterns.
// NOTE(review): the boolean handed to `matchesAny` is presumably its result for an empty
// pattern list (empty include = everything, empty exclude = nothing) — confirm in ./glob.js.
function isGlobMatched(bounds, relativePath) {
    const excluded = matchesAny(bounds.excludeGlobs, relativePath, false);
    return excluded ? false : matchesAny(bounds.includeGlobs, relativePath, true);
}
|
|
169
|
+
// A directory is descended into only when its name neither starts with "." nor appears
// in HIDDEN_OR_GENERATED_DIRS.
function shouldDescendIntoDirectory(entryName) {
    if (entryName.startsWith(".")) {
        return false;
    }
    return !HIDDEN_OR_GENERATED_DIRS.has(entryName);
}
|
|
172
|
+
// A sub-directory is skipped when the scope is non-recursive, or when its name is one
// the walker refuses to descend into.
function shouldSkipDirectoryEntry(ctx, entryName) {
    return !(ctx.bounds.recursive && shouldDescendIntoDirectory(entryName));
}
|
|
175
|
+
// Local wrapper around the workspace package's `isDenied` check; the relative path is
// passed through unchanged and the result returned as-is.
function isDeniedRelativePath(relativePath) {
    const denied = isDenied(relativePath);
    return denied;
}
|
|
178
|
+
// Runs one candidate file through every gate and yields at most one item for it.
// Gates, in order:
//   1. deny check on the requested relative path          -> silent skip
//   2. realpath resolution                                -> silent skip on failure
//   3. containment of the resolved path in the real root  -> PATH_ESCAPE error item
//   4. deny check on the resolved relative path           -> silent skip
//   5. include/exclude globs on the requested path        -> silent skip
//   6. stat of the file                                   -> silent skip or error item
// On success `ctx.filesYielded` is incremented and a `{ kind: "file" }` item is yielded.
function* yieldFileIfAllowed(ctx, absolutePath, relativePath) {
    if (isDeniedRelativePath(relativePath)) {
        return;
    }
    // An unresolvable entry (for instance a transient broken symlink) is dropped quietly
    // rather than reported with a misleading diagnostic.
    const resolvedPath = safeRealPath(ctx.fs, absolutePath);
    if (resolvedPath === undefined) {
        return;
    }
    if (!isContained(ctx.realRootPath, resolvedPath)) {
        const error = {
            code: "PATH_ESCAPE",
            message: `entry escapes the scope root via realpath: ${relativePath}`,
            relativePath,
        };
        yield { kind: "error", error };
        return;
    }
    const resolvedRelative = toPosixRelative(ctx.realRootPath, resolvedPath);
    if (isDeniedRelativePath(resolvedRelative) || !isGlobMatched(ctx.bounds, relativePath)) {
        return;
    }
    const outcome = safeStatFile(ctx.fs, absolutePath, resolvedPath, relativePath);
    if (outcome === undefined) {
        return;
    }
    // A result carrying `code` is an error descriptor, not a stat record.
    if ("code" in outcome) {
        yield { kind: "error", error: outcome };
        return;
    }
    // Count before yielding so the limit checks in the callers see the updated total.
    ctx.filesYielded += 1;
    yield { kind: "file", file: { relativePath, sizeBytes: outcome.size } };
}
|
|
217
|
+
// Lists a directory through `fs.readDir`; a directory that cannot be read is treated as
// empty instead of propagating the error.
function safeReadDir(fs, absolutePath) {
    let listing;
    try {
        listing = fs.readDir(absolutePath);
    }
    catch {
        listing = [];
    }
    return listing;
}
|
|
225
|
+
// Read `signal?.aborted` through a function call so TypeScript control-flow analysis
|
|
226
|
+
// does NOT narrow the optional chain after the first false branch — a long iteration may
|
|
227
|
+
// observe abort between any two checks.
|
|
228
|
+
// Reports whether the caller's optional abort signal has fired. A missing signal counts
// as "not aborted", and only a literal `true` on `aborted` counts as aborted.
function isAborted(ctx) {
    const { signal } = ctx.options;
    return signal !== undefined && signal !== null && signal.aborted === true;
}
|
|
231
|
+
// Handles a single directory listing entry: sub-directories are recursed into (unless
// denied or skipped), regular files go through the file gates, and anything that is
// neither is ignored.
function* yieldDirectoryEntry(ctx, absoluteDir, entry, depth) {
    const entryAbsolute = joinAbs(absoluteDir, entry.name);
    const entryRelative = toPosixRelative(ctx.bounds.rootPath, entryAbsolute);
    if (entry.isDirectory) {
        const skipped = isDeniedRelativePath(entryRelative) || shouldSkipDirectoryEntry(ctx, entry.name);
        if (!skipped) {
            yield* descend(ctx, entryAbsolute, depth + 1);
        }
        return;
    }
    if (entry.isFile) {
        yield* yieldFileIfAllowed(ctx, entryAbsolute, entryRelative);
    }
}
|
|
246
|
+
// Walks one directory: lists it, orders the entries by name, and hands each entry to
// `yieldDirectoryEntry` (which may recurse back into `descend`).
//
// The walk of this directory stops early when:
//   - the caller's abort signal has fired (a CANCELLED error item is yielded first),
//   - `ctx.filesYielded` has reached `ctx.options.maxFiles`,
//   - `depth` exceeds `ctx.options.maxDepth`.
// The abort and file-limit conditions are re-checked before every entry because files
// yielded by earlier entries, or by deeper recursion, change them mid-loop.
function* descend(ctx, absoluteDir, depth) {
    if (isAborted(ctx)) {
        yield abortYield();
        return;
    }
    if (ctx.filesYielded >= ctx.options.maxFiles) {
        return;
    }
    if (depth > ctx.options.maxDepth) {
        return;
    }
    // Sort a copy so the listing returned by the fs port is never mutated. The comparator
    // is a proper three-way comparison (UTF-16 code-unit order) that returns 0 for equal
    // names; a comparator that never returns 0 is inconsistent and leaves the resulting
    // order implementation-defined.
    const byName = (a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0);
    const entries = [...safeReadDir(ctx.fs, absoluteDir)].sort(byName);
    for (const entry of entries) {
        if (ctx.filesYielded >= ctx.options.maxFiles) {
            return;
        }
        if (isAborted(ctx)) {
            yield abortYield();
            return;
        }
        yield* yieldDirectoryEntry(ctx, absoluteDir, entry, depth);
    }
}
|
|
269
|
+
// Walks an explicit list of root-relative file paths, in the order given. Before each
// entry the abort signal is checked (yielding a CANCELLED error item and stopping) and
// then the `maxFiles` limit (stopping silently).
function* walkFilesScope(ctx, files) {
    for (const relativePath of files) {
        if (isAborted(ctx)) {
            yield abortYield();
            return;
        }
        const limitReached = ctx.filesYielded >= ctx.options.maxFiles;
        if (limitReached) {
            return;
        }
        const absolutePath = joinAbs(ctx.bounds.rootPath, relativePath);
        yield* yieldFileIfAllowed(ctx, absolutePath, relativePath);
    }
}
|
|
282
|
+
// Entry point of the walker: a synchronous generator over `{ kind: "file" }` and
// `{ kind: "error" }` items for the given scope.
//   fs      - filesystem port; only `realPath`, `stat` and `readDir` are called on it
//   scope   - scope descriptor, validated by `deriveScopeBounds`
//   options - limits and optional abort signal; defaults to DEFAULT_DISCOVERY_OPTIONS
// An invalid scope or an unresolvable root produces a single error item and ends the walk.
export function* walkSource(fs, scope, options = DEFAULT_DISCOVERY_OPTIONS) {
    const bounds = deriveScopeBounds(scope);
    // A result carrying `code` is an INVALID_SCOPE error descriptor, not usable bounds.
    if ("code" in bounds) {
        yield { kind: "error", error: bounds };
        return;
    }
    const realRootPath = safeRealPath(fs, bounds.rootPath);
    if (realRootPath === undefined) {
        const error = { code: "READ_FAILED", message: "realPath failed for selected source root" };
        yield { kind: "error", error };
        return;
    }
    // Shared mutable walk state; `filesYielded` is updated as files are emitted.
    const ctx = { fs, bounds, realRootPath, options, filesYielded: 0 };
    const items = scope.kind === "files"
        ? walkFilesScope(ctx, scope.files)
        : descend(ctx, bounds.rootPath, 0);
    yield* items;
}
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
export declare class KnowledgeStoreError extends Error {
|
|
2
|
+
readonly name: string;
|
|
3
|
+
constructor(message: string, options?: {
|
|
4
|
+
readonly cause?: unknown;
|
|
5
|
+
});
|
|
6
|
+
}
|
|
7
|
+
export declare class KnowledgePathError extends KnowledgeStoreError {
|
|
8
|
+
readonly name: string;
|
|
9
|
+
}
|
|
10
|
+
export declare class KnowledgeNotFoundError extends KnowledgeStoreError {
|
|
11
|
+
readonly name: string;
|
|
12
|
+
}
|
|
13
|
+
//# sourceMappingURL=errors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"errors.d.ts","sourceRoot":"","sources":["../src/errors.ts"],"names":[],"mappings":"AAIA,qBAAa,mBAAoB,SAAQ,KAAK;IAC5C,SAAyB,IAAI,EAAE,MAAM,CAAyB;gBAC3C,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAK3E;AAKD,qBAAa,kBAAmB,SAAQ,mBAAmB;IACzD,SAAyB,IAAI,EAAE,MAAM,CAAwB;CAC9D;AAID,qBAAa,sBAAuB,SAAQ,mBAAmB;IAC7D,SAAyB,IAAI,EAAE,MAAM,CAA4B;CAClE"}
|
package/dist/errors.js
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// Typed errors for the local-knowledge runtime. All store APIs raise these so callers can
|
|
2
|
+
// distinguish path violations (caller bug) from missing rows (expected miss) from internal
|
|
3
|
+
// SQLite failures (corrupted state) without string-sniffing the message.
|
|
4
|
+
// Base class for the errors in this module. `message` and `options` are handed straight
// to the built-in Error constructor, so an `options.cause` supplied by the caller is
// attached to the error without being folded into the message text.
export class KnowledgeStoreError extends Error {
    constructor(message, options) {
        super(message, options);
        // Own `name` property so the error reports its class rather than "Error".
        this.name = "KnowledgeStoreError";
    }
}
|
|
12
|
+
// Raised by store-paths.ts for any input that would let the resolved on-disk path escape the
|
|
13
|
+
// caller-provided runtime-state directory. Distinct from KnowledgeStoreError so a test for
|
|
14
|
+
// "path containment fail-closed" cannot accidentally accept any other error class.
|
|
15
|
+
// Dedicated subclass for path-containment violations; differs from its base class only
// in the `name` it reports, so callers can select it with `instanceof`.
export class KnowledgePathError extends KnowledgeStoreError {
    constructor(message, options) {
        super(message, options);
        this.name = "KnowledgePathError";
    }
}
|
|
18
|
+
// Raised when a CRUD API targets a row that does not exist. Distinct so that retry/idempotency
|
|
19
|
+
// logic can branch on missing-row without catching unrelated failures.
|
|
20
|
+
// Dedicated subclass for operations that target a row which does not exist; differs from
// its base class only in the `name` it reports, so callers can select it with `instanceof`.
export class KnowledgeNotFoundError extends KnowledgeStoreError {
    constructor(message, options) {
        super(message, options);
        this.name = "KnowledgeNotFoundError";
    }
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { ChunkId, KnowledgeCapsuleId, RetrievalReference } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { ParsedUnit } from "@oscharko-dev/keiko-contracts";
|
|
3
|
+
export declare function scoreRecall(returned: readonly RetrievalReference[], expected: readonly ChunkId[]): number;
|
|
4
|
+
export declare function scorePrecision(returned: readonly RetrievalReference[], expected: readonly ChunkId[]): number;
|
|
5
|
+
export declare function scoreMeanReciprocalRank(returned: readonly RetrievalReference[], expected: readonly ChunkId[]): number;
|
|
6
|
+
export declare function scoreNdcg(returned: readonly RetrievalReference[], expected: readonly ChunkId[]): number;
|
|
7
|
+
export declare function scoreSourceIsolation(returned: readonly RetrievalReference[], scopeCapsuleIds: readonly KnowledgeCapsuleId[]): number;
|
|
8
|
+
type CitationRequirementKey = "page" | "section" | "json-path" | "csv-row" | "html-block" | "unsupported-media";
|
|
9
|
+
export declare function scoreCitationQuality(references: readonly RetrievalReference[], chunkUnitKinds: ReadonlyMap<string, CitationRequirementKey>): number;
|
|
10
|
+
export declare function citationRequirementForUnit(unit: ParsedUnit): CitationRequirementKey;
|
|
11
|
+
export type { CitationRequirementKey };
|
|
12
|
+
export declare function scoreNoEvidenceAccuracy(actualNoEvidence: boolean, expectedNoEvidence: boolean, actualReason?: string, expectedReason?: string): 0 | 1;
|
|
13
|
+
export declare function scoreContextBudgetFit(references: readonly RetrievalReference[], chunkTokenCounts: ReadonlyMap<string, number>, budgetTokens: number | undefined): number;
|
|
14
|
+
//# sourceMappingURL=dimensions.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dimensions.d.ts","sourceRoot":"","sources":["../../src/evaluations/dimensions.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EACV,OAAO,EACP,kBAAkB,EAClB,kBAAkB,EACnB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,+BAA+B,CAAC;AAMhE,wBAAgB,WAAW,CACzB,QAAQ,EAAE,SAAS,kBAAkB,EAAE,EACvC,QAAQ,EAAE,SAAS,OAAO,EAAE,GAC3B,MAAM,CASR;AAKD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,SAAS,kBAAkB,EAAE,EACvC,QAAQ,EAAE,SAAS,OAAO,EAAE,GAC3B,MAAM,CASR;AAMD,wBAAgB,uBAAuB,CACrC,QAAQ,EAAE,SAAS,kBAAkB,EAAE,EACvC,QAAQ,EAAE,SAAS,OAAO,EAAE,GAC3B,MAAM,CAUR;AAQD,wBAAgB,SAAS,CACvB,QAAQ,EAAE,SAAS,kBAAkB,EAAE,EACvC,QAAQ,EAAE,SAAS,OAAO,EAAE,GAC3B,MAAM,CAiBR;AASD,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,SAAS,kBAAkB,EAAE,EACvC,eAAe,EAAE,SAAS,kBAAkB,EAAE,GAC7C,MAAM,CAQR;AAcD,KAAK,sBAAsB,GACvB,MAAM,GACN,SAAS,GACT,WAAW,GACX,SAAS,GACT,YAAY,GACZ,mBAAmB,CAAC;AAyCxB,wBAAgB,oBAAoB,CAClC,UAAU,EAAE,SAAS,kBAAkB,EAAE,EACzC,cAAc,EAAE,WAAW,CAAC,MAAM,EAAE,sBAAsB,CAAC,GAC1D,MAAM,CAeR;AAID,wBAAgB,0BAA0B,CAAC,IAAI,EAAE,UAAU,GAAG,sBAAsB,CAEnF;AAED,YAAY,EAAE,sBAAsB,EAAE,CAAC;AAOvC,wBAAgB,uBAAuB,CACrC,gBAAgB,EAAE,OAAO,EACzB,kBAAkB,EAAE,OAAO,EAC3B,YAAY,CAAC,EAAE,MAAM,EACrB,cAAc,CAAC,EAAE,MAAM,GACtB,CAAC,GAAG,CAAC,CAMP;AAQD,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,SAAS,kBAAkB,EAAE,EACzC,gBAAgB,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,EAC7C,YAAY,EAAE,MAAM,GAAG,SAAS,GAC/B,MAAM,CAWR"}
|