@oscharko-dev/keiko-local-knowledge 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bounded-document-extraction.d.ts +27 -0
- package/dist/bounded-document-extraction.d.ts.map +1 -0
- package/dist/bounded-document-extraction.js +214 -0
- package/dist/capsule-lifecycle.d.ts +33 -0
- package/dist/capsule-lifecycle.d.ts.map +1 -0
- package/dist/capsule-lifecycle.js +292 -0
- package/dist/capsule-set-lifecycle.d.ts +15 -0
- package/dist/capsule-set-lifecycle.d.ts.map +1 -0
- package/dist/capsule-set-lifecycle.js +158 -0
- package/dist/chunking/chunker-persist.d.ts +36 -0
- package/dist/chunking/chunker-persist.d.ts.map +1 -0
- package/dist/chunking/chunker-persist.js +74 -0
- package/dist/chunking/chunker-runner.d.ts +9 -0
- package/dist/chunking/chunker-runner.d.ts.map +1 -0
- package/dist/chunking/chunker-runner.js +218 -0
- package/dist/chunking/chunker.d.ts +7 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +139 -0
- package/dist/chunking/citation-mapper.d.ts +4 -0
- package/dist/chunking/citation-mapper.d.ts.map +1 -0
- package/dist/chunking/citation-mapper.js +180 -0
- package/dist/chunking/index.d.ts +6 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/token-estimator.d.ts +3 -0
- package/dist/chunking/token-estimator.d.ts.map +1 -0
- package/dist/chunking/token-estimator.js +26 -0
- package/dist/chunking/types.d.ts +49 -0
- package/dist/chunking/types.d.ts.map +1 -0
- package/dist/chunking/types.js +26 -0
- package/dist/composition.d.ts +57 -0
- package/dist/composition.d.ts.map +1 -0
- package/dist/composition.js +310 -0
- package/dist/conversation/citation-attacher.d.ts +8 -0
- package/dist/conversation/citation-attacher.d.ts.map +1 -0
- package/dist/conversation/citation-attacher.js +55 -0
- package/dist/conversation/citation-excerpts.d.ts +4 -0
- package/dist/conversation/citation-excerpts.d.ts.map +1 -0
- package/dist/conversation/citation-excerpts.js +41 -0
- package/dist/conversation/grounded-answer-runner.d.ts +9 -0
- package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
- package/dist/conversation/grounded-answer-runner.js +61 -0
- package/dist/conversation/index.d.ts +5 -0
- package/dist/conversation/index.d.ts.map +1 -0
- package/dist/conversation/index.js +7 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
- package/dist/conversation/model-gateway-answer-generator.js +105 -0
- package/dist/conversation/types.d.ts +35 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +24 -0
- package/dist/discovery/discovery-runner.d.ts +23 -0
- package/dist/discovery/discovery-runner.d.ts.map +1 -0
- package/dist/discovery/discovery-runner.js +109 -0
- package/dist/discovery/extract-progressive.d.ts +17 -0
- package/dist/discovery/extract-progressive.d.ts.map +1 -0
- package/dist/discovery/extract-progressive.js +522 -0
- package/dist/discovery/extract.d.ts +26 -0
- package/dist/discovery/extract.d.ts.map +1 -0
- package/dist/discovery/extract.js +906 -0
- package/dist/discovery/glob.d.ts +10 -0
- package/dist/discovery/glob.d.ts.map +1 -0
- package/dist/discovery/glob.js +72 -0
- package/dist/discovery/index.d.ts +6 -0
- package/dist/discovery/index.d.ts.map +1 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/media-type.d.ts +4 -0
- package/dist/discovery/media-type.d.ts.map +1 -0
- package/dist/discovery/media-type.js +62 -0
- package/dist/discovery/persist.d.ts +63 -0
- package/dist/discovery/persist.d.ts.map +1 -0
- package/dist/discovery/persist.js +345 -0
- package/dist/discovery/test-support.d.ts +16 -0
- package/dist/discovery/test-support.d.ts.map +1 -0
- package/dist/discovery/test-support.js +127 -0
- package/dist/discovery/types.d.ts +63 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +28 -0
- package/dist/discovery/walk.d.ts +12 -0
- package/dist/discovery/walk.d.ts.map +1 -0
- package/dist/discovery/walk.js +302 -0
- package/dist/errors.d.ts +13 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +22 -0
- package/dist/evaluations/dimensions.d.ts +14 -0
- package/dist/evaluations/dimensions.d.ts.map +1 -0
- package/dist/evaluations/dimensions.js +191 -0
- package/dist/evaluations/fixtures.d.ts +18 -0
- package/dist/evaluations/fixtures.d.ts.map +1 -0
- package/dist/evaluations/fixtures.js +858 -0
- package/dist/evaluations/index.d.ts +7 -0
- package/dist/evaluations/index.d.ts.map +1 -0
- package/dist/evaluations/index.js +10 -0
- package/dist/evaluations/report.d.ts +3 -0
- package/dist/evaluations/report.d.ts.map +1 -0
- package/dist/evaluations/report.js +31 -0
- package/dist/evaluations/runner-seed.d.ts +12 -0
- package/dist/evaluations/runner-seed.d.ts.map +1 -0
- package/dist/evaluations/runner-seed.js +175 -0
- package/dist/evaluations/runner.d.ts +8 -0
- package/dist/evaluations/runner.d.ts.map +1 -0
- package/dist/evaluations/runner.js +205 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
- package/dist/evaluations/scripted-embedding-adapter.js +163 -0
- package/dist/evaluations/types.d.ts +116 -0
- package/dist/evaluations/types.d.ts.map +1 -0
- package/dist/evaluations/types.js +27 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/indexing/bounded-indexing.d.ts +41 -0
- package/dist/indexing/bounded-indexing.d.ts.map +1 -0
- package/dist/indexing/bounded-indexing.js +240 -0
- package/dist/indexing/checkpoint-persist.d.ts +8 -0
- package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
- package/dist/indexing/checkpoint-persist.js +135 -0
- package/dist/indexing/checkpoint-resume.d.ts +20 -0
- package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
- package/dist/indexing/checkpoint-resume.js +50 -0
- package/dist/indexing/embedding-batcher.d.ts +3 -0
- package/dist/indexing/embedding-batcher.d.ts.map +1 -0
- package/dist/indexing/embedding-batcher.js +390 -0
- package/dist/indexing/index.d.ts +7 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +11 -0
- package/dist/indexing/job-persist.d.ts +46 -0
- package/dist/indexing/job-persist.d.ts.map +1 -0
- package/dist/indexing/job-persist.js +157 -0
- package/dist/indexing/job-resume.d.ts +4 -0
- package/dist/indexing/job-resume.d.ts.map +1 -0
- package/dist/indexing/job-resume.js +14 -0
- package/dist/indexing/orchestrator.d.ts +3 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -0
- package/dist/indexing/orchestrator.js +1151 -0
- package/dist/indexing/types.d.ts +156 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +30 -0
- package/dist/indexing/vector-persist.d.ts +32 -0
- package/dist/indexing/vector-persist.d.ts.map +1 -0
- package/dist/indexing/vector-persist.js +105 -0
- package/dist/parsers/_internal.d.ts +20 -0
- package/dist/parsers/_internal.d.ts.map +1 -0
- package/dist/parsers/_internal.js +122 -0
- package/dist/parsers/csv-parser.d.ts +3 -0
- package/dist/parsers/csv-parser.d.ts.map +1 -0
- package/dist/parsers/csv-parser.js +202 -0
- package/dist/parsers/docx-parser.d.ts +3 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +390 -0
- package/dist/parsers/html-parser.d.ts +3 -0
- package/dist/parsers/html-parser.d.ts.map +1 -0
- package/dist/parsers/html-parser.js +310 -0
- package/dist/parsers/index.d.ts +15 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +41 -0
- package/dist/parsers/json-parser.d.ts +3 -0
- package/dist/parsers/json-parser.d.ts.map +1 -0
- package/dist/parsers/json-parser.js +192 -0
- package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
- package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
- package/dist/parsers/large-document/capability-discovery.js +76 -0
- package/dist/parsers/large-document/diagnostics.d.ts +3 -0
- package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
- package/dist/parsers/large-document/diagnostics.js +11 -0
- package/dist/parsers/large-document/index.d.ts +15 -0
- package/dist/parsers/large-document/index.d.ts.map +1 -0
- package/dist/parsers/large-document/index.js +10 -0
- package/dist/parsers/large-document/legacy-format.d.ts +5 -0
- package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
- package/dist/parsers/large-document/legacy-format.js +25 -0
- package/dist/parsers/large-document/preflight.d.ts +9 -0
- package/dist/parsers/large-document/preflight.d.ts.map +1 -0
- package/dist/parsers/large-document/preflight.js +43 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-extraction.js +123 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-pdf.js +145 -0
- package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
- package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
- package/dist/parsers/large-document/synthetic-source.js +101 -0
- package/dist/parsers/large-document/window-builder.d.ts +24 -0
- package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
- package/dist/parsers/large-document/window-builder.js +75 -0
- package/dist/parsers/ocr/index.d.ts +4 -0
- package/dist/parsers/ocr/index.d.ts.map +1 -0
- package/dist/parsers/ocr/index.js +4 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
- package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
- package/dist/parsers/ocr/types.d.ts +16 -0
- package/dist/parsers/ocr/types.d.ts.map +1 -0
- package/dist/parsers/ocr/types.js +4 -0
- package/dist/parsers/parser-test-fixtures.d.ts +28 -0
- package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
- package/dist/parsers/parser-test-fixtures.js +139 -0
- package/dist/parsers/pdf-parser.d.ts +43 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +388 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +57 -0
- package/dist/parsers/text-parser.d.ts +3 -0
- package/dist/parsers/text-parser.d.ts.map +1 -0
- package/dist/parsers/text-parser.js +214 -0
- package/dist/parsers/types.d.ts +53 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +21 -0
- package/dist/parsers/unsupported-parser.d.ts +4 -0
- package/dist/parsers/unsupported-parser.d.ts.map +1 -0
- package/dist/parsers/unsupported-parser.js +97 -0
- package/dist/parsers/xlsx-parser.d.ts +3 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +425 -0
- package/dist/privacy/audit-emitter.d.ts +5 -0
- package/dist/privacy/audit-emitter.d.ts.map +1 -0
- package/dist/privacy/audit-emitter.js +93 -0
- package/dist/privacy/diagnostic-redactor.d.ts +2 -0
- package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
- package/dist/privacy/diagnostic-redactor.js +153 -0
- package/dist/privacy/index.d.ts +5 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +6 -0
- package/dist/privacy/retention-applier.d.ts +5 -0
- package/dist/privacy/retention-applier.d.ts.map +1 -0
- package/dist/privacy/retention-applier.js +88 -0
- package/dist/privacy/types.d.ts +98 -0
- package/dist/privacy/types.d.ts.map +1 -0
- package/dist/privacy/types.js +12 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
- package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
- package/dist/qualityIntelligence/index.d.ts +3 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +5 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
- package/dist/qualityIntelligence/qiHandoff.js +82 -0
- package/dist/retrieval/answer-grounding.d.ts +9 -0
- package/dist/retrieval/answer-grounding.d.ts.map +1 -0
- package/dist/retrieval/answer-grounding.js +31 -0
- package/dist/retrieval/context-pack-assembler.d.ts +24 -0
- package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
- package/dist/retrieval/context-pack-assembler.js +50 -0
- package/dist/retrieval/index.d.ts +6 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +9 -0
- package/dist/retrieval/retrieval-runner.d.ts +10 -0
- package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
- package/dist/retrieval/retrieval-runner.js +163 -0
- package/dist/retrieval/scoped-vector-search.d.ts +24 -0
- package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
- package/dist/retrieval/scoped-vector-search.js +864 -0
- package/dist/retrieval/types.d.ts +28 -0
- package/dist/retrieval/types.d.ts.map +1 -0
- package/dist/retrieval/types.js +33 -0
- package/dist/section-path-hash.d.ts +3 -0
- package/dist/section-path-hash.d.ts.map +1 -0
- package/dist/section-path-hash.js +9 -0
- package/dist/source-lifecycle.d.ts +14 -0
- package/dist/source-lifecycle.d.ts.map +1 -0
- package/dist/source-lifecycle.js +155 -0
- package/dist/source-routing-validation.d.ts +11 -0
- package/dist/source-routing-validation.d.ts.map +1 -0
- package/dist/source-routing-validation.js +140 -0
- package/dist/store-content-cipher.d.ts +11 -0
- package/dist/store-content-cipher.d.ts.map +1 -0
- package/dist/store-content-cipher.js +67 -0
- package/dist/store-content-encryption.d.ts +12 -0
- package/dist/store-content-encryption.d.ts.map +1 -0
- package/dist/store-content-encryption.js +275 -0
- package/dist/store-paths.d.ts +6 -0
- package/dist/store-paths.d.ts.map +1 -0
- package/dist/store-paths.js +61 -0
- package/dist/store.d.ts +30 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +219 -0
- package/dist/testing.d.ts +47 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +170 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +43 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import type { CapsuleSetId, CitationReference, KnowledgeCapsuleId, RetrievalReference } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import { KnowledgeStoreError } from "../errors.js";
|
|
3
|
+
/** Closed set of machine-readable failure codes carried by `RetrievalError.code`. */
export type RetrievalErrorCode = "EMBEDDING_ADAPTER_FAILED" | "INCOMPATIBLE_EMBEDDING_IDENTITY" | "CAPSULE_NOT_FOUND" | "INVALID_QUERY" | "STORE_READ_FAILED";
|
|
4
|
+
/**
 * Error type of the retrieval layer. It extends `KnowledgeStoreError`, so a
 * handler that catches the parent class also receives retrieval failures.
 * `code` identifies the failure category; the optional `options.cause`
 * carries the underlying error.
 */
export declare class RetrievalError extends KnowledgeStoreError {
|
|
5
|
+
readonly name: string;
|
|
6
|
+
readonly code: RetrievalErrorCode;
|
|
7
|
+
constructor(code: RetrievalErrorCode, message: string, options?: {
|
|
8
|
+
cause?: unknown;
|
|
9
|
+
});
|
|
10
|
+
}
|
|
11
|
+
/**
 * Input for one retrieval request. `text` is the only required field;
 * `capsuleSetId`, `capsuleId`, `topK` and `minScore` are optional.
 * NOTE(review): presumably the two ids select the scope to search and `topK`
 * is bounded by `MAX_RETRIEVAL_TOP_K` — confirm against the retrieval runner.
 */
export interface RetrievalQuery {
|
|
12
|
+
readonly capsuleSetId?: CapsuleSetId;
|
|
13
|
+
readonly capsuleId?: KnowledgeCapsuleId;
|
|
14
|
+
readonly text: string;
|
|
15
|
+
readonly topK?: number;
|
|
16
|
+
readonly minScore?: number;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Closed enumeration of reasons a retrieval result can carry in `reason`,
 * so a UI can map the result to a precise message without re-deriving state.
 */
export type RetrievalNoEvidenceReason = "no-scope" | "no-vectors" | "incompatible-embedding-identity" | "below-min-score" | "answer-grounding-rejected" | "no-evidence-stated" | "empty-query" | "embedding-failed";
|
|
19
|
+
/**
 * Outcome of a retrieval request. `noEvidence` is an explicit flag rather than
 * an overloaded empty `references` array, so callers can tell "scanned and
 * found nothing" apart from "the answer-grounding policy refused to release
 * the references"; `reason` optionally names the cause.
 * NOTE(review): `embeddingDegraded` is only ever `true` when present — its
 * exact trigger is not visible here; confirm against the retrieval runner.
 */
export interface RetrievalResult {
|
|
20
|
+
readonly references: readonly RetrievalReference[];
|
|
21
|
+
readonly noEvidence: boolean;
|
|
22
|
+
readonly reason?: RetrievalNoEvidenceReason;
|
|
23
|
+
readonly embeddingDegraded?: true;
|
|
24
|
+
}
|
|
25
|
+
/** Default top-K for retrieval (10), chosen to fit a small LLM grounding-prompt context budget. */
export declare const DEFAULT_RETRIEVAL_TOP_K = 10;
|
|
26
|
+
/** Hard cap on `topK` (100); a caller asking for more is clamped silently. */
export declare const MAX_RETRIEVAL_TOP_K = 100;
|
|
27
|
+
export type { CitationReference, RetrievalReference };
|
|
28
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/retrieval/types.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EACV,YAAY,EACZ,iBAAiB,EACjB,kBAAkB,EAClB,kBAAkB,EACnB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAC;AAGnD,MAAM,MAAM,kBAAkB,GAC1B,0BAA0B,GAC1B,iCAAiC,GACjC,mBAAmB,GACnB,eAAe,GACf,mBAAmB,CAAC;AAExB,qBAAa,cAAe,SAAQ,mBAAmB;IACrD,SAAyB,IAAI,EAAE,MAAM,CAAoB;IACzD,SAAgB,IAAI,EAAE,kBAAkB,CAAC;gBACtB,IAAI,EAAE,kBAAkB,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,KAAK,CAAC,EAAE,OAAO,CAAA;KAAE;CAI5F;AAKD,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY,CAAC;IACrC,QAAQ,CAAC,SAAS,CAAC,EAAE,kBAAkB,CAAC;IACxC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAOD,MAAM,MAAM,yBAAyB,GACjC,UAAU,GACV,YAAY,GACZ,iCAAiC,GACjC,iBAAiB,GACjB,2BAA2B,GAC3B,oBAAoB,GACpB,aAAa,GACb,kBAAkB,CAAC;AAEvB,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,UAAU,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACnD,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC;IAC7B,QAAQ,CAAC,MAAM,CAAC,EAAE,yBAAyB,CAAC;IAI5C,QAAQ,CAAC,iBAAiB,CAAC,EAAE,IAAI,CAAC;CACnC;AAMD,eAAO,MAAM,uBAAuB,KAAK,CAAC;AAI1C,eAAO,MAAM,mBAAmB,MAAM,CAAC;AAGvC,YAAY,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
// Type contracts for the retrieval layer (Epic #189, Issue #199). Retrieval is the
|
|
2
|
+
// runtime side of the Local Knowledge Connector pipeline that #196 fed: a query string +
|
|
3
|
+
// a `ComposedRetrievalScope` (#263) → a ranked list of `RetrievalReference` taken from the
|
|
4
|
+
// `vectors` table, plus a `GroundedContextPack` derived from those references (citations
|
|
5
|
+
// only, never raw text body unless the capsule's `outputMode === "raw"` — and even then
|
|
6
|
+
// the body is sourced from the parsed-units row, not embedded vectors).
|
|
7
|
+
//
|
|
8
|
+
// `RetrievalResult` carries an explicit `noEvidence` boolean rather than overloading an
|
|
9
|
+
// empty `references` array because the caller (#200 Conversation Center integration) needs
|
|
10
|
+
// to discriminate "we scanned and found nothing" from "the answer-grounding policy refused
|
|
11
|
+
// to release the references". A short, closed-enumeration `reason` string lets the UI map
|
|
12
|
+
// the result to a precise message without re-deriving state.
|
|
13
|
+
//
|
|
14
|
+
// `RetrievalError` extends KnowledgeStoreError so callers that catch the parent class
|
|
15
|
+
// still see the failure — same pattern as `IndexingError` in `../indexing/types.ts`.
|
|
16
|
+
import { KnowledgeStoreError } from "../errors.js";
|
|
17
|
+
/**
 * Error thrown by the retrieval layer. Subclasses `KnowledgeStoreError` so
 * callers catching the parent class still see the failure.
 *
 * @param code    Machine-readable failure code stored on `this.code`.
 * @param message Human-readable description, forwarded to the parent constructor.
 * @param options Forwarded unchanged to the parent constructor.
 */
export class RetrievalError extends KnowledgeStoreError {
|
|
18
|
+
name = "RetrievalError";
|
|
19
|
+
code;
|
|
20
|
+
constructor(code, message, options) {
|
|
21
|
+
super(message, options);
|
|
22
|
+
this.code = code;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
// ─── Defaults ────────────────────────────────────────────────────────────────
|
|
26
|
+
// Match the scope's contract maxima for the #200 Conversation Center surface: 10 is a
|
|
27
|
+
// reasonable default that still fits in a small context budget for an LLM grounding
|
|
28
|
+
// prompt and matches the "Top-K = 10" convention from sibling vector-store products.
|
|
29
|
+
export const DEFAULT_RETRIEVAL_TOP_K = 10;
|
|
30
|
+
// Hard cap on `topK`. A caller asking for more than this is clamped silently — we never
|
|
31
|
+
// surface every vector in a large capsule because that breaks the "ranked top-K" contract
|
|
32
|
+
// the answer-grounding policy depends on.
|
|
33
|
+
export const MAX_RETRIEVAL_TOP_K = 100;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"section-path-hash.d.ts","sourceRoot":"","sources":["../src/section-path-hash.ts"],"names":[],"mappings":"AAIA,wBAAgB,uBAAuB,CAAC,eAAe,EAAE,MAAM,GAAG,MAAM,CAGvE;AAED,wBAAgB,eAAe,CAAC,WAAW,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,CAEtE"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
// Algorithm tag prepended to every section-path digest.
const SECTION_PATH_HASH_PREFIX = "sha256:";
/**
 * Hashes an already-serialized section path.
 *
 * @param sectionPathJson JSON text of the section path; hashed as UTF-8.
 * @returns `sha256:` followed by the base64url-encoded SHA-256 digest.
 */
export function sectionPathHashFromJson(sectionPathJson) {
    const hasher = createHash("sha256");
    hasher.update(sectionPathJson, "utf8");
    return SECTION_PATH_HASH_PREFIX + hasher.digest("base64url");
}
|
|
7
|
+
/**
 * Hashes a section path by serializing it with `JSON.stringify` and
 * delegating to `sectionPathHashFromJson`.
 *
 * @param sectionPath The section path to hash.
 * @returns The prefixed digest produced by `sectionPathHashFromJson`.
 */
export function sectionPathHash(sectionPath) {
    const sectionPathJson = JSON.stringify(sectionPath);
    return sectionPathHashFromJson(sectionPathJson);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { type KnowledgeCapsuleId, type KnowledgeSource, type KnowledgeSourceId, type KnowledgeSourceScope } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { AuditEventSink } from "./privacy/types.js";
|
|
3
|
+
import type { KnowledgeStore } from "./store.js";
|
|
4
|
+
/**
 * Caller-supplied fields for attaching a knowledge source to a capsule via
 * `addSourceToCapsule`. That function rejects the input unless `displayName`
 * and every tag are non-blank browser-safe strings, `description` (when set)
 * is browser-safe, and `scope` passes the contract's scope validation.
 */
export interface AddCapsuleSourceInput {
|
|
5
|
+
readonly id: KnowledgeSourceId;
|
|
6
|
+
readonly displayName: string;
|
|
7
|
+
readonly description?: string;
|
|
8
|
+
readonly tags: readonly string[];
|
|
9
|
+
readonly scope: KnowledgeSourceScope;
|
|
10
|
+
}
|
|
11
|
+
/**
 * Validates `input`, writes the source and its capsule link in one
 * transaction, and returns the stored source. Emits a `source-added` audit
 * event when `auditSink` is supplied.
 *
 * @throws KnowledgeStoreError when validation fails, the source already
 *         exists, or the insert fails for another reason.
 */
export declare function addSourceToCapsule(store: KnowledgeStore, capsuleId: KnowledgeCapsuleId, input: AddCapsuleSourceInput, auditSink?: AuditEventSink): KnowledgeSource;
|
|
12
|
+
/** Returns the sources linked to `capsuleId`, ordered by link creation time and then by id. */
export declare function listCapsuleSources(store: KnowledgeStore, capsuleId: KnowledgeCapsuleId): readonly KnowledgeSource[];
|
|
13
|
+
/**
 * Deletes the (capsule, source) link after verifying that it exists, and
 * emits a `source-removed` audit event when `auditSink` is supplied.
 *
 * @throws KnowledgeNotFoundError when no link matches the given tuple.
 */
export declare function removeSourceFromCapsule(store: KnowledgeStore, capsuleId: KnowledgeCapsuleId, sourceId: KnowledgeSourceId, auditSink?: AuditEventSink): void;
|
|
14
|
+
//# sourceMappingURL=source-lifecycle.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source-lifecycle.d.ts","sourceRoot":"","sources":["../src/source-lifecycle.ts"],"names":[],"mappings":"AAOA,OAAO,EAGL,KAAK,kBAAkB,EACvB,KAAK,eAAe,EACpB,KAAK,iBAAiB,EACtB,KAAK,oBAAoB,EAC1B,MAAM,+BAA+B,CAAC;AAGvC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEjD,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,EAAE,EAAE,iBAAiB,CAAC;IAC/B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,CAAC;IACjC,QAAQ,CAAC,KAAK,EAAE,oBAAoB,CAAC;CACtC;AAiID,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,cAAc,EACrB,SAAS,EAAE,kBAAkB,EAC7B,KAAK,EAAE,qBAAqB,EAC5B,SAAS,CAAC,EAAE,cAAc,GACzB,eAAe,CAyBjB;AAoBD,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,cAAc,EACrB,SAAS,EAAE,kBAAkB,GAC5B,SAAS,eAAe,EAAE,CAG5B;AAED,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,cAAc,EACrB,SAAS,EAAE,kBAAkB,EAC7B,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,CAAC,EAAE,cAAc,GACzB,IAAI,CAuBN"}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
// source-lifecycle.ts — typed CRUD over `capsule_sources`. Every read is capsule-scoped:
|
|
2
|
+
// there is intentionally no `listAllSources(store)`. Deletion verifies the (capsuleId,
|
|
3
|
+
// sourceId) tuple matches a row BEFORE issuing DELETE so a wrong-tuple call cannot succeed
|
|
4
|
+
// by chance (a plain DELETE with COUNT-on-changes still races with concurrent CASCADE
|
|
5
|
+
// deletes of the parent capsule, but the verify+delete sequence is correct under WAL's
|
|
6
|
+
// single-writer semantics).
|
|
7
|
+
import { isSafeDisplaySummary, validateKnowledgeSourceScope, } from "@oscharko-dev/keiko-contracts";
|
|
8
|
+
import { KnowledgeNotFoundError, KnowledgeStoreError } from "./errors.js";
|
|
9
|
+
const INSERT_SQL = "INSERT INTO capsule_sources (id, capsule_id, display_name, description, tags_json, scope_kind, scope_json, created_at, updated_at) VALUES (:id, :capsule_id, :display_name, :description, :tags_json, :scope_kind, :scope_json, :created_at, :updated_at)";
|
|
10
|
+
const INSERT_KNOWLEDGE_SOURCE_SQL = "INSERT INTO knowledge_sources (id, display_name, description, tags_json, scope_kind, scope_json, created_at, updated_at) VALUES (:id, :display_name, :description, :tags_json, :scope_kind, :scope_json, :created_at, :updated_at) ON CONFLICT(id) DO UPDATE SET display_name = excluded.display_name, description = excluded.description, tags_json = excluded.tags_json, scope_kind = excluded.scope_kind, scope_json = excluded.scope_json, updated_at = excluded.updated_at";
|
|
11
|
+
const SELECT_BY_CAPSULE_SQL = "SELECT ks.* FROM capsule_sources AS cs JOIN knowledge_sources AS ks ON ks.id = cs.id WHERE cs.capsule_id = :c ORDER BY cs.created_at ASC, cs.id ASC";
|
|
12
|
+
const SELECT_BY_TUPLE_SQL = "SELECT id FROM capsule_sources WHERE capsule_id = :c AND id = :s";
|
|
13
|
+
const DELETE_BY_TUPLE_SQL = "DELETE FROM capsule_sources WHERE capsule_id = :c AND id = :s";
|
|
14
|
+
/**
 * Decodes the `tags_json` column into a list of string tags.
 *
 * The decoder is deliberately tolerant: anything that is not a JSON array
 * yields an empty list and non-string entries are dropped. Malformed JSON is
 * treated the same way, so one corrupt row cannot make a whole capsule's
 * source listing fail with a raw SyntaxError.
 *
 * @param json Raw JSON text read from the row.
 * @returns The string entries of the decoded array, or `[]` when the payload
 *          is unusable.
 */
function parseTags(json) {
    let parsed;
    try {
        parsed = JSON.parse(json);
    }
    catch {
        // Unparseable text is just another unusable payload.
        return [];
    }
    if (!Array.isArray(parsed))
        return [];
    return parsed.filter((entry) => typeof entry === "string");
}
|
|
20
|
+
/**
 * Guards a required display string.
 *
 * @param field Name of the field, used as the prefix of the error message.
 * @param value The string to check.
 * @throws KnowledgeStoreError when `value` is blank after trimming or fails
 *         `isSafeDisplaySummary`.
 */
function assertSafeDisplayField(field, value) {
    // Short-circuit keeps the safety check from running on blank input.
    const acceptable = value.trim().length > 0 && isSafeDisplaySummary(value);
    if (acceptable)
        return;
    throw new KnowledgeStoreError(`${field} must be a browser-safe non-empty string`);
}
|
|
25
|
+
/**
 * Guards an optional display string: `undefined` is accepted as-is, any
 * other value must pass `isSafeDisplaySummary`.
 *
 * @param field Name of the field, used as the prefix of the error message.
 * @param value The string to check, or `undefined` when the field is unset.
 * @throws KnowledgeStoreError when a set value is not browser-safe.
 */
function assertSafeOptionalDisplayField(field, value) {
    if (value === undefined)
        return;
    if (isSafeDisplaySummary(value))
        return;
    throw new KnowledgeStoreError(`${field} must be browser-safe when set`);
}
|
|
30
|
+
/**
 * Runs the contract validator over a source scope.
 *
 * @param scope The scope to validate.
 * @throws KnowledgeStoreError whose message is the validator's errors joined
 *         by single spaces, when validation does not succeed.
 */
function assertSafeScope(scope) {
    const verdict = validateKnowledgeSourceScope(scope);
    if (!verdict.ok) {
        throw new KnowledgeStoreError(verdict.errors.join(" "));
    }
}
|
|
36
|
+
function parseScope(kind, json) {
|
|
37
|
+
const parsed = JSON.parse(json);
|
|
38
|
+
if (typeof parsed !== "object" || parsed === null) {
|
|
39
|
+
throw new KnowledgeStoreError(`Corrupt capsule_sources.scope_json (kind=${kind}).`);
|
|
40
|
+
}
|
|
41
|
+
// The contract validators in keiko-contracts shape these on write; we trust the row.
|
|
42
|
+
return { kind, ...parsed };
|
|
43
|
+
}
|
|
44
|
+
/**
 * Maps a database row (snake_case columns) onto the camelCase source shape.
 * `description` is included only when the column is not `null`.
 *
 * @param row Row carrying id, display_name, description, tags_json,
 *            scope_kind, scope_json, created_at and updated_at.
 * @returns The source object built from the row.
 */
function rowToSource(row) {
    const { id, display_name, description, tags_json, scope_kind, scope_json, created_at, updated_at } = row;
    const core = {
        id,
        displayName: display_name,
        tags: parseTags(tags_json),
        scope: parseScope(scope_kind, scope_json),
        createdAt: created_at,
        updatedAt: updated_at,
    };
    if (description === null) {
        return core;
    }
    return { ...core, description };
}
|
|
55
|
+
/**
 * Serializes a scope for the `scope_json` column. Only the fields beyond
 * `kind` are persisted, because `kind` lives in its own column.
 *
 * The copy is built with `Object.fromEntries`, which defines own properties
 * and never goes through the `__proto__` setter that a plain `copy[key] =
 * value` assignment would trigger. Filtering entries also avoids the
 * destructure-and-discard pattern the lint config flags as an unused binding.
 *
 * @param scope The scope object to serialize.
 * @returns JSON text of every own enumerable field except `kind`.
 */
function scopeToJson(scope) {
    const persisted = Object.entries(scope).filter(([key]) => key !== "kind");
    return JSON.stringify(Object.fromEntries(persisted));
}
|
|
67
|
+
/**
 * Builds the named SQL parameters for a source row from the caller's input.
 *
 * @param input The source fields (id, displayName, description, tags, scope).
 * @param now   Timestamp written to both `created_at` and `updated_at`.
 * @returns Parameter object keyed by column name; a missing description is
 *          stored as `null`, and tags and scope are serialized to JSON.
 */
function sourceParams(input, now) {
    const { id, displayName, description, tags, scope } = input;
    const tagsJson = JSON.stringify(tags);
    return {
        id,
        display_name: displayName,
        description: description ?? null,
        tags_json: tagsJson,
        scope_kind: scope.kind,
        scope_json: scopeToJson(scope),
        created_at: now,
        updated_at: now,
    };
}
|
|
79
|
+
function insertSourceLink(store, capsuleId, params) {
|
|
80
|
+
const db = store._internal.db;
|
|
81
|
+
db.exec("BEGIN");
|
|
82
|
+
try {
|
|
83
|
+
db.prepare(INSERT_KNOWLEDGE_SOURCE_SQL).run(params);
|
|
84
|
+
db.prepare(INSERT_SQL).run({ ...params, capsule_id: capsuleId });
|
|
85
|
+
db.exec("COMMIT");
|
|
86
|
+
}
|
|
87
|
+
catch (error) {
|
|
88
|
+
db.exec("ROLLBACK");
|
|
89
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
90
|
+
if (/UNIQUE|PRIMARY KEY/i.test(msg)) {
|
|
91
|
+
throw new KnowledgeStoreError("source already exists", { cause: error });
|
|
92
|
+
}
|
|
93
|
+
throw new KnowledgeStoreError("failed to add source", { cause: error });
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
/**
 * Adds a source to a capsule.
 *
 * Validation runs first (display name, optional description, every tag, then
 * the scope). The row and its capsule link are then inserted, the link is
 * probed, and the stored source is read back. A `source-added` audit event is
 * emitted only after the read succeeds.
 *
 * @param store     Store providing `_internal.db` and `_internal.now()`.
 * @param capsuleId Capsule receiving the source.
 * @param input     Source fields supplied by the caller.
 * @param auditSink Optional sink that receives the audit event.
 * @returns The source as read back from the store.
 * @throws KnowledgeStoreError on invalid input, on insert failure, or when
 *         the link row cannot be found right after the insert.
 */
export function addSourceToCapsule(store, capsuleId, input, auditSink) {
    const sourceId = input.id;
    assertSafeDisplayField("displayName", input.displayName);
    assertSafeOptionalDisplayField("description", input.description);
    for (const tag of input.tags) {
        assertSafeDisplayField("tag", tag);
    }
    assertSafeScope(input.scope);
    const timestamp = store._internal.now();
    const params = sourceParams(input, timestamp);
    insertSourceLink(store, capsuleId, params);
    // Confirm the link row is visible before reading the full source back.
    const probeStatement = store._internal.db.prepare(SELECT_BY_TUPLE_SQL);
    const linkRow = probeStatement.get({ c: capsuleId, s: input.id });
    if (linkRow === undefined) {
        throw new KnowledgeStoreError(`addSourceToCapsule: insert succeeded but row not found for ${String(input.id)}`);
    }
    const stored = readSource(store, capsuleId, input.id);
    if (auditSink !== undefined && auditSink !== null) {
        auditSink.emit({
            kind: "source-added",
            capsuleId,
            sourceId,
            occurredAt: timestamp,
        });
    }
    return stored;
}
|
|
120
|
+
/**
 * Loads one source through its capsule link, so a source is readable only
 * via a capsule it is actually linked to.
 *
 * @param store     Store whose `_internal.db` handle runs the query.
 * @param capsuleId Capsule side of the link.
 * @param sourceId  Source side of the link.
 * @returns The source built from the joined `knowledge_sources` row.
 * @throws KnowledgeNotFoundError when no row matches the tuple.
 */
function readSource(store, capsuleId, sourceId) {
    const sql = "SELECT ks.* FROM capsule_sources AS cs JOIN knowledge_sources AS ks ON ks.id = cs.id WHERE cs.capsule_id = :c AND cs.id = :s";
    const statement = store._internal.db.prepare(sql);
    const match = statement.get({ c: capsuleId, s: sourceId });
    if (match !== undefined) {
        return rowToSource(match);
    }
    throw new KnowledgeNotFoundError(`Source not found: capsule=${String(capsuleId)} source=${String(sourceId)}`);
}
|
|
129
|
+
/**
 * Lists the sources linked to one capsule, in the order the query returns
 * them (link creation time, then id).
 *
 * @param store     Store whose `_internal.db` handle runs the query.
 * @param capsuleId Capsule whose sources are listed.
 * @returns One source object per linked row.
 */
export function listCapsuleSources(store, capsuleId) {
    const statement = store._internal.db.prepare(SELECT_BY_CAPSULE_SQL);
    const sources = [];
    for (const row of statement.all({ c: capsuleId })) {
        sources.push(rowToSource(row));
    }
    return sources;
}
|
|
133
|
+
export function removeSourceFromCapsule(store, capsuleId, sourceId, auditSink) {
|
|
134
|
+
const db = store._internal.db;
|
|
135
|
+
const occurredAt = store._internal.now();
|
|
136
|
+
db.exec("BEGIN");
|
|
137
|
+
try {
|
|
138
|
+
// Verify the (capsule, source) tuple exists. Deleting on a non-matching tuple would
|
|
139
|
+
// silently succeed with 0 changes; we want the caller to learn about typos.
|
|
140
|
+
const probe = db.prepare(SELECT_BY_TUPLE_SQL).get({ c: capsuleId, s: sourceId });
|
|
141
|
+
if (probe === undefined) {
|
|
142
|
+
db.exec("ROLLBACK");
|
|
143
|
+
throw new KnowledgeNotFoundError(`Source not found for tuple capsule=${String(capsuleId)} source=${String(sourceId)}`);
|
|
144
|
+
}
|
|
145
|
+
db.prepare(DELETE_BY_TUPLE_SQL).run({ c: capsuleId, s: sourceId });
|
|
146
|
+
db.exec("COMMIT");
|
|
147
|
+
}
|
|
148
|
+
catch (error) {
|
|
149
|
+
if (!(error instanceof KnowledgeNotFoundError)) {
|
|
150
|
+
db.exec("ROLLBACK");
|
|
151
|
+
}
|
|
152
|
+
throw error;
|
|
153
|
+
}
|
|
154
|
+
auditSink?.emit({ kind: "source-removed", capsuleId, sourceId, occurredAt });
|
|
155
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { KnowledgeCapsule, KnowledgeSource, KnowledgeSourceScope } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
/** Closed set of machine-readable codes carried by `SourceRoutingValidationError.code`. */
export type SourceRoutingValidationCode = "always-query-without-sources" | "always-query-capsule-not-ready" | "always-query-source-list-mismatch" | "unknown-source-token" | "instructions-empty" | "include-globs-empty-array" | "exclude-globs-empty-array" | "duplicate-glob" | "glob-path-escape" | "absolute-glob" | "exclude-cancels-include";
|
|
3
|
+
/**
 * Error thrown by the source-routing validators. `code` identifies the violated rule;
 * the inherited `message` carries the human-readable explanation.
 */
export declare class SourceRoutingValidationError extends Error {
|
|
4
|
+
readonly code: SourceRoutingValidationCode;
|
|
5
|
+
constructor(code: SourceRoutingValidationCode, message: string);
|
|
6
|
+
}
|
|
7
|
+
/**
 * Validates a capsule's `alwaysQuery` setting. Does nothing unless `capsule.alwaysQuery`
 * is `true`.
 *
 * @throws SourceRoutingValidationError when the capsule has no source ids, its
 *   lifecycle state is not "ready", or it declares a source id missing from `sources`.
 */
export declare function validateAlwaysQuery(capsule: KnowledgeCapsule, sources: readonly KnowledgeSource[]): void;
|
|
8
|
+
/**
 * Validates free-form routing instructions against the capsule's sources. `undefined`
 * instructions are accepted as-is.
 *
 * @throws SourceRoutingValidationError with "instructions-empty" for whitespace-only
 *   text, or "unknown-source-token" when an `@token` does not resolve to the id of one
 *   of `sources`.
 */
export declare function validateRoutingInstructionsScope(instructions: string | undefined, sources: readonly KnowledgeSource[]): void;
|
|
9
|
+
/**
 * Validates the include/exclude glob lists of a source scope. Scopes of kind "files"
 * are skipped.
 *
 * @throws SourceRoutingValidationError for an empty list, a duplicate pattern, an
 *   absolute pattern, a `..` path segment, or an exclude entry identical to an include
 *   entry.
 */
export declare function validateGlobPatterns(scope: KnowledgeSourceScope): void;
|
|
10
|
+
/**
 * Composite check: validates `alwaysQuery`, then the capsule's
 * `sourceRoutingInstructions`, then the glob patterns of every source's scope.
 *
 * @throws SourceRoutingValidationError on the first rule that fails.
 */
export declare function validateSourceRoutingForCapsule(capsule: KnowledgeCapsule, sources: readonly KnowledgeSource[]): void;
|
|
11
|
+
//# sourceMappingURL=source-routing-validation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"source-routing-validation.d.ts","sourceRoot":"","sources":["../src/source-routing-validation.ts"],"names":[],"mappings":"AAaA,OAAO,KAAK,EACV,gBAAgB,EAChB,eAAe,EACf,oBAAoB,EACrB,MAAM,+BAA+B,CAAC;AAEvC,MAAM,MAAM,2BAA2B,GACnC,8BAA8B,GAC9B,gCAAgC,GAChC,mCAAmC,GACnC,sBAAsB,GACtB,oBAAoB,GACpB,2BAA2B,GAC3B,2BAA2B,GAC3B,gBAAgB,GAChB,kBAAkB,GAClB,eAAe,GACf,yBAAyB,CAAC;AAE9B,qBAAa,4BAA6B,SAAQ,KAAK;IACrD,QAAQ,CAAC,IAAI,EAAE,2BAA2B,CAAC;gBAC/B,IAAI,EAAE,2BAA2B,EAAE,OAAO,EAAE,MAAM;CAK/D;AAYD,wBAAgB,mBAAmB,CACjC,OAAO,EAAE,gBAAgB,EACzB,OAAO,EAAE,SAAS,eAAe,EAAE,GAClC,IAAI,CAuBN;AAUD,wBAAgB,gCAAgC,CAC9C,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,OAAO,EAAE,SAAS,eAAe,EAAE,GAClC,IAAI,CAkBN;AAmBD,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,oBAAoB,GAAG,IAAI,CAetE;AA2DD,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,gBAAgB,EACzB,OAAO,EAAE,SAAS,eAAe,EAAE,GAClC,IAAI,CAMN"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// source-routing-validation.ts — pure validators for Knowledge Capsule source-routing
|
|
2
|
+
// controls (Epic #189, Issue #263). Foundry-IQ "no global pool" rule is enforced at the
|
|
3
|
+
// type system by every record carrying capsuleId+sourceId; this module catches the
|
|
4
|
+
// **semantic** misconfigurations the type system cannot see:
|
|
5
|
+
//
|
|
6
|
+
// * alwaysQuery=true on an empty or non-ready capsule (would query nothing or stale
|
|
7
|
+
// data on every conversation turn).
|
|
8
|
+
// * Routing instructions referencing source ids the capsule does not own.
|
|
9
|
+
// * Include/exclude globs that silently broaden retrieval (empty arrays, duplicates,
|
|
10
|
+
// `..` traversal, leading-slash absolute paths, or excludes that cancel includes).
|
|
11
|
+
//
|
|
12
|
+
// Pure module: no IO, no clock, no node:* imports. Safe to call from any layer.
|
|
13
|
+
/**
 * Error raised when a source-routing configuration breaks one of the validation rules.
 * `code` is the machine-readable rule identifier; `message` explains the failure.
 */
export class SourceRoutingValidationError extends Error {
    code;
    constructor(code, message) {
        super(message);
        // Set the error name first, then the code, as two own properties on the instance.
        Object.assign(this, { name: "SourceRoutingValidationError", code });
    }
}
|
|
21
|
+
/**
 * Throws a SourceRoutingValidationError built from the given code and message.
 * Never returns normally.
 */
function fail(code, message) {
    const failure = new SourceRoutingValidationError(code, message);
    throw failure;
}
|
|
24
|
+
// ─── alwaysQuery ───────────────────────────────────────────────────────────────
|
|
25
|
+
// Truthy alwaysQuery means: include this capsule in every retrieval scope for the
|
|
26
|
+
// current conversation by default. That makes a misconfiguration silently dangerous —
|
|
27
|
+
// an empty or unindexed capsule would either return nothing every turn or expose
|
|
28
|
+
// stale chunks. We refuse both states up front.
|
|
29
|
+
/**
 * Refuses `alwaysQuery=true` on a capsule that cannot be queried safely.
 *
 * Capsules whose `alwaysQuery` is not exactly `true` are not checked. Otherwise the
 * capsule must have at least one source id, be in lifecycle state "ready", and every
 * declared source id must appear (compared as strings) in `sources`.
 *
 * @param capsule Capsule under validation.
 * @param sources Sources supplied alongside the capsule.
 * @throws SourceRoutingValidationError on the first rule that fails.
 */
export function validateAlwaysQuery(capsule, sources) {
    if (capsule.alwaysQuery === true) {
        if (capsule.sourceIds.length === 0) {
            fail("always-query-without-sources", `Capsule ${String(capsule.id)} has alwaysQuery=true but no sources.`);
        }
        if (capsule.lifecycleState !== "ready") {
            fail("always-query-capsule-not-ready", `Capsule ${String(capsule.id)} cannot be alwaysQuery while lifecycleState=${capsule.lifecycleState}.`);
        }
        // Index the supplied sources by stringified id for the membership check below.
        const supplied = new Set();
        for (const source of sources) {
            supplied.add(String(source.id));
        }
        for (const declaredId of capsule.sourceIds) {
            if (supplied.has(String(declaredId))) {
                continue;
            }
            fail("always-query-source-list-mismatch", `Capsule ${String(capsule.id)} declares source ${String(declaredId)} but it is not in the supplied source list.`);
        }
    }
}
|
|
45
|
+
// ─── Routing instructions ─────────────────────────────────────────────────────
|
|
46
|
+
// `sourceRoutingInstructions` is free-form prose, but a Foundry-IQ-style convention
|
|
47
|
+
// uses `@source-id` tokens (e.g. "prefer @docs over @specs") to address specific
|
|
48
|
+
// sources. The validator extracts those tokens and refuses any that do not resolve
|
|
49
|
+
// to a source in this capsule. Instructions without tokens are accepted as-is.
|
|
50
|
+
// Global pattern for `@token` references: `@`, one ASCII letter or digit, then any run of
// letters, digits, `_`, `.` or `-`. Capture group 1 is the token without the `@`.
// Because `.` and `-` are allowed after the first character, punctuation directly after
// a token (e.g. a sentence-ending period) is captured as part of it.
const SOURCE_TOKEN_RE = /@([A-Za-z0-9][A-Za-z0-9_.-]*)/g;
|
|
51
|
+
/**
 * Checks that routing instructions only address sources the capsule owns.
 *
 * `undefined` instructions are accepted. Whitespace-only instructions are refused.
 * Every `@token` found in the text must match the stringified id of one of `sources`.
 * A token that does not match verbatim is retried with trailing `.`, `_` and `-`
 * characters removed, so prose punctuation after a token ("prefer @docs over @specs.")
 * is not mistaken for part of the id.
 *
 * @param instructions Free-form routing prose, or `undefined` when omitted.
 * @param sources Sources belonging to the capsule.
 * @throws SourceRoutingValidationError with code "instructions-empty" or
 *   "unknown-source-token".
 */
export function validateRoutingInstructionsScope(instructions, sources) {
    if (instructions === undefined)
        return;
    if (instructions.trim().length === 0) {
        fail("instructions-empty", "sourceRoutingInstructions must be omitted entirely or contain non-whitespace content.");
    }
    const knownIds = new Set(sources.map((s) => String(s.id)));
    // Non-global on purpose: used with replace() only, so it carries no lastIndex state.
    const trailingPunctuation = /[._-]+$/;
    for (const token of extractSourceTokens(instructions)) {
        if (knownIds.has(token)) {
            continue;
        }
        // The exact match is tried first so ids that really end in punctuation still
        // resolve; the trimmed form is only a fallback.
        const trimmed = token.replace(trailingPunctuation, "");
        if (trimmed !== token && knownIds.has(trimmed)) {
            continue;
        }
        fail("unknown-source-token", `sourceRoutingInstructions references @${token} which is not a source in this capsule.`);
    }
}
|
|
65
|
+
/**
 * Collects every `@token` name found in the instructions, in order of appearance and
 * without the leading `@`. Duplicates are kept.
 */
function extractSourceTokens(instructions) {
    return Array.from(instructions.matchAll(SOURCE_TOKEN_RE))
        .map((hit) => hit[1])
        .filter((name) => name !== undefined && name.length > 0);
}
|
|
75
|
+
// ─── Glob patterns ────────────────────────────────────────────────────────────
|
|
76
|
+
// The contract's KnowledgeSourceScope only enforces TYPES on includeGlobs/excludeGlobs.
|
|
77
|
+
// This validator enforces SEMANTICS: callers must omit the field instead of passing
|
|
78
|
+
// an empty array, must not duplicate patterns, must not contain `..` or absolute
|
|
79
|
+
// paths, and excludeGlobs must not byte-match an includeGlobs entry (cancelling it).
|
|
80
|
+
/**
 * Enforces the semantic rules on a scope's include/exclude globs.
 *
 * Scopes of kind "files" are not checked. For other scopes each list is validated on
 * its own (include first, then exclude), and when both lists are present no exclude
 * entry may be identical to an include entry.
 *
 * @param scope Source scope under validation.
 * @throws SourceRoutingValidationError on the first rule that fails.
 */
export function validateGlobPatterns(scope) {
    if (scope.kind !== "files") {
        const { includeGlobs, excludeGlobs } = scope;
        validateGlobList(includeGlobs, "include");
        validateGlobList(excludeGlobs, "exclude");
        if (includeGlobs === undefined || excludeGlobs === undefined) {
            return;
        }
        const included = new Set(includeGlobs);
        for (const excluded of excludeGlobs) {
            if (!included.has(excluded)) {
                continue;
            }
            fail("exclude-cancels-include", `excludeGlobs entry "${excluded}" byte-matches an includeGlobs entry; the include is silently cancelled.`);
        }
    }
}
|
|
94
|
+
/**
 * Validates one glob list. `kind` is "include" or "exclude" and selects the error code
 * and wording. An omitted list (`undefined`) is valid; an empty array is not. Every
 * pattern must pass assertGlobShape and appear only once.
 */
function validateGlobList(patterns, kind) {
    if (patterns === undefined) {
        return;
    }
    if (patterns.length === 0) {
        let emptyCode = "exclude-globs-empty-array";
        if (kind === "include") {
            emptyCode = "include-globs-empty-array";
        }
        fail(emptyCode, `${kind}Globs must be omitted, not supplied as an empty array.`);
    }
    const unique = new Set();
    for (const candidate of patterns) {
        assertGlobShape(candidate);
        // A Set only grows when the value is new, so an unchanged size means a repeat.
        const sizeBefore = unique.size;
        unique.add(candidate);
        if (unique.size === sizeBefore) {
            fail("duplicate-glob", `Duplicate ${kind} glob pattern "${candidate}".`);
        }
    }
}
|
|
109
|
+
/**
 * Reports whether the pattern begins with a Windows drive root such as `C:\` or `C:/`:
 * one ASCII letter, a colon, then a forward slash or backslash.
 */
function isWindowsDriveAbsolute(pattern) {
    return /^[A-Za-z]:[\\/]/.test(pattern);
}
|
|
116
|
+
/**
 * Refuses glob patterns that are not source-root-relative: a leading `/`, a Windows
 * drive root, a leading `\\`, or any `..` path segment.
 *
 * Segments are split on either separator so backslash paths such as "sub\..\other" are
 * caught too. Only a whole `..` segment is refused; "abc..def" inside a name is allowed.
 *
 * @throws SourceRoutingValidationError with code "absolute-glob" or "glob-path-escape".
 */
function assertGlobShape(pattern) {
    if (pattern.startsWith("/")) {
        fail("absolute-glob", `Glob pattern "${pattern}" is absolute; patterns must be source-root-relative.`);
    }
    const windowsAbsolute = isWindowsDriveAbsolute(pattern) || pattern.startsWith("\\\\");
    if (windowsAbsolute) {
        fail("absolute-glob", `Glob pattern "${pattern}" is an absolute Windows path; patterns must be source-root-relative.`);
    }
    const hasParentSegment = pattern.split(/[/\\]/).includes("..");
    if (hasParentSegment) {
        fail("glob-path-escape", `Glob pattern "${pattern}" contains a parent-directory segment.`);
    }
}
|
|
133
|
+
// ─── Composite ─────────────────────────────────────────────────────────────────
|
|
134
|
+
/**
 * Runs every source-routing validator for a capsule: the `alwaysQuery` rules, the
 * routing-instruction tokens, then the glob patterns of each supplied source's scope.
 *
 * @param capsule Capsule under validation.
 * @param sources Sources supplied alongside the capsule.
 * @throws SourceRoutingValidationError on the first rule that fails.
 */
export function validateSourceRoutingForCapsule(capsule, sources) {
    validateAlwaysQuery(capsule, sources);
    const instructions = capsule.sourceRoutingInstructions;
    validateRoutingInstructionsScope(instructions, sources);
    sources.forEach((source) => {
        validateGlobPatterns(source.scope);
    });
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Cipher applied at the column boundary when content is written to or read from the
 * store. A plaintext implementation passes values through unchanged; an encrypted
 * implementation seals on write and opens on read.
 */
export interface StoreContentCipher {
|
|
2
|
+
/** `true` for a cipher that encrypts, `false` for the plaintext pass-through. */
readonly isEncrypted: boolean;
|
|
3
|
+
/** Converts plaintext text into the form that is stored. */
readonly sealText: (plaintext: string) => string;
|
|
4
|
+
/** Converts stored text back into plaintext. */
readonly openText: (stored: string) => string;
|
|
5
|
+
/** Converts plaintext vector bytes into the form that is stored. */
readonly sealVector: (plaintext: Uint8Array) => Uint8Array;
|
|
6
|
+
/** Converts stored vector bytes back into plaintext; `plaintextByteLength` is the expected length of the result. */
readonly openVector: (stored: Uint8Array, plaintextByteLength: number) => Uint8Array;
|
|
7
|
+
/** Reports whether a stored text value is sealed. */
readonly isSealed: (value: string) => boolean;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Identity cipher for plaintext stores: `isEncrypted` is `false`, the seal/open
 * functions return their input unchanged, and `isSealed` always returns `false`.
 */
export declare const PLAINTEXT_CONTENT_CIPHER: StoreContentCipher;
|
|
10
|
+
/**
 * Creates an encrypting cipher bound to a private copy of `key`.
 *
 * @param key Key material; must be exactly 32 bytes.
 * @throws KnowledgeStoreError when `key` is not exactly 32 bytes long.
 */
export declare function createEncryptedContentCipher(key: Uint8Array): StoreContentCipher;
|
|
11
|
+
//# sourceMappingURL=store-content-cipher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"store-content-cipher.d.ts","sourceRoot":"","sources":["../src/store-content-cipher.ts"],"names":[],"mappings":"AA4BA,MAAM,WAAW,kBAAkB;IAGjC,QAAQ,CAAC,WAAW,EAAE,OAAO,CAAC;IAI9B,QAAQ,CAAC,QAAQ,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,MAAM,CAAC;IACjD,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,MAAM,CAAC;IAI9C,QAAQ,CAAC,UAAU,EAAE,CAAC,SAAS,EAAE,UAAU,KAAK,UAAU,CAAC;IAC3D,QAAQ,CAAC,UAAU,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,KAAK,UAAU,CAAC;IAIrF,QAAQ,CAAC,QAAQ,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC;CAC/C;AAID,eAAO,MAAM,wBAAwB,EAAE,kBAOtC,CAAC;AAEF,wBAAgB,4BAA4B,CAAC,GAAG,EAAE,UAAU,GAAG,kBAAkB,CA8ChF"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// Store content cipher — the single crypto boundary for Local Knowledge encryption at rest
|
|
2
|
+
// (Issue #1322, Epic #1319; ADR-0047). Resolves a 32-byte AES-256-GCM key once at store-open and
|
|
3
|
+
// binds it into a `StoreContentCipher` that the row layer threads through every read/write of the
|
|
4
|
+
// reconstructive content columns (document_texts.normalized_text,
|
|
5
|
+
// document_text_windows.normalized_text, vectors.embedding, sections.section_path_json, and
|
|
6
|
+
// parsed_units path JSON). The key NEVER appears in an error message, event, or persisted row; only
|
|
7
|
+
// sealed envelopes touch SQLite.
|
|
8
|
+
//
|
|
9
|
+
// Mirrors keiko-memory-vault's MemoryContentCipher (ADR-0035): crypto knowledge lives only here, so
|
|
10
|
+
// retrieval/parsing/UI layers stay crypto-free and just call seal*/open* at the column boundary.
|
|
11
|
+
//
|
|
12
|
+
// Plaintext mode binds the identity cipher (PLAINTEXT_CONTENT_CIPHER), so every call site is
|
|
13
|
+
// byte-for-byte unchanged when no key provider is supplied. Encryption is therefore opt-in per
|
|
14
|
+
// store-open: keiko-server injects a key provider for production stores; the in-package evaluation
|
|
15
|
+
// harness and tests open plaintext stores unchanged.
|
|
16
|
+
import { isSealed, openBytes, openString, sealBytes, sealString, } from "@oscharko-dev/keiko-security";
|
|
17
|
+
import { KnowledgeStoreError } from "./errors.js";
|
|
18
|
+
// Required key length in bytes (32 bytes = 256 bits, the AES-256-GCM key size).
const KEY_BYTES = 32;
|
|
19
|
+
// Identity cipher for plaintext stores. Every method is a no-op pass-through so a store opened
|
|
20
|
+
// without a key provider behaves exactly as it did before encryption existed.
|
|
21
|
+
/**
 * Pass-through cipher used when a store is opened without a key provider. Nothing is
 * transformed: seal/open hand back the value they were given and nothing is ever
 * reported as sealed.
 */
export const PLAINTEXT_CONTENT_CIPHER = {
    isEncrypted: false,
    sealText(plaintext) {
        return plaintext;
    },
    openText(stored) {
        return stored;
    },
    sealVector(plaintext) {
        return plaintext;
    },
    openVector(stored) {
        return stored;
    },
    isSealed() {
        return false;
    },
};
|
|
29
|
+
/**
 * Builds the encrypting cipher for a store.
 *
 * The key is copied into a Buffer so the cipher owns its key material; a caller that
 * later mutates the provided array cannot change the key. The length is validated here
 * so a misconfigured key provider fails early with a message that contains no secret.
 *
 * @param key Key material; must be exactly KEY_BYTES long.
 * @returns A cipher with `isEncrypted: true` whose seal/open functions use the key copy.
 * @throws KnowledgeStoreError when the key has the wrong length. The returned
 *   `openText`/`openVector` also throw KnowledgeStoreError when content is not sealed,
 *   cannot be opened, or (vectors) opens to an unexpected byte length.
 */
export function createEncryptedContentCipher(key) {
    const ownedKey = Buffer.from(key);
    if (ownedKey.length !== KEY_BYTES) {
        throw new KnowledgeStoreError("encrypted local-knowledge store key must be exactly 32 bytes");
    }
    // Text: refuse values that are not sealed, and wrap any open failure.
    const openText = (stored) => {
        if (isSealed(stored)) {
            try {
                return openString(ownedKey, stored);
            }
            catch (cause) {
                throw new KnowledgeStoreError("cannot open encrypted Local Knowledge text content: wrong key or tampered content", { cause });
            }
        }
        throw new KnowledgeStoreError("encrypted Local Knowledge text content is not sealed at rest");
    };
    // Vectors: open first, then verify the plaintext has the expected byte length.
    const decryptVector = (stored) => {
        try {
            return openBytes(ownedKey, Buffer.from(stored));
        }
        catch (cause) {
            throw new KnowledgeStoreError("cannot open encrypted Local Knowledge vector content: wrong key or tampered content", { cause });
        }
    };
    const openVector = (stored, plaintextByteLength) => {
        const plaintext = decryptVector(stored);
        if (plaintext.byteLength === plaintextByteLength) {
            return plaintext;
        }
        throw new KnowledgeStoreError("encrypted Local Knowledge vector content length does not match vector_dimensions");
    };
    const sealText = (plaintext) => sealString(ownedKey, plaintext);
    const sealVector = (plaintext) => sealBytes(ownedKey, Buffer.from(plaintext));
    return {
        isEncrypted: true,
        sealText,
        openText,
        sealVector,
        openVector,
        isSealed: isSealed,
    };
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { DatabaseSync } from "node:sqlite";
|
|
2
|
+
import type { StoreContentCipher } from "./store-content-cipher.js";
|
|
3
|
+
/**
 * Applies the content-encryption setup for `cipher` to the given database.
 * NOTE(review): the implementation is not part of this declaration file; presumably it
 * reads and writes the marker/probe/scope entries named in
 * STORE_CONTENT_ENCRYPTION_TEST_CONSTANTS — confirm against store-content-encryption.js.
 */
export declare function applyStoreContentEncryption(db: DatabaseSync, cipher: StoreContentCipher): void;
|
|
4
|
+
/**
 * Literal key/value constants for the content-encryption marker, probe and scope
 * entries. The name indicates they are exported for tests — TODO confirm intended use.
 */
export declare const STORE_CONTENT_ENCRYPTION_TEST_CONSTANTS: {
|
|
5
|
+
readonly markerKey: "content_encryption";
|
|
6
|
+
readonly markerValue: "aes-256-gcm/v1";
|
|
7
|
+
readonly probeKey: "content_encryption_probe";
|
|
8
|
+
readonly probePlaintext: "keiko-local-knowledge-content-encryption-v1";
|
|
9
|
+
readonly scopeKey: "content_encryption_scope";
|
|
10
|
+
readonly scopeValue: "reconstructive-columns/v2";
|
|
11
|
+
};
|
|
12
|
+
//# sourceMappingURL=store-content-encryption.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"store-content-encryption.d.ts","sourceRoot":"","sources":["../src/store-content-encryption.ts"],"names":[],"mappings":"AAqBA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAIhD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAiRpE,wBAAgB,2BAA2B,CAAC,EAAE,EAAE,YAAY,EAAE,MAAM,EAAE,kBAAkB,GAAG,IAAI,CA0C9F;AAED,eAAO,MAAM,uCAAuC;;;;;;;CAO1C,CAAC"}
|