@oscharko-dev/keiko-local-knowledge 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bounded-document-extraction.d.ts +27 -0
- package/dist/bounded-document-extraction.d.ts.map +1 -0
- package/dist/bounded-document-extraction.js +214 -0
- package/dist/capsule-lifecycle.d.ts +33 -0
- package/dist/capsule-lifecycle.d.ts.map +1 -0
- package/dist/capsule-lifecycle.js +292 -0
- package/dist/capsule-set-lifecycle.d.ts +15 -0
- package/dist/capsule-set-lifecycle.d.ts.map +1 -0
- package/dist/capsule-set-lifecycle.js +158 -0
- package/dist/chunking/chunker-persist.d.ts +36 -0
- package/dist/chunking/chunker-persist.d.ts.map +1 -0
- package/dist/chunking/chunker-persist.js +74 -0
- package/dist/chunking/chunker-runner.d.ts +9 -0
- package/dist/chunking/chunker-runner.d.ts.map +1 -0
- package/dist/chunking/chunker-runner.js +218 -0
- package/dist/chunking/chunker.d.ts +7 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +139 -0
- package/dist/chunking/citation-mapper.d.ts +4 -0
- package/dist/chunking/citation-mapper.d.ts.map +1 -0
- package/dist/chunking/citation-mapper.js +180 -0
- package/dist/chunking/index.d.ts +6 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/token-estimator.d.ts +3 -0
- package/dist/chunking/token-estimator.d.ts.map +1 -0
- package/dist/chunking/token-estimator.js +26 -0
- package/dist/chunking/types.d.ts +49 -0
- package/dist/chunking/types.d.ts.map +1 -0
- package/dist/chunking/types.js +26 -0
- package/dist/composition.d.ts +57 -0
- package/dist/composition.d.ts.map +1 -0
- package/dist/composition.js +310 -0
- package/dist/conversation/citation-attacher.d.ts +8 -0
- package/dist/conversation/citation-attacher.d.ts.map +1 -0
- package/dist/conversation/citation-attacher.js +55 -0
- package/dist/conversation/citation-excerpts.d.ts +4 -0
- package/dist/conversation/citation-excerpts.d.ts.map +1 -0
- package/dist/conversation/citation-excerpts.js +41 -0
- package/dist/conversation/grounded-answer-runner.d.ts +9 -0
- package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
- package/dist/conversation/grounded-answer-runner.js +61 -0
- package/dist/conversation/index.d.ts +5 -0
- package/dist/conversation/index.d.ts.map +1 -0
- package/dist/conversation/index.js +7 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
- package/dist/conversation/model-gateway-answer-generator.js +105 -0
- package/dist/conversation/types.d.ts +35 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +24 -0
- package/dist/discovery/discovery-runner.d.ts +23 -0
- package/dist/discovery/discovery-runner.d.ts.map +1 -0
- package/dist/discovery/discovery-runner.js +109 -0
- package/dist/discovery/extract-progressive.d.ts +17 -0
- package/dist/discovery/extract-progressive.d.ts.map +1 -0
- package/dist/discovery/extract-progressive.js +522 -0
- package/dist/discovery/extract.d.ts +26 -0
- package/dist/discovery/extract.d.ts.map +1 -0
- package/dist/discovery/extract.js +906 -0
- package/dist/discovery/glob.d.ts +10 -0
- package/dist/discovery/glob.d.ts.map +1 -0
- package/dist/discovery/glob.js +72 -0
- package/dist/discovery/index.d.ts +6 -0
- package/dist/discovery/index.d.ts.map +1 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/media-type.d.ts +4 -0
- package/dist/discovery/media-type.d.ts.map +1 -0
- package/dist/discovery/media-type.js +62 -0
- package/dist/discovery/persist.d.ts +63 -0
- package/dist/discovery/persist.d.ts.map +1 -0
- package/dist/discovery/persist.js +345 -0
- package/dist/discovery/test-support.d.ts +16 -0
- package/dist/discovery/test-support.d.ts.map +1 -0
- package/dist/discovery/test-support.js +127 -0
- package/dist/discovery/types.d.ts +63 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +28 -0
- package/dist/discovery/walk.d.ts +12 -0
- package/dist/discovery/walk.d.ts.map +1 -0
- package/dist/discovery/walk.js +302 -0
- package/dist/errors.d.ts +13 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +22 -0
- package/dist/evaluations/dimensions.d.ts +14 -0
- package/dist/evaluations/dimensions.d.ts.map +1 -0
- package/dist/evaluations/dimensions.js +191 -0
- package/dist/evaluations/fixtures.d.ts +18 -0
- package/dist/evaluations/fixtures.d.ts.map +1 -0
- package/dist/evaluations/fixtures.js +858 -0
- package/dist/evaluations/index.d.ts +7 -0
- package/dist/evaluations/index.d.ts.map +1 -0
- package/dist/evaluations/index.js +10 -0
- package/dist/evaluations/report.d.ts +3 -0
- package/dist/evaluations/report.d.ts.map +1 -0
- package/dist/evaluations/report.js +31 -0
- package/dist/evaluations/runner-seed.d.ts +12 -0
- package/dist/evaluations/runner-seed.d.ts.map +1 -0
- package/dist/evaluations/runner-seed.js +175 -0
- package/dist/evaluations/runner.d.ts +8 -0
- package/dist/evaluations/runner.d.ts.map +1 -0
- package/dist/evaluations/runner.js +205 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
- package/dist/evaluations/scripted-embedding-adapter.js +163 -0
- package/dist/evaluations/types.d.ts +116 -0
- package/dist/evaluations/types.d.ts.map +1 -0
- package/dist/evaluations/types.js +27 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/indexing/bounded-indexing.d.ts +41 -0
- package/dist/indexing/bounded-indexing.d.ts.map +1 -0
- package/dist/indexing/bounded-indexing.js +240 -0
- package/dist/indexing/checkpoint-persist.d.ts +8 -0
- package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
- package/dist/indexing/checkpoint-persist.js +135 -0
- package/dist/indexing/checkpoint-resume.d.ts +20 -0
- package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
- package/dist/indexing/checkpoint-resume.js +50 -0
- package/dist/indexing/embedding-batcher.d.ts +3 -0
- package/dist/indexing/embedding-batcher.d.ts.map +1 -0
- package/dist/indexing/embedding-batcher.js +390 -0
- package/dist/indexing/index.d.ts +7 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +11 -0
- package/dist/indexing/job-persist.d.ts +46 -0
- package/dist/indexing/job-persist.d.ts.map +1 -0
- package/dist/indexing/job-persist.js +157 -0
- package/dist/indexing/job-resume.d.ts +4 -0
- package/dist/indexing/job-resume.d.ts.map +1 -0
- package/dist/indexing/job-resume.js +14 -0
- package/dist/indexing/orchestrator.d.ts +3 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -0
- package/dist/indexing/orchestrator.js +1151 -0
- package/dist/indexing/types.d.ts +156 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +30 -0
- package/dist/indexing/vector-persist.d.ts +32 -0
- package/dist/indexing/vector-persist.d.ts.map +1 -0
- package/dist/indexing/vector-persist.js +105 -0
- package/dist/parsers/_internal.d.ts +20 -0
- package/dist/parsers/_internal.d.ts.map +1 -0
- package/dist/parsers/_internal.js +122 -0
- package/dist/parsers/csv-parser.d.ts +3 -0
- package/dist/parsers/csv-parser.d.ts.map +1 -0
- package/dist/parsers/csv-parser.js +202 -0
- package/dist/parsers/docx-parser.d.ts +3 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +390 -0
- package/dist/parsers/html-parser.d.ts +3 -0
- package/dist/parsers/html-parser.d.ts.map +1 -0
- package/dist/parsers/html-parser.js +310 -0
- package/dist/parsers/index.d.ts +15 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +41 -0
- package/dist/parsers/json-parser.d.ts +3 -0
- package/dist/parsers/json-parser.d.ts.map +1 -0
- package/dist/parsers/json-parser.js +192 -0
- package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
- package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
- package/dist/parsers/large-document/capability-discovery.js +76 -0
- package/dist/parsers/large-document/diagnostics.d.ts +3 -0
- package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
- package/dist/parsers/large-document/diagnostics.js +11 -0
- package/dist/parsers/large-document/index.d.ts +15 -0
- package/dist/parsers/large-document/index.d.ts.map +1 -0
- package/dist/parsers/large-document/index.js +10 -0
- package/dist/parsers/large-document/legacy-format.d.ts +5 -0
- package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
- package/dist/parsers/large-document/legacy-format.js +25 -0
- package/dist/parsers/large-document/preflight.d.ts +9 -0
- package/dist/parsers/large-document/preflight.d.ts.map +1 -0
- package/dist/parsers/large-document/preflight.js +43 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-extraction.js +123 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-pdf.js +145 -0
- package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
- package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
- package/dist/parsers/large-document/synthetic-source.js +101 -0
- package/dist/parsers/large-document/window-builder.d.ts +24 -0
- package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
- package/dist/parsers/large-document/window-builder.js +75 -0
- package/dist/parsers/ocr/index.d.ts +4 -0
- package/dist/parsers/ocr/index.d.ts.map +1 -0
- package/dist/parsers/ocr/index.js +4 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
- package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
- package/dist/parsers/ocr/types.d.ts +16 -0
- package/dist/parsers/ocr/types.d.ts.map +1 -0
- package/dist/parsers/ocr/types.js +4 -0
- package/dist/parsers/parser-test-fixtures.d.ts +28 -0
- package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
- package/dist/parsers/parser-test-fixtures.js +139 -0
- package/dist/parsers/pdf-parser.d.ts +43 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +388 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +57 -0
- package/dist/parsers/text-parser.d.ts +3 -0
- package/dist/parsers/text-parser.d.ts.map +1 -0
- package/dist/parsers/text-parser.js +214 -0
- package/dist/parsers/types.d.ts +53 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +21 -0
- package/dist/parsers/unsupported-parser.d.ts +4 -0
- package/dist/parsers/unsupported-parser.d.ts.map +1 -0
- package/dist/parsers/unsupported-parser.js +97 -0
- package/dist/parsers/xlsx-parser.d.ts +3 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +425 -0
- package/dist/privacy/audit-emitter.d.ts +5 -0
- package/dist/privacy/audit-emitter.d.ts.map +1 -0
- package/dist/privacy/audit-emitter.js +93 -0
- package/dist/privacy/diagnostic-redactor.d.ts +2 -0
- package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
- package/dist/privacy/diagnostic-redactor.js +153 -0
- package/dist/privacy/index.d.ts +5 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +6 -0
- package/dist/privacy/retention-applier.d.ts +5 -0
- package/dist/privacy/retention-applier.d.ts.map +1 -0
- package/dist/privacy/retention-applier.js +88 -0
- package/dist/privacy/types.d.ts +98 -0
- package/dist/privacy/types.d.ts.map +1 -0
- package/dist/privacy/types.js +12 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
- package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
- package/dist/qualityIntelligence/index.d.ts +3 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +5 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
- package/dist/qualityIntelligence/qiHandoff.js +82 -0
- package/dist/retrieval/answer-grounding.d.ts +9 -0
- package/dist/retrieval/answer-grounding.d.ts.map +1 -0
- package/dist/retrieval/answer-grounding.js +31 -0
- package/dist/retrieval/context-pack-assembler.d.ts +24 -0
- package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
- package/dist/retrieval/context-pack-assembler.js +50 -0
- package/dist/retrieval/index.d.ts +6 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +9 -0
- package/dist/retrieval/retrieval-runner.d.ts +10 -0
- package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
- package/dist/retrieval/retrieval-runner.js +163 -0
- package/dist/retrieval/scoped-vector-search.d.ts +24 -0
- package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
- package/dist/retrieval/scoped-vector-search.js +864 -0
- package/dist/retrieval/types.d.ts +28 -0
- package/dist/retrieval/types.d.ts.map +1 -0
- package/dist/retrieval/types.js +33 -0
- package/dist/section-path-hash.d.ts +3 -0
- package/dist/section-path-hash.d.ts.map +1 -0
- package/dist/section-path-hash.js +9 -0
- package/dist/source-lifecycle.d.ts +14 -0
- package/dist/source-lifecycle.d.ts.map +1 -0
- package/dist/source-lifecycle.js +155 -0
- package/dist/source-routing-validation.d.ts +11 -0
- package/dist/source-routing-validation.d.ts.map +1 -0
- package/dist/source-routing-validation.js +140 -0
- package/dist/store-content-cipher.d.ts +11 -0
- package/dist/store-content-cipher.d.ts.map +1 -0
- package/dist/store-content-cipher.js +67 -0
- package/dist/store-content-encryption.d.ts +12 -0
- package/dist/store-content-encryption.d.ts.map +1 -0
- package/dist/store-content-encryption.js +275 -0
- package/dist/store-paths.d.ts +6 -0
- package/dist/store-paths.d.ts.map +1 -0
- package/dist/store-paths.js +61 -0
- package/dist/store.d.ts +30 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +219 -0
- package/dist/testing.d.ts +47 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +170 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +43 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import type { CapsuleAnswerGroundingPolicy, ChunkId, DocumentId, EmbeddingModelIdentity, KnowledgeCapsuleId, KnowledgeSourceId, ParsedUnit } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { RetrievalNoEvidenceReason, RetrievalReference } from "../retrieval/types.js";
|
|
3
|
+
export interface EvalChunkSpec { // One chunk of an eval-fixture document (listed under EvalDocumentSpec.chunks).
|
|
4
|
+
readonly id: ChunkId; // Chunk identifier; RetrievalEvalQuery.expectedChunkIds uses the same ChunkId type.
|
|
5
|
+
readonly text: string; // Text content of the chunk.
|
|
6
|
+
readonly topic?: string; // Optional topic label; how it is consumed is not visible in this declaration file.
|
|
7
|
+
readonly parsedUnitId?: string; // NOTE(review): presumably matches an EvalParsedUnitSpec.id of the same document - confirm.
|
|
8
|
+
}
|
|
9
|
+
export type EvalParsedUnitWithoutDocId = Omit<Extract<ParsedUnit, { // A ParsedUnit of one of the six kinds listed here, minus its documentId field. Omit is applied per variant because Omit over the whole union would not distribute across its members.
|
|
10
|
+
kind: "page";
|
|
11
|
+
}>, "documentId"> | Omit<Extract<ParsedUnit, {
|
|
12
|
+
kind: "section";
|
|
13
|
+
}>, "documentId"> | Omit<Extract<ParsedUnit, {
|
|
14
|
+
kind: "json-path";
|
|
15
|
+
}>, "documentId"> | Omit<Extract<ParsedUnit, {
|
|
16
|
+
kind: "csv-row";
|
|
17
|
+
}>, "documentId"> | Omit<Extract<ParsedUnit, {
|
|
18
|
+
kind: "html-block";
|
|
19
|
+
}>, "documentId"> | Omit<Extract<ParsedUnit, {
|
|
20
|
+
kind: "unsupported-media";
|
|
21
|
+
}>, "documentId">;
|
|
22
|
+
export interface EvalParsedUnitSpec { // One parsed unit of an eval-fixture document, paired with a plain string id.
|
|
23
|
+
readonly id: string; // Plain string identifier (not a branded id type).
|
|
24
|
+
readonly unit: EvalParsedUnitWithoutDocId; // Unit payload without documentId. NOTE(review): presumably the harness supplies the owning document's id - confirm.
|
|
25
|
+
}
|
|
26
|
+
export interface EvalDocumentSpec { // One document of an eval source: identity, optional media/parser metadata, and its parsed units and chunks.
|
|
27
|
+
readonly id: DocumentId;
|
|
28
|
+
readonly safeDisplayName: string; // Display name; NOTE(review): "safe" presumably means free of sensitive path details - confirm.
|
|
29
|
+
readonly mediaType?: string; // Optional; the default applied when omitted is not visible here.
|
|
30
|
+
readonly parserId?: string; // Optional; the default applied when omitted is not visible here.
|
|
31
|
+
readonly parserVersion?: string; // Optional; the default applied when omitted is not visible here.
|
|
32
|
+
readonly parsedUnits: readonly EvalParsedUnitSpec[]; // Parsed units belonging to this document.
|
|
33
|
+
readonly chunks: readonly EvalChunkSpec[]; // Chunks belonging to this document.
|
|
34
|
+
}
|
|
35
|
+
export interface EvalSourceSpec { // One knowledge source of an eval capsule, holding its documents.
|
|
36
|
+
readonly id: KnowledgeSourceId;
|
|
37
|
+
readonly documents: readonly EvalDocumentSpec[]; // Documents that belong to this source.
|
|
38
|
+
}
|
|
39
|
+
export interface EvalCapsuleSpec { // One capsule of an eval fixture: identity, grounding policy, embedding model identity, and its sources.
|
|
40
|
+
readonly id: KnowledgeCapsuleId;
|
|
41
|
+
readonly displayName: string;
|
|
42
|
+
readonly answerGroundingPolicy: CapsuleAnswerGroundingPolicy; // Policy type comes from the contracts package; its values are not visible here.
|
|
43
|
+
readonly embeddingModelIdentity: EmbeddingModelIdentity; // Embedding model identity recorded for this capsule.
|
|
44
|
+
readonly sources: readonly EvalSourceSpec[]; // Sources that belong to this capsule.
|
|
45
|
+
}
|
|
46
|
+
export type EvalRetrievalScope = { // Scope a query runs against, discriminated by `kind`: a single capsule, or a capsule set.
|
|
47
|
+
readonly kind: "capsule";
|
|
48
|
+
readonly capsuleId: KnowledgeCapsuleId;
|
|
49
|
+
} | {
|
|
50
|
+
readonly kind: "capsule-set";
|
|
51
|
+
readonly capsuleSetId: string; // Plain string id, unlike the branded KnowledgeCapsuleId used for capsules.
|
|
52
|
+
readonly capsuleIds: readonly KnowledgeCapsuleId[]; // Member capsule ids are listed explicitly alongside the set id.
|
|
53
|
+
};
|
|
54
|
+
export interface RetrievalEvalQuery { // One fixture query together with its ground truth and optional per-query settings.
|
|
55
|
+
readonly id: string;
|
|
56
|
+
readonly text: string; // Query text.
|
|
57
|
+
readonly topic?: string; // Optional topic label; how it is consumed is not visible in this declaration file.
|
|
58
|
+
readonly scope: EvalRetrievalScope; // Capsule or capsule set the query is evaluated against.
|
|
59
|
+
readonly expectedChunkIds?: readonly ChunkId[]; // Ground-truth chunk ids for this query.
|
|
60
|
+
readonly expectedNoEvidence?: boolean; // Explicit flag for a query that is expected to yield no evidence.
|
|
61
|
+
readonly expectedNoEvidenceReason?: RetrievalNoEvidenceReason; // Optional expected reason, typed by the retrieval layer.
|
|
62
|
+
readonly topK?: number; // Optional; the default applied when omitted is not visible here.
|
|
63
|
+
readonly contextBudgetTokens?: number; // Optional; the default applied when omitted is not visible here.
|
|
64
|
+
readonly queryEmbeddingIdentity?: EmbeddingModelIdentity; // NOTE(review): presumably overrides the embedding identity used for the query - confirm.
|
|
65
|
+
}
|
|
66
|
+
export interface RetrievalEvalFixture { // Contract a fixture must satisfy: capsule specs (with nested sources, documents, parsed units, chunks) plus the queries to run.
|
|
67
|
+
readonly id: string;
|
|
68
|
+
readonly description: string;
|
|
69
|
+
readonly capsules: readonly EvalCapsuleSpec[]; // Specs used to materialise the store under test.
|
|
70
|
+
readonly queries: readonly RetrievalEvalQuery[]; // Queries, each with ground truth or an explicit no-evidence flag.
|
|
71
|
+
}
|
|
72
|
+
export interface RetrievalEvalDimensionScores { // Per-dimension results of an eval run; the quality dimensions are each in [0, 1].
|
|
73
|
+
readonly recall: number;
|
|
74
|
+
readonly precision: number;
|
|
75
|
+
readonly meanReciprocalRank: number;
|
|
76
|
+
readonly ndcg: number;
|
|
77
|
+
readonly sourceIsolation: number;
|
|
78
|
+
readonly citationQuality: number;
|
|
79
|
+
readonly noEvidenceAccuracy: number;
|
|
80
|
+
readonly contextBudgetFit: number;
|
|
81
|
+
readonly latencyMs: number; // Latency in milliseconds; the only field here with no counterpart in RetrievalEvalThresholds.
|
|
82
|
+
}
|
|
83
|
+
export interface ModelJudgedRetrievalEvalInput { // Argument handed to ModelJudgedRetrievalEvalJudge.judge for one query.
|
|
84
|
+
readonly fixtureId: string;
|
|
85
|
+
readonly queryId: string;
|
|
86
|
+
readonly queryText: string;
|
|
87
|
+
readonly references: readonly RetrievalReference[]; // NOTE(review): presumably the references retrieval returned for the query - confirm.
|
|
88
|
+
readonly noEvidence: boolean;
|
|
89
|
+
readonly reason?: RetrievalNoEvidenceReason; // NOTE(review): presumably set only when noEvidence is true - confirm.
|
|
90
|
+
}
|
|
91
|
+
export interface ModelJudgedRetrievalEvalScores { // Scores a judge resolves to; the value range is not stated in this declaration file.
|
|
92
|
+
readonly groundedness: number;
|
|
93
|
+
readonly faithfulness: number;
|
|
94
|
+
}
|
|
95
|
+
export interface ModelJudgedRetrievalEvalJudge { // Pluggable judge exposing a single asynchronous scoring function.
|
|
96
|
+
readonly judge: (input: ModelJudgedRetrievalEvalInput) => Promise<ModelJudgedRetrievalEvalScores>; // Maps one judge input to groundedness/faithfulness scores.
|
|
97
|
+
}
|
|
98
|
+
export interface RetrievalEvalScorecard { // Immutable result the eval runner returns for one fixture.
|
|
99
|
+
readonly fixtureId: string;
|
|
100
|
+
readonly runId: string;
|
|
101
|
+
readonly dimensions: RetrievalEvalDimensionScores;
|
|
102
|
+
readonly passed: boolean; // Conjunction of the per-dimension thresholds in PASS_THRESHOLDS.
|
|
103
|
+
readonly modelJudged?: ModelJudgedRetrievalEvalScores; // Optional judge scores; whether they influence `passed` is not visible here.
|
|
104
|
+
}
|
|
105
|
+
export interface RetrievalEvalThresholds { // One threshold per quality dimension: the keys of RetrievalEvalDimensionScores minus latencyMs.
|
|
106
|
+
readonly recall: number;
|
|
107
|
+
readonly precision: number;
|
|
108
|
+
readonly meanReciprocalRank: number;
|
|
109
|
+
readonly ndcg: number;
|
|
110
|
+
readonly sourceIsolation: number;
|
|
111
|
+
readonly citationQuality: number;
|
|
112
|
+
readonly noEvidenceAccuracy: number;
|
|
113
|
+
readonly contextBudgetFit: number;
|
|
114
|
+
}
|
|
115
|
+
export declare const PASS_THRESHOLDS: RetrievalEvalThresholds; // Per-dimension thresholds whose conjunction decides RetrievalEvalScorecard.passed.
|
|
116
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/evaluations/types.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EACV,4BAA4B,EAC5B,OAAO,EACP,UAAU,EACV,sBAAsB,EACtB,kBAAkB,EAClB,iBAAiB,EACjB,UAAU,EACX,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EAAE,yBAAyB,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAQ3F,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,EAAE,EAAE,OAAO,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAKtB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAGxB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;CAChC;AAED,MAAM,MAAM,0BAA0B,GAClC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EAAE,YAAY,CAAC,GACzD,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC,EAAE,YAAY,CAAC,GAC5D,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE;IAAE,IAAI,EAAE,WAAW,CAAA;CAAE,CAAC,EAAE,YAAY,CAAC,GAC9D,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE;IAAE,IAAI,EAAE,SAAS,CAAA;CAAE,CAAC,EAAE,YAAY,CAAC,GAC5D,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE;IAAE,IAAI,EAAE,YAAY,CAAA;CAAE,CAAC,EAAE,YAAY,CAAC,GAC/D,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE;IAAE,IAAI,EAAE,mBAAmB,CAAA;CAAE,CAAC,EAAE,YAAY,CAAC,CAAC;AAE3E,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IAGpB,QAAQ,CAAC,IAAI,EAAE,0BAA0B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,EAAE,EAAE,UAAU,CAAC;IACxB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,WAAW,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACpD,QAAQ,CAAC,MAAM,EAAE,SAAS,aAAa,EAAE,CAAC;CAC3C;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,EAAE,EAAE,iBAAiB,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,SAAS,gBAAgB,EAAE,CAAC;CACjD;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,EAAE,EAAE,kBAAkB,CAAC;IAChC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,qBAAqB,EAAE,4BAA4B,CAAC;IAC7D,QAAQ,CAAC,sBAAsB,EAAE,sBAAsB,CAAC;IACxD,QAAQ,CAAC,OAAO,EAAE,SAAS,cAAc,EAAE,CAAC;CAC7C;AAKD,MAAM,MAAM,kBAAkB,GAC1B;IAAE,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC;IAAC,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAA;CAAE,GACpE;IACE,QAAQ,CAAC,IAAI,EAAE,aAAa,CAAC;IAC7B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,UAAU,EAAE,SAAS,kBAA
kB,EAAE,CAAC;CACpD,CAAC;AAEN,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAItB,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,KAAK,EAAE,kBAAkB,CAAC;IACnC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,SAAS,OAAO,EAAE,CAAC;IAC/C,QAAQ,CAAC,kBAAkB,CAAC,EAAE,OAAO,CAAC;IACtC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,yBAAyB,CAAC;IAE9D,QAAQ,CAAC,IAAI,CAAC,EAAE,MAAM,CAAC;IAIvB,QAAQ,CAAC,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAItC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,sBAAsB,CAAC;CAC1D;AAED,MAAM,WAAW,oBAAoB;IACnC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,QAAQ,EAAE,SAAS,eAAe,EAAE,CAAC;IAC9C,QAAQ,CAAC,OAAO,EAAE,SAAS,kBAAkB,EAAE,CAAC;CACjD;AAOD,MAAM,WAAW,4BAA4B;IAC3C,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;IAGlC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,6BAA6B;IAC5C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACnD,QAAQ,CAAC,UAAU,EAAE,OAAO,CAAC;IAC7B,QAAQ,CAAC,MAAM,CAAC,EAAE,yBAAyB,CAAC;CAC7C;AAED,MAAM,WAAW,8BAA8B;IAC7C,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;CAC/B;AAED,MAAM,WAAW,6BAA6B;IAC5C,QAAQ,CAAC,KAAK,EAAE,CAAC,KAAK,EAAE,6BAA6B,KAAK,OAAO,CAAC,8BAA8B,CAAC,CAAC;CACnG;AAED,MAAM,WAAW,sBAAsB;IACrC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,4BAA4B,CAAC;IAClD,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,QAAQ,CAAC,WAAW,CAAC,EAAE,8BAA8B,CAAC;CACvD;AAOD,MAAM,WAAW,uBAAuB;IACtC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,eAAe,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAA
Q,CAAC,gBAAgB,EAAE,MAAM,CAAC;CACnC;AAED,eAAO,MAAM,eAAe,EAAE,uBAS5B,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// Type contracts for the retrieval evaluation harness (Epic #189, Issue #268). The
|
|
2
|
+
// harness is OFFLINE only — it composes #199 retrieval through a scripted (deterministic)
|
|
3
|
+
// embedding adapter so every scorecard is byte-identical across runs. That determinism is
|
|
4
|
+
// load-bearing for the audit ledger (#10): a non-deterministic eval result would force
|
|
5
|
+
// the manifest to encode timestamps or random salts and break the cross-machine equality
|
|
6
|
+
// check that the verification matrix relies on.
|
|
7
|
+
//
|
|
8
|
+
// `RetrievalEvalFixture` is the executable contract a fixture must satisfy: enough capsule
|
|
9
|
+
// + source + document + parsed-unit + chunk specs to materialise an in-memory store, plus
|
|
10
|
+
// a list of queries with ground truth (`expectedChunkIds`) or an explicit no-evidence flag.
|
|
11
|
+
//
|
|
12
|
+
// `RetrievalEvalScorecard` is the immutable result the runner returns. The quality
|
|
13
|
+
// dimensions are each in `[0, 1]`; latency is reported separately as a deterministic
|
|
14
|
+
// synthetic-millisecond total unless the caller overrides `deps.now`. `passed` is the
|
|
15
|
+
// conjunction of per-dimension thresholds in `PASS_THRESHOLDS` — exposing the constant
|
|
16
|
+
// lets a UI display a "X met 7/8 thresholds" breakdown without re-implementing the
|
|
17
|
+
// comparison.
|
|
18
|
+
export const PASS_THRESHOLDS = Object.freeze({ // Frozen per-dimension pass thresholds (eight quality dimensions; no latency threshold is defined).
|
|
19
|
+
recall: 0.9,
|
|
20
|
+
precision: 0.8, // The lowest threshold in the set.
|
|
21
|
+
meanReciprocalRank: 0.9,
|
|
22
|
+
ndcg: 0.9,
|
|
23
|
+
sourceIsolation: 1.0, // NOTE(review): 1.0 is the top of the [0, 1] range, so presumably only a perfect score passes - confirm the comparison is >=.
|
|
24
|
+
citationQuality: 0.9,
|
|
25
|
+
noEvidenceAccuracy: 1.0, // NOTE(review): as above, presumably requires a perfect score - confirm.
|
|
26
|
+
contextBudgetFit: 1.0, // NOTE(review): as above, presumably requires a perfect score - confirm.
|
|
27
|
+
});
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
export { KEIKO_LOCAL_KNOWLEDGE_VERSION } from "./version.js";
|
|
2
|
+
export { KnowledgeStoreError, KnowledgePathError, KnowledgeNotFoundError } from "./errors.js";
|
|
3
|
+
export { resolveKnowledgeStorePath, type ResolveKnowledgeStorePathOptions } from "./store-paths.js";
|
|
4
|
+
export { openKnowledgeStore, type KnowledgeStoreKeyProvider, type KnowledgeStoreKeyProviderContext, type KnowledgeStoreProtectionOptions, type KnowledgeStore, type OpenKnowledgeStoreOptions, } from "./store.js";
|
|
5
|
+
export { createCapsule, deleteCapsule, getCapsule, listCapsules, updateCapsuleState, updateCapsuleDetails, type CapsuleDetailsPatch, type CreateCapsuleInput, } from "./capsule-lifecycle.js";
|
|
6
|
+
export { addSourceToCapsule, listCapsuleSources, removeSourceFromCapsule, type AddCapsuleSourceInput, } from "./source-lifecycle.js";
|
|
7
|
+
export { createCapsuleSet, deleteCapsuleSet, getCapsuleSet, listCapsuleSets, type CreateCapsuleSetInput, } from "./capsule-set-lifecycle.js";
|
|
8
|
+
export { DEFAULT_MAX_BYTES, DEFAULT_MAX_NESTING_DEPTH, DEFAULT_MAX_OBJECTS, DEFAULT_MAX_UNITS, DEFAULT_TIMEOUT_MS, PARSER_ERROR_CODES, buildParserOptions, createDefaultParserRegistry, createParserRegistry, csvParser, docxParser, htmlParser, jsonParser, pdfParser, registerParser, resolveParser, textParser, unsupportedParser, xlsxParser, type AsyncParserAdapter, type ParserAdapter, type ParserCapability, type ParserErrorCode, type ParserOptions, type ParserRegistry, type ParserResolution, type ParserSelectionInput, } from "./parsers/index.js";
|
|
9
|
+
export { createOcrPipelineParser, nullOcrAdapter, type OcrAdapter, type OcrPageResult, type OcrPipelineAdapter, } from "./parsers/ocr/index.js";
|
|
10
|
+
export { extractBoundedDocumentText, type BoundedDocumentExtractionInput, type BoundedDocumentExtractionOptions, type BoundedDocumentExtractionOutcome, type BoundedDocumentExtractionResult, type BoundedDocumentFormat, } from "./bounded-document-extraction.js";
|
|
11
|
+
export { DEFAULT_DISCOVERY_OPTIONS, discoverAndExtract, documentIdFor, extensionOf, extractDocument, mediaTypeFor, walkSource, type DiscoverAndExtractDeps, type DiscoverAndExtractParams, type DiscoveredFile, type DiscoveryError, type DiscoveryErrorCode, type DiscoveryOptions, type ExtractDocumentDeps, type ExtractDocumentParams, type ExtractionEvent, type ExtractionOutcome, type ExtractionResult, type WalkYield, } from "./discovery/index.js";
|
|
12
|
+
export * from "./indexing/index.js";
|
|
13
|
+
export * from "./retrieval/index.js";
|
|
14
|
+
export * from "./evaluations/index.js";
|
|
15
|
+
export * from "./conversation/index.js";
|
|
16
|
+
export { readCitationExcerpt } from "./conversation/citation-excerpts.js";
|
|
17
|
+
export * from "./privacy/index.js";
|
|
18
|
+
export { addSourcesToCapsule, buildComposedRetrievalScope, composeCapsules, CompositionError, describeRetrievalScope, listCapsuleMembershipChanges, } from "./composition.js";
|
|
19
|
+
export type { AddSourcesToCapsuleResult, CapsuleMembershipChange, CapsuleMembershipChangeKind, ComposedRetrievalScope, ComposeCapsulesOptions, CompositionErrorCode, RetrievalCapsuleSummary, RetrievalScopeDisclosure, RetrievalSourceSummary, } from "./composition.js";
|
|
20
|
+
export { SourceRoutingValidationError, validateAlwaysQuery, validateGlobPatterns, validateRoutingInstructionsScope, validateSourceRoutingForCapsule, } from "./source-routing-validation.js";
|
|
21
|
+
export type { SourceRoutingValidationCode } from "./source-routing-validation.js";
|
|
22
|
+
export * as QualityIntelligenceHandoff from "./qualityIntelligence/index.js";
|
|
23
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAcA,OAAO,EAAE,6BAA6B,EAAE,MAAM,cAAc,CAAC;AAC7D,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAC9F,OAAO,EAAE,yBAAyB,EAAE,KAAK,gCAAgC,EAAE,MAAM,kBAAkB,CAAC;AACpG,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,EAC9B,KAAK,gCAAgC,EACrC,KAAK,+BAA+B,EACpC,KAAK,cAAc,EACnB,KAAK,yBAAyB,GAC/B,MAAM,YAAY,CAAC;AACpB,OAAO,EACL,aAAa,EACb,aAAa,EACb,UAAU,EACV,YAAY,EACZ,kBAAkB,EAClB,oBAAoB,EACpB,KAAK,mBAAmB,EACxB,KAAK,kBAAkB,GACxB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EACL,kBAAkB,EAClB,kBAAkB,EAClB,uBAAuB,EACvB,KAAK,qBAAqB,GAC3B,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EACL,gBAAgB,EAChB,gBAAgB,EAChB,aAAa,EACb,eAAe,EACf,KAAK,qBAAqB,GAC3B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,EACL,iBAAiB,EACjB,yBAAyB,EACzB,mBAAmB,EACnB,iBAAiB,EACjB,kBAAkB,EAClB,kBAAkB,EAClB,kBAAkB,EAClB,2BAA2B,EAC3B,oBAAoB,EACpB,SAAS,EACT,UAAU,EACV,UAAU,EACV,UAAU,EACV,SAAS,EACT,cAAc,EACd,aAAa,EACb,UAAU,EACV,iBAAiB,EACjB,UAAU,EACV,KAAK,kBAAkB,EACvB,KAAK,aAAa,EAClB,KAAK,gBAAgB,EACrB,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,oBAAoB,GAC1B,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EACL,uBAAuB,EACvB,cAAc,EACd,KAAK,UAAU,EACf,KAAK,aAAa,EAClB,KAAK,kBAAkB,GACxB,MAAM,wBAAwB,CAAC;AAIhC,OAAO,EACL,0BAA0B,EAC1B,KAAK,8BAA8B,EACnC,KAAK,gCAAgC,EACrC,KAAK,gCAAgC,EACrC,KAAK,+BAA+B,EACpC,KAAK,qBAAqB,GAC3B,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EACL,yBAAyB,EACzB,kBAAkB,EAClB,aAAa,EACb,WAAW,EACX,eAAe,EACf,YAAY,EACZ,UAAU,EACV,KAAK,sBAAsB,EAC3B,KAAK,wBAAwB,EAC7B,KAAK,cAAc,EACnB,KAAK,cAAc,EACnB,KAAK,kBAAkB,EACvB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,qBAAqB,EAC1B,KAAK,eAAe,EACpB,KAAK,iBAAiB,EACtB,KAAK,gBAAgB,EACrB,KAAK,SAAS,GACf,MAAM,sBAAsB,CAAC;AAC9B,cAAc,qBAAqB,CAAC;AACpC,cAAc,sBAAsB,CAAC;AACrC,cAAc,wBAAwB,CAAC;AACvC,cAAc,yBAAyB,CAAC;AACxC,OAAO,EAAE,mBAAmB,EAAE,MAAM,qCAAqC,CAAC;AAC1E,cAAc,oBAAoB,CAAC;AAEnC,OAAO,EACL,mBAAmB,EACnB,2BAA2B,EAC3B,eAAe,EACf,gBAAgB,EAChB,sBAAsB,EACtB,4BAA4B,GAC7B,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,yBAAyB,EACzB,uBAAuB,EACvB,2BAA2B,EAC3B,sBAA
sB,EACtB,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,wBAAwB,EACxB,sBAAsB,GACvB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EACL,4BAA4B,EAC5B,mBAAmB,EACnB,oBAAoB,EACpB,gCAAgC,EAChC,+BAA+B,GAChC,MAAM,gCAAgC,CAAC;AACxC,YAAY,EAAE,2BAA2B,EAAE,MAAM,gCAAgC,CAAC;AAMlF,OAAO,KAAK,0BAA0B,MAAM,gCAAgC,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Public surface of @oscharko-dev/keiko-local-knowledge (Epic #189, Issues #193, #263, #266,
|
|
2
|
+
// #194, #196, #199). Composes the #265 schema with a node:sqlite runtime, exposes typed
|
|
3
|
+
// CRUD for capsules/sources/sets, the parser registry (#266), the discovery +
|
|
4
|
+
// extraction bridge (#194), the indexing orchestrator (#196), and the retrieval
|
|
5
|
+
// orchestrator (#199) that turns a `ComposedRetrievalScope` + query into a ranked list
|
|
6
|
+
// of `RetrievalReference` + a `LocalKnowledgeGroundedContextPack`.
|
|
7
|
+
//
|
|
8
|
+
// The HTTP / UI wiring is OUT OF SCOPE for this package (lands in #200 Conversation
|
|
9
|
+
// Center integration). ADR-0019 direction rule 3e allows this package to depend on
|
|
10
|
+
// `@oscharko-dev/keiko-contracts`, `@oscharko-dev/keiko-workspace`, and
|
|
11
|
+
// `@oscharko-dev/keiko-model-gateway` — the model-gateway dep was added at #196 so the
|
|
12
|
+
// indexing + retrieval layers can call the OpenAI-compatible embeddings adapter
|
|
13
|
+
// through the same boundary the rest of the codebase uses.
|
|
14
|
+
export { KEIKO_LOCAL_KNOWLEDGE_VERSION } from "./version.js";
|
|
15
|
+
export { KnowledgeStoreError, KnowledgePathError, KnowledgeNotFoundError } from "./errors.js";
|
|
16
|
+
export { resolveKnowledgeStorePath } from "./store-paths.js";
|
|
17
|
+
export { openKnowledgeStore, } from "./store.js";
|
|
18
|
+
export { createCapsule, deleteCapsule, getCapsule, listCapsules, updateCapsuleState, updateCapsuleDetails, } from "./capsule-lifecycle.js";
|
|
19
|
+
export { addSourceToCapsule, listCapsuleSources, removeSourceFromCapsule, } from "./source-lifecycle.js";
|
|
20
|
+
export { createCapsuleSet, deleteCapsuleSet, getCapsuleSet, listCapsuleSets, } from "./capsule-set-lifecycle.js";
|
|
21
|
+
export { DEFAULT_MAX_BYTES, DEFAULT_MAX_NESTING_DEPTH, DEFAULT_MAX_OBJECTS, DEFAULT_MAX_UNITS, DEFAULT_TIMEOUT_MS, PARSER_ERROR_CODES, buildParserOptions, createDefaultParserRegistry, createParserRegistry, csvParser, docxParser, htmlParser, jsonParser, pdfParser, registerParser, resolveParser, textParser, unsupportedParser, xlsxParser, } from "./parsers/index.js";
|
|
22
|
+
// OCR adapter seam (Issue #202).
|
|
23
|
+
export { createOcrPipelineParser, nullOcrAdapter, } from "./parsers/ocr/index.js";
|
|
24
|
+
// Bounded request-local small-document text extraction for Repository Search (Issue #1285).
|
|
25
|
+
// Reuses the shipped DOCX/XLSX/PDF parser adapters as a pure, byte-capped text projection.
|
|
26
|
+
export { extractBoundedDocumentText, } from "./bounded-document-extraction.js";
|
|
27
|
+
export { DEFAULT_DISCOVERY_OPTIONS, discoverAndExtract, documentIdFor, extensionOf, extractDocument, mediaTypeFor, walkSource, } from "./discovery/index.js";
|
|
28
|
+
export * from "./indexing/index.js";
|
|
29
|
+
export * from "./retrieval/index.js";
|
|
30
|
+
export * from "./evaluations/index.js";
|
|
31
|
+
export * from "./conversation/index.js";
|
|
32
|
+
export { readCitationExcerpt } from "./conversation/citation-excerpts.js";
|
|
33
|
+
export * from "./privacy/index.js";
|
|
34
|
+
// Slice 4 (Issue #189) — non-destructive capsule-set composition exposed to the BFF.
|
|
35
|
+
export { addSourcesToCapsule, buildComposedRetrievalScope, composeCapsules, CompositionError, describeRetrievalScope, listCapsuleMembershipChanges, } from "./composition.js";
|
|
36
|
+
export { SourceRoutingValidationError, validateAlwaysQuery, validateGlobPatterns, validateRoutingInstructionsScope, validateSourceRoutingForCapsule, } from "./source-routing-validation.js";
|
|
37
|
+
// ─── Quality Intelligence handoff (Issue #278) ─────────────────────────────────
|
|
38
|
+
// Pure adapter that converts a local-knowledge RetrievalReference list into a list of
|
|
39
|
+
// `QualityIntelligenceLocalKnowledgeCapsuleEnvelope` instances for QI ingestion. No
|
|
40
|
+
// new retrieval logic; consumes only existing local-knowledge / contract types.
|
|
41
|
+
export * as QualityIntelligenceHandoff from "./qualityIntelligence/index.js";
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import type { ChunkId, DocumentId, EmbeddingModelIdentity, IndexingJobError, KnowledgeCapsuleId, KnowledgeSourceId, LargeDocumentResourcePolicy } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { OpenAIEmbeddingAdapter } from "@oscharko-dev/keiko-model-gateway";
|
|
3
|
+
import type { ChunkingOptions } from "../chunking/types.js";
|
|
4
|
+
import type { KnowledgeStore } from "../store.js";
|
|
5
|
+
export interface BoundedChunkParams {
|
|
6
|
+
readonly capsuleId: KnowledgeCapsuleId;
|
|
7
|
+
readonly sourceId: KnowledgeSourceId;
|
|
8
|
+
readonly documentId: DocumentId;
|
|
9
|
+
}
|
|
10
|
+
export declare class BoundedIndexingCancelledError extends Error {
|
|
11
|
+
constructor(message?: string);
|
|
12
|
+
}
|
|
13
|
+
export declare class BoundedIndexingPolicyError extends Error {
|
|
14
|
+
readonly code = "RESOURCE_POLICY_EXCEEDED";
|
|
15
|
+
constructor(message: string);
|
|
16
|
+
toIndexingError(): IndexingJobError;
|
|
17
|
+
}
|
|
18
|
+
export declare function chunkDocumentBounded(store: KnowledgeStore, params: BoundedChunkParams, options: ChunkingOptions | undefined, signal: AbortSignal | undefined, policy?: LargeDocumentResourcePolicy): number;
|
|
19
|
+
export interface BoundedEmbedDeps {
|
|
20
|
+
readonly store: KnowledgeStore;
|
|
21
|
+
readonly capsuleId: KnowledgeCapsuleId;
|
|
22
|
+
readonly documentId: DocumentId;
|
|
23
|
+
readonly adapter: OpenAIEmbeddingAdapter;
|
|
24
|
+
readonly identity: EmbeddingModelIdentity;
|
|
25
|
+
readonly batchSize: number;
|
|
26
|
+
readonly concurrency: number;
|
|
27
|
+
readonly now: () => number;
|
|
28
|
+
readonly idSource: () => string;
|
|
29
|
+
readonly signal?: AbortSignal;
|
|
30
|
+
readonly policy?: LargeDocumentResourcePolicy;
|
|
31
|
+
readonly onBatch?: (cursor: number, lastChunkId: ChunkId) => void;
|
|
32
|
+
}
|
|
33
|
+
export interface BoundedEmbedResult {
|
|
34
|
+
readonly vectorCount: number;
|
|
35
|
+
readonly errors: readonly IndexingJobError[];
|
|
36
|
+
readonly lastChunkId: ChunkId | null;
|
|
37
|
+
readonly embeddedCursor: number;
|
|
38
|
+
readonly cancelled: boolean;
|
|
39
|
+
}
|
|
40
|
+
export declare function embedDocumentChunksBounded(deps: BoundedEmbedDeps): Promise<BoundedEmbedResult>;
|
|
41
|
+
//# sourceMappingURL=bounded-indexing.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bounded-indexing.d.ts","sourceRoot":"","sources":["../../src/indexing/bounded-indexing.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EACV,OAAO,EACP,UAAU,EACV,sBAAsB,EACtB,gBAAgB,EAChB,kBAAkB,EAClB,iBAAiB,EACjB,2BAA2B,EAE5B,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,mCAAmC,CAAC;AAQhF,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAQ5D,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAKlD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAC;IACvC,QAAQ,CAAC,QAAQ,EAAE,iBAAiB,CAAC;IACrC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;CACjC;AAED,qBAAa,6BAA8B,SAAQ,KAAK;gBACnC,OAAO,SAA+B;CAI1D;AAED,qBAAa,0BAA2B,SAAQ,KAAK;IACnD,SAAgB,IAAI,8BAA8B;gBAC/B,OAAO,EAAE,MAAM;IAK3B,eAAe,IAAI,gBAAgB;CAG3C;AA4FD,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,cAAc,EACrB,MAAM,EAAE,kBAAkB,EAC1B,OAAO,EAAE,eAAe,GAAG,SAAS,EACpC,MAAM,EAAE,WAAW,GAAG,SAAS,EAC/B,MAAM,CAAC,EAAE,2BAA2B,GACnC,MAAM,CA6BR;AAwCD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,KAAK,EAAE,cAAc,CAAC;IAC/B,QAAQ,CAAC,SAAS,EAAE,kBAAkB,CAAC;IACvC,QAAQ,CAAC,UAAU,EAAE,UAAU,CAAC;IAChC,QAAQ,CAAC,OAAO,EAAE,sBAAsB,CAAC;IACzC,QAAQ,CAAC,QAAQ,EAAE,sBAAsB,CAAC;IAC1C,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,GAAG,EAAE,MAAM,MAAM,CAAC;IAC3B,QAAQ,CAAC,QAAQ,EAAE,MAAM,MAAM,CAAC;IAChC,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,MAAM,CAAC,EAAE,2BAA2B,CAAC;IAG9C,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,MAAM,EAAE,SAAS,gBAAgB,EAAE,CAAC;IAC7C,QAAQ,CAAC,WAAW,EAAE,OAAO,GAAG,IAAI,CAAC;IACrC,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC;CAC7B;AAqHD,wBAAsB,0BAA0B,CAC9C,IAAI,EAAE,gBAAgB,GACrB,OAAO,CAAC,kBAAkB,CAAC,CAmB7B"}
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// Bounded chunking + embedding for progressively-extracted large documents (Epic #1160,
|
|
2
|
+
// Issue #1286).
|
|
3
|
+
//
|
|
4
|
+
// The standard chunk/embed path loads the whole document text into a JS string and slices each
|
|
5
|
+
// chunk from it. For a large document that defeats the bounded-memory guarantee. These helpers read
|
|
6
|
+
// each parsed unit's / chunk's text back through the unified `readDocumentTextSpan` (SQLite SUBSTR
|
|
7
|
+
// over the bounded document_text_windows rows), so peak memory stays O(window/batch) regardless of
|
|
8
|
+
// document size. The chunk ids, offsets, token counts, and excerpt hashes are byte-identical to the
|
|
9
|
+
// standard chunker, so the bounded path and the standard path produce the same retrievable index.
|
|
10
|
+
import { chunkParsedUnit, chunkingStrategyKey, resolveChunkingOptions, } from "../chunking/chunker.js";
|
|
11
|
+
import { composeChunkId, rowToParsedUnit } from "../chunking/chunker-runner.js";
|
|
12
|
+
import { deleteChunksForDocument, insertChunkRow, selectParsedUnitsForDocument, } from "../chunking/chunker-persist.js";
|
|
13
|
+
import { readDocumentTextSpan } from "../discovery/persist.js";
|
|
14
|
+
import { embedChunkBatch } from "./embedding-batcher.js";
|
|
15
|
+
// Error raised when bounded indexing stops because the caller's abort signal has fired.
// The message defaults to "bounded indexing cancelled" when none is supplied.
export class BoundedIndexingCancelledError extends Error {
    constructor(message = "bounded indexing cancelled") {
        super(message);
        // Replace the inherited "Error" name with this class's own name.
        this.name = "BoundedIndexingCancelledError";
    }
}
|
|
21
|
+
// Error raised when bounded indexing would exceed a configured resource policy.
// Every instance carries the fixed code "RESOURCE_POLICY_EXCEEDED".
export class BoundedIndexingPolicyError extends Error {
    code = "RESOURCE_POLICY_EXCEEDED";
    constructor(message) {
        super(message);
        this.name = "BoundedIndexingPolicyError";
    }
    // Projects this error onto a plain `{ code, message }` record.
    toIndexingError() {
        const { code, message } = this;
        return { code, message };
    }
}
|
|
31
|
+
// Returns a copy of `unit` whose character range is reset to [0, length).
// Units of kind "unsupported-media" are handed back untouched (same object).
function rebaseUnit(unit, length) {
    const isUnsupportedMedia = unit.kind === "unsupported-media";
    if (isUnsupportedMedia) {
        return unit;
    }
    // Later sources win, so the two range fields override whatever the unit carried.
    return Object.assign({}, unit, { characterStart: 0, characterEnd: length });
}
|
|
36
|
+
// Effective chunk ceiling: the smaller of the resolved chunking options' `maxChunks` and the
// policy's `maxChunkCount`. A missing policy (or missing `maxChunkCount`) imposes no extra limit.
function boundedMaxChunks(options, policy) {
    const configuredCap = resolveChunkingOptions(options).maxChunks;
    const policyCap = policy?.maxChunkCount ?? Number.POSITIVE_INFINITY;
    return Math.min(configuredCap, policyCap);
}
|
|
39
|
+
// Guard run before each parsed unit is chunked.
// Throws BoundedIndexingCancelledError when `signal` is aborted, and
// BoundedIndexingPolicyError when `orderIndex` has already reached `maxChunks`.
function assertChunkingCanContinue(signal, orderIndex, maxChunks) {
    const cancelled = signal?.aborted === true;
    if (cancelled) {
        throw new BoundedIndexingCancelledError();
    }
    const budgetSpent = orderIndex >= maxChunks;
    if (budgetSpent) {
        throw new BoundedIndexingPolicyError("large-document chunk count exceeded the configured resource policy");
    }
}
|
|
46
|
+
// Guard run after a parsed unit has been chunked.
// Passes when the chunk budget is not yet used up, or when the unit just processed was the last
// one (`rowIndex >= rowCount - 1`). Otherwise more units remain with no budget left, so it
// throws BoundedIndexingPolicyError.
function assertChunkingHasRemainder(orderIndex, rowIndex, rowCount, maxChunks) {
    const withinBudget = orderIndex < maxChunks;
    const wasLastRow = rowIndex >= rowCount - 1;
    if (withinBudget || wasLastRow) {
        return;
    }
    throw new BoundedIndexingPolicyError("large-document chunk count exceeded the configured resource policy");
}
|
|
51
|
+
// Chunks a single parsed-unit row and persists the resulting chunk rows.
//
// The unit's text is fetched with `readDocumentTextSpan` for the row's character range (a
// missing start defaults to 0, a missing end to the start, a missing span to ""). The unit is
// rebased to offsets local to that text before chunking, and each chunk's offsets are shifted
// back by the unit start before insertion so stored offsets are document-relative.
//
// `orderIndex` is the order index for the first chunk produced here; the return value is the
// order index to use for the next unit.
function chunkOneUnit(ctx, row, orderIndex) {
    const { db, cipher, params } = ctx;
    const unitStart = row.character_start ?? 0;
    const unitEnd = row.character_end ?? unitStart;
    const spanText = readDocumentTextSpan(db, cipher, params.capsuleId, params.documentId, unitStart, unitEnd) ?? "";
    const windowLocalUnit = rebaseUnit(rowToParsedUnit(row, params.documentId, cipher), spanText.length);
    // Only the budget that is still unspent is offered to the chunker for this unit.
    const remainingBudget = ctx.maxChunks - orderIndex;
    const pieces = chunkParsedUnit(windowLocalUnit, spanText, optionsWithBudget(ctx.options, remainingBudget));
    let cursor = orderIndex;
    for (const piece of pieces) {
        const chunkRow = {
            id: composeChunkId(params.documentId, row.id, cursor),
            capsuleId: params.capsuleId,
            sourceId: params.sourceId,
            documentId: params.documentId,
            parsedUnitId: row.id,
            orderIndex: cursor,
            tokenCount: piece.tokenCount,
            safeExcerptHash: piece.safeExcerptHash,
            chunkingStrategyVersion: ctx.strategyKey,
            characterStart: unitStart + piece.characterStart,
            characterEnd: unitStart + piece.characterEnd,
        };
        insertChunkRow(db, chunkRow);
        cursor += 1;
    }
    return cursor;
}
|
|
79
|
+
// Bounded re-implementation of the chunker: each parsed unit's text is read through
// `readDocumentTextSpan`, chunked with window-relative offsets, and the chunk offsets are rebased
// to document-relative before persisting, with the intent that the rows match what the standard
// chunker would produce over the full text.
//
// Parameters:
//   store   - knowledge store; its internal db handle and content cipher are used.
//   params  - capsule / source / document ids the chunks belong to.
//   options - chunking options (may be undefined).
//   signal  - optional abort signal, checked before every parsed unit.
//   policy  - optional resource policy; `maxChunkCount` tightens the chunk ceiling.
//
// Returns the number of chunks written, or 0 (without touching existing chunks) when the
// document has no parsed units.
//
// Throws BoundedIndexingCancelledError when the signal is aborted and BoundedIndexingPolicyError
// when the chunk ceiling is hit with units still remaining. All writes happen in one
// transaction; on any failure the transaction is rolled back and the original error is rethrown.
export function chunkDocumentBounded(store, params, options, signal, policy) {
    const db = store._internal.db;
    const rows = selectParsedUnitsForDocument(db, params.capsuleId, params.documentId);
    if (rows.length === 0)
        return 0;
    const ctx = {
        db,
        cipher: store._internal.contentCipher,
        params,
        options,
        strategyKey: chunkingStrategyKey(options),
        maxChunks: boundedMaxChunks(options, policy),
    };
    db.exec("BEGIN");
    try {
        // Existing chunks for the document are replaced wholesale inside the same transaction.
        deleteChunksForDocument(db, params.capsuleId, params.documentId);
        let orderIndex = 0;
        for (let rowIndex = 0; rowIndex < rows.length; rowIndex += 1) {
            const row = rows[rowIndex];
            if (row === undefined)
                continue;
            assertChunkingCanContinue(signal, orderIndex, ctx.maxChunks);
            orderIndex = chunkOneUnit(ctx, row, orderIndex);
            assertChunkingHasRemainder(orderIndex, rowIndex, rows.length, ctx.maxChunks);
        }
        db.exec("COMMIT");
        return orderIndex;
    }
    catch (cause) {
        try {
            db.exec("ROLLBACK");
        }
        catch {
            // SQLite rolls some failed transactions back automatically, in which case an
            // explicit ROLLBACK fails because no transaction is active. That secondary
            // failure must not replace the error that actually aborted the chunking run.
        }
        throw cause;
    }
}
|
|
115
|
+
// Returns a fresh options object equal to `options` (or empty when `options` is null/undefined)
// with `maxChunks` overridden by `budget`. The input object is not modified.
function optionsWithBudget(options, budget) {
    const base = options ?? {};
    return Object.assign({}, base, { maxChunks: budget });
}
|
|
118
|
+
// Selects the next batch of chunks that do NOT yet have a vector. The missing-vector gate is the
// resume mechanism: it self-heals across an interrupted embed, an externally deleted vector, and an
// incompatible-checkpoint restart (which deletes vectors first), without trusting a fragile cursor.
const SELECT_BOUNDED_CHUNKS_SQL = [
    // Chunk identity plus its character range, falling back to the parsed unit's range.
    "SELECT c.id, c.source_id, c.order_index,",
    " COALESCE(c.character_start, pu.character_start) AS char_start,",
    " COALESCE(c.character_end, pu.character_end) AS char_end",
    "FROM chunks AS c",
    "JOIN parsed_units AS pu ON pu.capsule_id = c.capsule_id AND pu.id = c.parsed_unit_id",
    // Anti-join: only chunks without a matching vector row survive the WHERE clause.
    "LEFT JOIN vectors AS v ON v.capsule_id = c.capsule_id AND v.chunk_id = c.id",
    "WHERE c.capsule_id = :c AND c.document_id = :d AND v.id IS NULL",
    "ORDER BY c.order_index ASC",
    "LIMIT :limit",
].join(" ");
// Counts the vector rows stored for one document of one capsule.
const COUNT_VECTORS_SQL = [
    "SELECT COUNT(*) AS n",
    "FROM vectors",
    "WHERE capsule_id = :c AND document_id = :d",
].join(" ");
|
|
133
|
+
// Returns the `n` column of COUNT_VECTORS_SQL for `deps.capsuleId` / `deps.documentId`,
// i.e. how many vector rows are stored for that document. Ids are bound as strings.
function embeddedChunkCount(db, deps) {
    const statement = db.prepare(COUNT_VECTORS_SQL);
    const bindings = { c: String(deps.capsuleId), d: String(deps.documentId) };
    const { n } = statement.get(bindings);
    return n;
}
|
|
139
|
+
// Runs SELECT_BOUNDED_CHUNKS_SQL for the document in `deps`, returning at most
// `deps.batchSize` rows for chunks that have no vector yet, in ascending order index.
function nextBatch(db, deps) {
    const statement = db.prepare(SELECT_BOUNDED_CHUNKS_SQL);
    const bindings = {
        c: String(deps.capsuleId),
        d: String(deps.documentId),
        limit: deps.batchSize,
    };
    return statement.all(bindings);
}
|
|
146
|
+
// Builds the embedding input for one selected chunk row. The chunk text is read with
// `readDocumentTextSpan` over the row's character range (missing bounds default to 0, a
// missing span to the empty string); ids come from the row and from `deps`.
function toChunkToEmbed(deps, row) {
    const { db, contentCipher } = deps.store._internal;
    const spanStart = row.char_start ?? 0;
    const spanEnd = row.char_end ?? 0;
    const span = readDocumentTextSpan(db, contentCipher, deps.capsuleId, deps.documentId, spanStart, spanEnd);
    return {
        id: row.id,
        capsuleId: deps.capsuleId,
        sourceId: row.source_id,
        documentId: deps.documentId,
        text: span ?? "",
    };
}
|
|
156
|
+
function embedOptions(deps) {
|
|
157
|
+
return {
|
|
158
|
+
adapter: deps.adapter,
|
|
159
|
+
store: deps.store,
|
|
160
|
+
pinnedIdentity: deps.identity,
|
|
161
|
+
concurrency: deps.concurrency,
|
|
162
|
+
...(deps.signal !== undefined ? { signal: deps.signal } : {}),
|
|
163
|
+
now: deps.now,
|
|
164
|
+
idSource: deps.idSource,
|
|
165
|
+
};
|
|
166
|
+
}
|
|
167
|
+
// Embeds one batch of selected chunk rows and folds the outcome into the accumulator `acc`:
// the vector count grows by the number of vectors returned, the batch's errors are appended,
// and `lastChunkId` moves to the chunk id of the last returned vector (if any). Whenever
// `acc.lastChunkId` is set, `deps.onBatch` (if provided) is called with the current stored
// vector count and that chunk id. Resolves to true when the batch reported no errors, and to
// false to tell the caller to stop looping.
async function embedOneBatch(db, deps, rows, acc) {
    const pending = rows.map((row) => toChunkToEmbed(deps, row));
    const { vectors, errors } = await embedChunkBatch(pending, embedOptions(deps));
    acc.vectorCount += vectors.length;
    acc.errors.push(...errors);
    const newestVector = vectors[vectors.length - 1];
    if (newestVector !== undefined) {
        acc.lastChunkId = newestVector.chunkId;
    }
    if (acc.lastChunkId !== null) {
        deps.onBatch?.(embeddedChunkCount(db, deps), acc.lastChunkId);
    }
    return errors.length === 0;
}
|
|
181
|
+
// Flags the accumulator as cancelled by setting `acc.cancelled` to true. Returns nothing.
function markCancelled(acc) {
    acc.cancelled = true;
}
|
|
184
|
+
// Appends a RESOURCE_POLICY_EXCEEDED error describing an exceeded embedding batch count
// to `acc.errors`.
function appendBatchPolicyError(acc) {
    const policyError = {
        code: "RESOURCE_POLICY_EXCEEDED",
        message: "large-document embedding batch count exceeded the configured resource policy",
    };
    acc.errors.push(policyError);
}
|
|
190
|
+
// True when `batchCount` has reached `deps.policy.maxEmbeddingBatchCount`. With no policy, or no
// `maxEmbeddingBatchCount` on it, the ceiling is infinite and the result is always false.
function batchPolicyExceeded(deps, batchCount) {
    const ceiling = deps.policy?.maxEmbeddingBatchCount ?? Number.POSITIVE_INFINITY;
    return batchCount >= ceiling;
}
|
|
193
|
+
// True when at least one entry of `acc.errors` has the code "CANCELLED".
function sawCancellationError(acc) {
    // "not every entry is non-cancelled" is the same predicate as "some entry is cancelled".
    const noneCancelled = acc.errors.every((entry) => entry.code !== "CANCELLED");
    return !noneCancelled;
}
|
|
196
|
+
// Runs one iteration of the bounded embed loop and resolves to "continue" or "stop".
//
// Stops (in this order) when: the abort signal is already aborted (accumulator marked
// cancelled); no un-embedded chunk is left; the first pending chunk is the same one the
// previous iteration started with (the last batch made no progress); or the batch-count policy
// is exhausted (a policy error is recorded). Otherwise the batch is embedded; if it reported
// errors the loop stops, and the accumulator is marked cancelled when any recorded error has the
// code "CANCELLED".
async function embedLoopStep(db, deps, acc, loop) {
    if (deps.signal?.aborted === true) {
        markCancelled(acc);
        return "stop";
    }
    const pending = nextBatch(db, deps);
    const head = pending[0];
    if (head === undefined) {
        return "stop";
    }
    if (head.id === loop.previousFirstId) {
        return "stop";
    }
    if (batchPolicyExceeded(deps, loop.batchCount)) {
        appendBatchPolicyError(acc);
        return "stop";
    }
    loop.batchCount += 1;
    loop.previousFirstId = head.id;
    const batchSucceeded = await embedOneBatch(db, deps, pending, acc);
    if (batchSucceeded) {
        return "continue";
    }
    if (sawCancellationError(acc)) {
        markCancelled(acc);
    }
    return "stop";
}
|
|
217
|
+
// Embeds a document's not-yet-embedded chunks in batches of at most `deps.batchSize`, reading
// each chunk's text through `readDocumentTextSpan` instead of loading the whole document text.
// Because each batch is selected by "chunk has no vector yet", a re-run picks up where an
// earlier run left off.
//
// Resolves to a summary: vectors produced by this call, the errors collected, the chunk id of
// the last vector produced (or null), the total number of vectors now stored for the document
// (`embeddedCursor`), and whether the run was cancelled.
export async function embedDocumentChunksBounded(deps) {
    const db = deps.store._internal.db;
    const acc = {
        vectorCount: 0,
        errors: [],
        lastChunkId: null,
        cancelled: false,
    };
    const loop = { batchCount: 0 };
    let verdict;
    do {
        verdict = await embedLoopStep(db, deps, acc, loop);
    } while (verdict !== "stop");
    const { vectorCount, errors, lastChunkId, cancelled } = acc;
    return {
        vectorCount,
        errors,
        lastChunkId,
        embeddedCursor: embeddedChunkCount(db, deps),
        cancelled,
    };
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ExtractionCheckpointRecord, KnowledgeCapsuleId } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { DocumentId } from "@oscharko-dev/keiko-contracts";
|
|
3
|
+
import type { DatabaseSync } from "node:sqlite";
|
|
4
|
+
export declare function upsertExtractionCheckpoint(db: DatabaseSync, checkpoint: ExtractionCheckpointRecord): void;
|
|
5
|
+
export declare function selectExtractionCheckpoint(db: DatabaseSync, capsuleId: KnowledgeCapsuleId, documentId: DocumentId): ExtractionCheckpointRecord | undefined;
|
|
6
|
+
export declare function listExtractionCheckpoints(db: DatabaseSync, capsuleId: KnowledgeCapsuleId): readonly ExtractionCheckpointRecord[];
|
|
7
|
+
export declare function deleteExtractionCheckpoint(db: DatabaseSync, capsuleId: KnowledgeCapsuleId, documentId: DocumentId): void;
|
|
8
|
+
//# sourceMappingURL=checkpoint-persist.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"checkpoint-persist.d.ts","sourceRoot":"","sources":["../../src/indexing/checkpoint-persist.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAGV,0BAA0B,EAE1B,kBAAkB,EAGnB,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EAAW,UAAU,EAAE,MAAM,+BAA+B,CAAC;AACzE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAgFhD,wBAAgB,0BAA0B,CACxC,EAAE,EAAE,YAAY,EAChB,UAAU,EAAE,0BAA0B,GACrC,IAAI,CA0BN;AA+CD,wBAAgB,0BAA0B,CACxC,EAAE,EAAE,YAAY,EAChB,SAAS,EAAE,kBAAkB,EAC7B,UAAU,EAAE,UAAU,GACrB,0BAA0B,GAAG,SAAS,CAGxC;AAED,wBAAgB,yBAAyB,CACvC,EAAE,EAAE,YAAY,EAChB,SAAS,EAAE,kBAAkB,GAC5B,SAAS,0BAA0B,EAAE,CAGvC;AAED,wBAAgB,0BAA0B,CACxC,EAAE,EAAE,YAAY,EAChB,SAAS,EAAE,kBAAkB,EAC7B,UAAU,EAAE,UAAU,GACrB,IAAI,CAEN"}
|