@oscharko-dev/keiko-local-knowledge 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -0
- package/dist/bounded-document-extraction.d.ts +27 -0
- package/dist/bounded-document-extraction.d.ts.map +1 -0
- package/dist/bounded-document-extraction.js +214 -0
- package/dist/capsule-lifecycle.d.ts +33 -0
- package/dist/capsule-lifecycle.d.ts.map +1 -0
- package/dist/capsule-lifecycle.js +292 -0
- package/dist/capsule-set-lifecycle.d.ts +15 -0
- package/dist/capsule-set-lifecycle.d.ts.map +1 -0
- package/dist/capsule-set-lifecycle.js +158 -0
- package/dist/chunking/chunker-persist.d.ts +36 -0
- package/dist/chunking/chunker-persist.d.ts.map +1 -0
- package/dist/chunking/chunker-persist.js +74 -0
- package/dist/chunking/chunker-runner.d.ts +9 -0
- package/dist/chunking/chunker-runner.d.ts.map +1 -0
- package/dist/chunking/chunker-runner.js +218 -0
- package/dist/chunking/chunker.d.ts +7 -0
- package/dist/chunking/chunker.d.ts.map +1 -0
- package/dist/chunking/chunker.js +139 -0
- package/dist/chunking/citation-mapper.d.ts +4 -0
- package/dist/chunking/citation-mapper.d.ts.map +1 -0
- package/dist/chunking/citation-mapper.js +180 -0
- package/dist/chunking/index.d.ts +6 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/token-estimator.d.ts +3 -0
- package/dist/chunking/token-estimator.d.ts.map +1 -0
- package/dist/chunking/token-estimator.js +26 -0
- package/dist/chunking/types.d.ts +49 -0
- package/dist/chunking/types.d.ts.map +1 -0
- package/dist/chunking/types.js +26 -0
- package/dist/composition.d.ts +57 -0
- package/dist/composition.d.ts.map +1 -0
- package/dist/composition.js +310 -0
- package/dist/conversation/citation-attacher.d.ts +8 -0
- package/dist/conversation/citation-attacher.d.ts.map +1 -0
- package/dist/conversation/citation-attacher.js +55 -0
- package/dist/conversation/citation-excerpts.d.ts +4 -0
- package/dist/conversation/citation-excerpts.d.ts.map +1 -0
- package/dist/conversation/citation-excerpts.js +41 -0
- package/dist/conversation/grounded-answer-runner.d.ts +9 -0
- package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
- package/dist/conversation/grounded-answer-runner.js +61 -0
- package/dist/conversation/index.d.ts +5 -0
- package/dist/conversation/index.d.ts.map +1 -0
- package/dist/conversation/index.js +7 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
- package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
- package/dist/conversation/model-gateway-answer-generator.js +105 -0
- package/dist/conversation/types.d.ts +35 -0
- package/dist/conversation/types.d.ts.map +1 -0
- package/dist/conversation/types.js +24 -0
- package/dist/discovery/discovery-runner.d.ts +23 -0
- package/dist/discovery/discovery-runner.d.ts.map +1 -0
- package/dist/discovery/discovery-runner.js +109 -0
- package/dist/discovery/extract-progressive.d.ts +17 -0
- package/dist/discovery/extract-progressive.d.ts.map +1 -0
- package/dist/discovery/extract-progressive.js +522 -0
- package/dist/discovery/extract.d.ts +26 -0
- package/dist/discovery/extract.d.ts.map +1 -0
- package/dist/discovery/extract.js +906 -0
- package/dist/discovery/glob.d.ts +10 -0
- package/dist/discovery/glob.d.ts.map +1 -0
- package/dist/discovery/glob.js +72 -0
- package/dist/discovery/index.d.ts +6 -0
- package/dist/discovery/index.d.ts.map +1 -0
- package/dist/discovery/index.js +8 -0
- package/dist/discovery/media-type.d.ts +4 -0
- package/dist/discovery/media-type.d.ts.map +1 -0
- package/dist/discovery/media-type.js +62 -0
- package/dist/discovery/persist.d.ts +63 -0
- package/dist/discovery/persist.d.ts.map +1 -0
- package/dist/discovery/persist.js +345 -0
- package/dist/discovery/test-support.d.ts +16 -0
- package/dist/discovery/test-support.d.ts.map +1 -0
- package/dist/discovery/test-support.js +127 -0
- package/dist/discovery/types.d.ts +63 -0
- package/dist/discovery/types.d.ts.map +1 -0
- package/dist/discovery/types.js +28 -0
- package/dist/discovery/walk.d.ts +12 -0
- package/dist/discovery/walk.d.ts.map +1 -0
- package/dist/discovery/walk.js +302 -0
- package/dist/errors.d.ts +13 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +22 -0
- package/dist/evaluations/dimensions.d.ts +14 -0
- package/dist/evaluations/dimensions.d.ts.map +1 -0
- package/dist/evaluations/dimensions.js +191 -0
- package/dist/evaluations/fixtures.d.ts +18 -0
- package/dist/evaluations/fixtures.d.ts.map +1 -0
- package/dist/evaluations/fixtures.js +858 -0
- package/dist/evaluations/index.d.ts +7 -0
- package/dist/evaluations/index.d.ts.map +1 -0
- package/dist/evaluations/index.js +10 -0
- package/dist/evaluations/report.d.ts +3 -0
- package/dist/evaluations/report.d.ts.map +1 -0
- package/dist/evaluations/report.js +31 -0
- package/dist/evaluations/runner-seed.d.ts +12 -0
- package/dist/evaluations/runner-seed.d.ts.map +1 -0
- package/dist/evaluations/runner-seed.js +175 -0
- package/dist/evaluations/runner.d.ts +8 -0
- package/dist/evaluations/runner.d.ts.map +1 -0
- package/dist/evaluations/runner.js +205 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
- package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
- package/dist/evaluations/scripted-embedding-adapter.js +163 -0
- package/dist/evaluations/types.d.ts +116 -0
- package/dist/evaluations/types.d.ts.map +1 -0
- package/dist/evaluations/types.js +27 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +41 -0
- package/dist/indexing/bounded-indexing.d.ts +41 -0
- package/dist/indexing/bounded-indexing.d.ts.map +1 -0
- package/dist/indexing/bounded-indexing.js +240 -0
- package/dist/indexing/checkpoint-persist.d.ts +8 -0
- package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
- package/dist/indexing/checkpoint-persist.js +135 -0
- package/dist/indexing/checkpoint-resume.d.ts +20 -0
- package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
- package/dist/indexing/checkpoint-resume.js +50 -0
- package/dist/indexing/embedding-batcher.d.ts +3 -0
- package/dist/indexing/embedding-batcher.d.ts.map +1 -0
- package/dist/indexing/embedding-batcher.js +390 -0
- package/dist/indexing/index.d.ts +7 -0
- package/dist/indexing/index.d.ts.map +1 -0
- package/dist/indexing/index.js +11 -0
- package/dist/indexing/job-persist.d.ts +46 -0
- package/dist/indexing/job-persist.d.ts.map +1 -0
- package/dist/indexing/job-persist.js +157 -0
- package/dist/indexing/job-resume.d.ts +4 -0
- package/dist/indexing/job-resume.d.ts.map +1 -0
- package/dist/indexing/job-resume.js +14 -0
- package/dist/indexing/orchestrator.d.ts +3 -0
- package/dist/indexing/orchestrator.d.ts.map +1 -0
- package/dist/indexing/orchestrator.js +1151 -0
- package/dist/indexing/types.d.ts +156 -0
- package/dist/indexing/types.d.ts.map +1 -0
- package/dist/indexing/types.js +30 -0
- package/dist/indexing/vector-persist.d.ts +32 -0
- package/dist/indexing/vector-persist.d.ts.map +1 -0
- package/dist/indexing/vector-persist.js +105 -0
- package/dist/parsers/_internal.d.ts +20 -0
- package/dist/parsers/_internal.d.ts.map +1 -0
- package/dist/parsers/_internal.js +122 -0
- package/dist/parsers/csv-parser.d.ts +3 -0
- package/dist/parsers/csv-parser.d.ts.map +1 -0
- package/dist/parsers/csv-parser.js +202 -0
- package/dist/parsers/docx-parser.d.ts +3 -0
- package/dist/parsers/docx-parser.d.ts.map +1 -0
- package/dist/parsers/docx-parser.js +390 -0
- package/dist/parsers/html-parser.d.ts +3 -0
- package/dist/parsers/html-parser.d.ts.map +1 -0
- package/dist/parsers/html-parser.js +310 -0
- package/dist/parsers/index.d.ts +15 -0
- package/dist/parsers/index.d.ts.map +1 -0
- package/dist/parsers/index.js +41 -0
- package/dist/parsers/json-parser.d.ts +3 -0
- package/dist/parsers/json-parser.d.ts.map +1 -0
- package/dist/parsers/json-parser.js +192 -0
- package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
- package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
- package/dist/parsers/large-document/capability-discovery.js +76 -0
- package/dist/parsers/large-document/diagnostics.d.ts +3 -0
- package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
- package/dist/parsers/large-document/diagnostics.js +11 -0
- package/dist/parsers/large-document/index.d.ts +15 -0
- package/dist/parsers/large-document/index.d.ts.map +1 -0
- package/dist/parsers/large-document/index.js +10 -0
- package/dist/parsers/large-document/legacy-format.d.ts +5 -0
- package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
- package/dist/parsers/large-document/legacy-format.js +25 -0
- package/dist/parsers/large-document/preflight.d.ts +9 -0
- package/dist/parsers/large-document/preflight.d.ts.map +1 -0
- package/dist/parsers/large-document/preflight.js +43 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
- package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-extraction.js +123 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
- package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
- package/dist/parsers/large-document/progressive-pdf.js +145 -0
- package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
- package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
- package/dist/parsers/large-document/synthetic-source.js +101 -0
- package/dist/parsers/large-document/window-builder.d.ts +24 -0
- package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
- package/dist/parsers/large-document/window-builder.js +75 -0
- package/dist/parsers/ocr/index.d.ts +4 -0
- package/dist/parsers/ocr/index.d.ts.map +1 -0
- package/dist/parsers/ocr/index.js +4 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
- package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
- package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
- package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
- package/dist/parsers/ocr/types.d.ts +16 -0
- package/dist/parsers/ocr/types.d.ts.map +1 -0
- package/dist/parsers/ocr/types.js +4 -0
- package/dist/parsers/parser-test-fixtures.d.ts +28 -0
- package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
- package/dist/parsers/parser-test-fixtures.js +139 -0
- package/dist/parsers/pdf-parser.d.ts +43 -0
- package/dist/parsers/pdf-parser.d.ts.map +1 -0
- package/dist/parsers/pdf-parser.js +388 -0
- package/dist/parsers/registry.d.ts +8 -0
- package/dist/parsers/registry.d.ts.map +1 -0
- package/dist/parsers/registry.js +57 -0
- package/dist/parsers/text-parser.d.ts +3 -0
- package/dist/parsers/text-parser.d.ts.map +1 -0
- package/dist/parsers/text-parser.js +214 -0
- package/dist/parsers/types.d.ts +53 -0
- package/dist/parsers/types.d.ts.map +1 -0
- package/dist/parsers/types.js +21 -0
- package/dist/parsers/unsupported-parser.d.ts +4 -0
- package/dist/parsers/unsupported-parser.d.ts.map +1 -0
- package/dist/parsers/unsupported-parser.js +97 -0
- package/dist/parsers/xlsx-parser.d.ts +3 -0
- package/dist/parsers/xlsx-parser.d.ts.map +1 -0
- package/dist/parsers/xlsx-parser.js +425 -0
- package/dist/privacy/audit-emitter.d.ts +5 -0
- package/dist/privacy/audit-emitter.d.ts.map +1 -0
- package/dist/privacy/audit-emitter.js +93 -0
- package/dist/privacy/diagnostic-redactor.d.ts +2 -0
- package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
- package/dist/privacy/diagnostic-redactor.js +153 -0
- package/dist/privacy/index.d.ts +5 -0
- package/dist/privacy/index.d.ts.map +1 -0
- package/dist/privacy/index.js +6 -0
- package/dist/privacy/retention-applier.d.ts +5 -0
- package/dist/privacy/retention-applier.d.ts.map +1 -0
- package/dist/privacy/retention-applier.js +88 -0
- package/dist/privacy/types.d.ts +98 -0
- package/dist/privacy/types.d.ts.map +1 -0
- package/dist/privacy/types.js +12 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
- package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
- package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
- package/dist/qualityIntelligence/index.d.ts +3 -0
- package/dist/qualityIntelligence/index.d.ts.map +1 -0
- package/dist/qualityIntelligence/index.js +5 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
- package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
- package/dist/qualityIntelligence/qiHandoff.js +82 -0
- package/dist/retrieval/answer-grounding.d.ts +9 -0
- package/dist/retrieval/answer-grounding.d.ts.map +1 -0
- package/dist/retrieval/answer-grounding.js +31 -0
- package/dist/retrieval/context-pack-assembler.d.ts +24 -0
- package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
- package/dist/retrieval/context-pack-assembler.js +50 -0
- package/dist/retrieval/index.d.ts +6 -0
- package/dist/retrieval/index.d.ts.map +1 -0
- package/dist/retrieval/index.js +9 -0
- package/dist/retrieval/retrieval-runner.d.ts +10 -0
- package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
- package/dist/retrieval/retrieval-runner.js +163 -0
- package/dist/retrieval/scoped-vector-search.d.ts +24 -0
- package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
- package/dist/retrieval/scoped-vector-search.js +864 -0
- package/dist/retrieval/types.d.ts +28 -0
- package/dist/retrieval/types.d.ts.map +1 -0
- package/dist/retrieval/types.js +33 -0
- package/dist/section-path-hash.d.ts +3 -0
- package/dist/section-path-hash.d.ts.map +1 -0
- package/dist/section-path-hash.js +9 -0
- package/dist/source-lifecycle.d.ts +14 -0
- package/dist/source-lifecycle.d.ts.map +1 -0
- package/dist/source-lifecycle.js +155 -0
- package/dist/source-routing-validation.d.ts +11 -0
- package/dist/source-routing-validation.d.ts.map +1 -0
- package/dist/source-routing-validation.js +140 -0
- package/dist/store-content-cipher.d.ts +11 -0
- package/dist/store-content-cipher.d.ts.map +1 -0
- package/dist/store-content-cipher.js +67 -0
- package/dist/store-content-encryption.d.ts +12 -0
- package/dist/store-content-encryption.d.ts.map +1 -0
- package/dist/store-content-encryption.js +275 -0
- package/dist/store-paths.d.ts +6 -0
- package/dist/store-paths.d.ts.map +1 -0
- package/dist/store-paths.js +61 -0
- package/dist/store.d.ts +30 -0
- package/dist/store.d.ts.map +1 -0
- package/dist/store.js +219 -0
- package/dist/testing.d.ts +47 -0
- package/dist/testing.d.ts.map +1 -0
- package/dist/testing.js +170 -0
- package/dist/version.d.ts +2 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +4 -0
- package/package.json +43 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// Pure scoring functions for the retrieval evaluation harness (Epic #189, Issue #268).
|
|
2
|
+
// Each function takes structural inputs (no store, no IO) and returns a number in
|
|
3
|
+
// `[0, 1]` — that range invariant is enforced by every branch and is load-bearing for the
|
|
4
|
+
// aggregator in `runner.ts`, which averages dimensions across queries.
|
|
5
|
+
//
|
|
6
|
+
// Vacuous cases:
|
|
7
|
+
// - `scoreRecall` with empty `expected` ⇒ 1.0 (nothing was missed).
|
|
8
|
+
// - `scorePrecision` with empty `returned` ⇒ 1.0 (no false positives possible). Callers
|
|
9
|
+
// that want "no refs is a failure" should reach for `scoreNoEvidenceAccuracy` instead.
|
|
10
|
+
// - `scoreSourceIsolation` with empty `returned` ⇒ 1.0 (no leak possible).
|
|
11
|
+
// - `scoreCitationQuality` with empty `references` ⇒ 1.0 (no malformed citations).
|
|
12
|
+
//
|
|
13
|
+
// These vacuous-1.0 conventions are deliberate: a fixture that expects no evidence should
|
|
14
|
+
// not be penalised on recall/precision/etc. The `noEvidenceAccuracy` dimension is the one
|
|
15
|
+
// that discriminates "expected nothing AND got nothing" vs "expected nothing but got
|
|
16
|
+
// something" vs "expected something but got nothing".
|
|
17
|
+
// ─── Recall ──────────────────────────────────────────────────────────────────
|
|
18
|
+
// `|expected ∩ returned| / |expected|`. We compare on `chunkId` because `RetrievalReference`
|
|
19
|
+
// carries the chunk id verbatim and that is the smallest discriminator the fixtures use.
|
|
20
|
+
/**
 * Recall: the share of expected chunk ids that show up among the returned references.
 *
 * Ids on both sides are compared after `String(...)` coercion. Each entry of `expected`
 * is checked independently, so a repeated expected id is counted once per occurrence.
 *
 * @param returned References produced by retrieval; only `chunkId` is read.
 * @param expected Chunk ids the fixture expects to be retrieved.
 * @returns A value in `[0, 1]`; `1` when `expected` is empty (nothing could be missed).
 */
export function scoreRecall(returned, expected) {
    const expectedCount = expected.length;
    if (expectedCount === 0) {
        return 1;
    }
    // Build the lookup of everything retrieval handed back, keyed by stringified chunk id.
    const retrievedIds = new Set([...returned].map((ref) => String(ref.chunkId)));
    const found = [...expected].filter((id) => retrievedIds.has(String(id))).length;
    return found / expectedCount;
}
|
|
33
|
+
// ─── Precision ───────────────────────────────────────────────────────────────
|
|
34
|
+
// `|expected ∩ returned| / |returned|`.
|
|
35
|
+
/**
 * Precision: the share of returned references whose chunk id is one of the expected ids.
 *
 * Ids on both sides are compared after `String(...)` coercion. Every returned reference
 * is judged on its own, so a repeated relevant reference counts once per occurrence.
 *
 * @param returned References produced by retrieval; only `chunkId` is read.
 * @param expected Chunk ids the fixture expects to be retrieved.
 * @returns A value in `[0, 1]`; `1` when `returned` is empty (no false positives possible).
 */
export function scorePrecision(returned, expected) {
    const returnedCount = returned.length;
    if (returnedCount === 0) {
        return 1;
    }
    const wantedIds = new Set([...expected].map((id) => String(id)));
    const relevant = [...returned].reduce((count, ref) => (wantedIds.has(String(ref.chunkId)) ? count + 1 : count), 0);
    return relevant / returnedCount;
}
|
|
48
|
+
// ─── Ranking quality ────────────────────────────────────────────────────────
|
|
49
|
+
// MRR captures how quickly the first relevant chunk appears. Recall/precision alone can pass even
|
|
50
|
+
// when a relevant chunk is buried late in the retrieved list; MRR makes that regression visible.
|
|
51
|
+
/**
 * Reciprocal rank of the first returned reference whose chunk id is expected.
 *
 * @param returned References in ranked order; only `chunkId` is read.
 * @param expected Chunk ids the fixture expects to be retrieved.
 * @returns `1 / rank` (1-based) of the first relevant reference, `0` when none of the
 *   returned references is relevant, and `1` when `expected` is empty.
 */
export function scoreMeanReciprocalRank(returned, expected) {
    if (expected.length === 0) {
        return 1;
    }
    const relevantIds = new Set([...expected].map((id) => String(id)));
    // Optional chaining keeps a missing entry from throwing; it stringifies to "undefined".
    const firstHit = returned.findIndex((ref) => relevantIds.has(String(ref?.chunkId)));
    return firstHit === -1 ? 0 : 1 / (firstHit + 1);
}
|
|
64
|
+
// Logarithmic rank discount for a zero-based rank index: 1 at index 0, 1/log2(3) at
// index 1, and so on.
function discountedGain(rankIndex) {
    return 1 / Math.log2(rankIndex + 2);
}
// Binary relevance nDCG@returned.length. This rewards relevant chunks appearing earlier while
// staying deterministic and independent of model-judged graded relevance.
/**
 * Binary-relevance nDCG over the full returned list.
 *
 * Each expected chunk id earns gain only at its first occurrence in `returned`. Without
 * that, a relevant chunk returned more than once would accumulate more gain than the ideal
 * ranking (which holds at most `expectedIds.size` relevant entries) and the score would
 * exceed 1.
 *
 * @param returned References in ranked order; only `chunkId` is read.
 * @param expected Chunk ids the fixture expects to be retrieved.
 * @returns A value in `[0, 1]`; `1` when `expected` is empty, `0` when `expected` is
 *   non-empty but nothing was returned.
 */
export function scoreNdcg(returned, expected) {
    if (expected.length === 0)
        return 1;
    if (returned.length === 0)
        return 0;
    const expectedIds = new Set();
    for (const id of expected)
        expectedIds.add(String(id));
    // Ids that have already contributed gain; later duplicates are ignored.
    const creditedIds = new Set();
    let dcg = 0;
    for (let index = 0; index < returned.length; index += 1) {
        const chunkId = String(returned[index]?.chunkId);
        if (expectedIds.has(chunkId) && !creditedIds.has(chunkId)) {
            creditedIds.add(chunkId);
            dcg += discountedGain(index);
        }
    }
    // The ideal ranking places every distinct expected id first, capped by the list length.
    const idealCount = Math.min(expectedIds.size, returned.length);
    let ideal = 0;
    for (let index = 0; index < idealCount; index += 1) {
        ideal += discountedGain(index);
    }
    return ideal === 0 ? 0 : dcg / ideal;
}
|
|
90
|
+
// ─── Source isolation ────────────────────────────────────────────────────────
|
|
91
|
+
// A retrieval is source-isolated iff every returned reference belongs to a capsule that is
|
|
92
|
+
// in `scopeCapsuleIds`. A single leak across the capsule boundary drops the score to 0 —
|
|
93
|
+
// the issue brief calls source isolation a hard tenant-isolation guarantee, so partial
|
|
94
|
+
// credit would dilute its meaning. The scope is normalised to a string set so the function
|
|
95
|
+
// stays agnostic to the branded `KnowledgeCapsuleId` newtype.
|
|
96
|
+
/**
 * All-or-nothing check that every returned reference belongs to an in-scope capsule.
 *
 * Capsule ids on both sides are compared after `String(...)` coercion.
 *
 * @param returned References produced by retrieval; only `capsuleId` is read.
 * @param scopeCapsuleIds Capsule ids the query was allowed to read from.
 * @returns `1` when `returned` is empty or every reference is in scope; `0` as soon as
 *   one reference falls outside the scope.
 */
export function scoreSourceIsolation(returned, scopeCapsuleIds) {
    if (returned.length === 0) {
        return 1;
    }
    const inScope = new Set([...scopeCapsuleIds].map((id) => String(id)));
    // `some` stops at the first out-of-scope reference.
    const leaked = [...returned].some((ref) => !inScope.has(String(ref.capsuleId)));
    return leaked ? 0 : 1;
}
|
|
108
|
+
/**
 * A page citation is well-formed when its `pageNumber` field is present.
 *
 * @param reference Reference whose `citation.pageNumber` is inspected.
 * @returns `true` unless `citation.pageNumber` is `undefined`.
 */
function isPageCitationWellFormed(reference) {
    const { pageNumber } = reference.citation;
    return typeof pageNumber !== "undefined";
}
|
|
111
|
+
/**
 * A section citation is well-formed when `sectionPath` is present and non-empty.
 *
 * @param reference Reference whose `citation.sectionPath` is inspected.
 * @returns `true` when `citation.sectionPath` is defined and has a length above zero.
 */
function isSectionCitationWellFormed(reference) {
    const { sectionPath } = reference.citation;
    if (sectionPath === undefined) {
        return false;
    }
    return sectionPath.length > 0;
}
|
|
115
|
+
/**
 * A span citation is well-formed when both character offsets are present.
 *
 * @param reference Reference whose `citation.characterStart` / `characterEnd` are inspected.
 * @returns `true` only when neither offset is `undefined`.
 */
function isSpanCitationWellFormed(reference) {
    const { characterStart, characterEnd } = reference.citation;
    return !(characterStart === undefined || characterEnd === undefined);
}
|
|
118
|
+
/**
 * Dispatches to the citation check that matches the chunk's unit kind.
 *
 * @param input Object carrying the `reference` to check and its `unitKind`.
 * @returns The result of the matching check: the page check for "page", the section check
 *   for "section" and "html-block", the span check for "json-path" and "csv-row", and
 *   `true` for "unsupported-media". Any other kind yields `undefined`.
 */
function isCitationWellFormed(input) {
    const kind = input.unitKind;
    if (kind === "page") {
        return isPageCitationWellFormed(input.reference);
    }
    if (kind === "section" || kind === "html-block") {
        return isSectionCitationWellFormed(input.reference);
    }
    if (kind === "json-path" || kind === "csv-row") {
        return isSpanCitationWellFormed(input.reference);
    }
    if (kind === "unsupported-media") {
        // No citation field is required for this kind.
        return true;
    }
    // Unrecognised kinds fall through without a verdict.
    return undefined;
}
|
|
132
|
+
// A fixture passes per-chunk unit-kind metadata through to this function. We cannot infer
|
|
133
|
+
// the unit kind from a `CitationReference` alone — the contract permits any subset of the
|
|
134
|
+
// optional fields to be present — so the runner threads `chunkUnitKinds` through.
|
|
135
|
+
/**
 * Share of references whose citation carries the fields required by its unit kind.
 *
 * @param references References to grade; `chunkId` is used to look up the unit kind.
 * @param chunkUnitKinds Lookup (queried via `.get`) from stringified chunk id to unit kind.
 * @returns A value in `[0, 1]`; `1` when `references` is empty.
 */
export function scoreCitationQuality(references, chunkUnitKinds) {
    const total = references.length;
    if (total === 0) {
        return 1;
    }
    const passing = [...references].filter((reference) => {
        const unitKind = chunkUnitKinds.get(String(reference.chunkId));
        // Without unit-kind metadata there is nothing concrete to check, so the reference
        // counts as well-formed rather than being penalised for missing test metadata.
        return unitKind === undefined || isCitationWellFormed({ reference, unitKind });
    });
    return passing.length / total;
}
|
|
153
|
+
// Map a contract `ParsedUnit` kind to the requirement key. Exported so the runner can
|
|
154
|
+
// build the `chunkUnitKinds` map from its fixture seed without re-declaring the union.
|
|
155
|
+
/**
 * Returns the requirement key for a unit, which is the unit's own `kind`.
 *
 * @param unit Object exposing a `kind` property.
 * @returns `unit.kind`, unchanged.
 */
export function citationRequirementForUnit(unit) {
    const { kind } = unit;
    return kind;
}
|
|
158
|
+
// ─── No-evidence accuracy ────────────────────────────────────────────────────
|
|
159
|
+
// Binary: `1.0` when the actual result matches the expected no-evidence flag, `0.0`
|
|
160
|
+
// otherwise. The function returns the typed literal so a downstream `passed` check can
|
|
161
|
+
// use `===` without worrying about floating-point comparisons.
|
|
162
|
+
/**
 * Binary score for whether the no-evidence outcome matches the fixture's expectation.
 *
 * @param actualNoEvidence No-evidence flag observed on the actual result.
 * @param expectedNoEvidence No-evidence flag the fixture expects.
 * @param actualReason Reason reported by the actual result.
 * @param expectedReason Reason the fixture expects; `undefined` skips the reason check.
 * @returns `1` when the flags are strictly equal and, if `expectedReason` is given, the
 *   reasons are strictly equal too; `0` otherwise.
 */
export function scoreNoEvidenceAccuracy(actualNoEvidence, expectedNoEvidence, actualReason, expectedReason) {
    const flagMatches = actualNoEvidence === expectedNoEvidence;
    const reasonMatches = expectedReason === undefined || actualReason === expectedReason;
    return flagMatches && reasonMatches ? 1 : 0;
}
|
|
170
|
+
// ─── Context-budget fit ──────────────────────────────────────────────────────
|
|
171
|
+
// `1.0` when the retrieved chunk-token total fits within the configured budget. When it
|
|
172
|
+
// exceeds the budget we return the bounded ratio `budget / used`, which keeps the score in
|
|
173
|
+
// `[0, 1]` while preserving how far over budget the retrieval spilled. Queries without a
|
|
174
|
+
// configured budget are treated vacuously as 1.0 because there is no concrete fit target.
|
|
175
|
+
/**
 * Scores how well the retrieved chunks fit inside a token budget.
 *
 * @param references Retrieved references; only `chunkId` is read.
 * @param chunkTokenCounts Lookup (queried via `.get`) from stringified chunk id to token
 *   count; ids missing from the lookup contribute `0`.
 * @param budgetTokens Token budget, or `undefined` when no budget is configured.
 * @returns `1` when there is no budget, no references, or the summed tokens fit the
 *   budget; `0` when the budget is zero or negative; otherwise `budgetTokens / used`.
 */
export function scoreContextBudgetFit(references, chunkTokenCounts, budgetTokens) {
    // Vacuous cases: nothing to fit against, or nothing to fit.
    if (budgetTokens === undefined || references.length === 0) {
        return 1;
    }
    if (budgetTokens <= 0) {
        return 0;
    }
    const used = [...references].reduce((sum, reference) => sum + (chunkTokenCounts.get(String(reference.chunkId)) ?? 0), 0);
    const fits = used <= 0 || used <= budgetTokens;
    return fits ? 1 : budgetTokens / used;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { EmbeddingModelIdentity } from "@oscharko-dev/keiko-contracts";
|
|
2
|
+
import type { RetrievalEvalFixture } from "./types.js";
|
|
3
|
+
export declare const EVAL_EMBEDDING_IDENTITY: EmbeddingModelIdentity;
|
|
4
|
+
export declare const STALE_QUERY_EMBEDDING_IDENTITY: EmbeddingModelIdentity;
|
|
5
|
+
export declare const EVAL_TOPIC_BOOST = 1;
|
|
6
|
+
export declare const singleTopicFixture: RetrievalEvalFixture;
|
|
7
|
+
export declare const multiCapsuleFixture: RetrievalEvalFixture;
|
|
8
|
+
export declare const noEvidenceFixture: RetrievalEvalFixture;
|
|
9
|
+
export declare const ambiguousQueryFixture: RetrievalEvalFixture;
|
|
10
|
+
export declare const sourceIsolationFixture: RetrievalEvalFixture;
|
|
11
|
+
export declare const wrongScopeFixture: RetrievalEvalFixture;
|
|
12
|
+
export declare const multiPageFixture: RetrievalEvalFixture;
|
|
13
|
+
export declare const structuredFileFixture: RetrievalEvalFixture;
|
|
14
|
+
export declare const contextBudgetFixture: RetrievalEvalFixture;
|
|
15
|
+
export declare const staleIndexFixture: RetrievalEvalFixture;
|
|
16
|
+
export declare const broadQueryDiversityFixture: RetrievalEvalFixture;
|
|
17
|
+
export declare const ALL_FIXTURES: readonly RetrievalEvalFixture[];
|
|
18
|
+
//# sourceMappingURL=fixtures.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fixtures.d.ts","sourceRoot":"","sources":["../../src/evaluations/fixtures.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAGV,sBAAsB,EAGvB,MAAM,+BAA+B,CAAC;AAEvC,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,YAAY,CAAC;AAEvD,eAAO,MAAM,uBAAuB,EAAE,sBAKrC,CAAC;AAEF,eAAO,MAAM,8BAA8B,EAAE,sBAK5C,CAAC;AAEF,eAAO,MAAM,gBAAgB,IAAM,CAAC;AAepC,eAAO,MAAM,kBAAkB,EAAE,oBAiDhC,CAAC;AAEF,eAAO,MAAM,mBAAmB,EAAE,oBAqFjC,CAAC;AAEF,eAAO,MAAM,iBAAiB,EAAE,oBA+C/B,CAAC;AAEF,eAAO,MAAM,qBAAqB,EAAE,oBAiDnC,CAAC;AAEF,eAAO,MAAM,sBAAsB,EAAE,oBA2EpC,CAAC;AAEF,eAAO,MAAM,iBAAiB,EAAE,oBA2E/B,CAAC;AAEF,eAAO,MAAM,gBAAgB,EAAE,oBAoE9B,CAAC;AAEF,eAAO,MAAM,qBAAqB,EAAE,oBAkLnC,CAAC;AAEF,eAAO,MAAM,oBAAoB,EAAE,oBAiDlC,CAAC;AAEF,eAAO,MAAM,iBAAiB,EAAE,oBA8C/B,CAAC;AAEF,eAAO,MAAM,0BAA0B,EAAE,oBA6ExC,CAAC;AAEF,eAAO,MAAM,YAAY,EAAE,SAAS,oBAAoB,EAY9C,CAAC"}
|