@oscharko-dev/keiko-local-knowledge 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. package/dist/.tsbuildinfo +1 -0
  2. package/dist/bounded-document-extraction.d.ts +27 -0
  3. package/dist/bounded-document-extraction.d.ts.map +1 -0
  4. package/dist/bounded-document-extraction.js +214 -0
  5. package/dist/capsule-lifecycle.d.ts +33 -0
  6. package/dist/capsule-lifecycle.d.ts.map +1 -0
  7. package/dist/capsule-lifecycle.js +292 -0
  8. package/dist/capsule-set-lifecycle.d.ts +15 -0
  9. package/dist/capsule-set-lifecycle.d.ts.map +1 -0
  10. package/dist/capsule-set-lifecycle.js +158 -0
  11. package/dist/chunking/chunker-persist.d.ts +36 -0
  12. package/dist/chunking/chunker-persist.d.ts.map +1 -0
  13. package/dist/chunking/chunker-persist.js +74 -0
  14. package/dist/chunking/chunker-runner.d.ts +9 -0
  15. package/dist/chunking/chunker-runner.d.ts.map +1 -0
  16. package/dist/chunking/chunker-runner.js +218 -0
  17. package/dist/chunking/chunker.d.ts +7 -0
  18. package/dist/chunking/chunker.d.ts.map +1 -0
  19. package/dist/chunking/chunker.js +139 -0
  20. package/dist/chunking/citation-mapper.d.ts +4 -0
  21. package/dist/chunking/citation-mapper.d.ts.map +1 -0
  22. package/dist/chunking/citation-mapper.js +180 -0
  23. package/dist/chunking/index.d.ts +6 -0
  24. package/dist/chunking/index.d.ts.map +1 -0
  25. package/dist/chunking/index.js +8 -0
  26. package/dist/chunking/token-estimator.d.ts +3 -0
  27. package/dist/chunking/token-estimator.d.ts.map +1 -0
  28. package/dist/chunking/token-estimator.js +26 -0
  29. package/dist/chunking/types.d.ts +49 -0
  30. package/dist/chunking/types.d.ts.map +1 -0
  31. package/dist/chunking/types.js +26 -0
  32. package/dist/composition.d.ts +57 -0
  33. package/dist/composition.d.ts.map +1 -0
  34. package/dist/composition.js +310 -0
  35. package/dist/conversation/citation-attacher.d.ts +8 -0
  36. package/dist/conversation/citation-attacher.d.ts.map +1 -0
  37. package/dist/conversation/citation-attacher.js +55 -0
  38. package/dist/conversation/citation-excerpts.d.ts +4 -0
  39. package/dist/conversation/citation-excerpts.d.ts.map +1 -0
  40. package/dist/conversation/citation-excerpts.js +41 -0
  41. package/dist/conversation/grounded-answer-runner.d.ts +9 -0
  42. package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
  43. package/dist/conversation/grounded-answer-runner.js +61 -0
  44. package/dist/conversation/index.d.ts +5 -0
  45. package/dist/conversation/index.d.ts.map +1 -0
  46. package/dist/conversation/index.js +7 -0
  47. package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
  48. package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
  49. package/dist/conversation/model-gateway-answer-generator.js +105 -0
  50. package/dist/conversation/types.d.ts +35 -0
  51. package/dist/conversation/types.d.ts.map +1 -0
  52. package/dist/conversation/types.js +24 -0
  53. package/dist/discovery/discovery-runner.d.ts +23 -0
  54. package/dist/discovery/discovery-runner.d.ts.map +1 -0
  55. package/dist/discovery/discovery-runner.js +109 -0
  56. package/dist/discovery/extract-progressive.d.ts +17 -0
  57. package/dist/discovery/extract-progressive.d.ts.map +1 -0
  58. package/dist/discovery/extract-progressive.js +522 -0
  59. package/dist/discovery/extract.d.ts +26 -0
  60. package/dist/discovery/extract.d.ts.map +1 -0
  61. package/dist/discovery/extract.js +906 -0
  62. package/dist/discovery/glob.d.ts +10 -0
  63. package/dist/discovery/glob.d.ts.map +1 -0
  64. package/dist/discovery/glob.js +72 -0
  65. package/dist/discovery/index.d.ts +6 -0
  66. package/dist/discovery/index.d.ts.map +1 -0
  67. package/dist/discovery/index.js +8 -0
  68. package/dist/discovery/media-type.d.ts +4 -0
  69. package/dist/discovery/media-type.d.ts.map +1 -0
  70. package/dist/discovery/media-type.js +62 -0
  71. package/dist/discovery/persist.d.ts +63 -0
  72. package/dist/discovery/persist.d.ts.map +1 -0
  73. package/dist/discovery/persist.js +345 -0
  74. package/dist/discovery/test-support.d.ts +16 -0
  75. package/dist/discovery/test-support.d.ts.map +1 -0
  76. package/dist/discovery/test-support.js +127 -0
  77. package/dist/discovery/types.d.ts +63 -0
  78. package/dist/discovery/types.d.ts.map +1 -0
  79. package/dist/discovery/types.js +28 -0
  80. package/dist/discovery/walk.d.ts +12 -0
  81. package/dist/discovery/walk.d.ts.map +1 -0
  82. package/dist/discovery/walk.js +302 -0
  83. package/dist/errors.d.ts +13 -0
  84. package/dist/errors.d.ts.map +1 -0
  85. package/dist/errors.js +22 -0
  86. package/dist/evaluations/dimensions.d.ts +14 -0
  87. package/dist/evaluations/dimensions.d.ts.map +1 -0
  88. package/dist/evaluations/dimensions.js +191 -0
  89. package/dist/evaluations/fixtures.d.ts +18 -0
  90. package/dist/evaluations/fixtures.d.ts.map +1 -0
  91. package/dist/evaluations/fixtures.js +858 -0
  92. package/dist/evaluations/index.d.ts +7 -0
  93. package/dist/evaluations/index.d.ts.map +1 -0
  94. package/dist/evaluations/index.js +10 -0
  95. package/dist/evaluations/report.d.ts +3 -0
  96. package/dist/evaluations/report.d.ts.map +1 -0
  97. package/dist/evaluations/report.js +31 -0
  98. package/dist/evaluations/runner-seed.d.ts +12 -0
  99. package/dist/evaluations/runner-seed.d.ts.map +1 -0
  100. package/dist/evaluations/runner-seed.js +175 -0
  101. package/dist/evaluations/runner.d.ts +8 -0
  102. package/dist/evaluations/runner.d.ts.map +1 -0
  103. package/dist/evaluations/runner.js +205 -0
  104. package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
  105. package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
  106. package/dist/evaluations/scripted-embedding-adapter.js +163 -0
  107. package/dist/evaluations/types.d.ts +116 -0
  108. package/dist/evaluations/types.d.ts.map +1 -0
  109. package/dist/evaluations/types.js +27 -0
  110. package/dist/index.d.ts +23 -0
  111. package/dist/index.d.ts.map +1 -0
  112. package/dist/index.js +41 -0
  113. package/dist/indexing/bounded-indexing.d.ts +41 -0
  114. package/dist/indexing/bounded-indexing.d.ts.map +1 -0
  115. package/dist/indexing/bounded-indexing.js +240 -0
  116. package/dist/indexing/checkpoint-persist.d.ts +8 -0
  117. package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
  118. package/dist/indexing/checkpoint-persist.js +135 -0
  119. package/dist/indexing/checkpoint-resume.d.ts +20 -0
  120. package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
  121. package/dist/indexing/checkpoint-resume.js +50 -0
  122. package/dist/indexing/embedding-batcher.d.ts +3 -0
  123. package/dist/indexing/embedding-batcher.d.ts.map +1 -0
  124. package/dist/indexing/embedding-batcher.js +390 -0
  125. package/dist/indexing/index.d.ts +7 -0
  126. package/dist/indexing/index.d.ts.map +1 -0
  127. package/dist/indexing/index.js +11 -0
  128. package/dist/indexing/job-persist.d.ts +46 -0
  129. package/dist/indexing/job-persist.d.ts.map +1 -0
  130. package/dist/indexing/job-persist.js +157 -0
  131. package/dist/indexing/job-resume.d.ts +4 -0
  132. package/dist/indexing/job-resume.d.ts.map +1 -0
  133. package/dist/indexing/job-resume.js +14 -0
  134. package/dist/indexing/orchestrator.d.ts +3 -0
  135. package/dist/indexing/orchestrator.d.ts.map +1 -0
  136. package/dist/indexing/orchestrator.js +1151 -0
  137. package/dist/indexing/types.d.ts +156 -0
  138. package/dist/indexing/types.d.ts.map +1 -0
  139. package/dist/indexing/types.js +30 -0
  140. package/dist/indexing/vector-persist.d.ts +32 -0
  141. package/dist/indexing/vector-persist.d.ts.map +1 -0
  142. package/dist/indexing/vector-persist.js +105 -0
  143. package/dist/parsers/_internal.d.ts +20 -0
  144. package/dist/parsers/_internal.d.ts.map +1 -0
  145. package/dist/parsers/_internal.js +122 -0
  146. package/dist/parsers/csv-parser.d.ts +3 -0
  147. package/dist/parsers/csv-parser.d.ts.map +1 -0
  148. package/dist/parsers/csv-parser.js +202 -0
  149. package/dist/parsers/docx-parser.d.ts +3 -0
  150. package/dist/parsers/docx-parser.d.ts.map +1 -0
  151. package/dist/parsers/docx-parser.js +390 -0
  152. package/dist/parsers/html-parser.d.ts +3 -0
  153. package/dist/parsers/html-parser.d.ts.map +1 -0
  154. package/dist/parsers/html-parser.js +310 -0
  155. package/dist/parsers/index.d.ts +15 -0
  156. package/dist/parsers/index.d.ts.map +1 -0
  157. package/dist/parsers/index.js +41 -0
  158. package/dist/parsers/json-parser.d.ts +3 -0
  159. package/dist/parsers/json-parser.d.ts.map +1 -0
  160. package/dist/parsers/json-parser.js +192 -0
  161. package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
  162. package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
  163. package/dist/parsers/large-document/capability-discovery.js +76 -0
  164. package/dist/parsers/large-document/diagnostics.d.ts +3 -0
  165. package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
  166. package/dist/parsers/large-document/diagnostics.js +11 -0
  167. package/dist/parsers/large-document/index.d.ts +15 -0
  168. package/dist/parsers/large-document/index.d.ts.map +1 -0
  169. package/dist/parsers/large-document/index.js +10 -0
  170. package/dist/parsers/large-document/legacy-format.d.ts +5 -0
  171. package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
  172. package/dist/parsers/large-document/legacy-format.js +25 -0
  173. package/dist/parsers/large-document/preflight.d.ts +9 -0
  174. package/dist/parsers/large-document/preflight.d.ts.map +1 -0
  175. package/dist/parsers/large-document/preflight.js +43 -0
  176. package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
  177. package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
  178. package/dist/parsers/large-document/progressive-extraction.js +123 -0
  179. package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
  180. package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
  181. package/dist/parsers/large-document/progressive-pdf.js +145 -0
  182. package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
  183. package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
  184. package/dist/parsers/large-document/synthetic-source.js +101 -0
  185. package/dist/parsers/large-document/window-builder.d.ts +24 -0
  186. package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
  187. package/dist/parsers/large-document/window-builder.js +75 -0
  188. package/dist/parsers/ocr/index.d.ts +4 -0
  189. package/dist/parsers/ocr/index.d.ts.map +1 -0
  190. package/dist/parsers/ocr/index.js +4 -0
  191. package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
  192. package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
  193. package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
  194. package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
  195. package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
  196. package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
  197. package/dist/parsers/ocr/types.d.ts +16 -0
  198. package/dist/parsers/ocr/types.d.ts.map +1 -0
  199. package/dist/parsers/ocr/types.js +4 -0
  200. package/dist/parsers/parser-test-fixtures.d.ts +28 -0
  201. package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
  202. package/dist/parsers/parser-test-fixtures.js +139 -0
  203. package/dist/parsers/pdf-parser.d.ts +43 -0
  204. package/dist/parsers/pdf-parser.d.ts.map +1 -0
  205. package/dist/parsers/pdf-parser.js +388 -0
  206. package/dist/parsers/registry.d.ts +8 -0
  207. package/dist/parsers/registry.d.ts.map +1 -0
  208. package/dist/parsers/registry.js +57 -0
  209. package/dist/parsers/text-parser.d.ts +3 -0
  210. package/dist/parsers/text-parser.d.ts.map +1 -0
  211. package/dist/parsers/text-parser.js +214 -0
  212. package/dist/parsers/types.d.ts +53 -0
  213. package/dist/parsers/types.d.ts.map +1 -0
  214. package/dist/parsers/types.js +21 -0
  215. package/dist/parsers/unsupported-parser.d.ts +4 -0
  216. package/dist/parsers/unsupported-parser.d.ts.map +1 -0
  217. package/dist/parsers/unsupported-parser.js +97 -0
  218. package/dist/parsers/xlsx-parser.d.ts +3 -0
  219. package/dist/parsers/xlsx-parser.d.ts.map +1 -0
  220. package/dist/parsers/xlsx-parser.js +425 -0
  221. package/dist/privacy/audit-emitter.d.ts +5 -0
  222. package/dist/privacy/audit-emitter.d.ts.map +1 -0
  223. package/dist/privacy/audit-emitter.js +93 -0
  224. package/dist/privacy/diagnostic-redactor.d.ts +2 -0
  225. package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
  226. package/dist/privacy/diagnostic-redactor.js +153 -0
  227. package/dist/privacy/index.d.ts +5 -0
  228. package/dist/privacy/index.d.ts.map +1 -0
  229. package/dist/privacy/index.js +6 -0
  230. package/dist/privacy/retention-applier.d.ts +5 -0
  231. package/dist/privacy/retention-applier.d.ts.map +1 -0
  232. package/dist/privacy/retention-applier.js +88 -0
  233. package/dist/privacy/types.d.ts +98 -0
  234. package/dist/privacy/types.d.ts.map +1 -0
  235. package/dist/privacy/types.js +12 -0
  236. package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
  237. package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
  238. package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
  239. package/dist/qualityIntelligence/index.d.ts +3 -0
  240. package/dist/qualityIntelligence/index.d.ts.map +1 -0
  241. package/dist/qualityIntelligence/index.js +5 -0
  242. package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
  243. package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
  244. package/dist/qualityIntelligence/qiHandoff.js +82 -0
  245. package/dist/retrieval/answer-grounding.d.ts +9 -0
  246. package/dist/retrieval/answer-grounding.d.ts.map +1 -0
  247. package/dist/retrieval/answer-grounding.js +31 -0
  248. package/dist/retrieval/context-pack-assembler.d.ts +24 -0
  249. package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
  250. package/dist/retrieval/context-pack-assembler.js +50 -0
  251. package/dist/retrieval/index.d.ts +6 -0
  252. package/dist/retrieval/index.d.ts.map +1 -0
  253. package/dist/retrieval/index.js +9 -0
  254. package/dist/retrieval/retrieval-runner.d.ts +10 -0
  255. package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
  256. package/dist/retrieval/retrieval-runner.js +163 -0
  257. package/dist/retrieval/scoped-vector-search.d.ts +24 -0
  258. package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
  259. package/dist/retrieval/scoped-vector-search.js +864 -0
  260. package/dist/retrieval/types.d.ts +28 -0
  261. package/dist/retrieval/types.d.ts.map +1 -0
  262. package/dist/retrieval/types.js +33 -0
  263. package/dist/section-path-hash.d.ts +3 -0
  264. package/dist/section-path-hash.d.ts.map +1 -0
  265. package/dist/section-path-hash.js +9 -0
  266. package/dist/source-lifecycle.d.ts +14 -0
  267. package/dist/source-lifecycle.d.ts.map +1 -0
  268. package/dist/source-lifecycle.js +155 -0
  269. package/dist/source-routing-validation.d.ts +11 -0
  270. package/dist/source-routing-validation.d.ts.map +1 -0
  271. package/dist/source-routing-validation.js +140 -0
  272. package/dist/store-content-cipher.d.ts +11 -0
  273. package/dist/store-content-cipher.d.ts.map +1 -0
  274. package/dist/store-content-cipher.js +67 -0
  275. package/dist/store-content-encryption.d.ts +12 -0
  276. package/dist/store-content-encryption.d.ts.map +1 -0
  277. package/dist/store-content-encryption.js +275 -0
  278. package/dist/store-paths.d.ts +6 -0
  279. package/dist/store-paths.d.ts.map +1 -0
  280. package/dist/store-paths.js +61 -0
  281. package/dist/store.d.ts +30 -0
  282. package/dist/store.d.ts.map +1 -0
  283. package/dist/store.js +219 -0
  284. package/dist/testing.d.ts +47 -0
  285. package/dist/testing.d.ts.map +1 -0
  286. package/dist/testing.js +170 -0
  287. package/dist/version.d.ts +2 -0
  288. package/dist/version.d.ts.map +1 -0
  289. package/dist/version.js +4 -0
  290. package/package.json +43 -0
@@ -0,0 +1,7 @@
1
+ export { ALL_FIXTURES, EVAL_EMBEDDING_IDENTITY, EVAL_TOPIC_BOOST, ambiguousQueryFixture, broadQueryDiversityFixture, contextBudgetFixture, multiCapsuleFixture, multiPageFixture, noEvidenceFixture, singleTopicFixture, staleIndexFixture, structuredFileFixture, sourceIsolationFixture, wrongScopeFixture, } from "./fixtures.js";
2
+ export { citationRequirementForUnit, scoreCitationQuality, scoreContextBudgetFit, scoreMeanReciprocalRank, scoreNdcg, scoreNoEvidenceAccuracy, scorePrecision, scoreRecall, scoreSourceIsolation, type CitationRequirementKey, } from "./dimensions.js";
3
+ export { createScriptedEmbeddingAdapter, fnv1a32, withTopicMarker, type ScriptedEmbeddingAdapterOptions, } from "./scripted-embedding-adapter.js";
4
+ export { renderRetrievalEvalQualityGateReport } from "./report.js";
5
+ export { runRetrievalEval, type RunRetrievalEvalDeps } from "./runner.js";
6
+ export { PASS_THRESHOLDS, type EvalCapsuleSpec, type EvalChunkSpec, type EvalDocumentSpec, type EvalParsedUnitSpec, type EvalParsedUnitWithoutDocId, type EvalRetrievalScope, type EvalSourceSpec, type ModelJudgedRetrievalEvalInput, type ModelJudgedRetrievalEvalJudge, type ModelJudgedRetrievalEvalScores, type RetrievalEvalDimensionScores, type RetrievalEvalFixture, type RetrievalEvalQuery, type RetrievalEvalScorecard, type RetrievalEvalThresholds, } from "./types.js";
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/evaluations/index.ts"],"names":[],"mappings":"AAKA,OAAO,EACL,YAAY,EACZ,uBAAuB,EACvB,gBAAgB,EAChB,qBAAqB,EACrB,0BAA0B,EAC1B,oBAAoB,EACpB,mBAAmB,EACnB,gBAAgB,EAChB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,qBAAqB,EACrB,sBAAsB,EACtB,iBAAiB,GAClB,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,0BAA0B,EAC1B,oBAAoB,EACpB,qBAAqB,EACrB,uBAAuB,EACvB,SAAS,EACT,uBAAuB,EACvB,cAAc,EACd,WAAW,EACX,oBAAoB,EACpB,KAAK,sBAAsB,GAC5B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,8BAA8B,EAC9B,OAAO,EACP,eAAe,EACf,KAAK,+BAA+B,GACrC,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,oCAAoC,EAAE,MAAM,aAAa,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,KAAK,oBAAoB,EAAE,MAAM,aAAa,CAAC;AAE1E,OAAO,EACL,eAAe,EACf,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,gBAAgB,EACrB,KAAK,kBAAkB,EACvB,KAAK,0BAA0B,EAC/B,KAAK,kBAAkB,EACvB,KAAK,cAAc,EACnB,KAAK,6BAA6B,EAClC,KAAK,6BAA6B,EAClC,KAAK,8BAA8B,EACnC,KAAK,4BAA4B,EACjC,KAAK,oBAAoB,EACzB,KAAK,kBAAkB,EACvB,KAAK,sBAAsB,EAC3B,KAAK,uBAAuB,GAC7B,MAAM,YAAY,CAAC"}
@@ -0,0 +1,10 @@
1
+ // Public surface of the retrieval evaluation harness (Epic #189, Issue #268). The harness
2
+ // is composed by the package barrel in ../index.ts; consumers outside this package never
3
+ // import from this subdirectory directly (ADR-0019 direction rule 3e + the trust-8
4
+ // test-support naming convention).
5
+ export { ALL_FIXTURES, EVAL_EMBEDDING_IDENTITY, EVAL_TOPIC_BOOST, ambiguousQueryFixture, broadQueryDiversityFixture, contextBudgetFixture, multiCapsuleFixture, multiPageFixture, noEvidenceFixture, singleTopicFixture, staleIndexFixture, structuredFileFixture, sourceIsolationFixture, wrongScopeFixture, } from "./fixtures.js";
6
+ export { citationRequirementForUnit, scoreCitationQuality, scoreContextBudgetFit, scoreMeanReciprocalRank, scoreNdcg, scoreNoEvidenceAccuracy, scorePrecision, scoreRecall, scoreSourceIsolation, } from "./dimensions.js";
7
+ export { createScriptedEmbeddingAdapter, fnv1a32, withTopicMarker, } from "./scripted-embedding-adapter.js";
8
+ export { renderRetrievalEvalQualityGateReport } from "./report.js";
9
+ export { runRetrievalEval } from "./runner.js";
10
+ export { PASS_THRESHOLDS, } from "./types.js";
@@ -0,0 +1,3 @@
1
+ import type { RetrievalEvalScorecard } from "./types.js";
2
/**
 * Renders retrieval-eval scorecards as a Markdown quality-gate report.
 *
 * The implementation emits a title line, a blank line, a table header with its
 * alignment row, and then one table row per scorecard (fixture id, the
 * dimension scores fixed to three decimals, and PASS/FAIL), joined with "\n".
 *
 * @param scorecards Scorecards to tabulate, in output order.
 * @returns The Markdown document as a single string.
 */
+ export declare function renderRetrievalEvalQualityGateReport(scorecards: readonly RetrievalEvalScorecard[]): string;
3
+ //# sourceMappingURL=report.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/evaluations/report.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,YAAY,CAAC;AAMzD,wBAAgB,oCAAoC,CAClD,UAAU,EAAE,SAAS,sBAAsB,EAAE,GAC5C,MAAM,CAwBR"}
@@ -0,0 +1,31 @@
1
+ // Markdown renderer for Issue #268 quality-gate evidence. The report is built entirely
2
+ // from synthetic scorecards, so it is safe to attach to epic closure evidence without
3
+ // leaking customer content, credentials, or runtime logs.
4
/**
 * Formats a numeric dimension value with exactly three decimal places.
 *
 * @param {number} value - Score (or latency reading) to render.
 * @returns {string} Fixed-point text such as "0.500".
 */
function format(value) {
    return value.toFixed(3);
}

/**
 * Makes free text safe to place inside a single Markdown table cell.
 *
 * A raw "|" would start a new column and a line break would end the row, so
 * pipes are backslash-escaped and line breaks are flattened to a space.
 * `null`/`undefined` render as an empty cell, which is what `Array.join`
 * produced for them before this helper existed.
 *
 * @param {unknown} text - Cell content, typically a fixture id.
 * @returns {string} Text that cannot break the table layout.
 */
function escapeTableCell(text) {
    if (text === undefined || text === null) {
        return "";
    }
    return String(text)
        .replace(/\r\n|\r|\n/g, " ")
        .replace(/\|/g, "\\|");
}

/**
 * Renders retrieval-eval scorecards as a Markdown quality-gate report.
 *
 * Output layout: a title, a blank line, the table header and its alignment
 * row, then one row per scorecard in input order. Lines are joined with "\n"
 * and there is no trailing newline.
 *
 * @param {ReadonlyArray<object>} scorecards - Scorecards exposing `fixtureId`,
 *   `passed` and a `dimensions` object with the numeric scores read below.
 * @returns {string} The Markdown report.
 */
export function renderRetrievalEvalQualityGateReport(scorecards) {
    const header = [
        "# Local Knowledge Retrieval Quality Gate",
        "",
        "| Fixture | Recall | Precision | MRR | nDCG | Isolation | Citation | No-evidence | Context budget | Latency | Pass |",
        "| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |",
    ];
    const rows = scorecards.map((scorecard) => {
        const d = scorecard.dimensions;
        const cells = [
            // The fixture id is the only free-text cell, so it is the only one escaped.
            escapeTableCell(scorecard.fixtureId),
            format(d.recall),
            format(d.precision),
            format(d.meanReciprocalRank),
            format(d.ndcg),
            format(d.sourceIsolation),
            format(d.citationQuality),
            format(d.noEvidenceAccuracy),
            format(d.contextBudgetFit),
            format(d.latencyMs),
            scorecard.passed ? "PASS" : "FAIL",
        ];
        return `| ${cells.join(" | ")} |`;
    });
    return [...header, ...rows].join("\n");
}
@@ -0,0 +1,12 @@
1
+ import type { EmbeddingModelIdentity } from "@oscharko-dev/keiko-contracts";
2
+ import type { KnowledgeStore } from "../store.js";
3
+ import { type CitationRequirementKey } from "./dimensions.js";
4
+ import type { RetrievalEvalFixture } from "./types.js";
5
/**
 * Bookkeeping returned by `seedFixture` after a fixture has been written to a store.
 *
 * - `chunkUnitKinds`: keyed by the stringified chunk id; the value is what
 *   `citationRequirementForUnit` returned for the chunk's parsed unit.
 * - `chunkTokenCounts`: keyed by the stringified chunk id; the seeder stores the
 *   chunk text's `length` here (a character count used as the token count).
 * - `topicBoosts`: every topic declared by a chunk or a query, each mapped to 1.0.
 * - `identity`: the embedding identity of the first capsule; the seeder rejects
 *   fixtures whose capsules do not all share it.
 */
+ export interface SeededFixture {
6
+ readonly chunkUnitKinds: ReadonlyMap<string, CitationRequirementKey>;
7
+ readonly chunkTokenCounts: ReadonlyMap<string, number>;
8
+ readonly topicBoosts: Readonly<Record<string, number>>;
9
+ readonly identity: EmbeddingModelIdentity;
10
+ }
11
/**
 * Writes a fixture's capsules, sources, documents, parsed units and chunks into
 * `store`, creates any capsule sets referenced by capsule-set scoped queries,
 * and returns the lookup tables gathered along the way.
 *
 * Throws an `Error` when the fixture declares no capsule or when its capsules
 * do not share one embedding identity.
 *
 * @param store Knowledge store that receives the seeded rows.
 * @param fixture Fixture to materialise.
 */
+ export declare function seedFixture(store: KnowledgeStore, fixture: RetrievalEvalFixture): SeededFixture;
12
+ //# sourceMappingURL=runner-seed.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner-seed.d.ts","sourceRoot":"","sources":["../../src/evaluations/runner-seed.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAEV,sBAAsB,EAGvB,MAAM,+BAA+B,CAAC;AAOvC,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EAA8B,KAAK,sBAAsB,EAAE,MAAM,iBAAiB,CAAC;AAC1F,OAAO,KAAK,EAIV,oBAAoB,EACrB,MAAM,YAAY,CAAC;AAEpB,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,cAAc,EAAE,WAAW,CAAC,MAAM,EAAE,sBAAsB,CAAC,CAAC;IACrE,QAAQ,CAAC,gBAAgB,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAGvD,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAEvD,QAAQ,CAAC,QAAQ,EAAE,sBAAsB,CAAC;CAC3C;AAwLD,wBAAgB,WAAW,CAAC,KAAK,EAAE,cAAc,EAAE,OAAO,EAAE,oBAAoB,GAAG,aAAa,CAoB/F"}
@@ -0,0 +1,175 @@
1
+ // Fixture seeding helpers for the eval runner (Epic #189, Issue #268). Extracted from
2
+ // `runner.ts` so each file stays under the 400-LOC budget. Materialises a fixture's
3
+ // capsules / sources / documents / parsed-units / chunks rows into a fresh store; the
4
+ // runner then embeds the chunks separately through `embedChunkBatch`.
5
+ //
6
+ // Topic boosts are aggregated here too: every chunk and every query that declares a
7
+ // `topic` contributes a boost of 1.0 to the map handed to the scripted adapter. Boosts of
8
+ // 1.0 produce the pure topic vector for marked inputs — strong enough that the
9
+ // ground-truth chunk always dominates the cosine ranking for its query.
10
+ import { createCapsule } from "../capsule-lifecycle.js";
11
+ import { createCapsuleSet, getCapsuleSet } from "../capsule-set-lifecycle.js";
12
+ import { insertChunkRow } from "../chunking/chunker-persist.js";
13
+ import { insertDocumentRow, insertParsedUnitRow } from "../discovery/persist.js";
14
+ import { addSourceToCapsule } from "../source-lifecycle.js";
15
+ import { citationRequirementForUnit } from "./dimensions.js";
16
/**
 * Builds the row id under which a fixture parsed unit is stored.
 *
 * The fixture-local unit id is namespaced by its document id, giving
 * "unit-<documentId>-<parsedUnitId>".
 *
 * @param {string} documentId - Stringified document id.
 * @param {string} parsedUnitId - Unit id as declared inside the fixture document.
 * @returns {string} The composed row id.
 */
function chunkParsedUnitId(documentId, parsedUnitId) {
    const prefix = "unit-";
    return prefix.concat(documentId, "-", parsedUnitId);
}
19
/**
 * Tells whether two embedding identities describe the same model and vector space.
 *
 * Only provider, model id, model revision, vector dimensions and vector metric
 * are compared, each with strict equality; any other properties are ignored.
 *
 * @param {object} left - First identity.
 * @param {object} right - Second identity.
 * @returns {boolean} True when all five compared fields match.
 */
function sameEmbeddingIdentity(left, right) {
    const comparedFields = [
        "provider",
        "modelId",
        "modelRevision",
        "vectorDimensions",
        "vectorMetric",
    ];
    return comparedFields.every((field) => left[field] === right[field]);
}
26
/**
 * Creates the capsule row for one fixture capsule.
 *
 * Id, display name, grounding policy and embedding identity come from the
 * fixture; the remaining fields are fixed eval defaults (no tags, "default"
 * retrieval effort, "answers" output mode, "draft" lifecycle state) and the
 * storage reference is "eval/<capsule id>".
 *
 * @param {object} store - Knowledge store handed through to `createCapsule`.
 * @param {object} capsule - Fixture capsule spec.
 * @returns {void}
 */
function seedCapsule(store, capsule) {
    const { id, displayName, answerGroundingPolicy, embeddingModelIdentity } = capsule;
    const draft = {
        id,
        displayName,
        tags: [],
        retrievalEffort: "default",
        outputMode: "answers",
        answerGroundingPolicy,
        embeddingModelIdentity,
        lifecycleState: "draft",
        storageReference: `eval/${String(id)}`,
    };
    createCapsule(store, draft);
}
39
/**
 * Registers one fixture source on a capsule.
 *
 * Only the source id comes from the fixture. The display name is derived from
 * it ("Source <id>") and the scope is always the same recursive folder scope
 * rooted at "/srv/docs".
 *
 * @param {object} store - Knowledge store handed through to `addSourceToCapsule`.
 * @param {*} capsuleId - Id of the owning capsule.
 * @param {object} source - Fixture source spec; only `id` is read.
 * @returns {void}
 */
function seedSource(store, capsuleId, source) {
    const sourceId = source.id;
    const registration = {
        id: sourceId,
        displayName: `Source ${String(sourceId)}`,
        tags: [],
        scope: { kind: "folder", rootPath: "/srv/docs", recursive: true },
    };
    addSourceToCapsule(store, capsuleId, registration);
}
47
/**
 * Produces the parsed-unit payload for a fixture unit, bound to its document.
 *
 * Returns a shallow copy of the wrapper's inner `unit` object with
 * `documentId` set (overriding any value already present). The input is not
 * modified.
 *
 * @param {string} documentId - Document id to stamp on the payload.
 * @param {object} unit - Fixture wrapper whose `unit` property holds the payload.
 * @returns {object} New payload object.
 */
function composeParsedUnit(documentId, unit) {
    const { unit: payload } = unit;
    return { ...payload, documentId };
}
50
/**
 * Inserts the document row for a fixture document, then one parsed-unit row
 * for each of its parsed units.
 *
 * Size, content hash, extraction timestamp and status are fixed synthetic
 * values. Media type, parser id and parser version fall back to "text/plain",
 * "text" and "1" when the fixture leaves them unset.
 *
 * @param {object} store - Store exposing `_internal.db` and `_internal.contentCipher`.
 * @param {object} capsule - Owning fixture capsule (its `id` is used).
 * @param {object} source - Owning fixture source (its `id` is stringified).
 * @param {object} doc - Fixture document spec.
 * @returns {void}
 */
function seedDocument(store, capsule, source, doc) {
    const { db, contentCipher } = store._internal;
    const documentKey = String(doc.id);
    const documentRow = {
        id: doc.id,
        capsuleId: capsule.id,
        sourceId: String(source.id),
        documentPath: `docs/${doc.safeDisplayName}`,
        sizeBytes: 1024,
        mediaType: doc.mediaType ?? "text/plain",
        contentHash: "a".repeat(64),
        parserId: doc.parserId ?? "text",
        parserVersion: doc.parserVersion ?? "1",
        lastExtractedAt: 1_700_000_000_000,
        status: "extracted",
        safeDisplayName: doc.safeDisplayName,
    };
    insertDocumentRow(db, documentRow);
    for (const parsedUnit of doc.parsedUnits) {
        // Unit ids are namespaced per document so they stay unique across the store.
        const rowId = chunkParsedUnitId(documentKey, parsedUnit.id);
        const payload = composeParsedUnit(documentKey, parsedUnit);
        insertParsedUnitRow(db, contentCipher, capsule.id, rowId, payload);
    }
}
69
/**
 * Finds the fixture parsed unit a chunk belongs to.
 *
 * A chunk that names a `parsedUnitId` must match a unit of the document by id;
 * a chunk without one is attached to the document's first parsed unit.
 *
 * @param {object} doc - Fixture document with a `parsedUnits` array.
 * @param {object} chunk - Fixture chunk, optionally carrying `parsedUnitId`.
 * @returns {object} The matching fixture parsed unit.
 * @throws {Error} When the document has no parsed units, or the chunk names a
 *   unit id the document does not declare.
 */
function resolveChunkUnit(doc, chunk) {
    const units = doc.parsedUnits;
    if (units.length === 0) {
        throw new Error(`eval document ${String(doc.id)} must declare at least one parsed unit`);
    }
    const wantedId = chunk.parsedUnitId;
    if (wantedId !== undefined) {
        const match = units.find((candidate) => candidate.id === wantedId);
        if (match === undefined) {
            throw new Error(`eval chunk ${String(chunk.id)} references unknown parsed unit ${wantedId}`);
        }
        return match;
    }
    const fallback = units[0];
    if (fallback === undefined) {
        // Defensive only: the length check above already rules out an empty list.
        throw new Error("unreachable");
    }
    return fallback;
}
85
/**
 * Inserts one chunk row per fixture chunk and records per-chunk metadata.
 *
 * Chunks receive consecutive `orderIndex` values starting at 0 in fixture
 * order. The chunk text's `length` is used for `tokenCount` and for the end of
 * the synthetic character span. For every chunk the two maps are updated under
 * the stringified chunk id.
 *
 * @param {object} store - Store exposing `_internal.db`.
 * @param {object} capsule - Owning fixture capsule.
 * @param {object} source - Owning fixture source.
 * @param {object} doc - Fixture document whose `chunks` are seeded.
 * @param {Map<string, *>} chunkUnitKinds - Receives the citation requirement per chunk.
 * @param {Map<string, number>} chunkTokenCounts - Receives the token count per chunk.
 * @returns {void}
 */
function seedChunks(store, capsule, source, doc, chunkUnitKinds, chunkTokenCounts) {
    const documentKey = String(doc.id);
    let position = 0;
    for (const chunk of doc.chunks) {
        const owningUnit = resolveChunkUnit(doc, chunk);
        const unitPayload = composeParsedUnit(documentKey, owningUnit);
        const chunkKey = String(chunk.id);
        const textLength = chunk.text.length;
        const chunkRow = {
            id: chunk.id,
            capsuleId: capsule.id,
            sourceId: source.id,
            documentId: doc.id,
            parsedUnitId: chunkParsedUnitId(documentKey, owningUnit.id),
            orderIndex: position,
            tokenCount: textLength,
            safeExcerptHash: "b".repeat(64),
            chunkingStrategyVersion: "issue-195-v1",
            // Synthetic span over the chunk's own text. Inert for the eval harness (it retrieves
            // pre-seeded vectors rather than re-slicing source text) but satisfies the v8 columns.
            characterStart: 0,
            characterEnd: textLength,
        };
        insertChunkRow(store._internal.db, chunkRow);
        chunkUnitKinds.set(chunkKey, citationRequirementForUnit(unitPayload));
        chunkTokenCounts.set(chunkKey, textLength);
        position += 1;
    }
}
110
/**
 * Gathers every topic a fixture mentions into a boost table.
 *
 * Each chunk (across all capsules, sources and documents) and each query that
 * declares a `topic` contributes an entry with boost 1.0. Chunk topics are
 * visited before query topics; repeated topics simply overwrite with the same
 * value.
 *
 * @param {object} fixture - Fixture with `capsules` and `queries`.
 * @returns {Object<string, number>} Topic name to boost (always 1.0).
 */
function collectTopicBoosts(fixture) {
    const boosts = {};
    const register = (item) => {
        const topic = item.topic;
        if (topic !== undefined) {
            boosts[topic] = 1.0;
        }
    };
    for (const capsule of fixture.capsules) {
        for (const source of capsule.sources) {
            for (const doc of source.documents) {
                for (const chunk of doc.chunks) {
                    register(chunk);
                }
            }
        }
    }
    for (const query of fixture.queries) {
        register(query);
    }
    return boosts;
}
128
/**
 * Creates the capsule sets that the fixture's queries refer to.
 *
 * Only queries with a "capsule-set" scope are considered. A set is created
 * from the scope's id and capsule ids unless the store already has a set with
 * that id, so several queries can share one set.
 *
 * @param {object} store - Knowledge store handed to the capsule-set helpers.
 * @param {object} fixture - Fixture whose `queries` are scanned.
 * @returns {void}
 */
function seedCapsuleSets(store, fixture) {
    for (const { scope } of fixture.queries) {
        const needsSet = scope.kind === "capsule-set"
            && getCapsuleSet(store, scope.capsuleSetId) === undefined;
        if (needsSet) {
            createCapsuleSet(store, {
                id: scope.capsuleSetId,
                displayName: `Set ${scope.capsuleSetId}`,
                tags: [],
                capsuleIds: scope.capsuleIds,
            });
        }
    }
}
143
/**
 * Checks that a fixture uses exactly one embedding identity and returns it.
 *
 * The first capsule's identity is the reference; every capsule is compared
 * against it with `sameEmbeddingIdentity`.
 *
 * @param {object} fixture - Fixture with an `id` and a `capsules` array.
 * @returns {object} The first capsule's `embeddingModelIdentity`.
 * @throws {Error} When the fixture has no capsules, or any capsule's identity
 *   differs from the first one.
 */
function validateFixtureIdentity(fixture) {
    const { capsules } = fixture;
    const reference = capsules[0];
    if (reference === undefined) {
        throw new Error("fixture must declare at least one capsule");
    }
    const expected = reference.embeddingModelIdentity;
    const mismatch = capsules.find(
        (capsule) => !sameEmbeddingIdentity(expected, capsule.embeddingModelIdentity),
    );
    if (mismatch !== undefined) {
        throw new Error(`fixture ${fixture.id} mixes embedding identities; eval runner requires one identity per run`);
    }
    return expected;
}
155
/**
 * Materialises a retrieval-eval fixture into a knowledge store.
 *
 * The fixture's embedding identity is validated BEFORE anything is written, so
 * a fixture with no capsules or with mixed identities is rejected without
 * leaving partially seeded rows behind. After that, every capsule is created
 * with its sources, documents, parsed units and chunks, followed by the
 * capsule sets referenced by capsule-set scoped queries.
 *
 * @param {object} store - Knowledge store that receives the rows.
 * @param {object} fixture - Fixture to seed.
 * @returns {{chunkUnitKinds: Map, chunkTokenCounts: Map, topicBoosts: Object, identity: object}}
 *   Per-chunk lookup maps filled during seeding, the topic boost table and the
 *   single embedding identity shared by the fixture's capsules.
 * @throws {Error} When the fixture declares no capsule or mixes embedding
 *   identities (same errors as before, now raised up front).
 */
export function seedFixture(store, fixture) {
    // Fail fast: reject an invalid fixture before mutating the store.
    const identity = validateFixtureIdentity(fixture);
    const chunkUnitKinds = new Map();
    const chunkTokenCounts = new Map();
    for (const capsule of fixture.capsules) {
        seedCapsule(store, capsule);
        for (const source of capsule.sources) {
            seedSource(store, capsule.id, source);
            for (const doc of source.documents) {
                seedDocument(store, capsule, source, doc);
                seedChunks(store, capsule, source, doc, chunkUnitKinds, chunkTokenCounts);
            }
        }
    }
    seedCapsuleSets(store, fixture);
    return {
        chunkUnitKinds,
        chunkTokenCounts,
        topicBoosts: collectTopicBoosts(fixture),
        identity,
    };
}
@@ -0,0 +1,8 @@
1
+ import type { ModelJudgedRetrievalEvalJudge, RetrievalEvalFixture, RetrievalEvalScorecard } from "./types.js";
2
/**
 * Optional dependencies for `runRetrievalEval`.
 *
 * - `now`: clock used for the latency dimension. Per the runner's determinism
 *   contract the default is a monotonic counter starting at 0; supplying a
 *   wall-clock source gives up byte-identical scorecards between runs.
 * - `runId`: NOTE(review): how the runner uses this is not visible in the
 *   declaration; presumably an identifier for the run. Confirm in runner.ts.
 * - `modelJudge`: NOTE(review): presumably an optional model-based judge run
 *   alongside the deterministic dimensions. Confirm in runner.ts.
 */
+ export interface RunRetrievalEvalDeps {
3
+ readonly now?: () => number;
4
+ readonly runId?: string;
5
+ readonly modelJudge?: ModelJudgedRetrievalEvalJudge;
6
+ }
7
/**
 * Runs one retrieval-eval fixture and resolves with its scorecard.
 *
 * Per the runner's header notes, the fixture is materialised into a fresh
 * temporary SQLite store, every query goes through `runLocalKnowledgeRetrieval`,
 * and each query is scored against the deterministic guardrail dimensions.
 *
 * @param fixture Fixture to evaluate.
 * @param deps Optional overrides (clock, run id, model judge).
 */
+ export declare function runRetrievalEval(fixture: RetrievalEvalFixture, deps?: RunRetrievalEvalDeps): Promise<RetrievalEvalScorecard>;
8
+ //# sourceMappingURL=runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/evaluations/runner.ts"],"names":[],"mappings":"AA6CA,OAAO,KAAK,EAEV,6BAA6B,EAE7B,oBAAoB,EAEpB,sBAAsB,EACvB,MAAM,YAAY,CAAC;AAMpB,MAAM,WAAW,oBAAoB;IAGnC,QAAQ,CAAC,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;IAG5B,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAGxB,QAAQ,CAAC,UAAU,CAAC,EAAE,6BAA6B,CAAC;CACrD;AAkPD,wBAAsB,gBAAgB,CACpC,OAAO,EAAE,oBAAoB,EAC7B,IAAI,GAAE,oBAAyB,GAC9B,OAAO,CAAC,sBAAsB,CAAC,CAkBjC"}
@@ -0,0 +1,205 @@
1
+ // Retrieval evaluation runner (Epic #189, Issue #268). Materialises a `RetrievalEvalFixture`
2
+ // into a fresh temporary SQLite store on disk, runs every query through `runLocalKnowledgeRetrieval`
3
+ // (#199) UNCHANGED, scores each query against the deterministic guardrail dimensions, and returns an immutable
4
+ // `RetrievalEvalScorecard`.
5
+ //
6
+ // Determinism contract:
7
+ // - The default `now()` is a monotonic counter starting at 0. Two runs of the same fixture
8
+ // therefore produce byte-identical scorecards (the latency dimension counts ticks of this
9
+ // counter, not wall-clock milliseconds).
10
+ // - A caller that wants real wall-clock latency passes its own `now: () => performance.now()`
11
+ // — but doing so DROPS the byte-identical guarantee and is incompatible with the audit
12
+ // ledger's manifest equality check.
13
+ // - The store path uses `mkdtempSync` (a fresh directory on every run) but the store contents are
14
+ // discarded at teardown; nothing about the temp path leaks into the scorecard.
15
+ //
16
+ // Seeding is implemented in `runner-seed.ts` so each file stays under the 400-LOC budget.
17
+ import { mkdtempSync, rmSync } from "node:fs";
18
+ import { tmpdir } from "node:os";
19
+ import { join } from "node:path";
20
+ import { embedChunkBatch } from "../indexing/embedding-batcher.js";
21
+ import { runLocalKnowledgeRetrieval } from "../retrieval/index.js";
22
+ import { openKnowledgeStore } from "../store.js";
23
+ import { scoreCitationQuality, scoreContextBudgetFit, scoreMeanReciprocalRank, scoreNdcg, scoreNoEvidenceAccuracy, scorePrecision, scoreRecall, scoreSourceIsolation, } from "./dimensions.js";
24
+ import { seedFixture } from "./runner-seed.js";
25
+ import { createScriptedEmbeddingAdapter, withTopicMarker } from "./scripted-embedding-adapter.js";
26
+ import { PASS_THRESHOLDS } from "./types.js";
27
/**
 * Flattens a capsule's `sources -> documents -> chunks` tree into a single list of
 * embedding inputs, in traversal order.
 *
 * A chunk that declares a `topic` has its text wrapped via `withTopicMarker`; a chunk
 * without one keeps its text unchanged.
 *
 * @param capsule Fixture capsule with `id` and `sources`.
 * @returns Array of `{ id, capsuleId, sourceId, documentId, text }` records.
 */
function collectCapsuleChunks(capsule) {
    const collected = [];
    for (const source of capsule.sources) {
        for (const doc of source.documents) {
            for (const { id, text, topic } of doc.chunks) {
                collected.push({
                    id,
                    capsuleId: capsule.id,
                    sourceId: source.id,
                    documentId: doc.id,
                    text: topic === undefined ? text : withTopicMarker(text, topic),
                });
            }
        }
    }
    return collected;
}
45
/**
 * Embeds every chunk of every capsule in the fixture through `embedChunkBatch`, one
 * capsule at a time, using a single scripted adapter built from the seeded identity and
 * topic boosts.
 *
 * Storage ids come from a local counter (`eval-storage-1`, `eval-storage-2`, ...) shared
 * across all capsules of the call.
 *
 * @param store   Knowledge store passed through to `embedChunkBatch`.
 * @param fixture Fixture whose `capsules` are embedded.
 * @param seeded  Result of `seedFixture` (`identity`, `topicBoosts` are read).
 * @param now     Clock function forwarded to `embedChunkBatch`.
 * @throws Error when a batch reports any errors; the message lists the error codes.
 */
async function embedAllChunks(store, fixture, seeded, now) {
    const { identity, topicBoosts } = seeded;
    const adapter = createScriptedEmbeddingAdapter({ identity, topicBoosts });
    let issuedIds = 0;
    const idSource = () => {
        issuedIds += 1;
        return `eval-storage-${String(issuedIds)}`;
    };
    for (const capsule of fixture.capsules) {
        const { errors } = await embedChunkBatch(collectCapsuleChunks(capsule), {
            adapter,
            store,
            pinnedIdentity: capsule.embeddingModelIdentity,
            concurrency: 1,
            now,
            idSource,
        });
        if (errors.length === 0) {
            continue;
        }
        const codes = errors.map(({ code }) => code).join(",");
        throw new Error(`embedding seeding failed for capsule ${String(capsule.id)}: ${codes}`);
    }
}
71
/**
 * Returns the capsule ids a query is allowed to draw references from.
 *
 * A `"capsule"` scope yields a one-element array holding its `capsuleId`; any other
 * scope kind yields the scope's own `capsuleIds` value as-is.
 */
function scopeCapsuleIds(query) {
    const { scope } = query;
    return scope.kind === "capsule" ? [scope.capsuleId] : scope.capsuleIds;
}
76
/**
 * Translates a fixture query into the query object handed to the retrieval runner.
 *
 * - `text` is always `queryText`.
 * - `topK` is copied only when the fixture query defines it.
 * - Queries flagged `expectedNoEvidence === true` get `minScore: 0.99`, a deliberately
 *   high floor so that unrelated chunks are dropped.
 * - A `"capsule"` scope sets `capsuleId`; every other scope sets `capsuleSetId`.
 */
function buildRetrievalQuery(query, queryText) {
    const retrieval = { text: queryText };
    if (query.topK !== undefined) {
        retrieval.topK = query.topK;
    }
    if (query.expectedNoEvidence === true) {
        retrieval.minScore = 0.99;
    }
    const { scope } = query;
    if (scope.kind === "capsule") {
        retrieval.capsuleId = scope.capsuleId;
    }
    else {
        retrieval.capsuleSetId = scope.capsuleSetId;
    }
    return retrieval;
}
90
/**
 * Runs one fixture query through `runLocalKnowledgeRetrieval` and scores the outcome.
 *
 * The query text is wrapped in a topic marker when the query declares a `topic`, so the
 * scripted adapter can apply the matching boost. The adapter uses the query's own
 * `queryEmbeddingIdentity` when present and the seeded identity otherwise.
 *
 * @param store  Knowledge store to retrieve from.
 * @param query  Fixture query (scope, expectations, optional topic/topK/budget).
 * @param seeded Result of `seedFixture`.
 * @param now    Clock; read once directly before and once directly after retrieval.
 * @returns `{ query, references, noEvidence, reason?, scores }` where `scores.latencyTicks`
 *          is the difference between the two clock reads.
 */
async function runOneQuery(store, query, seeded, now) {
    const { topic, text } = query;
    const queryText = topic === undefined ? text : withTopicMarker(text, topic);
    const embeddingAdapter = createScriptedEmbeddingAdapter({
        identity: query.queryEmbeddingIdentity ?? seeded.identity,
        topicBoosts: seeded.topicBoosts,
    });
    const retrievalQuery = buildRetrievalQuery(query, queryText);
    const startedAt = now();
    const outcome = await runLocalKnowledgeRetrieval({ store, embeddingAdapter }, retrievalQuery);
    const finishedAt = now();
    const { references, noEvidence, reason } = outcome;
    // A query without declared ground truth is scored against an empty expectation list.
    const expectedIds = query.expectedChunkIds ?? [];
    const wantsNoEvidence = query.expectedNoEvidence === true;
    const scores = {
        recall: scoreRecall(references, expectedIds),
        precision: scorePrecision(references, expectedIds),
        meanReciprocalRank: scoreMeanReciprocalRank(references, expectedIds),
        ndcg: scoreNdcg(references, expectedIds),
        sourceIsolation: scoreSourceIsolation(references, scopeCapsuleIds(query)),
        citationQuality: scoreCitationQuality(references, seeded.chunkUnitKinds),
        noEvidenceAccuracy: scoreNoEvidenceAccuracy(noEvidence, wantsNoEvidence, reason, query.expectedNoEvidenceReason),
        contextBudgetFit: scoreContextBudgetFit(references, seeded.chunkTokenCounts, query.contextBudgetTokens),
        latencyTicks: finishedAt - startedAt,
    };
    return {
        query,
        references,
        noEvidence,
        ...(reason !== undefined ? { reason } : {}),
        scores,
    };
}
122
+ // ─── Aggregation ─────────────────────────────────────────────────────────────
123
/**
 * Optionally asks a model judge to rate every per-query evaluation and averages the
 * ratings.
 *
 * Judge calls are awaited one after another, in `perQuery` order.
 *
 * @param modelJudge Judge with an async `judge(input)` method, or `undefined`.
 * @param fixture    Fixture; only `id` is read.
 * @param perQuery   Per-query evaluations as produced by `runOneQuery`.
 * @returns `undefined` when no judge was supplied; otherwise
 *          `{ groundedness, faithfulness }` holding the mean of each judged value.
 */
async function runModelJudge(modelJudge, fixture, perQuery) {
    if (modelJudge === undefined) {
        return undefined;
    }
    const groundedness = [];
    const faithfulness = [];
    for (const { query, references, noEvidence, reason } of perQuery) {
        const verdict = await modelJudge.judge({
            fixtureId: fixture.id,
            queryId: query.id,
            queryText: query.text,
            references,
            noEvidence,
            ...(reason !== undefined ? { reason } : {}),
        });
        groundedness.push(verdict.groundedness);
        faithfulness.push(verdict.faithfulness);
    }
    return {
        groundedness: meanOf(groundedness),
        faithfulness: meanOf(faithfulness),
    };
}
142
/**
 * Arithmetic mean of a list of numbers, summed left to right.
 *
 * @param values Array of numbers.
 * @returns The mean, or `0` for an empty array (avoids a `0 / 0` NaN).
 */
function meanOf(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const total = values.reduce((running, value) => running + value, 0);
    return total / count;
}
150
/**
 * Folds per-query scores into the final scorecard.
 *
 * Each gated dimension is the mean of that score across all queries. `latencyMs` is the
 * SUM of the per-query `latencyTicks` (clock ticks, not necessarily milliseconds — the
 * unit depends on the clock the run used). The run passes only when every gated
 * dimension is at or above its entry in `PASS_THRESHOLDS`; latency is not gated.
 *
 * @param fixture     Fixture; only `id` is read.
 * @param runId       Identifier recorded on the scorecard.
 * @param perQuery    Per-query evaluations carrying a `scores` object.
 * @param modelJudged Optional model-judged aggregate; the key is omitted when `undefined`.
 * @returns `{ fixtureId, runId, dimensions, passed, modelJudged? }`.
 */
function buildScorecard(fixture, runId, perQuery, modelJudged) {
    // Order matters twice: it fixes the key order of `dimensions` and the order in which
    // thresholds are compared.
    const gatedDimensions = [
        "recall",
        "precision",
        "meanReciprocalRank",
        "ndcg",
        "sourceIsolation",
        "citationQuality",
        "noEvidenceAccuracy",
        "contextBudgetFit",
    ];
    const dimensions = {};
    for (const name of gatedDimensions) {
        dimensions[name] = meanOf(perQuery.map((entry) => entry.scores[name]));
    }
    dimensions.latencyMs = perQuery.reduce((total, entry) => total + entry.scores.latencyTicks, 0);
    const passed = gatedDimensions.every((name) => dimensions[name] >= PASS_THRESHOLDS[name]);
    return {
        fixtureId: fixture.id,
        runId,
        dimensions,
        passed,
        ...(modelJudged !== undefined ? { modelJudged } : {}),
    };
}
174
+ // ─── Default clock ───────────────────────────────────────────────────────────
175
// Builds a fresh monotonic integer clock: the returned function yields 0 on its first
// call, 1 on its second, and so on. A latency measured with it is the number of ticks
// between two reads. `runOneQuery` reads the clock immediately before and immediately
// after retrieval and does not hand the clock to the retrieval call, so with this clock
// every query reports `latencyTicks = 1`.
function defaultClock() {
    let nextTick = 0;
    return () => {
        const tick = nextTick;
        nextTick += 1;
        return tick;
    };
}
185
+ // ─── Public entrypoint ───────────────────────────────────────────────────────
186
/**
 * Public entrypoint: evaluates `fixture` end to end and returns its scorecard.
 *
 * Steps: create a temporary directory, open a knowledge store inside it, seed the
 * fixture, embed all chunks, run every query sequentially, optionally run the model
 * judge, and build the scorecard.
 *
 * Cleanup is layered so nothing leaks on failure:
 *  - the temp directory is removed even when opening the store throws;
 *  - the temp directory is removed even when `store.close()` throws.
 *
 * @param fixture Retrieval eval fixture (`id`, `capsules`, `queries`).
 * @param deps    Optional overrides: `now` (clock, defaults to a per-run tick counter),
 *                `runId` (defaults to `eval-<fixture.id>`), `modelJudge`.
 * @returns Promise resolving to the scorecard built by `buildScorecard`.
 */
export async function runRetrievalEval(fixture, deps = {}) {
    const now = deps.now ?? defaultClock();
    const runId = deps.runId ?? `eval-${fixture.id}`;
    const dir = mkdtempSync(join(tmpdir(), "keiko-eval-"));
    try {
        // Opened inside the outer `try` so a failed open still removes `dir`.
        const store = openKnowledgeStore({ dbPath: join(dir, "eval.db") });
        try {
            const seeded = seedFixture(store, fixture);
            await embedAllChunks(store, fixture, seeded, now);
            const perQuery = [];
            // Queries run one at a time so clock reads and store access stay ordered.
            for (const query of fixture.queries) {
                perQuery.push(await runOneQuery(store, query, seeded, now));
            }
            const modelJudged = await runModelJudge(deps.modelJudge, fixture, perQuery);
            return buildScorecard(fixture, runId, perQuery, modelJudged);
        }
        finally {
            store.close();
        }
    }
    finally {
        // Runs last, after the store is closed (or failed to open/close).
        rmSync(dir, { recursive: true, force: true });
    }
}
@@ -0,0 +1,13 @@
1
+ import type { EmbeddingModelIdentity } from "@oscharko-dev/keiko-contracts";
2
+ import type { OpenAIEmbeddingAdapter } from "@oscharko-dev/keiko-model-gateway";
3
+ export declare function fnv1a32(input: string): number;
4
+ export interface ScriptedEmbeddingAdapterOptions {
5
+ readonly identity: EmbeddingModelIdentity;
6
+ readonly topicBoosts?: Readonly<Record<string, number>>;
7
+ readonly endpoint?: string;
8
+ readonly apiKey?: string;
9
+ readonly apiKeyHeaderName?: string;
10
+ }
11
+ export declare function createScriptedEmbeddingAdapter(options: ScriptedEmbeddingAdapterOptions): OpenAIEmbeddingAdapter;
12
+ export declare function withTopicMarker(text: string, topic: string): string;
13
+ //# sourceMappingURL=scripted-embedding-adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scripted-embedding-adapter.d.ts","sourceRoot":"","sources":["../../src/evaluations/scripted-embedding-adapter.ts"],"names":[],"mappings":"AA0BA,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAC;AAC5E,OAAO,KAAK,EACV,sBAAsB,EAGvB,MAAM,mCAAmC,CAAC;AAW3C,wBAAgB,OAAO,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAQ7C;AA4ED,MAAM,WAAW,+BAA+B;IAK9C,QAAQ,CAAC,QAAQ,EAAE,sBAAsB,CAAC;IAI1C,QAAQ,CAAC,WAAW,CAAC,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAGxD,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CACpC;AA+BD,wBAAgB,8BAA8B,CAC5C,OAAO,EAAE,+BAA+B,GACvC,sBAAsB,CA6BxB;AAKD,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAKnE"}
@@ -0,0 +1,163 @@
1
+ // Scripted (offline, deterministic) embedding adapter for the retrieval eval harness
2
+ // (Epic #189, Issue #268). Implements `OpenAIEmbeddingAdapter` so the harness can plug
3
+ // directly into #199's retrieval runner — same code path as production, only the model
4
+ // call is replaced.
5
+ //
6
+ // Determinism guarantees (load-bearing for byte-identical scorecards):
7
+ // - No `Date.now()`, `Math.random()`, `performance.now()`, or `Date()` reads.
8
+ // - No global mutable state. The adapter holds only the immutable identity + the
9
+ // constructor's `topicBoosts` map; nothing mutates after construction.
10
+ // - No `fetch` import, no network IO of any kind. The `request` method always resolves
11
+ // synchronously through `Promise.resolve`.
12
+ //
13
+ // Vector layout: a `vectorDimensions`-wide `Float32Array` filled by an FNV-1a hash of the
14
+ // input string (32-bit, see RFC reference text). Lane 0 carries a normalised input length
15
+ // signal so two strings of clearly different length never collide on the leading lane —
16
+ // this matters because the hash collision space on short strings is small, and we want
17
+ // the cosine of two distinct inputs to stay strictly < 1.
18
+ //
19
+ // Topic salt: every chunk in a fixture may declare an optional `topic`. The scripted
20
+ // adapter accepts a map `{ [topic]: boost }` — for any request whose text contains a known
21
+ // topic marker (a `[[topic]]` envelope in the input string), the corresponding boost is
22
+ // blended into the produced vector. This lets a fixture make the ground-truth chunk for a
23
+ // query verifiably the top result without depending on real semantic similarity. The
24
+ // marker is parsed; the marker itself is stripped from the FNV input so two queries with
25
+ // the same body but different topics still hash to similar (not identical) vectors.
26
+ // ─── FNV-1a 32-bit ───────────────────────────────────────────────────────────
27
+ // We use FNV-1a over UTF-16 code units (`charCodeAt`) so the hash is locale-independent
28
+ // and identical across every JS engine the package targets. The literals come from
29
+ // http://www.isthe.com/chongo/tech/comp/fnv/ — `OFFSET_BASIS = 2166136261` (0x811c9dc5),
30
+ // `PRIME = 16777619` (0x01000193). Multiplication uses `Math.imul` so it stays exact in
31
+ // 32-bit even on values that would otherwise round above 2^53.
32
const FNV_OFFSET_BASIS = 0x811c9dc5;
const FNV_PRIME = 0x01000193;
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `input`.
 *
 * Each step XORs the next code unit into the state and multiplies by the FNV prime using
 * `Math.imul`, which keeps the product exact in 32-bit arithmetic.
 *
 * @param input String to hash.
 * @returns The hash as an unsigned 32-bit integer (the empty string yields the offset basis).
 */
export function fnv1a32(input) {
    let state = FNV_OFFSET_BASIS;
    let index = 0;
    while (index < input.length) {
        state = Math.imul(state ^ input.charCodeAt(index), FNV_PRIME);
        index += 1;
    }
    // `>>> 0` reinterprets the signed 32-bit state as unsigned.
    return state >>> 0;
}
43
+ // ─── Topic markers ───────────────────────────────────────────────────────────
44
+ // A fixture embeds a topic marker inside the text it asks the adapter to embed. The format
45
+ // `[[topic:NAME]]` is chosen so it is impossible to occur in natural text the harness
46
+ // might emit by accident — the runner injects it explicitly when seeding chunks. The
47
+ // adapter strips the marker before hashing so the FNV component remains stable as topics
48
+ // are added or renamed.
49
const TOPIC_MARKER_PATTERN = /\[\[topic:([a-zA-Z0-9_-]+)\]\]/g;
const VALID_TOPIC_PATTERN = /^[a-zA-Z0-9_-]+$/;
/**
 * Pulls every `[[topic:NAME]]` marker out of `input`.
 *
 * @param input Text that may contain topic markers.
 * @returns `{ topics, stripped }`: the marker names in order of appearance, and the
 *          input with all markers removed.
 */
function extractTopics(input) {
    // Work on a private copy of the pattern: the shared constant carries the `/g` flag
    // and is therefore stateful (`lastIndex`), which must never leak between calls.
    const markers = new RegExp(TOPIC_MARKER_PATTERN.source, "g");
    const topics = Array.from(input.matchAll(markers), (found) => found[1]);
    const stripped = input.replace(markers, "");
    return { topics, stripped };
}
66
+ // ─── Vector synthesis ────────────────────────────────────────────────────────
67
+ // Lane 0: normalised input length (clipped to `[0, 1]` so it interacts well with cosine).
68
+ // Lanes 1..dim-1: a permutation of the FNV hash mixed with the lane index. The mix uses
69
+ // `Math.imul` for 32-bit arithmetic, then squashes to `[-1, 1]` so cosine remains in its
70
+ // usual range. A topic salt adds a deterministic "topic vector" — derived from the
71
+ // topic-name hash — scaled by the configured boost.
72
+ const LENGTH_NORMALISATION_DIVISOR = 1024;
73
/**
 * Derives the value of one vector lane from a hash.
 *
 * The lane index is XOR-ed into the hash and the result multiplied by the FNV prime
 * (32-bit, via `Math.imul`) so each lane gets a different but deterministic value.
 *
 * @param hash      32-bit hash of the input.
 * @param laneIndex Index of the lane being filled.
 * @returns A number in `[-1, 1)`.
 */
function laneFromHash(hash, laneIndex) {
    const scrambled = Math.imul(hash ^ laneIndex, FNV_PRIME) >>> 0;
    // Scale the unsigned 32-bit value to [0, 1), then stretch and shift it to [-1, 1).
    const unitInterval = scrambled / 0x100000000;
    return unitInterval * 2 - 1;
}
80
/**
 * Fills `vector` in place with the base (topic-free) embedding.
 *
 * Lane 0 receives the length of `stripped` divided by `LENGTH_NORMALISATION_DIVISOR`,
 * capped at 1. Every remaining lane receives `laneFromHash(hash, lane)`.
 *
 * @param vector   Numeric array to overwrite.
 * @param hash     Hash of the marker-stripped input.
 * @param stripped Input text with topic markers removed.
 */
function fillBaseVector(vector, hash, stripped) {
    const lengthSignal = stripped.length / LENGTH_NORMALISATION_DIVISOR;
    vector[0] = Math.min(lengthSignal, 1);
    const laneCount = vector.length;
    for (let lane = 1; lane < laneCount; lane += 1) {
        vector[lane] = laneFromHash(hash, lane);
    }
}
89
/**
 * Blends a topic-derived vector into `vector`, in place.
 *
 * For every lane except lane 0 the new value is
 * `(1 - boost) * base + boost * topicLane`, where `topicLane` is derived from the hash of
 * `"topic:<topic>"`. With `boost = 1` those lanes become the pure topic vector. Lane 0
 * (the length signal) is deliberately left untouched.
 *
 * @param vector Numeric array to modify.
 * @param topic  Topic name.
 * @param boost  Blend weight applied to the topic lanes.
 */
function applyTopicBoost(vector, topic, boost) {
    const topicHash = fnv1a32(`topic:${topic}`);
    const baseWeight = 1 - boost;
    for (let lane = 1; lane < vector.length; lane += 1) {
        const base = vector[lane] ?? 0;
        vector[lane] = baseWeight * base + boost * laneFromHash(topicHash, lane);
    }
}
104
/**
 * Restricts a boost to the closed interval `[0, 1]`.
 *
 * @param value Raw boost.
 * @returns `0` for `NaN` or negative values, `1` for values above 1, otherwise `value`.
 */
function clampBoost(value) {
    if (Number.isNaN(value) || value < 0) {
        return 0;
    }
    return value > 1 ? 1 : value;
}
113
/**
 * Chooses the boost for a request: the first topic in `topics` that is an OWN property
 * of `boosts` with a defined value wins.
 *
 * Only one boost is ever applied per request. The own-property check keeps inherited
 * names such as `toString` from being treated as topics.
 *
 * @param topics Topic names in order of appearance.
 * @param boosts Map from topic name to raw boost.
 * @returns `{ topic, boost }` with the boost clamped via `clampBoost`, or `undefined`
 *          when no topic has a configured boost.
 */
function resolveBoost(topics, boosts) {
    for (const topic of topics) {
        if (!Object.prototype.hasOwnProperty.call(boosts, topic)) {
            continue;
        }
        const configured = boosts[topic];
        if (configured !== undefined) {
            return { topic, boost: clampBoost(configured) };
        }
    }
    return undefined;
}
127
/**
 * Creates an offline embedding adapter whose `request` synthesises vectors from the
 * input text instead of calling a model.
 *
 * `request(req)`:
 *  1. splits `req.input` into topic markers and marker-free text,
 *  2. hashes the marker-free text and fills a `Float32Array` of
 *     `identity.vectorDimensions` lanes from it,
 *  3. blends in a topic boost when one of the markers has a configured boost,
 *  4. resolves to `{ ok: true, value: { vector, modelId, modelRevision? } }`.
 *
 * @param options `identity` (required), plus optional `topicBoosts` (defaults to `{}`),
 *                `endpoint` (defaults to `"https://scripted.local/v1"`), `apiKey`
 *                (defaults to `"scripted-test-key"`) and `apiKeyHeaderName`.
 * @returns `{ endpoint, apiKey, apiKeyHeaderName?, request }`; `apiKeyHeaderName` is
 *          present only when it was supplied.
 */
export function createScriptedEmbeddingAdapter(options) {
    const { identity, apiKeyHeaderName } = options;
    const topicBoosts = options.topicBoosts ?? {};
    const endpoint = options.endpoint ?? "https://scripted.local/v1";
    const apiKey = options.apiKey ?? "scripted-test-key";
    async function request(req) {
        const { topics, stripped } = extractTopics(req.input);
        const vector = new Float32Array(identity.vectorDimensions);
        fillBaseVector(vector, fnv1a32(stripped), stripped);
        const blend = resolveBoost(topics, topicBoosts);
        if (blend !== undefined) {
            applyTopicBoost(vector, blend.topic, blend.boost);
        }
        const { modelId, modelRevision } = identity;
        const value = {
            vector,
            modelId,
            ...(modelRevision !== undefined ? { modelRevision } : {}),
        };
        return Promise.resolve({ ok: true, value });
    }
    return {
        endpoint,
        apiKey,
        ...(apiKeyHeaderName !== undefined ? { apiKeyHeaderName } : {}),
        request,
    };
}
156
+ // ─── Marker helpers ──────────────────────────────────────────────────────────
157
+ // Exported so fixtures + runner can apply markers without hard-coding the format.
158
/**
 * Prefixes `text` with a `[[topic:<topic>]]` marker.
 *
 * @param text  Text to tag.
 * @param topic Topic name; must match `VALID_TOPIC_PATTERN`.
 * @returns The marker immediately followed by `text`.
 * @throws Error when `topic` does not match `VALID_TOPIC_PATTERN`.
 */
export function withTopicMarker(text, topic) {
    if (VALID_TOPIC_PATTERN.test(topic)) {
        return `[[topic:${topic}]]${text}`;
    }
    throw new Error(`invalid eval topic marker: ${topic}`);
}