@oscharko-dev/keiko-local-knowledge 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. package/dist/.tsbuildinfo +1 -0
  2. package/dist/bounded-document-extraction.d.ts +27 -0
  3. package/dist/bounded-document-extraction.d.ts.map +1 -0
  4. package/dist/bounded-document-extraction.js +214 -0
  5. package/dist/capsule-lifecycle.d.ts +33 -0
  6. package/dist/capsule-lifecycle.d.ts.map +1 -0
  7. package/dist/capsule-lifecycle.js +292 -0
  8. package/dist/capsule-set-lifecycle.d.ts +15 -0
  9. package/dist/capsule-set-lifecycle.d.ts.map +1 -0
  10. package/dist/capsule-set-lifecycle.js +158 -0
  11. package/dist/chunking/chunker-persist.d.ts +36 -0
  12. package/dist/chunking/chunker-persist.d.ts.map +1 -0
  13. package/dist/chunking/chunker-persist.js +74 -0
  14. package/dist/chunking/chunker-runner.d.ts +9 -0
  15. package/dist/chunking/chunker-runner.d.ts.map +1 -0
  16. package/dist/chunking/chunker-runner.js +218 -0
  17. package/dist/chunking/chunker.d.ts +7 -0
  18. package/dist/chunking/chunker.d.ts.map +1 -0
  19. package/dist/chunking/chunker.js +139 -0
  20. package/dist/chunking/citation-mapper.d.ts +4 -0
  21. package/dist/chunking/citation-mapper.d.ts.map +1 -0
  22. package/dist/chunking/citation-mapper.js +180 -0
  23. package/dist/chunking/index.d.ts +6 -0
  24. package/dist/chunking/index.d.ts.map +1 -0
  25. package/dist/chunking/index.js +8 -0
  26. package/dist/chunking/token-estimator.d.ts +3 -0
  27. package/dist/chunking/token-estimator.d.ts.map +1 -0
  28. package/dist/chunking/token-estimator.js +26 -0
  29. package/dist/chunking/types.d.ts +49 -0
  30. package/dist/chunking/types.d.ts.map +1 -0
  31. package/dist/chunking/types.js +26 -0
  32. package/dist/composition.d.ts +57 -0
  33. package/dist/composition.d.ts.map +1 -0
  34. package/dist/composition.js +310 -0
  35. package/dist/conversation/citation-attacher.d.ts +8 -0
  36. package/dist/conversation/citation-attacher.d.ts.map +1 -0
  37. package/dist/conversation/citation-attacher.js +55 -0
  38. package/dist/conversation/citation-excerpts.d.ts +4 -0
  39. package/dist/conversation/citation-excerpts.d.ts.map +1 -0
  40. package/dist/conversation/citation-excerpts.js +41 -0
  41. package/dist/conversation/grounded-answer-runner.d.ts +9 -0
  42. package/dist/conversation/grounded-answer-runner.d.ts.map +1 -0
  43. package/dist/conversation/grounded-answer-runner.js +61 -0
  44. package/dist/conversation/index.d.ts +5 -0
  45. package/dist/conversation/index.d.ts.map +1 -0
  46. package/dist/conversation/index.js +7 -0
  47. package/dist/conversation/model-gateway-answer-generator.d.ts +28 -0
  48. package/dist/conversation/model-gateway-answer-generator.d.ts.map +1 -0
  49. package/dist/conversation/model-gateway-answer-generator.js +105 -0
  50. package/dist/conversation/types.d.ts +35 -0
  51. package/dist/conversation/types.d.ts.map +1 -0
  52. package/dist/conversation/types.js +24 -0
  53. package/dist/discovery/discovery-runner.d.ts +23 -0
  54. package/dist/discovery/discovery-runner.d.ts.map +1 -0
  55. package/dist/discovery/discovery-runner.js +109 -0
  56. package/dist/discovery/extract-progressive.d.ts +17 -0
  57. package/dist/discovery/extract-progressive.d.ts.map +1 -0
  58. package/dist/discovery/extract-progressive.js +522 -0
  59. package/dist/discovery/extract.d.ts +26 -0
  60. package/dist/discovery/extract.d.ts.map +1 -0
  61. package/dist/discovery/extract.js +906 -0
  62. package/dist/discovery/glob.d.ts +10 -0
  63. package/dist/discovery/glob.d.ts.map +1 -0
  64. package/dist/discovery/glob.js +72 -0
  65. package/dist/discovery/index.d.ts +6 -0
  66. package/dist/discovery/index.d.ts.map +1 -0
  67. package/dist/discovery/index.js +8 -0
  68. package/dist/discovery/media-type.d.ts +4 -0
  69. package/dist/discovery/media-type.d.ts.map +1 -0
  70. package/dist/discovery/media-type.js +62 -0
  71. package/dist/discovery/persist.d.ts +63 -0
  72. package/dist/discovery/persist.d.ts.map +1 -0
  73. package/dist/discovery/persist.js +345 -0
  74. package/dist/discovery/test-support.d.ts +16 -0
  75. package/dist/discovery/test-support.d.ts.map +1 -0
  76. package/dist/discovery/test-support.js +127 -0
  77. package/dist/discovery/types.d.ts +63 -0
  78. package/dist/discovery/types.d.ts.map +1 -0
  79. package/dist/discovery/types.js +28 -0
  80. package/dist/discovery/walk.d.ts +12 -0
  81. package/dist/discovery/walk.d.ts.map +1 -0
  82. package/dist/discovery/walk.js +302 -0
  83. package/dist/errors.d.ts +13 -0
  84. package/dist/errors.d.ts.map +1 -0
  85. package/dist/errors.js +22 -0
  86. package/dist/evaluations/dimensions.d.ts +14 -0
  87. package/dist/evaluations/dimensions.d.ts.map +1 -0
  88. package/dist/evaluations/dimensions.js +191 -0
  89. package/dist/evaluations/fixtures.d.ts +18 -0
  90. package/dist/evaluations/fixtures.d.ts.map +1 -0
  91. package/dist/evaluations/fixtures.js +858 -0
  92. package/dist/evaluations/index.d.ts +7 -0
  93. package/dist/evaluations/index.d.ts.map +1 -0
  94. package/dist/evaluations/index.js +10 -0
  95. package/dist/evaluations/report.d.ts +3 -0
  96. package/dist/evaluations/report.d.ts.map +1 -0
  97. package/dist/evaluations/report.js +31 -0
  98. package/dist/evaluations/runner-seed.d.ts +12 -0
  99. package/dist/evaluations/runner-seed.d.ts.map +1 -0
  100. package/dist/evaluations/runner-seed.js +175 -0
  101. package/dist/evaluations/runner.d.ts +8 -0
  102. package/dist/evaluations/runner.d.ts.map +1 -0
  103. package/dist/evaluations/runner.js +205 -0
  104. package/dist/evaluations/scripted-embedding-adapter.d.ts +13 -0
  105. package/dist/evaluations/scripted-embedding-adapter.d.ts.map +1 -0
  106. package/dist/evaluations/scripted-embedding-adapter.js +163 -0
  107. package/dist/evaluations/types.d.ts +116 -0
  108. package/dist/evaluations/types.d.ts.map +1 -0
  109. package/dist/evaluations/types.js +27 -0
  110. package/dist/index.d.ts +23 -0
  111. package/dist/index.d.ts.map +1 -0
  112. package/dist/index.js +41 -0
  113. package/dist/indexing/bounded-indexing.d.ts +41 -0
  114. package/dist/indexing/bounded-indexing.d.ts.map +1 -0
  115. package/dist/indexing/bounded-indexing.js +240 -0
  116. package/dist/indexing/checkpoint-persist.d.ts +8 -0
  117. package/dist/indexing/checkpoint-persist.d.ts.map +1 -0
  118. package/dist/indexing/checkpoint-persist.js +135 -0
  119. package/dist/indexing/checkpoint-resume.d.ts +20 -0
  120. package/dist/indexing/checkpoint-resume.d.ts.map +1 -0
  121. package/dist/indexing/checkpoint-resume.js +50 -0
  122. package/dist/indexing/embedding-batcher.d.ts +3 -0
  123. package/dist/indexing/embedding-batcher.d.ts.map +1 -0
  124. package/dist/indexing/embedding-batcher.js +390 -0
  125. package/dist/indexing/index.d.ts +7 -0
  126. package/dist/indexing/index.d.ts.map +1 -0
  127. package/dist/indexing/index.js +11 -0
  128. package/dist/indexing/job-persist.d.ts +46 -0
  129. package/dist/indexing/job-persist.d.ts.map +1 -0
  130. package/dist/indexing/job-persist.js +157 -0
  131. package/dist/indexing/job-resume.d.ts +4 -0
  132. package/dist/indexing/job-resume.d.ts.map +1 -0
  133. package/dist/indexing/job-resume.js +14 -0
  134. package/dist/indexing/orchestrator.d.ts +3 -0
  135. package/dist/indexing/orchestrator.d.ts.map +1 -0
  136. package/dist/indexing/orchestrator.js +1151 -0
  137. package/dist/indexing/types.d.ts +156 -0
  138. package/dist/indexing/types.d.ts.map +1 -0
  139. package/dist/indexing/types.js +30 -0
  140. package/dist/indexing/vector-persist.d.ts +32 -0
  141. package/dist/indexing/vector-persist.d.ts.map +1 -0
  142. package/dist/indexing/vector-persist.js +105 -0
  143. package/dist/parsers/_internal.d.ts +20 -0
  144. package/dist/parsers/_internal.d.ts.map +1 -0
  145. package/dist/parsers/_internal.js +122 -0
  146. package/dist/parsers/csv-parser.d.ts +3 -0
  147. package/dist/parsers/csv-parser.d.ts.map +1 -0
  148. package/dist/parsers/csv-parser.js +202 -0
  149. package/dist/parsers/docx-parser.d.ts +3 -0
  150. package/dist/parsers/docx-parser.d.ts.map +1 -0
  151. package/dist/parsers/docx-parser.js +390 -0
  152. package/dist/parsers/html-parser.d.ts +3 -0
  153. package/dist/parsers/html-parser.d.ts.map +1 -0
  154. package/dist/parsers/html-parser.js +310 -0
  155. package/dist/parsers/index.d.ts +15 -0
  156. package/dist/parsers/index.d.ts.map +1 -0
  157. package/dist/parsers/index.js +41 -0
  158. package/dist/parsers/json-parser.d.ts +3 -0
  159. package/dist/parsers/json-parser.d.ts.map +1 -0
  160. package/dist/parsers/json-parser.js +192 -0
  161. package/dist/parsers/large-document/capability-discovery.d.ts +27 -0
  162. package/dist/parsers/large-document/capability-discovery.d.ts.map +1 -0
  163. package/dist/parsers/large-document/capability-discovery.js +76 -0
  164. package/dist/parsers/large-document/diagnostics.d.ts +3 -0
  165. package/dist/parsers/large-document/diagnostics.d.ts.map +1 -0
  166. package/dist/parsers/large-document/diagnostics.js +11 -0
  167. package/dist/parsers/large-document/index.d.ts +15 -0
  168. package/dist/parsers/large-document/index.d.ts.map +1 -0
  169. package/dist/parsers/large-document/index.js +10 -0
  170. package/dist/parsers/large-document/legacy-format.d.ts +5 -0
  171. package/dist/parsers/large-document/legacy-format.d.ts.map +1 -0
  172. package/dist/parsers/large-document/legacy-format.js +25 -0
  173. package/dist/parsers/large-document/preflight.d.ts +9 -0
  174. package/dist/parsers/large-document/preflight.d.ts.map +1 -0
  175. package/dist/parsers/large-document/preflight.js +43 -0
  176. package/dist/parsers/large-document/progressive-extraction.d.ts +55 -0
  177. package/dist/parsers/large-document/progressive-extraction.d.ts.map +1 -0
  178. package/dist/parsers/large-document/progressive-extraction.js +123 -0
  179. package/dist/parsers/large-document/progressive-pdf.d.ts +20 -0
  180. package/dist/parsers/large-document/progressive-pdf.d.ts.map +1 -0
  181. package/dist/parsers/large-document/progressive-pdf.js +145 -0
  182. package/dist/parsers/large-document/synthetic-source.d.ts +9 -0
  183. package/dist/parsers/large-document/synthetic-source.d.ts.map +1 -0
  184. package/dist/parsers/large-document/synthetic-source.js +101 -0
  185. package/dist/parsers/large-document/window-builder.d.ts +24 -0
  186. package/dist/parsers/large-document/window-builder.d.ts.map +1 -0
  187. package/dist/parsers/large-document/window-builder.js +75 -0
  188. package/dist/parsers/ocr/index.d.ts +4 -0
  189. package/dist/parsers/ocr/index.d.ts.map +1 -0
  190. package/dist/parsers/ocr/index.js +4 -0
  191. package/dist/parsers/ocr/null-ocr-adapter.d.ts +3 -0
  192. package/dist/parsers/ocr/null-ocr-adapter.d.ts.map +1 -0
  193. package/dist/parsers/ocr/null-ocr-adapter.js +14 -0
  194. package/dist/parsers/ocr/ocr-pipeline-parser.d.ts +8 -0
  195. package/dist/parsers/ocr/ocr-pipeline-parser.d.ts.map +1 -0
  196. package/dist/parsers/ocr/ocr-pipeline-parser.js +147 -0
  197. package/dist/parsers/ocr/types.d.ts +16 -0
  198. package/dist/parsers/ocr/types.d.ts.map +1 -0
  199. package/dist/parsers/ocr/types.js +4 -0
  200. package/dist/parsers/parser-test-fixtures.d.ts +28 -0
  201. package/dist/parsers/parser-test-fixtures.d.ts.map +1 -0
  202. package/dist/parsers/parser-test-fixtures.js +139 -0
  203. package/dist/parsers/pdf-parser.d.ts +43 -0
  204. package/dist/parsers/pdf-parser.d.ts.map +1 -0
  205. package/dist/parsers/pdf-parser.js +388 -0
  206. package/dist/parsers/registry.d.ts +8 -0
  207. package/dist/parsers/registry.d.ts.map +1 -0
  208. package/dist/parsers/registry.js +57 -0
  209. package/dist/parsers/text-parser.d.ts +3 -0
  210. package/dist/parsers/text-parser.d.ts.map +1 -0
  211. package/dist/parsers/text-parser.js +214 -0
  212. package/dist/parsers/types.d.ts +53 -0
  213. package/dist/parsers/types.d.ts.map +1 -0
  214. package/dist/parsers/types.js +21 -0
  215. package/dist/parsers/unsupported-parser.d.ts +4 -0
  216. package/dist/parsers/unsupported-parser.d.ts.map +1 -0
  217. package/dist/parsers/unsupported-parser.js +97 -0
  218. package/dist/parsers/xlsx-parser.d.ts +3 -0
  219. package/dist/parsers/xlsx-parser.d.ts.map +1 -0
  220. package/dist/parsers/xlsx-parser.js +425 -0
  221. package/dist/privacy/audit-emitter.d.ts +5 -0
  222. package/dist/privacy/audit-emitter.d.ts.map +1 -0
  223. package/dist/privacy/audit-emitter.js +93 -0
  224. package/dist/privacy/diagnostic-redactor.d.ts +2 -0
  225. package/dist/privacy/diagnostic-redactor.d.ts.map +1 -0
  226. package/dist/privacy/diagnostic-redactor.js +153 -0
  227. package/dist/privacy/index.d.ts +5 -0
  228. package/dist/privacy/index.d.ts.map +1 -0
  229. package/dist/privacy/index.js +6 -0
  230. package/dist/privacy/retention-applier.d.ts +5 -0
  231. package/dist/privacy/retention-applier.d.ts.map +1 -0
  232. package/dist/privacy/retention-applier.js +88 -0
  233. package/dist/privacy/types.d.ts +98 -0
  234. package/dist/privacy/types.d.ts.map +1 -0
  235. package/dist/privacy/types.js +12 -0
  236. package/dist/qualityIntelligence/capsuleCorpus.d.ts +27 -0
  237. package/dist/qualityIntelligence/capsuleCorpus.d.ts.map +1 -0
  238. package/dist/qualityIntelligence/capsuleCorpus.js +58 -0
  239. package/dist/qualityIntelligence/index.d.ts +3 -0
  240. package/dist/qualityIntelligence/index.d.ts.map +1 -0
  241. package/dist/qualityIntelligence/index.js +5 -0
  242. package/dist/qualityIntelligence/qiHandoff.d.ts +36 -0
  243. package/dist/qualityIntelligence/qiHandoff.d.ts.map +1 -0
  244. package/dist/qualityIntelligence/qiHandoff.js +82 -0
  245. package/dist/retrieval/answer-grounding.d.ts +9 -0
  246. package/dist/retrieval/answer-grounding.d.ts.map +1 -0
  247. package/dist/retrieval/answer-grounding.js +31 -0
  248. package/dist/retrieval/context-pack-assembler.d.ts +24 -0
  249. package/dist/retrieval/context-pack-assembler.d.ts.map +1 -0
  250. package/dist/retrieval/context-pack-assembler.js +50 -0
  251. package/dist/retrieval/index.d.ts +6 -0
  252. package/dist/retrieval/index.d.ts.map +1 -0
  253. package/dist/retrieval/index.js +9 -0
  254. package/dist/retrieval/retrieval-runner.d.ts +10 -0
  255. package/dist/retrieval/retrieval-runner.d.ts.map +1 -0
  256. package/dist/retrieval/retrieval-runner.js +163 -0
  257. package/dist/retrieval/scoped-vector-search.d.ts +24 -0
  258. package/dist/retrieval/scoped-vector-search.d.ts.map +1 -0
  259. package/dist/retrieval/scoped-vector-search.js +864 -0
  260. package/dist/retrieval/types.d.ts +28 -0
  261. package/dist/retrieval/types.d.ts.map +1 -0
  262. package/dist/retrieval/types.js +33 -0
  263. package/dist/section-path-hash.d.ts +3 -0
  264. package/dist/section-path-hash.d.ts.map +1 -0
  265. package/dist/section-path-hash.js +9 -0
  266. package/dist/source-lifecycle.d.ts +14 -0
  267. package/dist/source-lifecycle.d.ts.map +1 -0
  268. package/dist/source-lifecycle.js +155 -0
  269. package/dist/source-routing-validation.d.ts +11 -0
  270. package/dist/source-routing-validation.d.ts.map +1 -0
  271. package/dist/source-routing-validation.js +140 -0
  272. package/dist/store-content-cipher.d.ts +11 -0
  273. package/dist/store-content-cipher.d.ts.map +1 -0
  274. package/dist/store-content-cipher.js +67 -0
  275. package/dist/store-content-encryption.d.ts +12 -0
  276. package/dist/store-content-encryption.d.ts.map +1 -0
  277. package/dist/store-content-encryption.js +275 -0
  278. package/dist/store-paths.d.ts +6 -0
  279. package/dist/store-paths.d.ts.map +1 -0
  280. package/dist/store-paths.js +61 -0
  281. package/dist/store.d.ts +30 -0
  282. package/dist/store.d.ts.map +1 -0
  283. package/dist/store.js +219 -0
  284. package/dist/testing.d.ts +47 -0
  285. package/dist/testing.d.ts.map +1 -0
  286. package/dist/testing.js +170 -0
  287. package/dist/version.d.ts +2 -0
  288. package/dist/version.d.ts.map +1 -0
  289. package/dist/version.js +4 -0
  290. package/package.json +43 -0
@@ -0,0 +1,858 @@
1
// Hand-crafted synthetic fixtures for the retrieval evaluation harness
// (Epic #189, Issue #268). Each fixture is a `const` typed by `RetrievalEvalFixture` so a
// caller can iterate them or pick one by id.
//
// Design constraints:
// - Topic salt: every chunk and matching query carry a topic marker. The scripted
//   embedding adapter routes vectors toward the marked topic so the ground-truth chunk
//   for a query becomes deterministically top-ranked.
// - Fixture diversity: the set covers direct lookup, capsule-set retrieval, no-evidence,
//   wrong-scope, stale-index, context-budget pressure, structured citations, and
//   multi-page citations without requiring customer data or network access.
12
/**
 * Embedding model identity attached to every capsule declared in the fixtures
 * of this module (via `embeddingModelIdentity`).
 *
 * Describes a 16-dimensional, cosine-metric embedding space.
 */
export const EVAL_EMBEDDING_IDENTITY = {
  provider: "openai",
  modelId: "text-embedding-eval",
  vectorDimensions: 16,
  vectorMetric: "cosine",
};
18
/**
 * Alternative embedding identity: a different model id ("text-embedding-eval-v2")
 * and 24 dimensions instead of the 16 used by `EVAL_EMBEDDING_IDENTITY`.
 *
 * NOTE(review): presumably used by a stale-index fixture to issue a query whose
 * embedding identity no longer matches the indexed capsule — confirm against the
 * consumer, which is not visible in this part of the file.
 */
export const STALE_QUERY_EMBEDDING_IDENTITY = {
  provider: "openai",
  modelId: "text-embedding-eval-v2",
  vectorDimensions: 24,
  vectorMetric: "cosine",
};
24
/**
 * Topic boost magnitude exported for the evaluation harness.
 *
 * NOTE(review): presumably the weight the scripted embedding adapter applies
 * when routing a vector toward its marked topic — the consumer is not visible
 * here, confirm before relying on this.
 */
export const EVAL_TOPIC_BOOST = 1.0;
25
/**
 * Marks a raw value as a chunk id. At runtime this is the identity function.
 *
 * NOTE(review): likely the compiled remnant of a TypeScript branded-type cast.
 *
 * @param {string} value - Raw chunk identifier.
 * @returns {string} The same value, unchanged.
 */
function chunkId(value) {
  return value;
}
28
/**
 * Marks a raw value as a document id. At runtime this is the identity function.
 *
 * NOTE(review): likely the compiled remnant of a TypeScript branded-type cast.
 *
 * @param {string} value - Raw document identifier.
 * @returns {string} The same value, unchanged.
 */
function documentId(value) {
  return value;
}
31
/**
 * Marks a raw value as a source id. At runtime this is the identity function.
 *
 * NOTE(review): likely the compiled remnant of a TypeScript branded-type cast.
 *
 * @param {string} value - Raw source identifier.
 * @returns {string} The same value, unchanged.
 */
function sourceId(value) {
  return value;
}
34
/**
 * Marks a raw value as a capsule id. At runtime this is the identity function.
 *
 * NOTE(review): likely the compiled remnant of a TypeScript branded-type cast.
 *
 * @param {string} value - Raw capsule identifier.
 * @returns {string} The same value, unchanged.
 */
function capsuleId(value) {
  return value;
}
37
/**
 * Direct-lookup fixture: a single capsule ("cap-single") holding one document
 * with one page unit and three chunks — two tagged "alpha" and one tagged
 * "noise".
 *
 * The only query ("q-alpha") is scoped to that capsule, carries the "alpha"
 * topic, and expects both alpha chunks within topK = 2.
 */
export const singleTopicFixture = {
  id: "single-topic",
  description: "One capsule with three chunks; query targets the two alpha chunks.",
  capsules: [
    {
      id: capsuleId("cap-single"),
      displayName: "Single Topic",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-single"),
          documents: [
            {
              id: documentId("doc-single"),
              safeDisplayName: "single.txt",
              parsedUnits: [
                {
                  id: "page-1",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 200,
                  },
                },
              ],
              chunks: [
                { id: chunkId("c-alpha-1"), text: "alpha chunk one body", topic: "alpha" },
                { id: chunkId("c-alpha-2"), text: "alpha chunk two body", topic: "alpha" },
                { id: chunkId("c-noise"), text: "noise chunk body", topic: "noise" },
              ],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-alpha",
      text: "what does alpha say?",
      topic: "alpha",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-single") },
      expectedChunkIds: [chunkId("c-alpha-1"), chunkId("c-alpha-2")],
      topK: 2,
    },
  ],
};
87
/**
 * Capsule-set retrieval fixture: two capsules ("cap-multi-a", "cap-multi-b"),
 * each with one document containing one chunk tagged "shared" and one chunk
 * with a capsule-private topic.
 *
 * The only query ("q-shared") uses a "capsule-set" scope listing both capsules
 * and expects the "shared" chunk from each within topK = 2.
 */
export const multiCapsuleFixture = {
  id: "multi-capsule",
  description: "Two capsules in one set; query pulls one chunk from each.",
  capsules: [
    {
      id: capsuleId("cap-multi-a"),
      displayName: "Multi A",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-multi-a"),
          documents: [
            {
              id: documentId("doc-multi-a"),
              safeDisplayName: "a.txt",
              parsedUnits: [
                {
                  id: "page-a",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [
                { id: chunkId("c-multi-a-shared"), text: "shared body a", topic: "shared" },
                { id: chunkId("c-multi-a-private"), text: "private a", topic: "private-a" },
              ],
            },
          ],
        },
      ],
    },
    {
      id: capsuleId("cap-multi-b"),
      displayName: "Multi B",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-multi-b"),
          documents: [
            {
              id: documentId("doc-multi-b"),
              safeDisplayName: "b.txt",
              parsedUnits: [
                {
                  id: "page-b",
                  unit: {
                    kind: "page",
                    pageNumber: 2,
                    pageLabel: "2",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [
                { id: chunkId("c-multi-b-shared"), text: "shared body b", topic: "shared" },
                { id: chunkId("c-multi-b-private"), text: "private b", topic: "private-b" },
              ],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-shared",
      text: "explain the shared topic",
      topic: "shared",
      scope: {
        kind: "capsule-set",
        capsuleSetId: "set-multi",
        capsuleIds: [capsuleId("cap-multi-a"), capsuleId("cap-multi-b")],
      },
      expectedChunkIds: [chunkId("c-multi-a-shared"), chunkId("c-multi-b-shared")],
      topK: 2,
    },
  ],
};
173
/**
 * No-evidence fixture: one capsule ("cap-no-evidence") whose only document
 * holds two chunks, both tagged "alpha".
 *
 * The only query ("q-beta") asks about beta and deliberately carries no
 * `topic` marker; it expects a no-evidence outcome with reason
 * "below-min-score".
 */
export const noEvidenceFixture = {
  id: "no-evidence",
  description: "Capsule about alpha; query about beta returns no evidence.",
  capsules: [
    {
      id: capsuleId("cap-no-evidence"),
      displayName: "No Evidence",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-no-evidence"),
          documents: [
            {
              id: documentId("doc-no-evidence"),
              safeDisplayName: "alpha-only.txt",
              parsedUnits: [
                {
                  id: "page-1",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [
                { id: chunkId("c-alpha-only-1"), text: "alpha body", topic: "alpha" },
                { id: chunkId("c-alpha-only-2"), text: "alpha body two", topic: "alpha" },
              ],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-beta",
      text: "tell me about beta",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-no-evidence") },
      expectedNoEvidence: true,
      expectedNoEvidenceReason: "below-min-score",
    },
  ],
};
221
/**
 * Ambiguous-query fixture: one capsule ("cap-ambiguous") with a single
 * document containing two chunks tagged "delta" and one tagged "noise".
 *
 * The only query ("q-ambiguous") carries the "delta" topic and accepts both
 * delta chunks within topK = 2.
 */
export const ambiguousQueryFixture = {
  id: "ambiguous-query",
  description: "Two chunks are equally acceptable; the query expects both in topK=2.",
  capsules: [
    {
      id: capsuleId("cap-ambiguous"),
      displayName: "Ambiguous",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-ambiguous"),
          documents: [
            {
              id: documentId("doc-ambiguous"),
              safeDisplayName: "ambiguous.txt",
              parsedUnits: [
                {
                  id: "page-1",
                  unit: {
                    kind: "page",
                    pageNumber: 3,
                    pageLabel: "3",
                    characterStart: 0,
                    characterEnd: 200,
                  },
                },
              ],
              chunks: [
                { id: chunkId("c-amb-1"), text: "answer one", topic: "delta" },
                { id: chunkId("c-amb-2"), text: "answer two", topic: "delta" },
                { id: chunkId("c-amb-noise"), text: "noise", topic: "noise" },
              ],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-ambiguous",
      text: "summarize delta",
      topic: "delta",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-ambiguous") },
      expectedChunkIds: [chunkId("c-amb-1"), chunkId("c-amb-2")],
      topK: 2,
    },
  ],
};
271
/**
 * Source-isolation fixture: two capsules ("cap-iso-a", "cap-iso-b"), each with
 * one chunk tagged with the same "scope" topic.
 *
 * The only query ("q-isolation") is scoped to "cap-iso-a" alone and expects
 * only that capsule's chunk ("c-iso-a") at topK = 1, so the identically tagged
 * chunk in "cap-iso-b" must not appear.
 */
export const sourceIsolationFixture = {
  id: "source-isolation",
  description: "Two capsules share a topic; scope to one capsule must not leak the other.",
  capsules: [
    {
      id: capsuleId("cap-iso-a"),
      displayName: "Isolation A",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-iso-a"),
          documents: [
            {
              id: documentId("doc-iso-a"),
              safeDisplayName: "iso-a.txt",
              parsedUnits: [
                {
                  id: "page-a",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [{ id: chunkId("c-iso-a"), text: "scope body a", topic: "scope" }],
            },
          ],
        },
      ],
    },
    {
      id: capsuleId("cap-iso-b"),
      displayName: "Isolation B",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-iso-b"),
          documents: [
            {
              id: documentId("doc-iso-b"),
              safeDisplayName: "iso-b.txt",
              parsedUnits: [
                {
                  id: "page-b",
                  unit: {
                    kind: "page",
                    pageNumber: 2,
                    pageLabel: "2",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [{ id: chunkId("c-iso-b"), text: "scope body b", topic: "scope" }],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-isolation",
      text: "retrieve the scoped body",
      topic: "scope",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-iso-a") },
      expectedChunkIds: [chunkId("c-iso-a")],
      topK: 1,
    },
  ],
};
347
/**
 * Wrong-scope fixture: capsule "cap-wrong-a" holds only an "alpha" chunk and
 * capsule "cap-wrong-b" holds only a "beta" chunk.
 *
 * The only query ("q-wrong-scope") carries the "beta" topic but is scoped to
 * "cap-wrong-a"; it expects a no-evidence outcome with reason
 * "below-min-score".
 */
export const wrongScopeFixture = {
  id: "wrong-scope",
  description: "Query targets a topic that only exists in an unselected capsule.",
  capsules: [
    {
      id: capsuleId("cap-wrong-a"),
      displayName: "Wrong Scope A",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-wrong-a"),
          documents: [
            {
              id: documentId("doc-wrong-a"),
              safeDisplayName: "a.txt",
              parsedUnits: [
                {
                  id: "page-a",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [{ id: chunkId("c-wrong-a"), text: "alpha only", topic: "alpha" }],
            },
          ],
        },
      ],
    },
    {
      id: capsuleId("cap-wrong-b"),
      displayName: "Wrong Scope B",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-wrong-b"),
          documents: [
            {
              id: documentId("doc-wrong-b"),
              safeDisplayName: "b.txt",
              parsedUnits: [
                {
                  id: "page-b",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 100,
                  },
                },
              ],
              chunks: [{ id: chunkId("c-wrong-b"), text: "beta only", topic: "beta" }],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-wrong-scope",
      text: "retrieve beta",
      topic: "beta",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-wrong-a") },
      expectedNoEvidence: true,
      expectedNoEvidenceReason: "below-min-score",
    },
  ],
};
423
/**
 * Multi-page citation fixture: one document ("doc-multi-page") with two page
 * units. Chunk "c-page-1" (topic "intro") is bound to "page-1" and chunk
 * "c-page-2" (topic "closing") is bound to "page-2" through `parsedUnitId`.
 *
 * The only query ("q-page-two") carries the "closing" topic and expects
 * "c-page-2" at topK = 1.
 */
export const multiPageFixture = {
  id: "multi-page",
  description: "One document spans two page units; query must cite page two.",
  capsules: [
    {
      id: capsuleId("cap-multi-page"),
      displayName: "Multi Page",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-multi-page"),
          documents: [
            {
              id: documentId("doc-multi-page"),
              safeDisplayName: "manual.txt",
              parsedUnits: [
                {
                  id: "page-1",
                  unit: {
                    kind: "page",
                    pageNumber: 1,
                    pageLabel: "1",
                    characterStart: 0,
                    characterEnd: 120,
                  },
                },
                {
                  id: "page-2",
                  unit: {
                    kind: "page",
                    pageNumber: 2,
                    pageLabel: "2",
                    characterStart: 121,
                    characterEnd: 240,
                  },
                },
              ],
              chunks: [
                {
                  id: chunkId("c-page-1"),
                  text: "page one body",
                  topic: "intro",
                  parsedUnitId: "page-1",
                },
                {
                  id: chunkId("c-page-2"),
                  text: "page two body",
                  topic: "closing",
                  parsedUnitId: "page-2",
                },
              ],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-page-two",
      text: "what is on the closing page?",
      topic: "closing",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-multi-page") },
      expectedChunkIds: [chunkId("c-page-2")],
      topK: 1,
    },
  ],
};
492
/**
 * Structured-citation fixture: one capsule ("cap-structured") with five
 * documents, each exposing a single parsed unit of a different shape and one
 * chunk bound to it through `parsedUnitId`:
 *
 * - policy.json   -> "json-path" unit (jsonPointer "/policy/title")
 * - scores.csv    -> "csv-row" unit (table "scores", rowIndex 2)
 * - controls.xlsx -> "csv-row" unit (table "Controls", rowIndex 16)
 * - guide.html    -> "html-block" unit (headingPath Guide > Overview)
 * - chapter.md    -> "section" unit (sectionPath Chapter 1 > Controls)
 *
 * Four queries target the json, csv, section, and xlsx chunks by topic, each
 * at topK = 1. No query in this fixture targets the html chunk.
 */
export const structuredFileFixture = {
  id: "structured-files",
  description: "Structured and semi-structured documents preserve unit-specific citations.",
  capsules: [
    {
      id: capsuleId("cap-structured"),
      displayName: "Structured",
      answerGroundingPolicy: "best-effort",
      embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
      sources: [
        {
          id: sourceId("src-structured"),
          documents: [
            {
              id: documentId("doc-json"),
              safeDisplayName: "policy.json",
              mediaType: "application/json",
              parserId: "json",
              parsedUnits: [
                {
                  id: "json-root",
                  unit: {
                    kind: "json-path",
                    jsonPointer: "/policy/title",
                    characterStart: 0,
                    characterEnd: 40,
                  },
                },
              ],
              chunks: [
                {
                  id: chunkId("c-json"),
                  text: "policy title block",
                  topic: "json-topic",
                  parsedUnitId: "json-root",
                },
              ],
            },
            {
              id: documentId("doc-csv"),
              safeDisplayName: "scores.csv",
              mediaType: "text/csv",
              parserId: "csv",
              parsedUnits: [
                {
                  id: "csv-row-2",
                  unit: {
                    kind: "csv-row",
                    tableName: "scores",
                    rowIndex: 2,
                    characterStart: 0,
                    characterEnd: 30,
                  },
                },
              ],
              chunks: [
                {
                  id: chunkId("c-csv"),
                  text: "csv row body",
                  topic: "csv-topic",
                  parsedUnitId: "csv-row-2",
                },
              ],
            },
            {
              id: documentId("doc-xlsx"),
              safeDisplayName: "controls.xlsx",
              mediaType: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
              parserId: "xlsx",
              parsedUnits: [
                {
                  id: "xlsx-row-17",
                  unit: {
                    kind: "csv-row",
                    tableName: "Controls",
                    rowIndex: 16,
                    characterStart: 0,
                    characterEnd: 64,
                  },
                },
              ],
              chunks: [
                {
                  id: chunkId("c-xlsx"),
                  text: "xlsx control row body",
                  topic: "xlsx-topic",
                  parsedUnitId: "xlsx-row-17",
                },
              ],
            },
            {
              id: documentId("doc-html"),
              safeDisplayName: "guide.html",
              mediaType: "text/html",
              parserId: "html",
              parsedUnits: [
                {
                  id: "html-block-1",
                  unit: {
                    kind: "html-block",
                    headingPath: ["Guide", "Overview"],
                    characterStart: 0,
                    characterEnd: 50,
                  },
                },
              ],
              chunks: [
                {
                  id: chunkId("c-html"),
                  text: "html body",
                  topic: "html-topic",
                  parsedUnitId: "html-block-1",
                },
              ],
            },
            {
              id: documentId("doc-section"),
              safeDisplayName: "chapter.md",
              parserId: "markdown",
              parsedUnits: [
                {
                  id: "section-1",
                  unit: {
                    kind: "section",
                    sectionPath: ["Chapter 1", "Controls"],
                    characterStart: 0,
                    characterEnd: 60,
                  },
                },
              ],
              chunks: [
                {
                  id: chunkId("c-section"),
                  text: "section body",
                  topic: "section-topic",
                  parsedUnitId: "section-1",
                },
              ],
            },
          ],
        },
      ],
    },
  ],
  queries: [
    {
      id: "q-json",
      text: "find the json policy title",
      topic: "json-topic",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-structured") },
      expectedChunkIds: [chunkId("c-json")],
      topK: 1,
    },
    {
      id: "q-csv",
      text: "find the csv row",
      topic: "csv-topic",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-structured") },
      expectedChunkIds: [chunkId("c-csv")],
      topK: 1,
    },
    {
      id: "q-section",
      text: "find the markdown section",
      topic: "section-topic",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-structured") },
      expectedChunkIds: [chunkId("c-section")],
      topK: 1,
    },
    {
      id: "q-xlsx",
      text: "find the xlsx control row",
      topic: "xlsx-topic",
      scope: { kind: "capsule", capsuleId: capsuleId("cap-structured") },
      expectedChunkIds: [chunkId("c-xlsx")],
      topK: 1,
    },
  ],
};
// Evaluation fixture context-budget: a single capsule (cap-budget) with one source and
// one document (budget.txt) that carries two chunks, both tagged with the topic budget.
// The only query, q-budget, is scoped to that capsule, asks for topK 2 and lists both
// chunk ids as expected results.
// NOTE(review): the budget is expressed in string length of the chunk texts, which
// presumably means the evaluation harness counts one token per character - confirm
// against the token estimator used by the evaluation run.
671
+ export const contextBudgetFixture = {
672
+ id: "context-budget",
673
+ description: "Returned chunks land exactly on the configured context-token budget.",
674
+ capsules: [
675
+ {
676
+ id: capsuleId("cap-budget"),
677
+ displayName: "Context Budget",
678
+ answerGroundingPolicy: "best-effort",
679
+ embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
680
+ sources: [
681
+ {
682
+ id: sourceId("src-budget"),
683
+ documents: [
684
+ {
685
+ id: documentId("doc-budget"),
686
+ safeDisplayName: "budget.txt",
687
+ parsedUnits: [
688
+ {
689
+ id: "page-1",
690
+ unit: {
691
+ kind: "page",
692
+ pageNumber: 1,
693
+ pageLabel: "1",
694
+ characterStart: 0,
695
+ characterEnd: 120,
696
+ },
697
+ },
698
+ ],
699
+ chunks: [
700
+ { id: chunkId("c-budget-1"), text: "budgetchunk1", topic: "budget" },
701
+ { id: chunkId("c-budget-2"), text: "budgetchunk2", topic: "budget" },
702
+ ],
703
+ },
704
+ ],
705
+ },
706
+ ],
707
+ },
708
+ ],
709
+ queries: [
710
+ {
711
+ id: "q-budget",
712
+ text: "return both budget chunks",
713
+ topic: "budget",
714
+ scope: { kind: "capsule", capsuleId: capsuleId("cap-budget") },
715
+ expectedChunkIds: [chunkId("c-budget-1"), chunkId("c-budget-2")],
716
+ topK: 2,
// Budget is the combined length of the two chunk texts above (12 + 12 = 24), so it
// stays in sync with the chunk literals only as long as these strings are edited together.
717
+ contextBudgetTokens: "budgetchunk1".length + "budgetchunk2".length,
718
+ },
719
+ ],
720
+ };
// Evaluation fixture stale-index: a single capsule (cap-stale) whose embeddingModelIdentity
// is EVAL_EMBEDDING_IDENTITY, containing one document (stale.txt) with one chunk on the
// topic stale. The only query, q-stale, supplies STALE_QUERY_EMBEDDING_IDENTITY as its
// queryEmbeddingIdentity and expects no evidence, with the reason
// incompatible-embedding-identity.
// NOTE(review): both identity constants are defined outside this excerpt; the fixture
// only makes sense if they differ - confirm at their definitions.
721
+ export const staleIndexFixture = {
722
+ id: "stale-index",
723
+ description: "Vectors were seeded under the pinned identity but the query adapter moved.",
724
+ capsules: [
725
+ {
726
+ id: capsuleId("cap-stale"),
727
+ displayName: "Stale Index",
728
+ answerGroundingPolicy: "best-effort",
729
+ embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
730
+ sources: [
731
+ {
732
+ id: sourceId("src-stale"),
733
+ documents: [
734
+ {
735
+ id: documentId("doc-stale"),
736
+ safeDisplayName: "stale.txt",
737
+ parsedUnits: [
738
+ {
739
+ id: "page-1",
740
+ unit: {
741
+ kind: "page",
742
+ pageNumber: 1,
743
+ pageLabel: "1",
744
+ characterStart: 0,
745
+ characterEnd: 100,
746
+ },
747
+ },
748
+ ],
749
+ chunks: [{ id: chunkId("c-stale"), text: "stale body", topic: "stale" }],
750
+ },
751
+ ],
752
+ },
753
+ ],
754
+ },
755
+ ],
756
+ queries: [
757
+ {
758
+ id: "q-stale",
759
+ text: "retrieve stale body",
760
+ topic: "stale",
761
+ scope: { kind: "capsule", capsuleId: capsuleId("cap-stale") },
// This query lists no expectedChunkIds and no topK; it only declares the expected
// no-evidence outcome and the identity used on the query side.
762
+ expectedNoEvidence: true,
763
+ expectedNoEvidenceReason: "incompatible-embedding-identity",
764
+ queryEmbeddingIdentity: STALE_QUERY_EMBEDDING_IDENTITY,
765
+ },
766
+ ],
767
+ };
// Evaluation fixture broad-query-diversity: a single capsule (cap-diversity) with one
// source and two markdown documents whose chunks all share the topic diverse.
//   - doc-div-a (controls-a.md) holds two chunks: c-div-a1 and c-div-a2.
//   - doc-div-b (controls-b.md) holds one chunk: c-div-b.
// The only query, q-diverse, asks for topK 2 and expects c-div-a1 and c-div-b, that is
// one chunk from each document; c-div-a2 is not in the expected list.
// NOTE(review): how strictly expectedChunkIds is compared against the returned set is
// decided by the evaluation harness, which is outside this excerpt.
768
+ export const broadQueryDiversityFixture = {
769
+ id: "broad-query-diversity",
770
+ description: "Broad query should prefer supporting evidence across documents over duplicates.",
771
+ capsules: [
772
+ {
773
+ id: capsuleId("cap-diversity"),
774
+ displayName: "Diversity",
775
+ answerGroundingPolicy: "best-effort",
776
+ embeddingModelIdentity: EVAL_EMBEDDING_IDENTITY,
777
+ sources: [
778
+ {
779
+ id: sourceId("src-diversity"),
780
+ documents: [
781
+ {
782
+ id: documentId("doc-div-a"),
783
+ safeDisplayName: "controls-a.md",
784
+ parsedUnits: [
785
+ {
786
+ id: "section-a",
787
+ unit: {
788
+ kind: "section",
789
+ sectionPath: ["Controls"],
790
+ characterStart: 0,
791
+ characterEnd: 120,
792
+ },
793
+ },
794
+ ],
795
+ chunks: [
796
+ {
797
+ id: chunkId("c-div-a1"),
798
+ text: "controls implementation evidence primary with rollout trace",
799
+ topic: "diverse",
800
+ },
801
+ {
802
+ id: chunkId("c-div-a2"),
803
+ text: "controls duplicate",
804
+ topic: "diverse",
805
+ },
806
+ ],
807
+ },
808
+ {
809
+ id: documentId("doc-div-b"),
810
+ safeDisplayName: "controls-b.md",
811
+ parsedUnits: [
812
+ {
813
+ id: "section-b",
814
+ unit: {
815
+ kind: "section",
816
+ sectionPath: ["Risk Monitoring"],
817
+ characterStart: 0,
818
+ characterEnd: 120,
819
+ },
820
+ },
821
+ ],
822
+ chunks: [
823
+ {
824
+ id: chunkId("c-div-b"),
825
+ text: "risk monitoring rollout evidence",
826
+ topic: "diverse",
827
+ },
828
+ ],
829
+ },
830
+ ],
831
+ },
832
+ ],
833
+ },
834
+ ],
835
+ queries: [
836
+ {
837
+ id: "q-diverse",
// The query text reuses words from both the doc-div-a and doc-div-b chunk texts
// (controls, implementation, risk, monitoring, rollout, evidence).
838
+ text: "Summarize controls implementation risk monitoring rollout evidence",
839
+ topic: "diverse",
840
+ scope: { kind: "capsule", capsuleId: capsuleId("cap-diversity") },
841
+ expectedChunkIds: [chunkId("c-div-a1"), chunkId("c-div-b")],
842
+ topK: 2,
843
+ },
844
+ ],
845
+ };
// Aggregate list of the eleven fixture objects of this module, exported as one array.
// Seven of the entries (singleTopicFixture through multiPageFixture) are defined
// earlier in the file, outside this excerpt.
// NOTE(review): a fixture that is defined but not added here is presumably skipped by
// any consumer that iterates this array - confirm against the evaluation runner.
846
+ export const ALL_FIXTURES = [
847
+ singleTopicFixture,
848
+ multiCapsuleFixture,
849
+ noEvidenceFixture,
850
+ ambiguousQueryFixture,
851
+ sourceIsolationFixture,
852
+ wrongScopeFixture,
853
+ multiPageFixture,
854
+ structuredFileFixture,
855
+ contextBudgetFixture,
856
+ staleIndexFixture,
857
+ broadQueryDiversityFixture,
858
+ ];