@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1115 @@
1
+ {
2
+ "bench": "agent-coding",
3
+ "stack": "pentatonic-memory",
4
+ "n_tasks": 22,
5
+ "n_correct": 21,
6
+ "accuracy": 0.9545454545454546,
7
+ "mean_score": 0.9772727272727273,
8
+ "p50_search_ms": 135.67769450310152,
9
+ "p95_search_ms": 761.0693425420322,
10
+ "total_tokens_in": 0,
11
+ "total_tokens_out": 0,
12
+ "total_usd": 0.0,
13
+ "by_tag": {
14
+ "api-signature": {
15
+ "n": 3,
16
+ "mean_score": 1.0,
17
+ "accuracy": 1.0
18
+ },
19
+ "bug-fix-recall": {
20
+ "n": 3,
21
+ "mean_score": 1.0,
22
+ "accuracy": 1.0
23
+ },
24
+ "deprecation": {
25
+ "n": 3,
26
+ "mean_score": 1.0,
27
+ "accuracy": 1.0
28
+ },
29
+ "config-recall": {
30
+ "n": 3,
31
+ "mean_score": 1.0,
32
+ "accuracy": 1.0
33
+ },
34
+ "pr-rationale": {
35
+ "n": 2,
36
+ "mean_score": 1.0,
37
+ "accuracy": 1.0
38
+ },
39
+ "cross-file-refactor": {
40
+ "n": 2,
41
+ "mean_score": 1.0,
42
+ "accuracy": 1.0
43
+ },
44
+ "stack-trace-match": {
45
+ "n": 1,
46
+ "mean_score": 1.0,
47
+ "accuracy": 1.0
48
+ },
49
+ "temporal": {
50
+ "n": 2,
51
+ "mean_score": 1.0,
52
+ "accuracy": 1.0
53
+ },
54
+ "contradiction": {
55
+ "n": 1,
56
+ "mean_score": 1.0,
57
+ "accuracy": 1.0
58
+ },
59
+ "rubric": {
60
+ "n": 2,
61
+ "mean_score": 0.75,
62
+ "accuracy": 0.5
63
+ }
64
+ },
65
+ "extra": {
66
+ "ingest_ms": 11312.783566012513,
67
+ "grading": "substring",
68
+ "limit": 3,
69
+ "tokens": {
70
+ "corpus_tokens": 5164,
71
+ "query_tokens": 339,
72
+ "context_tokens": 10595,
73
+ "retrieval_tokens": 10934,
74
+ "naive_tokens": 113947,
75
+ "saved_tokens": 103013,
76
+ "reduction_pct": 0.9040431077606256,
77
+ "mean_retrieval_tokens_per_task": 497.0,
78
+ "tokenizer": "cl100k_base",
79
+ "per_task": {
80
+ "api-invoice-signature": {
81
+ "query": 18,
82
+ "context": 507,
83
+ "retrieval": 525,
84
+ "judge_in": 0,
85
+ "judge_out": 0,
86
+ "judge_latency_ms": 0.0
87
+ },
88
+ "api-search-shape": {
89
+ "query": 16,
90
+ "context": 510,
91
+ "retrieval": 526,
92
+ "judge_in": 0,
93
+ "judge_out": 0,
94
+ "judge_latency_ms": 0.0
95
+ },
96
+ "api-invoice-amount-type": {
97
+ "query": 16,
98
+ "context": 468,
99
+ "retrieval": 484,
100
+ "judge_in": 0,
101
+ "judge_out": 0,
102
+ "judge_latency_ms": 0.0
103
+ },
104
+ "bugfix-stripe-rounding": {
105
+ "query": 10,
106
+ "context": 531,
107
+ "retrieval": 541,
108
+ "judge_in": 0,
109
+ "judge_out": 0,
110
+ "judge_latency_ms": 0.0
111
+ },
112
+ "bugfix-webhook-dup": {
113
+ "query": 10,
114
+ "context": 457,
115
+ "retrieval": 467,
116
+ "judge_in": 0,
117
+ "judge_out": 0,
118
+ "judge_latency_ms": 0.0
119
+ },
120
+ "bugfix-rounding-direction": {
121
+ "query": 19,
122
+ "context": 531,
123
+ "retrieval": 550,
124
+ "judge_in": 0,
125
+ "judge_out": 0,
126
+ "judge_latency_ms": 0.0
127
+ },
128
+ "deprecation-v1": {
129
+ "query": 11,
130
+ "context": 439,
131
+ "retrieval": 450,
132
+ "judge_in": 0,
133
+ "judge_out": 0,
134
+ "judge_latency_ms": 0.0
135
+ },
136
+ "deprecation-hs256": {
137
+ "query": 11,
138
+ "context": 438,
139
+ "retrieval": 449,
140
+ "judge_in": 0,
141
+ "judge_out": 0,
142
+ "judge_latency_ms": 0.0
143
+ },
144
+ "deprecation-search-tuple": {
145
+ "query": 13,
146
+ "context": 510,
147
+ "retrieval": 523,
148
+ "judge_in": 0,
149
+ "judge_out": 0,
150
+ "judge_latency_ms": 0.0
151
+ },
152
+ "config-retry-policy-base": {
153
+ "query": 15,
154
+ "context": 477,
155
+ "retrieval": 492,
156
+ "judge_in": 0,
157
+ "judge_out": 0,
158
+ "judge_latency_ms": 0.0
159
+ },
160
+ "config-session-ttl": {
161
+ "query": 14,
162
+ "context": 509,
163
+ "retrieval": 523,
164
+ "judge_in": 0,
165
+ "judge_out": 0,
166
+ "judge_latency_ms": 0.0
167
+ },
168
+ "config-search-hybrid-weight": {
169
+ "query": 11,
170
+ "context": 597,
171
+ "retrieval": 608,
172
+ "judge_in": 0,
173
+ "judge_out": 0,
174
+ "judge_latency_ms": 0.0
175
+ },
176
+ "pr-rationale-retry": {
177
+ "query": 19,
178
+ "context": 541,
179
+ "retrieval": 560,
180
+ "judge_in": 0,
181
+ "judge_out": 0,
182
+ "judge_latency_ms": 0.0
183
+ },
184
+ "pr-rationale-jwt-cleanup": {
185
+ "query": 13,
186
+ "context": 331,
187
+ "retrieval": 344,
188
+ "judge_in": 0,
189
+ "judge_out": 0,
190
+ "judge_latency_ms": 0.0
191
+ },
192
+ "cross-ref-invoicing": {
193
+ "query": 14,
194
+ "context": 505,
195
+ "retrieval": 519,
196
+ "judge_in": 0,
197
+ "judge_out": 0,
198
+ "judge_latency_ms": 0.0
199
+ },
200
+ "cross-ref-ratelimit": {
201
+ "query": 15,
202
+ "context": 411,
203
+ "retrieval": 426,
204
+ "judge_in": 0,
205
+ "judge_out": 0,
206
+ "judge_latency_ms": 0.0
207
+ },
208
+ "stack-trace-webhook": {
209
+ "query": 21,
210
+ "context": 516,
211
+ "retrieval": 537,
212
+ "judge_in": 0,
213
+ "judge_out": 0,
214
+ "judge_latency_ms": 0.0
215
+ },
216
+ "temporal-numpy": {
217
+ "query": 11,
218
+ "context": 489,
219
+ "retrieval": 500,
220
+ "judge_in": 0,
221
+ "judge_out": 0,
222
+ "judge_latency_ms": 0.0
223
+ },
224
+ "temporal-on-call": {
225
+ "query": 20,
226
+ "context": 391,
227
+ "retrieval": 411,
228
+ "judge_in": 0,
229
+ "judge_out": 0,
230
+ "judge_latency_ms": 0.0
231
+ },
232
+ "contradiction-algorithm": {
233
+ "query": 16,
234
+ "context": 508,
235
+ "retrieval": 524,
236
+ "judge_in": 0,
237
+ "judge_out": 0,
238
+ "judge_latency_ms": 0.0
239
+ },
240
+ "rubric-security-advice": {
241
+ "query": 25,
242
+ "context": 438,
243
+ "retrieval": 463,
244
+ "judge_in": 751,
245
+ "judge_out": 42,
246
+ "judge_latency_ms": 1048.672226011753
247
+ },
248
+ "rubric-incident-lesson": {
249
+ "query": 21,
250
+ "context": 491,
251
+ "retrieval": 512,
252
+ "judge_in": 784,
253
+ "judge_out": 38,
254
+ "judge_latency_ms": 1022.7693279981613
255
+ }
256
+ },
257
+ "judge_tokens_in": 1535,
258
+ "judge_tokens_out": 80,
259
+ "judge_calls": 2,
260
+ "judge_mean_latency_ms": 1035.7207770049572
261
+ },
262
+ "cost_usd": {
263
+ "assumed_completion_tokens_per_task": 100,
264
+ "rates": {
265
+ "input_per_1k": 0.0025,
266
+ "output_per_1k": 0.01,
267
+ "model": "gpt-4o"
268
+ },
269
+ "retrieval_usd_in": 0.027335,
270
+ "retrieval_usd_out": 0.022,
271
+ "retrieval_usd_total": 0.049335000000000004,
272
+ "naive_usd_total": 0.3068675,
273
+ "saved_usd": 0.2575325,
274
+ "saved_usd_per_1k_tasks": 11.706022727272726
275
+ }
276
+ },
277
+ "task_results": [
278
+ {
279
+ "task_id": "api-invoice-signature",
280
+ "query": "What's the request body shape for POST /v2/invoices on billing-svc?",
281
+ "answer": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing float amounts returns 400.\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature errors, see runbook-billing.\n---\n## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vector).\n- Max limit is 100 (hard cap in config).",
282
+ "hits": [
283
+ {
284
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
285
+ "score": 0.7391409873962402,
286
+ "source": "pentatonic-memory",
287
+ "doc_id": "api-billing-invoice"
288
+ },
289
+ {
290
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
291
+ "score": 0.49705252051353455,
292
+ "source": "pentatonic-memory",
293
+ "doc_id": "cross-ref-invoicing"
294
+ },
295
+ {
296
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
297
+ "score": 0.37377965450286865,
298
+ "source": "pentatonic-memory",
299
+ "doc_id": "api-search-query"
300
+ }
301
+ ],
302
+ "correct": true,
303
+ "score": 1.0,
304
+ "grading_notes": "all substrings matched",
305
+ "search_time_ms": 863.3305979892612,
306
+ "generation_time_ms": 0.0,
307
+ "tokens_in": 0,
308
+ "tokens_out": 0,
309
+ "retrieval_tokens": 525,
310
+ "query_tokens": 18,
311
+ "context_tokens": 507,
312
+ "judge_tokens_in": 0,
313
+ "judge_tokens_out": 0,
314
+ "judge_latency_ms": 0.0
315
+ },
316
+ {
317
+ "task_id": "api-search-shape",
318
+ "query": "What's the response shape of POST /v3/search on search-svc?",
319
+ "answer": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vector).\n- Max limit is 100 (hard cap in config).\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')\n---\n## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing float amounts returns 400.",
320
+ "hits": [
321
+ {
322
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
323
+ "score": 0.5933456420898438,
324
+ "source": "pentatonic-memory",
325
+ "doc_id": "api-search-query"
326
+ },
327
+ {
328
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
329
+ "score": 0.5079510807991028,
330
+ "source": "pentatonic-memory",
331
+ "doc_id": "readme-search"
332
+ },
333
+ {
334
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
335
+ "score": 0.3249790370464325,
336
+ "source": "pentatonic-memory",
337
+ "doc_id": "api-billing-invoice"
338
+ }
339
+ ],
340
+ "correct": true,
341
+ "score": 1.0,
342
+ "grading_notes": "all substrings matched",
343
+ "search_time_ms": 134.34049900388345,
344
+ "generation_time_ms": 0.0,
345
+ "tokens_in": 0,
346
+ "tokens_out": 0,
347
+ "retrieval_tokens": 526,
348
+ "query_tokens": 16,
349
+ "context_tokens": 510,
350
+ "judge_tokens_in": 0,
351
+ "judge_tokens_out": 0,
352
+ "judge_latency_ms": 0.0
353
+ },
354
+ {
355
+ "task_id": "api-invoice-amount-type",
356
+ "query": "When calling POST /v2/invoices, must amount be integer or float?",
357
+ "answer": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing float amounts returns 400.\n---\ncommit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.\n---\n# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (never overcharge)\n assert isinstance(amount_cents, int)\n assert amount_cents == 1299\n\ndef test_rounding_exact_cent():\n # 12.99 is exact, should be 1299\n assert StripeClient._to_cents(12.99) == 1299\n\ndef test_rounding_negative():\n # Refunds are signed; we keep the sign but still floor towards zero.\n assert StripeClient._to_cents(-12.99) == -1299\n```",
358
+ "hits": [
359
+ {
360
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
361
+ "score": 0.6483025550842285,
362
+ "source": "pentatonic-memory",
363
+ "doc_id": "api-billing-invoice"
364
+ },
365
+ {
366
+ "text": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
367
+ "score": 0.409417986869812,
368
+ "source": "pentatonic-memory",
369
+ "doc_id": "commit-a1b2c3"
370
+ },
371
+ {
372
+ "text": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (n",
373
+ "score": 0.49327391386032104,
374
+ "source": "pentatonic-memory",
375
+ "doc_id": "test-payments-rounding"
376
+ }
377
+ ],
378
+ "correct": true,
379
+ "score": 1.0,
380
+ "grading_notes": "all substrings matched",
381
+ "search_time_ms": 132.92760698823258,
382
+ "generation_time_ms": 0.0,
383
+ "tokens_in": 0,
384
+ "tokens_out": 0,
385
+ "retrieval_tokens": 484,
386
+ "query_tokens": 16,
387
+ "context_tokens": 468,
388
+ "judge_tokens_in": 0,
389
+ "judge_tokens_out": 0,
390
+ "judge_latency_ms": 0.0
391
+ },
392
+ {
393
+ "task_id": "bugfix-stripe-rounding",
394
+ "query": "Which commit fixed the stripe float-amount bug?",
395
+ "answer": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (never overcharge)\n assert isinstance(amount_cents, int)\n assert amount_cents == 1299\n\ndef test_rounding_exact_cent():\n # 12.99 is exact, should be 1299\n assert StripeClient._to_cents(12.99) == 1299\n\ndef test_rounding_negative():\n # Refunds are signed; we keep the sign but still floor towards zero.\n assert StripeClient._to_cents(-12.99) == -1299\n```\n---\ncommit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency,\n )\n )\n```",
396
+ "hits": [
397
+ {
398
+ "text": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (n",
399
+ "score": 0.5295395851135254,
400
+ "source": "pentatonic-memory",
401
+ "doc_id": "test-payments-rounding"
402
+ },
403
+ {
404
+ "text": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
405
+ "score": 0.6344451904296875,
406
+ "source": "pentatonic-memory",
407
+ "doc_id": "commit-a1b2c3"
408
+ },
409
+ {
410
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
411
+ "score": 0.4522598385810852,
412
+ "source": "pentatonic-memory",
413
+ "doc_id": "code-stripe-client"
414
+ }
415
+ ],
416
+ "correct": true,
417
+ "score": 1.0,
418
+ "grading_notes": "all substrings matched",
419
+ "search_time_ms": 131.76990099600516,
420
+ "generation_time_ms": 0.0,
421
+ "tokens_in": 0,
422
+ "tokens_out": 0,
423
+ "retrieval_tokens": 541,
424
+ "query_tokens": 10,
425
+ "context_tokens": 531,
426
+ "judge_tokens_in": 0,
427
+ "judge_tokens_out": 0,
428
+ "judge_latency_ms": 0.0
429
+ },
430
+ {
431
+ "task_id": "bugfix-webhook-dup",
432
+ "query": "How did we fix the duplicate Stripe webhook handling?",
433
+ "answer": "Issue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #76. Closed 2026-02-26.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.",
434
+ "hits": [
435
+ {
436
+ "text": "Issue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #7",
437
+ "score": 0.7316526770591736,
438
+ "source": "pentatonic-memory",
439
+ "doc_id": "issue-88-webhook-dup"
440
+ },
441
+ {
442
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
443
+ "score": 0.4363369345664978,
444
+ "source": "pentatonic-memory",
445
+ "doc_id": "incident-2026-03-02-webhook"
446
+ },
447
+ {
448
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
449
+ "score": 0.48752841353416443,
450
+ "source": "pentatonic-memory",
451
+ "doc_id": "runbook-billing"
452
+ }
453
+ ],
454
+ "correct": true,
455
+ "score": 1.0,
456
+ "grading_notes": "all substrings matched",
457
+ "search_time_ms": 129.61234597605653,
458
+ "generation_time_ms": 0.0,
459
+ "tokens_in": 0,
460
+ "tokens_out": 0,
461
+ "retrieval_tokens": 467,
462
+ "query_tokens": 10,
463
+ "context_tokens": 457,
464
+ "judge_tokens_in": 0,
465
+ "judge_tokens_out": 0,
466
+ "judge_latency_ms": 0.0
467
+ },
468
+ {
469
+ "task_id": "bugfix-rounding-direction",
470
+ "query": "When converting a float amount to cents, do we round up, down, or to nearest?",
471
+ "answer": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (never overcharge)\n assert isinstance(amount_cents, int)\n assert amount_cents == 1299\n\ndef test_rounding_exact_cent():\n # 12.99 is exact, should be 1299\n assert StripeClient._to_cents(12.99) == 1299\n\ndef test_rounding_negative():\n # Refunds are signed; we keep the sign but still floor towards zero.\n assert StripeClient._to_cents(-12.99) == -1299\n```\n---\ncommit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency,\n )\n )\n```",
472
+ "hits": [
473
+ {
474
+ "text": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (n",
475
+ "score": 0.624129056930542,
476
+ "source": "pentatonic-memory",
477
+ "doc_id": "test-payments-rounding"
478
+ },
479
+ {
480
+ "text": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
481
+ "score": 0.4027729034423828,
482
+ "source": "pentatonic-memory",
483
+ "doc_id": "commit-a1b2c3"
484
+ },
485
+ {
486
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
487
+ "score": 0.4700067639350891,
488
+ "source": "pentatonic-memory",
489
+ "doc_id": "code-stripe-client"
490
+ }
491
+ ],
492
+ "correct": true,
493
+ "score": 1.0,
494
+ "grading_notes": "all substrings matched",
495
+ "search_time_ms": 136.86645700363442,
496
+ "generation_time_ms": 0.0,
497
+ "tokens_in": 0,
498
+ "tokens_out": 0,
499
+ "retrieval_tokens": 550,
500
+ "query_tokens": 19,
501
+ "context_tokens": 531,
502
+ "judge_tokens_in": 0,
503
+ "judge_tokens_out": 0,
504
+ "judge_latency_ms": 0.0
505
+ },
506
+ {
507
+ "task_id": "deprecation-v1",
508
+ "query": "Are the /api/v1/ endpoints still supported?",
509
+ "answer": "commit d4e5f6a7\nAuthor: jordan@example.com\nDate: 2026-02-17T16:20:00Z\n\ndeprecate: remove legacy /api/v1/ endpoints\n\nAll /api/v1/* routes removed. /api/v2/ has been the default for 9 months.\nCallers still on v1 will get HTTP 410 Gone with a header pointing at the\nv2 equivalent. Full mapping in docs/V1_V2_MIGRATION.md. Sunset announced\n2025-11-01, enforced now.\n---\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all from an old mobile app build. told them to upgrade. no internal services affected.\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature errors, see runbook-billing.",
510
+ "hits": [
511
+ {
512
+ "text": "commit d4e5f6a7\nAuthor: jordan@example.com\nDate: 2026-02-17T16:20:00Z\n\ndeprecate: remove legacy /api/v1/ endpoints\n\nAll /api/v1/* routes removed. /api/v2/ has been the default for 9 months.\nCallers still on v1 will get HTTP 410 Gone with a header pointing at the\nv2 equivalent. Full mapping in docs/V1_V2_MIGRATION.md. Sunset announced\n2025-11-01, enforced now.",
513
+ "score": 0.5241410732269287,
514
+ "source": "pentatonic-memory",
515
+ "doc_id": "commit-d4e5f6"
516
+ },
517
+ {
518
+ "text": "[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all ",
519
+ "score": 0.5950751304626465,
520
+ "source": "pentatonic-memory",
521
+ "doc_id": "chat-deprecation"
522
+ },
523
+ {
524
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
525
+ "score": 0.2299472540616989,
526
+ "source": "pentatonic-memory",
527
+ "doc_id": "cross-ref-invoicing"
528
+ }
529
+ ],
530
+ "correct": true,
531
+ "score": 1.0,
532
+ "grading_notes": "all substrings matched",
533
+ "search_time_ms": 130.17492700600997,
534
+ "generation_time_ms": 0.0,
535
+ "tokens_in": 0,
536
+ "tokens_out": 0,
537
+ "retrieval_tokens": 450,
538
+ "query_tokens": 11,
539
+ "context_tokens": 439,
540
+ "judge_tokens_in": 0,
541
+ "judge_tokens_out": 0,
542
+ "judge_latency_ms": 0.0
543
+ },
544
+ {
545
+ "task_id": "deprecation-hs256",
546
+ "query": "Can auth-svc still accept HS256 JWTs?",
547
+ "answer": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\nPR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
548
+ "hits": [
549
+ {
550
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
551
+ "score": 0.5643326640129089,
552
+ "source": "pentatonic-memory",
553
+ "doc_id": "commit-b2c3d4"
554
+ },
555
+ {
556
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
557
+ "score": 0.5714235305786133,
558
+ "source": "pentatonic-memory",
559
+ "doc_id": "readme-auth"
560
+ },
561
+ {
562
+ "text": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
563
+ "score": 0.5556215643882751,
564
+ "source": "pentatonic-memory",
565
+ "doc_id": "pr-61-jwt-cleanup"
566
+ }
567
+ ],
568
+ "correct": true,
569
+ "score": 1.0,
570
+ "grading_notes": "all substrings matched",
571
+ "search_time_ms": 128.96630799514242,
572
+ "generation_time_ms": 0.0,
573
+ "tokens_in": 0,
574
+ "tokens_out": 0,
575
+ "retrieval_tokens": 449,
576
+ "query_tokens": 11,
577
+ "context_tokens": 438,
578
+ "judge_tokens_in": 0,
579
+ "judge_tokens_out": 0,
580
+ "judge_latency_ms": 0.0
581
+ },
582
+ {
583
+ "task_id": "deprecation-search-tuple",
584
+ "query": "What does SearchClient.search() return today \u2014 tuples or dicts?",
585
+ "answer": "commit c3d4e5f6\nAuthor: alex@example.com\nDate: 2026-02-14T09:00:00Z\n\nfeat(search): switch ANN backend from faiss to vector-db\n\nfaiss-cpu was hitting ~4s p99 on 2M vectors. Vector-DB HNSW on the same\ncorpus gives 40ms p99. Breaking: the SearchClient.search() return shape\nchanged \u2014 was list[tuple[int, float]], now list[dict] with keys {id, score,\npayload}. Migration guide in docs/SEARCH_MIGRATION.md.\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')\n---\n# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = None, hybrid_weight: float = 0.6) -> list[dict]:\n # Post-c3d4e5f6: returns list[dict] not list[tuple]\n r = self._http.post(f'{self.url}/v3/search', json={\n 'query': query, 'collection': collection, 'limit': limit,\n 'filter': filter or {}, 'hybrid_weight': hybrid_weight,\n })\n r.raise_for_status()\n return r.json()['results']\n```",
586
+ "hits": [
587
+ {
588
+ "text": "commit c3d4e5f6\nAuthor: alex@example.com\nDate: 2026-02-14T09:00:00Z\n\nfeat(search): switch ANN backend from faiss to vector-db\n\nfaiss-cpu was hitting ~4s p99 on 2M vectors. Vector-DB HNSW on the same\ncorpus gives 40ms p99. Breaking: the SearchClient.search() return shape\nchanged \u2014 was list[tuple[int, float]], now list[dict] with keys {id, score,\npayload}. Migration guide in docs/SEARCH_MIGRATION.md.",
589
+ "score": 0.4689617156982422,
590
+ "source": "pentatonic-memory",
591
+ "doc_id": "commit-c3d4e5"
592
+ },
593
+ {
594
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
595
+ "score": 0.5005181431770325,
596
+ "source": "pentatonic-memory",
597
+ "doc_id": "readme-search"
598
+ },
599
+ {
600
+ "text": "# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = Non",
601
+ "score": 0.6125697493553162,
602
+ "source": "pentatonic-memory",
603
+ "doc_id": "code-search-client"
604
+ }
605
+ ],
606
+ "correct": true,
607
+ "score": 1.0,
608
+ "grading_notes": "all substrings matched",
609
+ "search_time_ms": 129.7054660099093,
610
+ "generation_time_ms": 0.0,
611
+ "tokens_in": 0,
612
+ "tokens_out": 0,
613
+ "retrieval_tokens": 523,
614
+ "query_tokens": 13,
615
+ "context_tokens": 510,
616
+ "judge_tokens_in": 0,
617
+ "judge_tokens_out": 0,
618
+ "judge_latency_ms": 0.0
619
+ },
620
+ {
621
+ "task_id": "config-retry-policy-base",
622
+ "query": "What's the default stripe retry base delay in ms on billing-svc?",
623
+ "answer": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.\n---\nPR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\n# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session_ttl_minutes: 60\n refresh_ttl_days: 30\n\nsearch:\n url: http://search-svc.internal:8080\n api_key: ${SEARCH_API_KEY}\n default_collection: products\n default_hybrid_weight: 0.6",
624
+ "hits": [
625
+ {
626
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
627
+ "score": 0.5261839032173157,
628
+ "source": "pentatonic-memory",
629
+ "doc_id": "readme-billing"
630
+ },
631
+ {
632
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
633
+ "score": 0.6331774592399597,
634
+ "source": "pentatonic-memory",
635
+ "doc_id": "pr-47-retry"
636
+ },
637
+ {
638
+ "text": "# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session",
639
+ "score": 0.38896510004997253,
640
+ "source": "pentatonic-memory",
641
+ "doc_id": "config-sample"
642
+ }
643
+ ],
644
+ "correct": true,
645
+ "score": 1.0,
646
+ "grading_notes": "all substrings matched",
647
+ "search_time_ms": 151.04150801198557,
648
+ "generation_time_ms": 0.0,
649
+ "tokens_in": 0,
650
+ "tokens_out": 0,
651
+ "retrieval_tokens": 492,
652
+ "query_tokens": 15,
653
+ "context_tokens": 477,
654
+ "judge_tokens_in": 0,
655
+ "judge_tokens_out": 0,
656
+ "judge_latency_ms": 0.0
657
+ },
658
+ {
659
+ "task_id": "config-session-ttl",
660
+ "query": "What's the default SESSION_TTL_MINUTES on auth-svc?",
661
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session_ttl_minutes: 60\n refresh_ttl_days: 30\n\nsearch:\n url: http://search-svc.internal:8080\n api_key: ${SEARCH_API_KEY}\n default_collection: products\n default_hybrid_weight: 0.6\n---\ncommit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.",
662
+ "hits": [
663
+ {
664
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
665
+ "score": 0.4865476191043854,
666
+ "source": "pentatonic-memory",
667
+ "doc_id": "readme-auth"
668
+ },
669
+ {
670
+ "text": "# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session",
671
+ "score": 0.32987329363822937,
672
+ "source": "pentatonic-memory",
673
+ "doc_id": "config-sample"
674
+ },
675
+ {
676
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
677
+ "score": 0.3202645182609558,
678
+ "source": "pentatonic-memory",
679
+ "doc_id": "commit-b2c3d4"
680
+ }
681
+ ],
682
+ "correct": true,
683
+ "score": 1.0,
684
+ "grading_notes": "all substrings matched",
685
+ "search_time_ms": 139.195772993844,
686
+ "generation_time_ms": 0.0,
687
+ "tokens_in": 0,
688
+ "tokens_out": 0,
689
+ "retrieval_tokens": 523,
690
+ "query_tokens": 14,
691
+ "context_tokens": 509,
692
+ "judge_tokens_in": 0,
693
+ "judge_tokens_out": 0,
694
+ "judge_latency_ms": 0.0
695
+ },
696
+ {
697
+ "task_id": "config-search-hybrid-weight",
698
+ "query": "What's the default hybrid_weight for search-svc?",
699
+ "answer": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vector).\n- Max limit is 100 (hard cap in config).\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')\n---\n# monorepo ARCHITECTURE.md\n\n## Services\n- **gateway** \u2014 edge reverse proxy, terminates TLS, routes to services\n- **auth-svc** \u2014 JWT mint + verify, OIDC provider (RS256 since b2c3d4e5)\n- **billing-svc** \u2014 Stripe integration, invoicing, webhooks\n- **search-svc** \u2014 vector + BM25 hybrid search (vector-db backend since c3d4e5f6)\n- **order-svc** \u2014 order lifecycle\n- **email-svc** \u2014 transactional email (SendGrid)\n- **reporting-svc** \u2014 financial + ops dashboards\n\n## Inter-service RPC\nProtobuf over gRPC for hot paths (order/billing/auth). JSON over HTTP for\nplatform ops (admin panel, health checks). See ADR-007.\n\n## Language/runtime\n- Python 3.12 minimum (ADR-011)\n- Node 20 LTS for the gateway only (historical)\n- Go for the sealed-secrets operator\n\n## Data\n- Postgres 16 per-service\n- Redis for rate limits + ephemeral queues\n- Vector-DB for s",
700
+ "hits": [
701
+ {
702
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
703
+ "score": 0.496010422706604,
704
+ "source": "pentatonic-memory",
705
+ "doc_id": "api-search-query"
706
+ },
707
+ {
708
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
709
+ "score": 0.4002593159675598,
710
+ "source": "pentatonic-memory",
711
+ "doc_id": "readme-search"
712
+ },
713
+ {
714
+ "text": "# monorepo ARCHITECTURE.md\n\n## Services\n- **gateway** \u2014 edge reverse proxy, terminates TLS, routes to services\n- **auth-svc** \u2014 JWT mint + verify, OIDC provider (RS256 since b2c3d4e5)\n- **billing-svc** \u2014 Stripe integration, invoicing, webhooks\n- **search-svc** \u2014 vector + BM25 hybrid search (vector-db backend since c3d4e5f6)\n- **order-svc** \u2014 order lifecycle\n- **email-svc** \u2014 transactional email (Send",
715
+ "score": 0.2414265275001526,
716
+ "source": "pentatonic-memory",
717
+ "doc_id": "arch-overview"
718
+ }
719
+ ],
720
+ "correct": true,
721
+ "score": 1.0,
722
+ "grading_notes": "all substrings matched",
723
+ "search_time_ms": 136.14241901086643,
724
+ "generation_time_ms": 0.0,
725
+ "tokens_in": 0,
726
+ "tokens_out": 0,
727
+ "retrieval_tokens": 608,
728
+ "query_tokens": 11,
729
+ "context_tokens": 597,
730
+ "judge_tokens_in": 0,
731
+ "judge_tokens_out": 0,
732
+ "judge_latency_ms": 0.0
733
+ },
734
+ {
735
+ "task_id": "pr-rationale-retry",
736
+ "query": "Why did PR #47 choose base=200ms for stripe retries rather than 500ms?",
737
+ "answer": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retries might want to turn it off for tests.\npriya: yep, added. merging.\n\n[2026-02-19 Slack #payments-dev]\npriya: merged, pushed to staging 10m ago. Saw 3 successful retries on the live stripe-sandbox traffic in the last few mins. base_ms=200, backoff=*2, hit once at 400ms, once at 800ms, once at 1600ms. Behaviour as designed.\n---\nPR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency",
738
+ "hits": [
739
+ {
740
+ "text": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retrie",
741
+ "score": 0.756879448890686,
742
+ "source": "pentatonic-memory",
743
+ "doc_id": "chat-pr-47-review"
744
+ },
745
+ {
746
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
747
+ "score": 0.661232590675354,
748
+ "source": "pentatonic-memory",
749
+ "doc_id": "pr-47-retry"
750
+ },
751
+ {
752
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
753
+ "score": 0.4688180983066559,
754
+ "source": "pentatonic-memory",
755
+ "doc_id": "code-stripe-client"
756
+ }
757
+ ],
758
+ "correct": true,
759
+ "score": 1.0,
760
+ "grading_notes": "all substrings matched",
761
+ "search_time_ms": 150.86576298926957,
762
+ "generation_time_ms": 0.0,
763
+ "tokens_in": 0,
764
+ "tokens_out": 0,
765
+ "retrieval_tokens": 560,
766
+ "query_tokens": 19,
767
+ "context_tokens": 541,
768
+ "judge_tokens_in": 0,
769
+ "judge_tokens_out": 0,
770
+ "judge_latency_ms": 0.0
771
+ },
772
+ {
773
+ "task_id": "pr-rationale-jwt-cleanup",
774
+ "query": "What did PR #61 clean up and why was it safe?",
775
+ "answer": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.\n---\nPR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\nPR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
776
+ "hits": [
777
+ {
778
+ "text": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
779
+ "score": 0.3749690353870392,
780
+ "source": "pentatonic-memory",
781
+ "doc_id": "pr-61-jwt-cleanup"
782
+ },
783
+ {
784
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
785
+ "score": 0.28478312492370605,
786
+ "source": "pentatonic-memory",
787
+ "doc_id": "pr-47-retry"
788
+ },
789
+ {
790
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
791
+ "score": 0.3328581154346466,
792
+ "source": "pentatonic-memory",
793
+ "doc_id": "pr-52-search-migration"
794
+ }
795
+ ],
796
+ "correct": true,
797
+ "score": 1.0,
798
+ "grading_notes": "all substrings matched",
799
+ "search_time_ms": 131.8181079986971,
800
+ "generation_time_ms": 0.0,
801
+ "tokens_in": 0,
802
+ "tokens_out": 0,
803
+ "retrieval_tokens": 344,
804
+ "query_tokens": 13,
805
+ "context_tokens": 331,
806
+ "judge_tokens_in": 0,
807
+ "judge_tokens_out": 0,
808
+ "judge_latency_ms": 0.0
809
+ },
810
+ {
811
+ "task_id": "cross-ref-invoicing",
812
+ "query": "After Stripe posts invoice.payment_succeeded, which services consume the event?",
813
+ "answer": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature errors, see runbook-billing.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\nIssue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #76. Closed 2026-02-26.",
814
+ "hits": [
815
+ {
816
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
817
+ "score": 0.6640831232070923,
818
+ "source": "pentatonic-memory",
819
+ "doc_id": "cross-ref-invoicing"
820
+ },
821
+ {
822
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
823
+ "score": 0.37300166487693787,
824
+ "source": "pentatonic-memory",
825
+ "doc_id": "incident-2026-03-02-webhook"
826
+ },
827
+ {
828
+ "text": "Issue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #7",
829
+ "score": 0.38960394263267517,
830
+ "source": "pentatonic-memory",
831
+ "doc_id": "issue-88-webhook-dup"
832
+ }
833
+ ],
834
+ "correct": true,
835
+ "score": 1.0,
836
+ "grading_notes": "all substrings matched",
837
+ "search_time_ms": 135.2129699953366,
838
+ "generation_time_ms": 0.0,
839
+ "tokens_in": 0,
840
+ "tokens_out": 0,
841
+ "retrieval_tokens": 519,
842
+ "query_tokens": 14,
843
+ "context_tokens": 505,
844
+ "judge_tokens_in": 0,
845
+ "judge_tokens_out": 0,
846
+ "judge_latency_ms": 0.0
847
+ },
848
+ {
849
+ "task_id": "cross-ref-ratelimit",
850
+ "query": "Where does the shared rate-limit lib live and what's the method signature?",
851
+ "answer": "Refactor: extract rate-limit middleware into shared lib\nAuthor: alex\nStatus: merged 2026-03-18 (PR #108)\n\nBefore: every service rolled its own token-bucket rate limiter (billing-svc,\nauth-svc, search-svc, gateway all had near-identical code).\nAfter: `shared/rate_limit.py` with the canonical implementation. All 4\nservices now import from there.\nAPI: `RateLimiter(tokens_per_sec: int, burst: int).allow(key: str) -> bool`.\nBackend: Redis (shared across pods). Key format: `ratelimit:{service}:{key}`.\nTune: `RATELIMIT_TOKENS_PER_SEC` env var per service.\n---\nPR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\ncommit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.",
852
+ "hits": [
853
+ {
854
+ "text": "Refactor: extract rate-limit middleware into shared lib\nAuthor: alex\nStatus: merged 2026-03-18 (PR #108)\n\nBefore: every service rolled its own token-bucket rate limiter (billing-svc,\nauth-svc, search-svc, gateway all had near-identical code).\nAfter: `shared/rate_limit.py` with the canonical implementation. All 4\nservices now import from there.\nAPI: `RateLimiter(tokens_per_sec: int, burst: int).all",
855
+ "score": 0.6208145618438721,
856
+ "source": "pentatonic-memory",
857
+ "doc_id": "refactor-rate-limit"
858
+ },
859
+ {
860
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
861
+ "score": 0.39683374762535095,
862
+ "source": "pentatonic-memory",
863
+ "doc_id": "pr-47-retry"
864
+ },
865
+ {
866
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
867
+ "score": 0.1863126903772354,
868
+ "source": "pentatonic-memory",
869
+ "doc_id": "commit-b2c3d4"
870
+ }
871
+ ],
872
+ "correct": true,
873
+ "score": 1.0,
874
+ "grading_notes": "all substrings matched",
875
+ "search_time_ms": 135.21138599026017,
876
+ "generation_time_ms": 0.0,
877
+ "tokens_in": 0,
878
+ "tokens_out": 0,
879
+ "retrieval_tokens": 426,
880
+ "query_tokens": 15,
881
+ "context_tokens": 411,
882
+ "judge_tokens_in": 0,
883
+ "judge_tokens_out": 0,
884
+ "judge_latency_ms": 0.0
885
+ },
886
+ {
887
+ "task_id": "stack-trace-webhook",
888
+ "query": "If a SignatureVerificationError happens on billing-svc webhook, what's the most likely cause and fix?",
889
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\nStack trace from prod 2026-03-02 14:15 UTC:\n\nTraceback (most recent call last):\n File \"billing/webhooks.py\", line 42, in handle_webhook\n event = StripeClient.parse_event(payload, signature)\n File \"billing/stripe_client.py\", line 88, in parse_event\n return stripe.Webhook.construct_event(payload, sig, secret)\n File \"stripe/webhook.py\", line 56, in construct_event\n raise SignatureVerificationError(...)\nstripe.error.SignatureVerificationError: No signatures found matching expected signature\n\nCaused by: STRIPE_WEBHOOK_SECRET rotated in prod at 14:10 but pod restart\ndidn't pick up new value until 14:20. Fixed by kubectl rol",
890
+ "hits": [
891
+ {
892
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
893
+ "score": 0.7145698666572571,
894
+ "source": "pentatonic-memory",
895
+ "doc_id": "runbook-billing"
896
+ },
897
+ {
898
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
899
+ "score": 0.5628818869590759,
900
+ "source": "pentatonic-memory",
901
+ "doc_id": "incident-2026-03-02-webhook"
902
+ },
903
+ {
904
+ "text": "Stack trace from prod 2026-03-02 14:15 UTC:\n\nTraceback (most recent call last):\n File \"billing/webhooks.py\", line 42, in handle_webhook\n event = StripeClient.parse_event(payload, signature)\n File \"billing/stripe_client.py\", line 88, in parse_event\n return stripe.Webhook.construct_event(payload, sig, secret)\n File \"stripe/webhook.py\", line 56, in construct_event\n raise SignatureVerifica",
905
+ "score": 0.6209246516227722,
906
+ "source": "pentatonic-memory",
907
+ "doc_id": "stack-trace-nullpointer"
908
+ }
909
+ ],
910
+ "correct": true,
911
+ "score": 1.0,
912
+ "grading_notes": "all substrings matched",
913
+ "search_time_ms": 157.36434800783172,
914
+ "generation_time_ms": 0.0,
915
+ "tokens_in": 0,
916
+ "tokens_out": 0,
917
+ "retrieval_tokens": 537,
918
+ "query_tokens": 21,
919
+ "context_tokens": 516,
920
+ "judge_tokens_in": 0,
921
+ "judge_tokens_out": 0,
922
+ "judge_latency_ms": 0.0
923
+ },
924
+ {
925
+ "task_id": "temporal-numpy",
926
+ "query": "Why was numpy upgraded to 1.26 recently?",
927
+ "answer": "PR #101: Upgrade numpy 1.24.4 \u2192 1.26.2\nAuthor: alex\nStatus: merged\n\nFollow-up to ADR-011 (Python 3.12 minimum). numpy 1.24 doesn't ship 3.12\nwheels, so CI would have to build from source on every install.\nUpgraded to 1.26, which does ship 3.12 wheels. No API surface changes\naffecting us \u2014 only internal dtype-promotion warnings we've already\nsilenced via `warnings.filterwarnings`.\n\nVerified no call-sites depended on the deprecated `np.object_` alias.\nMerged 2026-03-11.\n---\nADR-011: Python 3.12 minimum across the fleet\nDate: 2026-03-10\nStatus: accepted\n\n**Context.** We've been pinned to 3.10 for 2 years. 3.12's faster interpreter\nand better typing are worth the cutover cost.\n\n**Decision.** Minimum Python = 3.12. All services, CI, dev containers.\nPython 3.11 and 3.10 removed from `.python-version` files.\n\n**Consequences.** One library (numpy==1.24.x) needs upgrade to 1.26 for\n3.12 wheels. PR #101 does the upgrade. No API changes for consumers.\n---\n[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retries might want to turn it off for tests.\npriya: yep, added. merging.\n\n[2026-02-19 Slack #payments-dev]\npriya: merged, pushed to staging 10m ago. Saw 3 successful retries on the live stripe-sandbox traffic in the last few mins. base_ms=200, backoff=*2, hit once at 400ms, once at 800ms, once at 1600ms. Behaviour as designed.",
928
+ "hits": [
929
+ {
930
+ "text": "PR #101: Upgrade numpy 1.24.4 \u2192 1.26.2\nAuthor: alex\nStatus: merged\n\nFollow-up to ADR-011 (Python 3.12 minimum). numpy 1.24 doesn't ship 3.12\nwheels, so CI would have to build from source on every install.\nUpgraded to 1.26, which does ship 3.12 wheels. No API surface changes\naffecting us \u2014 only internal dtype-promotion warnings we've already\nsilenced via `warnings.filterwarnings`.\n\nVerified no call",
931
+ "score": 0.6952402591705322,
932
+ "source": "pentatonic-memory",
933
+ "doc_id": "dep-numpy-upgrade"
934
+ },
935
+ {
936
+ "text": "ADR-011: Python 3.12 minimum across the fleet\nDate: 2026-03-10\nStatus: accepted\n\n**Context.** We've been pinned to 3.10 for 2 years. 3.12's faster interpreter\nand better typing are worth the cutover cost.\n\n**Decision.** Minimum Python = 3.12. All services, CI, dev containers.\nPython 3.11 and 3.10 removed from `.python-version` files.\n\n**Consequences.** One library (numpy==1.24.x) needs upgrade to ",
937
+ "score": 0.5172460675239563,
938
+ "source": "pentatonic-memory",
939
+ "doc_id": "decision-python"
940
+ },
941
+ {
942
+ "text": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retrie",
943
+ "score": 0.20378759503364563,
944
+ "source": "pentatonic-memory",
945
+ "doc_id": "chat-pr-47-review"
946
+ }
947
+ ],
948
+ "correct": true,
949
+ "score": 1.0,
950
+ "grading_notes": "all substrings matched",
951
+ "search_time_ms": 135.10690501425415,
952
+ "generation_time_ms": 0.0,
953
+ "tokens_in": 0,
954
+ "tokens_out": 0,
955
+ "retrieval_tokens": 500,
956
+ "query_tokens": 11,
957
+ "context_tokens": 489,
958
+ "judge_tokens_in": 0,
959
+ "judge_tokens_out": 0,
960
+ "judge_latency_ms": 0.0
961
+ },
962
+ {
963
+ "task_id": "temporal-on-call",
964
+ "query": "Who is the primary on-call for the week of April 15\u201321, 2026?",
965
+ "answer": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineering manager (kim) \u2192 CTO.\n---\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all from an old mobile app build. told them to upgrade. no internal services affected.\n---\nPR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
966
+ "hits": [
967
+ {
968
+ "text": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineeri",
969
+ "score": 0.6500120162963867,
970
+ "source": "pentatonic-memory",
971
+ "doc_id": "on-call-rotation"
972
+ },
973
+ {
974
+ "text": "[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all ",
975
+ "score": 0.2469588965177536,
976
+ "source": "pentatonic-memory",
977
+ "doc_id": "chat-deprecation"
978
+ },
979
+ {
980
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
981
+ "score": 0.22115403413772583,
982
+ "source": "pentatonic-memory",
983
+ "doc_id": "pr-52-search-migration"
984
+ }
985
+ ],
986
+ "correct": true,
987
+ "score": 1.0,
988
+ "grading_notes": "all substrings matched",
989
+ "search_time_ms": 181.58889500773512,
990
+ "generation_time_ms": 0.0,
991
+ "tokens_in": 0,
992
+ "tokens_out": 0,
993
+ "retrieval_tokens": 411,
994
+ "query_tokens": 20,
995
+ "context_tokens": 391,
996
+ "judge_tokens_in": 0,
997
+ "judge_tokens_out": 0,
998
+ "judge_latency_ms": 0.0
999
+ },
1000
+ {
1001
+ "task_id": "contradiction-algorithm",
1002
+ "query": "Which JWT signing algorithm does auth-svc use \u2014 HS256 or RS256?",
1003
+ "answer": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session_ttl_minutes: 60\n refresh_ttl_days: 30\n\nsearch:\n url: http://search-svc.internal:8080\n api_key: ${SEARCH_API_KEY}\n default_collection: products\n default_hybrid_weight: 0.6",
1004
+ "hits": [
1005
+ {
1006
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
1007
+ "score": 0.572783350944519,
1008
+ "source": "pentatonic-memory",
1009
+ "doc_id": "commit-b2c3d4"
1010
+ },
1011
+ {
1012
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
1013
+ "score": 0.6221848130226135,
1014
+ "source": "pentatonic-memory",
1015
+ "doc_id": "readme-auth"
1016
+ },
1017
+ {
1018
+ "text": "# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session",
1019
+ "score": 0.3809075355529785,
1020
+ "source": "pentatonic-memory",
1021
+ "doc_id": "config-sample"
1022
+ }
1023
+ ],
1024
+ "correct": true,
1025
+ "score": 1.0,
1026
+ "grading_notes": "all substrings matched",
1027
+ "search_time_ms": 163.09103701496497,
1028
+ "generation_time_ms": 0.0,
1029
+ "tokens_in": 0,
1030
+ "tokens_out": 0,
1031
+ "retrieval_tokens": 524,
1032
+ "query_tokens": 16,
1033
+ "context_tokens": 508,
1034
+ "judge_tokens_in": 0,
1035
+ "judge_tokens_out": 0,
1036
+ "judge_latency_ms": 0.0
1037
+ },
1038
+ {
1039
+ "task_id": "rubric-security-advice",
1040
+ "query": "A new engineer asks: can I add HS256 as a fallback for older clients in auth-svc? Advise them.",
1041
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\ncommit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.\n---\nPR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
1042
+ "hits": [
1043
+ {
1044
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
1045
+ "score": 0.44216230511665344,
1046
+ "source": "pentatonic-memory",
1047
+ "doc_id": "readme-auth"
1048
+ },
1049
+ {
1050
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
1051
+ "score": 0.45988526940345764,
1052
+ "source": "pentatonic-memory",
1053
+ "doc_id": "commit-b2c3d4"
1054
+ },
1055
+ {
1056
+ "text": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
1057
+ "score": 0.40329015254974365,
1058
+ "source": "pentatonic-memory",
1059
+ "doc_id": "pr-61-jwt-cleanup"
1060
+ }
1061
+ ],
1062
+ "correct": true,
1063
+ "score": 1.0,
1064
+ "grading_notes": "judge=1.00 \u2014 The answer explicitly says no / don't / avoid HS256 and cites that all services only hold the public key since the RS256",
1065
+ "search_time_ms": 140.2046139992308,
1066
+ "generation_time_ms": 0.0,
1067
+ "tokens_in": 0,
1068
+ "tokens_out": 0,
1069
+ "retrieval_tokens": 463,
1070
+ "query_tokens": 25,
1071
+ "context_tokens": 438,
1072
+ "judge_tokens_in": 751,
1073
+ "judge_tokens_out": 42,
1074
+ "judge_latency_ms": 1048.672226011753
1075
+ },
1076
+ {
1077
+ "task_id": "rubric-incident-lesson",
1078
+ "query": "We saw SignatureVerificationError in billing-svc logs right after a secret rotation. What's our SOP?",
1079
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\nSecurity bulletin 2026-03-22: rotate all API keys quarterly\n\nPolicy update. Effective 2026-04-01:\n- All third-party API keys (Stripe, Twilio, OpenAI, SendGrid, ...) rotate\n every 90 days. Automation in platform/secret-rotation/ handles this.\n- Internal JWT signing keys rotate yearly (RS256 makes this painless).\n- Database credentials rotate on-demand on suspected compromise.\n\nOwners:\n- billing-svc API keys \u2014 jordan\n- auth-svc signing keys \u2014 priya\n- search-svc API keys \u2014 alex\n\nNext rotation window: 2026-07-01.",
1080
+ "hits": [
1081
+ {
1082
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
1083
+ "score": 0.6220386624336243,
1084
+ "source": "pentatonic-memory",
1085
+ "doc_id": "runbook-billing"
1086
+ },
1087
+ {
1088
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
1089
+ "score": 0.6209463477134705,
1090
+ "source": "pentatonic-memory",
1091
+ "doc_id": "incident-2026-03-02-webhook"
1092
+ },
1093
+ {
1094
+ "text": "Security bulletin 2026-03-22: rotate all API keys quarterly\n\nPolicy update. Effective 2026-04-01:\n- All third-party API keys (Stripe, Twilio, OpenAI, SendGrid, ...) rotate\n every 90 days. Automation in platform/secret-rotation/ handles this.\n- Internal JWT signing keys rotate yearly (RS256 makes this painless).\n- Database credentials rotate on-demand on suspected compromise.\n\nOwners:\n- billing-sv",
1095
+ "score": 0.46955186128616333,
1096
+ "source": "pentatonic-memory",
1097
+ "doc_id": "security-rotation"
1098
+ }
1099
+ ],
1100
+ "correct": false,
1101
+ "score": 0.5,
1102
+ "grading_notes": "judge=0.50 \u2014 The answer mentions a pod restart (kubectl rollout restart) but does not explicitly reference that secrets only load at ",
1103
+ "search_time_ms": 138.89391400152817,
1104
+ "generation_time_ms": 0.0,
1105
+ "tokens_in": 0,
1106
+ "tokens_out": 0,
1107
+ "retrieval_tokens": 512,
1108
+ "query_tokens": 21,
1109
+ "context_tokens": 491,
1110
+ "judge_tokens_in": 784,
1111
+ "judge_tokens_out": 38,
1112
+ "judge_latency_ms": 1022.7693279981613
1113
+ }
1114
+ ]
1115
+ }