@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1115 @@
1
+ {
2
+ "bench": "agent-coding",
3
+ "stack": "pentatonic-memory",
4
+ "n_tasks": 22,
5
+ "n_correct": 14,
6
+ "accuracy": 0.6363636363636364,
7
+ "mean_score": 0.7272727272727273,
8
+ "p50_search_ms": 23.723827995127067,
9
+ "p95_search_ms": 31.46001180721214,
10
+ "total_tokens_in": 0,
11
+ "total_tokens_out": 0,
12
+ "total_usd": 0.0,
13
+ "by_tag": {
14
+ "api-signature": {
15
+ "n": 3,
16
+ "mean_score": 1.0,
17
+ "accuracy": 1.0
18
+ },
19
+ "bug-fix-recall": {
20
+ "n": 3,
21
+ "mean_score": 0.8333333333333334,
22
+ "accuracy": 0.6666666666666666
23
+ },
24
+ "deprecation": {
25
+ "n": 3,
26
+ "mean_score": 0.3333333333333333,
27
+ "accuracy": 0.3333333333333333
28
+ },
29
+ "config-recall": {
30
+ "n": 3,
31
+ "mean_score": 1.0,
32
+ "accuracy": 1.0
33
+ },
34
+ "pr-rationale": {
35
+ "n": 2,
36
+ "mean_score": 0.5,
37
+ "accuracy": 0.0
38
+ },
39
+ "cross-file-refactor": {
40
+ "n": 2,
41
+ "mean_score": 0.5,
42
+ "accuracy": 0.5
43
+ },
44
+ "stack-trace-match": {
45
+ "n": 1,
46
+ "mean_score": 1.0,
47
+ "accuracy": 1.0
48
+ },
49
+ "temporal": {
50
+ "n": 2,
51
+ "mean_score": 0.5,
52
+ "accuracy": 0.5
53
+ },
54
+ "contradiction": {
55
+ "n": 1,
56
+ "mean_score": 1.0,
57
+ "accuracy": 1.0
58
+ },
59
+ "rubric": {
60
+ "n": 2,
61
+ "mean_score": 0.75,
62
+ "accuracy": 0.5
63
+ }
64
+ },
65
+ "extra": {
66
+ "ingest_ms": 23966.172009008005,
67
+ "grading": "substring",
68
+ "limit": 3,
69
+ "tokens": {
70
+ "corpus_tokens": 5164,
71
+ "query_tokens": 339,
72
+ "context_tokens": 11496,
73
+ "retrieval_tokens": 11835,
74
+ "naive_tokens": 113947,
75
+ "saved_tokens": 102112,
76
+ "reduction_pct": 0.8961359228413209,
77
+ "mean_retrieval_tokens_per_task": 537.9545454545455,
78
+ "tokenizer": "cl100k_base",
79
+ "per_task": {
80
+ "api-invoice-signature": {
81
+ "query": 18,
82
+ "context": 510,
83
+ "retrieval": 528,
84
+ "judge_in": 0,
85
+ "judge_out": 0,
86
+ "judge_latency_ms": 0.0
87
+ },
88
+ "api-search-shape": {
89
+ "query": 16,
90
+ "context": 458,
91
+ "retrieval": 474,
92
+ "judge_in": 0,
93
+ "judge_out": 0,
94
+ "judge_latency_ms": 0.0
95
+ },
96
+ "api-invoice-amount-type": {
97
+ "query": 16,
98
+ "context": 545,
99
+ "retrieval": 561,
100
+ "judge_in": 0,
101
+ "judge_out": 0,
102
+ "judge_latency_ms": 0.0
103
+ },
104
+ "bugfix-stripe-rounding": {
105
+ "query": 10,
106
+ "context": 542,
107
+ "retrieval": 552,
108
+ "judge_in": 0,
109
+ "judge_out": 0,
110
+ "judge_latency_ms": 0.0
111
+ },
112
+ "bugfix-webhook-dup": {
113
+ "query": 10,
114
+ "context": 538,
115
+ "retrieval": 548,
116
+ "judge_in": 0,
117
+ "judge_out": 0,
118
+ "judge_latency_ms": 0.0
119
+ },
120
+ "bugfix-rounding-direction": {
121
+ "query": 19,
122
+ "context": 500,
123
+ "retrieval": 519,
124
+ "judge_in": 0,
125
+ "judge_out": 0,
126
+ "judge_latency_ms": 0.0
127
+ },
128
+ "deprecation-v1": {
129
+ "query": 11,
130
+ "context": 481,
131
+ "retrieval": 492,
132
+ "judge_in": 0,
133
+ "judge_out": 0,
134
+ "judge_latency_ms": 0.0
135
+ },
136
+ "deprecation-hs256": {
137
+ "query": 11,
138
+ "context": 577,
139
+ "retrieval": 588,
140
+ "judge_in": 0,
141
+ "judge_out": 0,
142
+ "judge_latency_ms": 0.0
143
+ },
144
+ "deprecation-search-tuple": {
145
+ "query": 13,
146
+ "context": 458,
147
+ "retrieval": 471,
148
+ "judge_in": 0,
149
+ "judge_out": 0,
150
+ "judge_latency_ms": 0.0
151
+ },
152
+ "config-retry-policy-base": {
153
+ "query": 15,
154
+ "context": 524,
155
+ "retrieval": 539,
156
+ "judge_in": 0,
157
+ "judge_out": 0,
158
+ "judge_latency_ms": 0.0
159
+ },
160
+ "config-session-ttl": {
161
+ "query": 14,
162
+ "context": 558,
163
+ "retrieval": 572,
164
+ "judge_in": 0,
165
+ "judge_out": 0,
166
+ "judge_latency_ms": 0.0
167
+ },
168
+ "config-search-hybrid-weight": {
169
+ "query": 11,
170
+ "context": 458,
171
+ "retrieval": 469,
172
+ "judge_in": 0,
173
+ "judge_out": 0,
174
+ "judge_latency_ms": 0.0
175
+ },
176
+ "pr-rationale-retry": {
177
+ "query": 19,
178
+ "context": 569,
179
+ "retrieval": 588,
180
+ "judge_in": 0,
181
+ "judge_out": 0,
182
+ "judge_latency_ms": 0.0
183
+ },
184
+ "pr-rationale-jwt-cleanup": {
185
+ "query": 13,
186
+ "context": 558,
187
+ "retrieval": 571,
188
+ "judge_in": 0,
189
+ "judge_out": 0,
190
+ "judge_latency_ms": 0.0
191
+ },
192
+ "cross-ref-invoicing": {
193
+ "query": 14,
194
+ "context": 551,
195
+ "retrieval": 565,
196
+ "judge_in": 0,
197
+ "judge_out": 0,
198
+ "judge_latency_ms": 0.0
199
+ },
200
+ "cross-ref-ratelimit": {
201
+ "query": 15,
202
+ "context": 505,
203
+ "retrieval": 520,
204
+ "judge_in": 0,
205
+ "judge_out": 0,
206
+ "judge_latency_ms": 0.0
207
+ },
208
+ "stack-trace-webhook": {
209
+ "query": 21,
210
+ "context": 527,
211
+ "retrieval": 548,
212
+ "judge_in": 0,
213
+ "judge_out": 0,
214
+ "judge_latency_ms": 0.0
215
+ },
216
+ "temporal-numpy": {
217
+ "query": 11,
218
+ "context": 515,
219
+ "retrieval": 526,
220
+ "judge_in": 0,
221
+ "judge_out": 0,
222
+ "judge_latency_ms": 0.0
223
+ },
224
+ "temporal-on-call": {
225
+ "query": 20,
226
+ "context": 572,
227
+ "retrieval": 592,
228
+ "judge_in": 0,
229
+ "judge_out": 0,
230
+ "judge_latency_ms": 0.0
231
+ },
232
+ "contradiction-algorithm": {
233
+ "query": 16,
234
+ "context": 506,
235
+ "retrieval": 522,
236
+ "judge_in": 0,
237
+ "judge_out": 0,
238
+ "judge_latency_ms": 0.0
239
+ },
240
+ "rubric-security-advice": {
241
+ "query": 25,
242
+ "context": 506,
243
+ "retrieval": 531,
244
+ "judge_in": 819,
245
+ "judge_out": 42,
246
+ "judge_latency_ms": 1898.8169969916344
247
+ },
248
+ "rubric-incident-lesson": {
249
+ "query": 21,
250
+ "context": 538,
251
+ "retrieval": 559,
252
+ "judge_in": 831,
253
+ "judge_out": 33,
254
+ "judge_latency_ms": 1775.7281960248947
255
+ }
256
+ },
257
+ "judge_tokens_in": 1650,
258
+ "judge_tokens_out": 75,
259
+ "judge_calls": 2,
260
+ "judge_mean_latency_ms": 1837.2725965082645
261
+ },
262
+ "cost_usd": {
263
+ "assumed_completion_tokens_per_task": 100,
264
+ "rates": {
265
+ "input_per_1k": 0.0025,
266
+ "output_per_1k": 0.01,
267
+ "model": "gpt-4o"
268
+ },
269
+ "retrieval_usd_in": 0.029587500000000003,
270
+ "retrieval_usd_out": 0.022,
271
+ "retrieval_usd_total": 0.0515875,
272
+ "naive_usd_total": 0.3068675,
273
+ "saved_usd": 0.25528,
274
+ "saved_usd_per_1k_tasks": 11.603636363636365
275
+ }
276
+ },
277
+ "task_results": [
278
+ {
279
+ "task_id": "api-invoice-signature",
280
+ "query": "What's the request body shape for POST /v2/invoices on billing-svc?",
281
+ "answer": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing float amounts returns 400.\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature errors, see runbook-billing.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.",
282
+ "hits": [
283
+ {
284
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
285
+ "score": 0.5234486235427094,
286
+ "source": "pentatonic-memory",
287
+ "doc_id": "api-billing-invoice"
288
+ },
289
+ {
290
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
291
+ "score": 0.5128006379857712,
292
+ "source": "pentatonic-memory",
293
+ "doc_id": "cross-ref-invoicing"
294
+ },
295
+ {
296
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
297
+ "score": 0.4950212144232699,
298
+ "source": "pentatonic-memory",
299
+ "doc_id": "readme-billing"
300
+ }
301
+ ],
302
+ "correct": true,
303
+ "score": 1.0,
304
+ "grading_notes": "all substrings matched",
305
+ "search_time_ms": 32.146587007446215,
306
+ "generation_time_ms": 0.0,
307
+ "tokens_in": 0,
308
+ "tokens_out": 0,
309
+ "retrieval_tokens": 528,
310
+ "query_tokens": 18,
311
+ "context_tokens": 510,
312
+ "judge_tokens_in": 0,
313
+ "judge_tokens_out": 0,
314
+ "judge_latency_ms": 0.0
315
+ },
316
+ {
317
+ "task_id": "api-search-shape",
318
+ "query": "What's the response shape of POST /v3/search on search-svc?",
319
+ "answer": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vector).\n- Max limit is 100 (hard cap in config).\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')\n---\nPR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
320
+ "hits": [
321
+ {
322
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
323
+ "score": 0.6493952468588011,
324
+ "source": "pentatonic-memory",
325
+ "doc_id": "api-search-query"
326
+ },
327
+ {
328
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
329
+ "score": 0.6397569972936464,
330
+ "source": "pentatonic-memory",
331
+ "doc_id": "readme-search"
332
+ },
333
+ {
334
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
335
+ "score": 0.6232839044452452,
336
+ "source": "pentatonic-memory",
337
+ "doc_id": "pr-52-search-migration"
338
+ }
339
+ ],
340
+ "correct": true,
341
+ "score": 1.0,
342
+ "grading_notes": "all substrings matched",
343
+ "search_time_ms": 27.56941900588572,
344
+ "generation_time_ms": 0.0,
345
+ "tokens_in": 0,
346
+ "tokens_out": 0,
347
+ "retrieval_tokens": 474,
348
+ "query_tokens": 16,
349
+ "context_tokens": 458,
350
+ "judge_tokens_in": 0,
351
+ "judge_tokens_out": 0,
352
+ "judge_latency_ms": 0.0
353
+ },
354
+ {
355
+ "task_id": "api-invoice-amount-type",
356
+ "query": "When calling POST /v2/invoices, must amount be integer or float?",
357
+ "answer": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing float amounts returns 400.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency,\n )\n )\n```\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature error",
358
+ "hits": [
359
+ {
360
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
361
+ "score": 0.6491271421290412,
362
+ "source": "pentatonic-memory",
363
+ "doc_id": "api-billing-invoice"
364
+ },
365
+ {
366
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
367
+ "score": 0.6309107760595571,
368
+ "source": "pentatonic-memory",
369
+ "doc_id": "code-stripe-client"
370
+ },
371
+ {
372
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
373
+ "score": 0.6220265628484812,
374
+ "source": "pentatonic-memory",
375
+ "doc_id": "cross-ref-invoicing"
376
+ }
377
+ ],
378
+ "correct": true,
379
+ "score": 1.0,
380
+ "grading_notes": "all substrings matched",
381
+ "search_time_ms": 25.806121004279703,
382
+ "generation_time_ms": 0.0,
383
+ "tokens_in": 0,
384
+ "tokens_out": 0,
385
+ "retrieval_tokens": 561,
386
+ "query_tokens": 16,
387
+ "context_tokens": 545,
388
+ "judge_tokens_in": 0,
389
+ "judge_tokens_out": 0,
390
+ "judge_latency_ms": 0.0
391
+ },
392
+ {
393
+ "task_id": "bugfix-stripe-rounding",
394
+ "query": "Which commit fixed the stripe float-amount bug?",
395
+ "answer": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return s",
396
+ "hits": [
397
+ {
398
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
399
+ "score": 0.6340595841514459,
400
+ "source": "pentatonic-memory",
401
+ "doc_id": "incident-2026-03-02-webhook"
402
+ },
403
+ {
404
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
405
+ "score": 0.6304819950864657,
406
+ "source": "pentatonic-memory",
407
+ "doc_id": "runbook-billing"
408
+ },
409
+ {
410
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
411
+ "score": 0.6261928813329707,
412
+ "source": "pentatonic-memory",
413
+ "doc_id": "code-stripe-client"
414
+ }
415
+ ],
416
+ "correct": true,
417
+ "score": 1.0,
418
+ "grading_notes": "all substrings matched",
419
+ "search_time_ms": 20.16072001424618,
420
+ "generation_time_ms": 0.0,
421
+ "tokens_in": 0,
422
+ "tokens_out": 0,
423
+ "retrieval_tokens": 552,
424
+ "query_tokens": 10,
425
+ "context_tokens": 542,
426
+ "judge_tokens_in": 0,
427
+ "judge_tokens_out": 0,
428
+ "judge_latency_ms": 0.0
429
+ },
430
+ {
431
+ "task_id": "bugfix-webhook-dup",
432
+ "query": "How did we fix the duplicate Stripe webhook handling?",
433
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.",
434
+ "hits": [
435
+ {
436
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
437
+ "score": 0.6652784013702207,
438
+ "source": "pentatonic-memory",
439
+ "doc_id": "runbook-billing"
440
+ },
441
+ {
442
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
443
+ "score": 0.6526478911682948,
444
+ "source": "pentatonic-memory",
445
+ "doc_id": "incident-2026-03-02-webhook"
446
+ },
447
+ {
448
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
449
+ "score": 0.619938009454783,
450
+ "source": "pentatonic-memory",
451
+ "doc_id": "readme-billing"
452
+ }
453
+ ],
454
+ "correct": false,
455
+ "score": 0.5,
456
+ "grading_notes": "missing 1/2: ['webhook_events']",
457
+ "search_time_ms": 17.374802991980687,
458
+ "generation_time_ms": 0.0,
459
+ "tokens_in": 0,
460
+ "tokens_out": 0,
461
+ "retrieval_tokens": 548,
462
+ "query_tokens": 10,
463
+ "context_tokens": 538,
464
+ "judge_tokens_in": 0,
465
+ "judge_tokens_out": 0,
466
+ "judge_latency_ms": 0.0
467
+ },
468
+ {
469
+ "task_id": "bugfix-rounding-direction",
470
+ "query": "When converting a float amount to cents, do we round up, down, or to nearest?",
471
+ "answer": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency,\n )\n )\n```\n---\nPR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')",
472
+ "hits": [
473
+ {
474
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
475
+ "score": 0.6101332294217348,
476
+ "source": "pentatonic-memory",
477
+ "doc_id": "code-stripe-client"
478
+ },
479
+ {
480
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
481
+ "score": 0.5819129164283284,
482
+ "source": "pentatonic-memory",
483
+ "doc_id": "pr-52-search-migration"
484
+ },
485
+ {
486
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
487
+ "score": 0.5732720961433274,
488
+ "source": "pentatonic-memory",
489
+ "doc_id": "readme-search"
490
+ }
491
+ ],
492
+ "correct": true,
493
+ "score": 1.0,
494
+ "grading_notes": "all substrings matched",
495
+ "search_time_ms": 21.919121994869784,
496
+ "generation_time_ms": 0.0,
497
+ "tokens_in": 0,
498
+ "tokens_out": 0,
499
+ "retrieval_tokens": 519,
500
+ "query_tokens": 19,
501
+ "context_tokens": 500,
502
+ "judge_tokens_in": 0,
503
+ "judge_tokens_out": 0,
504
+ "judge_latency_ms": 0.0
505
+ },
506
+ {
507
+ "task_id": "deprecation-v1",
508
+ "query": "Are the /api/v1/ endpoints still supported?",
509
+ "answer": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')",
510
+ "hits": [
511
+ {
512
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
513
+ "score": 0.5884919119283378,
514
+ "source": "pentatonic-memory",
515
+ "doc_id": "pr-52-search-migration"
516
+ },
517
+ {
518
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
519
+ "score": 0.5864806843871985,
520
+ "source": "pentatonic-memory",
521
+ "doc_id": "readme-auth"
522
+ },
523
+ {
524
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
525
+ "score": 0.5826976459817697,
526
+ "source": "pentatonic-memory",
527
+ "doc_id": "readme-search"
528
+ }
529
+ ],
530
+ "correct": false,
531
+ "score": 0.0,
532
+ "grading_notes": "missing 1/1: ['410']",
533
+ "search_time_ms": 20.105327013880014,
534
+ "generation_time_ms": 0.0,
535
+ "tokens_in": 0,
536
+ "tokens_out": 0,
537
+ "retrieval_tokens": 492,
538
+ "query_tokens": 11,
539
+ "context_tokens": 481,
540
+ "judge_tokens_in": 0,
541
+ "judge_tokens_out": 0,
542
+ "judge_latency_ms": 0.0
543
+ },
544
+ {
545
+ "task_id": "deprecation-hs256",
546
+ "query": "Can auth-svc still accept HS256 JWTs?",
547
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# monorepo ARCHITECTURE.md\n\n## Services\n- **gateway** \u2014 edge reverse proxy, terminates TLS, routes to services\n- **auth-svc** \u2014 JWT mint + verify, OIDC provider (RS256 since b2c3d4e5)\n- **billing-svc** \u2014 Stripe integration, invoicing, webhooks\n- **search-svc** \u2014 vector + BM25 hybrid search (vector-db backend since c3d4e5f6)\n- **order-svc** \u2014 order lifecycle\n- **email-svc** \u2014 transactional email (SendGrid)\n- **reporting-svc** \u2014 financial + ops dashboards\n\n## Inter-service RPC\nProtobuf over gRPC for hot paths (order/billing/auth). JSON over HTTP for\nplatform ops (admin panel, health checks). See ADR-007.\n\n## Language/runtime\n- Python 3.12 minimum (ADR-011)\n- Node 20 LTS for the gateway only (historical)\n- Go for the sealed-secrets operator\n\n## Data\n- Postgres 16 per-service\n- Redis for rate limits + ephemeral queues\n- Vector-DB for search vectors\n- S3 for",
548
+ "hits": [
549
+ {
550
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
551
+ "score": 0.6733812697159586,
552
+ "source": "pentatonic-memory",
553
+ "doc_id": "readme-auth"
554
+ },
555
+ {
556
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
557
+ "score": 0.626365947146506,
558
+ "source": "pentatonic-memory",
559
+ "doc_id": "runbook-billing"
560
+ },
561
+ {
562
+ "text": "# monorepo ARCHITECTURE.md\n\n## Services\n- **gateway** \u2014 edge reverse proxy, terminates TLS, routes to services\n- **auth-svc** \u2014 JWT mint + verify, OIDC provider (RS256 since b2c3d4e5)\n- **billing-svc** \u2014 Stripe integration, invoicing, webhooks\n- **search-svc** \u2014 vector + BM25 hybrid search (vector-db backend since c3d4e5f6)\n- **order-svc** \u2014 order lifecycle\n- **email-svc** \u2014 transactional email (Send",
563
+ "score": 0.6090713787319404,
564
+ "source": "pentatonic-memory",
565
+ "doc_id": "arch-overview"
566
+ }
567
+ ],
568
+ "correct": true,
569
+ "score": 1.0,
570
+ "grading_notes": "all substrings matched",
571
+ "search_time_ms": 24.6878860052675,
572
+ "generation_time_ms": 0.0,
573
+ "tokens_in": 0,
574
+ "tokens_out": 0,
575
+ "retrieval_tokens": 588,
576
+ "query_tokens": 11,
577
+ "context_tokens": 577,
578
+ "judge_tokens_in": 0,
579
+ "judge_tokens_out": 0,
580
+ "judge_latency_ms": 0.0
581
+ },
582
+ {
583
+ "task_id": "deprecation-search-tuple",
584
+ "query": "What does SearchClient.search() return today \u2014 tuples or dicts?",
585
+ "answer": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')\n---\nPR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.\n---\n## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vector).\n- Max limit is 100 (hard cap in config).",
586
+ "hits": [
587
+ {
588
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
589
+ "score": 0.6626000593379382,
590
+ "source": "pentatonic-memory",
591
+ "doc_id": "readme-search"
592
+ },
593
+ {
594
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
595
+ "score": 0.6277946915240228,
596
+ "source": "pentatonic-memory",
597
+ "doc_id": "pr-52-search-migration"
598
+ },
599
+ {
600
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
601
+ "score": 0.6237372786079042,
602
+ "source": "pentatonic-memory",
603
+ "doc_id": "api-search-query"
604
+ }
605
+ ],
606
+ "correct": false,
607
+ "score": 0.0,
608
+ "grading_notes": "missing 1/1: ['dict']",
609
+ "search_time_ms": 26.071515982039273,
610
+ "generation_time_ms": 0.0,
611
+ "tokens_in": 0,
612
+ "tokens_out": 0,
613
+ "retrieval_tokens": 471,
614
+ "query_tokens": 13,
615
+ "context_tokens": 458,
616
+ "judge_tokens_in": 0,
617
+ "judge_tokens_out": 0,
618
+ "judge_latency_ms": 0.0
619
+ },
620
+ {
621
+ "task_id": "config-retry-policy-base",
622
+ "query": "What's the default stripe retry base delay in ms on billing-svc?",
623
+ "answer": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency,\n )\n )\n```",
624
+ "hits": [
625
+ {
626
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
627
+ "score": 0.6641875127767956,
628
+ "source": "pentatonic-memory",
629
+ "doc_id": "readme-billing"
630
+ },
631
+ {
632
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
633
+ "score": 0.6449784142751414,
634
+ "source": "pentatonic-memory",
635
+ "doc_id": "runbook-billing"
636
+ },
637
+ {
638
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
639
+ "score": 0.6355303653702338,
640
+ "source": "pentatonic-memory",
641
+ "doc_id": "code-stripe-client"
642
+ }
643
+ ],
644
+ "correct": true,
645
+ "score": 1.0,
646
+ "grading_notes": "all substrings matched",
647
+ "search_time_ms": 24.91915298742242,
648
+ "generation_time_ms": 0.0,
649
+ "tokens_in": 0,
650
+ "tokens_out": 0,
651
+ "retrieval_tokens": 539,
652
+ "query_tokens": 15,
653
+ "context_tokens": 524,
654
+ "judge_tokens_in": 0,
655
+ "judge_tokens_out": 0,
656
+ "judge_latency_ms": 0.0
657
+ },
658
+ {
659
+ "task_id": "config-session-ttl",
660
+ "query": "What's the default SESSION_TTL_MINUTES on auth-svc?",
661
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.",
662
+ "hits": [
663
+ {
664
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
665
+ "score": 0.8868297332722167,
666
+ "source": "pentatonic-memory",
667
+ "doc_id": "readme-auth"
668
+ },
669
+ {
670
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
671
+ "score": 0.6156977309295835,
672
+ "source": "pentatonic-memory",
673
+ "doc_id": "runbook-billing"
674
+ },
675
+ {
676
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
677
+ "score": 0.6086505214346937,
678
+ "source": "pentatonic-memory",
679
+ "doc_id": "incident-2026-03-02-webhook"
680
+ }
681
+ ],
682
+ "correct": true,
683
+ "score": 1.0,
684
+ "grading_notes": "all substrings matched",
685
+ "search_time_ms": 26.754050981253386,
686
+ "generation_time_ms": 0.0,
687
+ "tokens_in": 0,
688
+ "tokens_out": 0,
689
+ "retrieval_tokens": 572,
690
+ "query_tokens": 14,
691
+ "context_tokens": 558,
692
+ "judge_tokens_in": 0,
693
+ "judge_tokens_out": 0,
694
+ "judge_latency_ms": 0.0
695
+ },
696
+ {
697
+ "task_id": "config-search-hybrid-weight",
698
+ "query": "What's the default hybrid_weight for search-svc?",
699
+ "answer": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vector).\n- Max limit is 100 (hard cap in config).\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (default: http://search-svc.internal:8080)\n- `SEARCH_API_KEY` (required)\n- `QDRANT_COLLECTION` (default: 'products')\n---\nPR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
700
+ "hits": [
701
+ {
702
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
703
+ "score": 0.6699708397910742,
704
+ "source": "pentatonic-memory",
705
+ "doc_id": "api-search-query"
706
+ },
707
+ {
708
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
709
+ "score": 0.6453127346775563,
710
+ "source": "pentatonic-memory",
711
+ "doc_id": "readme-search"
712
+ },
713
+ {
714
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
715
+ "score": 0.6129850651583699,
716
+ "source": "pentatonic-memory",
717
+ "doc_id": "pr-52-search-migration"
718
+ }
719
+ ],
720
+ "correct": true,
721
+ "score": 1.0,
722
+ "grading_notes": "all substrings matched",
723
+ "search_time_ms": 20.51944300183095,
724
+ "generation_time_ms": 0.0,
725
+ "tokens_in": 0,
726
+ "tokens_out": 0,
727
+ "retrieval_tokens": 469,
728
+ "query_tokens": 11,
729
+ "context_tokens": 458,
730
+ "judge_tokens_in": 0,
731
+ "judge_tokens_out": 0,
732
+ "judge_latency_ms": 0.0
733
+ },
734
+ {
735
+ "task_id": "pr-rationale-retry",
736
+ "query": "Why did PR #47 choose base=200ms for stripe retries rather than 500ms?",
737
+ "answer": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n customer=customer_id,\n amount=self._to_cents(amount),\n currency=currency,\n )\n )\n```\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`",
738
+ "hits": [
739
+ {
740
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
741
+ "score": 0.6442610719830753,
742
+ "source": "pentatonic-memory",
743
+ "doc_id": "code-stripe-client"
744
+ },
745
+ {
746
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
747
+ "score": 0.6375518535975762,
748
+ "source": "pentatonic-memory",
749
+ "doc_id": "readme-billing"
750
+ },
751
+ {
752
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
753
+ "score": 0.6105348147054268,
754
+ "source": "pentatonic-memory",
755
+ "doc_id": "readme-auth"
756
+ }
757
+ ],
758
+ "correct": false,
759
+ "score": 0.5,
760
+ "grading_notes": "missing 1/2: ['conservative']",
761
+ "search_time_ms": 23.105117987142876,
762
+ "generation_time_ms": 0.0,
763
+ "tokens_in": 0,
764
+ "tokens_out": 0,
765
+ "retrieval_tokens": 588,
766
+ "query_tokens": 19,
767
+ "context_tokens": 569,
768
+ "judge_tokens_in": 0,
769
+ "judge_tokens_out": 0,
770
+ "judge_latency_ms": 0.0
771
+ },
772
+ {
773
+ "task_id": "pr-rationale-jwt-cleanup",
774
+ "query": "What did PR #61 clean up and why was it safe?",
775
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.",
776
+ "hits": [
777
+ {
778
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
779
+ "score": 0.5771770068053738,
780
+ "source": "pentatonic-memory",
781
+ "doc_id": "readme-auth"
782
+ },
783
+ {
784
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
785
+ "score": 0.5745496565957877,
786
+ "source": "pentatonic-memory",
787
+ "doc_id": "incident-2026-03-02-webhook"
788
+ },
789
+ {
790
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
791
+ "score": 0.5713664267039132,
792
+ "source": "pentatonic-memory",
793
+ "doc_id": "runbook-billing"
794
+ }
795
+ ],
796
+ "correct": false,
797
+ "score": 0.5,
798
+ "grading_notes": "missing 1/2: ['JWT_SECRET']",
799
+ "search_time_ms": 19.333734991960227,
800
+ "generation_time_ms": 0.0,
801
+ "tokens_in": 0,
802
+ "tokens_out": 0,
803
+ "retrieval_tokens": 571,
804
+ "query_tokens": 13,
805
+ "context_tokens": 558,
806
+ "judge_tokens_in": 0,
807
+ "judge_tokens_out": 0,
808
+ "judge_latency_ms": 0.0
809
+ },
810
+ {
811
+ "task_id": "cross-ref-invoicing",
812
+ "query": "After Stripe posts invoice.payment_succeeded, which services consume the event?",
813
+ "answer": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature errors, see runbook-billing.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign * math.floor(abs(amount) * 100)\n\n def charge(self, *, customer_id: str, amount: float, currency: str = 'gbp') -> dict:\n return self._with_retry(\n lambda: stripe.Charge.create(\n cust",
814
+ "hits": [
815
+ {
816
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
817
+ "score": 0.6602750059501441,
818
+ "source": "pentatonic-memory",
819
+ "doc_id": "cross-ref-invoicing"
820
+ },
821
+ {
822
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
823
+ "score": 0.637876650269287,
824
+ "source": "pentatonic-memory",
825
+ "doc_id": "readme-billing"
826
+ },
827
+ {
828
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
829
+ "score": 0.6285438325236996,
830
+ "source": "pentatonic-memory",
831
+ "doc_id": "code-stripe-client"
832
+ }
833
+ ],
834
+ "correct": true,
835
+ "score": 1.0,
836
+ "grading_notes": "all substrings matched",
837
+ "search_time_ms": 23.52536199032329,
838
+ "generation_time_ms": 0.0,
839
+ "tokens_in": 0,
840
+ "tokens_out": 0,
841
+ "retrieval_tokens": 565,
842
+ "query_tokens": 14,
843
+ "context_tokens": 551,
844
+ "judge_tokens_in": 0,
845
+ "judge_tokens_out": 0,
846
+ "judge_latency_ms": 0.0
847
+ },
848
+ {
849
+ "task_id": "cross-ref-ratelimit",
850
+ "query": "Where does the shared rate-limit lib live and what's the method signature?",
851
+ "answer": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services",
852
+ "hits": [
853
+ {
854
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
855
+ "score": 0.6127739554707295,
856
+ "source": "pentatonic-memory",
857
+ "doc_id": "readme-billing"
858
+ },
859
+ {
860
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
861
+ "score": 0.6119995587510915,
862
+ "source": "pentatonic-memory",
863
+ "doc_id": "runbook-billing"
864
+ },
865
+ {
866
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
867
+ "score": 0.609809810293523,
868
+ "source": "pentatonic-memory",
869
+ "doc_id": "readme-auth"
870
+ }
871
+ ],
872
+ "correct": false,
873
+ "score": 0.0,
874
+ "grading_notes": "missing 2/2: ['shared/rate_limit.py', 'allow']",
875
+ "search_time_ms": 23.320078995311633,
876
+ "generation_time_ms": 0.0,
877
+ "tokens_in": 0,
878
+ "tokens_out": 0,
879
+ "retrieval_tokens": 520,
880
+ "query_tokens": 15,
881
+ "context_tokens": 505,
882
+ "judge_tokens_in": 0,
883
+ "judge_tokens_out": 0,
884
+ "judge_latency_ms": 0.0
885
+ },
886
+ {
887
+ "task_id": "stack-trace-webhook",
888
+ "query": "If a SignatureVerificationError happens on billing-svc webhook, what's the most likely cause and fix?",
889
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn webhook signature errors, see runbook-b",
890
+ "hits": [
891
+ {
892
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
893
+ "score": 0.7119100391751223,
894
+ "source": "pentatonic-memory",
895
+ "doc_id": "runbook-billing"
896
+ },
897
+ {
898
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
899
+ "score": 0.6685187427620528,
900
+ "source": "pentatonic-memory",
901
+ "doc_id": "incident-2026-03-02-webhook"
902
+ },
903
+ {
904
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
905
+ "score": 0.6557678973037321,
906
+ "source": "pentatonic-memory",
907
+ "doc_id": "cross-ref-invoicing"
908
+ }
909
+ ],
910
+ "correct": true,
911
+ "score": 1.0,
912
+ "grading_notes": "all substrings matched",
913
+ "search_time_ms": 23.922293999930844,
914
+ "generation_time_ms": 0.0,
915
+ "tokens_in": 0,
916
+ "tokens_out": 0,
917
+ "retrieval_tokens": 548,
918
+ "query_tokens": 21,
919
+ "context_tokens": 527,
920
+ "judge_tokens_in": 0,
921
+ "judge_tokens_out": 0,
922
+ "judge_latency_ms": 0.0
923
+ },
924
+ {
925
+ "task_id": "temporal-numpy",
926
+ "query": "Why was numpy upgraded to 1.26 recently?",
927
+ "answer": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# Feature flags (ops/flags.yaml)\n\nflags:\n billing.new_invoice_pdf:\n default: false\n description: 'Use the new weasyprint-based PDF renderer (vs the legacy wkhtmltopdf).'\n owner: jordan\n target_audience:\n - enterprise_customers: 100%\n - retail_customers: 20% # canary\n\n search.hybrid_v2:\n default: true\n description: 'Use the hybrid (vector + BM25) search path. Rollback key if vector-db melts.'\n owner: alex\n target_audience:\n - all: 100%\n\n auth.mfa_required:\n default: false\n description: 'Require TOTP MFA on login. Staged rollout to admin accounts first.'\n owner: priya\n target_audience:\n - admin_users: 100%\n - paying_users: 0% # not yet\n - free_users: 0%",
928
+ "hits": [
929
+ {
930
+ "text": "PR #52: Migrate /search call-sites to new result shape\nAuthor: alex\nStatus: merged\n\nFollow-up to the vector-db switch in c3d4e5f6. 14 call-sites updated to\nread .score / .payload instead of tuple indexing. Most were in\nrecommendations-svc. Caught one in ops-dashboard that was still using\nresult[0] \u2014 would have silently broken the live dashboard after deploy.\nMerged 2026-02-21.",
931
+ "score": 0.6055320123739515,
932
+ "source": "pentatonic-memory",
933
+ "doc_id": "pr-52-search-migration"
934
+ },
935
+ {
936
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
937
+ "score": 0.6027458380600648,
938
+ "source": "pentatonic-memory",
939
+ "doc_id": "readme-auth"
940
+ },
941
+ {
942
+ "text": "# Feature flags (ops/flags.yaml)\n\nflags:\n billing.new_invoice_pdf:\n default: false\n description: 'Use the new weasyprint-based PDF renderer (vs the legacy wkhtmltopdf).'\n owner: jordan\n target_audience:\n - enterprise_customers: 100%\n - retail_customers: 20% # canary\n\n search.hybrid_v2:\n default: true\n description: 'Use the hybrid (vector + BM25) search path. Rollback",
943
+ "score": 0.5953137468578912,
944
+ "source": "pentatonic-memory",
945
+ "doc_id": "flags-feature"
946
+ }
947
+ ],
948
+ "correct": false,
949
+ "score": 0.0,
950
+ "grading_notes": "missing 2/2: ['Python 3.12', 'wheels']",
951
+ "search_time_ms": 19.953389011789113,
952
+ "generation_time_ms": 0.0,
953
+ "tokens_in": 0,
954
+ "tokens_out": 0,
955
+ "retrieval_tokens": 526,
956
+ "query_tokens": 11,
957
+ "context_tokens": 515,
958
+ "judge_tokens_in": 0,
959
+ "judge_tokens_out": 0,
960
+ "judge_latency_ms": 0.0
961
+ },
962
+ {
963
+ "task_id": "temporal-on-call",
964
+ "query": "Who is the primary on-call for the week of April 15\u201321, 2026?",
965
+ "answer": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineering manager (kim) \u2192 CTO.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.",
966
+ "hits": [
967
+ {
968
+ "text": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineeri",
969
+ "score": 0.7794018626150157,
970
+ "source": "pentatonic-memory",
971
+ "doc_id": "on-call-rotation"
972
+ },
973
+ {
974
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
975
+ "score": 0.5821010124125434,
976
+ "source": "pentatonic-memory",
977
+ "doc_id": "readme-auth"
978
+ },
979
+ {
980
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
981
+ "score": 0.5789362698521102,
982
+ "source": "pentatonic-memory",
983
+ "doc_id": "incident-2026-03-02-webhook"
984
+ }
985
+ ],
986
+ "correct": true,
987
+ "score": 1.0,
988
+ "grading_notes": "all substrings matched",
989
+ "search_time_ms": 24.065143981715664,
990
+ "generation_time_ms": 0.0,
991
+ "tokens_in": 0,
992
+ "tokens_out": 0,
993
+ "retrieval_tokens": 592,
994
+ "query_tokens": 20,
995
+ "context_tokens": 572,
996
+ "judge_tokens_in": 0,
997
+ "judge_tokens_out": 0,
998
+ "judge_latency_ms": 0.0
999
+ },
1000
+ {
1001
+ "task_id": "contradiction-algorithm",
1002
+ "query": "Which JWT signing algorithm does auth-svc use \u2014 HS256 or RS256?",
1003
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.",
1004
+ "hits": [
1005
+ {
1006
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
1007
+ "score": 0.6794511051998786,
1008
+ "source": "pentatonic-memory",
1009
+ "doc_id": "readme-auth"
1010
+ },
1011
+ {
1012
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
1013
+ "score": 0.6399190695023318,
1014
+ "source": "pentatonic-memory",
1015
+ "doc_id": "runbook-billing"
1016
+ },
1017
+ {
1018
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
1019
+ "score": 0.6194263270592827,
1020
+ "source": "pentatonic-memory",
1021
+ "doc_id": "readme-billing"
1022
+ }
1023
+ ],
1024
+ "correct": true,
1025
+ "score": 1.0,
1026
+ "grading_notes": "all substrings matched",
1027
+ "search_time_ms": 22.98831701045856,
1028
+ "generation_time_ms": 0.0,
1029
+ "tokens_in": 0,
1030
+ "tokens_out": 0,
1031
+ "retrieval_tokens": 522,
1032
+ "query_tokens": 16,
1033
+ "context_tokens": 506,
1034
+ "judge_tokens_in": 0,
1035
+ "judge_tokens_out": 0,
1036
+ "judge_latency_ms": 0.0
1037
+ },
1038
+ {
1039
+ "task_id": "rubric-security-advice",
1040
+ "query": "A new engineer asks: can I add HS256 as a fallback for older clients in auth-svc? Advise them.",
1041
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192 `{access_token}`\n- `POST /v2/revoke {refresh_token}` \u2192 204\n- `GET /v2/.well-known/jwks.json` \u2014 the public JWKS for downstream services\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.",
1042
+ "hits": [
1043
+ {
1044
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
1045
+ "score": 0.6589155108095069,
1046
+ "source": "pentatonic-memory",
1047
+ "doc_id": "readme-auth"
1048
+ },
1049
+ {
1050
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
1051
+ "score": 0.6223584514709991,
1052
+ "source": "pentatonic-memory",
1053
+ "doc_id": "readme-billing"
1054
+ },
1055
+ {
1056
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
1057
+ "score": 0.6204836039661172,
1058
+ "source": "pentatonic-memory",
1059
+ "doc_id": "runbook-billing"
1060
+ }
1061
+ ],
1062
+ "correct": true,
1063
+ "score": 1.0,
1064
+ "grading_notes": "judge=1.00 \u2014 The answer explicitly says no / don't / avoid HS256 and cites that all services only hold the public key since the RS256",
1065
+ "search_time_ms": 26.804178982274607,
1066
+ "generation_time_ms": 0.0,
1067
+ "tokens_in": 0,
1068
+ "tokens_out": 0,
1069
+ "retrieval_tokens": 531,
1070
+ "query_tokens": 25,
1071
+ "context_tokens": 506,
1072
+ "judge_tokens_in": 819,
1073
+ "judge_tokens_out": 42,
1074
+ "judge_latency_ms": 1898.8169969916344
1075
+ },
1076
+ {
1077
+ "task_id": "rubric-incident-lesson",
1078
+ "query": "We saw SignatureVerificationError in billing-svc logs right after a secret rotation. What's our SOP?",
1079
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.\n\n**Action items:**\n1. Add webhook-secret rotation to the release checklist \u2014 force a rollout\n restart when the secret changes. (jordan, done 2026-03-04.)\n2. Investigate vault-secrets-operator for live-reload (priya, tracking).\n\n**Lesson:** any rotated secret that's only read at startup needs an explicit\npod-restart step. We'll codify this in the runbook.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITECTURE.md for the full picture.",
1080
+ "hits": [
1081
+ {
1082
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
1083
+ "score": 0.6796027221698456,
1084
+ "source": "pentatonic-memory",
1085
+ "doc_id": "runbook-billing"
1086
+ },
1087
+ {
1088
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
1089
+ "score": 0.6541125046797448,
1090
+ "source": "pentatonic-memory",
1091
+ "doc_id": "incident-2026-03-02-webhook"
1092
+ },
1093
+ {
1094
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
1095
+ "score": 0.6314274357753429,
1096
+ "source": "pentatonic-memory",
1097
+ "doc_id": "readme-billing"
1098
+ }
1099
+ ],
1100
+ "correct": false,
1101
+ "score": 0.5,
1102
+ "grading_notes": "judge=0.50 \u2014 The answer mentions a pod restart but does not explicitly reference that secrets only load at pod startup.",
1103
+ "search_time_ms": 24.85710399923846,
1104
+ "generation_time_ms": 0.0,
1105
+ "tokens_in": 0,
1106
+ "tokens_out": 0,
1107
+ "retrieval_tokens": 559,
1108
+ "query_tokens": 21,
1109
+ "context_tokens": 538,
1110
+ "judge_tokens_in": 831,
1111
+ "judge_tokens_out": 33,
1112
+ "judge_latency_ms": 1775.7281960248947
1113
+ }
1114
+ ]
1115
+ }