@pentatonic-ai/ai-agent-sdk 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +170 -69
  2. package/bin/__tests__/callback-server.test.js +4 -1
  3. package/bin/cli.js +41 -164
  4. package/bin/commands/config.js +251 -0
  5. package/package.json +2 -1
  6. package/packages/doctor/__tests__/detect.test.js +2 -6
  7. package/packages/doctor/src/checks/local-memory.js +164 -196
  8. package/packages/doctor/src/detect.js +11 -3
  9. package/packages/memory/src/corpus/adapters.js +104 -0
  10. package/packages/memory/src/corpus/cli.js +72 -7
  11. package/packages/memory/src/corpus/index.js +1 -1
  12. package/packages/memory-engine/.env.example +13 -0
  13. package/packages/memory-engine/README.md +131 -0
  14. package/packages/memory-engine/bench/README.md +99 -0
  15. package/packages/memory-engine/bench/scorecards-engine/agent-coding__pentatonic-baseline__20260427-142523.json +1115 -0
  16. package/packages/memory-engine/bench/scorecards-engine/chat-recall__pentatonic-baseline__20260427-142648.json +819 -0
  17. package/packages/memory-engine/bench/scorecards-engine/circular-economy__pentatonic-baseline__20260427-142757.json +1278 -0
  18. package/packages/memory-engine/bench/scorecards-engine/customer-support__pentatonic-baseline__20260427-142900.json +1018 -0
  19. package/packages/memory-engine/bench/scorecards-engine/marketplace-ops__pentatonic-baseline__20260427-142957.json +1038 -0
  20. package/packages/memory-engine/bench/scorecards-engine/product-catalogue__pentatonic-baseline__20260427-143122.json +961 -0
  21. package/packages/memory-engine/bench/scorecards-engine-via-docker/agent-coding__pentatonic-memory__20260427-161812.json +1115 -0
  22. package/packages/memory-engine/bench/scorecards-engine-via-docker/chat-recall__pentatonic-memory__20260427-161701.json +819 -0
  23. package/packages/memory-engine/bench/scorecards-engine-via-docker/circular-economy__pentatonic-memory__20260427-161713.json +1278 -0
  24. package/packages/memory-engine/bench/scorecards-engine-via-docker/customer-support__pentatonic-memory__20260427-161723.json +1018 -0
  25. package/packages/memory-engine/bench/scorecards-engine-via-docker/marketplace-ops__pentatonic-memory__20260427-161732.json +1038 -0
  26. package/packages/memory-engine/bench/scorecards-engine-via-docker/product-catalogue__pentatonic-memory__20260427-161741.json +937 -0
  27. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/agent-coding__pentatonic-memory__20260427-184718.json +1115 -0
  28. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/chat-recall__pentatonic-memory__20260427-184614.json +819 -0
  29. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/circular-economy__pentatonic-memory__20260427-184809.json +1278 -0
  30. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/customer-support__pentatonic-memory__20260427-184854.json +1018 -0
  31. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/marketplace-ops__pentatonic-memory__20260427-184929.json +1038 -0
  32. package/packages/memory-engine/bench/scorecards-engine-via-l2-7-layer-populated/product-catalogue__pentatonic-memory__20260427-185015.json +961 -0
  33. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/agent-coding__pentatonic-memory__20260427-175252.json +1115 -0
  34. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/chat-recall__pentatonic-memory__20260427-175312.json +819 -0
  35. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/circular-economy__pentatonic-memory__20260427-175335.json +1278 -0
  36. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/customer-support__pentatonic-memory__20260427-175355.json +1018 -0
  37. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/marketplace-ops__pentatonic-memory__20260427-175413.json +1038 -0
  38. package/packages/memory-engine/bench/scorecards-engine-via-l2-empty-layers/product-catalogue__pentatonic-memory__20260427-175430.json +883 -0
  39. package/packages/memory-engine/bench/scorecards-engine-via-shim/agent-coding__pentatonic-memory__20260427-155409.json +1115 -0
  40. package/packages/memory-engine/bench/scorecards-engine-via-shim/chat-recall__pentatonic-memory__20260427-155421.json +819 -0
  41. package/packages/memory-engine/bench/scorecards-engine-via-shim/circular-economy__pentatonic-memory__20260427-155433.json +1278 -0
  42. package/packages/memory-engine/bench/scorecards-engine-via-shim/customer-support__pentatonic-memory__20260427-155443.json +1018 -0
  43. package/packages/memory-engine/bench/scorecards-engine-via-shim/marketplace-ops__pentatonic-memory__20260427-155453.json +1038 -0
  44. package/packages/memory-engine/bench/scorecards-engine-via-shim/product-catalogue__pentatonic-memory__20260427-155503.json +937 -0
  45. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory-latest__20260427-145103.json +1115 -0
  46. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/agent-coding__pentatonic-memory__20260427-144909.json +1115 -0
  47. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory-latest__20260427-145153.json +819 -0
  48. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/chat-recall__pentatonic-memory__20260427-145120.json +542 -0
  49. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory-latest__20260427-145313.json +1278 -0
  50. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/circular-economy__pentatonic-memory__20260427-145207.json +894 -0
  51. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory-latest__20260427-145412.json +1018 -0
  52. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/customer-support__pentatonic-memory__20260427-145327.json +680 -0
  53. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory-latest__20260427-145517.json +1038 -0
  54. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/marketplace-ops__pentatonic-memory__20260427-145422.json +693 -0
  55. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory-latest__20260427-145616.json +961 -0
  56. package/packages/memory-engine/bench/scorecards-pentatonic-baseline/product-catalogue__pentatonic-memory__20260427-145528.json +727 -0
  57. package/packages/memory-engine/compat/Dockerfile +11 -0
  58. package/packages/memory-engine/compat/server.py +680 -0
  59. package/packages/memory-engine/docker-compose.yml +243 -0
  60. package/packages/memory-engine/docs/MIGRATION.md +178 -0
  61. package/packages/memory-engine/docs/RUNBOOK-AWS.md +375 -0
  62. package/packages/memory-engine/docs/why-v05-underperforms.md +138 -0
  63. package/packages/memory-engine/engine/README.md +52 -0
  64. package/packages/memory-engine/engine/l2-hybridrag-proxy.py +1543 -0
  65. package/packages/memory-engine/engine/l5-comms-layer.py +663 -0
  66. package/packages/memory-engine/engine/l6-document-store.py +1018 -0
  67. package/packages/memory-engine/engine/services/l2/Dockerfile +41 -0
  68. package/packages/memory-engine/engine/services/l2/init_databases.py +81 -0
  69. package/packages/memory-engine/engine/services/l2/l2-hybridrag-proxy.py +1543 -0
  70. package/packages/memory-engine/engine/services/l4/Dockerfile +15 -0
  71. package/packages/memory-engine/engine/services/l4/server.py +235 -0
  72. package/packages/memory-engine/engine/services/l5/Dockerfile +9 -0
  73. package/packages/memory-engine/engine/services/l5/l5-comms-layer.py +678 -0
  74. package/packages/memory-engine/engine/services/l6/Dockerfile +11 -0
  75. package/packages/memory-engine/engine/services/l6/l6-document-store.py +1016 -0
  76. package/packages/memory-engine/engine/services/nv-embed/Dockerfile +28 -0
  77. package/packages/memory-engine/engine/services/nv-embed/server.py +152 -0
  78. package/packages/memory-engine/pme_memory/__init__.py +0 -0
  79. package/packages/memory-engine/pme_memory/__main__.py +129 -0
  80. package/packages/memory-engine/pme_memory/artifacts.py +95 -0
  81. package/packages/memory-engine/pme_memory/embed.py +74 -0
  82. package/packages/memory-engine/pme_memory/health.py +36 -0
  83. package/packages/memory-engine/pme_memory/hygiene.py +159 -0
  84. package/packages/memory-engine/pme_memory/indexer.py +200 -0
  85. package/packages/memory-engine/pme_memory/needs.py +55 -0
  86. package/packages/memory-engine/pme_memory/provenance.py +80 -0
  87. package/packages/memory-engine/pme_memory/scoring.py +168 -0
  88. package/packages/memory-engine/pme_memory/search.py +52 -0
  89. package/packages/memory-engine/pme_memory/store.py +86 -0
  90. package/packages/memory-engine/pme_memory/synthesis.py +114 -0
  91. package/packages/memory-engine/pyproject.toml +65 -0
  92. package/packages/memory-engine/scripts/kg-extractor.py +557 -0
  93. package/packages/memory-engine/scripts/kg-preflexor-v2.py +738 -0
  94. package/packages/memory-engine/tests/test_api_contract.sh +57 -0
@@ -0,0 +1,1115 @@
1
+ {
2
+ "bench": "agent-coding",
3
+ "stack": "pentatonic-baseline",
4
+ "n_tasks": 22,
5
+ "n_correct": 20,
6
+ "accuracy": 0.9090909090909091,
7
+ "mean_score": 0.9545454545454546,
8
+ "p50_search_ms": 1267.9544819984585,
9
+ "p95_search_ms": 1691.2675759536796,
10
+ "total_tokens_in": 0,
11
+ "total_tokens_out": 0,
12
+ "total_usd": 0.0,
13
+ "by_tag": {
14
+ "api-signature": {
15
+ "n": 3,
16
+ "mean_score": 1.0,
17
+ "accuracy": 1.0
18
+ },
19
+ "bug-fix-recall": {
20
+ "n": 3,
21
+ "mean_score": 1.0,
22
+ "accuracy": 1.0
23
+ },
24
+ "deprecation": {
25
+ "n": 3,
26
+ "mean_score": 1.0,
27
+ "accuracy": 1.0
28
+ },
29
+ "config-recall": {
30
+ "n": 3,
31
+ "mean_score": 1.0,
32
+ "accuracy": 1.0
33
+ },
34
+ "pr-rationale": {
35
+ "n": 2,
36
+ "mean_score": 1.0,
37
+ "accuracy": 1.0
38
+ },
39
+ "cross-file-refactor": {
40
+ "n": 2,
41
+ "mean_score": 1.0,
42
+ "accuracy": 1.0
43
+ },
44
+ "stack-trace-match": {
45
+ "n": 1,
46
+ "mean_score": 1.0,
47
+ "accuracy": 1.0
48
+ },
49
+ "temporal": {
50
+ "n": 2,
51
+ "mean_score": 1.0,
52
+ "accuracy": 1.0
53
+ },
54
+ "contradiction": {
55
+ "n": 1,
56
+ "mean_score": 1.0,
57
+ "accuracy": 1.0
58
+ },
59
+ "rubric": {
60
+ "n": 2,
61
+ "mean_score": 0.5,
62
+ "accuracy": 0.0
63
+ }
64
+ },
65
+ "extra": {
66
+ "ingest_ms": 63144.58434400149,
67
+ "grading": "substring",
68
+ "limit": 3,
69
+ "tokens": {
70
+ "corpus_tokens": 5164,
71
+ "query_tokens": 339,
72
+ "context_tokens": 9063,
73
+ "retrieval_tokens": 9402,
74
+ "naive_tokens": 113947,
75
+ "saved_tokens": 104545,
76
+ "reduction_pct": 0.9174879549264131,
77
+ "mean_retrieval_tokens_per_task": 427.3636363636364,
78
+ "tokenizer": "cl100k_base",
79
+ "per_task": {
80
+ "api-invoice-signature": {
81
+ "query": 18,
82
+ "context": 455,
83
+ "retrieval": 473,
84
+ "judge_in": 0,
85
+ "judge_out": 0,
86
+ "judge_latency_ms": 0.0
87
+ },
88
+ "api-search-shape": {
89
+ "query": 16,
90
+ "context": 451,
91
+ "retrieval": 467,
92
+ "judge_in": 0,
93
+ "judge_out": 0,
94
+ "judge_latency_ms": 0.0
95
+ },
96
+ "api-invoice-amount-type": {
97
+ "query": 16,
98
+ "context": 430,
99
+ "retrieval": 446,
100
+ "judge_in": 0,
101
+ "judge_out": 0,
102
+ "judge_latency_ms": 0.0
103
+ },
104
+ "bugfix-stripe-rounding": {
105
+ "query": 10,
106
+ "context": 330,
107
+ "retrieval": 340,
108
+ "judge_in": 0,
109
+ "judge_out": 0,
110
+ "judge_latency_ms": 0.0
111
+ },
112
+ "bugfix-webhook-dup": {
113
+ "query": 10,
114
+ "context": 324,
115
+ "retrieval": 334,
116
+ "judge_in": 0,
117
+ "judge_out": 0,
118
+ "judge_latency_ms": 0.0
119
+ },
120
+ "bugfix-rounding-direction": {
121
+ "query": 19,
122
+ "context": 376,
123
+ "retrieval": 395,
124
+ "judge_in": 0,
125
+ "judge_out": 0,
126
+ "judge_latency_ms": 0.0
127
+ },
128
+ "deprecation-v1": {
129
+ "query": 11,
130
+ "context": 423,
131
+ "retrieval": 434,
132
+ "judge_in": 0,
133
+ "judge_out": 0,
134
+ "judge_latency_ms": 0.0
135
+ },
136
+ "deprecation-hs256": {
137
+ "query": 11,
138
+ "context": 431,
139
+ "retrieval": 442,
140
+ "judge_in": 0,
141
+ "judge_out": 0,
142
+ "judge_latency_ms": 0.0
143
+ },
144
+ "deprecation-search-tuple": {
145
+ "query": 13,
146
+ "context": 412,
147
+ "retrieval": 425,
148
+ "judge_in": 0,
149
+ "judge_out": 0,
150
+ "judge_latency_ms": 0.0
151
+ },
152
+ "config-retry-policy-base": {
153
+ "query": 15,
154
+ "context": 410,
155
+ "retrieval": 425,
156
+ "judge_in": 0,
157
+ "judge_out": 0,
158
+ "judge_latency_ms": 0.0
159
+ },
160
+ "config-session-ttl": {
161
+ "query": 14,
162
+ "context": 445,
163
+ "retrieval": 459,
164
+ "judge_in": 0,
165
+ "judge_out": 0,
166
+ "judge_latency_ms": 0.0
167
+ },
168
+ "config-search-hybrid-weight": {
169
+ "query": 11,
170
+ "context": 451,
171
+ "retrieval": 462,
172
+ "judge_in": 0,
173
+ "judge_out": 0,
174
+ "judge_latency_ms": 0.0
175
+ },
176
+ "pr-rationale-retry": {
177
+ "query": 19,
178
+ "context": 409,
179
+ "retrieval": 428,
180
+ "judge_in": 0,
181
+ "judge_out": 0,
182
+ "judge_latency_ms": 0.0
183
+ },
184
+ "pr-rationale-jwt-cleanup": {
185
+ "query": 13,
186
+ "context": 350,
187
+ "retrieval": 363,
188
+ "judge_in": 0,
189
+ "judge_out": 0,
190
+ "judge_latency_ms": 0.0
191
+ },
192
+ "cross-ref-invoicing": {
193
+ "query": 14,
194
+ "context": 472,
195
+ "retrieval": 486,
196
+ "judge_in": 0,
197
+ "judge_out": 0,
198
+ "judge_latency_ms": 0.0
199
+ },
200
+ "cross-ref-ratelimit": {
201
+ "query": 15,
202
+ "context": 404,
203
+ "retrieval": 419,
204
+ "judge_in": 0,
205
+ "judge_out": 0,
206
+ "judge_latency_ms": 0.0
207
+ },
208
+ "stack-trace-webhook": {
209
+ "query": 21,
210
+ "context": 394,
211
+ "retrieval": 415,
212
+ "judge_in": 0,
213
+ "judge_out": 0,
214
+ "judge_latency_ms": 0.0
215
+ },
216
+ "temporal-numpy": {
217
+ "query": 11,
218
+ "context": 431,
219
+ "retrieval": 442,
220
+ "judge_in": 0,
221
+ "judge_out": 0,
222
+ "judge_latency_ms": 0.0
223
+ },
224
+ "temporal-on-call": {
225
+ "query": 20,
226
+ "context": 432,
227
+ "retrieval": 452,
228
+ "judge_in": 0,
229
+ "judge_out": 0,
230
+ "judge_latency_ms": 0.0
231
+ },
232
+ "contradiction-algorithm": {
233
+ "query": 16,
234
+ "context": 396,
235
+ "retrieval": 412,
236
+ "judge_in": 0,
237
+ "judge_out": 0,
238
+ "judge_latency_ms": 0.0
239
+ },
240
+ "rubric-security-advice": {
241
+ "query": 25,
242
+ "context": 444,
243
+ "retrieval": 469,
244
+ "judge_in": 757,
245
+ "judge_out": 37,
246
+ "judge_latency_ms": 1715.3592600226402
247
+ },
248
+ "rubric-incident-lesson": {
249
+ "query": 21,
250
+ "context": 393,
251
+ "retrieval": 414,
252
+ "judge_in": 687,
253
+ "judge_out": 35,
254
+ "judge_latency_ms": 1701.407715022564
255
+ }
256
+ },
257
+ "judge_tokens_in": 1444,
258
+ "judge_tokens_out": 72,
259
+ "judge_calls": 2,
260
+ "judge_mean_latency_ms": 1708.383487522602
261
+ },
262
+ "cost_usd": {
263
+ "assumed_completion_tokens_per_task": 100,
264
+ "rates": {
265
+ "input_per_1k": 0.0025,
266
+ "output_per_1k": 0.01,
267
+ "model": "gpt-4o"
268
+ },
269
+ "retrieval_usd_in": 0.023504999999999998,
270
+ "retrieval_usd_out": 0.022,
271
+ "retrieval_usd_total": 0.045505,
272
+ "naive_usd_total": 0.3068675,
273
+ "saved_usd": 0.2613625,
274
+ "saved_usd_per_1k_tasks": 11.880113636363637
275
+ }
276
+ },
277
+ "task_results": [
278
+ {
279
+ "task_id": "api-invoice-signature",
280
+ "query": "What's the request body shape for POST /v2/invoices on billing-svc?",
281
+ "answer": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consume\n---\n## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vecto",
282
+ "hits": [
283
+ {
284
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
285
+ "score": 0.7466,
286
+ "source": "pentatonic-baseline:L0_workspace_bm25",
287
+ "doc_id": "api-billing-invoice"
288
+ },
289
+ {
290
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
291
+ "score": 0.6158,
292
+ "source": "pentatonic-baseline:L0_workspace_bm25",
293
+ "doc_id": "cross-ref-invoicing"
294
+ },
295
+ {
296
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
297
+ "score": 0.6023,
298
+ "source": "pentatonic-baseline:L0_workspace_bm25",
299
+ "doc_id": "api-search-query"
300
+ }
301
+ ],
302
+ "correct": true,
303
+ "score": 1.0,
304
+ "grading_notes": "all substrings matched",
305
+ "search_time_ms": 1254.5839749800507,
306
+ "generation_time_ms": 0.0,
307
+ "tokens_in": 0,
308
+ "tokens_out": 0,
309
+ "retrieval_tokens": 473,
310
+ "query_tokens": 18,
311
+ "context_tokens": 455,
312
+ "judge_tokens_in": 0,
313
+ "judge_tokens_out": 0,
314
+ "judge_latency_ms": 0.0
315
+ },
316
+ {
317
+ "task_id": "api-search-shape",
318
+ "query": "What's the response shape of POST /v3/search on search-svc?",
319
+ "answer": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vecto\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (defaul\n---\n# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = None, hybrid_weight: float = 0.6) -> list[dict]:\n # Post-c3d4e5f6: returns list[dict] not list[t",
320
+ "hits": [
321
+ {
322
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
323
+ "score": 0.6658,
324
+ "source": "pentatonic-baseline:L0_workspace_bm25",
325
+ "doc_id": "api-search-query"
326
+ },
327
+ {
328
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
329
+ "score": 0.6209,
330
+ "source": "pentatonic-baseline:L0_workspace_bm25",
331
+ "doc_id": "readme-search"
332
+ },
333
+ {
334
+ "text": "# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = Non",
335
+ "score": 0.6005,
336
+ "source": "pentatonic-baseline:L0_workspace_bm25",
337
+ "doc_id": "code-search-client"
338
+ }
339
+ ],
340
+ "correct": true,
341
+ "score": 1.0,
342
+ "grading_notes": "all substrings matched",
343
+ "search_time_ms": 1246.5275280119386,
344
+ "generation_time_ms": 0.0,
345
+ "tokens_in": 0,
346
+ "tokens_out": 0,
347
+ "retrieval_tokens": 467,
348
+ "query_tokens": 16,
349
+ "context_tokens": 451,
350
+ "judge_tokens_in": 0,
351
+ "judge_tokens_out": 0,
352
+ "judge_latency_ms": 0.0
353
+ },
354
+ {
355
+ "task_id": "api-invoice-amount-type",
356
+ "query": "When calling POST /v2/invoices, must amount be integer or float?",
357
+ "answer": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers:**\n- `Idempotency-Key` (optional, required for retries)\n\nAmounts MUST be integers (cents). Passing\n---\n# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (never overcharge)\n assert isinstance(amount_cents, int)\n assert amount_cents == 1299\n\ndef test_\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign *",
358
+ "hits": [
359
+ {
360
+ "text": "## billing-svc: POST /v2/invoices\n\nCreate an invoice for a customer.\n\n**Request body:**\n```json\n{\"customer_id\": \"cus_abc\", \"amount_cents\": 1299, \"currency\": \"gbp\", \"description\": \"Annual plan\"}\n```\n\n**Responses:**\n- `201` \u2014 invoice created: `{\"id\": \"inv_xyz\", \"status\": \"draft\", \"pdf_url\": \"...\"}`\n- `400` \u2014 validation error\n- `404` \u2014 customer not found\n- `409` \u2014 duplicate idempotency_key\n\n**Headers",
361
+ "score": 0.6832,
362
+ "source": "pentatonic-baseline:L0_workspace_bm25",
363
+ "doc_id": "api-billing-invoice"
364
+ },
365
+ {
366
+ "text": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (n",
367
+ "score": 0.649,
368
+ "source": "pentatonic-baseline:L0_workspace_bm25",
369
+ "doc_id": "test-payments-rounding"
370
+ },
371
+ {
372
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
373
+ "score": 0.6122,
374
+ "source": "pentatonic-baseline:L0_workspace_bm25",
375
+ "doc_id": "code-stripe-client"
376
+ }
377
+ ],
378
+ "correct": true,
379
+ "score": 1.0,
380
+ "grading_notes": "all substrings matched",
381
+ "search_time_ms": 1750.7977380009834,
382
+ "generation_time_ms": 0.0,
383
+ "tokens_in": 0,
384
+ "tokens_out": 0,
385
+ "retrieval_tokens": 446,
386
+ "query_tokens": 16,
387
+ "context_tokens": 430,
388
+ "judge_tokens_in": 0,
389
+ "judge_tokens_out": 0,
390
+ "judge_latency_ms": 0.0
391
+ },
392
+ {
393
+ "task_id": "bugfix-stripe-rounding",
394
+ "query": "Which commit fixed the stripe float-amount bug?",
395
+ "answer": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.\n---\ncommit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.\n---\n# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (never overcharge)\n assert isinstance(amount_cents, int)\n assert amount_cents == 1299\n\ndef test_",
396
+ "hits": [
397
+ {
398
+ "text": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
399
+ "score": 0.7629,
400
+ "source": "pentatonic-baseline",
401
+ "doc_id": "commit-a1b2c3"
402
+ },
403
+ {
404
+ "text": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
405
+ "score": 0.6804,
406
+ "source": "pentatonic-baseline:L0_workspace_bm25",
407
+ "doc_id": "commit-a1b2c3"
408
+ },
409
+ {
410
+ "text": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (n",
411
+ "score": 0.6632,
412
+ "source": "pentatonic-baseline:L0_workspace_bm25",
413
+ "doc_id": "test-payments-rounding"
414
+ }
415
+ ],
416
+ "correct": true,
417
+ "score": 1.0,
418
+ "grading_notes": "all substrings matched",
419
+ "search_time_ms": 1239.0598890196998,
420
+ "generation_time_ms": 0.0,
421
+ "tokens_in": 0,
422
+ "tokens_out": 0,
423
+ "retrieval_tokens": 340,
424
+ "query_tokens": 10,
425
+ "context_tokens": 330,
426
+ "judge_tokens_in": 0,
427
+ "judge_tokens_out": 0,
428
+ "judge_latency_ms": 0.0
429
+ },
430
+ {
431
+ "task_id": "bugfix-webhook-dup",
432
+ "query": "How did we fix the duplicate Stripe webhook handling?",
433
+ "answer": "Issue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #76. Closed 2026-02-26.\n---\nIssue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #76. Closed 2026-02-26.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.",
434
+ "hits": [
435
+ {
436
+ "text": "Issue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #7",
437
+ "score": 0.7903,
438
+ "source": "pentatonic-baseline",
439
+ "doc_id": "issue-88-webhook-dup"
440
+ },
441
+ {
442
+ "text": "Issue #88: Stripe webhook handler processes duplicate events\nReporter: support\nStatus: resolved\n\nProd saw the same event_id land in webhook_events twice when Stripe retried\na 500. Handler was non-idempotent \u2014 it assumed first-delivery. Fix:\nadded unique constraint on webhook_events.stripe_event_id; handler now\nattempts INSERT ON CONFLICT DO NOTHING and treats the duplicate as a no-op.\nMerged PR #7",
443
+ "score": 0.7427,
444
+ "source": "pentatonic-baseline:L0_workspace_bm25",
445
+ "doc_id": "issue-88-webhook-dup"
446
+ },
447
+ {
448
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
449
+ "score": 0.6326,
450
+ "source": "pentatonic-baseline",
451
+ "doc_id": "runbook-billing"
452
+ }
453
+ ],
454
+ "correct": true,
455
+ "score": 1.0,
456
+ "grading_notes": "all substrings matched",
457
+ "search_time_ms": 1166.5845589886885,
458
+ "generation_time_ms": 0.0,
459
+ "tokens_in": 0,
460
+ "tokens_out": 0,
461
+ "retrieval_tokens": 334,
462
+ "query_tokens": 10,
463
+ "context_tokens": 324,
464
+ "judge_tokens_in": 0,
465
+ "judge_tokens_out": 0,
466
+ "judge_latency_ms": 0.0
467
+ },
468
+ {
469
+ "task_id": "bugfix-rounding-direction",
470
+ "query": "When converting a float amount to cents, do we round up, down, or to nearest?",
471
+ "answer": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (never overcharge)\n assert isinstance(amount_cents, int)\n assert amount_cents == 1299\n\ndef test_\n---\n# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero so we never overcharge (see a1b2c3d4).\n sign = 1 if amount >= 0 else -1\n return sign *\n---\ncommit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
472
+ "hits": [
473
+ {
474
+ "text": "# tests/test_payments.py\n\n```python\nimport math\nfrom billing.stripe_client import StripeClient\n\ndef test_rounding():\n # Reproduction of the float-amount bug fixed in a1b2c3d4\n client = StripeClient(api_key='sk_test_xxx')\n # Amount comes in as a float due to upstream division\n amount_float = 12.99001\n amount_cents = client._to_cents(amount_float)\n # Must be an int, rounded DOWN (n",
475
+ "score": 0.6878,
476
+ "source": "pentatonic-baseline:L0_workspace_bm25",
477
+ "doc_id": "test-payments-rounding"
478
+ },
479
+ {
480
+ "text": "# billing/stripe_client.py (excerpt)\n\n```python\nimport math\nimport stripe\n\nclass StripeClient:\n def __init__(self, api_key: str, retry_policy: dict | None = None):\n stripe.api_key = api_key\n self.retry_policy = retry_policy or {'base_ms': 200, 'max_ms': 8000, 'max_retries': 5, 'jitter': True}\n\n @staticmethod\n def _to_cents(amount: float) -> int:\n # Floor towards zero ",
481
+ "score": 0.6293,
482
+ "source": "pentatonic-baseline:L0_workspace_bm25",
483
+ "doc_id": "code-stripe-client"
484
+ },
485
+ {
486
+ "text": "commit a1b2c3d4\nAuthor: jordan@example.com\nDate: 2026-02-10T14:30:00Z\n\nfix(payments): round stripe amounts to cents before submit\n\nStripe.charges.create() was receiving floats like 12.99001, which the API\nrejects with amount_integer_required. Added `math.floor(amount_cents)`\nbefore the call. Unit test added in tests/test_payments.py::test_rounding.",
487
+ "score": 0.6111,
488
+ "source": "pentatonic-baseline:L0_workspace_bm25",
489
+ "doc_id": "commit-a1b2c3"
490
+ }
491
+ ],
492
+ "correct": true,
493
+ "score": 1.0,
494
+ "grading_notes": "all substrings matched",
495
+ "search_time_ms": 1257.2615180106368,
496
+ "generation_time_ms": 0.0,
497
+ "tokens_in": 0,
498
+ "tokens_out": 0,
499
+ "retrieval_tokens": 395,
500
+ "query_tokens": 19,
501
+ "context_tokens": 376,
502
+ "judge_tokens_in": 0,
503
+ "judge_tokens_out": 0,
504
+ "judge_latency_ms": 0.0
505
+ },
506
+ {
507
+ "task_id": "deprecation-v1",
508
+ "query": "Are the /api/v1/ endpoints still supported?",
509
+ "answer": "commit d4e5f6a7\nAuthor: jordan@example.com\nDate: 2026-02-17T16:20:00Z\n\ndeprecate: remove legacy /api/v1/ endpoints\n\nAll /api/v1/* routes removed. /api/v2/ has been the default for 9 months.\nCallers still on v1 will get HTTP 410 Gone with a header pointing at the\nv2 equivalent. Full mapping in docs/V1_V2_MIGRATION.md. Sunset announced\n2025-11-01, enforced now.\n---\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all from an old mobile app build. told them to upgrade. no internal services affected.\n---\n<!-- doc_id: chat-deprecation -->\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all from an old mobile app build. told them to upgrade. no internal se",
510
+ "hits": [
511
+ {
512
+ "text": "commit d4e5f6a7\nAuthor: jordan@example.com\nDate: 2026-02-17T16:20:00Z\n\ndeprecate: remove legacy /api/v1/ endpoints\n\nAll /api/v1/* routes removed. /api/v2/ has been the default for 9 months.\nCallers still on v1 will get HTTP 410 Gone with a header pointing at the\nv2 equivalent. Full mapping in docs/V1_V2_MIGRATION.md. Sunset announced\n2025-11-01, enforced now.",
513
+ "score": 0.6597,
514
+ "source": "pentatonic-baseline:L0_workspace_bm25",
515
+ "doc_id": "commit-d4e5f6"
516
+ },
517
+ {
518
+ "text": "[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all ",
519
+ "score": 0.6515,
520
+ "source": "pentatonic-baseline:L0_workspace_bm25",
521
+ "doc_id": "chat-deprecation"
522
+ },
523
+ {
524
+ "text": "<!-- doc_id: chat-deprecation -->\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gon",
525
+ "score": 0.4796,
526
+ "source": "pentatonic-baseline:L6_documents",
527
+ "doc_id": "chat-deprecation"
528
+ }
529
+ ],
530
+ "correct": true,
531
+ "score": 1.0,
532
+ "grading_notes": "all substrings matched",
533
+ "search_time_ms": 1249.9748029804323,
534
+ "generation_time_ms": 0.0,
535
+ "tokens_in": 0,
536
+ "tokens_out": 0,
537
+ "retrieval_tokens": 434,
538
+ "query_tokens": 11,
539
+ "context_tokens": 423,
540
+ "judge_tokens_in": 0,
541
+ "judge_tokens_out": 0,
542
+ "judge_latency_ms": 0.0
543
+ },
544
+ {
545
+ "task_id": "deprecation-hs256",
546
+ "query": "Can auth-svc still accept HS256 JWTs?",
547
+ "answer": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192\n---\n[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retries might want to turn it off for tests.\npriya: yep, added. merging.\n\n[2026-02-19 Slack #payments-dev]",
548
+ "hits": [
549
+ {
550
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
551
+ "score": 0.6757,
552
+ "source": "pentatonic-baseline:L0_workspace_bm25",
553
+ "doc_id": "commit-b2c3d4"
554
+ },
555
+ {
556
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
557
+ "score": 0.6509,
558
+ "source": "pentatonic-baseline:L0_workspace_bm25",
559
+ "doc_id": "readme-auth"
560
+ },
561
+ {
562
+ "text": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retrie",
563
+ "score": 0.5016,
564
+ "source": "pentatonic-baseline:L0_workspace_bm25",
565
+ "doc_id": "chat-pr-47-review"
566
+ }
567
+ ],
568
+ "correct": true,
569
+ "score": 1.0,
570
+ "grading_notes": "all substrings matched",
571
+ "search_time_ms": 1340.946468000766,
572
+ "generation_time_ms": 0.0,
573
+ "tokens_in": 0,
574
+ "tokens_out": 0,
575
+ "retrieval_tokens": 442,
576
+ "query_tokens": 11,
577
+ "context_tokens": 431,
578
+ "judge_tokens_in": 0,
579
+ "judge_tokens_out": 0,
580
+ "judge_latency_ms": 0.0
581
+ },
582
+ {
583
+ "task_id": "deprecation-search-tuple",
584
+ "query": "What does SearchClient.search() return today \u2014 tuples or dicts?",
585
+ "answer": "commit c3d4e5f6\nAuthor: alex@example.com\nDate: 2026-02-14T09:00:00Z\n\nfeat(search): switch ANN backend from faiss to vector-db\n\nfaiss-cpu was hitting ~4s p99 on 2M vectors. Vector-DB HNSW on the same\ncorpus gives 40ms p99. Breaking: the SearchClient.search() return shape\nchanged \u2014 was list[tuple[int, float]], now list[dict] with keys {id, score,\npayload}. Migration guide in docs/SEARCH_MIGRATION.md.\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (defaul\n---\n# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = None, hybrid_weight: float = 0.6) -> list[dict]:\n # Post-c3d4e5f6: returns list[dict] not list[t",
586
+ "hits": [
587
+ {
588
+ "text": "commit c3d4e5f6\nAuthor: alex@example.com\nDate: 2026-02-14T09:00:00Z\n\nfeat(search): switch ANN backend from faiss to vector-db\n\nfaiss-cpu was hitting ~4s p99 on 2M vectors. Vector-DB HNSW on the same\ncorpus gives 40ms p99. Breaking: the SearchClient.search() return shape\nchanged \u2014 was list[tuple[int, float]], now list[dict] with keys {id, score,\npayload}. Migration guide in docs/SEARCH_MIGRATION.md.",
589
+ "score": 0.6144,
590
+ "source": "pentatonic-baseline:L0_workspace_bm25",
591
+ "doc_id": "commit-c3d4e5"
592
+ },
593
+ {
594
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
595
+ "score": 0.5357,
596
+ "source": "pentatonic-baseline:L0_workspace_bm25",
597
+ "doc_id": "readme-search"
598
+ },
599
+ {
600
+ "text": "# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = Non",
601
+ "score": 0.514,
602
+ "source": "pentatonic-baseline:L0_workspace_bm25",
603
+ "doc_id": "code-search-client"
604
+ }
605
+ ],
606
+ "correct": true,
607
+ "score": 1.0,
608
+ "grading_notes": "all substrings matched",
609
+ "search_time_ms": 1271.0949099855497,
610
+ "generation_time_ms": 0.0,
611
+ "tokens_in": 0,
612
+ "tokens_out": 0,
613
+ "retrieval_tokens": 425,
614
+ "query_tokens": 13,
615
+ "context_tokens": 412,
616
+ "judge_tokens_in": 0,
617
+ "judge_tokens_out": 0,
618
+ "judge_latency_ms": 0.0
619
+ },
620
+ {
621
+ "task_id": "config-retry-policy-base",
622
+ "query": "What's the default stripe retry base delay in ms on billing-svc?",
623
+ "answer": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoin\n---\n[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retries might want to turn it off for tests.\npriya: yep, added. merging.\n\n[2026-02-19 Slack #payments-dev]",
624
+ "hits": [
625
+ {
626
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
627
+ "score": 0.7033,
628
+ "source": "pentatonic-baseline:L0_workspace_bm25",
629
+ "doc_id": "pr-47-retry"
630
+ },
631
+ {
632
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
633
+ "score": 0.672,
634
+ "source": "pentatonic-baseline:L0_workspace_bm25",
635
+ "doc_id": "readme-billing"
636
+ },
637
+ {
638
+ "text": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retrie",
639
+ "score": 0.6684,
640
+ "source": "pentatonic-baseline:L0_workspace_bm25",
641
+ "doc_id": "chat-pr-47-review"
642
+ }
643
+ ],
644
+ "correct": true,
645
+ "score": 1.0,
646
+ "grading_notes": "all substrings matched",
647
+ "search_time_ms": 1263.2199359941296,
648
+ "generation_time_ms": 0.0,
649
+ "tokens_in": 0,
650
+ "tokens_out": 0,
651
+ "retrieval_tokens": 425,
652
+ "query_tokens": 15,
653
+ "context_tokens": 410,
654
+ "judge_tokens_in": 0,
655
+ "judge_tokens_out": 0,
656
+ "judge_latency_ms": 0.0
657
+ },
658
+ {
659
+ "task_id": "config-session-ttl",
660
+ "query": "What's the default SESSION_TTL_MINUTES on auth-svc?",
661
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192\n---\n# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session_ttl_minutes: 60\n refresh_ttl_days: 30\n\nsearch:\n url: http://search-svc.internal:8080\n api_key:\n---\ncommit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.",
662
+ "hits": [
663
+ {
664
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
665
+ "score": 0.6372,
666
+ "source": "pentatonic-baseline:L0_workspace_bm25",
667
+ "doc_id": "readme-auth"
668
+ },
669
+ {
670
+ "text": "# config/production.yaml\n\nbilling:\n stripe:\n api_key: ${STRIPE_API_KEY}\n webhook_secret: ${STRIPE_WEBHOOK_SECRET}\n retry_policy:\n base_ms: 200\n max_ms: 8000\n max_retries: 5\n jitter: true\n\nauth:\n jwt:\n algorithm: RS256\n private_key_path: /etc/secrets/jwt-private.pem\n public_key_path: /etc/secrets/jwt-public.pem\n issuer: https://auth.example.com\n session",
671
+ "score": 0.5543,
672
+ "source": "pentatonic-baseline:L0_workspace_bm25",
673
+ "doc_id": "config-sample"
674
+ },
675
+ {
676
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
677
+ "score": 0.4654,
678
+ "source": "pentatonic-baseline:L0_workspace_bm25",
679
+ "doc_id": "commit-b2c3d4"
680
+ }
681
+ ],
682
+ "correct": true,
683
+ "score": 1.0,
684
+ "grading_notes": "all substrings matched",
685
+ "search_time_ms": 1263.2656499918085,
686
+ "generation_time_ms": 0.0,
687
+ "tokens_in": 0,
688
+ "tokens_out": 0,
689
+ "retrieval_tokens": 459,
690
+ "query_tokens": 14,
691
+ "context_tokens": 445,
692
+ "judge_tokens_in": 0,
693
+ "judge_tokens_out": 0,
694
+ "judge_latency_ms": 0.0
695
+ },
696
+ {
697
+ "task_id": "config-search-hybrid-weight",
698
+ "query": "What's the default hybrid_weight for search-svc?",
699
+ "answer": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6 vector + 0.4 BM25) \u2014 override with\n `hybrid_weight` in the body (0.0 = pure BM25, 1.0 = pure vecto\n---\n# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old service, it's using the legacy SDK \u2014 upgrade to\nsearch-sdk >= 3.0.\n\n## Environment\n- `SEARCH_URL` (defaul\n---\n# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = None, hybrid_weight: float = 0.6) -> list[dict]:\n # Post-c3d4e5f6: returns list[dict] not list[t",
700
+ "hits": [
701
+ {
702
+ "text": "## search-svc: POST /v3/search\n\nVector + keyword hybrid search.\n\n**Request body:**\n```json\n{\"query\": \"widget\", \"collection\": \"products\", \"limit\": 20, \"filter\": {\"category\": \"electronics\"}}\n```\n\n**Responses:**\n- `200` \u2014 `{\"results\": [{\"id\": \"p_123\", \"score\": 0.81, \"payload\": {...}}, ...], \"total\": 42}`\n- `400` \u2014 malformed filter\n- `429` \u2014 rate-limited\n\n**Notes:**\n- Results are ranked by hybrid (0.6",
703
+ "score": 0.5876,
704
+ "source": "pentatonic-baseline:L0_workspace_bm25",
705
+ "doc_id": "api-search-query"
706
+ },
707
+ {
708
+ "text": "# search-svc README\n\nOwner: alex. On-call: search-oncall@example.com.\n\n## SDK\n```python\nfrom search_sdk import SearchClient\nclient = SearchClient(url=os.environ['SEARCH_URL'])\nresults = client.search(query='widget', limit=20)\nfor r in results:\n print(r['id'], r['score'], r['payload']['title'])\n```\nNOTE: pre-c3d4e5f6 the return shape was `list[tuple[int, float]]`. If you\nsee that in an old servi",
709
+ "score": 0.5703,
710
+ "source": "pentatonic-baseline:L0_workspace_bm25",
711
+ "doc_id": "readme-search"
712
+ },
713
+ {
714
+ "text": "# search_sdk/client.py (excerpt)\n\n```python\nimport httpx\n\nclass SearchClient:\n def __init__(self, url: str, api_key: str | None = None):\n self.url = url.rstrip('/')\n self._http = httpx.Client(headers={'Authorization': f'Bearer {api_key}'} if api_key else {})\n\n def search(self, query: str, *, collection: str = 'products', limit: int = 20,\n filter: dict | None = Non",
715
+ "score": 0.5421,
716
+ "source": "pentatonic-baseline:L0_workspace_bm25",
717
+ "doc_id": "code-search-client"
718
+ }
719
+ ],
720
+ "correct": true,
721
+ "score": 1.0,
722
+ "grading_notes": "all substrings matched",
723
+ "search_time_ms": 1285.4226699855644,
724
+ "generation_time_ms": 0.0,
725
+ "tokens_in": 0,
726
+ "tokens_out": 0,
727
+ "retrieval_tokens": 462,
728
+ "query_tokens": 11,
729
+ "context_tokens": 451,
730
+ "judge_tokens_in": 0,
731
+ "judge_tokens_out": 0,
732
+ "judge_latency_ms": 0.0
733
+ },
734
+ {
735
+ "task_id": "pr-rationale-retry",
736
+ "query": "Why did PR #47 choose base=200ms for stripe retries rather than 500ms?",
737
+ "answer": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retries might want to turn it off for tests.\npriya: yep, added. merging.\n\n[2026-02-19 Slack #payments-dev]\n---\nPR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoin",
738
+ "hits": [
739
+ {
740
+ "text": "[2026-02-18 Slack #payments-dev]\npriya: @jordan can you review #47? it's the stripe retry stuff\njordan: on it. Why base=200 and not 500?\npriya: stripe's own SDK uses 250 internally but I went conservative with 200 so we retry faster on transient blips. Max 8s is the ceiling stripe recommends.\njordan: makes sense. one nit \u2014 can we surface jitter=true as configurable? teams with deterministic retrie",
741
+ "score": 0.7484,
742
+ "source": "pentatonic-baseline:L0_workspace_bm25",
743
+ "doc_id": "chat-pr-47-review"
744
+ },
745
+ {
746
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
747
+ "score": 0.7145,
748
+ "source": "pentatonic-baseline:L0_workspace_bm25",
749
+ "doc_id": "pr-47-retry"
750
+ },
751
+ {
752
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
753
+ "score": 0.4761,
754
+ "source": "pentatonic-baseline:L0_workspace_bm25",
755
+ "doc_id": "readme-billing"
756
+ }
757
+ ],
758
+ "correct": true,
759
+ "score": 1.0,
760
+ "grading_notes": "all substrings matched",
761
+ "search_time_ms": 1318.2544060109649,
762
+ "generation_time_ms": 0.0,
763
+ "tokens_in": 0,
764
+ "tokens_out": 0,
765
+ "retrieval_tokens": 428,
766
+ "query_tokens": 19,
767
+ "context_tokens": 409,
768
+ "judge_tokens_in": 0,
769
+ "judge_tokens_out": 0,
770
+ "judge_latency_ms": 0.0
771
+ },
772
+ {
773
+ "task_id": "pr-rationale-jwt-cleanup",
774
+ "query": "What did PR #61 clean up and why was it safe?",
775
+ "answer": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.\n---\nPR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.\n---\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all from an old mobile app build. told them to upgrade. no internal services affected.",
776
+ "hits": [
777
+ {
778
+ "text": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
779
+ "score": 0.6087,
780
+ "source": "pentatonic-baseline",
781
+ "doc_id": "pr-61-jwt-cleanup"
782
+ },
783
+ {
784
+ "text": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
785
+ "score": 0.5675,
786
+ "source": "pentatonic-baseline:L0_workspace_bm25",
787
+ "doc_id": "pr-61-jwt-cleanup"
788
+ },
789
+ {
790
+ "text": "[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all ",
791
+ "score": 0.5332,
792
+ "source": "pentatonic-baseline:L0_workspace_bm25",
793
+ "doc_id": "chat-deprecation"
794
+ }
795
+ ],
796
+ "correct": true,
797
+ "score": 1.0,
798
+ "grading_notes": "all substrings matched",
799
+ "search_time_ms": 1295.8309090172406,
800
+ "generation_time_ms": 0.0,
801
+ "tokens_in": 0,
802
+ "tokens_out": 0,
803
+ "retrieval_tokens": 363,
804
+ "query_tokens": 13,
805
+ "context_tokens": 350,
806
+ "judge_tokens_in": 0,
807
+ "judge_tokens_out": 0,
808
+ "judge_latency_ms": 0.0
809
+ },
810
+ {
811
+ "task_id": "cross-ref-invoicing",
812
+ "query": "After Stripe posts invoice.payment_succeeded, which services consume the event?",
813
+ "answer": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consumes `OrderPaid` for financial reports\n7. `email-svc` consumes `OrderPaid` for the receipt email\n\nOn we\n---\n# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `order-svc`\n5. `order-svc` marks order as paid, emits `OrderPaid` on pubsub\n6. `reporting-svc` consume\n---\n# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.py` \u2014 wraps stripe-python, adds retry + idempotency\n- `billing/webhooks.py` \u2014 Stripe webhook endpoint, signed and idempotent\n- `billing/invoicing.py` \u2014 PDF invoice generation (weasyprint)\n\nSee ARCHITE",
814
+ "hits": [
815
+ {
816
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
817
+ "score": 0.7495,
818
+ "source": "pentatonic-baseline",
819
+ "doc_id": "cross-ref-invoicing"
820
+ },
821
+ {
822
+ "text": "# Cross-reference: invoicing pipeline\n\nEnd-to-end path for an invoice being paid:\n\n1. `billing-svc` creates the invoice (see POST /v2/invoices, api-billing-invoice)\n2. Customer pays via Stripe portal (no internal code path)\n3. Stripe posts a `invoice.payment_succeeded` webhook to billing-svc\n4. `billing/webhooks.py::handle_webhook` verifies signature, enqueues\n internal `InvoicePaid` event on `o",
823
+ "score": 0.7179,
824
+ "source": "pentatonic-baseline:L0_workspace_bm25",
825
+ "doc_id": "cross-ref-invoicing"
826
+ },
827
+ {
828
+ "text": "# billing-svc README\n\nService owner: jordan. On-call: billing-oncall@example.com.\n\n## Environment\n- `STRIPE_API_KEY` (required)\n- `STRIPE_WEBHOOK_SECRET` (required)\n- `BILLING_DB_URL` (required)\n- `BILLING_STRIPE_RETRY_POLICY_BASE_MS` (default 200)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_MS` (default 8000)\n- `BILLING_STRIPE_RETRY_POLICY_MAX_RETRIES` (default 5)\n\n## Key modules\n- `billing/stripe_client.",
829
+ "score": 0.5969,
830
+ "source": "pentatonic-baseline",
831
+ "doc_id": "readme-billing"
832
+ }
833
+ ],
834
+ "correct": true,
835
+ "score": 1.0,
836
+ "grading_notes": "all substrings matched",
837
+ "search_time_ms": 1290.6972740020137,
838
+ "generation_time_ms": 0.0,
839
+ "tokens_in": 0,
840
+ "tokens_out": 0,
841
+ "retrieval_tokens": 486,
842
+ "query_tokens": 14,
843
+ "context_tokens": 472,
844
+ "judge_tokens_in": 0,
845
+ "judge_tokens_out": 0,
846
+ "judge_latency_ms": 0.0
847
+ },
848
+ {
849
+ "task_id": "cross-ref-ratelimit",
850
+ "query": "Where does the shared rate-limit lib live and what's the method signature?",
851
+ "answer": "Refactor: extract rate-limit middleware into shared lib\nAuthor: alex\nStatus: merged 2026-03-18 (PR #108)\n\nBefore: every service rolled its own token-bucket rate limiter (billing-svc,\nauth-svc, search-svc, gateway all had near-identical code).\nAfter: `shared/rate_limit.py` with the canonical implementation. All 4\nservices now import from there.\nAPI: `RateLimiter(tokens_per_sec: int, burst: int).allow(key: str) -> bool`.\nBackend: Redis (shared across pods). Key format: `ratelimit:{service}:{key}`.\n---\nPR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {base_ms, max_ms, max_retries, jitter}. Merged 2026-02-19.\n---\n# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n\n**Fix:** kubectl rollout restart deployment/billing-svc at 14:20. Normal\noperation resumed 14:22.",
852
+ "hits": [
853
+ {
854
+ "text": "Refactor: extract rate-limit middleware into shared lib\nAuthor: alex\nStatus: merged 2026-03-18 (PR #108)\n\nBefore: every service rolled its own token-bucket rate limiter (billing-svc,\nauth-svc, search-svc, gateway all had near-identical code).\nAfter: `shared/rate_limit.py` with the canonical implementation. All 4\nservices now import from there.\nAPI: `RateLimiter(tokens_per_sec: int, burst: int).all",
855
+ "score": 0.6958,
856
+ "source": "pentatonic-baseline:L0_workspace_bm25",
857
+ "doc_id": "refactor-rate-limit"
858
+ },
859
+ {
860
+ "text": "PR #47: Exponential backoff on StripeClient retries\nAuthor: priya\nStatus: merged\n\nSymptom: StripeClient was retrying every 100ms on 429 rate-limit responses,\nwhich made the rate-limit situation worse. Root cause: retry_delay_ms was\nhardcoded. Fix: exponential backoff with jitter, base=200ms, max=8s, 5 retries.\nTuned per Stripe's own guidance. Config key billing.stripe.retry_policy now\naccepts {bas",
861
+ "score": 0.5577,
862
+ "source": "pentatonic-baseline:L0_workspace_bm25",
863
+ "doc_id": "pr-47-retry"
864
+ },
865
+ {
866
+ "text": "# Incident 2026-03-02: billing-svc webhook signature errors\n\n**Severity:** SEV-2\n**Duration:** 2026-03-02 14:15 UTC \u2192 14:22 UTC (7 min)\n**Impact:** Stripe webhooks dropped during the window; 3 payment events\nnot ingested. Replayed via Stripe dashboard at 14:30.\n\n**Root cause:** STRIPE_WEBHOOK_SECRET was rotated in vault at 14:10. Pods\ndon't watch vault live; the new secret only loaded on restart.\n",
867
+ "score": 0.4544,
868
+ "source": "pentatonic-baseline:L0_workspace_bm25",
869
+ "doc_id": "incident-2026-03-02-webhook"
870
+ }
871
+ ],
872
+ "correct": true,
873
+ "score": 1.0,
874
+ "grading_notes": "all substrings matched",
875
+ "search_time_ms": 1235.1318040164188,
876
+ "generation_time_ms": 0.0,
877
+ "tokens_in": 0,
878
+ "tokens_out": 0,
879
+ "retrieval_tokens": 419,
880
+ "query_tokens": 15,
881
+ "context_tokens": 404,
882
+ "judge_tokens_in": 0,
883
+ "judge_tokens_out": 0,
884
+ "judge_latency_ms": 0.0
885
+ },
886
+ {
887
+ "task_id": "stack-trace-webhook",
888
+ "query": "If a SignatureVerificationError happens on billing-svc webhook, what's the most likely cause and fix?",
889
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\nStack trace from prod 2026-03-02 14:15 UTC:\n\nTraceback (most recent call last):\n File \"billing/webhooks.py\", line 42, in handle_webhook\n event = StripeClient.parse_event(payload, signature)\n File \"billing/stripe_client.py\", line 88, in parse_event\n return stripe.Webhook.construct_event(payload, sig, secret)\n File \"stripe/webhook.py\", line 56, in construct_event\n raise SignatureVerificationError(...)\nstripe.error.SignatureVerificationError: No signatures found matching expected signature\n\nCaused by: STRIPE_WEBHOOK_SECRET rotated in prod at 14:10 but pod restart\ndidn't pick up new va\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.",
890
+ "hits": [
891
+ {
892
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
893
+ "score": 0.8118,
894
+ "source": "pentatonic-baseline",
895
+ "doc_id": "runbook-billing"
896
+ },
897
+ {
898
+ "text": "Stack trace from prod 2026-03-02 14:15 UTC:\n\nTraceback (most recent call last):\n File \"billing/webhooks.py\", line 42, in handle_webhook\n event = StripeClient.parse_event(payload, signature)\n File \"billing/stripe_client.py\", line 88, in parse_event\n return stripe.Webhook.construct_event(payload, sig, secret)\n File \"stripe/webhook.py\", line 56, in construct_event\n raise SignatureVerifica",
899
+ "score": 0.7632,
900
+ "source": "pentatonic-baseline",
901
+ "doc_id": "stack-trace-nullpointer"
902
+ },
903
+ {
904
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
905
+ "score": 0.7203,
906
+ "source": "pentatonic-baseline:L0_workspace_bm25",
907
+ "doc_id": "runbook-billing"
908
+ }
909
+ ],
910
+ "correct": true,
911
+ "score": 1.0,
912
+ "grading_notes": "all substrings matched",
913
+ "search_time_ms": 1203.479966003215,
914
+ "generation_time_ms": 0.0,
915
+ "tokens_in": 0,
916
+ "tokens_out": 0,
917
+ "retrieval_tokens": 415,
918
+ "query_tokens": 21,
919
+ "context_tokens": 394,
920
+ "judge_tokens_in": 0,
921
+ "judge_tokens_out": 0,
922
+ "judge_latency_ms": 0.0
923
+ },
924
+ {
925
+ "task_id": "temporal-numpy",
926
+ "query": "Why was numpy upgraded to 1.26 recently?",
927
+ "answer": "PR #101: Upgrade numpy 1.24.4 \u2192 1.26.2\nAuthor: alex\nStatus: merged\n\nFollow-up to ADR-011 (Python 3.12 minimum). numpy 1.24 doesn't ship 3.12\nwheels, so CI would have to build from source on every install.\nUpgraded to 1.26, which does ship 3.12 wheels. No API surface changes\naffecting us \u2014 only internal dtype-promotion warnings we've already\nsilenced via `warnings.filterwarnings`.\n\nVerified no call-sites depended on the deprecated `np.object_` alias.\nMerged 2026-03-11.\n---\nPR #101: Upgrade numpy 1.24.4 \u2192 1.26.2\nAuthor: alex\nStatus: merged\n\nFollow-up to ADR-011 (Python 3.12 minimum). numpy 1.24 doesn't ship 3.12\nwheels, so CI would have to build from source on every install.\nUpgraded to 1.26, which does ship 3.12 wheels. No API surface changes\naffecting us \u2014 only internal dtype-promotion warnings we've already\nsilenced via `warnings.filterwarnings`.\n\nVerified no call-sites depended on the deprecated `np.object_` alias.\nMerged 2026-03-11.\n---\nADR-011: Python 3.12 minimum across the fleet\nDate: 2026-03-10\nStatus: accepted\n\n**Context.** We've been pinned to 3.10 for 2 years. 3.12's faster interpreter\nand better typing are worth the cutover cost.\n\n**Decision.** Minimum Python = 3.12. All services, CI, dev containers.\nPython 3.11 and 3.10 removed from `.python-version` files.\n\n**Consequences.** One library (numpy==1.24.x) needs upgrade to 1.26 for\n3.12 wheels. PR #101 does the upgrade. No API changes for consumers.",
928
+ "hits": [
929
+ {
930
+ "text": "PR #101: Upgrade numpy 1.24.4 \u2192 1.26.2\nAuthor: alex\nStatus: merged\n\nFollow-up to ADR-011 (Python 3.12 minimum). numpy 1.24 doesn't ship 3.12\nwheels, so CI would have to build from source on every install.\nUpgraded to 1.26, which does ship 3.12 wheels. No API surface changes\naffecting us \u2014 only internal dtype-promotion warnings we've already\nsilenced via `warnings.filterwarnings`.\n\nVerified no call",
931
+ "score": 0.7771,
932
+ "source": "pentatonic-baseline",
933
+ "doc_id": "dep-numpy-upgrade"
934
+ },
935
+ {
936
+ "text": "PR #101: Upgrade numpy 1.24.4 \u2192 1.26.2\nAuthor: alex\nStatus: merged\n\nFollow-up to ADR-011 (Python 3.12 minimum). numpy 1.24 doesn't ship 3.12\nwheels, so CI would have to build from source on every install.\nUpgraded to 1.26, which does ship 3.12 wheels. No API surface changes\naffecting us \u2014 only internal dtype-promotion warnings we've already\nsilenced via `warnings.filterwarnings`.\n\nVerified no call",
937
+ "score": 0.7281,
938
+ "source": "pentatonic-baseline:L0_workspace_bm25",
939
+ "doc_id": "dep-numpy-upgrade"
940
+ },
941
+ {
942
+ "text": "ADR-011: Python 3.12 minimum across the fleet\nDate: 2026-03-10\nStatus: accepted\n\n**Context.** We've been pinned to 3.10 for 2 years. 3.12's faster interpreter\nand better typing are worth the cutover cost.\n\n**Decision.** Minimum Python = 3.12. All services, CI, dev containers.\nPython 3.11 and 3.10 removed from `.python-version` files.\n\n**Consequences.** One library (numpy==1.24.x) needs upgrade to ",
943
+ "score": 0.6324,
944
+ "source": "pentatonic-baseline:L0_workspace_bm25",
945
+ "doc_id": "decision-python"
946
+ }
947
+ ],
948
+ "correct": true,
949
+ "score": 1.0,
950
+ "grading_notes": "all substrings matched",
951
+ "search_time_ms": 1267.1056179970037,
952
+ "generation_time_ms": 0.0,
953
+ "tokens_in": 0,
954
+ "tokens_out": 0,
955
+ "retrieval_tokens": 442,
956
+ "query_tokens": 11,
957
+ "context_tokens": 431,
958
+ "judge_tokens_in": 0,
959
+ "judge_tokens_out": 0,
960
+ "judge_latency_ms": 0.0
961
+ },
962
+ {
963
+ "task_id": "temporal-on-call",
964
+ "query": "Who is the primary on-call for the week of April 15\u201321, 2026?",
965
+ "answer": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineering manager (kim) \u2192 CTO.\n---\n# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineering manager (kim) \u2192 CTO.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192",
966
+ "hits": [
967
+ {
968
+ "text": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineeri",
969
+ "score": 0.7727,
970
+ "source": "pentatonic-baseline",
971
+ "doc_id": "on-call-rotation"
972
+ },
973
+ {
974
+ "text": "# On-call rotation April 2026\n\n| Week | Primary | Secondary |\n|------|---------|-----------|\n| Apr 1\u20137 | jordan | priya |\n| Apr 8\u201314 | priya | alex |\n| Apr 15\u201321 | alex | jordan |\n| Apr 22\u201328 | jordan | priya |\n| Apr 29\u201330 | priya | alex |\n\nHandoff: Mondays 10:00 UTC, #on-call channel. Previous week's primary\nruns through the incident log.\n\nEscalation path: primary \u2192 secondary \u2192 engineeri",
975
+ "score": 0.71,
976
+ "source": "pentatonic-baseline:L0_workspace_bm25",
977
+ "doc_id": "on-call-rotation"
978
+ },
979
+ {
980
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
981
+ "score": 0.4314,
982
+ "source": "pentatonic-baseline:L0_workspace_bm25",
983
+ "doc_id": "readme-auth"
984
+ }
985
+ ],
986
+ "correct": true,
987
+ "score": 1.0,
988
+ "grading_notes": "all substrings matched",
989
+ "search_time_ms": 1279.5606150175445,
990
+ "generation_time_ms": 0.0,
991
+ "tokens_in": 0,
992
+ "tokens_out": 0,
993
+ "retrieval_tokens": 452,
994
+ "query_tokens": 20,
995
+ "context_tokens": 432,
996
+ "judge_tokens_in": 0,
997
+ "judge_tokens_out": 0,
998
+ "judge_latency_ms": 0.0
999
+ },
1000
+ {
1001
+ "task_id": "contradiction-algorithm",
1002
+ "query": "Which JWT signing algorithm does auth-svc use \u2014 HS256 or RS256?",
1003
+ "answer": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192\n---\ncommit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.\n---\nPR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
1004
+ "hits": [
1005
+ {
1006
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
1007
+ "score": 0.6996,
1008
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1009
+ "doc_id": "readme-auth"
1010
+ },
1011
+ {
1012
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
1013
+ "score": 0.6791,
1014
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1015
+ "doc_id": "commit-b2c3d4"
1016
+ },
1017
+ {
1018
+ "text": "PR #61: Remove JWT_SECRET env var from all services\nAuthor: priya\nStatus: merged\n\nFollowing the RS256 migration (b2c3d4e5), JWT_SECRET is no longer read\nanywhere except auth-svc. Removed it from 11 deployment manifests and\nfrom all .env.example files. Sealed-secret file for staging/prod cleaned\nin platform/secrets/ \u2014 old key destroyed via vault rotate. Merged 2026-02-24.",
1019
+ "score": 0.6419,
1020
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1021
+ "doc_id": "pr-61-jwt-cleanup"
1022
+ }
1023
+ ],
1024
+ "correct": true,
1025
+ "score": 1.0,
1026
+ "grading_notes": "all substrings matched",
1027
+ "search_time_ms": 1353.9299910189584,
1028
+ "generation_time_ms": 0.0,
1029
+ "tokens_in": 0,
1030
+ "tokens_out": 0,
1031
+ "retrieval_tokens": 412,
1032
+ "query_tokens": 16,
1033
+ "context_tokens": 396,
1034
+ "judge_tokens_in": 0,
1035
+ "judge_tokens_out": 0,
1036
+ "judge_latency_ms": 0.0
1037
+ },
1038
+ {
1039
+ "task_id": "rubric-security-advice",
1040
+ "query": "A new engineer asks: can I add HS256 as a fallback for older clients in auth-svc? Advise them.",
1041
+ "answer": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_PUBLIC_KEY_PATH\neverywhere else. BREAKING: old HS256 tokens rejected after 2026-02-20.\n---\n# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /v2/login {email, password}` \u2192 `{access_token, refresh_token}`\n- `POST /v2/refresh {refresh_token}` \u2192\n---\n[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all from an old mobile app build. told them to upgrade. no internal services affected.",
1042
+ "hits": [
1043
+ {
1044
+ "text": "commit b2c3d4e5\nAuthor: priya@example.com\nDate: 2026-02-12T11:15:00Z\n\nfeat(auth): rotate JWT signing key to RS256\n\nWe were using HS256 shared-secret JWTs, which meant any service that verified\ntokens also held the minting secret. Migrated to RS256 asymmetric signing.\nPrivate key lives in auth-svc only; all other services only ever hold the\npublic key. Env var JWT_PRIVATE_KEY_PATH on auth-svc, JWT_",
1045
+ "score": 0.6146,
1046
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1047
+ "doc_id": "commit-b2c3d4"
1048
+ },
1049
+ {
1050
+ "text": "# auth-svc README\n\nOwner: priya. On-call: auth-oncall@example.com.\n\n## Environment\n- `JWT_PRIVATE_KEY_PATH` (required, RS256 PEM)\n- `JWT_PUBLIC_KEY_PATH` (required, RS256 PEM)\n- `JWT_ISSUER` (default: 'https://auth.example.com')\n- `SESSION_TTL_MINUTES` (default: 60)\n- `REFRESH_TTL_DAYS` (default: 30)\n\n**Do not use HS256**. The migration in b2c3d4e5 removed all HS256 paths.\n\n## Public API\n- `POST /",
1051
+ "score": 0.5987,
1052
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1053
+ "doc_id": "readme-auth"
1054
+ },
1055
+ {
1056
+ "text": "[2026-02-15 Slack #platform]\njordan: reminder: /api/v1 is going away in 2 days. anyone still seeing v1 traffic?\nalex: we're clean on search-svc. grep'd all consumers.\npriya: auth is clean, v2-only for 6 months\njordan: cool. flipping the 410 flag on 2026-02-17 09:00 UTC. if your service gets paged, check your callers.\n\n[2026-02-17 Slack #platform]\njordan: v1 is gone. 410s in the last hour: 12. all ",
1057
+ "score": 0.5356,
1058
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1059
+ "doc_id": "chat-deprecation"
1060
+ }
1061
+ ],
1062
+ "correct": false,
1063
+ "score": 0.5,
1064
+ "grading_notes": "judge=0.50 \u2014 The answer advises against adding HS256 without citing that all services only hold the public key since the RS256 migrat",
1065
+ "search_time_ms": 1301.4432359777857,
1066
+ "generation_time_ms": 0.0,
1067
+ "tokens_in": 0,
1068
+ "tokens_out": 0,
1069
+ "retrieval_tokens": 469,
1070
+ "query_tokens": 25,
1071
+ "context_tokens": 444,
1072
+ "judge_tokens_in": 757,
1073
+ "judge_tokens_out": 37,
1074
+ "judge_latency_ms": 1715.3592600226402
1075
+ },
1076
+ {
1077
+ "task_id": "rubric-incident-lesson",
1078
+ "query": "We saw SignatureVerificationError in billing-svc logs right after a secret rotation. What's our SOP?",
1079
+ "answer": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\n# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but pods didn't reload\nuntil restart. See incident post-mortem in ops/incidents/2026-03-02-webhook.md.\n---\nStack trace from prod 2026-03-02 14:15 UTC:\n\nTraceback (most recent call last):\n File \"billing/webhooks.py\", line 42, in handle_webhook\n event = StripeClient.parse_event(payload, signature)\n File \"billing/stripe_client.py\", line 88, in parse_event\n return stripe.Webhook.construct_event(payload, sig, secret)\n File \"stripe/webhook.py\", line 56, in construct_event\n raise SignatureVerificationError(...)\nstripe.error.SignatureVerificationError: No signatures found matching expected signature\n\nCaused by: STRIPE_WEBHOOK_SECRET rotated in prod at 14:10 but pod restart\ndidn't pick up new va",
1080
+ "hits": [
1081
+ {
1082
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
1083
+ "score": 0.7663,
1084
+ "source": "pentatonic-baseline",
1085
+ "doc_id": "runbook-billing"
1086
+ },
1087
+ {
1088
+ "text": "# billing-svc runbook: webhook signature errors\n\nSymptom: logs show `stripe.error.SignatureVerificationError`.\n\nDiagnosis:\n1. Confirm the webhook endpoint URL matches Stripe dashboard\n2. Check `STRIPE_WEBHOOK_SECRET` matches the secret in Stripe dashboard\n3. If secret was rotated recently, `kubectl rollout restart deployment/billing-svc`\n\nWe hit this on 2026-03-02 14:15 UTC \u2014 secret rotated but po",
1089
+ "score": 0.6988,
1090
+ "source": "pentatonic-baseline:L0_workspace_bm25",
1091
+ "doc_id": "runbook-billing"
1092
+ },
1093
+ {
1094
+ "text": "Stack trace from prod 2026-03-02 14:15 UTC:\n\nTraceback (most recent call last):\n File \"billing/webhooks.py\", line 42, in handle_webhook\n event = StripeClient.parse_event(payload, signature)\n File \"billing/stripe_client.py\", line 88, in parse_event\n return stripe.Webhook.construct_event(payload, sig, secret)\n File \"stripe/webhook.py\", line 56, in construct_event\n raise SignatureVerifica",
1095
+ "score": 0.6836,
1096
+ "source": "pentatonic-baseline",
1097
+ "doc_id": "stack-trace-nullpointer"
1098
+ }
1099
+ ],
1100
+ "correct": false,
1101
+ "score": 0.5,
1102
+ "grading_notes": "judge=0.50 \u2014 Mentioned a pod restart (kubectl rollout restart) without referencing that secrets only load at pod startup.",
1103
+ "search_time_ms": 1268.8033459999133,
1104
+ "generation_time_ms": 0.0,
1105
+ "tokens_in": 0,
1106
+ "tokens_out": 0,
1107
+ "retrieval_tokens": 414,
1108
+ "query_tokens": 21,
1109
+ "context_tokens": 393,
1110
+ "judge_tokens_in": 687,
1111
+ "judge_tokens_out": 35,
1112
+ "judge_latency_ms": 1701.407715022564
1113
+ }
1114
+ ]
1115
+ }