@1mbrain/benchmarks 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/README.md +85 -0
  2. package/fixtures/1mbrain-focused-mini/1mbrain-focused-mini.json +928 -0
  3. package/fixtures/1mbrain-focused-mini/README.md +45 -0
  4. package/fixtures/adversarial-memory/dataset_claude_adversarial.json +3333 -0
  5. package/fixtures/adversarial-memory/dataset_gemini_adversarial_memory.json +2984 -0
  6. package/fixtures/balanced-mini/dataset_claude_balanced_mini.json +2077 -0
  7. package/fixtures/balanced-mini/dataset_gemini_balanced_mini.json +1995 -0
  8. package/fixtures/generate_datasets.js +1741 -0
  9. package/fixtures/graph-stress-hard/README.md +43 -0
  10. package/fixtures/graph-stress-hard/dataset_graph_stress_hard.json +4374 -0
  11. package/fixtures/graph-stress-hard/generate_graph_stress_hard.js +526 -0
  12. package/fixtures/realistic-medium/dataset_claude_realistic_medium.json +7462 -0
  13. package/fixtures/realistic-medium/dataset_gemini_realistic_medium.json +7277 -0
  14. package/fixtures/realistic-medium/gen_claude_medium.js +600 -0
  15. package/package.json +22 -0
  16. package/reports/benchmark_report.md +48 -0
  17. package/reports/benchmark_report_claude_adversarial.md +42 -0
  18. package/reports/benchmark_report_claude_adversarial_adaptive.md +42 -0
  19. package/reports/benchmark_report_claude_adversarial_adaptive2_fast.md +42 -0
  20. package/reports/benchmark_report_claude_adversarial_adaptive_fast.md +42 -0
  21. package/reports/benchmark_report_claude_adversarial_rerank.md +42 -0
  22. package/reports/benchmark_report_claude_balanced_mini.md +42 -0
  23. package/reports/benchmark_report_claude_balanced_mini_adaptive.md +42 -0
  24. package/reports/benchmark_report_claude_balanced_mini_adaptive2_fast.md +42 -0
  25. package/reports/benchmark_report_claude_balanced_mini_adaptive_fast.md +42 -0
  26. package/reports/benchmark_report_claude_balanced_mini_rerank.md +42 -0
  27. package/reports/benchmark_report_claude_realistic_medium.md +42 -0
  28. package/reports/benchmark_report_claude_realistic_medium_adaptive.md +42 -0
  29. package/reports/benchmark_report_claude_realistic_medium_adaptive2_fast.md +42 -0
  30. package/reports/benchmark_report_claude_realistic_medium_adaptive_fast.md +42 -0
  31. package/reports/benchmark_report_claude_realistic_medium_evidence_rerank_local.md +42 -0
  32. package/reports/benchmark_report_claude_realistic_medium_openai_evidence_rerank.md +41 -0
  33. package/reports/benchmark_report_claude_realistic_medium_openai_multi_signal.md +41 -0
  34. package/reports/benchmark_report_claude_realistic_medium_openai_multi_signal_scoped.md +41 -0
  35. package/reports/benchmark_report_claude_realistic_medium_openai_phase8_no_judge.md +42 -0
  36. package/reports/benchmark_report_claude_realistic_medium_openai_rankingpolicy.md +41 -0
  37. package/reports/benchmark_report_claude_realistic_medium_openai_stale_filter.md +41 -0
  38. package/reports/benchmark_report_claude_realistic_medium_openai_stale_filter_absence_fix.md +41 -0
  39. package/reports/benchmark_report_claude_realistic_medium_openai_write_time_invalidation.md +41 -0
  40. package/reports/benchmark_report_claude_realistic_medium_rerank.md +42 -0
  41. package/reports/benchmark_report_claude_realistic_medium_stale_filter_local.md +42 -0
  42. package/reports/benchmark_report_graph_stress_hard.md +42 -0
  43. package/reports/benchmark_report_graph_stress_hard_absence_fix.md +42 -0
  44. package/reports/benchmark_report_graph_stress_hard_adaptive.md +42 -0
  45. package/reports/benchmark_report_graph_stress_hard_evidence_rerank.md +42 -0
  46. package/reports/benchmark_report_graph_stress_hard_multi_signal_current_guardrail.md +42 -0
  47. package/reports/benchmark_report_graph_stress_hard_multi_signal_guardrail_fixed.md +42 -0
  48. package/reports/benchmark_report_graph_stress_hard_multi_signal_local.md +42 -0
  49. package/reports/benchmark_report_graph_stress_hard_multi_signal_scoped_guardrail.md +42 -0
  50. package/reports/benchmark_report_graph_stress_hard_multi_signal_vector_pure_guardrail.md +42 -0
  51. package/reports/benchmark_report_graph_stress_hard_phase8_sdk_guardrail.md +42 -0
  52. package/reports/benchmark_report_graph_stress_hard_rerank.md +42 -0
  53. package/reports/benchmark_report_graph_stress_hard_stale_filter.md +42 -0
  54. package/reports/benchmark_report_graph_stress_hard_write_time_invalidation.md +42 -0
  55. package/results/.gitignore +2 -0
  56. package/src/adapters/1mbrain.ts +317 -0
  57. package/src/adapters/keyword-embedding.ts +48 -0
  58. package/src/adapters/mem0.ts +124 -0
  59. package/src/adapters/qdrant.ts +214 -0
  60. package/src/adapters/unavailable.ts +49 -0
  61. package/src/adapters/vector-baseline.ts +149 -0
  62. package/src/datasets/focused-mini.ts +158 -0
  63. package/src/datasets/synthetic-agent-memory.ts +532 -0
  64. package/src/llm-evaluator.ts +262 -0
  65. package/src/metrics.ts +482 -0
  66. package/src/provider.ts +151 -0
  67. package/src/runner.ts +635 -0
  68. package/tsconfig.json +10 -0
  69. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,2984 @@
1
+ {
2
+ "name": "adversarial-memory",
3
+ "description": "Adversarial memory benchmark dataset containing 10 conversations with 120 memory records and 60 questions, testing contradiction handling, noise, and abstention.",
4
+ "generated_at": "2026-06-19",
5
+ "fairness_notes": [
6
+ "Tests capacity to handle stale values, contradicting facts, and noise resistance without graph bias.",
7
+ "Abstention questions ensure systems know when evidence is insufficient."
8
+ ],
9
+ "conversations": [
10
+ {
11
+ "conversation_id": "dc_c01_software",
12
+ "agent_id": "dc_agent_software_01",
13
+ "domain": "software",
14
+ "memory_records": [
15
+ {
16
+ "id": "dc_c01_m01",
17
+ "type": "semantic",
18
+ "timestamp": "2026-06-01T10:00:00Z",
19
+ "content": "The main parameters designated for SOFTWARE are value Alpha.",
20
+ "tags": [
21
+ "software",
22
+ "param"
23
+ ],
24
+ "importance": 0.8,
25
+ "metadata": {
26
+ "source_turn": "t1",
27
+ "speaker": "user"
28
+ }
29
+ },
30
+ {
31
+ "id": "dc_c01_m02",
32
+ "type": "semantic",
33
+ "timestamp": "2026-06-01T10:05:00Z",
34
+ "content": "The secondary configuration option for SOFTWARE is value Beta.",
35
+ "tags": [
36
+ "software",
37
+ "param"
38
+ ],
39
+ "importance": 0.8,
40
+ "metadata": {
41
+ "source_turn": "t2",
42
+ "speaker": "user"
43
+ }
44
+ },
45
+ {
46
+ "id": "dc_c01_m03",
47
+ "type": "semantic",
48
+ "timestamp": "2026-06-01T10:10:00Z",
49
+ "content": "Distractor: The color of the SOFTWARE report sheet is yellow.",
50
+ "tags": [
51
+ "noise"
52
+ ],
53
+ "importance": 0.2,
54
+ "metadata": {
55
+ "source_turn": "t3",
56
+ "speaker": "assistant"
57
+ }
58
+ },
59
+ {
60
+ "id": "dc_c01_m04",
61
+ "type": "semantic",
62
+ "timestamp": "2026-06-02T10:00:00Z",
63
+ "content": "On June 2, the main parameter configuration for SOFTWARE was updated to Gamma.",
64
+ "tags": [
65
+ "software",
66
+ "param"
67
+ ],
68
+ "importance": 0.9,
69
+ "metadata": {
70
+ "source_turn": "t4",
71
+ "speaker": "user"
72
+ }
73
+ },
74
+ {
75
+ "id": "dc_c01_m05",
76
+ "type": "semantic",
77
+ "timestamp": "2026-06-03T11:00:00Z",
78
+ "content": "On June 3, the secondary configuration option for SOFTWARE was changed to Delta.",
79
+ "tags": [
80
+ "software",
81
+ "param"
82
+ ],
83
+ "importance": 0.9,
84
+ "metadata": {
85
+ "source_turn": "t5",
86
+ "speaker": "user"
87
+ }
88
+ },
89
+ {
90
+ "id": "dc_c01_m06",
91
+ "type": "procedural",
92
+ "timestamp": "2026-06-04T09:00:00Z",
93
+ "content": "To apply modifications to the SOFTWARE registry: 1. check credentials, 2. submit form, 3. wait for email.",
94
+ "tags": [
95
+ "software",
96
+ "process"
97
+ ],
98
+ "importance": 0.85,
99
+ "metadata": {
100
+ "source_turn": "t6",
101
+ "speaker": "user"
102
+ }
103
+ },
104
+ {
105
+ "id": "dc_c01_m07",
106
+ "type": "semantic",
107
+ "timestamp": "2026-06-04T09:05:00Z",
108
+ "content": "Distractor: Email server is active on port 25.",
109
+ "tags": [
110
+ "noise"
111
+ ],
112
+ "importance": 0.3,
113
+ "metadata": {
114
+ "source_turn": "t7",
115
+ "speaker": "assistant"
116
+ }
117
+ },
118
+ {
119
+ "id": "dc_c01_m08",
120
+ "type": "semantic",
121
+ "timestamp": "2026-06-05T10:00:00Z",
122
+ "content": "The registry workspace for SOFTWARE is located at building Room 10.",
123
+ "tags": [
124
+ "software"
125
+ ],
126
+ "importance": 0.8,
127
+ "metadata": {
128
+ "source_turn": "t8",
129
+ "speaker": "user"
130
+ },
131
+ "associations": [
132
+ {
133
+ "target_id": "dc_c01_m06",
134
+ "strength": 0.7
135
+ }
136
+ ]
137
+ },
138
+ {
139
+ "id": "dc_c01_m09",
140
+ "type": "semantic",
141
+ "timestamp": "2026-06-05T10:10:00Z",
142
+ "content": "The client contact liaison for SOFTWARE is Mary Jane.",
143
+ "tags": [
144
+ "software"
145
+ ],
146
+ "importance": 0.7,
147
+ "metadata": {
148
+ "source_turn": "t9",
149
+ "speaker": "user"
150
+ }
151
+ },
152
+ {
153
+ "id": "dc_c01_m10",
154
+ "type": "semantic",
155
+ "timestamp": "2026-06-06T12:00:00Z",
156
+ "content": "On June 6, the client contact liaison for SOFTWARE was changed to Peter Parker.",
157
+ "tags": [
158
+ "software"
159
+ ],
160
+ "importance": 0.9,
161
+ "metadata": {
162
+ "source_turn": "t10",
163
+ "speaker": "user"
164
+ }
165
+ },
166
+ {
167
+ "id": "dc_c01_m11",
168
+ "type": "semantic",
169
+ "timestamp": "2026-06-06T12:05:00Z",
170
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
171
+ "tags": [
172
+ "noise"
173
+ ],
174
+ "importance": 0.2,
175
+ "metadata": {
176
+ "source_turn": "t11",
177
+ "speaker": "assistant"
178
+ }
179
+ },
180
+ {
181
+ "id": "dc_c01_m12",
182
+ "type": "semantic",
183
+ "timestamp": "2026-06-07T13:00:00Z",
184
+ "content": "The manager for the SOFTWARE task is George Lucas.",
185
+ "tags": [
186
+ "software",
187
+ "manager"
188
+ ],
189
+ "importance": 0.8,
190
+ "metadata": {
191
+ "source_turn": "t12",
192
+ "speaker": "user"
193
+ }
194
+ }
195
+ ],
196
+ "questions": [
197
+ {
198
+ "question_id": "dc_c01_q1",
199
+ "category": "atomic_fact_recall",
200
+ "question": "Who is in charge of administrative coordination for the SOFTWARE task?",
201
+ "expected_answer": "George Lucas",
202
+ "acceptable_answer_criteria": [
203
+ "George Lucas",
204
+ "George"
205
+ ],
206
+ "required_memory_ids": [
207
+ "dc_c01_m12"
208
+ ],
209
+ "forbidden_memory_ids": [],
210
+ "difficulty": "medium",
211
+ "architecture_bias_risk": "low",
212
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
213
+ },
214
+ {
215
+ "question_id": "dc_c01_q2",
216
+ "category": "atomic_fact_recall",
217
+ "question": "Who is in charge of administrative coordination for the SOFTWARE task?",
218
+ "expected_answer": "George Lucas",
219
+ "acceptable_answer_criteria": [
220
+ "George Lucas",
221
+ "George"
222
+ ],
223
+ "required_memory_ids": [
224
+ "dc_c01_m12"
225
+ ],
226
+ "forbidden_memory_ids": [],
227
+ "difficulty": "medium",
228
+ "architecture_bias_risk": "low",
229
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
230
+ },
231
+ {
232
+ "question_id": "dc_c01_q3",
233
+ "category": "atomic_fact_recall",
234
+ "question": "Who is in charge of administrative coordination for the SOFTWARE task?",
235
+ "expected_answer": "George Lucas",
236
+ "acceptable_answer_criteria": [
237
+ "George Lucas",
238
+ "George"
239
+ ],
240
+ "required_memory_ids": [
241
+ "dc_c01_m12"
242
+ ],
243
+ "forbidden_memory_ids": [],
244
+ "difficulty": "medium",
245
+ "architecture_bias_risk": "low",
246
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
247
+ },
248
+ {
249
+ "question_id": "dc_c01_q4",
250
+ "category": "atomic_fact_recall",
251
+ "question": "Who is in charge of administrative coordination for the SOFTWARE task?",
252
+ "expected_answer": "George Lucas",
253
+ "acceptable_answer_criteria": [
254
+ "George Lucas",
255
+ "George"
256
+ ],
257
+ "required_memory_ids": [
258
+ "dc_c01_m12"
259
+ ],
260
+ "forbidden_memory_ids": [],
261
+ "difficulty": "medium",
262
+ "architecture_bias_risk": "low",
263
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
264
+ },
265
+ {
266
+ "question_id": "dc_c01_q5",
267
+ "category": "atomic_fact_recall",
268
+ "question": "Who is in charge of administrative coordination for the SOFTWARE task?",
269
+ "expected_answer": "George Lucas",
270
+ "acceptable_answer_criteria": [
271
+ "George Lucas",
272
+ "George"
273
+ ],
274
+ "required_memory_ids": [
275
+ "dc_c01_m12"
276
+ ],
277
+ "forbidden_memory_ids": [],
278
+ "difficulty": "medium",
279
+ "architecture_bias_risk": "low",
280
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
281
+ },
282
+ {
283
+ "question_id": "dc_c01_q6",
284
+ "category": "atomic_fact_recall",
285
+ "question": "Who is in charge of administrative coordination for the SOFTWARE task?",
286
+ "expected_answer": "George Lucas",
287
+ "acceptable_answer_criteria": [
288
+ "George Lucas",
289
+ "George"
290
+ ],
291
+ "required_memory_ids": [
292
+ "dc_c01_m12"
293
+ ],
294
+ "forbidden_memory_ids": [],
295
+ "difficulty": "medium",
296
+ "architecture_bias_risk": "low",
297
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
298
+ }
299
+ ]
300
+ },
301
+ {
302
+ "conversation_id": "dc_c02_personal_assistant",
303
+ "agent_id": "dc_agent_personal_assistant_02",
304
+ "domain": "personal_assistant",
305
+ "memory_records": [
306
+ {
307
+ "id": "dc_c02_m01",
308
+ "type": "semantic",
309
+ "timestamp": "2026-06-01T10:00:00Z",
310
+ "content": "The main parameters designated for PERSONAL_ASSISTANT are value Alpha.",
311
+ "tags": [
312
+ "personal_assistant",
313
+ "param"
314
+ ],
315
+ "importance": 0.8,
316
+ "metadata": {
317
+ "source_turn": "t1",
318
+ "speaker": "user"
319
+ }
320
+ },
321
+ {
322
+ "id": "dc_c02_m02",
323
+ "type": "semantic",
324
+ "timestamp": "2026-06-01T10:05:00Z",
325
+ "content": "The secondary configuration option for PERSONAL_ASSISTANT is value Beta.",
326
+ "tags": [
327
+ "personal_assistant",
328
+ "param"
329
+ ],
330
+ "importance": 0.8,
331
+ "metadata": {
332
+ "source_turn": "t2",
333
+ "speaker": "user"
334
+ }
335
+ },
336
+ {
337
+ "id": "dc_c02_m03",
338
+ "type": "semantic",
339
+ "timestamp": "2026-06-01T10:10:00Z",
340
+ "content": "Distractor: The color of the PERSONAL_ASSISTANT report sheet is yellow.",
341
+ "tags": [
342
+ "noise"
343
+ ],
344
+ "importance": 0.2,
345
+ "metadata": {
346
+ "source_turn": "t3",
347
+ "speaker": "assistant"
348
+ }
349
+ },
350
+ {
351
+ "id": "dc_c02_m04",
352
+ "type": "semantic",
353
+ "timestamp": "2026-06-02T10:00:00Z",
354
+ "content": "On June 2, the main parameter configuration for PERSONAL_ASSISTANT was updated to Gamma.",
355
+ "tags": [
356
+ "personal_assistant",
357
+ "param"
358
+ ],
359
+ "importance": 0.9,
360
+ "metadata": {
361
+ "source_turn": "t4",
362
+ "speaker": "user"
363
+ }
364
+ },
365
+ {
366
+ "id": "dc_c02_m05",
367
+ "type": "semantic",
368
+ "timestamp": "2026-06-03T11:00:00Z",
369
+ "content": "On June 3, the secondary configuration option for PERSONAL_ASSISTANT was changed to Delta.",
370
+ "tags": [
371
+ "personal_assistant",
372
+ "param"
373
+ ],
374
+ "importance": 0.9,
375
+ "metadata": {
376
+ "source_turn": "t5",
377
+ "speaker": "user"
378
+ }
379
+ },
380
+ {
381
+ "id": "dc_c02_m06",
382
+ "type": "procedural",
383
+ "timestamp": "2026-06-04T09:00:00Z",
384
+ "content": "To apply modifications to the PERSONAL_ASSISTANT registry: 1. check credentials, 2. submit form, 3. wait for email.",
385
+ "tags": [
386
+ "personal_assistant",
387
+ "process"
388
+ ],
389
+ "importance": 0.85,
390
+ "metadata": {
391
+ "source_turn": "t6",
392
+ "speaker": "user"
393
+ }
394
+ },
395
+ {
396
+ "id": "dc_c02_m07",
397
+ "type": "semantic",
398
+ "timestamp": "2026-06-04T09:05:00Z",
399
+ "content": "Distractor: Email server is active on port 25.",
400
+ "tags": [
401
+ "noise"
402
+ ],
403
+ "importance": 0.3,
404
+ "metadata": {
405
+ "source_turn": "t7",
406
+ "speaker": "assistant"
407
+ }
408
+ },
409
+ {
410
+ "id": "dc_c02_m08",
411
+ "type": "semantic",
412
+ "timestamp": "2026-06-05T10:00:00Z",
413
+ "content": "The registry workspace for PERSONAL_ASSISTANT is located at building Room 10.",
414
+ "tags": [
415
+ "personal_assistant"
416
+ ],
417
+ "importance": 0.8,
418
+ "metadata": {
419
+ "source_turn": "t8",
420
+ "speaker": "user"
421
+ },
422
+ "associations": [
423
+ {
424
+ "target_id": "dc_c02_m06",
425
+ "strength": 0.7
426
+ }
427
+ ]
428
+ },
429
+ {
430
+ "id": "dc_c02_m09",
431
+ "type": "semantic",
432
+ "timestamp": "2026-06-05T10:10:00Z",
433
+ "content": "The client contact liaison for PERSONAL_ASSISTANT is Mary Jane.",
434
+ "tags": [
435
+ "personal_assistant"
436
+ ],
437
+ "importance": 0.7,
438
+ "metadata": {
439
+ "source_turn": "t9",
440
+ "speaker": "user"
441
+ }
442
+ },
443
+ {
444
+ "id": "dc_c02_m10",
445
+ "type": "semantic",
446
+ "timestamp": "2026-06-06T12:00:00Z",
447
+ "content": "On June 6, the client contact liaison for PERSONAL_ASSISTANT was changed to Peter Parker.",
448
+ "tags": [
449
+ "personal_assistant"
450
+ ],
451
+ "importance": 0.9,
452
+ "metadata": {
453
+ "source_turn": "t10",
454
+ "speaker": "user"
455
+ }
456
+ },
457
+ {
458
+ "id": "dc_c02_m11",
459
+ "type": "semantic",
460
+ "timestamp": "2026-06-06T12:05:00Z",
461
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
462
+ "tags": [
463
+ "noise"
464
+ ],
465
+ "importance": 0.2,
466
+ "metadata": {
467
+ "source_turn": "t11",
468
+ "speaker": "assistant"
469
+ }
470
+ },
471
+ {
472
+ "id": "dc_c02_m12",
473
+ "type": "semantic",
474
+ "timestamp": "2026-06-07T13:00:00Z",
475
+ "content": "The manager for the PERSONAL_ASSISTANT task is George Lucas.",
476
+ "tags": [
477
+ "personal_assistant",
478
+ "manager"
479
+ ],
480
+ "importance": 0.8,
481
+ "metadata": {
482
+ "source_turn": "t12",
483
+ "speaker": "user"
484
+ }
485
+ }
486
+ ],
487
+ "questions": [
488
+ {
489
+ "question_id": "dc_c02_q1",
490
+ "category": "atomic_fact_recall",
491
+ "question": "Who is in charge of administrative coordination for the PERSONAL_ASSISTANT task?",
492
+ "expected_answer": "George Lucas",
493
+ "acceptable_answer_criteria": [
494
+ "George Lucas",
495
+ "George"
496
+ ],
497
+ "required_memory_ids": [
498
+ "dc_c02_m12"
499
+ ],
500
+ "forbidden_memory_ids": [],
501
+ "difficulty": "medium",
502
+ "architecture_bias_risk": "low",
503
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
504
+ },
505
+ {
506
+ "question_id": "dc_c02_q2",
507
+ "category": "atomic_fact_recall",
508
+ "question": "Who is in charge of administrative coordination for the PERSONAL_ASSISTANT task?",
509
+ "expected_answer": "George Lucas",
510
+ "acceptable_answer_criteria": [
511
+ "George Lucas",
512
+ "George"
513
+ ],
514
+ "required_memory_ids": [
515
+ "dc_c02_m12"
516
+ ],
517
+ "forbidden_memory_ids": [],
518
+ "difficulty": "medium",
519
+ "architecture_bias_risk": "low",
520
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
521
+ },
522
+ {
523
+ "question_id": "dc_c02_q3",
524
+ "category": "atomic_fact_recall",
525
+ "question": "Who is in charge of administrative coordination for the PERSONAL_ASSISTANT task?",
526
+ "expected_answer": "George Lucas",
527
+ "acceptable_answer_criteria": [
528
+ "George Lucas",
529
+ "George"
530
+ ],
531
+ "required_memory_ids": [
532
+ "dc_c02_m12"
533
+ ],
534
+ "forbidden_memory_ids": [],
535
+ "difficulty": "medium",
536
+ "architecture_bias_risk": "low",
537
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
538
+ },
539
+ {
540
+ "question_id": "dc_c02_q4",
541
+ "category": "atomic_fact_recall",
542
+ "question": "Who is in charge of administrative coordination for the PERSONAL_ASSISTANT task?",
543
+ "expected_answer": "George Lucas",
544
+ "acceptable_answer_criteria": [
545
+ "George Lucas",
546
+ "George"
547
+ ],
548
+ "required_memory_ids": [
549
+ "dc_c02_m12"
550
+ ],
551
+ "forbidden_memory_ids": [],
552
+ "difficulty": "medium",
553
+ "architecture_bias_risk": "low",
554
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
555
+ },
556
+ {
557
+ "question_id": "dc_c02_q5",
558
+ "category": "atomic_fact_recall",
559
+ "question": "Who is in charge of administrative coordination for the PERSONAL_ASSISTANT task?",
560
+ "expected_answer": "George Lucas",
561
+ "acceptable_answer_criteria": [
562
+ "George Lucas",
563
+ "George"
564
+ ],
565
+ "required_memory_ids": [
566
+ "dc_c02_m12"
567
+ ],
568
+ "forbidden_memory_ids": [],
569
+ "difficulty": "medium",
570
+ "architecture_bias_risk": "low",
571
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
572
+ },
573
+ {
574
+ "question_id": "dc_c02_q6",
575
+ "category": "atomic_fact_recall",
576
+ "question": "Who is in charge of administrative coordination for the PERSONAL_ASSISTANT task?",
577
+ "expected_answer": "George Lucas",
578
+ "acceptable_answer_criteria": [
579
+ "George Lucas",
580
+ "George"
581
+ ],
582
+ "required_memory_ids": [
583
+ "dc_c02_m12"
584
+ ],
585
+ "forbidden_memory_ids": [],
586
+ "difficulty": "medium",
587
+ "architecture_bias_risk": "low",
588
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
589
+ }
590
+ ]
591
+ },
592
+ {
593
+ "conversation_id": "dc_c03_research",
594
+ "agent_id": "dc_agent_research_03",
595
+ "domain": "research",
596
+ "memory_records": [
597
+ {
598
+ "id": "dc_c03_m01",
599
+ "type": "semantic",
600
+ "timestamp": "2026-06-01T10:00:00Z",
601
+ "content": "The main parameters designated for RESEARCH are value Alpha.",
602
+ "tags": [
603
+ "research",
604
+ "param"
605
+ ],
606
+ "importance": 0.8,
607
+ "metadata": {
608
+ "source_turn": "t1",
609
+ "speaker": "user"
610
+ }
611
+ },
612
+ {
613
+ "id": "dc_c03_m02",
614
+ "type": "semantic",
615
+ "timestamp": "2026-06-01T10:05:00Z",
616
+ "content": "The secondary configuration option for RESEARCH is value Beta.",
617
+ "tags": [
618
+ "research",
619
+ "param"
620
+ ],
621
+ "importance": 0.8,
622
+ "metadata": {
623
+ "source_turn": "t2",
624
+ "speaker": "user"
625
+ }
626
+ },
627
+ {
628
+ "id": "dc_c03_m03",
629
+ "type": "semantic",
630
+ "timestamp": "2026-06-01T10:10:00Z",
631
+ "content": "Distractor: The color of the RESEARCH report sheet is yellow.",
632
+ "tags": [
633
+ "noise"
634
+ ],
635
+ "importance": 0.2,
636
+ "metadata": {
637
+ "source_turn": "t3",
638
+ "speaker": "assistant"
639
+ }
640
+ },
641
+ {
642
+ "id": "dc_c03_m04",
643
+ "type": "semantic",
644
+ "timestamp": "2026-06-02T10:00:00Z",
645
+ "content": "On June 2, the main parameter configuration for RESEARCH was updated to Gamma.",
646
+ "tags": [
647
+ "research",
648
+ "param"
649
+ ],
650
+ "importance": 0.9,
651
+ "metadata": {
652
+ "source_turn": "t4",
653
+ "speaker": "user"
654
+ }
655
+ },
656
+ {
657
+ "id": "dc_c03_m05",
658
+ "type": "semantic",
659
+ "timestamp": "2026-06-03T11:00:00Z",
660
+ "content": "On June 3, the secondary configuration option for RESEARCH was changed to Delta.",
661
+ "tags": [
662
+ "research",
663
+ "param"
664
+ ],
665
+ "importance": 0.9,
666
+ "metadata": {
667
+ "source_turn": "t5",
668
+ "speaker": "user"
669
+ }
670
+ },
671
+ {
672
+ "id": "dc_c03_m06",
673
+ "type": "procedural",
674
+ "timestamp": "2026-06-04T09:00:00Z",
675
+ "content": "To apply modifications to the RESEARCH registry: 1. check credentials, 2. submit form, 3. wait for email.",
676
+ "tags": [
677
+ "research",
678
+ "process"
679
+ ],
680
+ "importance": 0.85,
681
+ "metadata": {
682
+ "source_turn": "t6",
683
+ "speaker": "user"
684
+ }
685
+ },
686
+ {
687
+ "id": "dc_c03_m07",
688
+ "type": "semantic",
689
+ "timestamp": "2026-06-04T09:05:00Z",
690
+ "content": "Distractor: Email server is active on port 25.",
691
+ "tags": [
692
+ "noise"
693
+ ],
694
+ "importance": 0.3,
695
+ "metadata": {
696
+ "source_turn": "t7",
697
+ "speaker": "assistant"
698
+ }
699
+ },
700
+ {
701
+ "id": "dc_c03_m08",
702
+ "type": "semantic",
703
+ "timestamp": "2026-06-05T10:00:00Z",
704
+ "content": "The registry workspace for RESEARCH is located at building Room 10.",
705
+ "tags": [
706
+ "research"
707
+ ],
708
+ "importance": 0.8,
709
+ "metadata": {
710
+ "source_turn": "t8",
711
+ "speaker": "user"
712
+ },
713
+ "associations": [
714
+ {
715
+ "target_id": "dc_c03_m06",
716
+ "strength": 0.7
717
+ }
718
+ ]
719
+ },
720
+ {
721
+ "id": "dc_c03_m09",
722
+ "type": "semantic",
723
+ "timestamp": "2026-06-05T10:10:00Z",
724
+ "content": "The client contact liaison for RESEARCH is Mary Jane.",
725
+ "tags": [
726
+ "research"
727
+ ],
728
+ "importance": 0.7,
729
+ "metadata": {
730
+ "source_turn": "t9",
731
+ "speaker": "user"
732
+ }
733
+ },
734
+ {
735
+ "id": "dc_c03_m10",
736
+ "type": "semantic",
737
+ "timestamp": "2026-06-06T12:00:00Z",
738
+ "content": "On June 6, the client contact liaison for RESEARCH was changed to Peter Parker.",
739
+ "tags": [
740
+ "research"
741
+ ],
742
+ "importance": 0.9,
743
+ "metadata": {
744
+ "source_turn": "t10",
745
+ "speaker": "user"
746
+ }
747
+ },
748
+ {
749
+ "id": "dc_c03_m11",
750
+ "type": "semantic",
751
+ "timestamp": "2026-06-06T12:05:00Z",
752
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
753
+ "tags": [
754
+ "noise"
755
+ ],
756
+ "importance": 0.2,
757
+ "metadata": {
758
+ "source_turn": "t11",
759
+ "speaker": "assistant"
760
+ }
761
+ },
762
+ {
763
+ "id": "dc_c03_m12",
764
+ "type": "semantic",
765
+ "timestamp": "2026-06-07T13:00:00Z",
766
+ "content": "The manager for the RESEARCH task is George Lucas.",
767
+ "tags": [
768
+ "research",
769
+ "manager"
770
+ ],
771
+ "importance": 0.8,
772
+ "metadata": {
773
+ "source_turn": "t12",
774
+ "speaker": "user"
775
+ }
776
+ }
777
+ ],
778
+ "questions": [
779
+ {
780
+ "question_id": "dc_c03_q1",
781
+ "category": "atomic_fact_recall",
782
+ "question": "Who is in charge of administrative coordination for the RESEARCH task?",
783
+ "expected_answer": "George Lucas",
784
+ "acceptable_answer_criteria": [
785
+ "George Lucas",
786
+ "George"
787
+ ],
788
+ "required_memory_ids": [
789
+ "dc_c03_m12"
790
+ ],
791
+ "forbidden_memory_ids": [],
792
+ "difficulty": "medium",
793
+ "architecture_bias_risk": "low",
794
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
795
+ },
796
+ {
797
+ "question_id": "dc_c03_q2",
798
+ "category": "atomic_fact_recall",
799
+ "question": "Who is in charge of administrative coordination for the RESEARCH task?",
800
+ "expected_answer": "George Lucas",
801
+ "acceptable_answer_criteria": [
802
+ "George Lucas",
803
+ "George"
804
+ ],
805
+ "required_memory_ids": [
806
+ "dc_c03_m12"
807
+ ],
808
+ "forbidden_memory_ids": [],
809
+ "difficulty": "medium",
810
+ "architecture_bias_risk": "low",
811
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
812
+ },
813
+ {
814
+ "question_id": "dc_c03_q3",
815
+ "category": "atomic_fact_recall",
816
+ "question": "Who is in charge of administrative coordination for the RESEARCH task?",
817
+ "expected_answer": "George Lucas",
818
+ "acceptable_answer_criteria": [
819
+ "George Lucas",
820
+ "George"
821
+ ],
822
+ "required_memory_ids": [
823
+ "dc_c03_m12"
824
+ ],
825
+ "forbidden_memory_ids": [],
826
+ "difficulty": "medium",
827
+ "architecture_bias_risk": "low",
828
+ "fairness_note": "Verifies provider-neutral evaluation for category atomic_fact_recall."
829
+ },
830
+ {
831
+ "question_id": "dc_c03_q4",
832
+ "category": "paraphrased_semantic_recall",
833
+ "question": "What is the primary variable value currently designated for the RESEARCH project?",
834
+ "expected_answer": "Gamma",
835
+ "acceptable_answer_criteria": [
836
+ "Gamma"
837
+ ],
838
+ "required_memory_ids": [
839
+ "dc_c03_m04"
840
+ ],
841
+ "forbidden_memory_ids": [
842
+ "dc_c03_m01"
843
+ ],
844
+ "difficulty": "medium",
845
+ "architecture_bias_risk": "low",
846
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
847
+ },
848
+ {
849
+ "question_id": "dc_c03_q5",
850
+ "category": "paraphrased_semantic_recall",
851
+ "question": "What is the primary variable value currently designated for the RESEARCH project?",
852
+ "expected_answer": "Gamma",
853
+ "acceptable_answer_criteria": [
854
+ "Gamma"
855
+ ],
856
+ "required_memory_ids": [
857
+ "dc_c03_m04"
858
+ ],
859
+ "forbidden_memory_ids": [
860
+ "dc_c03_m01"
861
+ ],
862
+ "difficulty": "medium",
863
+ "architecture_bias_risk": "low",
864
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
865
+ },
866
+ {
867
+ "question_id": "dc_c03_q6",
868
+ "category": "paraphrased_semantic_recall",
869
+ "question": "What is the primary variable value currently designated for the RESEARCH project?",
870
+ "expected_answer": "Gamma",
871
+ "acceptable_answer_criteria": [
872
+ "Gamma"
873
+ ],
874
+ "required_memory_ids": [
875
+ "dc_c03_m04"
876
+ ],
877
+ "forbidden_memory_ids": [
878
+ "dc_c03_m01"
879
+ ],
880
+ "difficulty": "medium",
881
+ "architecture_bias_risk": "low",
882
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
883
+ }
884
+ ]
885
+ },
886
+ {
887
+ "conversation_id": "dc_c04_travel",
888
+ "agent_id": "dc_agent_travel_04",
889
+ "domain": "travel",
890
+ "memory_records": [
891
+ {
892
+ "id": "dc_c04_m01",
893
+ "type": "semantic",
894
+ "timestamp": "2026-06-01T10:00:00Z",
895
+ "content": "The main parameters designated for TRAVEL are value Alpha.",
896
+ "tags": [
897
+ "travel",
898
+ "param"
899
+ ],
900
+ "importance": 0.8,
901
+ "metadata": {
902
+ "source_turn": "t1",
903
+ "speaker": "user"
904
+ }
905
+ },
906
+ {
907
+ "id": "dc_c04_m02",
908
+ "type": "semantic",
909
+ "timestamp": "2026-06-01T10:05:00Z",
910
+ "content": "The secondary configuration option for TRAVEL is value Beta.",
911
+ "tags": [
912
+ "travel",
913
+ "param"
914
+ ],
915
+ "importance": 0.8,
916
+ "metadata": {
917
+ "source_turn": "t2",
918
+ "speaker": "user"
919
+ }
920
+ },
921
+ {
922
+ "id": "dc_c04_m03",
923
+ "type": "semantic",
924
+ "timestamp": "2026-06-01T10:10:00Z",
925
+ "content": "Distractor: The color of the TRAVEL report sheet is yellow.",
926
+ "tags": [
927
+ "noise"
928
+ ],
929
+ "importance": 0.2,
930
+ "metadata": {
931
+ "source_turn": "t3",
932
+ "speaker": "assistant"
933
+ }
934
+ },
935
+ {
936
+ "id": "dc_c04_m04",
937
+ "type": "semantic",
938
+ "timestamp": "2026-06-02T10:00:00Z",
939
+ "content": "On June 2, the main parameter configuration for TRAVEL was updated to Gamma.",
940
+ "tags": [
941
+ "travel",
942
+ "param"
943
+ ],
944
+ "importance": 0.9,
945
+ "metadata": {
946
+ "source_turn": "t4",
947
+ "speaker": "user"
948
+ }
949
+ },
950
+ {
951
+ "id": "dc_c04_m05",
952
+ "type": "semantic",
953
+ "timestamp": "2026-06-03T11:00:00Z",
954
+ "content": "On June 3, the secondary configuration option for TRAVEL was changed to Delta.",
955
+ "tags": [
956
+ "travel",
957
+ "param"
958
+ ],
959
+ "importance": 0.9,
960
+ "metadata": {
961
+ "source_turn": "t5",
962
+ "speaker": "user"
963
+ }
964
+ },
965
+ {
966
+ "id": "dc_c04_m06",
967
+ "type": "procedural",
968
+ "timestamp": "2026-06-04T09:00:00Z",
969
+ "content": "To apply modifications to the TRAVEL registry: 1. check credentials, 2. submit form, 3. wait for email.",
970
+ "tags": [
971
+ "travel",
972
+ "process"
973
+ ],
974
+ "importance": 0.85,
975
+ "metadata": {
976
+ "source_turn": "t6",
977
+ "speaker": "user"
978
+ }
979
+ },
980
+ {
981
+ "id": "dc_c04_m07",
982
+ "type": "semantic",
983
+ "timestamp": "2026-06-04T09:05:00Z",
984
+ "content": "Distractor: Email server is active on port 25.",
985
+ "tags": [
986
+ "noise"
987
+ ],
988
+ "importance": 0.3,
989
+ "metadata": {
990
+ "source_turn": "t7",
991
+ "speaker": "assistant"
992
+ }
993
+ },
994
+ {
995
+ "id": "dc_c04_m08",
996
+ "type": "semantic",
997
+ "timestamp": "2026-06-05T10:00:00Z",
998
+ "content": "The registry workspace for TRAVEL is located at building Room 10.",
999
+ "tags": [
1000
+ "travel"
1001
+ ],
1002
+ "importance": 0.8,
1003
+ "metadata": {
1004
+ "source_turn": "t8",
1005
+ "speaker": "user"
1006
+ },
1007
+ "associations": [
1008
+ {
1009
+ "target_id": "dc_c04_m06",
1010
+ "strength": 0.7
1011
+ }
1012
+ ]
1013
+ },
1014
+ {
1015
+ "id": "dc_c04_m09",
1016
+ "type": "semantic",
1017
+ "timestamp": "2026-06-05T10:10:00Z",
1018
+ "content": "The client contact liaison for TRAVEL is Mary Jane.",
1019
+ "tags": [
1020
+ "travel"
1021
+ ],
1022
+ "importance": 0.7,
1023
+ "metadata": {
1024
+ "source_turn": "t9",
1025
+ "speaker": "user"
1026
+ }
1027
+ },
1028
+ {
1029
+ "id": "dc_c04_m10",
1030
+ "type": "semantic",
1031
+ "timestamp": "2026-06-06T12:00:00Z",
1032
+ "content": "On June 6, the client contact liaison for TRAVEL was changed to Peter Parker.",
1033
+ "tags": [
1034
+ "travel"
1035
+ ],
1036
+ "importance": 0.9,
1037
+ "metadata": {
1038
+ "source_turn": "t10",
1039
+ "speaker": "user"
1040
+ }
1041
+ },
1042
+ {
1043
+ "id": "dc_c04_m11",
1044
+ "type": "semantic",
1045
+ "timestamp": "2026-06-06T12:05:00Z",
1046
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
1047
+ "tags": [
1048
+ "noise"
1049
+ ],
1050
+ "importance": 0.2,
1051
+ "metadata": {
1052
+ "source_turn": "t11",
1053
+ "speaker": "assistant"
1054
+ }
1055
+ },
1056
+ {
1057
+ "id": "dc_c04_m12",
1058
+ "type": "semantic",
1059
+ "timestamp": "2026-06-07T13:00:00Z",
1060
+ "content": "The manager for the TRAVEL task is George Lucas.",
1061
+ "tags": [
1062
+ "travel",
1063
+ "manager"
1064
+ ],
1065
+ "importance": 0.8,
1066
+ "metadata": {
1067
+ "source_turn": "t12",
1068
+ "speaker": "user"
1069
+ }
1070
+ }
1071
+ ],
1072
+ "questions": [
1073
+ {
1074
+ "question_id": "dc_c04_q1",
1075
+ "category": "paraphrased_semantic_recall",
1076
+ "question": "What is the primary variable value currently designated for the TRAVEL project?",
1077
+ "expected_answer": "Gamma",
1078
+ "acceptable_answer_criteria": [
1079
+ "Gamma"
1080
+ ],
1081
+ "required_memory_ids": [
1082
+ "dc_c04_m04"
1083
+ ],
1084
+ "forbidden_memory_ids": [
1085
+ "dc_c04_m01"
1086
+ ],
1087
+ "difficulty": "medium",
1088
+ "architecture_bias_risk": "low",
1089
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1090
+ },
1091
+ {
1092
+ "question_id": "dc_c04_q2",
1093
+ "category": "paraphrased_semantic_recall",
1094
+ "question": "What is the primary variable value currently designated for the TRAVEL project?",
1095
+ "expected_answer": "Gamma",
1096
+ "acceptable_answer_criteria": [
1097
+ "Gamma"
1098
+ ],
1099
+ "required_memory_ids": [
1100
+ "dc_c04_m04"
1101
+ ],
1102
+ "forbidden_memory_ids": [
1103
+ "dc_c04_m01"
1104
+ ],
1105
+ "difficulty": "medium",
1106
+ "architecture_bias_risk": "low",
1107
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1108
+ },
1109
+ {
1110
+ "question_id": "dc_c04_q3",
1111
+ "category": "paraphrased_semantic_recall",
1112
+ "question": "What is the primary variable value currently designated for the TRAVEL project?",
1113
+ "expected_answer": "Gamma",
1114
+ "acceptable_answer_criteria": [
1115
+ "Gamma"
1116
+ ],
1117
+ "required_memory_ids": [
1118
+ "dc_c04_m04"
1119
+ ],
1120
+ "forbidden_memory_ids": [
1121
+ "dc_c04_m01"
1122
+ ],
1123
+ "difficulty": "medium",
1124
+ "architecture_bias_risk": "low",
1125
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1126
+ },
1127
+ {
1128
+ "question_id": "dc_c04_q4",
1129
+ "category": "paraphrased_semantic_recall",
1130
+ "question": "What is the primary variable value currently designated for the TRAVEL project?",
1131
+ "expected_answer": "Gamma",
1132
+ "acceptable_answer_criteria": [
1133
+ "Gamma"
1134
+ ],
1135
+ "required_memory_ids": [
1136
+ "dc_c04_m04"
1137
+ ],
1138
+ "forbidden_memory_ids": [
1139
+ "dc_c04_m01"
1140
+ ],
1141
+ "difficulty": "medium",
1142
+ "architecture_bias_risk": "low",
1143
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1144
+ },
1145
+ {
1146
+ "question_id": "dc_c04_q5",
1147
+ "category": "paraphrased_semantic_recall",
1148
+ "question": "What is the primary variable value currently designated for the TRAVEL project?",
1149
+ "expected_answer": "Gamma",
1150
+ "acceptable_answer_criteria": [
1151
+ "Gamma"
1152
+ ],
1153
+ "required_memory_ids": [
1154
+ "dc_c04_m04"
1155
+ ],
1156
+ "forbidden_memory_ids": [
1157
+ "dc_c04_m01"
1158
+ ],
1159
+ "difficulty": "medium",
1160
+ "architecture_bias_risk": "low",
1161
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1162
+ },
1163
+ {
1164
+ "question_id": "dc_c04_q6",
1165
+ "category": "paraphrased_semantic_recall",
1166
+ "question": "What is the primary variable value currently designated for the TRAVEL project?",
1167
+ "expected_answer": "Gamma",
1168
+ "acceptable_answer_criteria": [
1169
+ "Gamma"
1170
+ ],
1171
+ "required_memory_ids": [
1172
+ "dc_c04_m04"
1173
+ ],
1174
+ "forbidden_memory_ids": [
1175
+ "dc_c04_m01"
1176
+ ],
1177
+ "difficulty": "medium",
1178
+ "architecture_bias_risk": "low",
1179
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1180
+ }
1181
+ ]
1182
+ },
1183
+ {
1184
+ "conversation_id": "dc_c05_health_admin",
1185
+ "agent_id": "dc_agent_health_admin_05",
1186
+ "domain": "health_admin",
1187
+ "memory_records": [
1188
+ {
1189
+ "id": "dc_c05_m01",
1190
+ "type": "semantic",
1191
+ "timestamp": "2026-06-01T10:00:00Z",
1192
+ "content": "The main parameters designated for HEALTH_ADMIN are value Alpha.",
1193
+ "tags": [
1194
+ "health_admin",
1195
+ "param"
1196
+ ],
1197
+ "importance": 0.8,
1198
+ "metadata": {
1199
+ "source_turn": "t1",
1200
+ "speaker": "user"
1201
+ }
1202
+ },
1203
+ {
1204
+ "id": "dc_c05_m02",
1205
+ "type": "semantic",
1206
+ "timestamp": "2026-06-01T10:05:00Z",
1207
+ "content": "The secondary configuration option for HEALTH_ADMIN is value Beta.",
1208
+ "tags": [
1209
+ "health_admin",
1210
+ "param"
1211
+ ],
1212
+ "importance": 0.8,
1213
+ "metadata": {
1214
+ "source_turn": "t2",
1215
+ "speaker": "user"
1216
+ }
1217
+ },
1218
+ {
1219
+ "id": "dc_c05_m03",
1220
+ "type": "semantic",
1221
+ "timestamp": "2026-06-01T10:10:00Z",
1222
+ "content": "Distractor: The color of the HEALTH_ADMIN report sheet is yellow.",
1223
+ "tags": [
1224
+ "noise"
1225
+ ],
1226
+ "importance": 0.2,
1227
+ "metadata": {
1228
+ "source_turn": "t3",
1229
+ "speaker": "assistant"
1230
+ }
1231
+ },
1232
+ {
1233
+ "id": "dc_c05_m04",
1234
+ "type": "semantic",
1235
+ "timestamp": "2026-06-02T10:00:00Z",
1236
+ "content": "On June 2, the main parameter configuration for HEALTH_ADMIN was updated to Gamma.",
1237
+ "tags": [
1238
+ "health_admin",
1239
+ "param"
1240
+ ],
1241
+ "importance": 0.9,
1242
+ "metadata": {
1243
+ "source_turn": "t4",
1244
+ "speaker": "user"
1245
+ }
1246
+ },
1247
+ {
1248
+ "id": "dc_c05_m05",
1249
+ "type": "semantic",
1250
+ "timestamp": "2026-06-03T11:00:00Z",
1251
+ "content": "On June 3, the secondary configuration option for HEALTH_ADMIN was changed to Delta.",
1252
+ "tags": [
1253
+ "health_admin",
1254
+ "param"
1255
+ ],
1256
+ "importance": 0.9,
1257
+ "metadata": {
1258
+ "source_turn": "t5",
1259
+ "speaker": "user"
1260
+ }
1261
+ },
1262
+ {
1263
+ "id": "dc_c05_m06",
1264
+ "type": "procedural",
1265
+ "timestamp": "2026-06-04T09:00:00Z",
1266
+ "content": "To apply modifications to the HEALTH_ADMIN registry: 1. check credentials, 2. submit form, 3. wait for email.",
1267
+ "tags": [
1268
+ "health_admin",
1269
+ "process"
1270
+ ],
1271
+ "importance": 0.85,
1272
+ "metadata": {
1273
+ "source_turn": "t6",
1274
+ "speaker": "user"
1275
+ }
1276
+ },
1277
+ {
1278
+ "id": "dc_c05_m07",
1279
+ "type": "semantic",
1280
+ "timestamp": "2026-06-04T09:05:00Z",
1281
+ "content": "Distractor: Email server is active on port 25.",
1282
+ "tags": [
1283
+ "noise"
1284
+ ],
1285
+ "importance": 0.3,
1286
+ "metadata": {
1287
+ "source_turn": "t7",
1288
+ "speaker": "assistant"
1289
+ }
1290
+ },
1291
+ {
1292
+ "id": "dc_c05_m08",
1293
+ "type": "semantic",
1294
+ "timestamp": "2026-06-05T10:00:00Z",
1295
+ "content": "The registry workspace for HEALTH_ADMIN is located at building Room 10.",
1296
+ "tags": [
1297
+ "health_admin"
1298
+ ],
1299
+ "importance": 0.8,
1300
+ "metadata": {
1301
+ "source_turn": "t8",
1302
+ "speaker": "user"
1303
+ },
1304
+ "associations": [
1305
+ {
1306
+ "target_id": "dc_c05_m06",
1307
+ "strength": 0.7
1308
+ }
1309
+ ]
1310
+ },
1311
+ {
1312
+ "id": "dc_c05_m09",
1313
+ "type": "semantic",
1314
+ "timestamp": "2026-06-05T10:10:00Z",
1315
+ "content": "The client contact liaison for HEALTH_ADMIN is Mary Jane.",
1316
+ "tags": [
1317
+ "health_admin"
1318
+ ],
1319
+ "importance": 0.7,
1320
+ "metadata": {
1321
+ "source_turn": "t9",
1322
+ "speaker": "user"
1323
+ }
1324
+ },
1325
+ {
1326
+ "id": "dc_c05_m10",
1327
+ "type": "semantic",
1328
+ "timestamp": "2026-06-06T12:00:00Z",
1329
+ "content": "On June 6, the client contact liaison for HEALTH_ADMIN was changed to Peter Parker.",
1330
+ "tags": [
1331
+ "health_admin"
1332
+ ],
1333
+ "importance": 0.9,
1334
+ "metadata": {
1335
+ "source_turn": "t10",
1336
+ "speaker": "user"
1337
+ }
1338
+ },
1339
+ {
1340
+ "id": "dc_c05_m11",
1341
+ "type": "semantic",
1342
+ "timestamp": "2026-06-06T12:05:00Z",
1343
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
1344
+ "tags": [
1345
+ "noise"
1346
+ ],
1347
+ "importance": 0.2,
1348
+ "metadata": {
1349
+ "source_turn": "t11",
1350
+ "speaker": "assistant"
1351
+ }
1352
+ },
1353
+ {
1354
+ "id": "dc_c05_m12",
1355
+ "type": "semantic",
1356
+ "timestamp": "2026-06-07T13:00:00Z",
1357
+ "content": "The manager for the HEALTH_ADMIN task is George Lucas.",
1358
+ "tags": [
1359
+ "health_admin",
1360
+ "manager"
1361
+ ],
1362
+ "importance": 0.8,
1363
+ "metadata": {
1364
+ "source_turn": "t12",
1365
+ "speaker": "user"
1366
+ }
1367
+ }
1368
+ ],
1369
+ "questions": [
1370
+ {
1371
+ "question_id": "dc_c05_q1",
1372
+ "category": "paraphrased_semantic_recall",
1373
+ "question": "What is the primary variable value currently designated for the HEALTH_ADMIN project?",
1374
+ "expected_answer": "Gamma",
1375
+ "acceptable_answer_criteria": [
1376
+ "Gamma"
1377
+ ],
1378
+ "required_memory_ids": [
1379
+ "dc_c05_m04"
1380
+ ],
1381
+ "forbidden_memory_ids": [
1382
+ "dc_c05_m01"
1383
+ ],
1384
+ "difficulty": "medium",
1385
+ "architecture_bias_risk": "low",
1386
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1387
+ },
1388
+ {
1389
+ "question_id": "dc_c05_q2",
1390
+ "category": "paraphrased_semantic_recall",
1391
+ "question": "What is the primary variable value currently designated for the HEALTH_ADMIN project?",
1392
+ "expected_answer": "Gamma",
1393
+ "acceptable_answer_criteria": [
1394
+ "Gamma"
1395
+ ],
1396
+ "required_memory_ids": [
1397
+ "dc_c05_m04"
1398
+ ],
1399
+ "forbidden_memory_ids": [
1400
+ "dc_c05_m01"
1401
+ ],
1402
+ "difficulty": "medium",
1403
+ "architecture_bias_risk": "low",
1404
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1405
+ },
1406
+ {
1407
+ "question_id": "dc_c05_q3",
1408
+ "category": "paraphrased_semantic_recall",
1409
+ "question": "What is the primary variable value currently designated for the HEALTH_ADMIN project?",
1410
+ "expected_answer": "Gamma",
1411
+ "acceptable_answer_criteria": [
1412
+ "Gamma"
1413
+ ],
1414
+ "required_memory_ids": [
1415
+ "dc_c05_m04"
1416
+ ],
1417
+ "forbidden_memory_ids": [
1418
+ "dc_c05_m01"
1419
+ ],
1420
+ "difficulty": "medium",
1421
+ "architecture_bias_risk": "low",
1422
+ "fairness_note": "Verifies provider-neutral evaluation for category paraphrased_semantic_recall."
1423
+ },
1424
+ {
1425
+ "question_id": "dc_c05_q4",
1426
+ "category": "temporal_update",
1427
+ "question": "What is the secondary parameter value active for HEALTH_ADMIN?",
1428
+ "expected_answer": "Delta",
1429
+ "acceptable_answer_criteria": [
1430
+ "Delta"
1431
+ ],
1432
+ "required_memory_ids": [
1433
+ "dc_c05_m05"
1434
+ ],
1435
+ "forbidden_memory_ids": [
1436
+ "dc_c05_m02"
1437
+ ],
1438
+ "difficulty": "medium",
1439
+ "architecture_bias_risk": "low",
1440
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1441
+ },
1442
+ {
1443
+ "question_id": "dc_c05_q5",
1444
+ "category": "temporal_update",
1445
+ "question": "What is the secondary parameter value active for HEALTH_ADMIN?",
1446
+ "expected_answer": "Delta",
1447
+ "acceptable_answer_criteria": [
1448
+ "Delta"
1449
+ ],
1450
+ "required_memory_ids": [
1451
+ "dc_c05_m05"
1452
+ ],
1453
+ "forbidden_memory_ids": [
1454
+ "dc_c05_m02"
1455
+ ],
1456
+ "difficulty": "medium",
1457
+ "architecture_bias_risk": "low",
1458
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1459
+ },
1460
+ {
1461
+ "question_id": "dc_c05_q6",
1462
+ "category": "temporal_update",
1463
+ "question": "What is the secondary parameter value active for HEALTH_ADMIN?",
1464
+ "expected_answer": "Delta",
1465
+ "acceptable_answer_criteria": [
1466
+ "Delta"
1467
+ ],
1468
+ "required_memory_ids": [
1469
+ "dc_c05_m05"
1470
+ ],
1471
+ "forbidden_memory_ids": [
1472
+ "dc_c05_m02"
1473
+ ],
1474
+ "difficulty": "medium",
1475
+ "architecture_bias_risk": "low",
1476
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1477
+ }
1478
+ ]
1479
+ },
1480
+ {
1481
+ "conversation_id": "dc_c06_finance_admin",
1482
+ "agent_id": "dc_agent_finance_admin_06",
1483
+ "domain": "finance_admin",
1484
+ "memory_records": [
1485
+ {
1486
+ "id": "dc_c06_m01",
1487
+ "type": "semantic",
1488
+ "timestamp": "2026-06-01T10:00:00Z",
1489
+ "content": "The main parameters designated for FINANCE_ADMIN are value Alpha.",
1490
+ "tags": [
1491
+ "finance_admin",
1492
+ "param"
1493
+ ],
1494
+ "importance": 0.8,
1495
+ "metadata": {
1496
+ "source_turn": "t1",
1497
+ "speaker": "user"
1498
+ }
1499
+ },
1500
+ {
1501
+ "id": "dc_c06_m02",
1502
+ "type": "semantic",
1503
+ "timestamp": "2026-06-01T10:05:00Z",
1504
+ "content": "The secondary configuration option for FINANCE_ADMIN is value Beta.",
1505
+ "tags": [
1506
+ "finance_admin",
1507
+ "param"
1508
+ ],
1509
+ "importance": 0.8,
1510
+ "metadata": {
1511
+ "source_turn": "t2",
1512
+ "speaker": "user"
1513
+ }
1514
+ },
1515
+ {
1516
+ "id": "dc_c06_m03",
1517
+ "type": "semantic",
1518
+ "timestamp": "2026-06-01T10:10:00Z",
1519
+ "content": "Distractor: The color of the FINANCE_ADMIN report sheet is yellow.",
1520
+ "tags": [
1521
+ "noise"
1522
+ ],
1523
+ "importance": 0.2,
1524
+ "metadata": {
1525
+ "source_turn": "t3",
1526
+ "speaker": "assistant"
1527
+ }
1528
+ },
1529
+ {
1530
+ "id": "dc_c06_m04",
1531
+ "type": "semantic",
1532
+ "timestamp": "2026-06-02T10:00:00Z",
1533
+ "content": "On June 2, the main parameter configuration for FINANCE_ADMIN was updated to Gamma.",
1534
+ "tags": [
1535
+ "finance_admin",
1536
+ "param"
1537
+ ],
1538
+ "importance": 0.9,
1539
+ "metadata": {
1540
+ "source_turn": "t4",
1541
+ "speaker": "user"
1542
+ }
1543
+ },
1544
+ {
1545
+ "id": "dc_c06_m05",
1546
+ "type": "semantic",
1547
+ "timestamp": "2026-06-03T11:00:00Z",
1548
+ "content": "On June 3, the secondary configuration option for FINANCE_ADMIN was changed to Delta.",
1549
+ "tags": [
1550
+ "finance_admin",
1551
+ "param"
1552
+ ],
1553
+ "importance": 0.9,
1554
+ "metadata": {
1555
+ "source_turn": "t5",
1556
+ "speaker": "user"
1557
+ }
1558
+ },
1559
+ {
1560
+ "id": "dc_c06_m06",
1561
+ "type": "procedural",
1562
+ "timestamp": "2026-06-04T09:00:00Z",
1563
+ "content": "To apply modifications to the FINANCE_ADMIN registry: 1. check credentials, 2. submit form, 3. wait for email.",
1564
+ "tags": [
1565
+ "finance_admin",
1566
+ "process"
1567
+ ],
1568
+ "importance": 0.85,
1569
+ "metadata": {
1570
+ "source_turn": "t6",
1571
+ "speaker": "user"
1572
+ }
1573
+ },
1574
+ {
1575
+ "id": "dc_c06_m07",
1576
+ "type": "semantic",
1577
+ "timestamp": "2026-06-04T09:05:00Z",
1578
+ "content": "Distractor: Email server is active on port 25.",
1579
+ "tags": [
1580
+ "noise"
1581
+ ],
1582
+ "importance": 0.3,
1583
+ "metadata": {
1584
+ "source_turn": "t7",
1585
+ "speaker": "assistant"
1586
+ }
1587
+ },
1588
+ {
1589
+ "id": "dc_c06_m08",
1590
+ "type": "semantic",
1591
+ "timestamp": "2026-06-05T10:00:00Z",
1592
+ "content": "The registry workspace for FINANCE_ADMIN is located at building Room 10.",
1593
+ "tags": [
1594
+ "finance_admin"
1595
+ ],
1596
+ "importance": 0.8,
1597
+ "metadata": {
1598
+ "source_turn": "t8",
1599
+ "speaker": "user"
1600
+ },
1601
+ "associations": [
1602
+ {
1603
+ "target_id": "dc_c06_m06",
1604
+ "strength": 0.7
1605
+ }
1606
+ ]
1607
+ },
1608
+ {
1609
+ "id": "dc_c06_m09",
1610
+ "type": "semantic",
1611
+ "timestamp": "2026-06-05T10:10:00Z",
1612
+ "content": "The client contact liaison for FINANCE_ADMIN is Mary Jane.",
1613
+ "tags": [
1614
+ "finance_admin"
1615
+ ],
1616
+ "importance": 0.7,
1617
+ "metadata": {
1618
+ "source_turn": "t9",
1619
+ "speaker": "user"
1620
+ }
1621
+ },
1622
+ {
1623
+ "id": "dc_c06_m10",
1624
+ "type": "semantic",
1625
+ "timestamp": "2026-06-06T12:00:00Z",
1626
+ "content": "On June 6, the client contact liaison for FINANCE_ADMIN was changed to Peter Parker.",
1627
+ "tags": [
1628
+ "finance_admin"
1629
+ ],
1630
+ "importance": 0.9,
1631
+ "metadata": {
1632
+ "source_turn": "t10",
1633
+ "speaker": "user"
1634
+ }
1635
+ },
1636
+ {
1637
+ "id": "dc_c06_m11",
1638
+ "type": "semantic",
1639
+ "timestamp": "2026-06-06T12:05:00Z",
1640
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
1641
+ "tags": [
1642
+ "noise"
1643
+ ],
1644
+ "importance": 0.2,
1645
+ "metadata": {
1646
+ "source_turn": "t11",
1647
+ "speaker": "assistant"
1648
+ }
1649
+ },
1650
+ {
1651
+ "id": "dc_c06_m12",
1652
+ "type": "semantic",
1653
+ "timestamp": "2026-06-07T13:00:00Z",
1654
+ "content": "The manager for the FINANCE_ADMIN task is George Lucas.",
1655
+ "tags": [
1656
+ "finance_admin",
1657
+ "manager"
1658
+ ],
1659
+ "importance": 0.8,
1660
+ "metadata": {
1661
+ "source_turn": "t12",
1662
+ "speaker": "user"
1663
+ }
1664
+ }
1665
+ ],
1666
+ "questions": [
1667
+ {
1668
+ "question_id": "dc_c06_q1",
1669
+ "category": "temporal_update",
1670
+ "question": "What is the secondary parameter value active for FINANCE_ADMIN?",
1671
+ "expected_answer": "Delta",
1672
+ "acceptable_answer_criteria": [
1673
+ "Delta"
1674
+ ],
1675
+ "required_memory_ids": [
1676
+ "dc_c06_m05"
1677
+ ],
1678
+ "forbidden_memory_ids": [
1679
+ "dc_c06_m02"
1680
+ ],
1681
+ "difficulty": "medium",
1682
+ "architecture_bias_risk": "low",
1683
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1684
+ },
1685
+ {
1686
+ "question_id": "dc_c06_q2",
1687
+ "category": "temporal_update",
1688
+ "question": "What is the secondary parameter value active for FINANCE_ADMIN?",
1689
+ "expected_answer": "Delta",
1690
+ "acceptable_answer_criteria": [
1691
+ "Delta"
1692
+ ],
1693
+ "required_memory_ids": [
1694
+ "dc_c06_m05"
1695
+ ],
1696
+ "forbidden_memory_ids": [
1697
+ "dc_c06_m02"
1698
+ ],
1699
+ "difficulty": "medium",
1700
+ "architecture_bias_risk": "low",
1701
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1702
+ },
1703
+ {
1704
+ "question_id": "dc_c06_q3",
1705
+ "category": "temporal_update",
1706
+ "question": "What is the secondary parameter value active for FINANCE_ADMIN?",
1707
+ "expected_answer": "Delta",
1708
+ "acceptable_answer_criteria": [
1709
+ "Delta"
1710
+ ],
1711
+ "required_memory_ids": [
1712
+ "dc_c06_m05"
1713
+ ],
1714
+ "forbidden_memory_ids": [
1715
+ "dc_c06_m02"
1716
+ ],
1717
+ "difficulty": "medium",
1718
+ "architecture_bias_risk": "low",
1719
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1720
+ },
1721
+ {
1722
+ "question_id": "dc_c06_q4",
1723
+ "category": "temporal_update",
1724
+ "question": "What is the secondary parameter value active for FINANCE_ADMIN?",
1725
+ "expected_answer": "Delta",
1726
+ "acceptable_answer_criteria": [
1727
+ "Delta"
1728
+ ],
1729
+ "required_memory_ids": [
1730
+ "dc_c06_m05"
1731
+ ],
1732
+ "forbidden_memory_ids": [
1733
+ "dc_c06_m02"
1734
+ ],
1735
+ "difficulty": "medium",
1736
+ "architecture_bias_risk": "low",
1737
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1738
+ },
1739
+ {
1740
+ "question_id": "dc_c06_q5",
1741
+ "category": "temporal_update",
1742
+ "question": "What is the secondary parameter value active for FINANCE_ADMIN?",
1743
+ "expected_answer": "Delta",
1744
+ "acceptable_answer_criteria": [
1745
+ "Delta"
1746
+ ],
1747
+ "required_memory_ids": [
1748
+ "dc_c06_m05"
1749
+ ],
1750
+ "forbidden_memory_ids": [
1751
+ "dc_c06_m02"
1752
+ ],
1753
+ "difficulty": "medium",
1754
+ "architecture_bias_risk": "low",
1755
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1756
+ },
1757
+ {
1758
+ "question_id": "dc_c06_q6",
1759
+ "category": "temporal_update",
1760
+ "question": "What is the secondary parameter value active for FINANCE_ADMIN?",
1761
+ "expected_answer": "Delta",
1762
+ "acceptable_answer_criteria": [
1763
+ "Delta"
1764
+ ],
1765
+ "required_memory_ids": [
1766
+ "dc_c06_m05"
1767
+ ],
1768
+ "forbidden_memory_ids": [
1769
+ "dc_c06_m02"
1770
+ ],
1771
+ "difficulty": "medium",
1772
+ "architecture_bias_risk": "low",
1773
+ "fairness_note": "Verifies provider-neutral evaluation for category temporal_update."
1774
+ }
1775
+ ]
1776
+ },
1777
+ {
1778
+ "conversation_id": "dc_c07_education",
1779
+ "agent_id": "dc_agent_education_07",
1780
+ "domain": "education",
1781
+ "memory_records": [
1782
+ {
1783
+ "id": "dc_c07_m01",
1784
+ "type": "semantic",
1785
+ "timestamp": "2026-06-01T10:00:00Z",
1786
+ "content": "The main parameters designated for EDUCATION are value Alpha.",
1787
+ "tags": [
1788
+ "education",
1789
+ "param"
1790
+ ],
1791
+ "importance": 0.8,
1792
+ "metadata": {
1793
+ "source_turn": "t1",
1794
+ "speaker": "user"
1795
+ }
1796
+ },
1797
+ {
1798
+ "id": "dc_c07_m02",
1799
+ "type": "semantic",
1800
+ "timestamp": "2026-06-01T10:05:00Z",
1801
+ "content": "The secondary configuration option for EDUCATION is value Beta.",
1802
+ "tags": [
1803
+ "education",
1804
+ "param"
1805
+ ],
1806
+ "importance": 0.8,
1807
+ "metadata": {
1808
+ "source_turn": "t2",
1809
+ "speaker": "user"
1810
+ }
1811
+ },
1812
+ {
1813
+ "id": "dc_c07_m03",
1814
+ "type": "semantic",
1815
+ "timestamp": "2026-06-01T10:10:00Z",
1816
+ "content": "Distractor: The color of the EDUCATION report sheet is yellow.",
1817
+ "tags": [
1818
+ "noise"
1819
+ ],
1820
+ "importance": 0.2,
1821
+ "metadata": {
1822
+ "source_turn": "t3",
1823
+ "speaker": "assistant"
1824
+ }
1825
+ },
1826
+ {
1827
+ "id": "dc_c07_m04",
1828
+ "type": "semantic",
1829
+ "timestamp": "2026-06-02T10:00:00Z",
1830
+ "content": "On June 2, the main parameter configuration for EDUCATION was updated to Gamma.",
1831
+ "tags": [
1832
+ "education",
1833
+ "param"
1834
+ ],
1835
+ "importance": 0.9,
1836
+ "metadata": {
1837
+ "source_turn": "t4",
1838
+ "speaker": "user"
1839
+ }
1840
+ },
1841
+ {
1842
+ "id": "dc_c07_m05",
1843
+ "type": "semantic",
1844
+ "timestamp": "2026-06-03T11:00:00Z",
1845
+ "content": "On June 3, the secondary configuration option for EDUCATION was changed to Delta.",
1846
+ "tags": [
1847
+ "education",
1848
+ "param"
1849
+ ],
1850
+ "importance": 0.9,
1851
+ "metadata": {
1852
+ "source_turn": "t5",
1853
+ "speaker": "user"
1854
+ }
1855
+ },
1856
+ {
1857
+ "id": "dc_c07_m06",
1858
+ "type": "procedural",
1859
+ "timestamp": "2026-06-04T09:00:00Z",
1860
+ "content": "To apply modifications to the EDUCATION registry: 1. check credentials, 2. submit form, 3. wait for email.",
1861
+ "tags": [
1862
+ "education",
1863
+ "process"
1864
+ ],
1865
+ "importance": 0.85,
1866
+ "metadata": {
1867
+ "source_turn": "t6",
1868
+ "speaker": "user"
1869
+ }
1870
+ },
1871
+ {
1872
+ "id": "dc_c07_m07",
1873
+ "type": "semantic",
1874
+ "timestamp": "2026-06-04T09:05:00Z",
1875
+ "content": "Distractor: Email server is active on port 25.",
1876
+ "tags": [
1877
+ "noise"
1878
+ ],
1879
+ "importance": 0.3,
1880
+ "metadata": {
1881
+ "source_turn": "t7",
1882
+ "speaker": "assistant"
1883
+ }
1884
+ },
1885
+ {
1886
+ "id": "dc_c07_m08",
1887
+ "type": "semantic",
1888
+ "timestamp": "2026-06-05T10:00:00Z",
1889
+ "content": "The registry workspace for EDUCATION is located at building Room 10.",
1890
+ "tags": [
1891
+ "education"
1892
+ ],
1893
+ "importance": 0.8,
1894
+ "metadata": {
1895
+ "source_turn": "t8",
1896
+ "speaker": "user"
1897
+ },
1898
+ "associations": [
1899
+ {
1900
+ "target_id": "dc_c07_m06",
1901
+ "strength": 0.7
1902
+ }
1903
+ ]
1904
+ },
1905
+ {
1906
+ "id": "dc_c07_m09",
1907
+ "type": "semantic",
1908
+ "timestamp": "2026-06-05T10:10:00Z",
1909
+ "content": "The client contact liaison for EDUCATION is Mary Jane.",
1910
+ "tags": [
1911
+ "education"
1912
+ ],
1913
+ "importance": 0.7,
1914
+ "metadata": {
1915
+ "source_turn": "t9",
1916
+ "speaker": "user"
1917
+ }
1918
+ },
1919
+ {
1920
+ "id": "dc_c07_m10",
1921
+ "type": "semantic",
1922
+ "timestamp": "2026-06-06T12:00:00Z",
1923
+ "content": "On June 6, the client contact liaison for EDUCATION was changed to Peter Parker.",
1924
+ "tags": [
1925
+ "education"
1926
+ ],
1927
+ "importance": 0.9,
1928
+ "metadata": {
1929
+ "source_turn": "t10",
1930
+ "speaker": "user"
1931
+ }
1932
+ },
1933
+ {
1934
+ "id": "dc_c07_m11",
1935
+ "type": "semantic",
1936
+ "timestamp": "2026-06-06T12:05:00Z",
1937
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
1938
+ "tags": [
1939
+ "noise"
1940
+ ],
1941
+ "importance": 0.2,
1942
+ "metadata": {
1943
+ "source_turn": "t11",
1944
+ "speaker": "assistant"
1945
+ }
1946
+ },
1947
+ {
1948
+ "id": "dc_c07_m12",
1949
+ "type": "semantic",
1950
+ "timestamp": "2026-06-07T13:00:00Z",
1951
+ "content": "The manager for the EDUCATION task is George Lucas.",
1952
+ "tags": [
1953
+ "education",
1954
+ "manager"
1955
+ ],
1956
+ "importance": 0.8,
1957
+ "metadata": {
1958
+ "source_turn": "t12",
1959
+ "speaker": "user"
1960
+ }
1961
+ }
1962
+ ],
1963
+ "questions": [
1964
+ {
1965
+ "question_id": "dc_c07_q1",
1966
+ "category": "contradiction_resolution",
1967
+ "question": "Does Mary Jane still serve as the primary external liaison for the EDUCATION engagement?",
1968
+ "expected_answer": "No, it was updated to Peter Parker on June 6.",
1969
+ "acceptable_answer_criteria": [
1970
+ "No, it is Peter Parker",
1971
+ "No",
1972
+ "Peter Parker"
1973
+ ],
1974
+ "required_memory_ids": [
1975
+ "dc_c07_m10",
1976
+ "dc_c07_m09"
1977
+ ],
1978
+ "forbidden_memory_ids": [],
1979
+ "difficulty": "medium",
1980
+ "architecture_bias_risk": "low",
1981
+ "fairness_note": "Verifies provider-neutral evaluation for category contradiction_resolution."
1982
+ },
1983
+ {
1984
+ "question_id": "dc_c07_q2",
1985
+ "category": "contradiction_resolution",
1986
+ "question": "Does Mary Jane still serve as the primary external liaison for the EDUCATION engagement?",
1987
+ "expected_answer": "No, it was updated to Peter Parker on June 6.",
1988
+ "acceptable_answer_criteria": [
1989
+ "No, it is Peter Parker",
1990
+ "No",
1991
+ "Peter Parker"
1992
+ ],
1993
+ "required_memory_ids": [
1994
+ "dc_c07_m10",
1995
+ "dc_c07_m09"
1996
+ ],
1997
+ "forbidden_memory_ids": [],
1998
+ "difficulty": "medium",
1999
+ "architecture_bias_risk": "low",
2000
+ "fairness_note": "Verifies provider-neutral evaluation for category contradiction_resolution."
2001
+ },
2002
+ {
2003
+ "question_id": "dc_c07_q3",
2004
+ "category": "contradiction_resolution",
2005
+ "question": "Does Mary Jane still serve as the primary external liaison for the EDUCATION engagement?",
2006
+ "expected_answer": "No, it was updated to Peter Parker on June 6.",
2007
+ "acceptable_answer_criteria": [
2008
+ "No, it is Peter Parker",
2009
+ "No",
2010
+ "Peter Parker"
2011
+ ],
2012
+ "required_memory_ids": [
2013
+ "dc_c07_m10",
2014
+ "dc_c07_m09"
2015
+ ],
2016
+ "forbidden_memory_ids": [],
2017
+ "difficulty": "medium",
2018
+ "architecture_bias_risk": "low",
2019
+ "fairness_note": "Verifies provider-neutral evaluation for category contradiction_resolution."
2020
+ },
2021
+ {
2022
+ "question_id": "dc_c07_q4",
2023
+ "category": "contradiction_resolution",
2024
+ "question": "Does Mary Jane still serve as the primary external liaison for the EDUCATION engagement?",
2025
+ "expected_answer": "No, it was updated to Peter Parker on June 6.",
2026
+ "acceptable_answer_criteria": [
2027
+ "No, it is Peter Parker",
2028
+ "No",
2029
+ "Peter Parker"
2030
+ ],
2031
+ "required_memory_ids": [
2032
+ "dc_c07_m10",
2033
+ "dc_c07_m09"
2034
+ ],
2035
+ "forbidden_memory_ids": [],
2036
+ "difficulty": "medium",
2037
+ "architecture_bias_risk": "low",
2038
+ "fairness_note": "Verifies provider-neutral evaluation for category contradiction_resolution."
2039
+ },
2040
+ {
2041
+ "question_id": "dc_c07_q5",
2042
+ "category": "contradiction_resolution",
2043
+ "question": "Does Mary Jane still serve as the primary external liaison for the EDUCATION engagement?",
2044
+ "expected_answer": "No, it was updated to Peter Parker on June 6.",
2045
+ "acceptable_answer_criteria": [
2046
+ "No, it is Peter Parker",
2047
+ "No",
2048
+ "Peter Parker"
2049
+ ],
2050
+ "required_memory_ids": [
2051
+ "dc_c07_m10",
2052
+ "dc_c07_m09"
2053
+ ],
2054
+ "forbidden_memory_ids": [],
2055
+ "difficulty": "medium",
2056
+ "architecture_bias_risk": "low",
2057
+ "fairness_note": "Verifies provider-neutral evaluation for category contradiction_resolution."
2058
+ },
2059
+ {
2060
+ "question_id": "dc_c07_q6",
2061
+ "category": "contradiction_resolution",
2062
+ "question": "Does Mary Jane still serve as the primary external liaison for the EDUCATION engagement?",
2063
+ "expected_answer": "No, it was updated to Peter Parker on June 6.",
2064
+ "acceptable_answer_criteria": [
2065
+ "No, it is Peter Parker",
2066
+ "No",
2067
+ "Peter Parker"
2068
+ ],
2069
+ "required_memory_ids": [
2070
+ "dc_c07_m10",
2071
+ "dc_c07_m09"
2072
+ ],
2073
+ "forbidden_memory_ids": [],
2074
+ "difficulty": "medium",
2075
+ "architecture_bias_risk": "low",
2076
+ "fairness_note": "Verifies provider-neutral evaluation for category contradiction_resolution."
2077
+ }
2078
+ ]
2079
+ },
2080
+ {
2081
+ "conversation_id": "dc_c08_creative_work",
2082
+ "agent_id": "dc_agent_creative_work_08",
2083
+ "domain": "creative_work",
2084
+ "memory_records": [
2085
+ {
2086
+ "id": "dc_c08_m01",
2087
+ "type": "semantic",
2088
+ "timestamp": "2026-06-01T10:00:00Z",
2089
+ "content": "The main parameters designated for CREATIVE_WORK are value Alpha.",
2090
+ "tags": [
2091
+ "creative_work",
2092
+ "param"
2093
+ ],
2094
+ "importance": 0.8,
2095
+ "metadata": {
2096
+ "source_turn": "t1",
2097
+ "speaker": "user"
2098
+ }
2099
+ },
2100
+ {
2101
+ "id": "dc_c08_m02",
2102
+ "type": "semantic",
2103
+ "timestamp": "2026-06-01T10:05:00Z",
2104
+ "content": "The secondary configuration option for CREATIVE_WORK is value Beta.",
2105
+ "tags": [
2106
+ "creative_work",
2107
+ "param"
2108
+ ],
2109
+ "importance": 0.8,
2110
+ "metadata": {
2111
+ "source_turn": "t2",
2112
+ "speaker": "user"
2113
+ }
2114
+ },
2115
+ {
2116
+ "id": "dc_c08_m03",
2117
+ "type": "semantic",
2118
+ "timestamp": "2026-06-01T10:10:00Z",
2119
+ "content": "Distractor: The color of the CREATIVE_WORK report sheet is yellow.",
2120
+ "tags": [
2121
+ "noise"
2122
+ ],
2123
+ "importance": 0.2,
2124
+ "metadata": {
2125
+ "source_turn": "t3",
2126
+ "speaker": "assistant"
2127
+ }
2128
+ },
2129
+ {
2130
+ "id": "dc_c08_m04",
2131
+ "type": "semantic",
2132
+ "timestamp": "2026-06-02T10:00:00Z",
2133
+ "content": "On June 2, the main parameter configuration for CREATIVE_WORK was updated to Gamma.",
2134
+ "tags": [
2135
+ "creative_work",
2136
+ "param"
2137
+ ],
2138
+ "importance": 0.9,
2139
+ "metadata": {
2140
+ "source_turn": "t4",
2141
+ "speaker": "user"
2142
+ }
2143
+ },
2144
+ {
2145
+ "id": "dc_c08_m05",
2146
+ "type": "semantic",
2147
+ "timestamp": "2026-06-03T11:00:00Z",
2148
+ "content": "On June 3, the secondary configuration option for CREATIVE_WORK was changed to Delta.",
2149
+ "tags": [
2150
+ "creative_work",
2151
+ "param"
2152
+ ],
2153
+ "importance": 0.9,
2154
+ "metadata": {
2155
+ "source_turn": "t5",
2156
+ "speaker": "user"
2157
+ }
2158
+ },
2159
+ {
2160
+ "id": "dc_c08_m06",
2161
+ "type": "procedural",
2162
+ "timestamp": "2026-06-04T09:00:00Z",
2163
+ "content": "To apply modifications to the CREATIVE_WORK registry: 1. check credentials, 2. submit form, 3. wait for email.",
2164
+ "tags": [
2165
+ "creative_work",
2166
+ "process"
2167
+ ],
2168
+ "importance": 0.85,
2169
+ "metadata": {
2170
+ "source_turn": "t6",
2171
+ "speaker": "user"
2172
+ }
2173
+ },
2174
+ {
2175
+ "id": "dc_c08_m07",
2176
+ "type": "semantic",
2177
+ "timestamp": "2026-06-04T09:05:00Z",
2178
+ "content": "Distractor: Email server is active on port 25.",
2179
+ "tags": [
2180
+ "noise"
2181
+ ],
2182
+ "importance": 0.3,
2183
+ "metadata": {
2184
+ "source_turn": "t7",
2185
+ "speaker": "assistant"
2186
+ }
2187
+ },
2188
+ {
2189
+ "id": "dc_c08_m08",
2190
+ "type": "semantic",
2191
+ "timestamp": "2026-06-05T10:00:00Z",
2192
+ "content": "The registry workspace for CREATIVE_WORK is located at building Room 10.",
2193
+ "tags": [
2194
+ "creative_work"
2195
+ ],
2196
+ "importance": 0.8,
2197
+ "metadata": {
2198
+ "source_turn": "t8",
2199
+ "speaker": "user"
2200
+ },
2201
+ "associations": [
2202
+ {
2203
+ "target_id": "dc_c08_m06",
2204
+ "strength": 0.7
2205
+ }
2206
+ ]
2207
+ },
2208
+ {
2209
+ "id": "dc_c08_m09",
2210
+ "type": "semantic",
2211
+ "timestamp": "2026-06-05T10:10:00Z",
2212
+ "content": "The client contact liaison for CREATIVE_WORK is Mary Jane.",
2213
+ "tags": [
2214
+ "creative_work"
2215
+ ],
2216
+ "importance": 0.7,
2217
+ "metadata": {
2218
+ "source_turn": "t9",
2219
+ "speaker": "user"
2220
+ }
2221
+ },
2222
+ {
2223
+ "id": "dc_c08_m10",
2224
+ "type": "semantic",
2225
+ "timestamp": "2026-06-06T12:00:00Z",
2226
+ "content": "On June 6, the client contact liaison for CREATIVE_WORK was changed to Peter Parker.",
2227
+ "tags": [
2228
+ "creative_work"
2229
+ ],
2230
+ "importance": 0.9,
2231
+ "metadata": {
2232
+ "source_turn": "t10",
2233
+ "speaker": "user"
2234
+ }
2235
+ },
2236
+ {
2237
+ "id": "dc_c08_m11",
2238
+ "type": "semantic",
2239
+ "timestamp": "2026-06-06T12:05:00Z",
2240
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
2241
+ "tags": [
2242
+ "noise"
2243
+ ],
2244
+ "importance": 0.2,
2245
+ "metadata": {
2246
+ "source_turn": "t11",
2247
+ "speaker": "assistant"
2248
+ }
2249
+ },
2250
+ {
2251
+ "id": "dc_c08_m12",
2252
+ "type": "semantic",
2253
+ "timestamp": "2026-06-07T13:00:00Z",
2254
+ "content": "The manager for the CREATIVE_WORK task is George Lucas.",
2255
+ "tags": [
2256
+ "creative_work",
2257
+ "manager"
2258
+ ],
2259
+ "importance": 0.8,
2260
+ "metadata": {
2261
+ "source_turn": "t12",
2262
+ "speaker": "user"
2263
+ }
2264
+ }
2265
+ ],
2266
+ "questions": [
2267
+ {
2268
+ "question_id": "dc_c08_q1",
2269
+ "category": "multi_hop_association",
2270
+ "question": "Where should we go to apply updates to the registry for the CREATIVE_WORK task?",
2271
+ "expected_answer": "Room 10",
2272
+ "acceptable_answer_criteria": [
2273
+ "Room 10",
2274
+ "building Room 10"
2275
+ ],
2276
+ "required_memory_ids": [
2277
+ "dc_c08_m08",
2278
+ "dc_c08_m06"
2279
+ ],
2280
+ "forbidden_memory_ids": [],
2281
+ "difficulty": "hard",
2282
+ "architecture_bias_risk": "medium",
2283
+ "fairness_note": "Verifies provider-neutral evaluation for category multi_hop_association."
2284
+ },
2285
+ {
2286
+ "question_id": "dc_c08_q2",
2287
+ "category": "multi_hop_association",
2288
+ "question": "Where should we go to apply updates to the registry for the CREATIVE_WORK task?",
2289
+ "expected_answer": "Room 10",
2290
+ "acceptable_answer_criteria": [
2291
+ "Room 10",
2292
+ "building Room 10"
2293
+ ],
2294
+ "required_memory_ids": [
2295
+ "dc_c08_m08",
2296
+ "dc_c08_m06"
2297
+ ],
2298
+ "forbidden_memory_ids": [],
2299
+ "difficulty": "hard",
2300
+ "architecture_bias_risk": "medium",
2301
+ "fairness_note": "Verifies provider-neutral evaluation for category multi_hop_association."
2302
+ },
2303
+ {
2304
+ "question_id": "dc_c08_q3",
2305
+ "category": "multi_hop_association",
2306
+ "question": "Where should we go to apply updates to the registry for the CREATIVE_WORK task?",
2307
+ "expected_answer": "Room 10",
2308
+ "acceptable_answer_criteria": [
2309
+ "Room 10",
2310
+ "building Room 10"
2311
+ ],
2312
+ "required_memory_ids": [
2313
+ "dc_c08_m08",
2314
+ "dc_c08_m06"
2315
+ ],
2316
+ "forbidden_memory_ids": [],
2317
+ "difficulty": "hard",
2318
+ "architecture_bias_risk": "medium",
2319
+ "fairness_note": "Verifies provider-neutral evaluation for category multi_hop_association."
2320
+ },
2321
+ {
2322
+ "question_id": "dc_c08_q4",
2323
+ "category": "multi_hop_association",
2324
+ "question": "Where should we go to apply updates to the registry for the CREATIVE_WORK task?",
2325
+ "expected_answer": "Room 10",
2326
+ "acceptable_answer_criteria": [
2327
+ "Room 10",
2328
+ "building Room 10"
2329
+ ],
2330
+ "required_memory_ids": [
2331
+ "dc_c08_m08",
2332
+ "dc_c08_m06"
2333
+ ],
2334
+ "forbidden_memory_ids": [],
2335
+ "difficulty": "hard",
2336
+ "architecture_bias_risk": "medium",
2337
+ "fairness_note": "Verifies provider-neutral evaluation for category multi_hop_association."
2338
+ },
2339
+ {
2340
+ "question_id": "dc_c08_q5",
2341
+ "category": "multi_hop_association",
2342
+ "question": "Where should we go to apply updates to the registry for the CREATIVE_WORK task?",
2343
+ "expected_answer": "Room 10",
2344
+ "acceptable_answer_criteria": [
2345
+ "Room 10",
2346
+ "building Room 10"
2347
+ ],
2348
+ "required_memory_ids": [
2349
+ "dc_c08_m08",
2350
+ "dc_c08_m06"
2351
+ ],
2352
+ "forbidden_memory_ids": [],
2353
+ "difficulty": "hard",
2354
+ "architecture_bias_risk": "medium",
2355
+ "fairness_note": "Verifies provider-neutral evaluation for category multi_hop_association."
2356
+ },
2357
+ {
2358
+ "question_id": "dc_c08_q6",
2359
+ "category": "multi_hop_association",
2360
+ "question": "Where should we go to apply updates to the registry for the CREATIVE_WORK task?",
2361
+ "expected_answer": "Room 10",
2362
+ "acceptable_answer_criteria": [
2363
+ "Room 10",
2364
+ "building Room 10"
2365
+ ],
2366
+ "required_memory_ids": [
2367
+ "dc_c08_m08",
2368
+ "dc_c08_m06"
2369
+ ],
2370
+ "forbidden_memory_ids": [],
2371
+ "difficulty": "hard",
2372
+ "architecture_bias_risk": "medium",
2373
+ "fairness_note": "Verifies provider-neutral evaluation for category multi_hop_association."
2374
+ }
2375
+ ]
2376
+ },
2377
+ {
2378
+ "conversation_id": "dc_c09_software",
2379
+ "agent_id": "dc_agent_software_09",
2380
+ "domain": "software",
2381
+ "memory_records": [
2382
+ {
2383
+ "id": "dc_c09_m01",
2384
+ "type": "semantic",
2385
+ "timestamp": "2026-06-01T10:00:00Z",
2386
+ "content": "The main parameters designated for SOFTWARE are value Alpha.",
2387
+ "tags": [
2388
+ "software",
2389
+ "param"
2390
+ ],
2391
+ "importance": 0.8,
2392
+ "metadata": {
2393
+ "source_turn": "t1",
2394
+ "speaker": "user"
2395
+ }
2396
+ },
2397
+ {
2398
+ "id": "dc_c09_m02",
2399
+ "type": "semantic",
2400
+ "timestamp": "2026-06-01T10:05:00Z",
2401
+ "content": "The secondary configuration option for SOFTWARE is value Beta.",
2402
+ "tags": [
2403
+ "software",
2404
+ "param"
2405
+ ],
2406
+ "importance": 0.8,
2407
+ "metadata": {
2408
+ "source_turn": "t2",
2409
+ "speaker": "user"
2410
+ }
2411
+ },
2412
+ {
2413
+ "id": "dc_c09_m03",
2414
+ "type": "semantic",
2415
+ "timestamp": "2026-06-01T10:10:00Z",
2416
+ "content": "Distractor: The color of the SOFTWARE report sheet is yellow.",
2417
+ "tags": [
2418
+ "noise"
2419
+ ],
2420
+ "importance": 0.2,
2421
+ "metadata": {
2422
+ "source_turn": "t3",
2423
+ "speaker": "assistant"
2424
+ }
2425
+ },
2426
+ {
2427
+ "id": "dc_c09_m04",
2428
+ "type": "semantic",
2429
+ "timestamp": "2026-06-02T10:00:00Z",
2430
+ "content": "On June 2, the main parameter configuration for SOFTWARE was updated to Gamma.",
2431
+ "tags": [
2432
+ "software",
2433
+ "param"
2434
+ ],
2435
+ "importance": 0.9,
2436
+ "metadata": {
2437
+ "source_turn": "t4",
2438
+ "speaker": "user"
2439
+ }
2440
+ },
2441
+ {
2442
+ "id": "dc_c09_m05",
2443
+ "type": "semantic",
2444
+ "timestamp": "2026-06-03T11:00:00Z",
2445
+ "content": "On June 3, the secondary configuration option for SOFTWARE was changed to Delta.",
2446
+ "tags": [
2447
+ "software",
2448
+ "param"
2449
+ ],
2450
+ "importance": 0.9,
2451
+ "metadata": {
2452
+ "source_turn": "t5",
2453
+ "speaker": "user"
2454
+ }
2455
+ },
2456
+ {
2457
+ "id": "dc_c09_m06",
2458
+ "type": "procedural",
2459
+ "timestamp": "2026-06-04T09:00:00Z",
2460
+ "content": "To apply modifications to the SOFTWARE registry: 1. check credentials, 2. submit form, 3. wait for email.",
2461
+ "tags": [
2462
+ "software",
2463
+ "process"
2464
+ ],
2465
+ "importance": 0.85,
2466
+ "metadata": {
2467
+ "source_turn": "t6",
2468
+ "speaker": "user"
2469
+ }
2470
+ },
2471
+ {
2472
+ "id": "dc_c09_m07",
2473
+ "type": "semantic",
2474
+ "timestamp": "2026-06-04T09:05:00Z",
2475
+ "content": "Distractor: Email server is active on port 25.",
2476
+ "tags": [
2477
+ "noise"
2478
+ ],
2479
+ "importance": 0.3,
2480
+ "metadata": {
2481
+ "source_turn": "t7",
2482
+ "speaker": "assistant"
2483
+ }
2484
+ },
2485
+ {
2486
+ "id": "dc_c09_m08",
2487
+ "type": "semantic",
2488
+ "timestamp": "2026-06-05T10:00:00Z",
2489
+ "content": "The registry workspace for SOFTWARE is located at building Room 10.",
2490
+ "tags": [
2491
+ "software"
2492
+ ],
2493
+ "importance": 0.8,
2494
+ "metadata": {
2495
+ "source_turn": "t8",
2496
+ "speaker": "user"
2497
+ },
2498
+ "associations": [
2499
+ {
2500
+ "target_id": "dc_c09_m06",
2501
+ "strength": 0.7
2502
+ }
2503
+ ]
2504
+ },
2505
+ {
2506
+ "id": "dc_c09_m09",
2507
+ "type": "semantic",
2508
+ "timestamp": "2026-06-05T10:10:00Z",
2509
+ "content": "The client contact liaison for SOFTWARE is Mary Jane.",
2510
+ "tags": [
2511
+ "software"
2512
+ ],
2513
+ "importance": 0.7,
2514
+ "metadata": {
2515
+ "source_turn": "t9",
2516
+ "speaker": "user"
2517
+ }
2518
+ },
2519
+ {
2520
+ "id": "dc_c09_m10",
2521
+ "type": "semantic",
2522
+ "timestamp": "2026-06-06T12:00:00Z",
2523
+ "content": "On June 6, the client contact liaison for SOFTWARE was changed to Peter Parker.",
2524
+ "tags": [
2525
+ "software"
2526
+ ],
2527
+ "importance": 0.9,
2528
+ "metadata": {
2529
+ "source_turn": "t10",
2530
+ "speaker": "user"
2531
+ }
2532
+ },
2533
+ {
2534
+ "id": "dc_c09_m11",
2535
+ "type": "semantic",
2536
+ "timestamp": "2026-06-06T12:05:00Z",
2537
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
2538
+ "tags": [
2539
+ "noise"
2540
+ ],
2541
+ "importance": 0.2,
2542
+ "metadata": {
2543
+ "source_turn": "t11",
2544
+ "speaker": "assistant"
2545
+ }
2546
+ },
2547
+ {
2548
+ "id": "dc_c09_m12",
2549
+ "type": "semantic",
2550
+ "timestamp": "2026-06-07T13:00:00Z",
2551
+ "content": "The manager for the SOFTWARE task is George Lucas.",
2552
+ "tags": [
2553
+ "software",
2554
+ "manager"
2555
+ ],
2556
+ "importance": 0.8,
2557
+ "metadata": {
2558
+ "source_turn": "t12",
2559
+ "speaker": "user"
2560
+ }
2561
+ }
2562
+ ],
2563
+ "questions": [
2564
+ {
2565
+ "question_id": "dc_c09_q1",
2566
+ "category": "noise_resistance",
2567
+ "question": "What is the procedure for enacting modifications on the SOFTWARE registry?",
2568
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email",
2569
+ "acceptable_answer_criteria": [
2570
+ "check credentials",
2571
+ "submit form",
2572
+ "wait for email"
2573
+ ],
2574
+ "required_memory_ids": [
2575
+ "dc_c09_m06"
2576
+ ],
2577
+ "forbidden_memory_ids": [
2578
+ "dc_c09_m07"
2579
+ ],
2580
+ "difficulty": "medium",
2581
+ "architecture_bias_risk": "low",
2582
+ "fairness_note": "Verifies provider-neutral evaluation for category noise_resistance."
2583
+ },
2584
+ {
2585
+ "question_id": "dc_c09_q2",
2586
+ "category": "noise_resistance",
2587
+ "question": "What is the procedure for enacting modifications on the SOFTWARE registry?",
2588
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email",
2589
+ "acceptable_answer_criteria": [
2590
+ "check credentials",
2591
+ "submit form",
2592
+ "wait for email"
2593
+ ],
2594
+ "required_memory_ids": [
2595
+ "dc_c09_m06"
2596
+ ],
2597
+ "forbidden_memory_ids": [
2598
+ "dc_c09_m07"
2599
+ ],
2600
+ "difficulty": "medium",
2601
+ "architecture_bias_risk": "low",
2602
+ "fairness_note": "Verifies provider-neutral evaluation for category noise_resistance."
2603
+ },
2604
+ {
2605
+ "question_id": "dc_c09_q3",
2606
+ "category": "noise_resistance",
2607
+ "question": "What is the procedure for enacting modifications on the SOFTWARE registry?",
2608
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email",
2609
+ "acceptable_answer_criteria": [
2610
+ "check credentials",
2611
+ "submit form",
2612
+ "wait for email"
2613
+ ],
2614
+ "required_memory_ids": [
2615
+ "dc_c09_m06"
2616
+ ],
2617
+ "forbidden_memory_ids": [
2618
+ "dc_c09_m07"
2619
+ ],
2620
+ "difficulty": "medium",
2621
+ "architecture_bias_risk": "low",
2622
+ "fairness_note": "Verifies provider-neutral evaluation for category noise_resistance."
2623
+ },
2624
+ {
2625
+ "question_id": "dc_c09_q4",
2626
+ "category": "noise_resistance",
2627
+ "question": "What is the procedure for enacting modifications on the SOFTWARE registry?",
2628
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email",
2629
+ "acceptable_answer_criteria": [
2630
+ "check credentials",
2631
+ "submit form",
2632
+ "wait for email"
2633
+ ],
2634
+ "required_memory_ids": [
2635
+ "dc_c09_m06"
2636
+ ],
2637
+ "forbidden_memory_ids": [
2638
+ "dc_c09_m07"
2639
+ ],
2640
+ "difficulty": "medium",
2641
+ "architecture_bias_risk": "low",
2642
+ "fairness_note": "Verifies provider-neutral evaluation for category noise_resistance."
2643
+ },
2644
+ {
2645
+ "question_id": "dc_c09_q5",
2646
+ "category": "noise_resistance",
2647
+ "question": "What is the procedure for enacting modifications on the SOFTWARE registry?",
2648
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email",
2649
+ "acceptable_answer_criteria": [
2650
+ "check credentials",
2651
+ "submit form",
2652
+ "wait for email"
2653
+ ],
2654
+ "required_memory_ids": [
2655
+ "dc_c09_m06"
2656
+ ],
2657
+ "forbidden_memory_ids": [
2658
+ "dc_c09_m07"
2659
+ ],
2660
+ "difficulty": "medium",
2661
+ "architecture_bias_risk": "low",
2662
+ "fairness_note": "Verifies provider-neutral evaluation for category noise_resistance."
2663
+ },
2664
+ {
2665
+ "question_id": "dc_c09_q6",
2666
+ "category": "noise_resistance",
2667
+ "question": "What is the procedure for enacting modifications on the SOFTWARE registry?",
2668
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email",
2669
+ "acceptable_answer_criteria": [
2670
+ "check credentials",
2671
+ "submit form",
2672
+ "wait for email"
2673
+ ],
2674
+ "required_memory_ids": [
2675
+ "dc_c09_m06"
2676
+ ],
2677
+ "forbidden_memory_ids": [
2678
+ "dc_c09_m07"
2679
+ ],
2680
+ "difficulty": "medium",
2681
+ "architecture_bias_risk": "low",
2682
+ "fairness_note": "Verifies provider-neutral evaluation for category noise_resistance."
2683
+ }
2684
+ ]
2685
+ },
2686
+ {
2687
+ "conversation_id": "dc_c10_personal_assistant",
2688
+ "agent_id": "dc_agent_personal_assistant_10",
2689
+ "domain": "personal_assistant",
2690
+ "memory_records": [
2691
+ {
2692
+ "id": "dc_c10_m01",
2693
+ "type": "semantic",
2694
+ "timestamp": "2026-06-01T10:00:00Z",
2695
+ "content": "The main parameters designated for PERSONAL_ASSISTANT are value Alpha.",
2696
+ "tags": [
2697
+ "personal_assistant",
2698
+ "param"
2699
+ ],
2700
+ "importance": 0.8,
2701
+ "metadata": {
2702
+ "source_turn": "t1",
2703
+ "speaker": "user"
2704
+ }
2705
+ },
2706
+ {
2707
+ "id": "dc_c10_m02",
2708
+ "type": "semantic",
2709
+ "timestamp": "2026-06-01T10:05:00Z",
2710
+ "content": "The secondary configuration option for PERSONAL_ASSISTANT is value Beta.",
2711
+ "tags": [
2712
+ "personal_assistant",
2713
+ "param"
2714
+ ],
2715
+ "importance": 0.8,
2716
+ "metadata": {
2717
+ "source_turn": "t2",
2718
+ "speaker": "user"
2719
+ }
2720
+ },
2721
+ {
2722
+ "id": "dc_c10_m03",
2723
+ "type": "semantic",
2724
+ "timestamp": "2026-06-01T10:10:00Z",
2725
+ "content": "Distractor: The color of the PERSONAL_ASSISTANT report sheet is yellow.",
2726
+ "tags": [
2727
+ "noise"
2728
+ ],
2729
+ "importance": 0.2,
2730
+ "metadata": {
2731
+ "source_turn": "t3",
2732
+ "speaker": "assistant"
2733
+ }
2734
+ },
2735
+ {
2736
+ "id": "dc_c10_m04",
2737
+ "type": "semantic",
2738
+ "timestamp": "2026-06-02T10:00:00Z",
2739
+ "content": "On June 2, the main parameter configuration for PERSONAL_ASSISTANT was updated to Gamma.",
2740
+ "tags": [
2741
+ "personal_assistant",
2742
+ "param"
2743
+ ],
2744
+ "importance": 0.9,
2745
+ "metadata": {
2746
+ "source_turn": "t4",
2747
+ "speaker": "user"
2748
+ }
2749
+ },
2750
+ {
2751
+ "id": "dc_c10_m05",
2752
+ "type": "semantic",
2753
+ "timestamp": "2026-06-03T11:00:00Z",
2754
+ "content": "On June 3, the secondary configuration option for PERSONAL_ASSISTANT was changed to Delta.",
2755
+ "tags": [
2756
+ "personal_assistant",
2757
+ "param"
2758
+ ],
2759
+ "importance": 0.9,
2760
+ "metadata": {
2761
+ "source_turn": "t5",
2762
+ "speaker": "user"
2763
+ }
2764
+ },
2765
+ {
2766
+ "id": "dc_c10_m06",
2767
+ "type": "procedural",
2768
+ "timestamp": "2026-06-04T09:00:00Z",
2769
+ "content": "To apply modifications to the PERSONAL_ASSISTANT registry: 1. check credentials, 2. submit form, 3. wait for email.",
2770
+ "tags": [
2771
+ "personal_assistant",
2772
+ "process"
2773
+ ],
2774
+ "importance": 0.85,
2775
+ "metadata": {
2776
+ "source_turn": "t6",
2777
+ "speaker": "user"
2778
+ }
2779
+ },
2780
+ {
2781
+ "id": "dc_c10_m07",
2782
+ "type": "semantic",
2783
+ "timestamp": "2026-06-04T09:05:00Z",
2784
+ "content": "Distractor: Email server is active on port 25.",
2785
+ "tags": [
2786
+ "noise"
2787
+ ],
2788
+ "importance": 0.3,
2789
+ "metadata": {
2790
+ "source_turn": "t7",
2791
+ "speaker": "assistant"
2792
+ }
2793
+ },
2794
+ {
2795
+ "id": "dc_c10_m08",
2796
+ "type": "semantic",
2797
+ "timestamp": "2026-06-05T10:00:00Z",
2798
+ "content": "The registry workspace for PERSONAL_ASSISTANT is located at building Room 10.",
2799
+ "tags": [
2800
+ "personal_assistant"
2801
+ ],
2802
+ "importance": 0.8,
2803
+ "metadata": {
2804
+ "source_turn": "t8",
2805
+ "speaker": "user"
2806
+ },
2807
+ "associations": [
2808
+ {
2809
+ "target_id": "dc_c10_m06",
2810
+ "strength": 0.7
2811
+ }
2812
+ ]
2813
+ },
2814
+ {
2815
+ "id": "dc_c10_m09",
2816
+ "type": "semantic",
2817
+ "timestamp": "2026-06-05T10:10:00Z",
2818
+ "content": "The client contact liaison for PERSONAL_ASSISTANT is Mary Jane.",
2819
+ "tags": [
2820
+ "personal_assistant"
2821
+ ],
2822
+ "importance": 0.7,
2823
+ "metadata": {
2824
+ "source_turn": "t9",
2825
+ "speaker": "user"
2826
+ }
2827
+ },
2828
+ {
2829
+ "id": "dc_c10_m10",
2830
+ "type": "semantic",
2831
+ "timestamp": "2026-06-06T12:00:00Z",
2832
+ "content": "On June 6, the client contact liaison for PERSONAL_ASSISTANT was changed to Peter Parker.",
2833
+ "tags": [
2834
+ "personal_assistant"
2835
+ ],
2836
+ "importance": 0.9,
2837
+ "metadata": {
2838
+ "source_turn": "t10",
2839
+ "speaker": "user"
2840
+ }
2841
+ },
2842
+ {
2843
+ "id": "dc_c10_m11",
2844
+ "type": "semantic",
2845
+ "timestamp": "2026-06-06T12:05:00Z",
2846
+ "content": "Distractor: Peter Parker works as a freelance photographer.",
2847
+ "tags": [
2848
+ "noise"
2849
+ ],
2850
+ "importance": 0.2,
2851
+ "metadata": {
2852
+ "source_turn": "t11",
2853
+ "speaker": "assistant"
2854
+ }
2855
+ },
2856
+ {
2857
+ "id": "dc_c10_m12",
2858
+ "type": "semantic",
2859
+ "timestamp": "2026-06-07T13:00:00Z",
2860
+ "content": "The manager for the PERSONAL_ASSISTANT task is George Lucas.",
2861
+ "tags": [
2862
+ "personal_assistant",
2863
+ "manager"
2864
+ ],
2865
+ "importance": 0.8,
2866
+ "metadata": {
2867
+ "source_turn": "t12",
2868
+ "speaker": "user"
2869
+ }
2870
+ }
2871
+ ],
2872
+ "questions": [
2873
+ {
2874
+ "question_id": "dc_c10_q1",
2875
+ "category": "procedural_recall",
2876
+ "question": "What steps are necessary to execute the PERSONAL_ASSISTANT update?",
2877
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email.",
2878
+ "acceptable_answer_criteria": [
2879
+ "check credentials",
2880
+ "submit form",
2881
+ "wait for email"
2882
+ ],
2883
+ "required_memory_ids": [
2884
+ "dc_c10_m06"
2885
+ ],
2886
+ "forbidden_memory_ids": [],
2887
+ "difficulty": "medium",
2888
+ "architecture_bias_risk": "low",
2889
+ "fairness_note": "Verifies provider-neutral evaluation for category procedural_recall."
2890
+ },
2891
+ {
2892
+ "question_id": "dc_c10_q2",
2893
+ "category": "procedural_recall",
2894
+ "question": "What steps are necessary to execute the PERSONAL_ASSISTANT update?",
2895
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email.",
2896
+ "acceptable_answer_criteria": [
2897
+ "check credentials",
2898
+ "submit form",
2899
+ "wait for email"
2900
+ ],
2901
+ "required_memory_ids": [
2902
+ "dc_c10_m06"
2903
+ ],
2904
+ "forbidden_memory_ids": [],
2905
+ "difficulty": "medium",
2906
+ "architecture_bias_risk": "low",
2907
+ "fairness_note": "Verifies provider-neutral evaluation for category procedural_recall."
2908
+ },
2909
+ {
2910
+ "question_id": "dc_c10_q3",
2911
+ "category": "procedural_recall",
2912
+ "question": "What steps are necessary to execute the PERSONAL_ASSISTANT update?",
2913
+ "expected_answer": "1. check credentials, 2. submit form, 3. wait for email.",
2914
+ "acceptable_answer_criteria": [
2915
+ "check credentials",
2916
+ "submit form",
2917
+ "wait for email"
2918
+ ],
2919
+ "required_memory_ids": [
2920
+ "dc_c10_m06"
2921
+ ],
2922
+ "forbidden_memory_ids": [],
2923
+ "difficulty": "medium",
2924
+ "architecture_bias_risk": "low",
2925
+ "fairness_note": "Verifies provider-neutral evaluation for category procedural_recall."
2926
+ },
2927
+ {
2928
+ "question_id": "dc_c10_q4",
2929
+ "category": "abstention",
2930
+ "question": "Which telephone contact number should we call to reach the director of PERSONAL_ASSISTANT?",
2931
+ "expected_answer": "not enough information",
2932
+ "acceptable_answer_criteria": [
2933
+ "not enough information",
2934
+ "unknown",
2935
+ "insufficient evidence"
2936
+ ],
2937
+ "required_memory_ids": [],
2938
+ "forbidden_memory_ids": [
2939
+ "dc_c10_m12"
2940
+ ],
2941
+ "difficulty": "hard",
2942
+ "architecture_bias_risk": "low",
2943
+ "fairness_note": "Verifies provider-neutral evaluation for category abstention."
2944
+ },
2945
+ {
2946
+ "question_id": "dc_c10_q5",
2947
+ "category": "abstention",
2948
+ "question": "Which telephone contact number should we call to reach the director of PERSONAL_ASSISTANT?",
2949
+ "expected_answer": "not enough information",
2950
+ "acceptable_answer_criteria": [
2951
+ "not enough information",
2952
+ "unknown",
2953
+ "insufficient evidence"
2954
+ ],
2955
+ "required_memory_ids": [],
2956
+ "forbidden_memory_ids": [
2957
+ "dc_c10_m12"
2958
+ ],
2959
+ "difficulty": "hard",
2960
+ "architecture_bias_risk": "low",
2961
+ "fairness_note": "Verifies provider-neutral evaluation for category abstention."
2962
+ },
2963
+ {
2964
+ "question_id": "dc_c10_q6",
2965
+ "category": "abstention",
2966
+ "question": "Which telephone contact number should we call to reach the director of PERSONAL_ASSISTANT?",
2967
+ "expected_answer": "not enough information",
2968
+ "acceptable_answer_criteria": [
2969
+ "not enough information",
2970
+ "unknown",
2971
+ "insufficient evidence"
2972
+ ],
2973
+ "required_memory_ids": [],
2974
+ "forbidden_memory_ids": [
2975
+ "dc_c10_m12"
2976
+ ],
2977
+ "difficulty": "hard",
2978
+ "architecture_bias_risk": "low",
2979
+ "fairness_note": "Verifies provider-neutral evaluation for category abstention."
2980
+ }
2981
+ ]
2982
+ }
2983
+ ]
2984
+ }