@chllming/wave-orchestration 0.5.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/CHANGELOG.md +46 -3
  2. package/README.md +33 -5
  3. package/docs/README.md +18 -4
  4. package/docs/agents/wave-cont-eval-role.md +36 -0
  5. package/docs/agents/{wave-evaluator-role.md → wave-cont-qa-role.md} +14 -11
  6. package/docs/agents/wave-documentation-role.md +1 -1
  7. package/docs/agents/wave-infra-role.md +1 -1
  8. package/docs/agents/wave-integration-role.md +3 -3
  9. package/docs/agents/wave-launcher-role.md +4 -3
  10. package/docs/agents/wave-security-role.md +40 -0
  11. package/docs/concepts/context7-vs-skills.md +1 -1
  12. package/docs/concepts/what-is-a-wave.md +56 -6
  13. package/docs/evals/README.md +166 -0
  14. package/docs/evals/benchmark-catalog.json +663 -0
  15. package/docs/guides/author-and-run-waves.md +135 -0
  16. package/docs/guides/planner.md +5 -0
  17. package/docs/guides/terminal-surfaces.md +2 -0
  18. package/docs/plans/component-cutover-matrix.json +1 -1
  19. package/docs/plans/component-cutover-matrix.md +1 -1
  20. package/docs/plans/current-state.md +19 -1
  21. package/docs/plans/examples/wave-example-live-proof.md +435 -0
  22. package/docs/plans/migration.md +42 -0
  23. package/docs/plans/wave-orchestrator.md +46 -7
  24. package/docs/plans/waves/wave-0.md +4 -4
  25. package/docs/reference/live-proof-waves.md +177 -0
  26. package/docs/reference/migration-0.2-to-0.5.md +26 -19
  27. package/docs/reference/npmjs-trusted-publishing.md +6 -5
  28. package/docs/reference/runtime-config/README.md +13 -3
  29. package/docs/reference/sample-waves.md +87 -0
  30. package/docs/reference/skills.md +110 -42
  31. package/docs/research/agent-context-sources.md +130 -11
  32. package/docs/research/coordination-failure-review.md +266 -0
  33. package/docs/roadmap.md +6 -2
  34. package/package.json +2 -2
  35. package/releases/manifest.json +20 -2
  36. package/scripts/research/agent-context-archive.mjs +83 -1
  37. package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +811 -0
  38. package/scripts/wave-orchestrator/adhoc.mjs +1331 -0
  39. package/scripts/wave-orchestrator/agent-state.mjs +358 -6
  40. package/scripts/wave-orchestrator/artifact-schemas.mjs +173 -0
  41. package/scripts/wave-orchestrator/clarification-triage.mjs +10 -3
  42. package/scripts/wave-orchestrator/config.mjs +48 -12
  43. package/scripts/wave-orchestrator/context7.mjs +2 -0
  44. package/scripts/wave-orchestrator/coord-cli.mjs +51 -19
  45. package/scripts/wave-orchestrator/coordination-store.mjs +26 -4
  46. package/scripts/wave-orchestrator/coordination.mjs +83 -9
  47. package/scripts/wave-orchestrator/dashboard-state.mjs +20 -8
  48. package/scripts/wave-orchestrator/dep-cli.mjs +5 -2
  49. package/scripts/wave-orchestrator/docs-queue.mjs +8 -2
  50. package/scripts/wave-orchestrator/evals.mjs +451 -0
  51. package/scripts/wave-orchestrator/feedback.mjs +15 -1
  52. package/scripts/wave-orchestrator/install.mjs +32 -9
  53. package/scripts/wave-orchestrator/launcher-closure.mjs +281 -0
  54. package/scripts/wave-orchestrator/launcher-runtime.mjs +334 -0
  55. package/scripts/wave-orchestrator/launcher.mjs +709 -601
  56. package/scripts/wave-orchestrator/ledger.mjs +123 -20
  57. package/scripts/wave-orchestrator/local-executor.mjs +99 -12
  58. package/scripts/wave-orchestrator/planner.mjs +177 -42
  59. package/scripts/wave-orchestrator/replay.mjs +6 -3
  60. package/scripts/wave-orchestrator/role-helpers.mjs +84 -0
  61. package/scripts/wave-orchestrator/shared.mjs +75 -11
  62. package/scripts/wave-orchestrator/skills.mjs +637 -106
  63. package/scripts/wave-orchestrator/traces.mjs +71 -48
  64. package/scripts/wave-orchestrator/wave-files.mjs +947 -101
  65. package/scripts/wave.mjs +9 -0
  66. package/skills/README.md +202 -0
  67. package/skills/provider-aws/SKILL.md +111 -0
  68. package/skills/provider-aws/adapters/claude.md +1 -0
  69. package/skills/provider-aws/adapters/codex.md +1 -0
  70. package/skills/provider-aws/references/service-verification.md +39 -0
  71. package/skills/provider-aws/skill.json +50 -1
  72. package/skills/provider-custom-deploy/SKILL.md +59 -0
  73. package/skills/provider-custom-deploy/skill.json +46 -1
  74. package/skills/provider-docker-compose/SKILL.md +90 -0
  75. package/skills/provider-docker-compose/adapters/local.md +1 -0
  76. package/skills/provider-docker-compose/skill.json +49 -1
  77. package/skills/provider-github-release/SKILL.md +116 -1
  78. package/skills/provider-github-release/adapters/claude.md +1 -0
  79. package/skills/provider-github-release/adapters/codex.md +1 -0
  80. package/skills/provider-github-release/skill.json +51 -1
  81. package/skills/provider-kubernetes/SKILL.md +137 -0
  82. package/skills/provider-kubernetes/adapters/claude.md +1 -0
  83. package/skills/provider-kubernetes/adapters/codex.md +1 -0
  84. package/skills/provider-kubernetes/references/kubectl-patterns.md +58 -0
  85. package/skills/provider-kubernetes/skill.json +48 -1
  86. package/skills/provider-railway/SKILL.md +118 -1
  87. package/skills/provider-railway/references/verification-commands.md +39 -0
  88. package/skills/provider-railway/skill.json +67 -1
  89. package/skills/provider-ssh-manual/SKILL.md +91 -0
  90. package/skills/provider-ssh-manual/skill.json +50 -1
  91. package/skills/repo-coding-rules/SKILL.md +84 -0
  92. package/skills/repo-coding-rules/skill.json +30 -1
  93. package/skills/role-cont-eval/SKILL.md +90 -0
  94. package/skills/role-cont-eval/adapters/codex.md +1 -0
  95. package/skills/role-cont-eval/skill.json +36 -0
  96. package/skills/role-cont-qa/SKILL.md +93 -0
  97. package/skills/role-cont-qa/adapters/claude.md +1 -0
  98. package/skills/role-cont-qa/skill.json +36 -0
  99. package/skills/role-deploy/SKILL.md +90 -0
  100. package/skills/role-deploy/skill.json +32 -1
  101. package/skills/role-documentation/SKILL.md +66 -0
  102. package/skills/role-documentation/skill.json +32 -1
  103. package/skills/role-implementation/SKILL.md +62 -0
  104. package/skills/role-implementation/skill.json +32 -1
  105. package/skills/role-infra/SKILL.md +74 -0
  106. package/skills/role-infra/skill.json +32 -1
  107. package/skills/role-integration/SKILL.md +79 -1
  108. package/skills/role-integration/skill.json +32 -1
  109. package/skills/role-research/SKILL.md +58 -0
  110. package/skills/role-research/skill.json +32 -1
  111. package/skills/role-security/SKILL.md +60 -0
  112. package/skills/role-security/skill.json +36 -0
  113. package/skills/runtime-claude/SKILL.md +60 -1
  114. package/skills/runtime-claude/skill.json +32 -1
  115. package/skills/runtime-codex/SKILL.md +52 -1
  116. package/skills/runtime-codex/skill.json +32 -1
  117. package/skills/runtime-local/SKILL.md +39 -0
  118. package/skills/runtime-local/skill.json +32 -1
  119. package/skills/runtime-opencode/SKILL.md +51 -0
  120. package/skills/runtime-opencode/skill.json +32 -1
  121. package/skills/wave-core/SKILL.md +107 -0
  122. package/skills/wave-core/references/marker-syntax.md +62 -0
  123. package/skills/wave-core/skill.json +31 -1
  124. package/wave.config.json +35 -6
  125. package/skills/role-evaluator/SKILL.md +0 -6
  126. package/skills/role-evaluator/skill.json +0 -5
@@ -0,0 +1,811 @@
1
+ import baseManifest from "./harness-and-blackboard-2026-03-21.mjs";
2
+
3
/**
 * Topic tags referenced by the `topics` array of each manifest entry.
 *
 * Keys are short in-file handles; values are the slug strings that end up
 * stored on the entries.
 */
const TOPICS = {
  HARNESS: "harnesses-and-practice",
  PLANNING: "planning-and-orchestration",
  LONG_RUNNING: "long-running-agents-and-compaction",
  SKILLS: "skills-and-procedural-memory",
  BLACKBOARD: "blackboard-and-shared-workspaces",
  REPO: "repo-context-and-evaluation",
  SECURITY: "security-and-secure-code-generation",
};
12
+
13
/**
 * Builds a paper manifest entry for an arXiv identifier.
 *
 * `venue`, `sourcePage`, and `sourcePdf` are derived from the id. `entry` is
 * spread last, so any field it supplies (including `kind` or the derived
 * URLs) overrides the default computed here.
 *
 * @param {string} arxivId - arXiv identifier, e.g. "2510.04618".
 * @param {object} entry - Remaining entry fields (title, slug, year, topics, ...).
 * @returns {object} A new entry object; `entry` itself is not modified.
 */
function arxivPaper(arxivId, entry) {
  return {
    kind: "paper",
    venue: `arXiv ${arxivId}`,
    sourcePage: `https://arxiv.org/abs/${arxivId}`,
    sourcePdf: `https://arxiv.org/pdf/${arxivId}.pdf`,
    ...entry,
  };
}
22
+
23
/**
 * Builds a paper manifest entry for a paper identified only by a landing page.
 *
 * No `sourcePdf` or `venue` is derived here; `entry` is spread last, so it can
 * supply those fields or override `kind` / `sourcePage`.
 *
 * @param {string} sourcePage - URL of the paper's landing page.
 * @param {object} entry - Remaining entry fields (title, slug, year, topics, ...).
 * @returns {object} A new entry object; `entry` itself is not modified.
 */
function externalPaper(sourcePage, entry) {
  return {
    kind: "paper",
    sourcePage,
    ...entry,
  };
}
30
+
31
/**
 * Builds a paper manifest entry for a paper that is only reachable as a PDF.
 *
 * The PDF URL is used for both `sourcePage` and `sourcePdf`. `entry` is spread
 * last, so it can override either of them (or `kind`).
 *
 * @param {string} sourcePdf - Direct URL of the PDF.
 * @param {object} entry - Remaining entry fields (title, slug, year, topics, ...).
 * @returns {object} A new entry object; `entry` itself is not modified.
 */
function directPdfPaper(sourcePdf, entry) {
  return {
    kind: "paper",
    sourcePage: sourcePdf,
    sourcePdf,
    ...entry,
  };
}
39
+
40
/**
 * Returns a copy of `entry` whose `topics` also contain `extraTopics`.
 *
 * Existing topics come first, extras are appended, and duplicates are dropped
 * (first occurrence wins, via `Set` insertion order). A missing `topics` field
 * or a missing/null `extraTopics` argument is treated as an empty list, so the
 * call never throws on absent input.
 *
 * @param {object} entry - Manifest entry; not modified.
 * @param {Iterable<string>} [extraTopics] - Topics to add.
 * @returns {object} A new entry object with the merged `topics` array.
 */
function withExtraTopics(entry, extraTopics) {
  const merged = new Set([...(entry.topics ?? []), ...(extraTopics ?? [])]);
  return {
    ...entry,
    topics: [...merged],
  };
}
46
+
47
/**
 * Slugs singled out for planning-related handling.
 *
 * NOTE(review): the consumer is outside this view; presumably these are
 * base-manifest entries that get the planning topic added — confirm against
 * the code that reads this set.
 */
const PLANNING_BASE_SLUGS = new Set([
  "building-effective-ai-coding-agents-for-the-terminal-scaffolding-harness-context-engineering-and-lessons-learned",
  "vero-an-evaluation-harness-for-agents-to-optimize-agents",
  "evoclaw-evaluating-ai-agents-on-continuous-software-evolution",
  "exploring-advanced-llm-multi-agent-systems-based-on-blackboard-architecture",
  "llm-based-multi-agent-blackboard-system-for-information-discovery-in-data-science",
  "dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation",
  "symphony-synergistic-multi-agent-planning-with-heterogeneous-language-model-assembly",
  "silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems",
]);
57
+
58
/**
 * Slugs singled out for skills-related handling.
 *
 * NOTE(review): the consumer is outside this view; presumably these are
 * base-manifest entries that get the skills topic added — confirm against
 * the code that reads this set.
 */
const SKILLS_BASE_SLUGS = new Set([
  "memory-for-autonomous-llm-agents-mechanisms-evaluation-and-emerging-frontiers",
]);
61
+
62
/**
 * Manifest entries for papers on context engineering, summarization, and
 * context management. Every entry is built with `arxivPaper`, so `kind`,
 * `venue`, `sourcePage`, and `sourcePdf` are derived from the arXiv id.
 */
const contextEngineeringManifest = [
  arxivPaper("2510.04618", {
    title: "Agentic Context Engineering: Evolving Contexts for Self-Improving Language Models",
    slug: "agentic-context-engineering-evolving-contexts-for-self-improving-language-models",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Evolving playbooks, context mutation, and self-improving agent instructions.",
    fit: "Directly relevant to treating harness context as a maintained artifact instead of a fixed prompt.",
    topics: [TOPICS.HARNESS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2509.13313", {
    title: "ReSum: Unlocking Long-Horizon Search Intelligence via Context Summarization",
    slug: "resum-unlocking-long-horizon-search-intelligence-via-context-summarization",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Periodic summarization, context compaction, and long-horizon search loops.",
    fit: "Direct evidence for summary-based compaction in long-running agent workflows.",
    topics: [TOPICS.LONG_RUNNING, TOPICS.HARNESS],
  }),
  arxivPaper("2601.12030", {
    title: "ARC: Active and Reflection-driven Context Management for Long-Horizon Information Seeking Agents",
    slug: "arc-active-and-reflection-driven-context-management-for-long-horizon-information-seeking-agents",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Reflection-driven context selection and active memory management for long-horizon agents.",
    fit: "Useful companion to compaction papers when deciding how much context should stay live.",
    topics: [TOPICS.LONG_RUNNING, TOPICS.HARNESS],
  }),
  arxivPaper("2601.21557", {
    title: "Meta Context Engineering via Agentic Skill Evolution",
    slug: "meta-context-engineering-via-agentic-skill-evolution",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Skill evolution, meta-level context updates, and self-improving agent workflows.",
    fit: "Extends context engineering from manual playbooks to learned skill updates.",
    topics: [TOPICS.HARNESS, TOPICS.LONG_RUNNING, TOPICS.SKILLS],
  }),
  arxivPaper("2510.21413", {
    title: "Context Engineering for AI Agents in Open-Source Software",
    slug: "context-engineering-for-ai-agents-in-open-source-software",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Real-world context engineering patterns in open-source agentic coding.",
    fit: "Empirical evidence for how repositories structure and maintain coding-agent context.",
    topics: [TOPICS.HARNESS, TOPICS.REPO],
  }),
  arxivPaper("2603.09619", {
    title: "Context Engineering: From Prompts to Corporate Multi-Agent Architecture",
    slug: "context-engineering-from-prompts-to-corporate-multi-agent-architecture",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Enterprise multi-agent context architecture, governance, and prompt-to-system transitions.",
    fit: "Broad systems framing for moving from prompt snippets to durable agent architecture.",
    topics: [TOPICS.HARNESS, TOPICS.BLACKBOARD],
  }),
  arxivPaper("2601.06606", {
    title: "CEDAR: Context Engineering for Agentic Data Science",
    slug: "cedar-context-engineering-for-agentic-data-science",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Context engineering for multi-step data science agents and artifact-rich workflows.",
    fit: "Domain-specific, but useful for studying context structure in complex multi-stage tasks.",
    topics: [TOPICS.HARNESS, TOPICS.LONG_RUNNING],
  }),
];
127
+
128
/**
 * Manifest entries for empirical studies of repository-level agent context
 * (AGENTS.md files, rules files, agent configuration, folder structure).
 * Every entry is built with `arxivPaper`.
 */
const repoContextManifest = [
  arxivPaper("2511.12884", {
    title: "Agent READMEs: An Empirical Study of Context Files for Agentic Coding",
    slug: "agent-readmes-an-empirical-study-of-context-files-for-agentic-coding",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Repository context files, activation conditions, and empirical effects on agentic coding.",
    fit: "One of the closest papers to the repo's AGENTS.md and skill-surface concerns.",
    topics: [TOPICS.REPO],
  }),
  arxivPaper("2512.18925", {
    title: "Beyond the Prompt: An Empirical Study of Cursor Rules",
    slug: "beyond-the-prompt-an-empirical-study-of-cursor-rules",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Tool-specific rules files and developer-authored coding context in real projects.",
    fit: "Useful evidence on how developers externalize coding guidance beyond the base prompt.",
    topics: [TOPICS.REPO],
    notes:
      "Normalized to the current arXiv title for 2512.18925; the supplied list described the paper more generically as developer-provided context for coding assistants.",
  }),
  arxivPaper("2511.09268", {
    title: "Decoding the Configuration of AI Coding Agents: Insights from Claude Code Projects",
    slug: "decoding-the-configuration-of-ai-coding-agents-insights-from-claude-code-projects",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Real-world coding-agent configuration patterns, defaults, and project-level conventions.",
    fit: "Helps reason about which configuration layers belong in the harness versus the repo.",
    topics: [TOPICS.REPO],
  }),
  arxivPaper("2601.20404", {
    title: "On the Impact of AGENTS.md Files on the Efficiency of AI Coding Agents",
    slug: "on-the-impact-of-agents-md-files-on-the-efficiency-of-ai-coding-agents",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Efficiency impact of AGENTS.md files on coding-agent task completion.",
    fit: "Direct evaluation signal for repository-level context files.",
    topics: [TOPICS.REPO],
  }),
  arxivPaper("2603.16021", {
    title: "Interpretable Context Methodology: Folder Structure as Agentic Architecture",
    slug: "interpretable-context-methodology-folder-structure-as-agentic-architecture",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Folder structure and filesystem organization as agent-readable context.",
    fit: "Relevant when directory layout itself becomes part of the harness contract.",
    topics: [TOPICS.REPO, TOPICS.HARNESS],
  }),
];
177
+
178
/**
 * Manifest entries for papers on agent skills, skill libraries, and
 * procedural / workflow memory, ranging from older lineage references to
 * recent benchmarks. Every entry is built with `arxivPaper`.
 */
const skillsManifest = [
  arxivPaper("2602.20867", {
    title: "SoK: Agentic Skills -- Beyond Tool Use in LLM Agents",
    slug: "sok-agentic-skills-beyond-tool-use-in-llm-agents",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Skill lifecycle, design patterns, acquisition, composition, evaluation, and governance.",
    fit: "Best current framing paper for treating skills as reusable procedural modules rather than prompt fragments.",
    topics: [TOPICS.SKILLS, TOPICS.SECURITY],
  }),
  arxivPaper("2602.12430", {
    title:
      "Agent Skills for Large Language Models: Architecture, Acquisition, Security, and the Path Forward",
    slug: "agent-skills-for-large-language-models-architecture-acquisition-security-and-the-path-forward",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Skill architecture, acquisition pathways, security concerns, and open ecosystem governance.",
    fit: "Useful survey-style companion to the SoK, especially for skill-library security and governance.",
    topics: [TOPICS.SKILLS, TOPICS.SECURITY],
  }),
  arxivPaper("2305.16291", {
    title: "Voyager: An Open-Ended Embodied Agent with Large Language Models",
    slug: "voyager-an-open-ended-embodied-agent-with-large-language-models",
    year: 2023,
    researchBucket: "P2 lineage and older references",
    mapsTo: "Growing executable skill libraries and open-ended reuse in an embodied environment.",
    fit: "One of the clearest ancestors of the modern skill-library pattern.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2302.04761", {
    title: "Toolformer: Language Models Can Teach Themselves to Use Tools",
    slug: "toolformer-language-models-can-teach-themselves-to-use-tools",
    year: 2023,
    researchBucket: "P2 lineage and older references",
    mapsTo: "Self-supervised tool invocation and capability selection during reasoning.",
    fit: "Foundational precursor for skill routing and external capability use.",
    topics: [TOPICS.SKILLS],
  }),
  arxivPaper("2305.17126", {
    title: "Large Language Models as Tool Makers",
    slug: "large-language-models-as-tool-makers",
    year: 2023,
    researchBucket: "P2 lineage and older references",
    mapsTo: "Tool creation and executable capability synthesis instead of tool use alone.",
    fit: "Important precursor to programmatic skills and generated reusable procedures.",
    topics: [TOPICS.SKILLS],
  }),
  arxivPaper("2306.07863", {
    title: "Synapse: Trajectory-as-Exemplar Prompting with Memory for Computer Control",
    slug: "synapse-trajectory-as-exemplar-prompting-with-memory-for-computer-control",
    year: 2023,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Trajectory memory, exemplar reuse, and procedural recall for computer-control agents.",
    fit: "Useful early bridge between trajectory memory and reusable procedural skills.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2308.10144", {
    title: "ExpeL: LLM Agents Are Experiential Learners",
    slug: "expel-llm-agents-are-experiential-learners",
    year: 2023,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Learning from experience through externalized feedback and reusable strategy updates.",
    fit: "Helps motivate persistent skill and memory stores instead of stateless prompting.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2409.07429", {
    title: "Agent Workflow Memory",
    slug: "agent-workflow-memory",
    year: 2024,
    researchBucket: "P0 direct hits",
    mapsTo: "Reusable workflow induction from trajectories for web and long-horizon agents.",
    fit: "One of the strongest practical papers on extracting reusable procedures from successful experience.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2504.07079", {
    title: "SkillWeaver: Web Agents can Self-Improve by Discovering and Honing Skills",
    slug: "skillweaver-web-agents-can-self-improve-by-discovering-and-honing-skills",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Skill discovery, refinement, and reusable API-like abstractions for web agents.",
    fit: "Strong evidence that reusable discovered skills can materially improve long-horizon execution.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2510.14308", {
    title: "ReUseIt: Synthesizing Reusable AI Agent Workflows for Web Automation",
    slug: "reuseit-synthesizing-reusable-ai-agent-workflows-for-web-automation",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Reusable workflow synthesis from successful and failed web automation traces.",
    fit: "Useful for turning concrete traces into editable workflow skills instead of opaque prompt tuning.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2602.01869", {
    title:
      "ProcMEM: Learning Reusable Procedural Memory from Experience via Non-Parametric PPO for LLM Agents",
    slug: "procmem-learning-reusable-procedural-memory-from-experience-via-non-parametric-ppo-for-llm-agents",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Reusable procedural memory learned from experience without weight updates.",
    fit: "Directly relevant to a skills layer built from trajectories rather than static hand-authored instructions.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2512.18950", {
    title:
      "Learning Hierarchical Procedural Memory for LLM Agents through Bayesian Selection and Contrastive Refinement",
    slug: "learning-hierarchical-procedural-memory-for-llm-agents-through-bayesian-selection-and-contrastive-refinement",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Hierarchical procedural memory with control policies for continue, skip, repeat, and abort.",
    fit: "Useful when flat skill lists are not enough and the harness needs structured playbooks.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2508.06433", {
    title: "Mem^p: Exploring Agent Procedural Memory",
    slug: "memp-exploring-agent-procedural-memory",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Procedural memory construction, retrieval, and update policies across agent trajectories.",
    fit: "Useful companion to workflow-memory and procedural-memory papers when designing a skill store.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2602.02474", {
    title: "MemSkill: Learning and Evolving Memory Skills for Self-Evolving Agents",
    slug: "memskill-learning-and-evolving-memory-skills-for-self-evolving-agents",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Memory operations as skills with explicit skill-bank evolution.",
    fit: "Strong fit for the frontier where memory and skills are treated as one evolving substrate.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2512.17102", {
    title: "Reinforcement Learning for Self-Improving Agent with Skill Library",
    slug: "reinforcement-learning-for-self-improving-agent-with-skill-library",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Skill-library accumulation, RL-based selection, and lower-cost task completion.",
    fit: "Useful evidence for skill-library routing and continual improvement over related tasks.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2601.03509", {
    title: "Evolving Programmatic Skill Networks",
    slug: "evolving-programmatic-skill-networks",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Composable executable skills, structured reflection, maturity-aware updates, and rollback validation.",
    fit: "One of the strongest current papers on programmatic skill composition and controlled skill evolution.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2603.12056", {
    title: "XSkill: Continual Learning from Experience and Skills in Multimodal Agents",
    slug: "xskill-continual-learning-from-experience-and-skills-in-multimodal-agents",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Continual learning that combines experience replay with reusable multimodal skills.",
    fit: "Useful for the broader direction of skill reuse outside purely text-only agents.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2603.18743", {
    title: "Memento-Skills: Let Agents Design Agents",
    slug: "memento-skills-let-agents-design-agents",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Markdown-based evolving skills, read-write-reflect loops, and learned routing.",
    fit: "Directly relevant to repo-local skill files and self-evolving skill libraries.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2603.17187", {
    title: "MetaClaw: Just Talk -- An Agent That Meta-Learns and Evolves in the Wild",
    slug: "metaclaw-just-talk-an-agent-that-meta-learns-and-evolves-in-the-wild",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Failure-driven skill synthesis and zero-downtime evolution of agent capabilities.",
    fit: "Useful frontier reference for agents that grow a skill library from live operating experience.",
    topics: [TOPICS.SKILLS, TOPICS.LONG_RUNNING],
  }),
  arxivPaper("2602.12670", {
    title: "SkillsBench: Benchmarking How Well Agent Skills Work Across Diverse Tasks",
    slug: "skillsbench-benchmarking-how-well-agent-skills-work-across-diverse-tasks",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Cross-domain skill evaluation, curated-versus-generated skills, and routing quality.",
    fit: "Best current benchmark evidence that curated human-authored skills help while self-generated ones often do not.",
    topics: [TOPICS.SKILLS, TOPICS.REPO],
  }),
];
363
+
364
/**
 * Manifest entries for papers on blackboard-style and shared-workspace
 * multi-agent coordination. Every entry is built with `arxivPaper`.
 */
const sharedWorkspaceManifest = [
  arxivPaper("2510.14312", {
    title: "Terrarium: Revisiting the Blackboard for Multi-Agent Safety, Privacy, and Security Studies",
    slug: "terrarium-revisiting-the-blackboard-for-multi-agent-safety-privacy-and-security-studies",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Blackboard coordination under safety, privacy, and security constraints.",
    fit: "Rare direct treatment of attack surfaces in shared-workspace multi-agent systems.",
    topics: [TOPICS.BLACKBOARD, TOPICS.SECURITY, TOPICS.PLANNING],
  }),
  arxivPaper("2603.03780", {
    title: "MACC: Multi-Agent Collaborative Competition for Scientific Exploration",
    slug: "macc-multi-agent-collaborative-competition-for-scientific-exploration",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Shared scientific workspaces, institutional incentives, and collaborative competition among agents.",
    fit: "Useful for studying coordination designs beyond simple supervisor-worker delegation.",
    topics: [TOPICS.BLACKBOARD, TOPICS.HARNESS, TOPICS.PLANNING],
  }),
  arxivPaper("2603.08369", {
    title:
      "M3-ACE: Rectifying Visual Perception in Multimodal Math Reasoning via Multi-Agentic Context Engineering",
    slug: "m3-ace-rectifying-visual-perception-in-multimodal-math-reasoning-via-multi-agentic-context-engineering",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Shared evidential context and multi-agent collaboration in multimodal reasoning.",
    fit: "Domain-specific but still relevant to context-sharing patterns across agents.",
    topics: [TOPICS.BLACKBOARD, TOPICS.HARNESS],
  }),
];
394
+
395
/**
 * Manifest entries for survey- and architecture-level papers on orchestrated
 * multi-agent systems. Every entry is built with `arxivPaper`.
 */
const orchestrationManifest = [
  arxivPaper("2601.13671", {
    title: "The Orchestration of Multi-Agent Systems: Architectures, Protocols, and Enterprise Adoption",
    slug: "the-orchestration-of-multi-agent-systems-architectures-protocols-and-enterprise-adoption",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Architectures, protocols, and adoption patterns for orchestrated agent systems.",
    fit: "Broad systems framing for mapping blackboard ideas into deployable orchestration.",
    topics: [TOPICS.BLACKBOARD, TOPICS.HARNESS, TOPICS.PLANNING],
  }),
  arxivPaper("2601.12560", {
    title:
      "Agentic Artificial Intelligence (AI): Architectures, Taxonomies, and Evaluation of Large Language Model Agents",
    slug: "agentic-artificial-intelligence-ai-architectures-taxonomies-and-evaluation-of-large-language-model-agents",
    year: 2026,
    researchBucket: "P2 lineage and older references",
    mapsTo: "Agent architectures, taxonomies, evaluation dimensions, and governance patterns.",
    fit: "Survey-style reference for architecture and evaluation vocabulary.",
    topics: [TOPICS.BLACKBOARD, TOPICS.HARNESS],
  }),
  arxivPaper("2603.15021", {
    title: "Describing Agentic AI Systems with C4: Lessons from Industry Projects",
    slug: "describing-agentic-ai-systems-with-c4-lessons-from-industry-projects",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Architecture documentation patterns, collaboration boundaries, and quality gates for agentic systems.",
    fit: "Useful when the orchestration surface needs a software-architecture representation.",
    topics: [TOPICS.BLACKBOARD, TOPICS.HARNESS, TOPICS.PLANNING],
  }),
];
425
+
426
/**
 * Manifest entries for planning-oriented references.
 *
 * Every entry carries TOPICS.PLANNING plus any secondary topics it touches.
 * The entries run from recent arXiv work on planning and orchestration,
 * through failure analyses and coordination benchmarks, to surveys and the
 * classic blackboard-control literature from the 1980s and 1990s.
 *
 * NOTE(review): the `textSourceUrl` values below nest `http://https://` after
 * the r.jina.ai prefix — confirm the reader proxy accepts that form.
 */
const planningManifest = [
  // --- Planning loops, protocols, and orchestration structure ---
  arxivPaper("2603.11445", {
    title:
      "Verified Multi-Agent Orchestration: A Plan-Execute-Verify-Replan Framework for Complex Query Resolution",
    slug: "verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "DAG decomposition, parallel execution, verification, and replanning for complex queries.",
    fit: "Direct blueprint for a planner-verifier harness loop instead of one-shot multi-agent delegation.",
    topics: [TOPICS.PLANNING, TOPICS.HARNESS],
  }),
  arxivPaper("2602.07839", {
    title: "TodoEvolve: Learning to Architect Agent Planning Systems",
    slug: "todoevolve-learning-to-architect-agent-planning-systems",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "Meta-planning, task-specific planning topology, and dynamic planning revision.",
    fit: "Useful when the planning loop itself should adapt instead of staying hand-designed.",
    topics: [TOPICS.PLANNING, TOPICS.HARNESS],
  }),
  arxivPaper("2503.03505", {
    title: "Parallelized Planning-Acting for Efficient LLM-based Multi-Agent Systems in Minecraft",
    slug: "parallelized-planning-acting-for-efficient-llm-based-multi-agent-systems-in-minecraft",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Parallel planning/acting schedules, latency-sensitive coordination, and dynamic environments.",
    fit: "Useful reference for reducing serialized planning bottlenecks in multi-agent execution.",
    notes:
      "Normalized to the current arXiv title for 2503.03505; the user-supplied list described the paper more generally as dynamic-environment planning.",
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
  arxivPaper("2603.03005", {
    title:
      "OrchMAS: Orchestrated Reasoning with Multi Collaborative Heterogeneous Scientific Expert Structured Agents",
    slug: "orchmas-orchestrated-reasoning-with-multi-collaborative-heterogeneous-scientific-expert-structured-agents",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Structured orchestration, heterogeneous experts, and coordinated reasoning pipelines.",
    fit: "Another current orchestration reference for role-structured collaborative planning.",
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
  arxivPaper("2510.12120", {
    title: "Towards Engineering Multi-Agent LLMs: A Protocol-Driven Approach",
    slug: "towards-engineering-multi-agent-llms-a-protocol-driven-approach",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Protocol-driven agent coordination, interface contracts, and multi-agent engineering discipline.",
    fit: "Useful when orchestration protocols should be explicit artifacts rather than prompt folklore.",
    topics: [TOPICS.PLANNING, TOPICS.HARNESS],
  }),
  arxivPaper("2504.21030", {
    title:
      "Advancing Multi-Agent Systems Through Model Context Protocol: Architecture, Implementation, and Applications",
    slug: "advancing-multi-agent-systems-through-model-context-protocol-architecture-implementation-and-applications",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "MCP-based system architecture, server coordination, and protocol-mediated agent integration.",
    fit: "Relevant when the planning harness sits on top of MCP-style tool and server boundaries.",
    topics: [TOPICS.PLANNING, TOPICS.HARNESS],
  }),
  arxivPaper("2601.11595", {
    title: "Enhancing Model Context Protocol (MCP) with Context-Aware Server Collaboration",
    slug: "enhancing-model-context-protocol-mcp-with-context-aware-server-collaboration",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Context-aware server collaboration and richer coordination on MCP-style infrastructures.",
    fit: "Useful follow-on reference for collaborative planning over protocolized tool ecosystems.",
    topics: [TOPICS.PLANNING, TOPICS.HARNESS],
  }),

  // --- Failure analyses and coordination benchmarks ---
  arxivPaper("2503.13657", {
    title: "Why Do Multi-Agent LLM Systems Fail?",
    slug: "why-do-multi-agent-llm-systems-fail",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Failure taxonomy for orchestration, inter-agent alignment, verification, and termination.",
    fit: "One of the most useful planning reality checks for where orchestration loops actually break down.",
    topics: [TOPICS.PLANNING, TOPICS.REPO],
  }),
  arxivPaper("2505.11556", {
    title: "Systematic Failures in Collective Reasoning under Distributed Information in Multi-Agent LLMs",
    slug: "systematic-failures-in-collective-reasoning-under-distributed-information-in-multi-agent-llms",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Collective reasoning failures when evidence is distributed across agents.",
    fit: "Useful diagnostic reference for whether planning systems actually pool distributed evidence.",
    notes:
      "Normalized to the current arXiv title for 2505.11556; the user-supplied list referred to the benchmark as HiddenBench.",
    topics: [TOPICS.PLANNING, TOPICS.REPO, TOPICS.BLACKBOARD],
  }),
  arxivPaper("2602.13255", {
    title: "DPBench: Large Language Models Struggle with Simultaneous Coordination",
    slug: "dpbench-large-language-models-struggle-with-simultaneous-coordination",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Distributed-information coordination benchmarks with simultaneous constraints.",
    fit: "Useful benchmark for testing whether coordination-heavy planning systems scale beyond serial reasoning.",
    topics: [TOPICS.PLANNING, TOPICS.REPO],
  }),
  arxivPaper("2602.01011", {
    title: "Multi-Agent Teams Hold Experts Back",
    slug: "multi-agent-teams-hold-experts-back",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Expert-underutilization failures in self-organizing multi-agent teams.",
    fit: "Useful caution against naive team-size scaling and weak expertise weighting.",
    topics: [TOPICS.PLANNING, TOPICS.REPO],
  }),

  // --- Surveys and taxonomies ---
  externalPaper("https://link.springer.com/article/10.1007/s44336-024-00009-2", {
    title: "A Survey on LLM-based Multi-agent Systems: Workflow, Infrastructure, and Challenges",
    slug: "a-survey-on-llm-based-multi-agent-systems-workflow-infrastructure-and-challenges",
    preferHtml: true,
    year: 2024,
    venue: "Vicinagearth 1, 9 (2024)",
    researchBucket: "P2 lineage and older references",
    mapsTo: "Survey of LLM-MAS construction, interaction, planning, and communication patterns.",
    fit: "Broad framing reference for situating planning and blackboard papers in the wider MAS landscape.",
    textSourceUrl:
      "https://r.jina.ai/http://https://link.springer.com/article/10.1007/s44336-024-00009-2",
    textSourceFormat: "jina-markdown",
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
  arxivPaper("2404.04834", {
    title:
      "LLM-Based Multi-Agent Systems for Software Engineering: Literature Review, Vision and the Road Ahead",
    slug: "llm-based-multi-agent-systems-for-software-engineering-literature-review-vision-and-the-road-ahead",
    year: 2024,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Multi-agent software-engineering workflows, shared repositories, and coding-oriented coordination patterns.",
    fit: "Useful bridge between general multi-agent planning papers and repository-centered coding systems.",
    topics: [TOPICS.PLANNING, TOPICS.HARNESS, TOPICS.REPO],
  }),
  arxivPaper("2508.12683", {
    title:
      "A Taxonomy of Hierarchical Multi-Agent Systems: Design Patterns, Coordination Mechanisms, and Industrial Applications",
    slug: "a-taxonomy-of-hierarchical-multi-agent-systems-design-patterns-coordination-mechanisms-and-industrial-applications",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Hierarchical MAS design patterns and coordination mechanisms for comparing against blackboard systems.",
    fit: "Useful contrast class when deciding between hierarchy-heavy and shared-workspace planning designs.",
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),

  // --- Classic blackboard lineage ---
  externalPaper("https://ojs.aaai.org/index.php/aimagazine/article/view/537", {
    title:
      "Blackboard Systems, Part One: The Blackboard Model of Problem Solving and the Evolution of Blackboard Architectures",
    slug: "blackboard-systems-part-one-the-blackboard-model-of-problem-solving-and-the-evolution-of-blackboard-architectures",
    authors: "H. Penny Nii",
    year: 1986,
    venue: "AI Magazine 7(2) (1986)",
    researchBucket: "P2 lineage and older references",
    mapsTo: "Foundational blackboard architecture concepts, control structure, and problem-solving model.",
    fit: "Classic grounding for the blackboard pattern that still informs shared-workspace planning design.",
    textSourceUrl: "https://r.jina.ai/http://https://ojs.aaai.org/index.php/aimagazine/article/view/537",
    textSourceFormat: "jina-markdown",
    skipSourcePageFetch: true,
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
  externalPaper("https://www.sciencedirect.com/science/article/abs/pii/0004370285900633", {
    title: "A Blackboard Architecture for Control",
    slug: "a-blackboard-architecture-for-control",
    authors: "Barbara Hayes-Roth",
    year: 1985,
    venue: "Artificial Intelligence 26(3) (1985)",
    researchBucket: "P2 lineage and older references",
    mapsTo: "Agenda-driven control, behavioral goals, and blackboard-based control architecture.",
    fit: "Foundational reference for the control side of blackboard-style planning systems.",
    textSourceUrl:
      "https://r.jina.ai/http://https://www.sciencedirect.com/science/article/abs/pii/0004370285900633",
    textSourceFormat: "jina-markdown",
    skipSourcePageFetch: true,
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
  directPdfPaper("https://cdn.aaai.org/AAAI/1986/AAAI86-010.pdf", {
    title: "Incremental Planning to Control a Blackboard-Based Problem Solver",
    slug: "incremental-planning-to-control-a-blackboard-based-problem-solver",
    authors: "Edmund H. Durfee, Victor R. Lesser",
    year: 1986,
    venue: "AAAI-86",
    researchBucket: "P2 lineage and older references",
    mapsTo: "Incremental planning, plan monitoring, and repair for blackboard-based control.",
    fit: "Direct classic reference connecting planning explicitly to blackboard control.",
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
  directPdfPaper("https://mas.cs.umass.edu/Documents/Corkill/ai-expert.pdf", {
    title: "Blackboard Systems",
    slug: "blackboard-systems",
    authors: "Daniel D. Corkill",
    year: 1991,
    venue: "AI Expert 6(9) (1991)",
    researchBucket: "P2 lineage and older references",
    mapsTo: "Blackboard characteristics, specialist coordination, and applicability to ill-defined problems.",
    fit: "Useful retrospective on what blackboard systems are good at and where they fit.",
    topics: [TOPICS.PLANNING, TOPICS.BLACKBOARD],
  }),
];
620
+
621
/**
 * Manifest entries for secure code generation techniques.
 *
 * Every entry carries TOPICS.SECURITY; entries that describe agentic or
 * tool-feedback workflows also carry TOPICS.HARNESS. Entries are grouped
 * below by the kind of technique their `mapsTo` text describes.
 *
 * NOTE(review): the `textSourceUrl` value below nests `http://https://` after
 * the r.jina.ai prefix — confirm the reader proxy accepts that form.
 */
const secureCodeGenerationManifest = [
  // --- Prompting and prompt optimization ---
  arxivPaper("2407.07064", {
    title: "Prompting Techniques for Secure Code Generation: A Systematic Investigation",
    slug: "prompting-techniques-for-secure-code-generation-a-systematic-investigation",
    year: 2024,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Prompt variants and their effect on secure code generation quality.",
    fit: "Baseline evidence for what prompt-only hardening can and cannot accomplish.",
    topics: [TOPICS.SECURITY],
  }),
  directPdfPaper("https://emaiannone.github.io/assets/pdf/c6.pdf", {
    title:
      "Retrieve, Refine, or Both? Using Task-Specific Guidelines for Secure Python Code Generation",
    slug: "retrieve-refine-or-both-using-task-specific-guidelines-for-secure-python-code-generation",
    year: 2025,
    venue: "ICSME 2025",
    authors: "Catherine Tony, Emanuele Iannone, Riccardo Scandariato",
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Guideline retrieval versus iterative refinement for secure Python generation.",
    fit: "Direct comparison point for retrieval-only, repair-only, and hybrid security loops in coding workflows.",
    topics: [TOPICS.SECURITY],
  }),
  externalPaper("https://www.sciencedirect.com/science/article/pii/S0164121225003516", {
    title: "Discrete Prompt Optimization Using Genetic Algorithm for Secure Python Code Generation",
    slug: "discrete-prompt-optimization-using-genetic-algorithm-for-secure-python-code-generation",
    authors: "Catherine Tony, Riccardo Scandariato, Max Kretschmann, Maura Pintor",
    year: 2026,
    venue: "Journal of Systems and Software 232 (2026)",
    researchBucket: "P2 lineage and older references",
    mapsTo: "Search-based prompt optimization for secure Python code generation.",
    fit: "Adjacent baseline for automated prompt tuning in security-sensitive coding.",
    textSourceUrl:
      "https://r.jina.ai/http://https://www.sciencedirect.com/science/article/pii/S0164121225003516",
    textSourceFormat: "jina-markdown",
    skipSourcePageFetch: true,
    notes:
      "Primary cache text uses r.jina.ai because the ScienceDirect article page is not reliably extractable through Readability alone.",
    topics: [TOPICS.SECURITY],
  }),

  // --- Agentic, retrieval-augmented, and tool-feedback workflows ---
  arxivPaper("2506.07313", {
    title:
      "SCGAgent: Recreating the Benefits of Reasoning Models for Secure Code Generation with Agentic Workflows",
    slug: "scgagent-recreating-the-benefits-of-reasoning-models-for-secure-code-generation-with-agentic-workflows",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Agentic secure-code workflows that recover reasoning-model benefits through orchestration.",
    fit: "Very close to the repo's interest in harness-mediated secure coding workflows.",
    topics: [TOPICS.SECURITY, TOPICS.HARNESS],
  }),
  arxivPaper("2510.18204", {
    title: "RESCUE: Retrieval Augmented Secure Code Generation",
    slug: "rescue-retrieval-augmented-secure-code-generation",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Security-focused retrieval augmentation for code generation.",
    fit: "Useful reference for designing security-specific retrieval layers instead of generic RAG.",
    topics: [TOPICS.SECURITY],
  }),
  arxivPaper("2601.00509", {
    title:
      "Improving LLM-Assisted Secure Code Generation through Retrieval-Augmented-Generation and Multi-Tool Feedback",
    slug: "improving-llm-assisted-secure-code-generation-through-retrieval-augmented-generation-and-multi-tool-feedback",
    year: 2026,
    researchBucket: "P0 direct hits",
    mapsTo: "RAG, compiler feedback, static analysis, and multi-tool repair for secure coding.",
    fit: "Strong direct fit for a tool-augmented secure-coding harness with iterative verification.",
    topics: [TOPICS.SECURITY, TOPICS.HARNESS],
  }),

  // --- Decoding-time and representation-level controls ---
  arxivPaper("2602.01187", {
    title: "Autoregressive, Yet Revisable: In Decoding Revision for Secure Code Generation",
    slug: "autoregressive-yet-revisable-in-decoding-revision-for-secure-code-generation",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Decoding-time revision and self-correction for secure generation.",
    fit: "Adds a decoding-centric alternative to outer-loop repair orchestration.",
    topics: [TOPICS.SECURITY],
  }),
  arxivPaper("2603.11212", {
    title:
      "Security-by-Design for LLM-Based Code Generation: Leveraging Internal Representations for Concept-Driven Steering Mechanisms",
    slug: "security-by-design-for-llm-based-code-generation-leveraging-internal-representations-for-concept-driven-steering-mechanisms",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Internal representation steering and concept-driven controls for secure generation.",
    fit: "Mechanism-oriented reference for security controls beyond prompt engineering.",
    topics: [TOPICS.SECURITY],
  }),

  // --- Human feedback combined with static analysis ---
  arxivPaper("2602.05868", {
    title:
      "Persistent Human Feedback, LLMs, and Static Analyzers for Secure Code Generation and Vulnerability Detection",
    slug: "persistent-human-feedback-llms-and-static-analyzers-for-secure-code-generation-and-vulnerability-detection",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Human feedback, static analyzers, and vulnerability detection in secure code generation.",
    fit: "Useful reality check on combining analyzers and humans instead of trusting either alone.",
    topics: [TOPICS.SECURITY, TOPICS.HARNESS],
  }),
];
719
+
720
/**
 * Manifest entries for security evaluation: benchmarks, evaluation platforms,
 * literature reviews, and systems-level security framing.
 *
 * Every entry carries TOPICS.SECURITY; most also carry TOPICS.REPO, and the
 * multi-agent systems entry carries TOPICS.BLACKBOARD instead.
 */
const securityEvaluationManifest = [
  // --- Benchmarks and evaluation platforms ---
  arxivPaper("2504.21205", {
    title:
      "SecRepoBench: Benchmarking Code Agents for Secure Code Completion in Real-World Repositories",
    slug: "secrepobench-benchmarking-code-agents-for-secure-code-completion-in-real-world-repositories",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Repository-level secure code completion for code agents in real-world projects.",
    fit: "Direct benchmark match for secure, repo-grounded coding-agent evaluation.",
    notes:
      "Normalized to the latest arXiv title for 2504.21205; earlier versions framed the paper around LLMs rather than code agents.",
    topics: [TOPICS.SECURITY, TOPICS.REPO],
  }),
  arxivPaper("2509.22097", {
    title: "SecureAgentBench: Benchmarking Secure Code Generation under Realistic Vulnerability Scenarios",
    slug: "secureagentbench-benchmarking-secure-code-generation-under-realistic-vulnerability-scenarios",
    year: 2025,
    researchBucket: "P0 direct hits",
    mapsTo: "Secure code generation tasks under realistic vulnerability scenarios.",
    fit: "Useful benchmark for testing whether secure coding improvements hold beyond toy snippets.",
    topics: [TOPICS.SECURITY, TOPICS.REPO],
  }),
  arxivPaper("2410.11096", {
    title: "SeCodePLT: A Unified Platform for Evaluating the Security of Code GenAI",
    slug: "secodeplt-a-unified-platform-for-evaluating-the-security-of-code-genai",
    year: 2024,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Unified evaluation platform for insecure generation and code security analysis.",
    fit: "Broader benchmarking substrate for security-focused evaluation workflows.",
    topics: [TOPICS.SECURITY, TOPICS.REPO],
  }),
  arxivPaper("2603.10969", {
    title: "TOSSS: a CVE-based Software Security Benchmark for Large Language Models",
    slug: "tosss-a-cve-based-software-security-benchmark-for-large-language-models",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "CVE-grounded software security benchmark tasks.",
    fit: "Useful when evaluation should anchor to real vulnerability patterns rather than synthetic prompts.",
    topics: [TOPICS.SECURITY, TOPICS.REPO],
  }),

  // --- Reviews and broader quality/security framing ---
  arxivPaper("2412.15004", {
    title: "From Vulnerabilities to Remediation: A Systematic Literature Review of LLMs in Code Security",
    slug: "from-vulnerabilities-to-remediation-a-systematic-literature-review-of-llms-in-code-security",
    year: 2024,
    researchBucket: "P2 lineage and older references",
    mapsTo: "Survey of LLM use across vulnerability finding, explanation, and remediation.",
    fit: "Breadth reference for situating the secure-coding cache in the wider literature.",
    topics: [TOPICS.SECURITY],
  }),
  arxivPaper("2511.10271", {
    title:
      "Quality Assurance of LLM-generated Code: Addressing Non-Functional Quality Characteristics",
    slug: "quality-assurance-of-llm-generated-code-addressing-non-functional-quality-characteristics",
    year: 2025,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Security and other non-functional quality checks for LLM-generated code.",
    fit: "Useful broader QA framing around generated-code risk, beyond only functional correctness.",
    topics: [TOPICS.SECURITY, TOPICS.REPO],
  }),
  arxivPaper("2603.09002", {
    title: "Security Considerations for Multi-agent Systems",
    slug: "security-considerations-for-multi-agent-systems",
    year: 2026,
    researchBucket: "P1 strong adjacent work",
    mapsTo: "Credential, provenance, auditability, and shared-state risks in multi-agent systems.",
    fit: "Useful systems-level security framing for agent orchestration and shared workspaces.",
    topics: [TOPICS.SECURITY, TOPICS.BLACKBOARD],
  }),
];
789
+
790
/**
 * The complete paper manifest, exported both by name and as the default.
 *
 * Base entries come first. A base entry whose slug is listed in
 * PLANNING_BASE_SLUGS and/or SKILLS_BASE_SLUGS is passed through
 * `withExtraTopics` together with the matching topic(s); every other base
 * entry is kept as the same object. The topic-specific manifests then follow
 * in a fixed order.
 */
export const paperManifest = [
  ...baseManifest.map((entry) => {
    // Collect the additional topics in a stable order: planning, then skills.
    const extraTopics = [
      ...(PLANNING_BASE_SLUGS.has(entry.slug) ? [TOPICS.PLANNING] : []),
      ...(SKILLS_BASE_SLUGS.has(entry.slug) ? [TOPICS.SKILLS] : []),
    ];
    if (extraTopics.length === 0) {
      return entry;
    }
    return withExtraTopics(entry, extraTopics);
  }),
  ...contextEngineeringManifest,
  ...repoContextManifest,
  ...skillsManifest,
  ...sharedWorkspaceManifest,
  ...orchestrationManifest,
  ...planningManifest,
  ...secureCodeGenerationManifest,
  ...securityEvaluationManifest,
];

export default paperManifest;