sentinelayer-cli 0.6.2 → 0.8.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (159)
  1. package/README.md +996 -996
  2. package/bin/create-sentinelayer.js +5 -5
  3. package/bin/sentinelayer-cli.js +4 -4
  4. package/bin/sl.js +5 -5
  5. package/package.json +64 -63
  6. package/src/agents/jules/config/definition.js +160 -160
  7. package/src/agents/jules/config/system-prompt.js +182 -182
  8. package/src/agents/jules/error-intake.js +51 -51
  9. package/src/agents/jules/fix-cycle.js +17 -17
  10. package/src/agents/jules/loop.js +457 -450
  11. package/src/agents/jules/pulse.js +10 -10
  12. package/src/agents/jules/stream.js +187 -186
  13. package/src/agents/jules/swarm/file-scanner.js +74 -74
  14. package/src/agents/jules/swarm/index.js +11 -11
  15. package/src/agents/jules/swarm/orchestrator.js +362 -362
  16. package/src/agents/jules/swarm/pattern-hunter.js +123 -123
  17. package/src/agents/jules/swarm/sub-agent.js +311 -309
  18. package/src/agents/jules/tools/aidenid-email.js +189 -189
  19. package/src/agents/jules/tools/auth-audit.js +1699 -1691
  20. package/src/agents/jules/tools/dispatch.js +340 -335
  21. package/src/agents/jules/tools/file-edit.js +2 -2
  22. package/src/agents/jules/tools/file-read.js +2 -2
  23. package/src/agents/jules/tools/frontend-analyze.js +570 -570
  24. package/src/agents/jules/tools/glob.js +2 -2
  25. package/src/agents/jules/tools/grep.js +2 -2
  26. package/src/agents/jules/tools/index.js +29 -29
  27. package/src/agents/jules/tools/path-guards.js +2 -2
  28. package/src/agents/jules/tools/runtime-audit.js +507 -507
  29. package/src/agents/jules/tools/shell.js +2 -2
  30. package/src/agents/jules/tools/url-policy.js +100 -100
  31. package/src/agents/persona-visuals.js +64 -61
  32. package/src/agents/shared-tools/dispatch-core.js +320 -315
  33. package/src/agents/shared-tools/file-edit.js +180 -180
  34. package/src/agents/shared-tools/file-read.js +100 -100
  35. package/src/agents/shared-tools/glob.js +168 -168
  36. package/src/agents/shared-tools/grep.js +228 -228
  37. package/src/agents/shared-tools/index.js +46 -46
  38. package/src/agents/shared-tools/path-guards.js +161 -161
  39. package/src/agents/shared-tools/shell.js +383 -383
  40. package/src/ai/aidenid.js +1021 -1009
  41. package/src/ai/client.js +553 -553
  42. package/src/ai/domain-target-store.js +268 -268
  43. package/src/ai/identity-store.js +270 -270
  44. package/src/ai/proxy.js +137 -137
  45. package/src/ai/site-store.js +145 -145
  46. package/src/audit/agents/architecture.js +180 -180
  47. package/src/audit/agents/compliance.js +179 -179
  48. package/src/audit/agents/documentation.js +165 -165
  49. package/src/audit/agents/performance.js +145 -145
  50. package/src/audit/agents/security.js +215 -215
  51. package/src/audit/agents/testing.js +172 -172
  52. package/src/audit/orchestrator.js +557 -557
  53. package/src/audit/package.js +204 -204
  54. package/src/audit/registry.js +284 -284
  55. package/src/audit/replay.js +103 -103
  56. package/src/auth/gate.js +400 -371
  57. package/src/auth/http.js +681 -611
  58. package/src/auth/service.js +1106 -1106
  59. package/src/auth/session-store.js +813 -813
  60. package/src/cli.js +257 -252
  61. package/src/commands/ai/identity-lifecycle.js +1338 -1338
  62. package/src/commands/ai/provision-governance.js +1272 -1272
  63. package/src/commands/ai/shared.js +147 -147
  64. package/src/commands/ai.js +11 -11
  65. package/src/commands/apply.js +12 -12
  66. package/src/commands/audit.js +1171 -1166
  67. package/src/commands/auth.js +419 -419
  68. package/src/commands/chat.js +191 -191
  69. package/src/commands/config.js +184 -184
  70. package/src/commands/cost.js +311 -311
  71. package/src/commands/daemon/core.js +850 -850
  72. package/src/commands/daemon/extended.js +1048 -1048
  73. package/src/commands/daemon/shared.js +213 -213
  74. package/src/commands/daemon.js +11 -11
  75. package/src/commands/guide.js +174 -174
  76. package/src/commands/ingest.js +58 -58
  77. package/src/commands/init.js +55 -55
  78. package/src/commands/legacy-args.js +10 -10
  79. package/src/commands/mcp.js +461 -461
  80. package/src/commands/omargate.js +29 -29
  81. package/src/commands/persona.js +20 -20
  82. package/src/commands/plugin.js +260 -260
  83. package/src/commands/policy.js +132 -132
  84. package/src/commands/prompt.js +238 -238
  85. package/src/commands/review.js +704 -704
  86. package/src/commands/scan.js +872 -872
  87. package/src/commands/session.js +590 -0
  88. package/src/commands/spec.js +778 -716
  89. package/src/commands/swarm.js +651 -651
  90. package/src/commands/telemetry.js +202 -202
  91. package/src/commands/watch.js +511 -511
  92. package/src/config/agent-dictionary.js +182 -182
  93. package/src/config/io.js +56 -56
  94. package/src/config/paths.js +18 -18
  95. package/src/config/schema.js +55 -55
  96. package/src/config/service.js +184 -184
  97. package/src/cost/budget.js +235 -235
  98. package/src/cost/history.js +188 -188
  99. package/src/cost/tracker.js +171 -171
  100. package/src/daemon/artifact-lineage.js +534 -534
  101. package/src/daemon/assignment-ledger.js +966 -770
  102. package/src/daemon/ast-parser-layer.js +258 -258
  103. package/src/daemon/budget-governor.js +633 -633
  104. package/src/daemon/callgraph-overlay.js +646 -646
  105. package/src/daemon/error-worker.js +1209 -626
  106. package/src/daemon/fix-cycle.js +384 -377
  107. package/src/daemon/hybrid-mapper.js +929 -929
  108. package/src/daemon/ingest-refresh.js +10 -9
  109. package/src/daemon/jira-lifecycle.js +767 -632
  110. package/src/daemon/operator-control.js +657 -657
  111. package/src/daemon/pulse.js +327 -327
  112. package/src/daemon/reliability-lane.js +471 -471
  113. package/src/daemon/scope-engine.js +1068 -0
  114. package/src/daemon/watchdog.js +971 -971
  115. package/src/events/schema.js +190 -0
  116. package/src/guide/generator.js +316 -316
  117. package/src/ingest/engine.js +918 -918
  118. package/src/interactive/index.js +97 -97
  119. package/src/legacy-cli.js +3161 -2994
  120. package/src/mcp/registry.js +695 -695
  121. package/src/memory/blackboard.js +301 -301
  122. package/src/memory/retrieval.js +581 -581
  123. package/src/plugin/manifest.js +553 -553
  124. package/src/policy/packs.js +144 -144
  125. package/src/prompt/generator.js +136 -118
  126. package/src/review/ai-review.js +679 -679
  127. package/src/review/local-review.js +1351 -1305
  128. package/src/review/omargate-interactive.js +68 -68
  129. package/src/review/omargate-orchestrator.js +404 -300
  130. package/src/review/persona-prompts.js +296 -296
  131. package/src/review/replay.js +235 -235
  132. package/src/review/report.js +664 -664
  133. package/src/review/scan-modes.js +48 -42
  134. package/src/review/spec-binding.js +487 -487
  135. package/src/scaffold/generator.js +67 -67
  136. package/src/scaffold/templates.js +150 -150
  137. package/src/scan/generator.js +418 -418
  138. package/src/scan/gh-secrets.js +107 -107
  139. package/src/session/agent-registry.js +352 -0
  140. package/src/session/daemon.js +801 -0
  141. package/src/session/paths.js +33 -0
  142. package/src/session/runtime-bridge.js +739 -0
  143. package/src/session/store.js +388 -0
  144. package/src/session/stream.js +325 -0
  145. package/src/spec/generator.js +619 -519
  146. package/src/spec/regenerate.js +237 -237
  147. package/src/spec/templates.js +91 -91
  148. package/src/swarm/dashboard.js +247 -247
  149. package/src/swarm/factory.js +363 -363
  150. package/src/swarm/pentest.js +934 -934
  151. package/src/swarm/registry.js +419 -419
  152. package/src/swarm/report.js +158 -158
  153. package/src/swarm/runtime.js +576 -576
  154. package/src/swarm/scenario-dsl.js +272 -272
  155. package/src/telemetry/ledger.js +302 -302
  156. package/src/telemetry/session-tracker.js +234 -234
  157. package/src/telemetry/sync.js +203 -203
  158. package/src/ui/command-hints.js +13 -13
  159. package/src/ui/markdown.js +220 -220
@@ -1,450 +1,457 @@
1
- import { randomUUID } from "node:crypto";
2
- import { createMultiProviderApiClient } from "../../ai/client.js";
3
- import { evaluateBudget } from "../../cost/budget.js";
4
- import { dispatchTool, createAgentContext, BudgetExhaustedError } from "./tools/dispatch.js";
5
- import { JULES_DEFINITION } from "./config/definition.js";
6
- import { shouldSpawnSubAgents, runJulesSwarm } from "./swarm/orchestrator.js";
7
- import { frontendAnalyze } from "./tools/frontend-analyze.js";
8
-
9
- /**
10
- * Jules Tanaka — Agentic Loop
11
- *
12
- * Core state machine: LLM → tool_use → execute → result → LLM → repeat
13
- * With sub-agent swarm integration for large codebases.
14
- *
15
- * This loop is self-contained: it uses the existing ai/client.js for LLM calls,
16
- * the existing cost/budget.js for budget enforcement, and the Jules tool
17
- * dispatch for tool execution. No dependency on Batches O-Q.
18
- */
19
-
20
- const DEFAULT_MAX_TURNS = 25;
21
- const HEARTBEAT_INTERVAL_TURNS = 5;
22
-
23
- /**
24
- * Run Jules' agentic audit loop.
25
- *
26
- * @param {object} config
27
- * @param {string} config.systemPrompt - Jules' full system prompt
28
- * @param {object} config.scopeMap - { primary, secondary, tertiary } file lists
29
- * @param {string} config.rootPath - Codebase root
30
- * @param {object} [config.omarBaseline] - Deterministic baseline findings (if available)
31
- * @param {object} [config.blackboard] - Shared blackboard for cross-agent findings
32
- * @param {object} [config.memory] - Memory index for cross-run recall
33
- * @param {object} [config.budget] - Budget overrides
34
- * @param {object} [config.provider] - LLM provider overrides
35
- * @param {string} [config.mode] - "primary" | "secondary" | "tertiary"
36
- * @param {number} [config.maxTurns] - Max loop iterations
37
- * @param {AbortController} [config.abortController]
38
- * @param {function} [config.onEvent] - Streaming event callback
39
- * @returns {AsyncGenerator<JulesEvent>} Yields events as they occur
40
- */
41
- export async function* julesAuditLoop(config) {
42
- const {
43
- systemPrompt,
44
- scopeMap,
45
- rootPath,
46
- omarBaseline,
47
- blackboard,
48
- memory,
49
- provider,
50
- mode = "primary",
51
- maxTurns = DEFAULT_MAX_TURNS,
52
- abortController,
53
- onEvent,
54
- } = config;
55
-
56
- const budget = { ...JULES_DEFINITION.budget, ...config.budget };
57
- const runId = `jules-${Date.now()}-${randomUUID().slice(0, 8)}`;
58
- const startedAt = Date.now();
59
- const client = createMultiProviderApiClient(provider || {});
60
-
61
- const ctx = createAgentContext({
62
- agentIdentity: { id: JULES_DEFINITION.id, persona: JULES_DEFINITION.persona },
63
- budget,
64
- runId,
65
- onEvent,
66
- });
67
-
68
- const emit = (event, payload) => {
69
- const evt = {
70
- stream: "sl_event",
71
- event,
72
- agent: { id: JULES_DEFINITION.id, persona: JULES_DEFINITION.persona, color: JULES_DEFINITION.color, avatar: JULES_DEFINITION.avatar },
73
- payload,
74
- usage: {
75
- costUsd: ctx.usage.costUsd,
76
- outputTokens: ctx.usage.outputTokens,
77
- toolCalls: ctx.usage.toolCalls,
78
- durationMs: Date.now() - startedAt,
79
- },
80
- };
81
- if (onEvent) onEvent(evt);
82
- return evt;
83
- };
84
-
85
- yield emit("agent_start", { mode, runId, maxTurns, budget });
86
-
87
- // ── Phase 0: Prerequisites ────────────────────────────────────────
88
-
89
- yield emit("progress", { phase: "prerequisites", message: "Detecting framework..." });
90
-
91
- let framework = {};
92
- try {
93
- framework = frontendAnalyze({ operation: "detect_framework", path: rootPath });
94
- ctx.usage.toolCalls++;
95
- yield emit("tool_result", { tool: "FrontendAnalyze", operation: "detect_framework", result: { framework: framework.framework, componentCount: framework.componentCount } });
96
- } catch { /* proceed without */ }
97
-
98
- // ── Phase 1: Swarm or direct? ─────────────────────────────────────
99
-
100
- const spawnDecision = shouldSpawnSubAgents(scopeMap);
101
- let swarmFindings = [];
102
-
103
- if (spawnDecision.spawn && blackboard) {
104
- yield emit("progress", { phase: "swarm", message: `Large frontend (${spawnDecision.reason}). Spawning sub-agents...` });
105
-
106
- const swarmResult = await runJulesSwarm({
107
- scopeMap,
108
- rootPath,
109
- blackboard,
110
- budget: { ...budget, maxCostUsd: budget.maxCostUsd * 0.6 }, // 60% for swarm
111
- provider,
112
- parentAbort: abortController,
113
- onEvent,
114
- });
115
-
116
- swarmFindings = swarmResult.agentResults.flatMap(r => r.findings);
117
- ctx.usage.costUsd += swarmResult.usage.totalCostUsd;
118
- ctx.usage.toolCalls += swarmResult.usage.totalToolCalls;
119
-
120
- yield emit("swarm_complete", {
121
- totalFindings: swarmFindings.length,
122
- totalAgents: swarmResult.usage.totalAgents,
123
- totalCostUsd: swarmResult.usage.totalCostUsd,
124
- });
125
- }
126
-
127
- // ── Phase 2: Jules primary deep analysis (agentic LLM loop) ──────
128
-
129
- yield emit("progress", { phase: "deep_analysis", message: "Starting deep analysis..." });
130
-
131
- // Build context for LLM — BLIND-FIRST: no Omar baseline or swarm findings
132
- // in the initial context. Only codebase metadata and memory recall (past runs,
133
- // not current-run findings). Swarm/baseline reconciliation happens AFTER the
134
- // independent deep analysis completes.
135
- const contextParts = [];
136
- contextParts.push(`Framework: ${framework.framework || "unknown"}`);
137
- contextParts.push(`Mode: ${mode}`);
138
- contextParts.push(`Components: ${framework.componentCount || "unknown"}`);
139
- contextParts.push(`Scope: ${(scopeMap.primary || []).length} primary files`);
140
-
141
- if (memory) {
142
- try {
143
- const recalled = memory.query ? memory.query({
144
- files: (scopeMap.primary || []).map(f => f.path || f),
145
- limit: 10,
146
- }) : [];
147
- if (recalled.length > 0) {
148
- contextParts.push(`\nPrevious findings recalled from memory (${recalled.length}):`);
149
- for (const r of recalled) {
150
- contextParts.push(`- ${r.content || r.text || JSON.stringify(r).slice(0, 100)}`);
151
- }
152
- }
153
- } catch { /* memory recall failure is non-blocking */ }
154
- }
155
-
156
- const messages = [
157
- { role: "user", content: contextParts.join("\n") +
158
- "\n\nPerform your deep analysis now. Use FileRead, Grep, Glob, and FrontendAnalyze tools as needed. " +
159
- "Return your findings in a ```json code block as an array of { severity, file, line, title, evidence, rootCause, recommendedFix, trafficLight, reproduction, user_impact, confidence }." },
160
- ];
161
-
162
- const allFindings = [...swarmFindings];
163
- let turnCount = 0;
164
-
165
- while (turnCount < maxTurns) {
166
- if (abortController?.signal.aborted) {
167
- yield emit("agent_abort", { reason: "user_cancelled" });
168
- break;
169
- }
170
-
171
- // Budget check before LLM call
172
- const preCheck = evaluateBudget({
173
- sessionSummary: {
174
- costUsd: ctx.usage.costUsd,
175
- outputTokens: ctx.usage.outputTokens,
176
- durationMs: Date.now() - startedAt,
177
- toolCalls: ctx.usage.toolCalls,
178
- },
179
- ...budget,
180
- });
181
-
182
- if (preCheck.blocking) {
183
- yield emit("budget_stop", { reasons: preCheck.reasons });
184
- break;
185
- }
186
-
187
- if (preCheck.warnings.length > 0) {
188
- yield emit("budget_warning", { warnings: preCheck.warnings });
189
- }
190
-
191
- turnCount++;
192
-
193
- // Heartbeat
194
- if (turnCount % HEARTBEAT_INTERVAL_TURNS === 0) {
195
- yield emit("heartbeat", {
196
- turnsCompleted: turnCount,
197
- turnsMax: maxTurns,
198
- findingsSoFar: allFindings.length,
199
- budgetRemaining: {
200
- costUsd: Math.max(0, budget.maxCostUsd - ctx.usage.costUsd),
201
- pct: Math.max(0, 100 - (ctx.usage.costUsd / budget.maxCostUsd * 100)),
202
- },
203
- });
204
- }
205
-
206
- // Call LLM — format system prompt + messages into a single prompt
207
- // for the MultiProviderApiClient which uses a completions-style API
208
- let response;
209
- try {
210
- response = await client.invoke({
211
- prompt: formatPromptForClient(systemPrompt, messages),
212
- });
213
- } catch (err) {
214
- yield emit("llm_error", { error: err.message, turn: turnCount });
215
- break;
216
- }
217
-
218
- const responseText = response.text || "";
219
- ctx.usage.outputTokens += Math.ceil(responseText.length / 4);
220
- ctx.usage.costUsd += (Math.ceil(responseText.length / 4) / 1_000_000) * 15;
221
-
222
- yield emit("reasoning", {
223
- phase: "deep_analysis",
224
- turn: turnCount,
225
- summary: responseText.slice(0, 200),
226
- });
227
-
228
- // Parse tool_use blocks
229
- const toolCalls = parseToolUseBlocks(responseText);
230
-
231
- if (toolCalls.length === 0) {
232
- // No tools — extract findings from response
233
- const parsed = extractJsonFindings(responseText);
234
- for (const finding of parsed) {
235
- allFindings.push(finding);
236
- yield emit("finding", { ...finding });
237
- if (blackboard) {
238
- try {
239
- await blackboard.appendEntry({
240
- agentId: JULES_DEFINITION.id,
241
- source: "jules-primary",
242
- ...finding,
243
- });
244
- } catch { /* blackboard write failure non-blocking */ }
245
- }
246
- }
247
- messages.push({ role: "assistant", content: responseText });
248
- break; // LLM is done
249
- }
250
-
251
- // Execute tool calls
252
- const results = [];
253
- for (const call of toolCalls) {
254
- try {
255
- const result = await dispatchTool(call.tool, call.input, ctx);
256
- results.push({ tool: call.tool, result });
257
- yield emit("tool_call", { tool: call.tool, input: sanitizeForEvent(call.input) });
258
- } catch (err) {
259
- if (err instanceof BudgetExhaustedError) {
260
- yield emit("budget_stop", { reason: err.message });
261
- break;
262
- }
263
- results.push({ tool: call.tool, error: err.message });
264
- }
265
- }
266
-
267
- // Feed results back
268
- messages.push({ role: "assistant", content: responseText });
269
- messages.push({
270
- role: "user",
271
- content: results.map(r =>
272
- r.error
273
- ? `Tool ${r.tool} failed: ${r.error}`
274
- : `Tool ${r.tool} result:\n${JSON.stringify(r.result).slice(0, 3000)}`,
275
- ).join("\n\n") + "\n\nContinue your analysis. If done, return findings in a ```json code block.",
276
- });
277
- }
278
-
279
- // ── Phase 2b: Reconciliation (post-blind-pass) ─────────────────────
280
- // Now that the independent analysis is complete, cross-reference with
281
- // swarm findings and Omar baseline. This preserves blind-first: the
282
- // persona formed its own opinion before seeing prior conclusions.
283
-
284
- const hasSwarmContext = swarmFindings.length > 0;
285
- const baselineFindings = omarBaseline
286
- ? (omarBaseline.findings || omarBaseline.summary || [])
287
- : [];
288
- const hasBaselineContext = Array.isArray(baselineFindings) && baselineFindings.length > 0;
289
-
290
- if (hasSwarmContext || hasBaselineContext) {
291
- yield emit("progress", { phase: "reconciliation", message: "Cross-referencing with sub-agent and baseline findings..." });
292
-
293
- const reconcileParts = [];
294
- reconcileParts.push("Your independent analysis is complete. Now cross-reference with the following prior findings.");
295
- reconcileParts.push("For each prior finding: confirm if your analysis agrees, dispute with evidence if you disagree, or flag as missed if you did not cover it.");
296
-
297
- if (hasSwarmContext) {
298
- reconcileParts.push(`\nYour sub-agents found ${swarmFindings.length} findings:`);
299
- for (const f of swarmFindings.slice(0, 30)) {
300
- reconcileParts.push(`- [${f.severity || "P3"}] ${f.file || ""}:${f.line || ""} ${f.title || f.type || ""}`);
301
- }
302
- }
303
-
304
- if (hasBaselineContext) {
305
- reconcileParts.push(`\nOmar baseline reported ${baselineFindings.length} findings:`);
306
- for (const f of baselineFindings.slice(0, 20)) {
307
- reconcileParts.push(`- [${f.severity || ""}] ${f.file || ""}:${f.line || ""} ${f.message || f.title || ""}`);
308
- }
309
- }
310
-
311
- reconcileParts.push("\nReturn any additional or revised findings as a JSON array in a ```json code block. If no changes, return an empty array [].");
312
-
313
- messages.push({ role: "user", content: reconcileParts.join("\n") });
314
-
315
- // Budget check before reconciliation turn
316
- const reconcilePreCheck = evaluateBudget({
317
- sessionSummary: {
318
- costUsd: ctx.usage.costUsd,
319
- outputTokens: ctx.usage.outputTokens,
320
- durationMs: Date.now() - startedAt,
321
- toolCalls: ctx.usage.toolCalls,
322
- },
323
- ...budget,
324
- });
325
-
326
- if (!reconcilePreCheck.blocking) {
327
- try {
328
- const reconcileResponse = await client.invoke({
329
- prompt: formatPromptForClient(systemPrompt, messages),
330
- });
331
-
332
- const reconcileText = reconcileResponse.text || "";
333
- ctx.usage.outputTokens += Math.ceil(reconcileText.length / 4);
334
- ctx.usage.costUsd += (Math.ceil(reconcileText.length / 4) / 1_000_000) * 15;
335
-
336
- yield emit("reasoning", { phase: "reconciliation", summary: reconcileText.slice(0, 200) });
337
-
338
- const reconcileFindings = extractJsonFindings(reconcileText);
339
- for (const finding of reconcileFindings) {
340
- allFindings.push(finding);
341
- yield emit("finding", { ...finding, source: "reconciliation" });
342
- if (blackboard) {
343
- try {
344
- await blackboard.appendEntry({
345
- agentId: JULES_DEFINITION.id,
346
- source: "jules-reconciliation",
347
- ...finding,
348
- });
349
- } catch { /* blackboard write failure non-blocking */ }
350
- }
351
- }
352
-
353
- messages.push({ role: "assistant", content: reconcileText });
354
- } catch (err) {
355
- yield emit("llm_error", { error: err.message, phase: "reconciliation" });
356
- }
357
- } else {
358
- yield emit("budget_stop", { reasons: reconcilePreCheck.reasons, phase: "reconciliation" });
359
- }
360
- }
361
-
362
- // ── Phase 3: Build final report ───────────────────────────────────
363
-
364
- const durationMs = Date.now() - startedAt;
365
- const severityCounts = { P0: 0, P1: 0, P2: 0, P3: 0 };
366
- for (const f of allFindings) {
367
- const sev = (f.severity || "P3").toUpperCase();
368
- if (severityCounts[sev] !== undefined) severityCounts[sev]++;
369
- else severityCounts.P3++;
370
- }
371
-
372
- const report = {
373
- runId,
374
- persona: JULES_DEFINITION.persona,
375
- mode,
376
- framework: framework.framework || "unknown",
377
- status: "completed",
378
- findings: allFindings,
379
- summary: {
380
- total: allFindings.length,
381
- ...severityCounts,
382
- blocking: severityCounts.P0 > 0 || severityCounts.P1 > 0,
383
- },
384
- usage: {
385
- turns: turnCount,
386
- costUsd: ctx.usage.costUsd,
387
- outputTokens: ctx.usage.outputTokens,
388
- toolCalls: ctx.usage.toolCalls,
389
- durationMs,
390
- },
391
- signature: JULES_DEFINITION.signature,
392
- };
393
-
394
- yield emit("agent_complete", {
395
- ...report.summary,
396
- costUsd: ctx.usage.costUsd,
397
- durationMs,
398
- turns: turnCount,
399
- });
400
-
401
- return report;
402
- }
403
-
404
- // ── Helpers ──────────────────────────────────────────────────────────
405
-
406
- function parseToolUseBlocks(text) {
407
- const calls = [];
408
- const regex = /```tool_use\s*\n([\s\S]*?)```/g;
409
- let match;
410
- while ((match = regex.exec(text)) !== null) {
411
- try {
412
- const parsed = JSON.parse(match[1].trim());
413
- if (parsed.tool && parsed.input) calls.push(parsed);
414
- } catch { /* skip malformed */ }
415
- }
416
- return calls;
417
- }
418
-
419
- function extractJsonFindings(text) {
420
- const jsonMatch = text.match(/```json\s*\n([\s\S]*?)```/);
421
- if (!jsonMatch) return [];
422
- try {
423
- const parsed = JSON.parse(jsonMatch[1].trim());
424
- if (Array.isArray(parsed)) return parsed;
425
- if (parsed.findings && Array.isArray(parsed.findings)) return parsed.findings;
426
- } catch { /* skip malformed */ }
427
- return [];
428
- }
429
-
430
- function sanitizeForEvent(input) {
431
- const sanitized = { ...input };
432
- if (typeof sanitized.content === "string" && sanitized.content.length > 200) {
433
- sanitized.content = `[${sanitized.content.length} chars]`;
434
- }
435
- return sanitized;
436
- }
437
-
438
- /**
439
- * Format system prompt + chat messages into a single prompt string
440
- * for MultiProviderApiClient which uses a completions-style API.
441
- */
442
- function formatPromptForClient(systemPrompt, messages) {
443
- const parts = [];
444
- if (systemPrompt) parts.push(systemPrompt);
445
- for (const msg of messages) {
446
- const role = msg.role === "assistant" ? "ASSISTANT" : "USER";
447
- parts.push(`\n${role}:\n${msg.content}`);
448
- }
449
- return parts.join("\n");
450
- }
1
+ import { randomUUID } from "node:crypto";
2
+ import { createMultiProviderApiClient } from "../../ai/client.js";
3
+ import { evaluateBudget } from "../../cost/budget.js";
4
+ import { dispatchTool, createAgentContext, BudgetExhaustedError } from "./tools/dispatch.js";
5
+ import { JULES_DEFINITION } from "./config/definition.js";
6
+ import { shouldSpawnSubAgents, runJulesSwarm } from "./swarm/orchestrator.js";
7
+ import { frontendAnalyze } from "./tools/frontend-analyze.js";
8
+ import { createAgentEvent } from "../../events/schema.js";
9
+
10
+ /**
11
+ * Jules Tanaka — Agentic Loop
12
+ *
13
+ * Core state machine: LLM tool_use → execute → result → LLM → repeat
14
+ * With sub-agent swarm integration for large codebases.
15
+ *
16
+ * This loop is self-contained: it uses the existing ai/client.js for LLM calls,
17
+ * the existing cost/budget.js for budget enforcement, and the Jules tool
18
+ * dispatch for tool execution. No dependency on Batches O-Q.
19
+ */
20
+
21
+ const DEFAULT_MAX_TURNS = 25;
22
+ const HEARTBEAT_INTERVAL_TURNS = 5;
23
+
24
+ /**
25
+ * Run Jules' agentic audit loop.
26
+ *
27
+ * @param {object} config
28
+ * @param {string} config.systemPrompt - Jules' full system prompt
29
+ * @param {object} config.scopeMap - { primary, secondary, tertiary } file lists
30
+ * @param {string} config.rootPath - Codebase root
31
+ * @param {object} [config.omarBaseline] - Deterministic baseline findings (if available)
32
+ * @param {object} [config.blackboard] - Shared blackboard for cross-agent findings
33
+ * @param {object} [config.memory] - Memory index for cross-run recall
34
+ * @param {object} [config.budget] - Budget overrides
35
+ * @param {object} [config.provider] - LLM provider overrides
36
+ * @param {string} [config.mode] - "primary" | "secondary" | "tertiary"
37
+ * @param {number} [config.maxTurns] - Max loop iterations
38
+ * @param {AbortController} [config.abortController]
39
+ * @param {function} [config.onEvent] - Streaming event callback
40
+ * @returns {AsyncGenerator<JulesEvent>} Yields events as they occur
41
+ */
42
+ export async function* julesAuditLoop(config) {
43
+ const {
44
+ systemPrompt,
45
+ scopeMap,
46
+ rootPath,
47
+ omarBaseline,
48
+ blackboard,
49
+ memory,
50
+ provider,
51
+ mode = "primary",
52
+ maxTurns = DEFAULT_MAX_TURNS,
53
+ abortController,
54
+ onEvent,
55
+ } = config;
56
+
57
+ const budget = { ...JULES_DEFINITION.budget, ...config.budget };
58
+ const runId = `jules-${Date.now()}-${randomUUID().slice(0, 8)}`;
59
+ const startedAt = Date.now();
60
+ const client = createMultiProviderApiClient(provider || {});
61
+
62
+ const ctx = createAgentContext({
63
+ agentIdentity: { id: JULES_DEFINITION.id, persona: JULES_DEFINITION.persona },
64
+ budget,
65
+ runId,
66
+ onEvent,
67
+ });
68
+
69
+ const emit = (event, payload) => {
70
+ const evt = createAgentEvent({
71
+ event,
72
+ agent: {
73
+ id: JULES_DEFINITION.id,
74
+ persona: JULES_DEFINITION.persona,
75
+ color: JULES_DEFINITION.color,
76
+ avatar: JULES_DEFINITION.avatar,
77
+ },
78
+ payload,
79
+ usage: {
80
+ costUsd: ctx.usage.costUsd,
81
+ outputTokens: ctx.usage.outputTokens,
82
+ toolCalls: ctx.usage.toolCalls,
83
+ durationMs: Date.now() - startedAt,
84
+ },
85
+ runId,
86
+ sessionId: ctx.sessionId,
87
+ });
88
+ if (onEvent) onEvent(evt);
89
+ return evt;
90
+ };
91
+
92
+ yield emit("agent_start", { mode, runId, maxTurns, budget });
93
+
94
+ // ── Phase 0: Prerequisites ────────────────────────────────────────
95
+
96
+ yield emit("progress", { phase: "prerequisites", message: "Detecting framework..." });
97
+
98
+ let framework = {};
99
+ try {
100
+ framework = frontendAnalyze({ operation: "detect_framework", path: rootPath });
101
+ ctx.usage.toolCalls++;
102
+ yield emit("tool_result", { tool: "FrontendAnalyze", operation: "detect_framework", result: { framework: framework.framework, componentCount: framework.componentCount } });
103
+ } catch { /* proceed without */ }
104
+
105
+ // ── Phase 1: Swarm or direct? ─────────────────────────────────────
106
+
107
+ const spawnDecision = shouldSpawnSubAgents(scopeMap);
108
+ let swarmFindings = [];
109
+
110
+ if (spawnDecision.spawn && blackboard) {
111
+ yield emit("progress", { phase: "swarm", message: `Large frontend (${spawnDecision.reason}). Spawning sub-agents...` });
112
+
113
+ const swarmResult = await runJulesSwarm({
114
+ scopeMap,
115
+ rootPath,
116
+ blackboard,
117
+ budget: { ...budget, maxCostUsd: budget.maxCostUsd * 0.6 }, // 60% for swarm
118
+ provider,
119
+ parentAbort: abortController,
120
+ onEvent,
121
+ });
122
+
123
+ swarmFindings = swarmResult.agentResults.flatMap(r => r.findings);
124
+ ctx.usage.costUsd += swarmResult.usage.totalCostUsd;
125
+ ctx.usage.toolCalls += swarmResult.usage.totalToolCalls;
126
+
127
+ yield emit("swarm_complete", {
128
+ totalFindings: swarmFindings.length,
129
+ totalAgents: swarmResult.usage.totalAgents,
130
+ totalCostUsd: swarmResult.usage.totalCostUsd,
131
+ });
132
+ }
133
+
134
+ // ── Phase 2: Jules primary deep analysis (agentic LLM loop) ──────
135
+
136
+ yield emit("progress", { phase: "deep_analysis", message: "Starting deep analysis..." });
137
+
138
+ // Build context for LLM — BLIND-FIRST: no Omar baseline or swarm findings
139
+ // in the initial context. Only codebase metadata and memory recall (past runs,
140
+ // not current-run findings). Swarm/baseline reconciliation happens AFTER the
141
+ // independent deep analysis completes.
142
+ const contextParts = [];
143
+ contextParts.push(`Framework: ${framework.framework || "unknown"}`);
144
+ contextParts.push(`Mode: ${mode}`);
145
+ contextParts.push(`Components: ${framework.componentCount || "unknown"}`);
146
+ contextParts.push(`Scope: ${(scopeMap.primary || []).length} primary files`);
147
+
148
+ if (memory) {
149
+ try {
150
+ const recalled = memory.query ? memory.query({
151
+ files: (scopeMap.primary || []).map(f => f.path || f),
152
+ limit: 10,
153
+ }) : [];
154
+ if (recalled.length > 0) {
155
+ contextParts.push(`\nPrevious findings recalled from memory (${recalled.length}):`);
156
+ for (const r of recalled) {
157
+ contextParts.push(`- ${r.content || r.text || JSON.stringify(r).slice(0, 100)}`);
158
+ }
159
+ }
160
+ } catch { /* memory recall failure is non-blocking */ }
161
+ }
162
+
163
+ const messages = [
164
+ { role: "user", content: contextParts.join("\n") +
165
+ "\n\nPerform your deep analysis now. Use FileRead, Grep, Glob, and FrontendAnalyze tools as needed. " +
166
+ "Return your findings in a ```json code block as an array of { severity, file, line, title, evidence, rootCause, recommendedFix, trafficLight, reproduction, user_impact, confidence }." },
167
+ ];
168
+
169
+ const allFindings = [...swarmFindings];
170
+ let turnCount = 0;
171
+
172
+ while (turnCount < maxTurns) {
173
+ if (abortController?.signal.aborted) {
174
+ yield emit("agent_abort", { reason: "user_cancelled" });
175
+ break;
176
+ }
177
+
178
+ // Budget check before LLM call
179
+ const preCheck = evaluateBudget({
180
+ sessionSummary: {
181
+ costUsd: ctx.usage.costUsd,
182
+ outputTokens: ctx.usage.outputTokens,
183
+ durationMs: Date.now() - startedAt,
184
+ toolCalls: ctx.usage.toolCalls,
185
+ },
186
+ ...budget,
187
+ });
188
+
189
+ if (preCheck.blocking) {
190
+ yield emit("budget_stop", { reasons: preCheck.reasons });
191
+ break;
192
+ }
193
+
194
+ if (preCheck.warnings.length > 0) {
195
+ yield emit("budget_warning", { warnings: preCheck.warnings });
196
+ }
197
+
198
+ turnCount++;
199
+
200
+ // Heartbeat
201
+ if (turnCount % HEARTBEAT_INTERVAL_TURNS === 0) {
202
+ yield emit("heartbeat", {
203
+ turnsCompleted: turnCount,
204
+ turnsMax: maxTurns,
205
+ findingsSoFar: allFindings.length,
206
+ budgetRemaining: {
207
+ costUsd: Math.max(0, budget.maxCostUsd - ctx.usage.costUsd),
208
+ pct: Math.max(0, 100 - (ctx.usage.costUsd / budget.maxCostUsd * 100)),
209
+ },
210
+ });
211
+ }
212
+
213
+ // Call LLM — format system prompt + messages into a single prompt
214
+ // for the MultiProviderApiClient which uses a completions-style API
215
+ let response;
216
+ try {
217
+ response = await client.invoke({
218
+ prompt: formatPromptForClient(systemPrompt, messages),
219
+ });
220
+ } catch (err) {
221
+ yield emit("llm_error", { error: err.message, turn: turnCount });
222
+ break;
223
+ }
224
+
225
+ const responseText = response.text || "";
226
+ ctx.usage.outputTokens += Math.ceil(responseText.length / 4);
227
+ ctx.usage.costUsd += (Math.ceil(responseText.length / 4) / 1_000_000) * 15;
228
+
229
+ yield emit("reasoning", {
230
+ phase: "deep_analysis",
231
+ turn: turnCount,
232
+ summary: responseText.slice(0, 200),
233
+ });
234
+
235
+ // Parse tool_use blocks
236
+ const toolCalls = parseToolUseBlocks(responseText);
237
+
238
+ if (toolCalls.length === 0) {
239
+ // No tools — extract findings from response
240
+ const parsed = extractJsonFindings(responseText);
241
+ for (const finding of parsed) {
242
+ allFindings.push(finding);
243
+ yield emit("finding", { ...finding });
244
+ if (blackboard) {
245
+ try {
246
+ await blackboard.appendEntry({
247
+ agentId: JULES_DEFINITION.id,
248
+ source: "jules-primary",
249
+ ...finding,
250
+ });
251
+ } catch { /* blackboard write failure non-blocking */ }
252
+ }
253
+ }
254
+ messages.push({ role: "assistant", content: responseText });
255
+ break; // LLM is done
256
+ }
257
+
258
+ // Execute tool calls
259
+ const results = [];
260
+ for (const call of toolCalls) {
261
+ try {
262
+ const result = await dispatchTool(call.tool, call.input, ctx);
263
+ results.push({ tool: call.tool, result });
264
+ yield emit("tool_call", { tool: call.tool, input: sanitizeForEvent(call.input) });
265
+ } catch (err) {
266
+ if (err instanceof BudgetExhaustedError) {
267
+ yield emit("budget_stop", { reason: err.message });
268
+ break;
269
+ }
270
+ results.push({ tool: call.tool, error: err.message });
271
+ }
272
+ }
273
+
274
+ // Feed results back
275
+ messages.push({ role: "assistant", content: responseText });
276
+ messages.push({
277
+ role: "user",
278
+ content: results.map(r =>
279
+ r.error
280
+ ? `Tool ${r.tool} failed: ${r.error}`
281
+ : `Tool ${r.tool} result:\n${JSON.stringify(r.result).slice(0, 3000)}`,
282
+ ).join("\n\n") + "\n\nContinue your analysis. If done, return findings in a ```json code block.",
283
+ });
284
+ }
285
+
286
+ // ── Phase 2b: Reconciliation (post-blind-pass) ─────────────────────
287
+ // Now that the independent analysis is complete, cross-reference with
288
+ // swarm findings and Omar baseline. This preserves blind-first: the
289
+ // persona formed its own opinion before seeing prior conclusions.
290
+
291
+ const hasSwarmContext = swarmFindings.length > 0;
292
+ const baselineFindings = omarBaseline
293
+ ? (omarBaseline.findings || omarBaseline.summary || [])
294
+ : [];
295
+ const hasBaselineContext = Array.isArray(baselineFindings) && baselineFindings.length > 0;
296
+
297
+ if (hasSwarmContext || hasBaselineContext) {
298
+ yield emit("progress", { phase: "reconciliation", message: "Cross-referencing with sub-agent and baseline findings..." });
299
+
300
+ const reconcileParts = [];
301
+ reconcileParts.push("Your independent analysis is complete. Now cross-reference with the following prior findings.");
302
+ reconcileParts.push("For each prior finding: confirm if your analysis agrees, dispute with evidence if you disagree, or flag as missed if you did not cover it.");
303
+
304
+ if (hasSwarmContext) {
305
+ reconcileParts.push(`\nYour sub-agents found ${swarmFindings.length} findings:`);
306
+ for (const f of swarmFindings.slice(0, 30)) {
307
+ reconcileParts.push(`- [${f.severity || "P3"}] ${f.file || ""}:${f.line || ""} ${f.title || f.type || ""}`);
308
+ }
309
+ }
310
+
311
+ if (hasBaselineContext) {
312
+ reconcileParts.push(`\nOmar baseline reported ${baselineFindings.length} findings:`);
313
+ for (const f of baselineFindings.slice(0, 20)) {
314
+ reconcileParts.push(`- [${f.severity || ""}] ${f.file || ""}:${f.line || ""} ${f.message || f.title || ""}`);
315
+ }
316
+ }
317
+
318
+ reconcileParts.push("\nReturn any additional or revised findings as a JSON array in a ```json code block. If no changes, return an empty array [].");
319
+
320
+ messages.push({ role: "user", content: reconcileParts.join("\n") });
321
+
322
+ // Budget check before reconciliation turn
323
+ const reconcilePreCheck = evaluateBudget({
324
+ sessionSummary: {
325
+ costUsd: ctx.usage.costUsd,
326
+ outputTokens: ctx.usage.outputTokens,
327
+ durationMs: Date.now() - startedAt,
328
+ toolCalls: ctx.usage.toolCalls,
329
+ },
330
+ ...budget,
331
+ });
332
+
333
+ if (!reconcilePreCheck.blocking) {
334
+ try {
335
+ const reconcileResponse = await client.invoke({
336
+ prompt: formatPromptForClient(systemPrompt, messages),
337
+ });
338
+
339
+ const reconcileText = reconcileResponse.text || "";
340
+ ctx.usage.outputTokens += Math.ceil(reconcileText.length / 4);
341
+ ctx.usage.costUsd += (Math.ceil(reconcileText.length / 4) / 1_000_000) * 15;
342
+
343
+ yield emit("reasoning", { phase: "reconciliation", summary: reconcileText.slice(0, 200) });
344
+
345
+ const reconcileFindings = extractJsonFindings(reconcileText);
346
+ for (const finding of reconcileFindings) {
347
+ allFindings.push(finding);
348
+ yield emit("finding", { ...finding, source: "reconciliation" });
349
+ if (blackboard) {
350
+ try {
351
+ await blackboard.appendEntry({
352
+ agentId: JULES_DEFINITION.id,
353
+ source: "jules-reconciliation",
354
+ ...finding,
355
+ });
356
+ } catch { /* blackboard write failure non-blocking */ }
357
+ }
358
+ }
359
+
360
+ messages.push({ role: "assistant", content: reconcileText });
361
+ } catch (err) {
362
+ yield emit("llm_error", { error: err.message, phase: "reconciliation" });
363
+ }
364
+ } else {
365
+ yield emit("budget_stop", { reasons: reconcilePreCheck.reasons, phase: "reconciliation" });
366
+ }
367
+ }
368
+
369
+ // ── Phase 3: Build final report ───────────────────────────────────
370
+
371
+ const durationMs = Date.now() - startedAt;
372
+ const severityCounts = { P0: 0, P1: 0, P2: 0, P3: 0 };
373
+ for (const f of allFindings) {
374
+ const sev = (f.severity || "P3").toUpperCase();
375
+ if (severityCounts[sev] !== undefined) severityCounts[sev]++;
376
+ else severityCounts.P3++;
377
+ }
378
+
379
+ const report = {
380
+ runId,
381
+ persona: JULES_DEFINITION.persona,
382
+ mode,
383
+ framework: framework.framework || "unknown",
384
+ status: "completed",
385
+ findings: allFindings,
386
+ summary: {
387
+ total: allFindings.length,
388
+ ...severityCounts,
389
+ blocking: severityCounts.P0 > 0 || severityCounts.P1 > 0,
390
+ },
391
+ usage: {
392
+ turns: turnCount,
393
+ costUsd: ctx.usage.costUsd,
394
+ outputTokens: ctx.usage.outputTokens,
395
+ toolCalls: ctx.usage.toolCalls,
396
+ durationMs,
397
+ },
398
+ signature: JULES_DEFINITION.signature,
399
+ };
400
+
401
+ yield emit("agent_complete", {
402
+ ...report.summary,
403
+ costUsd: ctx.usage.costUsd,
404
+ durationMs,
405
+ turns: turnCount,
406
+ });
407
+
408
+ return report;
409
+ }
410
+
411
+ // ── Helpers ──────────────────────────────────────────────────────────
412
+
413
/**
 * Collect tool invocations from ```tool_use fenced blocks in an LLM response.
 *
 * Each fenced block body is trimmed and parsed as JSON. A block is kept only
 * when the parsed value has a truthy `tool` and a truthy `input`; blocks that
 * fail to parse (or parse to something without those properties) are skipped.
 *
 * @param {string} text - Raw response text to scan.
 * @returns {Array<object>} Parsed tool-call objects, in order of appearance.
 */
function parseToolUseBlocks(text) {
  const fencePattern = /```tool_use\s*\n([\s\S]*?)```/g;
  const toolCalls = [];
  for (const [, payload] of String(text).matchAll(fencePattern)) {
    try {
      const candidate = JSON.parse(payload.trim());
      // Property access stays inside the try: a payload of `null` parses
      // successfully but throws on `.tool`, and must be skipped like any
      // other malformed block.
      if (!candidate.tool || !candidate.input) continue;
      toolCalls.push(candidate);
    } catch {
      /* skip malformed */
    }
  }
  return toolCalls;
}
425
+
426
/**
 * Extract findings from the first ```json fenced block in an LLM response.
 *
 * The block body may be either a JSON array of findings or an object with a
 * `findings` array. Entries that are not objects (null, numbers, strings,
 * nested arrays) are dropped: consumers read properties such as `severity`
 * off every entry, and a stray `null` would otherwise throw there.
 *
 * @param {string} text - Raw response text to scan.
 * @returns {Array<object>} Finding objects; empty when there is no json block,
 *   the block is not valid JSON, or it holds no usable findings.
 */
function extractJsonFindings(text) {
  if (typeof text !== "string") return [];

  const jsonMatch = text.match(/```json\s*\n([\s\S]*?)```/);
  if (!jsonMatch) return [];

  let parsed;
  try {
    parsed = JSON.parse(jsonMatch[1].trim());
  } catch {
    // Malformed JSON from the model is expected occasionally; treat as "no findings".
    return [];
  }

  let candidates = [];
  if (Array.isArray(parsed)) {
    candidates = parsed;
  } else if (Array.isArray(parsed?.findings)) {
    candidates = parsed.findings;
  }

  // Keep only object records so downstream property reads are safe.
  return candidates.filter(
    (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry),
  );
}
436
+
437
/**
 * Produce a shallow copy of a tool input that is safe to attach to an event.
 *
 * If the copy has a string `content` longer than 200 characters, that field is
 * replaced with a `[N chars]` placeholder. All other fields are copied as-is,
 * and the caller's object is never modified.
 *
 * @param {object} input - Tool input as parsed from the model's tool call.
 * @returns {object} Shallow copy with an oversized `content` summarized.
 */
function sanitizeForEvent(input) {
  const eventSafe = { ...input };
  const { content } = eventSafe;

  if (typeof content !== "string" || content.length <= 200) {
    return eventSafe;
  }

  eventSafe.content = `[${content.length} chars]`;
  return eventSafe;
}
444
+
445
/**
 * Flatten a system prompt and a chat transcript into one prompt string.
 *
 * The system prompt (when truthy) comes first. Each message follows as a
 * section headed `ASSISTANT:` for role "assistant" and `USER:` for any other
 * role, with a blank line separating consecutive sections.
 *
 * @param {string} systemPrompt - Leading instructions; omitted when falsy.
 * @param {Iterable<{role: string, content: string}>} messages - Chat turns in order.
 * @returns {string} The combined prompt text.
 */
function formatPromptForClient(systemPrompt, messages) {
  const turnSections = [...messages].map((turn) => {
    const speaker = turn.role === "assistant" ? "ASSISTANT" : "USER";
    return `\n${speaker}:\n${turn.content}`;
  });
  const sections = systemPrompt ? [systemPrompt, ...turnSections] : turnSections;
  return sections.join("\n");
}