htmlgraph 0.23.4__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in that registry.
- htmlgraph/__init__.py +5 -1
- htmlgraph/cigs/__init__.py +77 -0
- htmlgraph/cigs/autonomy.py +385 -0
- htmlgraph/cigs/cost.py +475 -0
- htmlgraph/cigs/messages_basic.py +472 -0
- htmlgraph/cigs/messaging.py +365 -0
- htmlgraph/cigs/models.py +771 -0
- htmlgraph/cigs/pattern_storage.py +427 -0
- htmlgraph/cigs/patterns.py +503 -0
- htmlgraph/cigs/posttool_analyzer.py +234 -0
- htmlgraph/cigs/tracker.py +317 -0
- htmlgraph/cli.py +325 -11
- htmlgraph/hooks/cigs_pretool_enforcer.py +350 -0
- htmlgraph/hooks/posttooluse.py +50 -2
- htmlgraph/hooks/task_enforcer.py +60 -4
- htmlgraph/models.py +14 -1
- htmlgraph/orchestration/headless_spawner.py +525 -35
- htmlgraph/orchestrator-system-prompt-optimized.txt +259 -53
- htmlgraph/reflection.py +442 -0
- htmlgraph/sdk.py +26 -9
- {htmlgraph-0.23.4.dist-info → htmlgraph-0.24.0.dist-info}/METADATA +2 -1
- {htmlgraph-0.23.4.dist-info → htmlgraph-0.24.0.dist-info}/RECORD +29 -17
- {htmlgraph-0.23.4.data → htmlgraph-0.24.0.data}/data/htmlgraph/dashboard.html +0 -0
- {htmlgraph-0.23.4.data → htmlgraph-0.24.0.data}/data/htmlgraph/styles.css +0 -0
- {htmlgraph-0.23.4.data → htmlgraph-0.24.0.data}/data/htmlgraph/templates/AGENTS.md.template +0 -0
- {htmlgraph-0.23.4.data → htmlgraph-0.24.0.data}/data/htmlgraph/templates/CLAUDE.md.template +0 -0
- {htmlgraph-0.23.4.data → htmlgraph-0.24.0.data}/data/htmlgraph/templates/GEMINI.md.template +0 -0
- {htmlgraph-0.23.4.dist-info → htmlgraph-0.24.0.dist-info}/WHEEL +0 -0
- {htmlgraph-0.23.4.dist-info → htmlgraph-0.24.0.dist-info}/entry_points.txt +0 -0
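For orientation before the diff: a minimal usage sketch of the tracking-aware spawner API in 0.24.0, inferred from the headless_spawner.py changes below. The import path mirrors the file layout above, the prompt text is invented, and whether an HtmlGraph session is active at runtime is an assumption; tracking degrades to no-ops when the SDK is unavailable.

# Hedged usage sketch (not taken verbatim from the package docs).
from htmlgraph.orchestration.headless_spawner import HeadlessSpawner

spawner = HeadlessSpawner()

# New in 0.24.0: output_format defaults to "stream-json" and HtmlGraph activity
# tracking is on by default (track_in_htmlgraph=True).
result = spawner.spawn_gemini(
    prompt="Summarize the open TODOs in this repository",  # invented example prompt
    timeout=120,
)

if result.success:
    print(result.response)
    # New AIResult field in 0.24.0: events tracked in HtmlGraph (empty when
    # tracking was disabled or no SDK/session was available).
    for event in result.tracked_events or []:
        print(event.get("type"))
else:
    print(result.error)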
htmlgraph/orchestration/headless_spawner.py

@@ -1,8 +1,13 @@
 """Headless AI spawner for multi-AI orchestration."""
 
 import json
+import os
 import subprocess
 from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from htmlgraph.sdk import SDK
 
 
 @dataclass
@@ -14,6 +19,7 @@ class AIResult:
     tokens_used: int | None
     error: str | None
     raw_output: dict | list | str | None
+    tracked_events: list[dict] | None = None  # Events tracked in HtmlGraph
 
 
 class HeadlessSpawner:
@@ -59,13 +65,316 @@ class HeadlessSpawner:
         """Initialize spawner."""
         pass
 
+    def _get_sdk(self) -> "SDK | None":
+        """
+        Get SDK instance for HtmlGraph tracking with parent session support.
+
+        Returns None if SDK unavailable.
+        """
+        try:
+            from htmlgraph.sdk import SDK
+
+            # Read parent session context from environment
+            parent_session = os.getenv("HTMLGRAPH_PARENT_SESSION")
+            parent_agent = os.getenv("HTMLGRAPH_PARENT_AGENT")
+
+            # Create SDK with parent session context
+            sdk = SDK(
+                agent=f"spawner-{parent_agent}" if parent_agent else "spawner",
+                parent_session=parent_session,  # Pass parent session
+            )
+
+            return sdk
+
+        except Exception:
+            # SDK unavailable or not properly initialized (optional dependency)
+            # This happens in test contexts without active sessions
+            # Don't log error to avoid noise in tests
+            return None
+
+    def _parse_and_track_gemini_events(
+        self, jsonl_output: str, sdk: "SDK"
+    ) -> list[dict]:
+        """
+        Parse Gemini stream-json events and track in HtmlGraph.
+
+        Args:
+            jsonl_output: JSONL output from Gemini CLI
+            sdk: HtmlGraph SDK instance for tracking
+
+        Returns:
+            Parsed events list
+        """
+        events = []
+
+        # Get parent context for metadata
+        parent_activity = os.getenv("HTMLGRAPH_PARENT_ACTIVITY")
+        nesting_depth_str = os.getenv("HTMLGRAPH_NESTING_DEPTH", "0")
+        nesting_depth = int(nesting_depth_str) if nesting_depth_str.isdigit() else 0
+
+        for line in jsonl_output.splitlines():
+            if not line.strip():
+                continue
+
+            try:
+                event = json.loads(line)
+                events.append(event)
+
+                # Track based on event type
+                event_type = event.get("type")
+
+                try:
+                    if event_type == "tool_use":
+                        tool_name = event.get("tool_name", "unknown_tool")
+                        parameters = event.get("parameters", {})
+                        payload = {
+                            "tool_name": tool_name,
+                            "parameters": parameters,
+                        }
+                        if parent_activity:
+                            payload["parent_activity"] = parent_activity
+                        if nesting_depth > 0:
+                            payload["nesting_depth"] = nesting_depth
+                        sdk.track_activity(
+                            tool="gemini_tool_call",
+                            summary=f"Gemini called {tool_name}",
+                            payload=payload,
+                        )
+
+                    elif event_type == "tool_result":
+                        status = event.get("status", "unknown")
+                        success = status == "success"
+                        tool_id = event.get("tool_id", "unknown")
+                        payload = {"tool_id": tool_id, "status": status}
+                        if parent_activity:
+                            payload["parent_activity"] = parent_activity
+                        if nesting_depth > 0:
+                            payload["nesting_depth"] = nesting_depth
+                        sdk.track_activity(
+                            tool="gemini_tool_result",
+                            summary=f"Gemini tool result: {status}",
+                            success=success,
+                            payload=payload,
+                        )
+
+                    elif event_type == "message":
+                        role = event.get("role")
+                        if role == "assistant":
+                            content = event.get("content", "")
+                            # Truncate for summary
+                            summary = (
+                                content[:100] + "..." if len(content) > 100 else content
+                            )
+                            payload = {"role": role, "content_length": len(content)}
+                            if parent_activity:
+                                payload["parent_activity"] = parent_activity
+                            if nesting_depth > 0:
+                                payload["nesting_depth"] = nesting_depth
+                            sdk.track_activity(
+                                tool="gemini_message",
+                                summary=f"Gemini: {summary}",
+                                payload=payload,
+                            )
+
+                    elif event_type == "result":
+                        stats = event.get("stats", {})
+                        payload = {"stats": stats}
+                        if parent_activity:
+                            payload["parent_activity"] = parent_activity
+                        if nesting_depth > 0:
+                            payload["nesting_depth"] = nesting_depth
+                        sdk.track_activity(
+                            tool="gemini_completion",
+                            summary="Gemini task completed",
+                            payload=payload,
+                        )
+                except Exception:
+                    # Tracking failure should not break parsing
+                    pass
+
+            except json.JSONDecodeError:
+                # Skip malformed lines
+                continue
+
+        return events
+
+    def _parse_and_track_codex_events(
+        self, jsonl_output: str, sdk: "SDK"
+    ) -> list[dict]:
+        """
+        Parse Codex JSONL events and track in HtmlGraph.
+
+        Args:
+            jsonl_output: JSONL output from Codex CLI
+            sdk: HtmlGraph SDK instance for tracking
+
+        Returns:
+            Parsed events list
+        """
+        events = []
+        parse_errors = []
+
+        # Get parent context for metadata
+        parent_activity = os.getenv("HTMLGRAPH_PARENT_ACTIVITY")
+        nesting_depth_str = os.getenv("HTMLGRAPH_NESTING_DEPTH", "0")
+        nesting_depth = int(nesting_depth_str) if nesting_depth_str.isdigit() else 0
+
+        for line_num, line in enumerate(jsonl_output.splitlines(), start=1):
+            if not line.strip():
+                continue
+
+            try:
+                event = json.loads(line)
+                events.append(event)
+
+                event_type = event.get("type")
+
+                try:
+                    # Track item.started events
+                    if event_type == "item.started":
+                        item = event.get("item", {})
+                        item_type = item.get("type")
+
+                        if item_type == "command_execution":
+                            command = item.get("command", "")
+                            payload = {"command": command}
+                            if parent_activity:
+                                payload["parent_activity"] = parent_activity
+                            if nesting_depth > 0:
+                                payload["nesting_depth"] = nesting_depth
+                            sdk.track_activity(
+                                tool="codex_command",
+                                summary=f"Codex executing: {command[:80]}",
+                                payload=payload,
+                            )
+
+                    # Track item.completed events
+                    elif event_type == "item.completed":
+                        item = event.get("item", {})
+                        item_type = item.get("type")
+
+                        if item_type == "file_change":
+                            path = item.get("path", "unknown")
+                            payload = {"path": path}
+                            if parent_activity:
+                                payload["parent_activity"] = parent_activity
+                            if nesting_depth > 0:
+                                payload["nesting_depth"] = nesting_depth
+                            sdk.track_activity(
+                                tool="codex_file_change",
+                                summary=f"Codex modified: {path}",
+                                file_paths=[path],
+                                payload=payload,
+                            )
+
+                        elif item_type == "agent_message":
+                            text = item.get("text", "")
+                            summary = text[:100] + "..." if len(text) > 100 else text
+                            payload = {"text_length": len(text)}
+                            if parent_activity:
+                                payload["parent_activity"] = parent_activity
+                            if nesting_depth > 0:
+                                payload["nesting_depth"] = nesting_depth
+                            sdk.track_activity(
+                                tool="codex_message",
+                                summary=f"Codex: {summary}",
+                                payload=payload,
+                            )
+
+                    # Track turn.completed for token usage
+                    elif event_type == "turn.completed":
+                        usage = event.get("usage", {})
+                        total_tokens = sum(usage.values())
+                        payload = {"usage": usage}
+                        if parent_activity:
+                            payload["parent_activity"] = parent_activity
+                        if nesting_depth > 0:
+                            payload["nesting_depth"] = nesting_depth
+                        sdk.track_activity(
+                            tool="codex_completion",
+                            summary=f"Codex turn completed ({total_tokens} tokens)",
+                            payload=payload,
+                        )
+                except Exception:
+                    # Tracking failure should not break parsing
+                    pass
+
+            except json.JSONDecodeError as e:
+                parse_errors.append(
+                    {
+                        "line_number": line_num,
+                        "error": str(e),
+                        "content": line[:100],
+                    }
+                )
+                continue
+
+        return events
+
+    def _parse_and_track_copilot_events(
+        self, prompt: str, response: str, sdk: "SDK"
+    ) -> list[dict]:
+        """
+        Track Copilot execution (start and result only).
+
+        Args:
+            prompt: Original prompt
+            response: Response from Copilot
+            sdk: HtmlGraph SDK instance for tracking
+
+        Returns:
+            Synthetic events list for consistency
+        """
+        events = []
+
+        # Get parent context for metadata
+        parent_activity = os.getenv("HTMLGRAPH_PARENT_ACTIVITY")
+        nesting_depth_str = os.getenv("HTMLGRAPH_NESTING_DEPTH", "0")
+        nesting_depth = int(nesting_depth_str) if nesting_depth_str.isdigit() else 0
+
+        try:
+            # Track start
+            start_event = {"type": "copilot_start", "prompt": prompt[:100]}
+            events.append(start_event)
+            payload: dict[str, str | int] = {"prompt_length": len(prompt)}
+            if parent_activity:
+                payload["parent_activity"] = parent_activity
+            if nesting_depth > 0:
+                payload["nesting_depth"] = nesting_depth
+            sdk.track_activity(
+                tool="copilot_start",
+                summary=f"Copilot started with prompt: {prompt[:80]}",
+                payload=payload,
+            )
+        except Exception:
+            pass
+
+        try:
+            # Track result
+            result_event = {"type": "copilot_result", "response": response[:100]}
+            events.append(result_event)
+            payload_result: dict[str, str | int] = {"response_length": len(response)}
+            if parent_activity:
+                payload_result["parent_activity"] = parent_activity
+            if nesting_depth > 0:
+                payload_result["nesting_depth"] = nesting_depth
+            sdk.track_activity(
+                tool="copilot_result",
+                summary=f"Copilot completed: {response[:80]}",
+                payload=payload_result,
+            )
+        except Exception:
+            pass
+
+        return events
+
     def spawn_gemini(
         self,
         prompt: str,
-        output_format: str = "json",
+        output_format: str = "stream-json",
         model: str | None = None,
         include_directories: list[str] | None = None,
-
+        track_in_htmlgraph: bool = True,
         timeout: int = 120,
     ) -> AIResult:
         """
@@ -73,15 +382,21 @@ class HeadlessSpawner:
 
         Args:
             prompt: Task description for Gemini
-            output_format: "json" or "stream-json"
+            output_format: "json" or "stream-json" (default: "stream-json" for real-time tracking)
             model: Model selection (e.g., "gemini-2.0-flash"). Default: None (uses default)
             include_directories: List of directories to include for context. Default: None
-
+            track_in_htmlgraph: Enable HtmlGraph activity tracking. Default: True
             timeout: Max seconds to wait
 
         Returns:
-            AIResult with response or error
+            AIResult with response or error and tracked events if tracking enabled
         """
+        # Initialize tracking if enabled
+        sdk: SDK | None = None
+        tracked_events: list[dict] = []
+        if track_in_htmlgraph:
+            sdk = self._get_sdk()
+
         try:
             # Build command based on tested pattern from spike spk-4029eef3
             cmd = ["gemini", "-p", prompt, "--output-format", output_format]
@@ -95,8 +410,20 @@ class HeadlessSpawner:
                 for directory in include_directories:
                     cmd.extend(["--include-directories", directory])
 
-            # Add
-            cmd.
+            # CRITICAL: Add --yolo for headless mode (auto-approve all tools)
+            cmd.append("--yolo")
+
+            # Track spawner start if SDK available
+            if sdk:
+                try:
+                    sdk.track_activity(
+                        tool="gemini_spawn_start",
+                        summary=f"Spawning Gemini: {prompt[:80]}",
+                        payload={"prompt_length": len(prompt), "model": model},
+                    )
+                except Exception:
+                    # Tracking failure should not break execution
+                    pass
 
             # Execute with timeout and stderr redirection
             # Note: Cannot use capture_output with stderr parameter
@@ -116,9 +443,58 @@ class HeadlessSpawner:
                     tokens_used=None,
                     error=f"Gemini CLI failed with exit code {result.returncode}",
                     raw_output=None,
+                    tracked_events=tracked_events,
                 )
 
-            #
+            # Handle stream-json format with real-time tracking
+            if output_format == "stream-json" and sdk:
+                try:
+                    tracked_events = self._parse_and_track_gemini_events(
+                        result.stdout, sdk
+                    )
+                    # Only use stream-json parsing if we got valid events
+                    if tracked_events:
+                        # For stream-json, we need to extract response differently
+                        # Look for the last message or result event
+                        response_text = ""
+                        for event in tracked_events:
+                            if event.get("type") == "result":
+                                response_text = event.get("response", "")
+                                break
+                            elif event.get("type") == "message":
+                                content = event.get("content", "")
+                                if content:
+                                    response_text = content
+
+                        # Token usage from stats in result event
+                        tokens = None
+                        for event in tracked_events:
+                            if event.get("type") == "result":
+                                stats = event.get("stats", {})
+                                if stats and "models" in stats:
+                                    total_tokens = 0
+                                    for model_stats in stats["models"].values():
+                                        model_tokens = model_stats.get(
+                                            "tokens", {}
+                                        ).get("total", 0)
+                                        total_tokens += model_tokens
+                                    tokens = total_tokens if total_tokens > 0 else None
+                                break
+
+                        return AIResult(
+                            success=True,
+                            response=response_text,
+                            tokens_used=tokens,
+                            error=None,
+                            raw_output={"events": tracked_events},
+                            tracked_events=tracked_events,
+                        )
+
+                except Exception:
+                    # Fall back to regular JSON parsing if tracking fails
+                    pass
+
+            # Parse JSON response (for json format or fallback)
             try:
                 output = json.loads(result.stdout)
             except json.JSONDecodeError as e:
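The stream-json path above dispatches on four Gemini event types (tool_use, tool_result, message, result) and then derives the response text and token total from the result event. A hedged, self-contained illustration of that extraction; the JSONL lines are invented and the real Gemini CLI stream may differ.

import json

# Invented lines in the shapes _parse_and_track_gemini_events branches on.
sample_lines = [
    '{"type": "tool_use", "tool_name": "read_file", "parameters": {"path": "README.md"}}',
    '{"type": "tool_result", "tool_id": "t1", "status": "success"}',
    '{"type": "message", "role": "assistant", "content": "Done reading the file."}',
    '{"type": "result", "response": "Summary: ...",'
    ' "stats": {"models": {"gemini-2.0-flash": {"tokens": {"total": 1234}}}}}',
]
events = [json.loads(line) for line in sample_lines]

# Mirror of the extraction logic above: the response comes from the "result"
# event, and tokens are summed across per-model totals in its "stats" block.
response_text = next(
    (e.get("response", "") for e in events if e.get("type") == "result"), ""
)
tokens = sum(
    model_stats.get("tokens", {}).get("total", 0)
    for e in events
    if e.get("type") == "result"
    for model_stats in e.get("stats", {}).get("models", {}).values()
)
print(response_text, tokens)  # -> Summary: ... 1234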
@@ -128,6 +504,7 @@ class HeadlessSpawner:
                     tokens_used=None,
                     error=f"Failed to parse JSON output: {e}",
                     raw_output={"stdout": result.stdout},
+                    tracked_events=tracked_events,
                 )
 
             # Extract response and token usage from parsed output
@@ -150,15 +527,22 @@ class HeadlessSpawner:
                 tokens_used=tokens,
                 error=None,
                 raw_output=output,
+                tracked_events=tracked_events,
             )
 
-        except subprocess.TimeoutExpired:
+        except subprocess.TimeoutExpired as e:
             return AIResult(
                 success=False,
                 response="",
                 tokens_used=None,
                 error=f"Gemini CLI timed out after {timeout} seconds",
-                raw_output=
+                raw_output={
+                    "partial_stdout": e.stdout.decode() if e.stdout else None,
+                    "partial_stderr": e.stderr.decode() if e.stderr else None,
+                }
+                if e.stdout or e.stderr
+                else None,
+                tracked_events=tracked_events,
             )
         except FileNotFoundError:
             return AIResult(
@@ -167,6 +551,7 @@ class HeadlessSpawner:
                 tokens_used=None,
                 error="Gemini CLI not found. Ensure 'gemini' is installed and in PATH.",
                 raw_output=None,
+                tracked_events=tracked_events,
             )
         except Exception as e:
             return AIResult(
@@ -175,24 +560,24 @@ class HeadlessSpawner:
                 tokens_used=None,
                 error=f"Unexpected error: {type(e).__name__}: {e}",
                 raw_output=None,
+                tracked_events=tracked_events,
             )
 
     def spawn_codex(
         self,
         prompt: str,
-        approval: str = "never",
         output_json: bool = True,
         model: str | None = None,
         sandbox: str | None = None,
-        full_auto: bool =
+        full_auto: bool = True,
         images: list[str] | None = None,
-        color: str = "auto",
         output_last_message: str | None = None,
         output_schema: str | None = None,
         skip_git_check: bool = False,
         working_directory: str | None = None,
         use_oss: bool = False,
         bypass_approvals: bool = False,
+        track_in_htmlgraph: bool = True,
         timeout: int = 120,
     ) -> AIResult:
         """
@@ -200,24 +585,29 @@ class HeadlessSpawner:
 
         Args:
             prompt: Task description for Codex
-
-            output_json: Use --json flag for JSONL output
+            output_json: Use --json flag for JSONL output (enables real-time tracking)
             model: Model selection (e.g., "gpt-4-turbo"). Default: None
             sandbox: Sandbox mode ("read-only", "workspace-write", "danger-full-access"). Default: None
-            full_auto: Enable full auto mode (--full-auto). Default:
+            full_auto: Enable full auto mode (--full-auto). Default: True (required for headless)
             images: List of image paths (--image). Default: None
-            color: Color output control ("auto", "on", "off"). Default: "auto"
             output_last_message: Write last message to file (--output-last-message). Default: None
             output_schema: JSON schema for validation (--output-schema). Default: None
             skip_git_check: Skip git repo check (--skip-git-repo-check). Default: False
             working_directory: Workspace directory (--cd). Default: None
             use_oss: Use local Ollama provider (--oss). Default: False
             bypass_approvals: Dangerously bypass approvals (--dangerously-bypass-approvals-and-sandbox). Default: False
+            track_in_htmlgraph: Enable HtmlGraph activity tracking. Default: True
             timeout: Max seconds to wait
 
         Returns:
-            AIResult with response
+            AIResult with response, error, and tracked events if tracking enabled
         """
+        # Initialize tracking if enabled
+        sdk: SDK | None = None
+        tracked_events: list[dict] = []
+        if track_in_htmlgraph and output_json:
+            sdk = self._get_sdk()
+
         cmd = ["codex", "exec"]
 
         if output_json:
@@ -240,9 +630,6 @@ class HeadlessSpawner:
             for image in images:
                 cmd.extend(["--image", image])
 
-        # Add color option
-        cmd.extend(["--color", color])
-
         # Add output last message file if specified
         if output_last_message:
             cmd.extend(["--output-last-message", output_last_message])
@@ -267,7 +654,24 @@ class HeadlessSpawner:
         if bypass_approvals:
             cmd.append("--dangerously-bypass-approvals-and-sandbox")
 
-
+        # Add prompt as final argument
+        cmd.append(prompt)
+
+        # Track spawner start if SDK available
+        if sdk:
+            try:
+                sdk.track_activity(
+                    tool="codex_spawn_start",
+                    summary=f"Spawning Codex: {prompt[:80]}",
+                    payload={
+                        "prompt_length": len(prompt),
+                        "model": model,
+                        "sandbox": sandbox,
+                    },
+                )
+            except Exception:
+                # Tracking failure should not break execution
+                pass
 
         try:
             result = subprocess.run(
@@ -286,16 +690,34 @@ class HeadlessSpawner:
                     tokens_used=None,
                     error=None if result.returncode == 0 else "Command failed",
                     raw_output=result.stdout,
+                    tracked_events=tracked_events,
                 )
 
             # Parse JSONL output
             events = []
-
-
-
-
-
-
+            parse_errors = []
+
+            # Use tracking parser if SDK is available
+            if sdk:
+                tracked_events = self._parse_and_track_codex_events(result.stdout, sdk)
+                events = tracked_events
+            else:
+                # Fallback to regular parsing without tracking
+                for line_num, line in enumerate(result.stdout.splitlines(), start=1):
+                    if line.strip():
+                        try:
+                            events.append(json.loads(line))
+                        except json.JSONDecodeError as e:
+                            parse_errors.append(
+                                {
+                                    "line_number": line_num,
+                                    "error": str(e),
+                                    "content": line[
+                                        :100
+                                    ],  # First 100 chars for debugging
+                                }
+                            )
+                            continue
 
             # Extract agent message
             response = None
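For the Codex side, the tracker above branches on item.started, item.completed, and turn.completed. A hedged illustration with invented JSONL lines; real Codex CLI output may carry additional fields.

import json

# Invented lines in the shapes _parse_and_track_codex_events branches on.
sample_lines = [
    '{"type": "item.started", "item": {"type": "command_execution", "command": "pytest -q"}}',
    '{"type": "item.completed", "item": {"type": "file_change", "path": "src/app.py"}}',
    '{"type": "item.completed", "item": {"type": "agent_message", "text": "Tests pass."}}',
    '{"type": "turn.completed", "usage": {"input_tokens": 900, "output_tokens": 150}}',
]
events = [json.loads(line) for line in sample_lines]

# These map to codex_command, codex_file_change, codex_message, and
# codex_completion activities; the token count is the sum of the usage values.
tokens = sum(
    sum(e.get("usage", {}).values())
    for e in events
    if e.get("type") == "turn.completed"
)
print(tokens)  # -> 1050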
@@ -318,7 +740,11 @@ class HeadlessSpawner:
                 response=response or "",
                 tokens_used=tokens,
                 error=None if result.returncode == 0 else "Command failed",
-                raw_output=
+                raw_output={
+                    "events": events,
+                    "parse_errors": parse_errors if parse_errors else None,
+                },
+                tracked_events=tracked_events,
             )
 
         except FileNotFoundError:
@@ -328,14 +754,30 @@ class HeadlessSpawner:
                 tokens_used=None,
                 error="Codex CLI not found. Install from: https://github.com/openai/codex",
                 raw_output=None,
+                tracked_events=tracked_events,
             )
-        except subprocess.TimeoutExpired:
+        except subprocess.TimeoutExpired as e:
             return AIResult(
                 success=False,
                 response="",
                 tokens_used=None,
                 error=f"Timed out after {timeout} seconds",
+                raw_output={
+                    "partial_stdout": e.stdout.decode() if e.stdout else None,
+                    "partial_stderr": e.stderr.decode() if e.stderr else None,
+                }
+                if e.stdout or e.stderr
+                else None,
+                tracked_events=tracked_events,
+            )
+        except Exception as e:
+            return AIResult(
+                success=False,
+                response="",
+                tokens_used=None,
+                error=f"Unexpected error: {type(e).__name__}: {e}",
                 raw_output=None,
+                tracked_events=tracked_events,
             )
 
     def spawn_copilot(
@@ -344,6 +786,7 @@ class HeadlessSpawner:
         allow_tools: list[str] | None = None,
         allow_all_tools: bool = False,
         deny_tools: list[str] | None = None,
+        track_in_htmlgraph: bool = True,
         timeout: int = 120,
     ) -> AIResult:
         """
@@ -354,11 +797,18 @@ class HeadlessSpawner:
             allow_tools: List of tools to auto-approve (e.g., ["shell(git)", "write(*.py)"])
             allow_all_tools: Auto-approve all tools (--allow-all-tools). Default: False
             deny_tools: List of tools to deny (--deny-tool). Default: None
+            track_in_htmlgraph: Enable HtmlGraph activity tracking. Default: True
             timeout: Max seconds to wait
 
         Returns:
-            AIResult with response
+            AIResult with response, error, and tracked events if tracking enabled
         """
+        # Initialize tracking if enabled
+        sdk = None
+        tracked_events = []
+        if track_in_htmlgraph:
+            sdk = self._get_sdk()
+
         cmd = ["copilot", "-p", prompt]
 
         # Add allow all tools flag
@@ -375,6 +825,18 @@ class HeadlessSpawner:
             for tool in deny_tools:
                 cmd.extend(["--deny-tool", tool])
 
+        # Track spawner start if SDK available
+        if sdk:
+            try:
+                sdk.track_activity(
+                    tool="copilot_spawn_start",
+                    summary=f"Spawning Copilot: {prompt[:80]}",
+                    payload={"prompt_length": len(prompt)},
+                )
+            except Exception:
+                # Tracking failure should not break execution
+                pass
+
         try:
             result = subprocess.run(
                 cmd,
@@ -406,12 +868,19 @@ class HeadlessSpawner:
                     tokens = 0  # Placeholder
                     break
 
+            # Track Copilot execution if SDK available
+            if sdk:
+                tracked_events = self._parse_and_track_copilot_events(
+                    prompt, response, sdk
+                )
+
             return AIResult(
                 success=result.returncode == 0,
                 response=response,
                 tokens_used=tokens,
                 error=None if result.returncode == 0 else result.stderr,
                 raw_output=result.stdout,
+                tracked_events=tracked_events,
             )
 
         except FileNotFoundError:
@@ -421,14 +890,30 @@ class HeadlessSpawner:
                 tokens_used=None,
                 error="Copilot CLI not found. Install from: https://docs.github.com/en/copilot/using-github-copilot/using-github-copilot-in-the-command-line",
                 raw_output=None,
+                tracked_events=tracked_events,
             )
-        except subprocess.TimeoutExpired:
+        except subprocess.TimeoutExpired as e:
             return AIResult(
                 success=False,
                 response="",
                 tokens_used=None,
                 error=f"Timed out after {timeout} seconds",
+                raw_output={
+                    "partial_stdout": e.stdout.decode() if e.stdout else None,
+                    "partial_stderr": e.stderr.decode() if e.stderr else None,
+                }
+                if e.stdout or e.stderr
+                else None,
+                tracked_events=tracked_events,
+            )
+        except Exception as e:
+            return AIResult(
+                success=False,
+                response="",
+                tokens_used=None,
+                error=f"Unexpected error: {type(e).__name__}: {e}",
                 raw_output=None,
+                tracked_events=tracked_events,
             )
 
     def spawn_claude(
@@ -548,19 +1033,24 @@ class HeadlessSpawner:
                 error="Claude CLI not found. Install Claude Code from: https://claude.com/claude-code",
                 raw_output=None,
             )
-        except subprocess.TimeoutExpired:
+        except subprocess.TimeoutExpired as e:
             return AIResult(
                 success=False,
                 response="",
                 tokens_used=None,
                 error=f"Timed out after {timeout} seconds",
-                raw_output=
+                raw_output={
+                    "partial_stdout": e.stdout.decode() if e.stdout else None,
+                    "partial_stderr": e.stderr.decode() if e.stderr else None,
+                }
+                if e.stdout or e.stderr
+                else None,
             )
         except Exception as e:
             return AIResult(
                 success=False,
                 response="",
                 tokens_used=None,
-                error=f"Unexpected error: {
+                error=f"Unexpected error: {type(e).__name__}: {e}",
                 raw_output=None,
             )