foundry-mcp 0.8.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of foundry-mcp might be problematic. Click here for more details.

Files changed (153) hide show
  1. foundry_mcp/__init__.py +13 -0
  2. foundry_mcp/cli/__init__.py +67 -0
  3. foundry_mcp/cli/__main__.py +9 -0
  4. foundry_mcp/cli/agent.py +96 -0
  5. foundry_mcp/cli/commands/__init__.py +37 -0
  6. foundry_mcp/cli/commands/cache.py +137 -0
  7. foundry_mcp/cli/commands/dashboard.py +148 -0
  8. foundry_mcp/cli/commands/dev.py +446 -0
  9. foundry_mcp/cli/commands/journal.py +377 -0
  10. foundry_mcp/cli/commands/lifecycle.py +274 -0
  11. foundry_mcp/cli/commands/modify.py +824 -0
  12. foundry_mcp/cli/commands/plan.py +640 -0
  13. foundry_mcp/cli/commands/pr.py +393 -0
  14. foundry_mcp/cli/commands/review.py +667 -0
  15. foundry_mcp/cli/commands/session.py +472 -0
  16. foundry_mcp/cli/commands/specs.py +686 -0
  17. foundry_mcp/cli/commands/tasks.py +807 -0
  18. foundry_mcp/cli/commands/testing.py +676 -0
  19. foundry_mcp/cli/commands/validate.py +982 -0
  20. foundry_mcp/cli/config.py +98 -0
  21. foundry_mcp/cli/context.py +298 -0
  22. foundry_mcp/cli/logging.py +212 -0
  23. foundry_mcp/cli/main.py +44 -0
  24. foundry_mcp/cli/output.py +122 -0
  25. foundry_mcp/cli/registry.py +110 -0
  26. foundry_mcp/cli/resilience.py +178 -0
  27. foundry_mcp/cli/transcript.py +217 -0
  28. foundry_mcp/config.py +1454 -0
  29. foundry_mcp/core/__init__.py +144 -0
  30. foundry_mcp/core/ai_consultation.py +1773 -0
  31. foundry_mcp/core/batch_operations.py +1202 -0
  32. foundry_mcp/core/cache.py +195 -0
  33. foundry_mcp/core/capabilities.py +446 -0
  34. foundry_mcp/core/concurrency.py +898 -0
  35. foundry_mcp/core/context.py +540 -0
  36. foundry_mcp/core/discovery.py +1603 -0
  37. foundry_mcp/core/error_collection.py +728 -0
  38. foundry_mcp/core/error_store.py +592 -0
  39. foundry_mcp/core/health.py +749 -0
  40. foundry_mcp/core/intake.py +933 -0
  41. foundry_mcp/core/journal.py +700 -0
  42. foundry_mcp/core/lifecycle.py +412 -0
  43. foundry_mcp/core/llm_config.py +1376 -0
  44. foundry_mcp/core/llm_patterns.py +510 -0
  45. foundry_mcp/core/llm_provider.py +1569 -0
  46. foundry_mcp/core/logging_config.py +374 -0
  47. foundry_mcp/core/metrics_persistence.py +584 -0
  48. foundry_mcp/core/metrics_registry.py +327 -0
  49. foundry_mcp/core/metrics_store.py +641 -0
  50. foundry_mcp/core/modifications.py +224 -0
  51. foundry_mcp/core/naming.py +146 -0
  52. foundry_mcp/core/observability.py +1216 -0
  53. foundry_mcp/core/otel.py +452 -0
  54. foundry_mcp/core/otel_stubs.py +264 -0
  55. foundry_mcp/core/pagination.py +255 -0
  56. foundry_mcp/core/progress.py +387 -0
  57. foundry_mcp/core/prometheus.py +564 -0
  58. foundry_mcp/core/prompts/__init__.py +464 -0
  59. foundry_mcp/core/prompts/fidelity_review.py +691 -0
  60. foundry_mcp/core/prompts/markdown_plan_review.py +515 -0
  61. foundry_mcp/core/prompts/plan_review.py +627 -0
  62. foundry_mcp/core/providers/__init__.py +237 -0
  63. foundry_mcp/core/providers/base.py +515 -0
  64. foundry_mcp/core/providers/claude.py +472 -0
  65. foundry_mcp/core/providers/codex.py +637 -0
  66. foundry_mcp/core/providers/cursor_agent.py +630 -0
  67. foundry_mcp/core/providers/detectors.py +515 -0
  68. foundry_mcp/core/providers/gemini.py +426 -0
  69. foundry_mcp/core/providers/opencode.py +718 -0
  70. foundry_mcp/core/providers/opencode_wrapper.js +308 -0
  71. foundry_mcp/core/providers/package-lock.json +24 -0
  72. foundry_mcp/core/providers/package.json +25 -0
  73. foundry_mcp/core/providers/registry.py +607 -0
  74. foundry_mcp/core/providers/test_provider.py +171 -0
  75. foundry_mcp/core/providers/validation.py +857 -0
  76. foundry_mcp/core/rate_limit.py +427 -0
  77. foundry_mcp/core/research/__init__.py +68 -0
  78. foundry_mcp/core/research/memory.py +528 -0
  79. foundry_mcp/core/research/models.py +1234 -0
  80. foundry_mcp/core/research/providers/__init__.py +40 -0
  81. foundry_mcp/core/research/providers/base.py +242 -0
  82. foundry_mcp/core/research/providers/google.py +507 -0
  83. foundry_mcp/core/research/providers/perplexity.py +442 -0
  84. foundry_mcp/core/research/providers/semantic_scholar.py +544 -0
  85. foundry_mcp/core/research/providers/tavily.py +383 -0
  86. foundry_mcp/core/research/workflows/__init__.py +25 -0
  87. foundry_mcp/core/research/workflows/base.py +298 -0
  88. foundry_mcp/core/research/workflows/chat.py +271 -0
  89. foundry_mcp/core/research/workflows/consensus.py +539 -0
  90. foundry_mcp/core/research/workflows/deep_research.py +4142 -0
  91. foundry_mcp/core/research/workflows/ideate.py +682 -0
  92. foundry_mcp/core/research/workflows/thinkdeep.py +405 -0
  93. foundry_mcp/core/resilience.py +600 -0
  94. foundry_mcp/core/responses.py +1624 -0
  95. foundry_mcp/core/review.py +366 -0
  96. foundry_mcp/core/security.py +438 -0
  97. foundry_mcp/core/spec.py +4119 -0
  98. foundry_mcp/core/task.py +2463 -0
  99. foundry_mcp/core/testing.py +839 -0
  100. foundry_mcp/core/validation.py +2357 -0
  101. foundry_mcp/dashboard/__init__.py +32 -0
  102. foundry_mcp/dashboard/app.py +119 -0
  103. foundry_mcp/dashboard/components/__init__.py +17 -0
  104. foundry_mcp/dashboard/components/cards.py +88 -0
  105. foundry_mcp/dashboard/components/charts.py +177 -0
  106. foundry_mcp/dashboard/components/filters.py +136 -0
  107. foundry_mcp/dashboard/components/tables.py +195 -0
  108. foundry_mcp/dashboard/data/__init__.py +11 -0
  109. foundry_mcp/dashboard/data/stores.py +433 -0
  110. foundry_mcp/dashboard/launcher.py +300 -0
  111. foundry_mcp/dashboard/views/__init__.py +12 -0
  112. foundry_mcp/dashboard/views/errors.py +217 -0
  113. foundry_mcp/dashboard/views/metrics.py +164 -0
  114. foundry_mcp/dashboard/views/overview.py +96 -0
  115. foundry_mcp/dashboard/views/providers.py +83 -0
  116. foundry_mcp/dashboard/views/sdd_workflow.py +255 -0
  117. foundry_mcp/dashboard/views/tool_usage.py +139 -0
  118. foundry_mcp/prompts/__init__.py +9 -0
  119. foundry_mcp/prompts/workflows.py +525 -0
  120. foundry_mcp/resources/__init__.py +9 -0
  121. foundry_mcp/resources/specs.py +591 -0
  122. foundry_mcp/schemas/__init__.py +38 -0
  123. foundry_mcp/schemas/intake-schema.json +89 -0
  124. foundry_mcp/schemas/sdd-spec-schema.json +414 -0
  125. foundry_mcp/server.py +150 -0
  126. foundry_mcp/tools/__init__.py +10 -0
  127. foundry_mcp/tools/unified/__init__.py +92 -0
  128. foundry_mcp/tools/unified/authoring.py +3620 -0
  129. foundry_mcp/tools/unified/context_helpers.py +98 -0
  130. foundry_mcp/tools/unified/documentation_helpers.py +268 -0
  131. foundry_mcp/tools/unified/environment.py +1341 -0
  132. foundry_mcp/tools/unified/error.py +479 -0
  133. foundry_mcp/tools/unified/health.py +225 -0
  134. foundry_mcp/tools/unified/journal.py +841 -0
  135. foundry_mcp/tools/unified/lifecycle.py +640 -0
  136. foundry_mcp/tools/unified/metrics.py +777 -0
  137. foundry_mcp/tools/unified/plan.py +876 -0
  138. foundry_mcp/tools/unified/pr.py +294 -0
  139. foundry_mcp/tools/unified/provider.py +589 -0
  140. foundry_mcp/tools/unified/research.py +1283 -0
  141. foundry_mcp/tools/unified/review.py +1042 -0
  142. foundry_mcp/tools/unified/review_helpers.py +314 -0
  143. foundry_mcp/tools/unified/router.py +102 -0
  144. foundry_mcp/tools/unified/server.py +565 -0
  145. foundry_mcp/tools/unified/spec.py +1283 -0
  146. foundry_mcp/tools/unified/task.py +3846 -0
  147. foundry_mcp/tools/unified/test.py +431 -0
  148. foundry_mcp/tools/unified/verification.py +520 -0
  149. foundry_mcp-0.8.22.dist-info/METADATA +344 -0
  150. foundry_mcp-0.8.22.dist-info/RECORD +153 -0
  151. foundry_mcp-0.8.22.dist-info/WHEEL +4 -0
  152. foundry_mcp-0.8.22.dist-info/entry_points.txt +3 -0
  153. foundry_mcp-0.8.22.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,4142 @@
1
+ """Deep Research workflow with async background execution.
2
+
3
+ Provides multi-phase iterative research through query decomposition,
4
+ parallel source gathering, content analysis, and synthesized reporting.
5
+
6
+ Key Features:
7
+ - Background execution via daemon threads with asyncio.run()
8
+ - Immediate research_id return on start
9
+ - Status polling while running
10
+ - Task lifecycle tracking with cancellation support
11
+ - Multi-agent supervisor orchestration hooks
12
+
13
+ Note: Uses daemon threads (not asyncio.create_task()) to ensure background
14
+ execution works correctly from synchronous MCP tool handlers where there
15
+ is no running event loop.
16
+
17
+ Inspired by:
18
+ - open_deep_research: Multi-agent supervision with think-tool pauses
19
+ - Claude-Deep-Research: Dual-source search with link following
20
+ """
21
+
22
from __future__ import annotations

import asyncio
import atexit
import json
import logging
import re
import sys
import threading
import time
import traceback
from dataclasses import dataclass, field as dataclass_field
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Optional
from uuid import uuid4
from weakref import WeakValueDictionary

from foundry_mcp.config import ResearchConfig
from foundry_mcp.core.error_collection import ErrorRecord
from foundry_mcp.core.error_store import FileErrorStore
from foundry_mcp.core.providers import ContextWindowError
from foundry_mcp.core.research.memory import ResearchMemory
from foundry_mcp.core.research.models import (
    ConfidenceLevel,
    DeepResearchPhase,
    DeepResearchState,
    DOMAIN_TIERS,
    PhaseMetrics,
    ResearchMode,
    ResearchSource,
    SourceQuality,
)
from foundry_mcp.core.research.providers import (
    SearchProvider,
    SearchProviderError,
    GoogleSearchProvider,
    PerplexitySearchProvider,
    SemanticScholarProvider,
    TavilySearchProvider,
)
from foundry_mcp.core.research.workflows.base import ResearchWorkflowBase, WorkflowResult
65
+
66
+ logger = logging.getLogger(__name__)
67
+
68
+
69
# =============================================================================
# Crash Handler Infrastructure
# =============================================================================

# Track active research sessions for crash recovery.
# NOTE(review): module-level registry read by both the excepthook and the
# atexit hook below; entries are presumably added/removed by the workflow
# runner elsewhere in this module — confirm.
_active_research_sessions: dict[str, "DeepResearchState"] = {}


def _crash_handler(exc_type: type, exc_value: BaseException, exc_tb: Any) -> None:
    """Handle uncaught exceptions by logging to stderr and writing crash markers.

    This handler catches process-level crashes that escape normal exception handling
    and ensures we have visibility into what went wrong.

    Args:
        exc_type: Class of the uncaught exception.
        exc_value: The exception instance.
        exc_tb: Traceback object for the exception.
    """
    tb_str = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))

    # Always write to stderr for visibility
    print(
        f"\n{'='*60}\n"
        f"DEEP RESEARCH CRASH HANDLER\n"
        f"{'='*60}\n"
        f"Exception: {exc_type.__name__}: {exc_value}\n"
        f"Active sessions: {list(_active_research_sessions.keys())}\n"
        f"Traceback:\n{tb_str}"
        f"{'='*60}\n",
        file=sys.stderr,
        flush=True,
    )

    # Try to save crash markers for active research sessions
    for research_id, state in _active_research_sessions.items():
        try:
            state.metadata["crash"] = True
            state.metadata["crash_error"] = str(exc_value)
            # Write crash marker file (one per session, under the user's home
            # directory, so a later start can detect the previous crash).
            crash_path = (
                Path.home()
                / ".foundry-mcp"
                / "research"
                / "deep_research"
                / f"{research_id}.crash"
            )
            crash_path.parent.mkdir(parents=True, exist_ok=True)
            crash_path.write_text(tb_str)
        except Exception:
            pass  # Best effort - don't fail the crash handler

    # Call original handler
    # NOTE(review): this delegates to sys.__excepthook__ (the interpreter
    # default), not to whatever hook was installed before this module was
    # imported — confirm that chaining to a prior custom hook is not needed.
    sys.__excepthook__(exc_type, exc_value, exc_tb)


# Install crash handler
# NOTE(review): assigning sys.excepthook at import time is a process-wide
# side effect that replaces any handler the embedding application set —
# confirm this is intended for a library module.
sys.excepthook = _crash_handler


@atexit.register
def _cleanup_on_exit() -> None:
    """Mark any active sessions as interrupted on normal exit."""
    # Sessions with no completed_at were still running; flag them so a later
    # status read can distinguish "interrupted" from "completed".
    for research_id, state in _active_research_sessions.items():
        if state.completed_at is None:
            state.metadata["interrupted"] = True
130
+
131
+
132
+ # =============================================================================
133
+ # Domain-Based Source Quality Assessment
134
+ # =============================================================================
135
+
136
+
137
+ def _extract_domain(url: str) -> Optional[str]:
138
+ """Extract domain from URL.
139
+
140
+ Args:
141
+ url: Full URL string
142
+
143
+ Returns:
144
+ Domain string (e.g., "arxiv.org") or None if extraction fails
145
+ """
146
+ if not url:
147
+ return None
148
+ try:
149
+ # Handle URLs without scheme
150
+ if "://" not in url:
151
+ url = "https://" + url
152
+ # Extract domain using simple parsing
153
+ from urllib.parse import urlparse
154
+ parsed = urlparse(url)
155
+ domain = parsed.netloc.lower()
156
+ # Remove www. prefix
157
+ if domain.startswith("www."):
158
+ domain = domain[4:]
159
+ return domain if domain else None
160
+ except Exception:
161
+ return None
162
+
163
+
164
+ def _extract_hostname(url: str) -> Optional[str]:
165
+ """Extract full hostname from URL (preserves subdomains like www.).
166
+
167
+ Args:
168
+ url: Full URL string
169
+
170
+ Returns:
171
+ Full hostname (e.g., "www.arxiv.org", "docs.python.org") or None
172
+ """
173
+ if not url:
174
+ return None
175
+ try:
176
+ # Handle URLs without scheme
177
+ if "://" not in url:
178
+ url = "https://" + url
179
+ from urllib.parse import urlparse
180
+ parsed = urlparse(url)
181
+ return parsed.netloc.lower() if parsed.netloc else None
182
+ except Exception:
183
+ return None
184
+
185
+
186
+ def _domain_matches_pattern(domain: str, pattern: str) -> bool:
187
+ """Check if domain matches a pattern (supports wildcards).
188
+
189
+ Patterns:
190
+ - "arxiv.org" - exact match
191
+ - "*.edu" - matches stanford.edu, mit.edu, etc.
192
+ - "docs.*" - matches docs.python.org, docs.microsoft.com, etc.
193
+
194
+ Args:
195
+ domain: Domain to check (e.g., "stanford.edu")
196
+ pattern: Pattern to match (e.g., "*.edu")
197
+
198
+ Returns:
199
+ True if domain matches pattern
200
+ """
201
+ pattern = pattern.lower()
202
+ domain = domain.lower()
203
+
204
+ if "*" not in pattern:
205
+ # Exact match or subdomain match
206
+ return domain == pattern or domain.endswith("." + pattern)
207
+
208
+ if pattern.startswith("*."):
209
+ # Suffix pattern: *.edu matches stanford.edu
210
+ suffix = pattern[2:]
211
+ return domain == suffix or domain.endswith("." + suffix)
212
+
213
+ if pattern.endswith(".*"):
214
+ # Prefix pattern: docs.* matches docs.python.org
215
+ prefix = pattern[:-2]
216
+ return domain == prefix or domain.startswith(prefix + ".")
217
+
218
+ # General wildcard (treat as contains)
219
+ return pattern.replace("*", "") in domain
220
+
221
+
222
def get_domain_quality(url: str, mode: ResearchMode) -> SourceQuality:
    """Rate a source URL's quality from its domain tier for *mode*.

    Args:
        url: Source URL.
        mode: Research mode whose tier table is consulted (falls back to
            the "general" tiers when the mode has no table).

    Returns:
        HIGH or LOW when the domain matches a configured tier ("high"
        wins over "low"), MEDIUM for unmatched domains, and UNKNOWN when
        no domain can be extracted from the URL.
    """
    domain = _extract_domain(url)
    if domain is None:
        return SourceQuality.UNKNOWN

    tier_table = DOMAIN_TIERS.get(mode.value, DOMAIN_TIERS["general"])

    # "high" patterns take precedence over "low" ones.
    for tier_name, quality in (
        ("high", SourceQuality.HIGH),
        ("low", SourceQuality.LOW),
    ):
        if any(
            _domain_matches_pattern(domain, pattern)
            for pattern in tier_table.get(tier_name, [])
        ):
            return quality

    # Anything unmatched is treated as middling quality.
    return SourceQuality.MEDIUM
250
+
251
+
252
+ def _normalize_title(title: str) -> str:
253
+ """Normalize title for deduplication matching.
254
+
255
+ Converts to lowercase, removes punctuation, and collapses whitespace
256
+ to enable matching the same paper from different sources (e.g., arXiv vs OpenReview).
257
+
258
+ Args:
259
+ title: Source title to normalize
260
+
261
+ Returns:
262
+ Normalized title string for comparison
263
+ """
264
+ if not title:
265
+ return ""
266
+ # Lowercase, remove punctuation, collapse whitespace
267
+ normalized = title.lower()
268
+ normalized = re.sub(r"[^\w\s]", "", normalized)
269
+ normalized = re.sub(r"\s+", " ", normalized).strip()
270
+ return normalized
271
+
272
+
273
+ # =============================================================================
274
+ # Task Lifecycle
275
+ # =============================================================================
276
+
277
+
278
class TaskStatus(str, Enum):
    """Lifecycle states for a background research task."""

    PENDING = "pending"      # created, not yet started
    RUNNING = "running"      # executing now
    COMPLETED = "completed"  # finished without error
    FAILED = "failed"        # finished with an error
    CANCELLED = "cancelled"  # stopped on user request
    TIMEOUT = "timeout"      # aborted after exceeding its time budget
287
+
288
+
289
class AgentRole(str, Enum):
    """Specialist roles within the multi-agent research workflow.

    Responsibilities:
        - SUPERVISOR: orchestrates phase transitions, runs think-tool
          pauses between phases, applies quality gates, and decides
          whether to iterate or finish.
        - PLANNER: turns the original query into focused sub-queries,
          writes the research brief, and surfaces key themes to explore.
        - GATHERER: runs parallel searches across providers, honors rate
          limits, deduplicates sources, and screens source quality.
        - ANALYZER: extracts findings from sources, weighs evidence,
          spots contradictions, and rates source reliability.
        - SYNTHESIZER: writes coherent report sections, keeps the logical
          flow, and integrates findings into the final synthesis.
        - REFINER: finds knowledge gaps, drafts follow-up queries,
          prioritizes gaps, and decides whether another iteration is
          warranted.
    """

    SUPERVISOR = "supervisor"
    PLANNER = "planner"
    GATHERER = "gatherer"
    ANALYZER = "analyzer"
    SYNTHESIZER = "synthesizer"
    REFINER = "refiner"
314
+
315
+
316
# Mapping from workflow phases to specialist agents.
# Routing table consulted when dispatching a phase; phases missing from this
# table fall back to the SUPERVISOR role (see dispatch_to_agent).
PHASE_TO_AGENT: dict[DeepResearchPhase, AgentRole] = {
    DeepResearchPhase.PLANNING: AgentRole.PLANNER,
    DeepResearchPhase.GATHERING: AgentRole.GATHERER,
    DeepResearchPhase.ANALYSIS: AgentRole.ANALYZER,
    DeepResearchPhase.SYNTHESIS: AgentRole.SYNTHESIZER,
    DeepResearchPhase.REFINEMENT: AgentRole.REFINER,
}
324
+
325
+
326
@dataclass
class AgentDecision:
    """Records a decision made by an agent during workflow execution.

    Used for traceability and debugging. Each decision captures which
    agent acted, what action was taken, the rationale behind it, the
    inputs provided, any outputs produced, and a timestamp for ordering.

    Handoff Protocol:
        - Inputs: the context passed to the agent (query, state summary, etc.)
        - Outputs: the results produced (sub-queries, findings, report sections)
        - The supervisor evaluates outputs before proceeding to the next phase
    """

    agent: AgentRole
    # Action identifier, e.g. "decompose_query", "evaluate_phase".
    action: str
    # Human-readable justification for why this decision was made.
    rationale: str
    # Context provided to the agent.
    inputs: dict[str, Any]
    # Results produced by the agent, if any.
    outputs: Optional[dict[str, Any]] = None
    # Timezone-aware creation time. datetime.utcnow() is deprecated since
    # Python 3.12 and returns a naive datetime; use an explicit UTC-aware
    # timestamp instead (isoformat() now carries a "+00:00" offset).
    timestamp: datetime = dataclass_field(
        default_factory=lambda: datetime.now(timezone.utc)
    )

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "agent": self.agent.value,
            "action": self.action,
            "rationale": self.rationale,
            "inputs": self.inputs,
            "outputs": self.outputs,
            "timestamp": self.timestamp.isoformat(),
        }
361
+
362
+
363
class BackgroundTask:
    """Tracks a background research task.

    Supports both asyncio.Task-based and thread-based execution for
    lifecycle management, cancellation support, and timeout handling.
    Exactly one of ``task``/``thread`` is expected to be set; thread
    mode is the preferred path per the module docstring.
    """

    def __init__(
        self,
        research_id: str,
        task: Optional[asyncio.Task[WorkflowResult]] = None,
        thread: Optional[threading.Thread] = None,
        timeout: Optional[float] = None,
    ) -> None:
        """Initialize background task.

        Args:
            research_id: ID of the research session
            task: Optional asyncio task running the workflow (legacy)
            thread: Optional thread running the workflow (preferred)
            timeout: Optional timeout in seconds
        """
        self.research_id = research_id
        self.task = task
        self.thread = thread
        self.timeout = timeout
        # NOTE(review): status starts as RUNNING at construction; the PENDING
        # state is never assigned in this class — confirm that is intended.
        self.status = TaskStatus.RUNNING
        self.started_at = time.time()
        self.completed_at: Optional[float] = None
        self.error: Optional[str] = None
        self.result: Optional[WorkflowResult] = None
        # Event for signaling cancellation to thread-based execution
        self._cancel_event = threading.Event()

    @property
    def elapsed_ms(self) -> float:
        """Get elapsed time in milliseconds."""
        # While running, measure against the wall clock; once finished,
        # freeze at completed_at.
        end = self.completed_at or time.time()
        return (end - self.started_at) * 1000

    @property
    def is_timed_out(self) -> bool:
        """Check if task has exceeded timeout."""
        if self.timeout is None:
            return False
        return (time.time() - self.started_at) > self.timeout

    @property
    def is_cancelled(self) -> bool:
        """Check if cancellation has been requested."""
        return self._cancel_event.is_set()

    @property
    def is_done(self) -> bool:
        """Check if the task is done (for both thread and asyncio modes).

        Returns:
            True if the task has completed, False if still running.
        """
        if self.thread is not None:
            return not self.thread.is_alive()
        elif self.task is not None:
            return self.task.done()
        # Neither thread nor task - consider done (shouldn't happen)
        return True

    def cancel(self) -> bool:
        """Cancel the task.

        Returns:
            True if cancellation was requested, False if already done
        """
        # Handle thread-based execution
        if self.thread is not None:
            if not self.thread.is_alive():
                return False
            self._cancel_event.set()
            # Give thread a chance to clean up
            # NOTE(review): join() may return with the thread still alive
            # after 5s, yet the status is set to CANCELLED regardless —
            # confirm that a still-running worker being reported as
            # cancelled is acceptable.
            self.thread.join(timeout=5.0)
            self.status = TaskStatus.CANCELLED
            self.completed_at = time.time()
            return True
        # Handle asyncio-based execution (legacy)
        elif self.task is not None:
            if self.task.done():
                return False
            self.task.cancel()
            self.status = TaskStatus.CANCELLED
            self.completed_at = time.time()
            return True
        return False

    def mark_completed(self, result: WorkflowResult) -> None:
        """Mark task as completed with result."""
        # COMPLETED vs FAILED is derived from the workflow result itself.
        self.status = TaskStatus.COMPLETED if result.success else TaskStatus.FAILED
        self.result = result
        self.completed_at = time.time()
        if not result.success:
            self.error = result.error

    def mark_timeout(self) -> None:
        """Mark task as timed out."""
        self.status = TaskStatus.TIMEOUT
        self.completed_at = time.time()
        self.error = f"Task exceeded timeout of {self.timeout}s"
        # Signal cancellation
        if self.thread is not None:
            self._cancel_event.set()
        elif self.task is not None:
            self.task.cancel()
473
+
474
+
475
+ # =============================================================================
476
+ # Supervisor Hooks (Multi-Agent Orchestration)
477
+ # =============================================================================
478
+
479
+
480
class SupervisorHooks:
    """Extension points for multi-agent supervisor orchestration.

    External orchestrators register callbacks that fire at key workflow
    moments, enabling think-tool pauses, agent handoffs, and custom
    routing logic. Every emitter is exception-safe: a failing callback
    is logged and execution continues.
    """

    def __init__(self) -> None:
        """Start with all hooks unset, so every emitter is a no-op."""
        self._on_phase_start: Optional[Callable[[DeepResearchState], None]] = None
        self._on_phase_complete: Optional[Callable[[DeepResearchState], None]] = None
        self._on_think_pause: Optional[Callable[[DeepResearchState, str], str]] = None
        self._on_agent_handoff: Optional[Callable[[str, dict], dict]] = None

    def on_phase_start(self, callback: Callable[[DeepResearchState], None]) -> None:
        """Register the callback invoked when a phase begins."""
        self._on_phase_start = callback

    def on_phase_complete(self, callback: Callable[[DeepResearchState], None]) -> None:
        """Register the callback invoked when a phase finishes."""
        self._on_phase_complete = callback

    def on_think_pause(self, callback: Callable[[DeepResearchState, str], str]) -> None:
        """Register the think-tool pause callback.

        The callback receives the current state plus a reflection prompt
        and should return guidance for the next step.
        """
        self._on_think_pause = callback

    def on_agent_handoff(self, callback: Callable[[str, dict], dict]) -> None:
        """Register the agent-handoff callback.

        The callback receives the target agent name and a context dict
        and should return that agent's response.
        """
        self._on_agent_handoff = callback

    def emit_phase_start(self, state: DeepResearchState) -> None:
        """Fire the phase-start hook, if any; callback errors are logged."""
        hook = self._on_phase_start
        if hook is None:
            return
        try:
            hook(state)
        except Exception as exc:
            logger.error("Phase start hook failed: %s", exc)

    def emit_phase_complete(self, state: DeepResearchState) -> None:
        """Fire the phase-complete hook, if any; callback errors are logged."""
        hook = self._on_phase_complete
        if hook is None:
            return
        try:
            hook(state)
        except Exception as exc:
            logger.error("Phase complete hook failed: %s", exc)

    def think_pause(self, state: DeepResearchState, prompt: str) -> Optional[str]:
        """Run the think-pause hook; return its guidance, or None if unset/failed."""
        hook = self._on_think_pause
        if hook is None:
            return None
        try:
            return hook(state, prompt)
        except Exception as exc:
            logger.error("Think pause hook failed: %s", exc)
            return None

    def agent_handoff(self, agent: str, context: dict) -> Optional[dict]:
        """Run the agent-handoff hook; return its response, or None if unset/failed."""
        hook = self._on_agent_handoff
        if hook is None:
            return None
        try:
            return hook(agent, context)
        except Exception as exc:
            logger.error("Agent handoff hook failed: %s", exc)
            return None
552
+
553
+
554
+ # =============================================================================
555
+ # Supervisor Orchestrator
556
+ # =============================================================================
557
+
558
+
559
+ class SupervisorOrchestrator:
560
+ """Coordinates specialist agents and manages phase transitions.
561
+
562
+ The supervisor is responsible for:
563
+ 1. Deciding which specialist agent to dispatch for each phase
564
+ 2. Evaluating phase completion quality before proceeding
565
+ 3. Inserting think-tool pauses for reflection and strategy adjustment
566
+ 4. Recording all decisions for traceability
567
+ 5. Managing iteration vs completion decisions
568
+
569
+ The orchestrator integrates with SupervisorHooks to allow external
570
+ customization of decision logic (e.g., via LLM-based evaluation).
571
+
572
+ Phase Dispatch Flow:
573
+ ```
574
+ SUPERVISOR -> evaluate context -> dispatch to PLANNER
575
+ -> think pause (evaluate planning quality)
576
+ -> dispatch to GATHERER
577
+ -> think pause (evaluate source quality)
578
+ -> dispatch to ANALYZER
579
+ -> think pause (evaluate findings)
580
+ -> dispatch to SYNTHESIZER
581
+ -> think pause (evaluate report)
582
+ -> decide: complete OR dispatch to REFINER
583
+ ```
584
+ """
585
+
586
    def __init__(self) -> None:
        """Initialize the supervisor orchestrator."""
        # Ordered log of every AgentDecision made during this run,
        # appended to by dispatch_to_agent and evaluate_phase_completion
        # for traceability.
        self._decisions: list[AgentDecision] = []
589
+
590
+ def dispatch_to_agent(
591
+ self,
592
+ state: DeepResearchState,
593
+ phase: DeepResearchPhase,
594
+ ) -> AgentDecision:
595
+ """Dispatch work to the appropriate specialist agent for a phase.
596
+
597
+ Args:
598
+ state: Current research state
599
+ phase: The phase to execute
600
+
601
+ Returns:
602
+ AgentDecision recording the dispatch
603
+ """
604
+ agent = PHASE_TO_AGENT.get(phase, AgentRole.SUPERVISOR)
605
+ inputs = self._build_agent_inputs(state, phase)
606
+
607
+ decision = AgentDecision(
608
+ agent=agent,
609
+ action=f"execute_{phase.value}",
610
+ rationale=f"Phase {phase.value} requires {agent.value} specialist",
611
+ inputs=inputs,
612
+ )
613
+
614
+ self._decisions.append(decision)
615
+ return decision
616
+
617
    def _build_agent_inputs(
        self,
        state: DeepResearchState,
        phase: DeepResearchPhase,
    ) -> dict[str, Any]:
        """Build the input context for a specialist agent.

        Handoff inputs vary by phase:
        - PLANNING: original query, system prompt
        - GATHERING: sub-queries, source types, rate limits
        - ANALYSIS: sources, findings so far
        - SYNTHESIS: findings, gaps, iteration count
        - REFINEMENT: gaps, remaining iterations, report draft

        Args:
            state: Current research state summarized for the agent.
            phase: Phase whose specialist receives these inputs.

        Returns:
            Context dict; always contains the research id, original
            query, current phase name, and iteration number, plus
            phase-specific keys for the known phases.
        """
        # Shared context included for every agent regardless of phase.
        base_inputs = {
            "research_id": state.id,
            "original_query": state.original_query,
            "current_phase": phase.value,
            "iteration": state.iteration,
        }

        if phase == DeepResearchPhase.PLANNING:
            return {
                **base_inputs,
                "system_prompt": state.system_prompt,
                "max_sub_queries": state.max_sub_queries,
            }
        elif phase == DeepResearchPhase.GATHERING:
            # Only queries still pending execution are handed to the gatherer.
            return {
                **base_inputs,
                "sub_queries": [q.query for q in state.pending_sub_queries()],
                "source_types": [st.value for st in state.source_types],
                "max_sources_per_query": state.max_sources_per_query,
            }
        elif phase == DeepResearchPhase.ANALYSIS:
            # Counts only; the analyzer reads full sources from state itself.
            return {
                **base_inputs,
                "source_count": len(state.sources),
                "high_quality_sources": len(
                    [s for s in state.sources if s.quality == SourceQuality.HIGH]
                ),
            }
        elif phase == DeepResearchPhase.SYNTHESIS:
            return {
                **base_inputs,
                "finding_count": len(state.findings),
                "gap_count": len(state.gaps),
                "has_research_brief": state.research_brief is not None,
            }
        elif phase == DeepResearchPhase.REFINEMENT:
            # Only unresolved gaps are surfaced to the refiner.
            return {
                **base_inputs,
                "gaps": [g.description for g in state.gaps if not g.resolved],
                "remaining_iterations": state.max_iterations - state.iteration,
                "has_report_draft": state.report is not None,
            }
        # Unknown phase: fall back to the shared context only.
        return base_inputs
674
+
675
+ def evaluate_phase_completion(
676
+ self,
677
+ state: DeepResearchState,
678
+ phase: DeepResearchPhase,
679
+ ) -> AgentDecision:
680
+ """Supervisor evaluates whether a phase completed successfully.
681
+
682
+ This is the think-tool pause where the supervisor reflects on
683
+ the phase's outputs and decides whether to proceed.
684
+
685
+ Args:
686
+ state: Current research state (after phase execution)
687
+ phase: The phase that just completed
688
+
689
+ Returns:
690
+ AgentDecision with evaluation and proceed/retry rationale
691
+ """
692
+ evaluation = self._evaluate_phase_quality(state, phase)
693
+
694
+ decision = AgentDecision(
695
+ agent=AgentRole.SUPERVISOR,
696
+ action="evaluate_phase",
697
+ rationale=evaluation["rationale"],
698
+ inputs={
699
+ "phase": phase.value,
700
+ "iteration": state.iteration,
701
+ },
702
+ outputs=evaluation,
703
+ )
704
+
705
+ self._decisions.append(decision)
706
+ return decision
707
+
708
+ def _evaluate_phase_quality(
709
+ self,
710
+ state: DeepResearchState,
711
+ phase: DeepResearchPhase,
712
+ ) -> dict[str, Any]:
713
+ """Evaluate the quality of a completed phase.
714
+
715
+ Returns metrics and a proceed/retry recommendation.
716
+ """
717
+ if phase == DeepResearchPhase.PLANNING:
718
+ sub_query_count = len(state.sub_queries)
719
+ quality_ok = sub_query_count >= 2 # At least 2 sub-queries
720
+ return {
721
+ "sub_query_count": sub_query_count,
722
+ "has_research_brief": state.research_brief is not None,
723
+ "quality_ok": quality_ok,
724
+ "rationale": (
725
+ f"Planning produced {sub_query_count} sub-queries. "
726
+ f"{'Sufficient' if quality_ok else 'Insufficient'} for gathering."
727
+ ),
728
+ }
729
+
730
+ elif phase == DeepResearchPhase.GATHERING:
731
+ source_count = len(state.sources)
732
+ quality_ok = source_count >= 3 # At least 3 sources
733
+ return {
734
+ "source_count": source_count,
735
+ "quality_ok": quality_ok,
736
+ "rationale": (
737
+ f"Gathering collected {source_count} sources. "
738
+ f"{'Sufficient' if quality_ok else 'May need more sources'}."
739
+ ),
740
+ }
741
+
742
+ elif phase == DeepResearchPhase.ANALYSIS:
743
+ finding_count = len(state.findings)
744
+ high_confidence = len(
745
+ [f for f in state.findings if f.confidence == ConfidenceLevel.HIGH]
746
+ )
747
+ quality_ok = finding_count >= 2
748
+ return {
749
+ "finding_count": finding_count,
750
+ "high_confidence_count": high_confidence,
751
+ "quality_ok": quality_ok,
752
+ "rationale": (
753
+ f"Analysis extracted {finding_count} findings "
754
+ f"({high_confidence} high confidence). "
755
+ f"{'Ready for synthesis' if quality_ok else 'May need more analysis'}."
756
+ ),
757
+ }
758
+
759
+ elif phase == DeepResearchPhase.SYNTHESIS:
760
+ has_report = state.report is not None
761
+ report_length = len(state.report) if state.report else 0
762
+ quality_ok = has_report and report_length > 100
763
+ return {
764
+ "has_report": has_report,
765
+ "report_length": report_length,
766
+ "quality_ok": quality_ok,
767
+ "rationale": (
768
+ f"Synthesis {'produced' if has_report else 'failed to produce'} report "
769
+ f"({report_length} chars). "
770
+ f"{'Complete' if quality_ok else 'May need refinement'}."
771
+ ),
772
+ }
773
+
774
+ elif phase == DeepResearchPhase.REFINEMENT:
775
+ unaddressed_gaps = len([g for g in state.gaps if not g.resolved])
776
+ can_iterate = state.iteration < state.max_iterations
777
+ should_iterate = unaddressed_gaps > 0 and can_iterate
778
+ return {
779
+ "unaddressed_gaps": unaddressed_gaps,
780
+ "iteration": state.iteration,
781
+ "max_iterations": state.max_iterations,
782
+ "should_iterate": should_iterate,
783
+ "rationale": (
784
+ f"Refinement found {unaddressed_gaps} gaps. "
785
+ f"{'Will iterate' if should_iterate else 'Completing'} "
786
+ f"(iteration {state.iteration}/{state.max_iterations})."
787
+ ),
788
+ }
789
+
790
+ return {"rationale": f"Phase {phase.value} completed", "quality_ok": True}
791
+
792
+ def decide_iteration(self, state: DeepResearchState) -> AgentDecision:
793
+ """Supervisor decides whether to iterate or complete.
794
+
795
+ Called after synthesis to determine if refinement is needed.
796
+
797
+ Args:
798
+ state: Current research state
799
+
800
+ Returns:
801
+ AgentDecision with iterate vs complete decision
802
+ """
803
+ unaddressed_gaps = [g for g in state.gaps if not g.resolved]
804
+ can_iterate = state.iteration < state.max_iterations
805
+ should_iterate = len(unaddressed_gaps) > 0 and can_iterate
806
+
807
+ decision = AgentDecision(
808
+ agent=AgentRole.SUPERVISOR,
809
+ action="decide_iteration",
810
+ rationale=(
811
+ f"{'Iterating' if should_iterate else 'Completing'}: "
812
+ f"{len(unaddressed_gaps)} gaps, "
813
+ f"iteration {state.iteration}/{state.max_iterations}"
814
+ ),
815
+ inputs={
816
+ "gap_count": len(unaddressed_gaps),
817
+ "iteration": state.iteration,
818
+ "max_iterations": state.max_iterations,
819
+ },
820
+ outputs={
821
+ "should_iterate": should_iterate,
822
+ "next_phase": (
823
+ DeepResearchPhase.REFINEMENT.value
824
+ if should_iterate
825
+ else "COMPLETED"
826
+ ),
827
+ },
828
+ )
829
+
830
+ self._decisions.append(decision)
831
+ return decision
832
+
833
+ def record_to_state(self, state: DeepResearchState) -> None:
834
+ """Record all decisions to the state's metadata for persistence.
835
+
836
+ Args:
837
+ state: Research state to update
838
+ """
839
+ if "agent_decisions" not in state.metadata:
840
+ state.metadata["agent_decisions"] = []
841
+
842
+ state.metadata["agent_decisions"].extend(
843
+ [d.to_dict() for d in self._decisions]
844
+ )
845
+ self._decisions.clear()
846
+
847
+ def get_reflection_prompt(self, state: DeepResearchState, phase: DeepResearchPhase) -> str:
848
+ """Generate a reflection prompt for the supervisor think pause.
849
+
850
+ Args:
851
+ state: Current research state
852
+ phase: Phase that just completed
853
+
854
+ Returns:
855
+ Prompt for supervisor reflection
856
+ """
857
+ prompts = {
858
+ DeepResearchPhase.PLANNING: (
859
+ f"Planning complete. Generated {len(state.sub_queries)} sub-queries. "
860
+ f"Research brief: {bool(state.research_brief)}. "
861
+ "Evaluate: Are sub-queries comprehensive? Any gaps in coverage?"
862
+ ),
863
+ DeepResearchPhase.GATHERING: (
864
+ f"Gathering complete. Collected {len(state.sources)} sources. "
865
+ f"Evaluate: Is source diversity sufficient? Quality distribution?"
866
+ ),
867
+ DeepResearchPhase.ANALYSIS: (
868
+ f"Analysis complete. Extracted {len(state.findings)} findings, "
869
+ f"identified {len(state.gaps)} gaps. "
870
+ "Evaluate: Are findings well-supported? Critical gaps?"
871
+ ),
872
+ DeepResearchPhase.SYNTHESIS: (
873
+ f"Synthesis complete. Report: {len(state.report or '')} chars. "
874
+ f"Iteration {state.iteration}/{state.max_iterations}. "
875
+ "Evaluate: Report quality? Need refinement?"
876
+ ),
877
+ DeepResearchPhase.REFINEMENT: (
878
+ f"Refinement complete. Gaps addressed: "
879
+ f"{len([g for g in state.gaps if g.resolved])}/{len(state.gaps)}. "
880
+ "Evaluate: Continue iterating or finalize?"
881
+ ),
882
+ }
883
+ return prompts.get(phase, f"Phase {phase.value} complete. Evaluate progress.")
884
+
885
+
886
+ # =============================================================================
887
+ # Deep Research Workflow
888
+ # =============================================================================
889
+
890
+
891
+ class DeepResearchWorkflow(ResearchWorkflowBase):
892
+ """Multi-phase deep research workflow with background execution.
893
+
894
+ Supports:
895
+ - Async execution with immediate research_id return
896
+ - Status polling while research runs in background
897
+ - Cancellation and timeout handling
898
+ - Multi-agent supervisor hooks
899
+ - Session persistence for resume capability
900
+
901
+ Workflow Phases:
902
+ 1. PLANNING - Decompose query into sub-queries
903
+ 2. GATHERING - Execute sub-queries in parallel
904
+ 3. ANALYSIS - Extract findings and assess quality
905
+ 4. SYNTHESIS - Generate comprehensive report
906
+ 5. REFINEMENT - Identify gaps and iterate if needed
907
+ """
908
+
909
+ # Class-level task registry for background task tracking
910
+ _tasks: WeakValueDictionary[str, BackgroundTask] = WeakValueDictionary()
911
+
912
+ def __init__(
913
+ self,
914
+ config: ResearchConfig,
915
+ memory: Optional[ResearchMemory] = None,
916
+ hooks: Optional[SupervisorHooks] = None,
917
+ ) -> None:
918
+ """Initialize deep research workflow.
919
+
920
+ Args:
921
+ config: Research configuration
922
+ memory: Optional memory instance for persistence
923
+ hooks: Optional supervisor hooks for orchestration
924
+ """
925
+ super().__init__(config, memory)
926
+ self.hooks = hooks or SupervisorHooks()
927
+ self.orchestrator = SupervisorOrchestrator()
928
+ self._search_providers: dict[str, SearchProvider] = {}
929
+
930
+ def _audit_enabled(self) -> bool:
931
+ """Return True if audit artifacts are enabled."""
932
+ return bool(getattr(self.config, "deep_research_audit_artifacts", True))
933
+
934
+ def _audit_path(self, research_id: str) -> Path:
935
+ """Resolve audit artifact path for a research session."""
936
+ storage_path = self.config.get_storage_path()
937
+ return storage_path / "deep_research" / f"{research_id}.audit.jsonl"
938
+
939
+ def _write_audit_event(
940
+ self,
941
+ state: Optional[DeepResearchState],
942
+ event_type: str,
943
+ data: Optional[dict[str, Any]] = None,
944
+ level: str = "info",
945
+ ) -> None:
946
+ """Write a JSONL audit event for deep research observability."""
947
+ if not self._audit_enabled():
948
+ return
949
+
950
+ research_id = state.id if state else None
951
+ payload = {
952
+ "timestamp": datetime.utcnow().isoformat() + "Z",
953
+ "event_id": uuid4().hex,
954
+ "event_type": event_type,
955
+ "level": level,
956
+ "research_id": research_id,
957
+ "phase": state.phase.value if state else None,
958
+ "iteration": state.iteration if state else None,
959
+ "data": data or {},
960
+ }
961
+
962
+ try:
963
+ if research_id is None:
964
+ return
965
+ path = self._audit_path(research_id)
966
+ path.parent.mkdir(parents=True, exist_ok=True)
967
+ with path.open("a", encoding="utf-8") as handle:
968
+ handle.write(json.dumps(payload, ensure_ascii=True))
969
+ handle.write("\n")
970
+ except Exception as exc:
971
+ logger.error("Failed to write audit event: %s", exc)
972
+ # Fallback to stderr for crash visibility
973
+ print(
974
+ f"AUDIT_FALLBACK: {event_type} for {research_id} - {exc}",
975
+ file=sys.stderr,
976
+ flush=True,
977
+ )
978
+
979
    def _record_workflow_error(
        self,
        error: Exception,
        state: DeepResearchState,
        context: str,
    ) -> None:
        """Record error to the persistent error store.

        Best-effort: a failure to persist is logged and swallowed so error
        reporting can never mask the original workflow failure.

        Args:
            error: The exception that occurred
            state: Current research state
            context: Context string (e.g., "background_task", "orchestrator")
        """
        try:
            # Store lives under the user's home directory, independent of
            # the workflow's configured storage path.
            error_store = FileErrorStore(Path.home() / ".foundry-mcp" / "errors")
            record = ErrorRecord(
                id=f"err_{uuid4().hex[:12]}",
                # Fingerprint groups recurrences of the same exception class
                # raised from the same context.
                fingerprint=f"deep-research:{context}:{type(error).__name__}",
                error_code="WORKFLOW_ERROR",
                error_type="internal",
                tool_name=f"deep-research:{context}",
                correlation_id=state.id,
                message=str(error),
                exception_type=type(error).__name__,
                # Assumes we are still inside the except block for `error`,
                # so format_exc() captures its traceback.
                stack_trace=traceback.format_exc(),
                input_summary={
                    "research_id": state.id,
                    "phase": state.phase.value,
                    "iteration": state.iteration,
                },
            )
            error_store.append(record)
        except Exception as store_err:
            logger.error("Failed to record error to store: %s", store_err)
1013
+
1014
    def _safe_orchestrator_transition(
        self,
        state: DeepResearchState,
        phase: DeepResearchPhase,
    ) -> None:
        """Safely execute orchestrator phase transition with error logging.

        This wraps orchestrator calls with exception handling to ensure any
        failures are properly logged and recorded before re-raising.

        The ordering below is deliberate: evaluate, reflect (think pause),
        persist decisions to state metadata, and only then advance the phase.

        Args:
            state: Current research state
            phase: The phase that just completed

        Raises:
            Exception: Re-raises any exception after logging
        """
        try:
            self.orchestrator.evaluate_phase_completion(state, phase)
            prompt = self.orchestrator.get_reflection_prompt(state, phase)
            self.hooks.think_pause(state, prompt)
            # Flush buffered supervisor decisions into state.metadata before
            # moving on, so they survive a crash in the next phase.
            self.orchestrator.record_to_state(state)
            state.advance_phase()
        except Exception as exc:
            logger.exception(
                "Orchestrator transition failed for phase %s, research %s: %s",
                phase.value,
                state.id,
                exc,
            )
            self._write_audit_event(
                state,
                "orchestrator_error",
                data={
                    "phase": phase.value,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
                level="error",
            )
            self._record_workflow_error(exc, state, f"orchestrator_{phase.value}")
            raise  # Re-raise to be caught by workflow exception handler
1056
+
1057
+ # =========================================================================
1058
+ # Public API
1059
+ # =========================================================================
1060
+
1061
+ def execute(
1062
+ self,
1063
+ query: Optional[str] = None,
1064
+ research_id: Optional[str] = None,
1065
+ action: str = "start",
1066
+ provider_id: Optional[str] = None,
1067
+ system_prompt: Optional[str] = None,
1068
+ max_iterations: int = 3,
1069
+ max_sub_queries: int = 5,
1070
+ max_sources_per_query: int = 5,
1071
+ follow_links: bool = True,
1072
+ timeout_per_operation: float = 120.0,
1073
+ max_concurrent: int = 3,
1074
+ background: bool = False,
1075
+ task_timeout: Optional[float] = None,
1076
+ **kwargs: Any,
1077
+ ) -> WorkflowResult:
1078
+ """Execute deep research workflow.
1079
+
1080
+ Actions:
1081
+ - start: Begin new research session
1082
+ - continue: Resume existing session
1083
+ - status: Get current status
1084
+ - report: Get final report
1085
+ - cancel: Cancel running task
1086
+
1087
+ Args:
1088
+ query: Research query (required for 'start')
1089
+ research_id: Session ID (required for continue/status/report/cancel)
1090
+ action: One of 'start', 'continue', 'status', 'report', 'cancel'
1091
+ provider_id: Provider for LLM operations
1092
+ system_prompt: Optional custom system prompt
1093
+ max_iterations: Maximum refinement iterations (default: 3)
1094
+ max_sub_queries: Maximum sub-queries to generate (default: 5)
1095
+ max_sources_per_query: Maximum sources per query (default: 5)
1096
+ follow_links: Whether to extract content from URLs (default: True)
1097
+ timeout_per_operation: Timeout per operation in seconds (default: 30)
1098
+ max_concurrent: Maximum concurrent operations (default: 3)
1099
+ background: Run in background, return immediately (default: False)
1100
+ task_timeout: Overall timeout for background task (optional)
1101
+
1102
+ Returns:
1103
+ WorkflowResult with research state or error
1104
+ """
1105
+ if action == "start":
1106
+ return self._start_research(
1107
+ query=query,
1108
+ provider_id=provider_id,
1109
+ system_prompt=system_prompt,
1110
+ max_iterations=max_iterations,
1111
+ max_sub_queries=max_sub_queries,
1112
+ max_sources_per_query=max_sources_per_query,
1113
+ follow_links=follow_links,
1114
+ timeout_per_operation=timeout_per_operation,
1115
+ max_concurrent=max_concurrent,
1116
+ background=background,
1117
+ task_timeout=task_timeout,
1118
+ )
1119
+ elif action == "continue":
1120
+ return self._continue_research(
1121
+ research_id=research_id,
1122
+ provider_id=provider_id,
1123
+ timeout_per_operation=timeout_per_operation,
1124
+ max_concurrent=max_concurrent,
1125
+ background=background,
1126
+ task_timeout=task_timeout,
1127
+ )
1128
+ elif action == "status":
1129
+ return self._get_status(research_id=research_id)
1130
+ elif action == "report":
1131
+ return self._get_report(research_id=research_id)
1132
+ elif action == "cancel":
1133
+ return self._cancel_research(research_id=research_id)
1134
+ else:
1135
+ return WorkflowResult(
1136
+ success=False,
1137
+ content="",
1138
+ error=f"Unknown action '{action}'. Use: start, continue, status, report, cancel",
1139
+ )
1140
+
1141
+ # =========================================================================
1142
+ # Background Task Management
1143
+ # =========================================================================
1144
+
1145
    def _start_background_task(
        self,
        state: DeepResearchState,
        provider_id: Optional[str],
        timeout_per_operation: float,
        max_concurrent: int,
        task_timeout: Optional[float],
    ) -> WorkflowResult:
        """Start research as a background task using a daemon thread.

        Returns immediately with research_id. The actual workflow
        runs in a daemon thread using asyncio.run().

        This approach works correctly from sync MCP tool handlers where
        there is no running event loop.
        """
        # Create BackgroundTask tracking structure first
        bg_task = BackgroundTask(
            research_id=state.id,
            timeout=task_timeout,
        )
        self._tasks[state.id] = bg_task

        # Register session for crash handler visibility
        _active_research_sessions[state.id] = state

        # Reference to self for use in thread
        workflow = self

        def run_in_thread() -> None:
            """Thread target that runs the async workflow."""
            try:
                async def run_workflow() -> WorkflowResult:
                    """Execute the full workflow asynchronously.

                    Converts cancellation/timeout/errors into failed
                    WorkflowResults so the outer thread always gets a result.
                    """
                    try:
                        coro = workflow._execute_workflow_async(
                            state=state,
                            provider_id=provider_id,
                            timeout_per_operation=timeout_per_operation,
                            max_concurrent=max_concurrent,
                        )
                        # NOTE(review): truthiness check means task_timeout=0.0
                        # disables the overall timeout — presumably intended.
                        if task_timeout:
                            return await asyncio.wait_for(coro, timeout=task_timeout)
                        return await coro
                    except asyncio.CancelledError:
                        # Persist the cancelled flag so status polling sees it.
                        state.metadata["cancelled"] = True
                        workflow.memory.save_deep_research(state)
                        workflow._write_audit_event(
                            state,
                            "workflow_cancelled",
                            data={"cancelled": True},
                            level="warning",
                        )
                        return WorkflowResult(
                            success=False,
                            content="",
                            error="Research was cancelled",
                            metadata={"research_id": state.id, "cancelled": True},
                        )
                    except asyncio.TimeoutError:
                        # Record where the run was aborted for post-mortem.
                        state.metadata["timeout"] = True
                        state.metadata["abort_phase"] = state.phase.value
                        state.metadata["abort_iteration"] = state.iteration
                        workflow.memory.save_deep_research(state)
                        workflow._write_audit_event(
                            state,
                            "workflow_timeout",
                            data={
                                "timeout_seconds": task_timeout,
                                "abort_phase": state.phase.value,
                                "abort_iteration": state.iteration,
                            },
                            level="warning",
                        )
                        return WorkflowResult(
                            success=False,
                            content="",
                            error=f"Research timed out after {task_timeout}s",
                            metadata={"research_id": state.id, "timeout": True},
                        )
                    except Exception as exc:
                        logger.exception("Background workflow failed: %s", exc)
                        workflow._write_audit_event(
                            state,
                            "workflow_error",
                            data={"error": str(exc)},
                            level="error",
                        )
                        return WorkflowResult(
                            success=False,
                            content="",
                            error=str(exc),
                            metadata={"research_id": state.id},
                        )

                # Run the async workflow in a new event loop
                result = asyncio.run(run_workflow())

                # Handle completion: timeouts get a dedicated terminal status.
                if result.metadata and result.metadata.get("timeout"):
                    bg_task.status = TaskStatus.TIMEOUT
                    bg_task.result = result
                    bg_task.completed_at = time.time()
                    bg_task.error = result.error
                else:
                    bg_task.mark_completed(result)

            except Exception as exc:
                # Log the exception with full traceback
                logger.exception(
                    "Background task failed for research %s: %s",
                    state.id, exc
                )
                bg_task.status = TaskStatus.FAILED
                bg_task.error = str(exc)
                bg_task.completed_at = time.time()
                # Record to error store and audit (best effort)
                try:
                    workflow._record_workflow_error(exc, state, "background_task")
                    workflow._write_audit_event(
                        state,
                        "background_task_failed",
                        data={
                            "error": str(exc),
                            "traceback": traceback.format_exc(),
                        },
                        level="error",
                    )
                except Exception:
                    pass  # Already logged above
            finally:
                # Unregister from active sessions
                _active_research_sessions.pop(state.id, None)

        # Create and start the daemon thread
        thread = threading.Thread(
            target=run_in_thread,
            name=f"deep-research-{state.id[:8]}",
            daemon=True,  # Don't prevent process exit
        )
        bg_task.thread = thread

        # Audit the launch before start() so the event exists even if the
        # thread dies immediately.
        self._write_audit_event(
            state,
            "background_task_started",
            data={
                "task_timeout": task_timeout,
                "timeout_per_operation": timeout_per_operation,
                "max_concurrent": max_concurrent,
                "thread_name": thread.name,
            },
        )

        thread.start()

        return WorkflowResult(
            success=True,
            content=f"Research started in background: {state.id}",
            metadata={
                "research_id": state.id,
                "background": True,
                "phase": state.phase.value,
            },
        )
1309
+
1310
+ def get_background_task(self, research_id: str) -> Optional[BackgroundTask]:
1311
+ """Get a background task by research ID."""
1312
+ return self._tasks.get(research_id)
1313
+
1314
+ # =========================================================================
1315
+ # Action Handlers
1316
+ # =========================================================================
1317
+
1318
+ def _start_research(
1319
+ self,
1320
+ query: Optional[str],
1321
+ provider_id: Optional[str],
1322
+ system_prompt: Optional[str],
1323
+ max_iterations: int,
1324
+ max_sub_queries: int,
1325
+ max_sources_per_query: int,
1326
+ follow_links: bool,
1327
+ timeout_per_operation: float,
1328
+ max_concurrent: int,
1329
+ background: bool,
1330
+ task_timeout: Optional[float],
1331
+ ) -> WorkflowResult:
1332
+ """Start a new deep research session."""
1333
+ if not query:
1334
+ return WorkflowResult(
1335
+ success=False,
1336
+ content="",
1337
+ error="Query is required to start research",
1338
+ )
1339
+
1340
+ # Resolve per-phase providers and models from config
1341
+ # Supports ProviderSpec format: "[cli]gemini:pro" -> (provider_id, model)
1342
+ planning_pid, planning_model = self.config.resolve_phase_provider("planning")
1343
+ analysis_pid, analysis_model = self.config.resolve_phase_provider("analysis")
1344
+ synthesis_pid, synthesis_model = self.config.resolve_phase_provider("synthesis")
1345
+ refinement_pid, refinement_model = self.config.resolve_phase_provider("refinement")
1346
+
1347
+ # Create initial state with per-phase provider configuration
1348
+ state = DeepResearchState(
1349
+ original_query=query,
1350
+ max_iterations=max_iterations,
1351
+ max_sub_queries=max_sub_queries,
1352
+ max_sources_per_query=max_sources_per_query,
1353
+ follow_links=follow_links,
1354
+ research_mode=ResearchMode(self.config.deep_research_mode),
1355
+ system_prompt=system_prompt,
1356
+ # Per-phase providers: explicit provider_id overrides config
1357
+ planning_provider=provider_id or planning_pid,
1358
+ analysis_provider=provider_id or analysis_pid,
1359
+ synthesis_provider=provider_id or synthesis_pid,
1360
+ refinement_provider=provider_id or refinement_pid,
1361
+ # Per-phase models from ProviderSpec (only used if provider_id not overridden)
1362
+ planning_model=None if provider_id else planning_model,
1363
+ analysis_model=None if provider_id else analysis_model,
1364
+ synthesis_model=None if provider_id else synthesis_model,
1365
+ refinement_model=None if provider_id else refinement_model,
1366
+ )
1367
+
1368
+ # Save initial state
1369
+ self.memory.save_deep_research(state)
1370
+ self._write_audit_event(
1371
+ state,
1372
+ "workflow_start",
1373
+ data={
1374
+ "query": state.original_query,
1375
+ "config": {
1376
+ "max_iterations": max_iterations,
1377
+ "max_sub_queries": max_sub_queries,
1378
+ "max_sources_per_query": max_sources_per_query,
1379
+ "follow_links": follow_links,
1380
+ "timeout_per_operation": timeout_per_operation,
1381
+ "max_concurrent": max_concurrent,
1382
+ },
1383
+ "provider_id": provider_id,
1384
+ "background": background,
1385
+ "task_timeout": task_timeout,
1386
+ },
1387
+ )
1388
+
1389
+ if background:
1390
+ return self._start_background_task(
1391
+ state=state,
1392
+ provider_id=provider_id,
1393
+ timeout_per_operation=timeout_per_operation,
1394
+ max_concurrent=max_concurrent,
1395
+ task_timeout=task_timeout,
1396
+ )
1397
+
1398
+ # Synchronous execution
1399
+ try:
1400
+ loop = asyncio.get_event_loop()
1401
+ if loop.is_running():
1402
+ # Already in async context, run directly
1403
+ import concurrent.futures
1404
+ with concurrent.futures.ThreadPoolExecutor() as executor:
1405
+ future = executor.submit(
1406
+ asyncio.run,
1407
+ self._execute_workflow_async(
1408
+ state=state,
1409
+ provider_id=provider_id,
1410
+ timeout_per_operation=timeout_per_operation,
1411
+ max_concurrent=max_concurrent,
1412
+ ),
1413
+ )
1414
+ return future.result()
1415
+ else:
1416
+ return loop.run_until_complete(
1417
+ self._execute_workflow_async(
1418
+ state=state,
1419
+ provider_id=provider_id,
1420
+ timeout_per_operation=timeout_per_operation,
1421
+ max_concurrent=max_concurrent,
1422
+ )
1423
+ )
1424
+ except RuntimeError:
1425
+ return asyncio.run(
1426
+ self._execute_workflow_async(
1427
+ state=state,
1428
+ provider_id=provider_id,
1429
+ timeout_per_operation=timeout_per_operation,
1430
+ max_concurrent=max_concurrent,
1431
+ )
1432
+ )
1433
+
1434
+ def _continue_research(
1435
+ self,
1436
+ research_id: Optional[str],
1437
+ provider_id: Optional[str],
1438
+ timeout_per_operation: float,
1439
+ max_concurrent: int,
1440
+ background: bool = False,
1441
+ task_timeout: Optional[float] = None,
1442
+ ) -> WorkflowResult:
1443
+ """Continue an existing research session.
1444
+
1445
+ Args:
1446
+ research_id: ID of the research session to continue
1447
+ provider_id: Optional provider ID for LLM calls
1448
+ timeout_per_operation: Timeout per operation in seconds
1449
+ max_concurrent: Maximum concurrent operations
1450
+ background: If True, run in background thread (default: False)
1451
+ task_timeout: Overall timeout for background task (optional)
1452
+
1453
+ Returns:
1454
+ WorkflowResult with research state or error
1455
+ """
1456
+ if not research_id:
1457
+ return WorkflowResult(
1458
+ success=False,
1459
+ content="",
1460
+ error="research_id is required to continue research",
1461
+ )
1462
+
1463
+ # Load existing state
1464
+ state = self.memory.load_deep_research(research_id)
1465
+ if state is None:
1466
+ return WorkflowResult(
1467
+ success=False,
1468
+ content="",
1469
+ error=f"Research session '{research_id}' not found",
1470
+ )
1471
+
1472
+ if state.completed_at is not None:
1473
+ return WorkflowResult(
1474
+ success=True,
1475
+ content=state.report or "Research already completed",
1476
+ metadata={
1477
+ "research_id": state.id,
1478
+ "phase": state.phase.value,
1479
+ "is_complete": True,
1480
+ },
1481
+ )
1482
+
1483
+ # Run in background if requested
1484
+ if background:
1485
+ return self._start_background_task(
1486
+ state=state,
1487
+ provider_id=provider_id,
1488
+ timeout_per_operation=timeout_per_operation,
1489
+ max_concurrent=max_concurrent,
1490
+ task_timeout=task_timeout,
1491
+ )
1492
+
1493
+ # Continue from current phase synchronously
1494
+ try:
1495
+ loop = asyncio.get_event_loop()
1496
+ if loop.is_running():
1497
+ # Already in async context, run in thread pool
1498
+ import concurrent.futures
1499
+ with concurrent.futures.ThreadPoolExecutor() as executor:
1500
+ future = executor.submit(
1501
+ asyncio.run,
1502
+ self._execute_workflow_async(
1503
+ state=state,
1504
+ provider_id=provider_id,
1505
+ timeout_per_operation=timeout_per_operation,
1506
+ max_concurrent=max_concurrent,
1507
+ ),
1508
+ )
1509
+ return future.result()
1510
+ else:
1511
+ return loop.run_until_complete(
1512
+ self._execute_workflow_async(
1513
+ state=state,
1514
+ provider_id=provider_id,
1515
+ timeout_per_operation=timeout_per_operation,
1516
+ max_concurrent=max_concurrent,
1517
+ )
1518
+ )
1519
+ except RuntimeError:
1520
+ return asyncio.run(
1521
+ self._execute_workflow_async(
1522
+ state=state,
1523
+ provider_id=provider_id,
1524
+ timeout_per_operation=timeout_per_operation,
1525
+ max_concurrent=max_concurrent,
1526
+ )
1527
+ )
1528
+
1529
    def _get_status(self, research_id: Optional[str]) -> WorkflowResult:
        """Get the current status of a research session.

        Prefers the live background task (if one is registered) and enriches
        it with persisted progress metrics; otherwise falls back entirely to
        the persisted state.
        """
        if not research_id:
            return WorkflowResult(
                success=False,
                content="",
                error="research_id is required",
            )

        # Check background task first
        bg_task = self.get_background_task(research_id)
        if bg_task:
            # Also load persisted state to get progress metrics
            state = self.memory.load_deep_research(research_id)
            metadata: dict[str, Any] = {
                "research_id": research_id,
                "task_status": bg_task.status.value,
                "elapsed_ms": bg_task.elapsed_ms,
                "is_complete": bg_task.is_done,
            }
            # Include progress from persisted state if available
            if state:
                metadata.update({
                    "original_query": state.original_query,
                    "phase": state.phase.value,
                    "iteration": state.iteration,
                    "max_iterations": state.max_iterations,
                    "sub_queries_total": len(state.sub_queries),
                    "sub_queries_completed": len(state.completed_sub_queries()),
                    "source_count": len(state.sources),
                    "finding_count": len(state.findings),
                    "gap_count": len(state.unresolved_gaps()),
                    "total_tokens_used": state.total_tokens_used,
                    "is_failed": bool(state.metadata.get("failed")),
                    "failure_error": state.metadata.get("failure_error"),
                })
            return WorkflowResult(
                success=True,
                content=f"Task status: {bg_task.status.value}",
                metadata=metadata,
            )

        # Fall back to persisted state (task completed or not running)
        state = self.memory.load_deep_research(research_id)
        if state is None:
            return WorkflowResult(
                success=False,
                content="",
                error=f"Research session '{research_id}' not found",
            )

        # Determine status string: failed beats completed beats in-progress.
        is_failed = bool(state.metadata.get("failed"))
        if is_failed:
            status_str = "Failed"
        elif state.completed_at:
            status_str = "Completed"
        else:
            status_str = "In Progress"

        # Human-readable summary for the result content.
        status_lines = [
            f"Research ID: {state.id}",
            f"Query: {state.original_query}",
            f"Phase: {state.phase.value}",
            f"Iteration: {state.iteration}/{state.max_iterations}",
            f"Sub-queries: {len(state.completed_sub_queries())}/{len(state.sub_queries)} completed",
            f"Sources: {len(state.sources)} examined",
            f"Findings: {len(state.findings)}",
            f"Gaps: {len(state.unresolved_gaps())} unresolved",
            f"Status: {status_str}",
        ]
        if state.metadata.get("timeout"):
            status_lines.append("Timeout: True")
        if state.metadata.get("cancelled"):
            status_lines.append("Cancelled: True")
        if is_failed:
            failure_error = state.metadata.get("failure_error", "Unknown error")
            status_lines.append(f"Error: {failure_error}")

        return WorkflowResult(
            success=True,
            content="\n".join(status_lines),
            metadata={
                "research_id": state.id,
                "original_query": state.original_query,
                "phase": state.phase.value,
                "iteration": state.iteration,
                "max_iterations": state.max_iterations,
                "sub_queries_total": len(state.sub_queries),
                "sub_queries_completed": len(state.completed_sub_queries()),
                "source_count": len(state.sources),
                "finding_count": len(state.findings),
                "gap_count": len(state.unresolved_gaps()),
                "is_complete": state.completed_at is not None,
                "is_failed": is_failed,
                "failure_error": state.metadata.get("failure_error"),
                "total_tokens_used": state.total_tokens_used,
                "total_duration_ms": state.total_duration_ms,
                "timed_out": bool(state.metadata.get("timeout")),
                "cancelled": bool(state.metadata.get("cancelled")),
            },
        )
1631
+
1632
+ def _get_report(self, research_id: Optional[str]) -> WorkflowResult:
1633
+ """Get the final report from a research session."""
1634
+ if not research_id:
1635
+ return WorkflowResult(
1636
+ success=False,
1637
+ content="",
1638
+ error="research_id is required",
1639
+ )
1640
+
1641
+ state = self.memory.load_deep_research(research_id)
1642
+ if state is None:
1643
+ return WorkflowResult(
1644
+ success=False,
1645
+ content="",
1646
+ error=f"Research session '{research_id}' not found",
1647
+ )
1648
+
1649
+ if not state.report:
1650
+ return WorkflowResult(
1651
+ success=False,
1652
+ content="",
1653
+ error="Research report not yet generated",
1654
+ )
1655
+
1656
+ return WorkflowResult(
1657
+ success=True,
1658
+ content=state.report,
1659
+ metadata={
1660
+ "research_id": state.id,
1661
+ "original_query": state.original_query,
1662
+ "source_count": len(state.sources),
1663
+ "finding_count": len(state.findings),
1664
+ "iteration": state.iteration,
1665
+ "is_complete": state.completed_at is not None,
1666
+ },
1667
+ )
1668
+
1669
+ def _cancel_research(self, research_id: Optional[str]) -> WorkflowResult:
1670
+ """Cancel a running research task."""
1671
+ if not research_id:
1672
+ return WorkflowResult(
1673
+ success=False,
1674
+ content="",
1675
+ error="research_id is required",
1676
+ )
1677
+
1678
+ bg_task = self.get_background_task(research_id)
1679
+ if bg_task is None:
1680
+ return WorkflowResult(
1681
+ success=False,
1682
+ content="",
1683
+ error=f"No running task found for '{research_id}'",
1684
+ )
1685
+
1686
+ if bg_task.cancel():
1687
+ state = self.memory.load_deep_research(research_id)
1688
+ if state:
1689
+ self._write_audit_event(
1690
+ state,
1691
+ "workflow_cancelled",
1692
+ data={"cancelled": True},
1693
+ level="warning",
1694
+ )
1695
+ return WorkflowResult(
1696
+ success=True,
1697
+ content=f"Research '{research_id}' cancelled",
1698
+ metadata={"research_id": research_id, "cancelled": True},
1699
+ )
1700
+ else:
1701
+ return WorkflowResult(
1702
+ success=False,
1703
+ content="",
1704
+ error=f"Task '{research_id}' already completed",
1705
+ )
1706
+
1707
+ # =========================================================================
1708
+ # Async Workflow Execution
1709
+ # =========================================================================
1710
+
1711
+ async def _execute_workflow_async(
1712
+ self,
1713
+ state: DeepResearchState,
1714
+ provider_id: Optional[str],
1715
+ timeout_per_operation: float,
1716
+ max_concurrent: int,
1717
+ ) -> WorkflowResult:
1718
+ """Execute the full workflow asynchronously.
1719
+
1720
+ This is the main async entry point that orchestrates all phases.
1721
+ """
1722
+ start_time = time.perf_counter()
1723
+
1724
+ try:
1725
+ # Phase execution based on current state
1726
+ if state.phase == DeepResearchPhase.PLANNING:
1727
+ phase_started = time.perf_counter()
1728
+ self.hooks.emit_phase_start(state)
1729
+ self._write_audit_event(
1730
+ state,
1731
+ "phase_start",
1732
+ data={"phase": state.phase.value},
1733
+ )
1734
+ result = await self._execute_planning_async(
1735
+ state=state,
1736
+ provider_id=state.planning_provider,
1737
+ timeout=self.config.get_phase_timeout("planning"),
1738
+ )
1739
+ if not result.success:
1740
+ self._write_audit_event(
1741
+ state,
1742
+ "phase_error",
1743
+ data={"phase": state.phase.value, "error": result.error},
1744
+ level="error",
1745
+ )
1746
+ state.mark_failed(result.error or f"Phase {state.phase.value} failed")
1747
+ self.memory.save_deep_research(state)
1748
+ return result
1749
+ self.hooks.emit_phase_complete(state)
1750
+ self._write_audit_event(
1751
+ state,
1752
+ "phase_complete",
1753
+ data={
1754
+ "phase": state.phase.value,
1755
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1756
+ },
1757
+ )
1758
+ # Think pause: supervisor evaluates planning quality
1759
+ self._safe_orchestrator_transition(state, DeepResearchPhase.PLANNING)
1760
+
1761
+ if state.phase == DeepResearchPhase.GATHERING:
1762
+ phase_started = time.perf_counter()
1763
+ self.hooks.emit_phase_start(state)
1764
+ self._write_audit_event(
1765
+ state,
1766
+ "phase_start",
1767
+ data={"phase": state.phase.value},
1768
+ )
1769
+ result = await self._execute_gathering_async(
1770
+ state=state,
1771
+ provider_id=provider_id,
1772
+ timeout=timeout_per_operation,
1773
+ max_concurrent=max_concurrent,
1774
+ )
1775
+ if not result.success:
1776
+ self._write_audit_event(
1777
+ state,
1778
+ "phase_error",
1779
+ data={"phase": state.phase.value, "error": result.error},
1780
+ level="error",
1781
+ )
1782
+ state.mark_failed(result.error or f"Phase {state.phase.value} failed")
1783
+ self.memory.save_deep_research(state)
1784
+ return result
1785
+ self.hooks.emit_phase_complete(state)
1786
+ self._write_audit_event(
1787
+ state,
1788
+ "phase_complete",
1789
+ data={
1790
+ "phase": state.phase.value,
1791
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1792
+ },
1793
+ )
1794
+ # Think pause: supervisor evaluates gathering quality
1795
+ self._safe_orchestrator_transition(state, DeepResearchPhase.GATHERING)
1796
+
1797
+ if state.phase == DeepResearchPhase.ANALYSIS:
1798
+ phase_started = time.perf_counter()
1799
+ self.hooks.emit_phase_start(state)
1800
+ self._write_audit_event(
1801
+ state,
1802
+ "phase_start",
1803
+ data={"phase": state.phase.value},
1804
+ )
1805
+ result = await self._execute_analysis_async(
1806
+ state=state,
1807
+ provider_id=state.analysis_provider,
1808
+ timeout=self.config.get_phase_timeout("analysis"),
1809
+ )
1810
+ if not result.success:
1811
+ self._write_audit_event(
1812
+ state,
1813
+ "phase_error",
1814
+ data={"phase": state.phase.value, "error": result.error},
1815
+ level="error",
1816
+ )
1817
+ state.mark_failed(result.error or f"Phase {state.phase.value} failed")
1818
+ self.memory.save_deep_research(state)
1819
+ return result
1820
+ self.hooks.emit_phase_complete(state)
1821
+ self._write_audit_event(
1822
+ state,
1823
+ "phase_complete",
1824
+ data={
1825
+ "phase": state.phase.value,
1826
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1827
+ },
1828
+ )
1829
+ # Think pause: supervisor evaluates analysis quality
1830
+ self._safe_orchestrator_transition(state, DeepResearchPhase.ANALYSIS)
1831
+
1832
+ if state.phase == DeepResearchPhase.SYNTHESIS:
1833
+ phase_started = time.perf_counter()
1834
+ self.hooks.emit_phase_start(state)
1835
+ self._write_audit_event(
1836
+ state,
1837
+ "phase_start",
1838
+ data={"phase": state.phase.value},
1839
+ )
1840
+ result = await self._execute_synthesis_async(
1841
+ state=state,
1842
+ provider_id=state.synthesis_provider,
1843
+ timeout=self.config.get_phase_timeout("synthesis"),
1844
+ )
1845
+ if not result.success:
1846
+ self._write_audit_event(
1847
+ state,
1848
+ "phase_error",
1849
+ data={"phase": state.phase.value, "error": result.error},
1850
+ level="error",
1851
+ )
1852
+ state.mark_failed(result.error or f"Phase {state.phase.value} failed")
1853
+ self.memory.save_deep_research(state)
1854
+ return result
1855
+ self.hooks.emit_phase_complete(state)
1856
+ self._write_audit_event(
1857
+ state,
1858
+ "phase_complete",
1859
+ data={
1860
+ "phase": state.phase.value,
1861
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1862
+ },
1863
+ )
1864
+ # Think pause: supervisor evaluates synthesis and decides iteration
1865
+ try:
1866
+ self.orchestrator.evaluate_phase_completion(state, DeepResearchPhase.SYNTHESIS)
1867
+ self.orchestrator.decide_iteration(state)
1868
+ prompt = self.orchestrator.get_reflection_prompt(state, DeepResearchPhase.SYNTHESIS)
1869
+ self.hooks.think_pause(state, prompt)
1870
+ self.orchestrator.record_to_state(state)
1871
+ except Exception as exc:
1872
+ logger.exception(
1873
+ "Orchestrator transition failed for synthesis, research %s: %s",
1874
+ state.id,
1875
+ exc,
1876
+ )
1877
+ self._write_audit_event(
1878
+ state,
1879
+ "orchestrator_error",
1880
+ data={
1881
+ "phase": "synthesis",
1882
+ "error": str(exc),
1883
+ "traceback": traceback.format_exc(),
1884
+ },
1885
+ level="error",
1886
+ )
1887
+ self._record_workflow_error(exc, state, "orchestrator_synthesis")
1888
+ raise
1889
+
1890
+ # Check if refinement needed
1891
+ if state.should_continue_refinement():
1892
+ state.phase = DeepResearchPhase.REFINEMENT
1893
+ else:
1894
+ state.mark_completed(report=result.content)
1895
+
1896
+ # Handle refinement phase
1897
+ if state.phase == DeepResearchPhase.REFINEMENT:
1898
+ phase_started = time.perf_counter()
1899
+ self.hooks.emit_phase_start(state)
1900
+ self._write_audit_event(
1901
+ state,
1902
+ "phase_start",
1903
+ data={"phase": state.phase.value},
1904
+ )
1905
+ # Generate follow-up queries from gaps
1906
+ await self._execute_refinement_async(
1907
+ state=state,
1908
+ provider_id=state.refinement_provider,
1909
+ timeout=self.config.get_phase_timeout("refinement"),
1910
+ )
1911
+ self.hooks.emit_phase_complete(state)
1912
+ self._write_audit_event(
1913
+ state,
1914
+ "phase_complete",
1915
+ data={
1916
+ "phase": state.phase.value,
1917
+ "duration_ms": (time.perf_counter() - phase_started) * 1000,
1918
+ },
1919
+ )
1920
+
1921
+ if state.should_continue_refinement():
1922
+ state.start_new_iteration()
1923
+ # Recursively continue workflow
1924
+ return await self._execute_workflow_async(
1925
+ state=state,
1926
+ provider_id=provider_id,
1927
+ timeout_per_operation=timeout_per_operation,
1928
+ max_concurrent=max_concurrent,
1929
+ )
1930
+ else:
1931
+ state.mark_completed(report=state.report)
1932
+
1933
+ # Calculate duration
1934
+ duration_ms = (time.perf_counter() - start_time) * 1000
1935
+ state.total_duration_ms += duration_ms
1936
+
1937
+ # Save final state
1938
+ self.memory.save_deep_research(state)
1939
+ self._write_audit_event(
1940
+ state,
1941
+ "workflow_complete",
1942
+ data={
1943
+ "success": True,
1944
+ "phase": state.phase.value,
1945
+ "iteration": state.iteration,
1946
+ "sub_query_count": len(state.sub_queries),
1947
+ "source_count": len(state.sources),
1948
+ "finding_count": len(state.findings),
1949
+ "gap_count": len(state.unresolved_gaps()),
1950
+ "report_length": len(state.report or ""),
1951
+ # Existing totals
1952
+ "total_tokens_used": state.total_tokens_used,
1953
+ "total_duration_ms": state.total_duration_ms,
1954
+ # Token breakdown totals
1955
+ "total_input_tokens": sum(
1956
+ m.input_tokens for m in state.phase_metrics
1957
+ ),
1958
+ "total_output_tokens": sum(
1959
+ m.output_tokens for m in state.phase_metrics
1960
+ ),
1961
+ "total_cached_tokens": sum(
1962
+ m.cached_tokens for m in state.phase_metrics
1963
+ ),
1964
+ # Per-phase metrics
1965
+ "phase_metrics": [
1966
+ {
1967
+ "phase": m.phase,
1968
+ "duration_ms": m.duration_ms,
1969
+ "input_tokens": m.input_tokens,
1970
+ "output_tokens": m.output_tokens,
1971
+ "cached_tokens": m.cached_tokens,
1972
+ "provider_id": m.provider_id,
1973
+ "model_used": m.model_used,
1974
+ }
1975
+ for m in state.phase_metrics
1976
+ ],
1977
+ # Search provider stats
1978
+ "search_provider_stats": state.search_provider_stats,
1979
+ "total_search_queries": sum(state.search_provider_stats.values()),
1980
+ # Source hostnames
1981
+ "source_hostnames": sorted(
1982
+ set(
1983
+ h
1984
+ for s in state.sources
1985
+ if s.url and (h := _extract_hostname(s.url))
1986
+ )
1987
+ ),
1988
+ # Research mode
1989
+ "research_mode": state.research_mode.value,
1990
+ },
1991
+ )
1992
+
1993
+ return WorkflowResult(
1994
+ success=True,
1995
+ content=state.report or "Research completed",
1996
+ provider_id=provider_id,
1997
+ tokens_used=state.total_tokens_used,
1998
+ duration_ms=duration_ms,
1999
+ metadata={
2000
+ "research_id": state.id,
2001
+ "phase": state.phase.value,
2002
+ "iteration": state.iteration,
2003
+ "sub_query_count": len(state.sub_queries),
2004
+ "source_count": len(state.sources),
2005
+ "finding_count": len(state.findings),
2006
+ "gap_count": len(state.unresolved_gaps()),
2007
+ "is_complete": state.completed_at is not None,
2008
+ },
2009
+ )
2010
+
2011
+ except Exception as exc:
2012
+ tb_str = traceback.format_exc()
2013
+ logger.exception(
2014
+ "Workflow execution failed at phase %s, iteration %d: %s",
2015
+ state.phase.value,
2016
+ state.iteration,
2017
+ exc,
2018
+ )
2019
+ self.memory.save_deep_research(state)
2020
+ self._write_audit_event(
2021
+ state,
2022
+ "workflow_error",
2023
+ data={
2024
+ "error": str(exc),
2025
+ "traceback": tb_str,
2026
+ "phase": state.phase.value,
2027
+ "iteration": state.iteration,
2028
+ },
2029
+ level="error",
2030
+ )
2031
+ self._record_workflow_error(exc, state, "workflow_execution")
2032
+ return WorkflowResult(
2033
+ success=False,
2034
+ content="",
2035
+ error=str(exc),
2036
+ metadata={
2037
+ "research_id": state.id,
2038
+ "phase": state.phase.value,
2039
+ "iteration": state.iteration,
2040
+ },
2041
+ )
2042
+
2043
    # =========================================================================
    # Phase Implementations
    # =========================================================================
2046
+
2047
    async def _execute_planning_async(
        self,
        state: DeepResearchState,
        provider_id: Optional[str],
        timeout: float,
    ) -> WorkflowResult:
        """Execute planning phase: decompose query into sub-queries.

        This phase:
        1. Analyzes the original research query
        2. Generates a research brief explaining the approach
        3. Decomposes the query into 2-5 focused sub-queries
        4. Assigns priorities to each sub-query

        If the LLM response cannot be parsed, the original query is used
        verbatim as a single sub-query so the workflow can still proceed.

        Args:
            state: Current research state
            provider_id: LLM provider to use
            timeout: Request timeout in seconds

        Returns:
            WorkflowResult with planning outcome
        """
        logger.info("Starting planning phase for query: %s", state.original_query[:100])

        # Build the planning prompt
        system_prompt = self._build_planning_system_prompt(state)
        user_prompt = self._build_planning_user_prompt(state)

        # Execute LLM call with context window error handling
        try:
            result = self._execute_provider(
                prompt=user_prompt,
                # Explicit argument wins over the provider stored on the state.
                provider_id=provider_id or state.planning_provider,
                model=state.planning_model,
                system_prompt=system_prompt,
                timeout=timeout,
                temperature=0.7,  # Some creativity for diverse sub-queries
            )
        except ContextWindowError as e:
            # Prompt too large for the model: surface a structured error
            # instead of letting the exception abort the workflow.
            logger.error(
                "Planning phase context window exceeded: prompt_tokens=%s, "
                "max_tokens=%s, truncation_needed=%s, provider=%s",
                e.prompt_tokens,
                e.max_tokens,
                e.truncation_needed,
                e.provider,
            )
            return WorkflowResult(
                success=False,
                content="",
                error=str(e),
                metadata={
                    "research_id": state.id,
                    "phase": "planning",
                    "error_type": "context_window_exceeded",
                    "prompt_tokens": e.prompt_tokens,
                    "max_tokens": e.max_tokens,
                    "truncation_needed": e.truncation_needed,
                },
            )

        if not result.success:
            logger.error("Planning phase LLM call failed: %s", result.error)
            return result

        # Track token usage
        if result.tokens_used:
            state.total_tokens_used += result.tokens_used

        # Track phase metrics for audit
        state.phase_metrics.append(
            PhaseMetrics(
                phase="planning",
                duration_ms=result.duration_ms or 0.0,
                input_tokens=result.input_tokens or 0,
                output_tokens=result.output_tokens or 0,
                cached_tokens=result.cached_tokens or 0,
                provider_id=result.provider_id,
                model_used=result.model_used,
            )
        )

        # Parse the response
        parsed = self._parse_planning_response(result.content, state)

        if not parsed["success"]:
            logger.warning("Failed to parse planning response, using fallback")
            # Fallback: treat entire query as single sub-query
            state.research_brief = f"Direct research on: {state.original_query}"
            state.add_sub_query(
                query=state.original_query,
                rationale="Original query used directly due to parsing failure",
                priority=1,
            )
        else:
            state.research_brief = parsed["research_brief"]
            for sq in parsed["sub_queries"]:
                state.add_sub_query(
                    query=sq["query"],
                    rationale=sq.get("rationale"),
                    priority=sq.get("priority", 1),
                )

        # Save state after planning
        self.memory.save_deep_research(state)
        # Full prompts and the raw response are recorded for auditability.
        self._write_audit_event(
            state,
            "planning_result",
            data={
                "provider_id": result.provider_id,
                "model_used": result.model_used,
                "tokens_used": result.tokens_used,
                "duration_ms": result.duration_ms,
                "system_prompt": system_prompt,
                "user_prompt": user_prompt,
                "raw_response": result.content,
                "parse_success": parsed["success"],
                "research_brief": state.research_brief,
                "sub_queries": [
                    {
                        "id": sq.id,
                        "query": sq.query,
                        "rationale": sq.rationale,
                        "priority": sq.priority,
                    }
                    for sq in state.sub_queries
                ],
            },
        )

        logger.info(
            "Planning phase complete: %d sub-queries generated",
            len(state.sub_queries),
        )

        return WorkflowResult(
            success=True,
            content=state.research_brief or "Planning complete",
            provider_id=result.provider_id,
            model_used=result.model_used,
            tokens_used=result.tokens_used,
            duration_ms=result.duration_ms,
            metadata={
                "research_id": state.id,
                "sub_query_count": len(state.sub_queries),
                "research_brief": state.research_brief,
            },
        )
2195
+
2196
    def _build_planning_system_prompt(self, state: DeepResearchState) -> str:
        """Build system prompt for query decomposition.

        The prompt instructs the model to reply with a strict JSON object
        (``research_brief`` plus a ``sub_queries`` list), which is later
        consumed by ``_parse_planning_response``.

        Args:
            state: Current research state (not referenced by the body; kept
                for signature symmetry with the other prompt builders)

        Returns:
            System prompt string
        """
        return """You are a research planning assistant. Your task is to analyze a research query and decompose it into focused sub-queries that can be researched independently.

Your response MUST be valid JSON with this exact structure:
{
  "research_brief": "A 2-3 sentence summary of the research approach and what aspects will be investigated",
  "sub_queries": [
    {
      "query": "A specific, focused search query",
      "rationale": "Why this sub-query is important for the research",
      "priority": 1
    }
  ]
}

Guidelines:
- Generate 2-5 sub-queries (aim for 3-4 typically)
- Each sub-query should focus on a distinct aspect of the research
- Queries should be specific enough to yield relevant search results
- Priority 1 is highest (most important), higher numbers are lower priority
- Avoid overlapping queries - each should cover unique ground
- Consider different angles: definition, examples, comparisons, recent developments, expert opinions

IMPORTANT: Return ONLY valid JSON, no markdown formatting or extra text."""
2228
+
2229
+ def _build_planning_user_prompt(self, state: DeepResearchState) -> str:
2230
+ """Build user prompt for query decomposition.
2231
+
2232
+ Args:
2233
+ state: Current research state
2234
+
2235
+ Returns:
2236
+ User prompt string
2237
+ """
2238
+ prompt = f"""Research Query: {state.original_query}
2239
+
2240
+ Please decompose this research query into {state.max_sub_queries} or fewer focused sub-queries.
2241
+
2242
+ Consider:
2243
+ 1. What are the key aspects that need investigation?
2244
+ 2. What background information would help understand this topic?
2245
+ 3. What specific questions would lead to comprehensive coverage?
2246
+ 4. What different perspectives or sources might be valuable?
2247
+
2248
+ Generate the research plan as JSON."""
2249
+
2250
+ # Add custom system prompt context if provided
2251
+ if state.system_prompt:
2252
+ prompt += f"\n\nAdditional context: {state.system_prompt}"
2253
+
2254
+ return prompt
2255
+
2256
+ def _parse_planning_response(
2257
+ self,
2258
+ content: str,
2259
+ state: DeepResearchState,
2260
+ ) -> dict[str, Any]:
2261
+ """Parse LLM response into structured planning data.
2262
+
2263
+ Attempts to extract JSON from the response, with fallback handling
2264
+ for various response formats.
2265
+
2266
+ Args:
2267
+ content: Raw LLM response content
2268
+ state: Current research state (for max_sub_queries limit)
2269
+
2270
+ Returns:
2271
+ Dict with 'success', 'research_brief', and 'sub_queries' keys
2272
+ """
2273
+ result = {
2274
+ "success": False,
2275
+ "research_brief": None,
2276
+ "sub_queries": [],
2277
+ }
2278
+
2279
+ if not content:
2280
+ return result
2281
+
2282
+ # Try to extract JSON from the response
2283
+ json_str = self._extract_json(content)
2284
+ if not json_str:
2285
+ logger.warning("No JSON found in planning response")
2286
+ return result
2287
+
2288
+ try:
2289
+ data = json.loads(json_str)
2290
+ except json.JSONDecodeError as e:
2291
+ logger.error("Failed to parse JSON from planning response: %s", e)
2292
+ return result
2293
+
2294
+ # Extract research brief
2295
+ result["research_brief"] = data.get("research_brief", "")
2296
+
2297
+ # Extract and validate sub-queries
2298
+ raw_queries = data.get("sub_queries", [])
2299
+ if not isinstance(raw_queries, list):
2300
+ logger.warning("sub_queries is not a list")
2301
+ return result
2302
+
2303
+ for i, sq in enumerate(raw_queries):
2304
+ if not isinstance(sq, dict):
2305
+ continue
2306
+ query = sq.get("query", "").strip()
2307
+ if not query:
2308
+ continue
2309
+
2310
+ # Limit to max_sub_queries
2311
+ if len(result["sub_queries"]) >= state.max_sub_queries:
2312
+ break
2313
+
2314
+ result["sub_queries"].append({
2315
+ "query": query,
2316
+ "rationale": sq.get("rationale", ""),
2317
+ "priority": min(max(int(sq.get("priority", i + 1)), 1), 10),
2318
+ })
2319
+
2320
+ # Mark success if we got at least one sub-query
2321
+ result["success"] = len(result["sub_queries"]) > 0
2322
+
2323
+ return result
2324
+
2325
+ def _extract_json(self, content: str) -> Optional[str]:
2326
+ """Extract JSON object from content that may contain other text.
2327
+
2328
+ Handles cases where JSON is wrapped in markdown code blocks
2329
+ or mixed with explanatory text.
2330
+
2331
+ Args:
2332
+ content: Raw content that may contain JSON
2333
+
2334
+ Returns:
2335
+ Extracted JSON string or None if not found
2336
+ """
2337
+ # First, try to find JSON in code blocks
2338
+ code_block_pattern = r'```(?:json)?\s*([\s\S]*?)```'
2339
+ matches = re.findall(code_block_pattern, content)
2340
+ for match in matches:
2341
+ match = match.strip()
2342
+ if match.startswith('{'):
2343
+ return match
2344
+
2345
+ # Try to find raw JSON object
2346
+ # Look for the outermost { ... } pair
2347
+ brace_start = content.find('{')
2348
+ if brace_start == -1:
2349
+ return None
2350
+
2351
+ # Find matching closing brace
2352
+ depth = 0
2353
+ for i, char in enumerate(content[brace_start:], brace_start):
2354
+ if char == '{':
2355
+ depth += 1
2356
+ elif char == '}':
2357
+ depth -= 1
2358
+ if depth == 0:
2359
+ return content[brace_start:i + 1]
2360
+
2361
+ return None
2362
+
2363
+ async def _execute_gathering_async(
2364
+ self,
2365
+ state: DeepResearchState,
2366
+ provider_id: Optional[str],
2367
+ timeout: float,
2368
+ max_concurrent: int,
2369
+ ) -> WorkflowResult:
2370
+ """Execute gathering phase: parallel sub-query execution.
2371
+
2372
+ This phase:
2373
+ 1. Gets all pending sub-queries from planning phase
2374
+ 2. Executes them concurrently with rate limiting
2375
+ 3. Collects and deduplicates sources
2376
+ 4. Marks sub-queries as completed/failed
2377
+
2378
+ Args:
2379
+ state: Current research state with sub-queries
2380
+ provider_id: LLM provider (not used in gathering)
2381
+ timeout: Request timeout in seconds
2382
+ max_concurrent: Maximum concurrent search requests
2383
+
2384
+ Returns:
2385
+ WorkflowResult with gathering outcome
2386
+ """
2387
+ pending_queries = state.pending_sub_queries()
2388
+ if not pending_queries:
2389
+ logger.warning("No pending sub-queries for gathering phase")
2390
+ return WorkflowResult(
2391
+ success=True,
2392
+ content="No sub-queries to execute",
2393
+ metadata={"research_id": state.id, "source_count": 0},
2394
+ )
2395
+
2396
+ logger.info(
2397
+ "Starting gathering phase: %d sub-queries, max_concurrent=%d",
2398
+ len(pending_queries),
2399
+ max_concurrent,
2400
+ )
2401
+
2402
+ provider_names = getattr(
2403
+ self.config,
2404
+ "deep_research_providers",
2405
+ ["tavily", "google", "semantic_scholar"],
2406
+ )
2407
+ available_providers: list[SearchProvider] = []
2408
+ unavailable_providers: list[str] = []
2409
+
2410
+ for name in provider_names:
2411
+ provider = self._get_search_provider(name)
2412
+ if provider is None:
2413
+ unavailable_providers.append(name)
2414
+ continue
2415
+ available_providers.append(provider)
2416
+
2417
+ if not available_providers:
2418
+ return WorkflowResult(
2419
+ success=False,
2420
+ content="",
2421
+ error=(
2422
+ "No search providers available. Configure API keys for "
2423
+ "Tavily, Google, or Semantic Scholar."
2424
+ ),
2425
+ )
2426
+
2427
+ # Semaphore for concurrency control
2428
+ semaphore = asyncio.Semaphore(max_concurrent)
2429
+
2430
+ # Track collected sources for deduplication
2431
+ seen_urls: set[str] = set()
2432
+ seen_titles: dict[str, str] = {} # normalized_title -> first source URL
2433
+ total_sources_added = 0
2434
+ failed_queries = 0
2435
+
2436
+ async def execute_sub_query(sub_query) -> tuple[int, Optional[str]]:
2437
+ """Execute a single sub-query and return (sources_added, error)."""
2438
+ async with semaphore:
2439
+ sub_query.status = "executing"
2440
+
2441
+ provider_errors: list[str] = []
2442
+ added = 0
2443
+
2444
+ for provider in available_providers:
2445
+ provider_name = provider.get_provider_name()
2446
+ try:
2447
+ sources = await provider.search(
2448
+ query=sub_query.query,
2449
+ max_results=state.max_sources_per_query,
2450
+ sub_query_id=sub_query.id,
2451
+ include_raw_content=state.follow_links,
2452
+ )
2453
+
2454
+ # Add sources with deduplication
2455
+ for source in sources:
2456
+ # URL-based deduplication
2457
+ if source.url and source.url in seen_urls:
2458
+ continue # Skip duplicate URL
2459
+
2460
+ # Title-based deduplication (same paper from different domains)
2461
+ normalized_title = _normalize_title(source.title)
2462
+ if normalized_title and len(normalized_title) > 20:
2463
+ if normalized_title in seen_titles:
2464
+ logger.debug(
2465
+ "Skipping duplicate by title: %s (already have %s)",
2466
+ source.url,
2467
+ seen_titles[normalized_title],
2468
+ )
2469
+ continue # Skip duplicate title
2470
+ seen_titles[normalized_title] = source.url or ""
2471
+
2472
+ if source.url:
2473
+ seen_urls.add(source.url)
2474
+ # Apply domain-based quality scoring
2475
+ if source.quality == SourceQuality.UNKNOWN:
2476
+ source.quality = get_domain_quality(
2477
+ source.url, state.research_mode
2478
+ )
2479
+
2480
+ # Add source to state
2481
+ state.sources.append(source)
2482
+ state.total_sources_examined += 1
2483
+ sub_query.source_ids.append(source.id)
2484
+ added += 1
2485
+
2486
+ self._write_audit_event(
2487
+ state,
2488
+ "gathering_provider_result",
2489
+ data={
2490
+ "provider": provider_name,
2491
+ "sub_query_id": sub_query.id,
2492
+ "sub_query": sub_query.query,
2493
+ "sources_added": len(sources),
2494
+ },
2495
+ )
2496
+ # Track search provider query count
2497
+ state.search_provider_stats[provider_name] = (
2498
+ state.search_provider_stats.get(provider_name, 0) + 1
2499
+ )
2500
+ except SearchProviderError as e:
2501
+ provider_errors.append(f"{provider_name}: {e}")
2502
+ self._write_audit_event(
2503
+ state,
2504
+ "gathering_provider_result",
2505
+ data={
2506
+ "provider": provider_name,
2507
+ "sub_query_id": sub_query.id,
2508
+ "sub_query": sub_query.query,
2509
+ "sources_added": 0,
2510
+ "error": str(e),
2511
+ },
2512
+ level="warning",
2513
+ )
2514
+ except Exception as e:
2515
+ provider_errors.append(f"{provider_name}: {e}")
2516
+ self._write_audit_event(
2517
+ state,
2518
+ "gathering_provider_result",
2519
+ data={
2520
+ "provider": provider_name,
2521
+ "sub_query_id": sub_query.id,
2522
+ "sub_query": sub_query.query,
2523
+ "sources_added": 0,
2524
+ "error": str(e),
2525
+ },
2526
+ level="warning",
2527
+ )
2528
+
2529
+ if added > 0:
2530
+ sub_query.mark_completed(
2531
+ findings=f"Found {added} sources"
2532
+ )
2533
+ logger.debug(
2534
+ "Sub-query '%s' completed: %d sources",
2535
+ sub_query.query[:50],
2536
+ added,
2537
+ )
2538
+ return added, None
2539
+
2540
+ error_summary = "; ".join(provider_errors) or "No sources found"
2541
+ sub_query.mark_failed(error_summary)
2542
+ logger.warning(
2543
+ "Sub-query '%s' failed: %s",
2544
+ sub_query.query[:50],
2545
+ error_summary,
2546
+ )
2547
+ return 0, error_summary
2548
+
2549
+ # Execute all sub-queries concurrently
2550
+ tasks = [execute_sub_query(sq) for sq in pending_queries]
2551
+ results = await asyncio.gather(*tasks, return_exceptions=True)
2552
+
2553
+ # Aggregate results
2554
+ for result in results:
2555
+ if isinstance(result, Exception):
2556
+ failed_queries += 1
2557
+ logger.error("Task exception: %s", result)
2558
+ else:
2559
+ added, error = result
2560
+ total_sources_added += added
2561
+ if error:
2562
+ failed_queries += 1
2563
+
2564
+ # Update state timestamp
2565
+ state.updated_at = __import__("datetime").datetime.utcnow()
2566
+
2567
+ # Save state
2568
+ self.memory.save_deep_research(state)
2569
+ self._write_audit_event(
2570
+ state,
2571
+ "gathering_result",
2572
+ data={
2573
+ "source_count": total_sources_added,
2574
+ "queries_executed": len(pending_queries),
2575
+ "queries_failed": failed_queries,
2576
+ "unique_urls": len(seen_urls),
2577
+ "providers_used": [p.get_provider_name() for p in available_providers],
2578
+ "providers_unavailable": unavailable_providers,
2579
+ },
2580
+ )
2581
+
2582
+ # Determine success
2583
+ success = total_sources_added > 0 or failed_queries < len(pending_queries)
2584
+
2585
+ # Build error message if all queries failed
2586
+ error_msg = None
2587
+ if not success:
2588
+ providers_used = [p.get_provider_name() for p in available_providers]
2589
+ if failed_queries == len(pending_queries):
2590
+ error_msg = (
2591
+ f"All {failed_queries} sub-queries failed to find sources. "
2592
+ f"Providers used: {providers_used}. "
2593
+ f"Unavailable providers: {unavailable_providers}"
2594
+ )
2595
+
2596
+ logger.info(
2597
+ "Gathering phase complete: %d sources from %d queries (%d failed)",
2598
+ total_sources_added,
2599
+ len(pending_queries),
2600
+ failed_queries,
2601
+ )
2602
+
2603
+ return WorkflowResult(
2604
+ success=success,
2605
+ content=f"Gathered {total_sources_added} sources from {len(pending_queries)} sub-queries",
2606
+ error=error_msg,
2607
+ metadata={
2608
+ "research_id": state.id,
2609
+ "source_count": total_sources_added,
2610
+ "queries_executed": len(pending_queries),
2611
+ "queries_failed": failed_queries,
2612
+ "unique_urls": len(seen_urls),
2613
+ "providers_used": [p.get_provider_name() for p in available_providers],
2614
+ "providers_unavailable": unavailable_providers,
2615
+ },
2616
+ )
2617
+
2618
def _get_search_provider(self, provider_name: str) -> Optional[SearchProvider]:
    """Get or create a search provider instance, cached per name.

    Successfully constructed providers are memoized in
    ``self._search_providers`` so repeated lookups reuse one instance.

    Args:
        provider_name: Name of the provider (e.g., "tavily")

    Returns:
        SearchProvider instance, or None if the name is unknown or the
        provider cannot be initialized (e.g., missing API key).
    """
    # Fast path: reuse a previously constructed provider.
    if provider_name in self._search_providers:
        return self._search_providers[provider_name]

    # Single name -> class registry replaces the four duplicated
    # if/instantiate/cache branches of the original implementation.
    factories = {
        "tavily": TavilySearchProvider,
        "perplexity": PerplexitySearchProvider,
        "google": GoogleSearchProvider,
        "semantic_scholar": SemanticScholarProvider,
    }

    factory = factories.get(provider_name)
    if factory is None:
        logger.warning("Unknown search provider: %s", provider_name)
        return None

    try:
        provider = factory()
    except ValueError as e:
        # Providers raise ValueError when their API key is not configured.
        logger.error("Failed to initialize %s provider: %s", provider_name, e)
        return None
    except Exception as e:
        logger.error("Error initializing %s provider: %s", provider_name, e)
        return None

    self._search_providers[provider_name] = provider
    return provider
2657
+
2658
async def _execute_analysis_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
) -> WorkflowResult:
    """Execute analysis phase: extract findings from sources.

    This phase:
    1. Builds prompt with gathered source summaries
    2. Uses LLM to extract key findings
    3. Assesses confidence levels for each finding
    4. Identifies knowledge gaps requiring follow-up
    5. Updates source quality assessments

    Args:
        state: Current research state with gathered sources
        provider_id: LLM provider to use (falls back to state.analysis_provider)
        timeout: Request timeout in seconds

    Returns:
        WorkflowResult with analysis outcome
    """
    # No sources is a soft success: there is nothing to analyze, but the
    # workflow should proceed rather than abort.
    if not state.sources:
        logger.warning("No sources to analyze")
        return WorkflowResult(
            success=True,
            content="No sources to analyze",
            metadata={"research_id": state.id, "finding_count": 0},
        )

    logger.info(
        "Starting analysis phase: %d sources to analyze",
        len(state.sources),
    )

    # Build the analysis prompt (system prompt defines the JSON contract
    # that _parse_analysis_response later consumes).
    system_prompt = self._build_analysis_system_prompt(state)
    user_prompt = self._build_analysis_user_prompt(state)

    # Execute LLM call with context window error handling
    try:
        result = self._execute_provider(
            prompt=user_prompt,
            provider_id=provider_id or state.analysis_provider,
            model=state.analysis_model,
            system_prompt=system_prompt,
            timeout=timeout,
            temperature=0.3,  # Lower temperature for analytical tasks
        )
    except ContextWindowError as e:
        # Context overflow is surfaced as a structured failure with
        # actionable guidance instead of an unhandled exception.
        logger.error(
            "Analysis phase context window exceeded: prompt_tokens=%s, "
            "max_tokens=%s, truncation_needed=%s, provider=%s, source_count=%d",
            e.prompt_tokens,
            e.max_tokens,
            e.truncation_needed,
            e.provider,
            len(state.sources),
        )
        return WorkflowResult(
            success=False,
            content="",
            error=str(e),
            metadata={
                "research_id": state.id,
                "phase": "analysis",
                "error_type": "context_window_exceeded",
                "prompt_tokens": e.prompt_tokens,
                "max_tokens": e.max_tokens,
                "truncation_needed": e.truncation_needed,
                "source_count": len(state.sources),
                "guidance": "Try reducing max_sources_per_query or processing sources in batches",
            },
        )

    if not result.success:
        logger.error("Analysis phase LLM call failed: %s", result.error)
        return result

    # Track token usage
    if result.tokens_used:
        state.total_tokens_used += result.tokens_used

    # Track phase metrics for audit
    state.phase_metrics.append(
        PhaseMetrics(
            phase="analysis",
            duration_ms=result.duration_ms or 0.0,
            input_tokens=result.input_tokens or 0,
            output_tokens=result.output_tokens or 0,
            cached_tokens=result.cached_tokens or 0,
            provider_id=result.provider_id,
            model_used=result.model_used,
        )
    )

    # Parse the response
    parsed = self._parse_analysis_response(result.content, state)

    if not parsed["success"]:
        logger.warning("Failed to parse analysis response")
        self._write_audit_event(
            state,
            "analysis_result",
            data={
                "provider_id": result.provider_id,
                "model_used": result.model_used,
                "tokens_used": result.tokens_used,
                "duration_ms": result.duration_ms,
                "system_prompt": system_prompt,
                "user_prompt": user_prompt,
                "raw_response": result.content,
                "parse_success": False,
                "findings": [],
                "gaps": [],
                "quality_updates": [],
            },
            level="warning",
        )
        # Still mark as success but with no findings — a malformed LLM
        # response degrades the result instead of failing the workflow.
        # NOTE(review): the token/metric updates above are not persisted
        # via save_deep_research on this path — confirm that is intended.
        return WorkflowResult(
            success=True,
            content="Analysis completed but no findings extracted",
            metadata={
                "research_id": state.id,
                "finding_count": 0,
                "parse_error": True,
            },
        )

    # Add findings to state
    for finding_data in parsed["findings"]:
        state.add_finding(
            content=finding_data["content"],
            confidence=finding_data["confidence"],
            source_ids=finding_data.get("source_ids", []),
            category=finding_data.get("category"),
        )

    # Add gaps to state
    for gap_data in parsed["gaps"]:
        state.add_gap(
            description=gap_data["description"],
            suggested_queries=gap_data.get("suggested_queries", []),
            priority=gap_data.get("priority", 1),
        )

    # Update source quality assessments
    for quality_update in parsed.get("quality_updates", []):
        source = state.get_source(quality_update["source_id"])
        if source:
            try:
                source.quality = SourceQuality(quality_update["quality"])
            except ValueError:
                pass  # Invalid quality value, skip

    # Save state (persists findings, gaps, quality, and token counters)
    self.memory.save_deep_research(state)
    self._write_audit_event(
        state,
        "analysis_result",
        data={
            "provider_id": result.provider_id,
            "model_used": result.model_used,
            "tokens_used": result.tokens_used,
            "duration_ms": result.duration_ms,
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "raw_response": result.content,
            "parse_success": True,
            "findings": parsed["findings"],
            "gaps": parsed["gaps"],
            "quality_updates": parsed.get("quality_updates", []),
        },
    )

    logger.info(
        "Analysis phase complete: %d findings, %d gaps identified",
        len(parsed["findings"]),
        len(parsed["gaps"]),
    )

    return WorkflowResult(
        success=True,
        content=f"Extracted {len(parsed['findings'])} findings and identified {len(parsed['gaps'])} gaps",
        provider_id=result.provider_id,
        model_used=result.model_used,
        tokens_used=result.tokens_used,
        duration_ms=result.duration_ms,
        metadata={
            "research_id": state.id,
            "finding_count": len(parsed["findings"]),
            "gap_count": len(parsed["gaps"]),
            "source_count": len(state.sources),
        },
    )
2855
+
2856
def _build_analysis_system_prompt(self, state: DeepResearchState) -> str:
    """Build system prompt for source analysis.

    The prompt pins the model to a strict JSON object with "findings",
    "gaps", and "quality_updates" keys; this schema must stay in sync
    with what ``_parse_analysis_response`` expects to parse.

    Args:
        state: Current research state (not referenced by the prompt text;
            kept for signature symmetry with the other prompt builders)

    Returns:
        System prompt string
    """
    return """You are a research analyst. Your task is to analyze research sources and extract key findings, assess their quality, and identify knowledge gaps.

Your response MUST be valid JSON with this exact structure:
{
  "findings": [
    {
      "content": "A clear, specific finding or insight extracted from the sources",
      "confidence": "low|medium|high",
      "source_ids": ["src-xxx", "src-yyy"],
      "category": "optional category/theme"
    }
  ],
  "gaps": [
    {
      "description": "Description of missing information or unanswered question",
      "suggested_queries": ["follow-up query 1", "follow-up query 2"],
      "priority": 1
    }
  ],
  "quality_updates": [
    {
      "source_id": "src-xxx",
      "quality": "low|medium|high"
    }
  ]
}

Guidelines for findings:
- Extract 2-5 key findings from the sources
- Each finding should be a specific, actionable insight
- Confidence levels: "low" (single weak source), "medium" (multiple sources or one authoritative), "high" (multiple authoritative sources agree)
- Include source_ids that support each finding
- Categorize findings by theme when applicable

Guidelines for gaps:
- Identify 1-3 knowledge gaps or unanswered questions
- Provide specific follow-up queries that could fill each gap
- Priority 1 is most important, higher numbers are lower priority

Guidelines for quality_updates:
- Assess source quality based on authority, relevance, and recency
- "low" = questionable reliability, "medium" = generally reliable, "high" = authoritative

IMPORTANT: Return ONLY valid JSON, no markdown formatting or extra text."""
2909
+
2910
+ def _build_analysis_user_prompt(self, state: DeepResearchState) -> str:
2911
+ """Build user prompt with source summaries for analysis.
2912
+
2913
+ Args:
2914
+ state: Current research state
2915
+
2916
+ Returns:
2917
+ User prompt string
2918
+ """
2919
+ prompt_parts = [
2920
+ f"Original Research Query: {state.original_query}",
2921
+ "",
2922
+ "Research Brief:",
2923
+ state.research_brief or "Direct research on the query",
2924
+ "",
2925
+ "Sources to Analyze:",
2926
+ "",
2927
+ ]
2928
+
2929
+ # Add source summaries
2930
+ for i, source in enumerate(state.sources[:20], 1): # Limit to 20 sources
2931
+ prompt_parts.append(f"Source {i} (ID: {source.id}):")
2932
+ prompt_parts.append(f" Title: {source.title}")
2933
+ if source.url:
2934
+ prompt_parts.append(f" URL: {source.url}")
2935
+ if source.snippet:
2936
+ # Truncate long snippets
2937
+ snippet = source.snippet[:500] + "..." if len(source.snippet) > 500 else source.snippet
2938
+ prompt_parts.append(f" Snippet: {snippet}")
2939
+ if source.content:
2940
+ # Truncate long content
2941
+ content = source.content[:1000] + "..." if len(source.content) > 1000 else source.content
2942
+ prompt_parts.append(f" Content: {content}")
2943
+ prompt_parts.append("")
2944
+
2945
+ prompt_parts.extend([
2946
+ "Please analyze these sources and:",
2947
+ "1. Extract 2-5 key findings relevant to the research query",
2948
+ "2. Assess confidence levels based on source agreement and authority",
2949
+ "3. Identify any knowledge gaps or unanswered questions",
2950
+ "4. Assess the quality of each source",
2951
+ "",
2952
+ "Return your analysis as JSON.",
2953
+ ])
2954
+
2955
+ return "\n".join(prompt_parts)
2956
+
2957
+ def _parse_analysis_response(
2958
+ self,
2959
+ content: str,
2960
+ state: DeepResearchState,
2961
+ ) -> dict[str, Any]:
2962
+ """Parse LLM response into structured analysis data.
2963
+
2964
+ Args:
2965
+ content: Raw LLM response content
2966
+ state: Current research state
2967
+
2968
+ Returns:
2969
+ Dict with 'success', 'findings', 'gaps', and 'quality_updates' keys
2970
+ """
2971
+ result = {
2972
+ "success": False,
2973
+ "findings": [],
2974
+ "gaps": [],
2975
+ "quality_updates": [],
2976
+ }
2977
+
2978
+ if not content:
2979
+ return result
2980
+
2981
+ # Try to extract JSON from the response
2982
+ json_str = self._extract_json(content)
2983
+ if not json_str:
2984
+ logger.warning("No JSON found in analysis response")
2985
+ return result
2986
+
2987
+ try:
2988
+ data = json.loads(json_str)
2989
+ except json.JSONDecodeError as e:
2990
+ logger.error("Failed to parse JSON from analysis response: %s", e)
2991
+ return result
2992
+
2993
+ # Parse findings
2994
+ raw_findings = data.get("findings", [])
2995
+ if isinstance(raw_findings, list):
2996
+ for f in raw_findings:
2997
+ if not isinstance(f, dict):
2998
+ continue
2999
+ content_text = f.get("content", "").strip()
3000
+ if not content_text:
3001
+ continue
3002
+
3003
+ # Map confidence string to enum
3004
+ confidence_str = f.get("confidence", "medium").lower()
3005
+ confidence_map = {
3006
+ "low": ConfidenceLevel.LOW,
3007
+ "medium": ConfidenceLevel.MEDIUM,
3008
+ "high": ConfidenceLevel.HIGH,
3009
+ "confirmed": ConfidenceLevel.CONFIRMED,
3010
+ "speculation": ConfidenceLevel.SPECULATION,
3011
+ }
3012
+ confidence = confidence_map.get(confidence_str, ConfidenceLevel.MEDIUM)
3013
+
3014
+ result["findings"].append({
3015
+ "content": content_text,
3016
+ "confidence": confidence,
3017
+ "source_ids": f.get("source_ids", []),
3018
+ "category": f.get("category"),
3019
+ })
3020
+
3021
+ # Parse gaps
3022
+ raw_gaps = data.get("gaps", [])
3023
+ if isinstance(raw_gaps, list):
3024
+ for g in raw_gaps:
3025
+ if not isinstance(g, dict):
3026
+ continue
3027
+ description = g.get("description", "").strip()
3028
+ if not description:
3029
+ continue
3030
+
3031
+ result["gaps"].append({
3032
+ "description": description,
3033
+ "suggested_queries": g.get("suggested_queries", []),
3034
+ "priority": min(max(int(g.get("priority", 1)), 1), 10),
3035
+ })
3036
+
3037
+ # Parse quality updates
3038
+ raw_quality = data.get("quality_updates", [])
3039
+ if isinstance(raw_quality, list):
3040
+ for q in raw_quality:
3041
+ if not isinstance(q, dict):
3042
+ continue
3043
+ source_id = q.get("source_id", "").strip()
3044
+ quality = q.get("quality", "").lower()
3045
+ if source_id and quality in ("low", "medium", "high", "unknown"):
3046
+ result["quality_updates"].append({
3047
+ "source_id": source_id,
3048
+ "quality": quality,
3049
+ })
3050
+
3051
+ # Mark success if we got at least one finding
3052
+ result["success"] = len(result["findings"]) > 0
3053
+
3054
+ return result
3055
+
3056
async def _execute_synthesis_async(
    self,
    state: DeepResearchState,
    provider_id: Optional[str],
    timeout: float,
) -> WorkflowResult:
    """Execute synthesis phase: generate comprehensive report from findings.

    This phase:
    1. Builds a synthesis prompt with all findings grouped by theme
    2. Includes source references for citation
    3. Generates a structured markdown report with:
       - Executive summary
       - Key findings organized by theme
       - Source citations
       - Knowledge gaps and limitations
       - Conclusions with actionable insights
    4. Stores the report in state.report

    Args:
        state: Current research state with findings from analysis
        provider_id: LLM provider to use (falls back to state.synthesis_provider)
        timeout: Request timeout in seconds

    Returns:
        WorkflowResult with synthesis outcome
    """
    # With no findings, still produce a minimal placeholder report so the
    # workflow ends with a usable artifact instead of failing.
    if not state.findings:
        logger.warning("No findings to synthesize")
        # Generate a minimal report even without findings
        state.report = self._generate_empty_report(state)
        self._write_audit_event(
            state,
            "synthesis_result",
            data={
                "provider_id": None,
                "model_used": None,
                "tokens_used": None,
                "duration_ms": None,
                "system_prompt": None,
                "user_prompt": None,
                "raw_response": None,
                "report": state.report,
                "empty_report": True,
            },
            level="warning",
        )
        # NOTE(review): this path sets state.report but does not call
        # memory.save_deep_research — confirm persistence is handled upstream.
        return WorkflowResult(
            success=True,
            content=state.report,
            metadata={
                "research_id": state.id,
                "finding_count": 0,
                "empty_report": True,
            },
        )

    logger.info(
        "Starting synthesis phase: %d findings, %d sources",
        len(state.findings),
        len(state.sources),
    )

    # Build the synthesis prompt
    system_prompt = self._build_synthesis_system_prompt(state)
    user_prompt = self._build_synthesis_user_prompt(state)

    # Execute LLM call with context window error handling
    try:
        result = self._execute_provider(
            prompt=user_prompt,
            provider_id=provider_id or state.synthesis_provider,
            model=state.synthesis_model,
            system_prompt=system_prompt,
            timeout=timeout,
            temperature=0.5,  # Balanced for coherent but varied writing
        )
    except ContextWindowError as e:
        # Surface context overflow as a structured failure with guidance.
        logger.error(
            "Synthesis phase context window exceeded: prompt_tokens=%s, "
            "max_tokens=%s, truncation_needed=%s, provider=%s, finding_count=%d",
            e.prompt_tokens,
            e.max_tokens,
            e.truncation_needed,
            e.provider,
            len(state.findings),
        )
        return WorkflowResult(
            success=False,
            content="",
            error=str(e),
            metadata={
                "research_id": state.id,
                "phase": "synthesis",
                "error_type": "context_window_exceeded",
                "prompt_tokens": e.prompt_tokens,
                "max_tokens": e.max_tokens,
                "truncation_needed": e.truncation_needed,
                "finding_count": len(state.findings),
                "guidance": "Try reducing the number of findings or source content included",
            },
        )

    if not result.success:
        logger.error("Synthesis phase LLM call failed: %s", result.error)
        return result

    # Track token usage
    if result.tokens_used:
        state.total_tokens_used += result.tokens_used

    # Track phase metrics for audit
    state.phase_metrics.append(
        PhaseMetrics(
            phase="synthesis",
            duration_ms=result.duration_ms or 0.0,
            input_tokens=result.input_tokens or 0,
            output_tokens=result.output_tokens or 0,
            cached_tokens=result.cached_tokens or 0,
            provider_id=result.provider_id,
            model_used=result.model_used,
        )
    )

    # Extract the markdown report from the response
    report = self._extract_markdown_report(result.content)

    if not report:
        logger.warning("Failed to extract report from synthesis response")
        # Use raw content as fallback so we never lose the model's output
        report = result.content

    # Store report in state
    state.report = report

    # Save state (persists report, metrics, and token counters)
    self.memory.save_deep_research(state)
    self._write_audit_event(
        state,
        "synthesis_result",
        data={
            "provider_id": result.provider_id,
            "model_used": result.model_used,
            "tokens_used": result.tokens_used,
            "duration_ms": result.duration_ms,
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "raw_response": result.content,
            "report": state.report,
            "report_length": len(state.report),
        },
    )

    logger.info(
        "Synthesis phase complete: report length %d chars",
        len(state.report),
    )

    return WorkflowResult(
        success=True,
        content=state.report,
        provider_id=result.provider_id,
        model_used=result.model_used,
        tokens_used=result.tokens_used,
        duration_ms=result.duration_ms,
        metadata={
            "research_id": state.id,
            "finding_count": len(state.findings),
            "source_count": len(state.sources),
            "report_length": len(state.report),
            "iteration": state.iteration,
        },
    )
3229
+
3230
def _build_synthesis_system_prompt(self, state: DeepResearchState) -> str:
    """Build system prompt for report synthesis.

    Defines the markdown report skeleton (executive summary, themed
    findings, analysis, sources, conclusions) the model must follow;
    ``_extract_markdown_report`` later strips any wrapper around it.

    Args:
        state: Current research state (not referenced by the prompt text;
            kept for signature symmetry with the other prompt builders)

    Returns:
        System prompt string
    """
    return """You are a research synthesizer. Your task is to create a comprehensive, well-structured research report from analyzed findings.

Generate a markdown-formatted report with the following structure:

# Research Report: [Topic]

## Executive Summary
A 2-3 paragraph overview of the key insights and conclusions.

## Key Findings

### [Theme/Category 1]
- Finding with supporting evidence and source citations [Source ID]
- Related findings grouped together

### [Theme/Category 2]
- Continue for each major theme...

## Analysis

### Supporting Evidence
Discussion of well-supported findings with high confidence.

### Conflicting Information
Note any contradictions or disagreements between sources (if present).

### Limitations
Acknowledge gaps in the research and areas needing further investigation.

## Sources
List sources as markdown links with their IDs: **[src-xxx]** [Title](URL)

## Conclusions
Actionable insights and recommendations based on the findings.

---

Guidelines:
- Organize findings thematically rather than listing them sequentially
- Cite source IDs in brackets when referencing specific information [src-xxx]
- Distinguish between high-confidence findings (well-supported) and lower-confidence insights
- Be specific and actionable in conclusions
- Keep the report focused on the original research query
- Use clear, professional language
- Include all relevant findings - don't omit information

IMPORTANT: Return ONLY the markdown report, no preamble or meta-commentary."""
3286
+
3287
+ def _build_synthesis_user_prompt(self, state: DeepResearchState) -> str:
3288
+ """Build user prompt with findings and sources for synthesis.
3289
+
3290
+ Args:
3291
+ state: Current research state
3292
+
3293
+ Returns:
3294
+ User prompt string
3295
+ """
3296
+ prompt_parts = [
3297
+ f"# Research Query\n{state.original_query}",
3298
+ "",
3299
+ f"## Research Brief\n{state.research_brief or 'Direct research on the query'}",
3300
+ "",
3301
+ "## Findings to Synthesize",
3302
+ "",
3303
+ ]
3304
+
3305
+ # Group findings by category if available
3306
+ categorized: dict[str, list] = {}
3307
+ uncategorized = []
3308
+
3309
+ for finding in state.findings:
3310
+ category = finding.category or "General"
3311
+ if category not in categorized:
3312
+ categorized[category] = []
3313
+ categorized[category].append(finding)
3314
+
3315
+ # Add findings by category
3316
+ for category, findings in categorized.items():
3317
+ prompt_parts.append(f"### {category}")
3318
+ for f in findings:
3319
+ confidence_label = f.confidence.value if hasattr(f.confidence, 'value') else str(f.confidence)
3320
+ source_refs = ", ".join(f.source_ids) if f.source_ids else "no sources"
3321
+ prompt_parts.append(f"- [{confidence_label.upper()}] {f.content}")
3322
+ prompt_parts.append(f" Sources: {source_refs}")
3323
+ prompt_parts.append("")
3324
+
3325
+ # Add knowledge gaps
3326
+ if state.gaps:
3327
+ prompt_parts.append("## Knowledge Gaps Identified")
3328
+ for gap in state.gaps:
3329
+ status = "addressed" if gap.resolved else "unresolved"
3330
+ prompt_parts.append(f"- [{status}] {gap.description}")
3331
+ prompt_parts.append("")
3332
+
3333
+ # Add source reference list
3334
+ prompt_parts.append("## Source Reference")
3335
+ for source in state.sources[:30]: # Limit to 30 for context window
3336
+ quality = source.quality.value if hasattr(source.quality, 'value') else str(source.quality)
3337
+ prompt_parts.append(f"- {source.id}: {source.title} [{quality}]")
3338
+ if source.url:
3339
+ prompt_parts.append(f" URL: {source.url}")
3340
+ prompt_parts.append("")
3341
+
3342
+ # Add synthesis instructions
3343
+ prompt_parts.extend([
3344
+ "## Instructions",
3345
+ f"Generate a comprehensive research report addressing the query: '{state.original_query}'",
3346
+ "",
3347
+ f"This is iteration {state.iteration} of {state.max_iterations}.",
3348
+ f"Total findings: {len(state.findings)}",
3349
+ f"Total sources: {len(state.sources)}",
3350
+ f"Unresolved gaps: {len(state.unresolved_gaps())}",
3351
+ "",
3352
+ "Create a well-structured markdown report following the format specified.",
3353
+ ])
3354
+
3355
+ return "\n".join(prompt_parts)
3356
+
3357
+ def _extract_markdown_report(self, content: str) -> Optional[str]:
3358
+ """Extract markdown report from LLM response.
3359
+
3360
+ The response should be pure markdown, but this handles cases where
3361
+ the LLM wraps it in code blocks or adds preamble.
3362
+
3363
+ Args:
3364
+ content: Raw LLM response content
3365
+
3366
+ Returns:
3367
+ Extracted markdown report or None if extraction fails
3368
+ """
3369
+ if not content:
3370
+ return None
3371
+
3372
+ # If content starts with markdown heading, it's likely clean
3373
+ if content.strip().startswith("#"):
3374
+ return content.strip()
3375
+
3376
+ # Check for markdown code block wrapper
3377
+ if "```markdown" in content or "```md" in content:
3378
+ # Extract content between code blocks
3379
+ pattern = r'```(?:markdown|md)?\s*([\s\S]*?)```'
3380
+ matches = re.findall(pattern, content)
3381
+ if matches:
3382
+ return matches[0].strip()
3383
+
3384
+ # Check for generic code block
3385
+ if "```" in content:
3386
+ pattern = r'```\s*([\s\S]*?)```'
3387
+ matches = re.findall(pattern, content)
3388
+ for match in matches:
3389
+ # Check if it looks like markdown (has headings)
3390
+ if match.strip().startswith("#") or "##" in match:
3391
+ return match.strip()
3392
+
3393
+ # Look for first heading and take everything from there
3394
+ heading_match = re.search(r'^(#[^\n]+)', content, re.MULTILINE)
3395
+ if heading_match:
3396
+ start_pos = heading_match.start()
3397
+ return content[start_pos:].strip()
3398
+
3399
+ # If nothing else works, return the trimmed content
3400
+ return content.strip() if len(content.strip()) > 50 else None
3401
+
3402
def _generate_empty_report(self, state: DeepResearchState) -> str:
    """Generate a minimal report when no findings are available.

    Used by the synthesis phase's no-findings path so the workflow still
    ends with a readable markdown artifact and next-step suggestions.

    Args:
        state: Current research state

    Returns:
        Minimal markdown report
    """
    return f"""# Research Report

## Executive Summary

Research was conducted on the query: "{state.original_query}"

Unfortunately, the analysis phase did not yield extractable findings from the gathered sources. This may indicate:
- The sources lacked relevant information
- The query may need refinement
- Additional research iterations may be needed

## Research Query

{state.original_query}

## Research Brief

{state.research_brief or "No research brief generated."}

## Sources Examined

{len(state.sources)} source(s) were examined during this research session.

## Recommendations

1. Consider refining the research query for more specific results
2. Try additional research iterations if available
3. Review the gathered sources manually for relevant information

---

*Report generated with no extractable findings. Iteration {state.iteration}/{state.max_iterations}.*
"""
3444
+
3445
+ async def _execute_refinement_async(
3446
+ self,
3447
+ state: DeepResearchState,
3448
+ provider_id: Optional[str],
3449
+ timeout: float,
3450
+ ) -> WorkflowResult:
3451
+ """Execute refinement phase: analyze gaps and generate follow-up queries.
3452
+
3453
+ This phase:
3454
+ 1. Reviews the current report and identified gaps
3455
+ 2. Uses LLM to assess gap severity and addressability
3456
+ 3. Generates follow-up queries for unresolved gaps
3457
+ 4. Converts high-priority gaps to new sub-queries for next iteration
3458
+ 5. Respects max_iterations limit for workflow termination
3459
+
3460
+ Args:
3461
+ state: Current research state with report and gaps
3462
+ provider_id: LLM provider to use
3463
+ timeout: Request timeout in seconds
3464
+
3465
+ Returns:
3466
+ WorkflowResult with refinement outcome
3467
+ """
3468
+ unresolved_gaps = state.unresolved_gaps()
3469
+
3470
+ # Check iteration limit
3471
+ if state.iteration >= state.max_iterations:
3472
+ logger.info(
3473
+ "Refinement: max iterations (%d) reached, no further refinement",
3474
+ state.max_iterations,
3475
+ )
3476
+ self._write_audit_event(
3477
+ state,
3478
+ "refinement_result",
3479
+ data={
3480
+ "reason": "max_iterations_reached",
3481
+ "unresolved_gaps": len(unresolved_gaps),
3482
+ "iteration": state.iteration,
3483
+ },
3484
+ level="warning",
3485
+ )
3486
+ return WorkflowResult(
3487
+ success=True,
3488
+ content="Max iterations reached, refinement complete",
3489
+ metadata={
3490
+ "research_id": state.id,
3491
+ "iteration": state.iteration,
3492
+ "max_iterations": state.max_iterations,
3493
+ "unresolved_gaps": len(unresolved_gaps),
3494
+ "reason": "max_iterations_reached",
3495
+ },
3496
+ )
3497
+
3498
+ if not unresolved_gaps:
3499
+ logger.info("Refinement: no unresolved gaps, research complete")
3500
+ self._write_audit_event(
3501
+ state,
3502
+ "refinement_result",
3503
+ data={
3504
+ "reason": "no_gaps",
3505
+ "unresolved_gaps": 0,
3506
+ "iteration": state.iteration,
3507
+ },
3508
+ )
3509
+ return WorkflowResult(
3510
+ success=True,
3511
+ content="No unresolved gaps, research complete",
3512
+ metadata={
3513
+ "research_id": state.id,
3514
+ "iteration": state.iteration,
3515
+ "reason": "no_gaps",
3516
+ },
3517
+ )
3518
+
3519
+ logger.info(
3520
+ "Starting refinement phase: %d unresolved gaps, iteration %d/%d",
3521
+ len(unresolved_gaps),
3522
+ state.iteration,
3523
+ state.max_iterations,
3524
+ )
3525
+
3526
+ # Build the refinement prompt
3527
+ system_prompt = self._build_refinement_system_prompt(state)
3528
+ user_prompt = self._build_refinement_user_prompt(state)
3529
+
3530
+ # Execute LLM call with context window error handling
3531
+ try:
3532
+ result = self._execute_provider(
3533
+ prompt=user_prompt,
3534
+ provider_id=provider_id or state.refinement_provider,
3535
+ model=state.refinement_model,
3536
+ system_prompt=system_prompt,
3537
+ timeout=timeout,
3538
+ temperature=0.4, # Lower temperature for focused analysis
3539
+ )
3540
+ except ContextWindowError as e:
3541
+ logger.error(
3542
+ "Refinement phase context window exceeded: prompt_tokens=%s, "
3543
+ "max_tokens=%s, gap_count=%d",
3544
+ e.prompt_tokens,
3545
+ e.max_tokens,
3546
+ len(unresolved_gaps),
3547
+ )
3548
+ return WorkflowResult(
3549
+ success=False,
3550
+ content="",
3551
+ error=str(e),
3552
+ metadata={
3553
+ "research_id": state.id,
3554
+ "phase": "refinement",
3555
+ "error_type": "context_window_exceeded",
3556
+ "prompt_tokens": e.prompt_tokens,
3557
+ "max_tokens": e.max_tokens,
3558
+ },
3559
+ )
3560
+
3561
+ if not result.success:
3562
+ logger.error("Refinement phase LLM call failed: %s", result.error)
3563
+ return result
3564
+
3565
+ # Track token usage
3566
+ if result.tokens_used:
3567
+ state.total_tokens_used += result.tokens_used
3568
+
3569
+ # Track phase metrics for audit
3570
+ state.phase_metrics.append(
3571
+ PhaseMetrics(
3572
+ phase="refinement",
3573
+ duration_ms=result.duration_ms or 0.0,
3574
+ input_tokens=result.input_tokens or 0,
3575
+ output_tokens=result.output_tokens or 0,
3576
+ cached_tokens=result.cached_tokens or 0,
3577
+ provider_id=result.provider_id,
3578
+ model_used=result.model_used,
3579
+ )
3580
+ )
3581
+
3582
+ # Parse the response
3583
+ parsed = self._parse_refinement_response(result.content, state)
3584
+
3585
+ if not parsed["success"]:
3586
+ logger.warning("Failed to parse refinement response, using existing gap suggestions")
3587
+ # Fallback: use existing gap suggestions as follow-up queries
3588
+ follow_up_queries = self._extract_fallback_queries(state)
3589
+ else:
3590
+ follow_up_queries = parsed["follow_up_queries"]
3591
+
3592
+ # Mark gaps as resolved if specified
3593
+ for gap_id in parsed.get("addressed_gap_ids", []):
3594
+ gap = state.get_gap(gap_id)
3595
+ if gap:
3596
+ gap.resolved = True
3597
+
3598
+ # Convert follow-up queries to new sub-queries for next iteration
3599
+ new_sub_queries = 0
3600
+ for query_data in follow_up_queries[:state.max_sub_queries]:
3601
+ # Add as new sub-query
3602
+ state.add_sub_query(
3603
+ query=query_data["query"],
3604
+ rationale=query_data.get("rationale", "Follow-up from gap analysis"),
3605
+ priority=query_data.get("priority", 1),
3606
+ )
3607
+ new_sub_queries += 1
3608
+
3609
+ # Save state
3610
+ self.memory.save_deep_research(state)
3611
+ self._write_audit_event(
3612
+ state,
3613
+ "refinement_result",
3614
+ data={
3615
+ "provider_id": result.provider_id,
3616
+ "model_used": result.model_used,
3617
+ "tokens_used": result.tokens_used,
3618
+ "duration_ms": result.duration_ms,
3619
+ "system_prompt": system_prompt,
3620
+ "user_prompt": user_prompt,
3621
+ "raw_response": result.content,
3622
+ "parse_success": parsed["success"],
3623
+ "gap_analysis": parsed.get("gap_analysis", []),
3624
+ "follow_up_queries": follow_up_queries,
3625
+ "addressed_gap_ids": parsed.get("addressed_gap_ids", []),
3626
+ "should_iterate": parsed.get("should_iterate", True),
3627
+ },
3628
+ )
3629
+
3630
+ logger.info(
3631
+ "Refinement phase complete: %d follow-up queries generated",
3632
+ new_sub_queries,
3633
+ )
3634
+
3635
+ return WorkflowResult(
3636
+ success=True,
3637
+ content=f"Generated {new_sub_queries} follow-up queries from {len(unresolved_gaps)} gaps",
3638
+ provider_id=result.provider_id,
3639
+ model_used=result.model_used,
3640
+ tokens_used=result.tokens_used,
3641
+ duration_ms=result.duration_ms,
3642
+ metadata={
3643
+ "research_id": state.id,
3644
+ "iteration": state.iteration,
3645
+ "unresolved_gaps": len(unresolved_gaps),
3646
+ "follow_up_queries": new_sub_queries,
3647
+ "gaps_addressed": len(parsed.get("addressed_gap_ids", [])),
3648
+ },
3649
+ )
3650
+
3651
+ def _build_refinement_system_prompt(self, state: DeepResearchState) -> str:
3652
+ """Build system prompt for gap analysis and refinement.
3653
+
3654
+ Args:
3655
+ state: Current research state
3656
+
3657
+ Returns:
3658
+ System prompt string
3659
+ """
3660
+ return """You are a research refiner. Your task is to analyze knowledge gaps identified during research and generate focused follow-up queries to address them.
3661
+
3662
+ Your response MUST be valid JSON with this exact structure:
3663
+ {
3664
+ "gap_analysis": [
3665
+ {
3666
+ "gap_id": "gap-xxx",
3667
+ "severity": "critical|moderate|minor",
3668
+ "addressable": true,
3669
+ "rationale": "Why this gap matters and whether it can be addressed"
3670
+ }
3671
+ ],
3672
+ "follow_up_queries": [
3673
+ {
3674
+ "query": "A specific, focused search query to address the gap",
3675
+ "target_gap_id": "gap-xxx",
3676
+ "rationale": "How this query will fill the gap",
3677
+ "priority": 1
3678
+ }
3679
+ ],
3680
+ "addressed_gap_ids": ["gap-xxx"],
3681
+ "iteration_recommendation": {
3682
+ "should_iterate": true,
3683
+ "rationale": "Why iteration is or isn't recommended"
3684
+ }
3685
+ }
3686
+
3687
+ Guidelines:
3688
+ - Assess each gap's severity: "critical" (blocks conclusions), "moderate" (affects confidence), "minor" (nice to have)
3689
+ - Only mark gaps as addressable if follow-up research can realistically fill them
3690
+ - Generate 1-3 highly focused follow-up queries per addressable gap
3691
+ - Priority 1 is highest priority
3692
+ - Mark gaps as addressed if the current report already covers them adequately
3693
+ - Recommend iteration only if there are addressable critical/moderate gaps AND value exceeds research cost
3694
+
3695
+ IMPORTANT: Return ONLY valid JSON, no markdown formatting or extra text."""
3696
+
3697
+ def _build_refinement_user_prompt(self, state: DeepResearchState) -> str:
3698
+ """Build user prompt with gaps and report context for refinement.
3699
+
3700
+ Args:
3701
+ state: Current research state
3702
+
3703
+ Returns:
3704
+ User prompt string
3705
+ """
3706
+ prompt_parts = [
3707
+ f"# Research Query\n{state.original_query}",
3708
+ "",
3709
+ f"## Research Status",
3710
+ f"- Iteration: {state.iteration}/{state.max_iterations}",
3711
+ f"- Sources examined: {len(state.sources)}",
3712
+ f"- Findings extracted: {len(state.findings)}",
3713
+ f"- Unresolved gaps: {len(state.unresolved_gaps())}",
3714
+ "",
3715
+ ]
3716
+
3717
+ # Add report summary (truncated for context window)
3718
+ if state.report:
3719
+ report_excerpt = state.report[:2000]
3720
+ if len(state.report) > 2000:
3721
+ report_excerpt += "\n\n[Report truncated...]"
3722
+ prompt_parts.append("## Current Report Summary")
3723
+ prompt_parts.append(report_excerpt)
3724
+ prompt_parts.append("")
3725
+
3726
+ # Add unresolved gaps
3727
+ prompt_parts.append("## Unresolved Knowledge Gaps")
3728
+ for gap in state.unresolved_gaps():
3729
+ prompt_parts.append(f"\n### Gap: {gap.id}")
3730
+ prompt_parts.append(f"Description: {gap.description}")
3731
+ prompt_parts.append(f"Priority: {gap.priority}")
3732
+ if gap.suggested_queries:
3733
+ prompt_parts.append("Suggested queries from analysis:")
3734
+ for sq in gap.suggested_queries[:3]:
3735
+ prompt_parts.append(f" - {sq}")
3736
+ prompt_parts.append("")
3737
+
3738
+ # Add high-confidence findings for context
3739
+ high_conf_findings = [
3740
+ f for f in state.findings
3741
+ if hasattr(f.confidence, 'value') and f.confidence.value in ('high', 'confirmed')
3742
+ ]
3743
+ if high_conf_findings:
3744
+ prompt_parts.append("## High-Confidence Findings Already Established")
3745
+ for f in high_conf_findings[:5]:
3746
+ prompt_parts.append(f"- {f.content[:200]}")
3747
+ prompt_parts.append("")
3748
+
3749
+ # Add instructions
3750
+ prompt_parts.extend([
3751
+ "## Instructions",
3752
+ "1. Analyze each gap for severity and addressability",
3753
+ "2. Generate focused follow-up queries for addressable gaps",
3754
+ "3. Mark any gaps that are actually addressed by existing findings",
3755
+ "4. Recommend whether iteration is worthwhile given remaining gaps",
3756
+ "",
3757
+ "Return your analysis as JSON.",
3758
+ ])
3759
+
3760
+ return "\n".join(prompt_parts)
3761
+
3762
+ def _parse_refinement_response(
3763
+ self,
3764
+ content: str,
3765
+ state: DeepResearchState,
3766
+ ) -> dict[str, Any]:
3767
+ """Parse LLM response into structured refinement data.
3768
+
3769
+ Args:
3770
+ content: Raw LLM response content
3771
+ state: Current research state
3772
+
3773
+ Returns:
3774
+ Dict with 'success', 'follow_up_queries', 'addressed_gap_ids', etc.
3775
+ """
3776
+ result = {
3777
+ "success": False,
3778
+ "gap_analysis": [],
3779
+ "follow_up_queries": [],
3780
+ "addressed_gap_ids": [],
3781
+ "should_iterate": True,
3782
+ }
3783
+
3784
+ if not content:
3785
+ return result
3786
+
3787
+ # Try to extract JSON from the response
3788
+ json_str = self._extract_json(content)
3789
+ if not json_str:
3790
+ logger.warning("No JSON found in refinement response")
3791
+ return result
3792
+
3793
+ try:
3794
+ data = json.loads(json_str)
3795
+ except json.JSONDecodeError as e:
3796
+ logger.error("Failed to parse JSON from refinement response: %s", e)
3797
+ return result
3798
+
3799
+ # Parse gap analysis
3800
+ raw_analysis = data.get("gap_analysis", [])
3801
+ if isinstance(raw_analysis, list):
3802
+ for ga in raw_analysis:
3803
+ if not isinstance(ga, dict):
3804
+ continue
3805
+ result["gap_analysis"].append({
3806
+ "gap_id": ga.get("gap_id", ""),
3807
+ "severity": ga.get("severity", "moderate"),
3808
+ "addressable": ga.get("addressable", True),
3809
+ "rationale": ga.get("rationale", ""),
3810
+ })
3811
+
3812
+ # Parse follow-up queries
3813
+ raw_queries = data.get("follow_up_queries", [])
3814
+ if isinstance(raw_queries, list):
3815
+ for fq in raw_queries:
3816
+ if not isinstance(fq, dict):
3817
+ continue
3818
+ query = fq.get("query", "").strip()
3819
+ if not query:
3820
+ continue
3821
+ result["follow_up_queries"].append({
3822
+ "query": query,
3823
+ "target_gap_id": fq.get("target_gap_id", ""),
3824
+ "rationale": fq.get("rationale", ""),
3825
+ "priority": min(max(int(fq.get("priority", 1)), 1), 10),
3826
+ })
3827
+
3828
+ # Parse addressed gaps
3829
+ raw_addressed = data.get("addressed_gap_ids", [])
3830
+ if isinstance(raw_addressed, list):
3831
+ result["addressed_gap_ids"] = [
3832
+ gid for gid in raw_addressed if isinstance(gid, str)
3833
+ ]
3834
+
3835
+ # Parse iteration recommendation
3836
+ iter_rec = data.get("iteration_recommendation", {})
3837
+ if isinstance(iter_rec, dict):
3838
+ result["should_iterate"] = iter_rec.get("should_iterate", True)
3839
+
3840
+ # Mark success if we got at least one follow-up query
3841
+ result["success"] = len(result["follow_up_queries"]) > 0
3842
+
3843
+ return result
3844
+
3845
+ def _extract_fallback_queries(self, state: DeepResearchState) -> list[dict[str, Any]]:
3846
+ """Extract follow-up queries from existing gap suggestions as fallback.
3847
+
3848
+ Used when LLM parsing fails but we still want to progress.
3849
+
3850
+ Args:
3851
+ state: Current research state with gaps
3852
+
3853
+ Returns:
3854
+ List of follow-up query dictionaries
3855
+ """
3856
+ queries = []
3857
+ for gap in state.unresolved_gaps():
3858
+ for i, sq in enumerate(gap.suggested_queries[:2]): # Max 2 per gap
3859
+ queries.append({
3860
+ "query": sq,
3861
+ "target_gap_id": gap.id,
3862
+ "rationale": f"Suggested query from gap: {gap.description[:50]}",
3863
+ "priority": gap.priority,
3864
+ })
3865
+ return queries[:state.max_sub_queries] # Respect limit
3866
+
3867
+ # =========================================================================
3868
+ # Utility Methods
3869
+ # =========================================================================
3870
+
3871
+ def list_sessions(
3872
+ self,
3873
+ limit: int = 50,
3874
+ cursor: Optional[str] = None,
3875
+ completed_only: bool = False,
3876
+ ) -> list[dict[str, Any]]:
3877
+ """List deep research sessions.
3878
+
3879
+ Args:
3880
+ limit: Maximum sessions to return
3881
+ cursor: Pagination cursor (research_id to start after)
3882
+ completed_only: Only return completed sessions
3883
+
3884
+ Returns:
3885
+ List of session summaries
3886
+ """
3887
+ sessions = self.memory.list_deep_research(
3888
+ limit=limit,
3889
+ cursor=cursor,
3890
+ completed_only=completed_only,
3891
+ )
3892
+
3893
+ return [
3894
+ {
3895
+ "id": s.id,
3896
+ "query": s.original_query,
3897
+ "phase": s.phase.value,
3898
+ "iteration": s.iteration,
3899
+ "source_count": len(s.sources),
3900
+ "finding_count": len(s.findings),
3901
+ "is_complete": s.completed_at is not None,
3902
+ "created_at": s.created_at.isoformat(),
3903
+ "updated_at": s.updated_at.isoformat(),
3904
+ }
3905
+ for s in sessions
3906
+ ]
3907
+
3908
+ def delete_session(self, research_id: str) -> bool:
3909
+ """Delete a research session.
3910
+
3911
+ Args:
3912
+ research_id: ID of session to delete
3913
+
3914
+ Returns:
3915
+ True if deleted, False if not found
3916
+ """
3917
+ return self.memory.delete_deep_research(research_id)
3918
+
3919
    def resume_research(
        self,
        research_id: str,
        provider_id: Optional[str] = None,
        timeout_per_operation: float = 120.0,
        max_concurrent: int = 3,
    ) -> WorkflowResult:
        """Resume an interrupted deep research workflow from persisted state.

        Loads the DeepResearchState from persistence, validates it, and resumes
        execution from the current phase. Handles edge cases like corrupted
        state or missing sources gracefully. Not-found and already-completed
        sessions short-circuit with an explanatory WorkflowResult rather than
        raising.

        Args:
            research_id: ID of the research session to resume
            provider_id: Optional provider override for LLM operations
            timeout_per_operation: Timeout per operation in seconds
            max_concurrent: Maximum concurrent operations

        Returns:
            WorkflowResult with resumed research outcome or error
        """
        logger.info("Attempting to resume research session: %s", research_id)

        # Load existing state
        state = self.memory.load_deep_research(research_id)

        if state is None:
            logger.warning("Research session '%s' not found in persistence", research_id)
            return WorkflowResult(
                success=False,
                content="",
                error=f"Research session '{research_id}' not found. It may have expired or been deleted.",
                metadata={"research_id": research_id, "error_type": "not_found"},
            )

        # Check if already completed: return the stored report instead of
        # re-running; note metadata marks resumed=False in this case.
        if state.completed_at is not None:
            logger.info(
                "Research session '%s' already completed at %s",
                research_id,
                state.completed_at.isoformat(),
            )
            return WorkflowResult(
                success=True,
                content=state.report or "Research already completed",
                metadata={
                    "research_id": state.id,
                    "phase": state.phase.value,
                    "is_complete": True,
                    "completed_at": state.completed_at.isoformat(),
                    "resumed": False,
                },
            )

        # Validate state integrity before committing to an expensive resume.
        validation_result = self._validate_state_for_resume(state)
        if not validation_result["valid"]:
            logger.error(
                "Research session '%s' failed validation: %s",
                research_id,
                validation_result["error"],
            )
            return WorkflowResult(
                success=False,
                content="",
                error=validation_result["error"],
                metadata={
                    "research_id": research_id,
                    "error_type": "validation_failed",
                    "phase": state.phase.value,
                    "issues": validation_result.get("issues", []),
                },
            )

        # Log resumption context (progress counters aid post-hoc debugging)
        logger.info(
            "Resuming research '%s': phase=%s, iteration=%d/%d, "
            "sub_queries=%d (completed=%d), sources=%d, findings=%d, gaps=%d",
            research_id,
            state.phase.value,
            state.iteration,
            state.max_iterations,
            len(state.sub_queries),
            len(state.completed_sub_queries()),
            len(state.sources),
            len(state.findings),
            len(state.unresolved_gaps()),
        )

        # Resume workflow execution. Three execution paths:
        #   1. A loop is already running in this thread -> we cannot call
        #      run_until_complete here, so run the coroutine via asyncio.run
        #      on a fresh thread (its own event loop).
        #   2. A loop exists but is not running -> drive it directly.
        #   3. get_event_loop() raises RuntimeError (no current loop) ->
        #      fall back to asyncio.run in this thread.
        # NOTE(review): asyncio.get_event_loop() is deprecated for this use
        # since Python 3.10; asyncio.get_running_loop() + asyncio.run is the
        # modern pattern — confirm target Python versions before changing.
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor() as executor:
                    future = executor.submit(
                        asyncio.run,
                        self._execute_workflow_async(
                            state=state,
                            provider_id=provider_id,
                            timeout_per_operation=timeout_per_operation,
                            max_concurrent=max_concurrent,
                        ),
                    )
                    # Blocks until the worker thread's loop finishes.
                    result = future.result()
            else:
                result = loop.run_until_complete(
                    self._execute_workflow_async(
                        state=state,
                        provider_id=provider_id,
                        timeout_per_operation=timeout_per_operation,
                        max_concurrent=max_concurrent,
                    )
                )
        except RuntimeError:
            # No usable event loop in this thread; create one via asyncio.run.
            result = asyncio.run(
                self._execute_workflow_async(
                    state=state,
                    provider_id=provider_id,
                    timeout_per_operation=timeout_per_operation,
                    max_concurrent=max_concurrent,
                )
            )

        # Add resumption metadata so callers can distinguish a resumed run.
        if result.metadata is None:
            result.metadata = {}
        result.metadata["resumed"] = True
        result.metadata["resumed_from_phase"] = state.phase.value

        return result
4051
+
4052
+ def _validate_state_for_resume(self, state: DeepResearchState) -> dict[str, Any]:
4053
+ """Validate a DeepResearchState for safe resumption.
4054
+
4055
+ Checks for common corruption issues and missing required data.
4056
+
4057
+ Args:
4058
+ state: The state to validate
4059
+
4060
+ Returns:
4061
+ Dict with 'valid' bool and 'error'/'issues' if invalid
4062
+ """
4063
+ issues = []
4064
+
4065
+ # Check required fields
4066
+ if not state.original_query:
4067
+ issues.append("Missing original_query")
4068
+
4069
+ if not state.id:
4070
+ issues.append("Missing research ID")
4071
+
4072
+ # Phase-specific validation
4073
+ if state.phase.value in ("gathering", "analysis", "synthesis", "refinement"):
4074
+ # These phases require sub-queries from planning
4075
+ if not state.sub_queries:
4076
+ issues.append(f"No sub-queries found for {state.phase.value} phase")
4077
+
4078
+ if state.phase.value in ("analysis", "synthesis"):
4079
+ # These phases require sources from gathering
4080
+ if not state.sources and state.phase.value == "analysis":
4081
+ # Only warn for analysis - synthesis can work with findings
4082
+ issues.append("No sources found for analysis phase")
4083
+
4084
+ if state.phase.value == "synthesis":
4085
+ # Synthesis requires findings from analysis
4086
+ if not state.findings:
4087
+ issues.append("No findings found for synthesis phase")
4088
+
4089
+ # Check for corrupted collections (None instead of empty list)
4090
+ if state.sub_queries is None:
4091
+ issues.append("Corrupted sub_queries collection (null)")
4092
+ if state.sources is None:
4093
+ issues.append("Corrupted sources collection (null)")
4094
+ if state.findings is None:
4095
+ issues.append("Corrupted findings collection (null)")
4096
+ if state.gaps is None:
4097
+ issues.append("Corrupted gaps collection (null)")
4098
+
4099
+ if issues:
4100
+ return {
4101
+ "valid": False,
4102
+ "error": f"State validation failed: {'; '.join(issues)}",
4103
+ "issues": issues,
4104
+ }
4105
+
4106
+ return {"valid": True}
4107
+
4108
+ def list_resumable_sessions(self) -> list[dict[str, Any]]:
4109
+ """List all in-progress research sessions that can be resumed.
4110
+
4111
+ Scans persistence for sessions that are not completed and can be resumed.
4112
+
4113
+ Returns:
4114
+ List of session summaries with resumption context
4115
+ """
4116
+ sessions = self.memory.list_deep_research(completed_only=False)
4117
+
4118
+ resumable = []
4119
+ for state in sessions:
4120
+ if state.completed_at is not None:
4121
+ continue # Skip completed
4122
+
4123
+ validation = self._validate_state_for_resume(state)
4124
+
4125
+ resumable.append({
4126
+ "id": state.id,
4127
+ "query": state.original_query[:100] + ("..." if len(state.original_query) > 100 else ""),
4128
+ "phase": state.phase.value,
4129
+ "iteration": state.iteration,
4130
+ "max_iterations": state.max_iterations,
4131
+ "sub_queries": len(state.sub_queries),
4132
+ "completed_queries": len(state.completed_sub_queries()),
4133
+ "sources": len(state.sources),
4134
+ "findings": len(state.findings),
4135
+ "gaps": len(state.unresolved_gaps()),
4136
+ "can_resume": validation["valid"],
4137
+ "issues": validation.get("issues", []),
4138
+ "created_at": state.created_at.isoformat(),
4139
+ "updated_at": state.updated_at.isoformat(),
4140
+ })
4141
+
4142
+ return resumable